From 6d0d6c4b44f097549433ec61c6ab66bce58fbda9 Mon Sep 17 00:00:00 2001 From: Dimitris Athanasiou Date: Thu, 22 Nov 2018 12:56:47 +0000 Subject: [PATCH] [ML] Remove out-of-phase buckets feature (#318) This feature was never fully completed and in fact we no longer need it as the multibucket feature covers the benefits from supporting out-of-phase buckets. --- bin/autodetect/CCmdLineParser.cc | 6 - bin/autodetect/CCmdLineParser.h | 1 - bin/autodetect/Main.cc | 8 +- include/api/CAnomalyJob.h | 44 +- include/model/CAnomalyDetectorModel.h | 11 - include/model/CAnomalyDetectorModelConfig.h | 19 +- include/model/CBucketGatherer.h | 6 +- include/model/CCountingModel.h | 11 - include/model/CCountingModelFactory.h | 6 - include/model/CDataGatherer.h | 20 +- include/model/CEventRateBucketGatherer.h | 2 +- include/model/CEventRateModelFactory.h | 6 - .../model/CEventRatePopulationModelFactory.h | 6 - include/model/CHierarchicalResults.h | 30 - include/model/CIndividualModel.h | 10 - include/model/CMetricBucketGatherer.h | 2 +- include/model/CMetricModelFactory.h | 6 - include/model/CMetricPopulationModelFactory.h | 6 - include/model/CModelFactory.h | 3 - include/model/CModelParams.h | 3 - include/model/CModelPlotData.h | 8 - include/model/CPopulationModel.h | 10 - include/model/CResultsQueue.h | 79 - lib/api/CAnomalyJob.cc | 182 +-- lib/api/dump_state/Main.cc | 2 +- lib/api/unittest/CAnomalyJobLimitTest.cc | 2 +- lib/api/unittest/CAnomalyJobTest.cc | 1307 +---------------- lib/api/unittest/CAnomalyJobTest.h | 2 - lib/api/unittest/CMultiFileDataAdderTest.cc | 2 +- lib/api/unittest/CRestorePreviousStateTest.cc | 2 +- .../unittest/CSingleStreamDataAdderTest.cc | 2 +- lib/api/unittest/CStringStoreTest.cc | 17 +- lib/model/CAnomalyDetector.cc | 3 - lib/model/CAnomalyDetectorModelConfig.cc | 18 +- lib/model/CCountingModel.cc | 6 - lib/model/CCountingModelFactory.cc | 8 +- lib/model/CDataGatherer.cc | 184 +-- lib/model/CEventRateModel.cc | 4 +- lib/model/CEventRateModelFactory.cc | 4 
- lib/model/CEventRatePopulationModelFactory.cc | 4 - lib/model/CHierarchicalResults.cc | 365 ----- lib/model/CIndividualModel.cc | 15 - lib/model/CMetricModel.cc | 6 +- lib/model/CMetricModelFactory.cc | 4 - lib/model/CMetricPopulationModelFactory.cc | 4 - lib/model/CModelParams.cc | 3 +- lib/model/CModelPlotData.cc | 106 -- lib/model/CPopulationModel.cc | 16 - lib/model/CResultsQueue.cc | 152 -- lib/model/Makefile | 1 - .../unittest/CEventRateDataGathererTest.cc | 1 - lib/model/unittest/Mocks.cc | 5 - lib/model/unittest/Mocks.h | 4 - 53 files changed, 186 insertions(+), 2548 deletions(-) delete mode 100644 include/model/CResultsQueue.h delete mode 100644 lib/model/CResultsQueue.cc diff --git a/bin/autodetect/CCmdLineParser.cc b/bin/autodetect/CCmdLineParser.cc index 5f7d5a5771..52c396a3de 100644 --- a/bin/autodetect/CCmdLineParser.cc +++ b/bin/autodetect/CCmdLineParser.cc @@ -50,7 +50,6 @@ bool CCmdLineParser::parse(int argc, bool& isPersistFileNamedPipe, size_t& maxAnomalyRecords, bool& memoryUsage, - std::size_t& bucketResultsDelay, bool& multivariateByFields, TStrVec& clauseTokens) { try { @@ -111,8 +110,6 @@ bool CCmdLineParser::parse(int argc, "The maximum number of records to be outputted for each bucket. 
Defaults to 100, a value 0 removes the limit.") ("memoryUsage", "Log the model memory usage at the end of the job") - ("resultFinalizationWindow", boost::program_options::value(), - "The numer of half buckets to store before choosing which overlapping bucket has the biggest anomaly") ("multivariateByFields", "Optional flag to enable multi-variate analysis of correlated by fields") ; @@ -222,9 +219,6 @@ bool CCmdLineParser::parse(int argc, if (vm.count("memoryUsage") > 0) { memoryUsage = true; } - if (vm.count("resultFinalizationWindow") > 0) { - bucketResultsDelay = vm["resultFinalizationWindow"].as(); - } if (vm.count("multivariateByFields") > 0) { multivariateByFields = true; } diff --git a/bin/autodetect/CCmdLineParser.h b/bin/autodetect/CCmdLineParser.h index 237af3465c..8ed2dd4540 100644 --- a/bin/autodetect/CCmdLineParser.h +++ b/bin/autodetect/CCmdLineParser.h @@ -62,7 +62,6 @@ class CCmdLineParser { bool& isPersistFileNamedPipe, size_t& maxAnomalyRecords, bool& memoryUsage, - std::size_t& bucketResultsDelay, bool& multivariateByFields, TStrVec& clauseTokens); diff --git a/bin/autodetect/Main.cc b/bin/autodetect/Main.cc index 1b68dc7951..f5a0305ade 100644 --- a/bin/autodetect/Main.cc +++ b/bin/autodetect/Main.cc @@ -86,7 +86,6 @@ int main(int argc, char** argv) { bool isPersistFileNamedPipe(false); size_t maxAnomalyRecords(100u); bool memoryUsage(false); - std::size_t bucketResultsDelay(0); bool multivariateByFields(false); TStrVec clauseTokens; if (ml::autodetect::CCmdLineParser::parse( @@ -96,8 +95,8 @@ int main(int argc, char** argv) { timeFormat, quantilesStateFile, deleteStateFiles, persistInterval, maxQuantileInterval, inputFileName, isInputFileNamedPipe, outputFileName, isOutputFileNamedPipe, restoreFileName, isRestoreFileNamedPipe, - persistFileName, isPersistFileNamedPipe, maxAnomalyRecords, memoryUsage, - bucketResultsDelay, multivariateByFields, clauseTokens) == false) { + persistFileName, isPersistFileNamedPipe, maxAnomalyRecords, + memoryUsage, 
multivariateByFields, clauseTokens) == false) { return EXIT_FAILURE; } @@ -143,8 +142,7 @@ int main(int argc, char** argv) { summaryCountFieldName.empty() ? ml::model_t::E_None : ml::model_t::E_Manual); ml::model::CAnomalyDetectorModelConfig modelConfig = ml::model::CAnomalyDetectorModelConfig::defaultConfig( - bucketSpan, summaryMode, summaryCountFieldName, latency, - bucketResultsDelay, multivariateByFields); + bucketSpan, summaryMode, summaryCountFieldName, latency, multivariateByFields); modelConfig.detectionRules(ml::model::CAnomalyDetectorModelConfig::TIntDetectionRuleVecUMapCRef( fieldConfig.detectionRules())); modelConfig.scheduledEvents(ml::model::CAnomalyDetectorModelConfig::TStrDetectionRulePrVecCRef( diff --git a/include/api/CAnomalyJob.h b/include/api/CAnomalyJob.h index e7425fc25e..76e96edb06 100644 --- a/include/api/CAnomalyJob.h +++ b/include/api/CAnomalyJob.h @@ -12,13 +12,11 @@ #include #include -#include #include #include #include #include #include -#include #include #include @@ -117,8 +115,6 @@ class API_EXPORT CAnomalyJob : public CDataProcessor { std::pair; using TKeyCRefAnomalyDetectorPtrPrVec = std::vector; using TModelPlotDataVec = model::CAnomalyDetector::TModelPlotDataVec; - using TModelPlotDataVecCItr = TModelPlotDataVec::const_iterator; - using TModelPlotDataVecQueue = model::CBucketQueue; struct API_EXPORT SRestoredStateDetail { ERestoreStateStatus s_RestoredStateStatus; @@ -126,17 +122,13 @@ class API_EXPORT CAnomalyJob : public CDataProcessor { }; struct SBackgroundPersistArgs { - SBackgroundPersistArgs(const model::CResultsQueue& resultsQueue, - const TModelPlotDataVecQueue& modelPlotQueue, - core_t::TTime time, + SBackgroundPersistArgs(core_t::TTime time, const model::CResourceMonitor::SResults& modelSizeStats, const model::CInterimBucketCorrector& interimBucketCorrector, const model::CHierarchicalResultsAggregator& aggregator, core_t::TTime latestRecordTime, core_t::TTime lastResultsTime); - model::CResultsQueue s_ResultsQueue; - 
TModelPlotDataVecQueue s_ModelPlotQueue; core_t::TTime s_Time; model::CResourceMonitor::SResults s_ModelSizeStats; model::CInterimBucketCorrector s_InterimBucketCorrector; @@ -224,14 +216,11 @@ class API_EXPORT CAnomalyJob : public CDataProcessor { void outputInterimResults(core_t::TTime bucketStartTime); //! Helper function for outputResults. - //! \p processingTimer is the processing time can be written to the bucket - //! \p sumPastProcessingTime is the total time previously spent processing - //! but resulted in no bucket being outputted. + //! \p processingTime is the processing time of the bucket void writeOutResults(bool interim, model::CHierarchicalResults& results, core_t::TTime bucketTime, - uint64_t processingTime, - uint64_t sumPastProcessingTime); + uint64_t processingTime); //! Reset buckets in the range specified by the control message. void resetBuckets(const std::string& controlMessage); @@ -259,8 +248,6 @@ class API_EXPORT CAnomalyJob : public CDataProcessor { //! Persist the detectors to a stream. bool persistState(const std::string& descriptionPrefix, - const model::CResultsQueue& resultsQueue, - const TModelPlotDataVecQueue& modelPlotQueue, core_t::TTime time, const TKeyCRefAnomalyDetectorPtrPrVec& detectors, const model::CResourceMonitor::SResults& modelSizeStats, @@ -296,16 +283,9 @@ class API_EXPORT CAnomalyJob : public CDataProcessor { //! \param[in] endTime The end of the time interval to skip sampling. void skipSampling(core_t::TTime endTime); - //! Outputs queued results and resets the queue to the given \p startTime - void flushAndResetResultsQueue(core_t::TTime startTime); - //! Roll time forward to \p time void timeNow(core_t::TTime time); - //! Get the bucketLength, or half the bucketLength if - //! out-of-phase buckets are active - core_t::TTime effectiveBucketLength() const; - //! Update configuration void updateConfig(const std::string& config); @@ -333,15 +313,12 @@ class API_EXPORT CAnomalyJob : public CDataProcessor { //! 
specified time range. void generateModelPlot(core_t::TTime startTime, core_t::TTime endTime, - const model::CAnomalyDetector& detector); + const model::CAnomalyDetector& detector, + TModelPlotDataVec& modelPlotData); //! Write the pre-generated model plot to the output stream of the user's //! choosing: either file or streamed to the API - void writeOutModelPlot(core_t::TTime resultsTime); - - //! Write the pre-generated model plot to the output stream of the user's - //! choosing: either file or streamed to the API - void writeOutModelPlot(core_t::TTime, CModelPlotDataJsonWriter& writer); + void writeOutModelPlot(const TModelPlotDataVec& modelPlotData); //! Persist one detector to a stream. //! This method is static so that there is no danger of it accessing @@ -477,15 +454,6 @@ class API_EXPORT CAnomalyJob : public CDataProcessor { //! The hierarchical results normalizer. model::CHierarchicalResultsNormalizer m_Normalizer; - //! Store the last N half-buckets' results in order - //! to choose the best result - model::CResultsQueue m_ResultsQueue; - - //! Also store the model plot for the buckets for each - //! result time - these will be output when the corresponding - //! result is output - TModelPlotDataVecQueue m_ModelPlotQueue; - friend class ::CBackgroundPersisterTest; friend class ::CAnomalyJobTest; }; diff --git a/include/model/CAnomalyDetectorModel.h b/include/model/CAnomalyDetectorModel.h index d9046f8aec..135032e17b 100644 --- a/include/model/CAnomalyDetectorModel.h +++ b/include/model/CAnomalyDetectorModel.h @@ -352,17 +352,6 @@ class MODEL_EXPORT CAnomalyDetectorModel { core_t::TTime endTime, CResourceMonitor& resourceMonitor) = 0; - //! This samples the bucket statistics, and any state needed - //! by computeProbablity, in the time interval [\p startTime, - //! \p endTime], but does not update the model. This is needed - //! by the results preview. - //! - //! \param[in] startTime The start of the time interval to sample. - //! 
\param[in] endTime The end of the time interval to sample. - virtual void sampleOutOfPhase(core_t::TTime startTime, - core_t::TTime endTime, - CResourceMonitor& resourceMonitor) = 0; - //! Rolls time to \p endTime while skipping sampling the models for //! buckets within the gap. //! diff --git a/include/model/CAnomalyDetectorModelConfig.h b/include/model/CAnomalyDetectorModelConfig.h index b47b7b8851..7d62f64949 100644 --- a/include/model/CAnomalyDetectorModelConfig.h +++ b/include/model/CAnomalyDetectorModelConfig.h @@ -114,10 +114,6 @@ class MODEL_EXPORT CAnomalyDetectorModelConfig { //! Bucket length corresponding to the default decay and learn rates. static const core_t::TTime STANDARD_BUCKET_LENGTH; - - //! The default number of half buckets to store before choosing which - //! overlapping bucket has the biggest anomaly - static const std::size_t DEFAULT_BUCKET_RESULTS_DELAY; //@} //! \name Modelling @@ -239,15 +235,12 @@ class MODEL_EXPORT CAnomalyDetectorModelConfig { //! then this is the name of the field holding the summary count. //! \param[in] latency The amount of time records are buffered for, to //! allow out-of-order records to be seen by the models in order. - //! \param[in] bucketResultsDelay The number of half-bucket results - //! to sit on before giving a definitive result. //! \param[in] multivariateByFields Should multivariate analysis of //! correlated 'by' fields be performed? static CAnomalyDetectorModelConfig defaultConfig(core_t::TTime bucketLength, model_t::ESummaryMode summaryMode, const std::string& summaryCountFieldName, core_t::TTime latency, - std::size_t bucketResultsDelay, bool multivariateByFields); //! Overload using defaults. 
@@ -256,8 +249,7 @@ class MODEL_EXPORT CAnomalyDetectorModelConfig { model_t::ESummaryMode summaryMode = model_t::E_None, const std::string& summaryCountFieldName = "") { return defaultConfig(bucketLength, summaryMode, summaryCountFieldName, - DEFAULT_LATENCY_BUCKETS * bucketLength, - DEFAULT_BUCKET_RESULTS_DELAY, false); + DEFAULT_LATENCY_BUCKETS * bucketLength, false); } //! Get the factor to normalize all bucket lengths to the default @@ -273,8 +265,6 @@ class MODEL_EXPORT CAnomalyDetectorModelConfig { //! Set the data bucketing interval. void bucketLength(core_t::TTime length); - //! Set the number of buckets to delay finalizing out-of-phase buckets. - void bucketResultsDelay(std::size_t delay); //! Set the single interim bucket correction calculator. void interimBucketCorrector(const TInterimBucketCorrectorPtr& interimBucketCorrector); //! Set whether to model multibucket features. @@ -358,9 +348,6 @@ class MODEL_EXPORT CAnomalyDetectorModelConfig { //! numbers of buckets. std::size_t latencyBuckets() const; - //! Get the bucket result delay window. - std::size_t bucketResultsDelay() const; - //! Get the single interim bucket correction calculator. const CInterimBucketCorrector& interimBucketCorrector() const; @@ -444,10 +431,6 @@ class MODEL_EXPORT CAnomalyDetectorModelConfig { //! Bucket length. core_t::TTime m_BucketLength; - //! Get the bucket result delay window: The numer of half buckets to - //! store before choosing which overlapping bucket has the biggest anomaly - std::size_t m_BucketResultsDelay; - //! Should multivariate analysis of correlated 'by' fields be performed? bool m_MultivariateByFields; diff --git a/include/model/CBucketGatherer.h b/include/model/CBucketGatherer.h index 3ffe53a861..524a2d959a 100644 --- a/include/model/CBucketGatherer.h +++ b/include/model/CBucketGatherer.h @@ -56,7 +56,8 @@ class CResourceMonitor; //! IMPLEMENTATION:\n //! This functionality has been separated from the CDataGatherer in order //! 
to allow the CDataGatherer to support multiple overlapping buckets and -//! buckets with different time spans. +//! buckets with different time spans. The overlapping feature +//! has since been removed, but this class is kept to avoid BWC issues. class MODEL_EXPORT CBucketGatherer { public: using TDoubleVec = std::vector; @@ -394,6 +395,9 @@ class MODEL_EXPORT CBucketGatherer { //! Create samples if possible for the bucket pointed out by \p time. virtual void sample(core_t::TTime time) = 0; + //! Persist state by passing information to \p inserter. + virtual void acceptPersistInserter(core::CStatePersistInserter& inserter) const = 0; + private: //! Resize the necessary data structures so they can hold values //! for the person and/or attribute identified by \p pid and \p cid, diff --git a/include/model/CCountingModel.h b/include/model/CCountingModel.h index 0376361f6c..1b8cb4da54 100644 --- a/include/model/CCountingModel.h +++ b/include/model/CCountingModel.h @@ -160,17 +160,6 @@ class MODEL_EXPORT CCountingModel : public CAnomalyDetectorModel { core_t::TTime endTime, CResourceMonitor& resourceMonitor); - //! This samples the bucket statistics, and any state needed - //! by computeProbablity, in the time interval [\p startTime, - //! \p endTime], but does not update the model. This is needed - //! by the results preview. - //! - //! \param[in] startTime The start of the time interval to sample. - //! \param[in] endTime The end of the time interval to sample. - virtual void sampleOutOfPhase(core_t::TTime startTime, - core_t::TTime endTime, - CResourceMonitor& resourceMonitor); - //! This samples the bucket statistics, in the time interval //! [\p startTime, \p endTime]. //! 
diff --git a/include/model/CCountingModelFactory.h b/include/model/CCountingModelFactory.h index 820459881d..50f2b3767c 100644 --- a/include/model/CCountingModelFactory.h +++ b/include/model/CCountingModelFactory.h @@ -117,9 +117,6 @@ class MODEL_EXPORT CCountingModelFactory : public CModelFactory { //! Set the features which will be modeled. virtual void features(const TFeatureVec& features); - - //! Set the bucket results delay - virtual void bucketResultsDelay(std::size_t bucketResultsDelay); //@} //! Get the minimum seasonal variance scale @@ -153,9 +150,6 @@ class MODEL_EXPORT CCountingModelFactory : public CModelFactory { //! The count features which will be modeled. TFeatureVec m_Features; - //! The bucket results delay. - std::size_t m_BucketResultsDelay; - //! A cached search key. mutable TOptionalSearchKey m_SearchKeyCache; }; diff --git a/include/model/CDataGatherer.h b/include/model/CDataGatherer.h index 77eb6641e6..167e67cdf4 100644 --- a/include/model/CDataGatherer.h +++ b/include/model/CDataGatherer.h @@ -120,7 +120,6 @@ class MODEL_EXPORT CDataGatherer { CBucketQueue; using TSearchKeyCRef = boost::reference_wrapper; using TBucketGathererPtr = std::unique_ptr; - using TBucketGathererPtrVec = std::vector; using TFeatureAnyPr = std::pair; using TFeatureAnyPrVec = std::vector; using TMetricCategoryVec = std::vector; @@ -338,7 +337,7 @@ class MODEL_EXPORT CDataGatherer { core_t::TTime bucketLength, std::vector>& result) const { TFeatureAnyPrVec rawFeatureData; - this->chooseBucketGatherer(time).featureData(time, bucketLength, rawFeatureData); + m_BucketGatherer->featureData(time, bucketLength, rawFeatureData); bool succeeded = true; @@ -558,7 +557,7 @@ class MODEL_EXPORT CDataGatherer { void timeNow(core_t::TTime time); //! Print the current bucket. - std::string printCurrentBucket(core_t::TTime time) const; + std::string printCurrentBucket() const; //! Record a attribute called \p attribute. 
std::size_t addAttribute(const std::string& attribute, @@ -591,9 +590,6 @@ class MODEL_EXPORT CDataGatherer { //! Reset bucket and return true if bucket was successfully //! reset or false otherwise. - //! Note that this should not be used in conjunction with out-of-phase buckets - //! where the concept of resetting a specific bucketed period of time is - //! not valid. bool resetBucket(core_t::TTime bucketStart); //! Release memory that is no longer needed @@ -693,14 +689,6 @@ class MODEL_EXPORT CDataGatherer { using TModelParamsCRef = boost::reference_wrapper; private: - //! Select the correct bucket gatherer based on the time: if we have - //! out-of-phase buckets, select either in-phase or out-of-phase. - const CBucketGatherer& chooseBucketGatherer(core_t::TTime time) const; - - //! Select the correct bucket gatherer based on the time: if we have - //! out-of-phase buckets, select either in-phase or out-of-phase. - CBucketGatherer& chooseBucketGatherer(core_t::TTime time); - //! Restore state from supplied traverser. bool acceptRestoreTraverser(const std::string& summaryCountFieldName, const std::string& personFieldName, @@ -738,9 +726,9 @@ class MODEL_EXPORT CDataGatherer { //! The collection of features on which to gather data. TFeatureVec m_Features; - //! The collection of bucket gatherers which contain the bucket-specific + //! The bucket gatherer which contains the bucket-specific //! metrics and counts. - TBucketGathererPtrVec m_Gatherers; + TBucketGathererPtr m_BucketGatherer; //! Indicates whether the data being gathered are already summarized //! by an external aggregation process. 
diff --git a/include/model/CEventRateBucketGatherer.h b/include/model/CEventRateBucketGatherer.h index 24fccd3700..8cd3d33e78 100644 --- a/include/model/CEventRateBucketGatherer.h +++ b/include/model/CEventRateBucketGatherer.h @@ -155,7 +155,7 @@ class MODEL_EXPORT CEventRateBucketGatherer final : public CBucketGatherer { bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser); //! Persist state by passing information to the supplied inserter - void acceptPersistInserter(core::CStatePersistInserter& inserter) const; + virtual void acceptPersistInserter(core::CStatePersistInserter& inserter) const; //! Create a clone of this data gatherer that will result in the same //! persisted state. The clone may be incomplete in ways that do not diff --git a/include/model/CEventRateModelFactory.h b/include/model/CEventRateModelFactory.h index 34b72e38b0..53ca1f0905 100644 --- a/include/model/CEventRateModelFactory.h +++ b/include/model/CEventRateModelFactory.h @@ -127,9 +127,6 @@ class MODEL_EXPORT CEventRateModelFactory final : public CModelFactory { //! Set the features which will be modeled. virtual void features(const TFeatureVec& features); - - //! Set the bucket results delay - virtual void bucketResultsDelay(std::size_t bucketResultsDelay); //@} //! Get the minimum seasonal variance scale @@ -171,9 +168,6 @@ class MODEL_EXPORT CEventRateModelFactory final : public CModelFactory { //! The count features which will be modeled. TFeatureVec m_Features; - //! The bucket results delay. - std::size_t m_BucketResultsDelay = 0; - //! A cached search key. mutable TOptionalSearchKey m_SearchKeyCache; }; diff --git a/include/model/CEventRatePopulationModelFactory.h b/include/model/CEventRatePopulationModelFactory.h index bec5da8575..af20e5710b 100644 --- a/include/model/CEventRatePopulationModelFactory.h +++ b/include/model/CEventRatePopulationModelFactory.h @@ -129,9 +129,6 @@ class MODEL_EXPORT CEventRatePopulationModelFactory final : public CModelFactory //! 
Set the features which will be modeled. virtual void features(const TFeatureVec& features); - - //! Set the bucket results delay - virtual void bucketResultsDelay(std::size_t bucketResultsDelay); //@} //! Get the minimum seasonal variance scale @@ -179,9 +176,6 @@ class MODEL_EXPORT CEventRatePopulationModelFactory final : public CModelFactory //! The count features which will be modeled. TFeatureVec m_Features; - //! The bucket results delay. - std::size_t m_BucketResultsDelay = 0; - //! A cached search key. mutable TOptionalSearchKey m_SearchKeyCache; }; diff --git a/include/model/CHierarchicalResults.h b/include/model/CHierarchicalResults.h index fbf2bb494a..9f4af15d8a 100644 --- a/include/model/CHierarchicalResults.h +++ b/include/model/CHierarchicalResults.h @@ -29,10 +29,6 @@ class CHierarchicalResultsTest; namespace ml { -namespace core { -class CStatePersistInserter; -class CStateRestoreTraverser; -} namespace model { class CAnomalyDetectorModel; class CLimits; @@ -72,12 +68,6 @@ using TStr1Vec = core::CSmallVector; struct MODEL_EXPORT SResultSpec { SResultSpec(); - //! Persist the result specification by passing information to \p inserter. - void acceptPersistInserter(core::CStatePersistInserter& inserter) const; - - //! Restore the result specification reading state from \p traverser. - bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser); - //! Print of the specification for debugging. std::string print() const; @@ -154,20 +144,6 @@ struct MODEL_EXPORT SNode { //! Efficient swap void swap(SNode& other); - //! Persist the node state by passing information to \p inserter. - void acceptPersistInserter1(core::CStatePersistInserter& inserter, - TNodePtrSizeUMap& nodePointers) const; - //! Persist the node connectivity by passing information to \p inserter. - void acceptPersistInserter2(core::CStatePersistInserter& inserter, - const TNodePtrSizeUMap& nodePointers) const; - - //! Restore the node state reading state from \p traverser. 
- bool acceptRestoreTraverser1(core::CStateRestoreTraverser& traverser, - TSizeNodePtrUMap& nodePointers); - //! Restore the node connectivity reading state from \p traverser. - bool acceptRestoreTraverser2(core::CStateRestoreTraverser& traverser, - const TSizeNodePtrUMap& nodePointers); - //! \name Connectivity //@{ //! The node's parent. @@ -381,12 +357,6 @@ class MODEL_EXPORT CHierarchicalResults { //! Get type of result model_t::CResultType resultType() const; - //! Persist the results by passing information to \p inserter. - void acceptPersistInserter(core::CStatePersistInserter& inserter) const; - - //! Restore the results reading state from \p traverser. - bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser); - //! Print the results for debug. std::string print() const; diff --git a/include/model/CIndividualModel.h b/include/model/CIndividualModel.h index b2ed9e62fc..9931785cb8 100644 --- a/include/model/CIndividualModel.h +++ b/include/model/CIndividualModel.h @@ -113,16 +113,6 @@ class MODEL_EXPORT CIndividualModel : public CAnomalyDetectorModel { core_t::TTime endTime, CResourceMonitor& resourceMonitor) = 0; - //! Sample any state needed by computeProbablity for the out- - //! of-phase bucket in the time interval [\p startTime, \p endTime] - //! but do not update the model. - //! - //! \param[in] startTime The start of the time interval to sample. - //! \param[in] endTime The end of the time interval to sample. - virtual void sampleOutOfPhase(core_t::TTime startTime, - core_t::TTime endTime, - CResourceMonitor& resourceMonitor); - //! Update the model with features samples from the time interval //! [\p startTime, \p endTime]. //! diff --git a/include/model/CMetricBucketGatherer.h b/include/model/CMetricBucketGatherer.h index ed1a317978..0e259e9990 100644 --- a/include/model/CMetricBucketGatherer.h +++ b/include/model/CMetricBucketGatherer.h @@ -90,7 +90,7 @@ class MODEL_EXPORT CMetricBucketGatherer final : public CBucketGatherer { //! 
\name Persistence //@{ //! Persist state by passing information to the supplied inserter - void acceptPersistInserter(core::CStatePersistInserter& inserter) const; + virtual void acceptPersistInserter(core::CStatePersistInserter& inserter) const; //! Fill in the state from \p traverser. bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser); diff --git a/include/model/CMetricModelFactory.h b/include/model/CMetricModelFactory.h index 8b77d304ed..8398b57c1d 100644 --- a/include/model/CMetricModelFactory.h +++ b/include/model/CMetricModelFactory.h @@ -130,9 +130,6 @@ class MODEL_EXPORT CMetricModelFactory final : public CModelFactory { //! Set the modeled bucket length. virtual void bucketLength(core_t::TTime bucketLength); - - //! Set the bucket results delay - virtual void bucketResultsDelay(std::size_t bucketResultsDelay); //@} //! Get the minimum seasonal variance scale @@ -179,9 +176,6 @@ class MODEL_EXPORT CMetricModelFactory final : public CModelFactory { //! The bucket length to analyze. core_t::TTime m_BucketLength; - //! The bucket results delay. - std::size_t m_BucketResultsDelay = 0; - //! A cached search key. mutable TOptionalSearchKey m_SearchKeyCache; }; diff --git a/include/model/CMetricPopulationModelFactory.h b/include/model/CMetricPopulationModelFactory.h index 9ba356514b..bc7f764a66 100644 --- a/include/model/CMetricPopulationModelFactory.h +++ b/include/model/CMetricPopulationModelFactory.h @@ -129,9 +129,6 @@ class MODEL_EXPORT CMetricPopulationModelFactory final : public CModelFactory { //! Set the features which will be modeled. virtual void features(const TFeatureVec& features); - - //! Set the bucket results delay - virtual void bucketResultsDelay(std::size_t bucketResultsDelay); //@} //! Get the minimum seasonal variance scale @@ -185,9 +182,6 @@ class MODEL_EXPORT CMetricPopulationModelFactory final : public CModelFactory { //! The count features which will be modeled. TFeatureVec m_Features; - //! The bucket results delay. 
- std::size_t m_BucketResultsDelay = 0; - //! A cached search key. mutable TOptionalSearchKey m_SearchKeyCache; }; diff --git a/include/model/CModelFactory.h b/include/model/CModelFactory.h index 619fc5522f..dd0de5b97f 100644 --- a/include/model/CModelFactory.h +++ b/include/model/CModelFactory.h @@ -287,9 +287,6 @@ class MODEL_EXPORT CModelFactory { //! fine-grained sampling when there is latency. void sampleCountFactor(std::size_t sampleCountFactor); - //! Set the bucket results delay - virtual void bucketResultsDelay(std::size_t bucketResultsDelay) = 0; - //! Set whether the model should exclude frequent hitters from the //! calculations. void excludeFrequent(model_t::EExcludeFrequent excludeFrequent); diff --git a/include/model/CModelParams.h b/include/model/CModelParams.h index 1a178b9c21..f94d6e2b78 100644 --- a/include/model/CModelParams.h +++ b/include/model/CModelParams.h @@ -173,9 +173,6 @@ struct MODEL_EXPORT SModelParams { //! value is judged to have any influence on a feature value. double s_InfluenceCutoff; - //! The number of buckets to delay finalizing out-of-phase buckets. - std::size_t s_BucketResultsDelay; - //! The minimum data size to trigger fuzzy de-duplication of samples to add //! to population models. std::size_t s_MinimumToFuzzyDeduplicate; diff --git a/include/model/CModelPlotData.h b/include/model/CModelPlotData.h index 9a50e24e81..c86ef2db31 100644 --- a/include/model/CModelPlotData.h +++ b/include/model/CModelPlotData.h @@ -17,10 +17,6 @@ #include namespace ml { -namespace core { -class CStatePersistInserter; -class CStateRestoreTraverser; -} namespace model { //! 
\brief Data necessary to create a model plot @@ -35,8 +31,6 @@ class MODEL_EXPORT CModelPlotData { SByFieldData(double lowerBound, double upperBound, double median); void addValue(const std::string& personName, double value); - void acceptPersistInserter(core::CStatePersistInserter& inserter) const; - bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser); double s_LowerBound; double s_UpperBound; @@ -63,8 +57,6 @@ class MODEL_EXPORT CModelPlotData { const std::string& byFieldName, core_t::TTime bucketSpan, int detectorIndex); - void acceptPersistInserter(core::CStatePersistInserter& inserter) const; - bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser); TFeatureStrByFieldDataUMapUMapCItr begin() const; TFeatureStrByFieldDataUMapUMapCItr end() const; SByFieldData& get(const model_t::EFeature& feature, const std::string& byFieldValue); diff --git a/include/model/CPopulationModel.h b/include/model/CPopulationModel.h index 0066fa46aa..77e93627b0 100644 --- a/include/model/CPopulationModel.h +++ b/include/model/CPopulationModel.h @@ -140,16 +140,6 @@ class MODEL_EXPORT CPopulationModel : public CAnomalyDetectorModel { //! \name Update //@{ - //! Sample any state needed by computeProbablity for the out- - //! of-phase bucket in the time interval [\p startTime, \p endTime] - //! but do not update the model. - //! - //! \param[in] startTime The start of the time interval to sample. - //! \param[in] endTime The end of the time interval to sample. - virtual void sampleOutOfPhase(core_t::TTime startTime, - core_t::TTime endTime, - CResourceMonitor& resourceMonitor); - //! Update the rates for \p feature and \p people. virtual void sample(core_t::TTime startTime, core_t::TTime endTime, diff --git a/include/model/CResultsQueue.h b/include/model/CResultsQueue.h deleted file mode 100644 index 198b450974..0000000000 --- a/include/model/CResultsQueue.h +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Copyright Elasticsearch B.V. 
and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License; - * you may not use this file except in compliance with the Elastic License. - */ -#ifndef INCLUDED_ml_model_CResultsQueue_h -#define INCLUDED_ml_model_CResultsQueue_h - -#include -#include - -namespace ml { -namespace model { -class CHierarchicalResults; - -//! \brief A queue for CHierarchicalResults objects. -//! -//! DESCRIPTION:\n -//! A queue for CHierarchicalResults objects that handles -//! overlapping bucket result selection -class MODEL_EXPORT CResultsQueue { -public: - using THierarchicalResultsQueue = CBucketQueue; - -public: - //! Constructor - CResultsQueue(std::size_t delayBuckets, core_t::TTime bucketLength); - - //! Reset the underlying queue - void reset(core_t::TTime time); - - //! Have we got unsent items in the queue? - bool hasInterimResults() const; - - //! Push to the underlying queue - void push(const CHierarchicalResults& item, core_t::TTime time); - - //! Push to the underlying queue - void push(const CHierarchicalResults& item); - - //! Get a result from the queue - const CHierarchicalResults& get(core_t::TTime time) const; - - //! Get a result from the queue - CHierarchicalResults& get(core_t::TTime time); - - //! Returns the size of the queue. - std::size_t size() const; - - //! Get the latest result from the queue - CHierarchicalResults& latest(); - - //! Returns the latest bucket end time, as tracked by the queue - core_t::TTime latestBucketEnd() const; - - //! Select which queued result object to output, based on anomaly score - //! and which have been output most recently - core_t::TTime chooseResultTime(core_t::TTime bucketStartTime, - core_t::TTime bucketLength, - model::CHierarchicalResults& results); - - //! Standard persistence - void acceptPersistInserter(core::CStatePersistInserter& inserter) const; - - //! 
Standard restoration - bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser); - -private: - //! The collection of results objects - THierarchicalResultsQueue m_Results; - - //! Which of the previous results did we output? - size_t m_LastResultsIndex; -}; - -} // model -} // ml - -#endif // INCLUDED_ml_model_CResultsQueue_h diff --git a/lib/api/CAnomalyJob.cc b/lib/api/CAnomalyJob.cc index 124c10934a..ef136bade2 100644 --- a/lib/api/CAnomalyJob.cc +++ b/lib/api/CAnomalyJob.cc @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -68,9 +69,14 @@ const std::string VERSION_TAG("b"); const std::string KEY_TAG("c"); const std::string PARTITION_FIELD_TAG("d"); const std::string DETECTOR_TAG("e"); -const std::string HIERARCHICAL_RESULTS_TAG("f"); + +// This is no longer used - removed in 6.6 +// const std::string HIERARCHICAL_RESULTS_TAG("f"); const std::string LATEST_RECORD_TIME_TAG("h"); -const std::string MODEL_PLOT_TAG("i"); + +// This is no longer used - removed in 6.6 +// const std::string MODEL_PLOT_TAG("i"); + const std::string LAST_RESULTS_TIME_TAG("j"); const std::string INTERIM_BUCKET_CORRECTOR_TAG("k"); } @@ -104,9 +110,7 @@ CAnomalyJob::CAnomalyJob(const std::string& jobId, m_PeriodicPersister(periodicPersister), m_MaxQuantileInterval(maxQuantileInterval), m_LastNormalizerPersistTime(core::CTimeUtils::now()), m_LatestRecordTime(0), - m_LastResultsTime(0), m_Aggregator(modelConfig), m_Normalizer(modelConfig), - m_ResultsQueue(m_ModelConfig.bucketResultsDelay(), this->effectiveBucketLength()), - m_ModelPlotQueue(m_ModelConfig.bucketResultsDelay(), this->effectiveBucketLength(), 0) { + m_LastResultsTime(0), m_Aggregator(modelConfig), m_Normalizer(modelConfig) { m_JsonOutputWriter.limitNumberRecords(maxAnomalyRecords); m_Limits.resourceMonitor().memoryUsageReporter( @@ -396,24 +400,23 @@ void CAnomalyJob::advanceTime(const std::string& time_) { void CAnomalyJob::outputBucketResultsUntil(core_t::TTime time) { // If the 
bucket time has increased, output results for all field names core_t::TTime bucketLength = m_ModelConfig.bucketLength(); - core_t::TTime effectiveBucketLength = this->effectiveBucketLength(); core_t::TTime latency = m_ModelConfig.latency(); if (m_LastFinalisedBucketEndTime == 0) { - m_LastFinalisedBucketEndTime = std::max( - m_LastFinalisedBucketEndTime, - maths::CIntegerTools::floor(time, effectiveBucketLength) - latency); + m_LastFinalisedBucketEndTime = + std::max(m_LastFinalisedBucketEndTime, + maths::CIntegerTools::floor(time, bucketLength) - latency); } m_Normalizer.resetBigChange(); for (core_t::TTime lastBucketEndTime = m_LastFinalisedBucketEndTime; lastBucketEndTime + bucketLength + latency <= time; - lastBucketEndTime += effectiveBucketLength) { + lastBucketEndTime += bucketLength) { this->outputResults(lastBucketEndTime); m_Limits.resourceMonitor().decreaseMargin(bucketLength); m_Limits.resourceMonitor().sendMemoryUsageReportIfSignificantlyChanged(lastBucketEndTime); - m_LastFinalisedBucketEndTime = lastBucketEndTime + effectiveBucketLength; + m_LastFinalisedBucketEndTime = lastBucketEndTime + bucketLength; // Check for periodic persistence immediately after calculating results // for the last bucket but before adding the first piece of data for the @@ -448,8 +451,6 @@ void CAnomalyJob::skipTime(const std::string& time_) { void CAnomalyJob::skipSampling(core_t::TTime endTime) { LOG_INFO(<< "Skipping time to: " << endTime); - this->flushAndResetResultsQueue(endTime); - for (const auto& detector_ : m_Detectors) { model::CAnomalyDetector* detector(detector_.second.get()); if (detector == nullptr) { @@ -463,39 +464,6 @@ void CAnomalyJob::skipSampling(core_t::TTime endTime) { m_LastFinalisedBucketEndTime = endTime; } -void CAnomalyJob::flushAndResetResultsQueue(core_t::TTime startTime) { - LOG_DEBUG(<< "Flush & reset results queue: " << startTime); - if (m_ModelConfig.bucketResultsDelay() != 0) { - core_t::TTime effectiveBucketLength = 
this->effectiveBucketLength(); - core_t::TTime earliestResultTime = m_LastFinalisedBucketEndTime - - m_ResultsQueue.size() * effectiveBucketLength; - for (core_t::TTime bucketStart = earliestResultTime; - bucketStart < m_LastFinalisedBucketEndTime; - bucketStart += effectiveBucketLength) { - model::CHierarchicalResults& results = m_ResultsQueue.latest(); - core_t::TTime resultsTime = m_ResultsQueue.chooseResultTime( - bucketStart, m_ModelConfig.bucketLength(), results); - if (resultsTime != 0) { - core::CStopWatch timer(true); - model::CHierarchicalResults& resultsToOutput = m_ResultsQueue.get(resultsTime); - uint64_t processingTime = timer.stop(); - // Model plots must be written first so the Java persists them - // once the bucket result is processed - this->writeOutModelPlot(resultsTime); - this->writeOutResults(false, resultsToOutput, resultsTime, - processingTime, 0l); - } - m_ResultsQueue.push(model::CHierarchicalResults()); - } - } - - // Reset to a bucket before the bucket we skip to because - // when results are output we push the current bucket to the queue - core_t::TTime resetTime = startTime - m_ModelConfig.bucketLength(); - m_ResultsQueue.reset(resetTime); - m_ModelPlotQueue.reset(resetTime); -} - void CAnomalyJob::timeNow(core_t::TTime time) { for (const auto& detector_ : m_Detectors) { model::CAnomalyDetector* detector(detector_.second.get()); @@ -508,11 +476,6 @@ void CAnomalyJob::timeNow(core_t::TTime time) { } } -core_t::TTime CAnomalyJob::effectiveBucketLength() const { - return m_ModelConfig.bucketResultsDelay() ? 
m_ModelConfig.bucketLength() / 2 - : m_ModelConfig.bucketLength(); -} - void CAnomalyJob::generateInterimResults(const std::string& controlMessage) { LOG_TRACE(<< "Generating interim results"); @@ -523,7 +486,7 @@ void CAnomalyJob::generateInterimResults(const std::string& controlMessage) { core_t::TTime start = m_LastFinalisedBucketEndTime; core_t::TTime end = m_LastFinalisedBucketEndTime + - (m_ModelConfig.latencyBuckets() + 1) * this->effectiveBucketLength(); + (m_ModelConfig.latencyBuckets() + 1) * m_ModelConfig.bucketLength(); if (this->parseTimeRangeInControlMessage(controlMessage, start, end)) { LOG_TRACE(<< "Time range for results: " << start << " : " << end); @@ -576,19 +539,12 @@ void CAnomalyJob::outputResults(core_t::TTime bucketStartTime) { using TKeyAnomalyDetectorPtrUMapCItr = TKeyAnomalyDetectorPtrUMap::const_iterator; using TKeyAnomalyDetectorPtrUMapCItrVec = std::vector; - static uint64_t cumulativeTime = 0; - core::CStopWatch timer(true); core_t::TTime bucketLength = m_ModelConfig.bucketLength(); - if (m_ModelPlotQueue.latestBucketEnd() < bucketLength) { - m_ModelPlotQueue.reset(bucketStartTime - m_ModelPlotQueue.bucketLength()); - } - - m_ResultsQueue.push(model::CHierarchicalResults(), bucketStartTime); - model::CHierarchicalResults& results = m_ResultsQueue.get(bucketStartTime); - m_ModelPlotQueue.push(TModelPlotDataVec(), bucketStartTime); + model::CHierarchicalResults results; + TModelPlotDataVec modelPlotData; TKeyAnomalyDetectorPtrUMapCItrVec iterators; iterators.reserve(m_Detectors.size()); @@ -609,7 +565,8 @@ void CAnomalyJob::outputResults(core_t::TTime bucketStartTime) { detector->buildResults(bucketStartTime, bucketStartTime + bucketLength, results); detector->releaseMemory(bucketStartTime - m_ModelConfig.samplingAgeCutoff()); - this->generateModelPlot(bucketStartTime, bucketStartTime + bucketLength, *detector); + this->generateModelPlot(bucketStartTime, bucketStartTime + bucketLength, + *detector, modelPlotData); } if 
(!results.empty()) { @@ -628,20 +585,12 @@ void CAnomalyJob::outputResults(core_t::TTime bucketStartTime) { this->updateNormalizerAndNormalizeResults(false, results); } - core_t::TTime resultsTime = - m_ResultsQueue.chooseResultTime(bucketStartTime, bucketLength, results); - if (resultsTime != 0) { - model::CHierarchicalResults& resultsToOutput = m_ResultsQueue.get(resultsTime); - uint64_t processingTime = timer.stop(); - // Model plots must be written first so the Java persists them - // once the bucket result is processed - this->writeOutModelPlot(resultsTime); - this->writeOutResults(false, resultsToOutput, resultsTime, - processingTime, cumulativeTime); - cumulativeTime = 0; - } else { - cumulativeTime += timer.stop(); - } + uint64_t processingTime = timer.stop(); + + // Model plots must be written first so the Java persists them + // once the bucket result is processed + this->writeOutModelPlot(modelPlotData); + this->writeOutResults(false, results, bucketStartTime, processingTime); m_Limits.resourceMonitor().pruneIfRequired(bucketStartTime); model::CStringStore::tidyUpNotThreadSafe(); @@ -681,22 +630,14 @@ void CAnomalyJob::outputInterimResults(core_t::TTime bucketStartTime) { this->updateNormalizerAndNormalizeResults(true, results); } - // For the case where there are out-of-phase buckets, and there is a gap for an - // intermediate bucket, output it as interim too. 
uint64_t processingTime = timer.stop(); - if (m_ResultsQueue.hasInterimResults()) { - core_t::TTime olderTime = bucketStartTime - bucketLength; - model::CHierarchicalResults& olderResult = m_ResultsQueue.get(olderTime); - this->writeOutResults(true, olderResult, olderTime, processingTime, 0l); - } - this->writeOutResults(true, results, bucketStartTime, processingTime, 0l); + this->writeOutResults(true, results, bucketStartTime, processingTime); } void CAnomalyJob::writeOutResults(bool interim, model::CHierarchicalResults& results, core_t::TTime bucketTime, - uint64_t processingTime, - uint64_t sumPastProcessingTime) { + uint64_t processingTime) { if (!results.empty()) { LOG_TRACE(<< "Got results object here: " << results.root()->s_RawAnomalyScore << " / " << results.root()->s_NormalizedAnomalyScore @@ -720,8 +661,7 @@ void CAnomalyJob::writeOutResults(bool interim, bucketTime, results.root()->s_AnnotatedProbability.s_Probability, results.root()->s_RawAnomalyScore, results.root()->s_NormalizedAnomalyScore); - if (m_JsonOutputWriter.endOutputBatch( - interim, sumPastProcessingTime + processingTime) == false) { + if (m_JsonOutputWriter.endOutputBatch(interim, processingTime) == false) { LOG_ERROR(<< "Problem writing anomaly output"); } m_LastResultsTime = bucketTime; @@ -904,15 +844,6 @@ bool CAnomalyJob::restoreState(core::CStateRestoreTraverser& traverser, LOG_ERROR(<< "Cannot restore results aggregator"); return false; } - } else if (name == HIERARCHICAL_RESULTS_TAG) { - core::CPersistUtils::restore(HIERARCHICAL_RESULTS_TAG, m_ResultsQueue, traverser); - } else if (name == MODEL_PLOT_TAG) { - core_t::TTime resultsQueueResetTime = - m_ModelConfig.bucketResultsDelay() == 0 - ? 
m_LastFinalisedBucketEndTime - : m_LastFinalisedBucketEndTime - this->effectiveBucketLength(); - m_ModelPlotQueue.reset(resultsQueueResetTime); - core::CPersistUtils::restore(MODEL_PLOT_TAG, m_ModelPlotQueue, traverser); } else if (name == LATEST_RECORD_TIME_TAG) { core::CPersistUtils::restore(LATEST_RECORD_TIME_TAG, m_LatestRecordTime, traverser); } else if (name == LAST_RESULTS_TIME_TAG) { @@ -1054,8 +985,7 @@ bool CAnomalyJob::persistState(core::CDataAdder& persister) { m_Normalizer.toJson(m_LastResultsTime, "api", normaliserState, true); return this->persistState( - "State persisted due to job close at ", m_ResultsQueue, - m_ModelPlotQueue, m_LastFinalisedBucketEndTime, detectors, + "State persisted due to job close at ", m_LastFinalisedBucketEndTime, detectors, m_Limits.resourceMonitor().createMemoryUsageReport( m_LastFinalisedBucketEndTime - m_ModelConfig.bucketLength()), m_ModelConfig.interimBucketCorrector(), m_Aggregator, normaliserState, @@ -1070,7 +1000,7 @@ bool CAnomalyJob::backgroundPersistState(CBackgroundPersister& backgroundPersist // Do NOT add boost::ref wrappers around these arguments - they // MUST be copied for thread safety TBackgroundPersistArgsPtr args = std::make_shared( - m_ResultsQueue, m_ModelPlotQueue, m_LastFinalisedBucketEndTime, + m_LastFinalisedBucketEndTime, m_Limits.resourceMonitor().createMemoryUsageReport( m_LastFinalisedBucketEndTime - m_ModelConfig.bucketLength()), m_ModelConfig.interimBucketCorrector(), m_Aggregator, @@ -1119,16 +1049,14 @@ bool CAnomalyJob::runBackgroundPersist(TBackgroundPersistArgsPtr args, return false; } - return this->persistState( - "Periodic background persist at ", args->s_ResultsQueue, - args->s_ModelPlotQueue, args->s_Time, args->s_Detectors, args->s_ModelSizeStats, - args->s_InterimBucketCorrector, args->s_Aggregator, args->s_NormalizerState, - args->s_LatestRecordTime, args->s_LastResultsTime, persister); + return this->persistState("Periodic background persist at ", args->s_Time, + 
args->s_Detectors, args->s_ModelSizeStats, + args->s_InterimBucketCorrector, args->s_Aggregator, + args->s_NormalizerState, args->s_LatestRecordTime, + args->s_LastResultsTime, persister); } bool CAnomalyJob::persistState(const std::string& descriptionPrefix, - const model::CResultsQueue& resultsQueue, - const TModelPlotDataVecQueue& modelPlotQueue, core_t::TTime lastFinalisedBucketEnd, const TKeyCRefAnomalyDetectorPtrPrVec& detectors, const model::CResourceMonitor::SResults& modelSizeStats, @@ -1157,15 +1085,6 @@ bool CAnomalyJob::persistState(const std::string& descriptionPrefix, core::CJsonStatePersistInserter inserter(*strm); inserter.insertValue(TIME_TAG, lastFinalisedBucketEnd); inserter.insertValue(VERSION_TAG, model::CAnomalyDetector::STATE_VERSION); - - if (resultsQueue.size() > 1) { - core::CPersistUtils::persist(HIERARCHICAL_RESULTS_TAG, - resultsQueue, inserter); - } - if (modelPlotQueue.size() > 1) { - core::CPersistUtils::persist(MODEL_PLOT_TAG, modelPlotQueue, inserter); - } - inserter.insertLevel(INTERIM_BUCKET_CORRECTOR_TAG, boost::bind(&model::CInterimBucketCorrector::acceptPersistInserter, &interimBucketCorrector, _1)); @@ -1312,29 +1231,21 @@ void CAnomalyJob::outputResultsWithinRange(bool isInterim, core_t::TTime start, void CAnomalyJob::generateModelPlot(core_t::TTime startTime, core_t::TTime endTime, - const model::CAnomalyDetector& detector) { + const model::CAnomalyDetector& detector, + TModelPlotDataVec& modelPlotData) { double modelPlotBoundsPercentile(m_ModelConfig.modelPlotBoundsPercentile()); if (modelPlotBoundsPercentile > 0.0) { LOG_TRACE(<< "Generating model debug data at " << startTime); - detector.generateModelPlot( - startTime, endTime, m_ModelConfig.modelPlotBoundsPercentile(), - m_ModelConfig.modelPlotTerms(), m_ModelPlotQueue.get(startTime)); - } -} - -void CAnomalyJob::writeOutModelPlot(core_t::TTime resultsTime) { - double modelPlotBoundsPercentile(m_ModelConfig.modelPlotBoundsPercentile()); - if (modelPlotBoundsPercentile > 
0.0) { - LOG_TRACE(<< "Writing debug data at time " << resultsTime); - CModelPlotDataJsonWriter modelPlotWriter(m_OutputStream); - this->writeOutModelPlot(resultsTime, modelPlotWriter); + detector.generateModelPlot(startTime, endTime, + m_ModelConfig.modelPlotBoundsPercentile(), + m_ModelConfig.modelPlotTerms(), modelPlotData); } } -void CAnomalyJob::writeOutModelPlot(core_t::TTime resultsTime, - CModelPlotDataJsonWriter& writer) { - for (const auto& plot : m_ModelPlotQueue.get(resultsTime)) { - writer.writeFlat(m_JobId, plot); +void CAnomalyJob::writeOutModelPlot(const TModelPlotDataVec& modelPlotData) { + CModelPlotDataJsonWriter modelPlotWriter(m_OutputStream); + for (const auto& plot : modelPlotData) { + modelPlotWriter.writeFlat(m_JobId, plot); } } @@ -1504,16 +1415,13 @@ void CAnomalyJob::addRecord(const TAnomalyDetectorPtr detector, } CAnomalyJob::SBackgroundPersistArgs::SBackgroundPersistArgs( - const model::CResultsQueue& resultsQueue, - const TModelPlotDataVecQueue& modelPlotQueue, core_t::TTime time, const model::CResourceMonitor::SResults& modelSizeStats, const model::CInterimBucketCorrector& interimBucketCorrector, const model::CHierarchicalResultsAggregator& aggregator, core_t::TTime latestRecordTime, core_t::TTime lastResultsTime) - : s_ResultsQueue(resultsQueue), s_ModelPlotQueue(modelPlotQueue), - s_Time(time), s_ModelSizeStats(modelSizeStats), + : s_Time(time), s_ModelSizeStats(modelSizeStats), s_InterimBucketCorrector(interimBucketCorrector), s_Aggregator(aggregator), s_LatestRecordTime(latestRecordTime), s_LastResultsTime(lastResultsTime) { } diff --git a/lib/api/dump_state/Main.cc b/lib/api/dump_state/Main.cc index c1502da5f3..e19a6b1bc8 100644 --- a/lib/api/dump_state/Main.cc +++ b/lib/api/dump_state/Main.cc @@ -186,7 +186,7 @@ bool persistAnomalyDetectorStateToFile(const std::string& configFileName, std::string jobId("foo"); ml::model::CAnomalyDetectorModelConfig modelConfig = ml::model::CAnomalyDetectorModelConfig::defaultConfig( - 
bucketSize, ml::model_t::E_None, "", bucketSize * latencyBuckets, 0, false); + bucketSize, ml::model_t::E_None, "", bucketSize * latencyBuckets, false); ml::api::CAnomalyJob origJob(jobId, limits, fieldConfig, modelConfig, wrappedOutputStream, boost::bind(&reportPersistComplete, _1), diff --git a/lib/api/unittest/CAnomalyJobLimitTest.cc b/lib/api/unittest/CAnomalyJobLimitTest.cc index be7b57dbd8..8b838afb49 100644 --- a/lib/api/unittest/CAnomalyJobLimitTest.cc +++ b/lib/api/unittest/CAnomalyJobLimitTest.cc @@ -458,7 +458,7 @@ void CAnomalyJobLimitTest::testModelledEntityCountForFixedMemoryLimit() { CPPUNIT_ASSERT(used.s_PartitionFields > testParam.s_ExpectedPartitionFields && used.s_PartitionFields < 450); CPPUNIT_ASSERT(static_cast(used.s_ByFields) > - 0.97 * static_cast(used.s_PartitionFields)); + 0.96 * static_cast(used.s_PartitionFields)); CPPUNIT_ASSERT_DOUBLES_EQUAL( memoryLimit * 1024 * 1024 / 2, used.s_Usage, memoryLimit * 1024 * 1024 / testParam.s_ExpectedPartitionLowerMemoryLimit); diff --git a/lib/api/unittest/CAnomalyJobTest.cc b/lib/api/unittest/CAnomalyJobTest.cc index 2b3e979216..d198251b0f 100644 --- a/lib/api/unittest/CAnomalyJobTest.cc +++ b/lib/api/unittest/CAnomalyJobTest.cc @@ -483,1283 +483,80 @@ void CAnomalyJobTest::testSkipTimeControlMessage() { CPPUNIT_ASSERT_EQUAL(std::size_t(11), countBuckets("bucket", outputStrm.str() + "]")); } -void CAnomalyJobTest::testOutOfPhase() { - // Ensure the right data ends up in the right buckets - // First we test that it works as expected for non-out-of-phase, - // then we crank in the out-of-phase - - // Ensure that gaps in a bucket's data do not cause errors or problems - - // Ensure that we can start at a variety of times, - // and finish at a variety of times, and get the - // right output always - - // The code is pretty verbose here, but executes quickly - { - LOG_DEBUG(<< "*** testing non-out-of-phase metric ***"); - core_t::TTime bucketSize = 100; - model::CLimits limits; - api::CFieldConfig 
fieldConfig; - api::CFieldConfig::TStrVec clauses; - clauses.push_back("mean(value)"); - fieldConfig.initFromClause(clauses); - model::CAnomalyDetectorModelConfig modelConfig = - model::CAnomalyDetectorModelConfig::defaultConfig(bucketSize); - std::stringstream outputStrm; - - core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); - - api::CAnomalyJob job("job", limits, fieldConfig, modelConfig, wrappedOutputStream); - - api::CAnomalyJob::TStrStrUMap dataRows; - - CPPUNIT_ASSERT_EQUAL(core_t::TTime(99), job.m_ResultsQueue.latestBucketEnd()); - dataRows["time"] = "10000"; - dataRows["value"] = "1.0"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - CPPUNIT_ASSERT_EQUAL(core_t::TTime(99), job.m_ResultsQueue.latestBucketEnd()); - - dataRows["time"] = "10050"; - dataRows["value"] = "3.0"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - CPPUNIT_ASSERT_EQUAL(core_t::TTime(99), job.m_ResultsQueue.latestBucketEnd()); - - dataRows["time"] = "10100"; - dataRows["value"] = "1.0"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - CPPUNIT_ASSERT_EQUAL(core_t::TTime(10099), job.m_ResultsQueue.latestBucketEnd()); - { - CSingleResultVisitor visitor; - job.m_ResultsQueue.latest().topDownBreadthFirst(visitor); - CPPUNIT_ASSERT_DOUBLES_EQUAL(2.0, visitor.lastResults(), 0.0005); - } - - dataRows["time"] = "10200"; - dataRows["value"] = "0.0005"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - { - CSingleResultVisitor visitor; - job.m_ResultsQueue.latest().topDownBreadthFirst(visitor); - CPPUNIT_ASSERT_DOUBLES_EQUAL(1.0, visitor.lastResults(), 0.0005); - } - - dataRows["time"] = "10300"; - dataRows["value"] = "5.0"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - { - CSingleResultVisitor visitor; - job.m_ResultsQueue.latest().topDownBreadthFirst(visitor); - CPPUNIT_ASSERT_DOUBLES_EQUAL(0.0005, visitor.lastResults(), 0.000005); - } - - dataRows["time"] = "10400"; - dataRows["value"] = "5.0"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - { - CSingleResultVisitor visitor; - 
job.m_ResultsQueue.latest().topDownBreadthFirst(visitor); - CPPUNIT_ASSERT_DOUBLES_EQUAL(5.0, visitor.lastResults(), 0.0005); - } - - dataRows["time"] = "10500"; - dataRows["value"] = "5.0"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - { - CSingleResultVisitor visitor; - job.m_ResultsQueue.latest().topDownBreadthFirst(visitor); - CPPUNIT_ASSERT_DOUBLES_EQUAL(5.0, visitor.lastResults(), 0.0005); - } - - // Bucket at 10600 not present - - dataRows["time"] = "10700"; - dataRows["value"] = "50"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - { - CSingleResultVisitor visitor; - job.m_ResultsQueue.latest().topDownBreadthFirst(visitor); - CPPUNIT_ASSERT_DOUBLES_EQUAL(0.0, visitor.lastResults(), 0.0005); - } - - dataRows["time"] = "10700"; - dataRows["value"] = "80"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - - dataRows["time"] = "10700"; - dataRows["value"] = "20"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - - dataRows["time"] = "10800"; - dataRows["value"] = "5.0"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - { - CSingleResultVisitor visitor; - job.m_ResultsQueue.latest().topDownBreadthFirst(visitor); - CPPUNIT_ASSERT_DOUBLES_EQUAL(50.0, visitor.lastResults(), 0.005); - } - - dataRows["time"] = "10800"; - dataRows["value"] = "6.0"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - CPPUNIT_ASSERT_EQUAL(core_t::TTime(10799), job.m_ResultsQueue.latestBucketEnd()); - { - CSingleResultVisitor visitor; - job.m_ResultsQueue.latest().topDownBreadthFirst(visitor); - CPPUNIT_ASSERT_DOUBLES_EQUAL(50.0, visitor.lastResults(), 0.005); - } - job.finalise(); - CPPUNIT_ASSERT_EQUAL(core_t::TTime(10799), job.m_ResultsQueue.latestBucketEnd()); - { - CSingleResultVisitor visitor; - job.m_ResultsQueue.latest().topDownBreadthFirst(visitor); - CPPUNIT_ASSERT_DOUBLES_EQUAL(50.0, visitor.lastResults(), 0.005); - } - } - { - LOG_DEBUG(<< "*** testing non-out-of-phase metric ***"); - // Same as previous test but starting not on a bucket boundary - core_t::TTime bucketSize = 100; - 
model::CLimits limits; - api::CFieldConfig fieldConfig; - api::CFieldConfig::TStrVec clauses; - clauses.push_back("mean(value)"); - fieldConfig.initFromClause(clauses); - model::CAnomalyDetectorModelConfig modelConfig = - model::CAnomalyDetectorModelConfig::defaultConfig(bucketSize); - std::stringstream outputStrm; - core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); - - api::CAnomalyJob job("job", limits, fieldConfig, modelConfig, wrappedOutputStream); - - api::CAnomalyJob::TStrStrUMap dataRows; - - // The first two values are in an incomplete bucket and should be ignored - CPPUNIT_ASSERT_EQUAL(core_t::TTime(99), job.m_ResultsQueue.latestBucketEnd()); - dataRows["time"] = "10001"; - dataRows["value"] = "1.0"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - CPPUNIT_ASSERT_EQUAL(core_t::TTime(99), job.m_ResultsQueue.latestBucketEnd()); - - dataRows["time"] = "10051"; - dataRows["value"] = "3.0"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - CPPUNIT_ASSERT(job.m_ResultsQueue.latest().empty()); - - // This next bucket should be the first valid one - dataRows["time"] = "10101"; - dataRows["value"] = "1.0"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - CPPUNIT_ASSERT(job.m_ResultsQueue.latest().empty()); - - dataRows["time"] = "10201"; - dataRows["value"] = "0.0005"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - CPPUNIT_ASSERT_EQUAL(core_t::TTime(10199), job.m_ResultsQueue.latestBucketEnd()); - { - CSingleResultVisitor visitor; - job.m_ResultsQueue.latest().topDownBreadthFirst(visitor); - CPPUNIT_ASSERT_DOUBLES_EQUAL(1.0, visitor.lastResults(), 0.0005); - } - - dataRows["time"] = "10301"; - dataRows["value"] = "5.0"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - { - CSingleResultVisitor visitor; - job.m_ResultsQueue.latest().topDownBreadthFirst(visitor); - CPPUNIT_ASSERT_DOUBLES_EQUAL(0.0005, visitor.lastResults(), 0.000005); - } - - dataRows["time"] = "10401"; - dataRows["value"] = "5.0"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - { - 
CSingleResultVisitor visitor; - job.m_ResultsQueue.latest().topDownBreadthFirst(visitor); - CPPUNIT_ASSERT_DOUBLES_EQUAL(5.0, visitor.lastResults(), 0.0005); - } - - dataRows["time"] = "10501"; - dataRows["value"] = "5.0"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - { - CSingleResultVisitor visitor; - job.m_ResultsQueue.latest().topDownBreadthFirst(visitor); - CPPUNIT_ASSERT_DOUBLES_EQUAL(5.0, visitor.lastResults(), 0.0005); - } - - // Bucket at 10600 not present - - dataRows["time"] = "10701"; - dataRows["value"] = "50"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - { - CSingleResultVisitor visitor; - job.m_ResultsQueue.latest().topDownBreadthFirst(visitor); - CPPUNIT_ASSERT_DOUBLES_EQUAL(0.0, visitor.lastResults(), 0.0005); - } - - dataRows["time"] = "10701"; - dataRows["value"] = "80"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - - dataRows["time"] = "10701"; - dataRows["value"] = "20"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - - dataRows["time"] = "10801"; - dataRows["value"] = "5.0"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - { - CSingleResultVisitor visitor; - job.m_ResultsQueue.latest().topDownBreadthFirst(visitor); - CPPUNIT_ASSERT_DOUBLES_EQUAL(50.0, visitor.lastResults(), 0.005); - } - - dataRows["time"] = "10895"; - dataRows["value"] = "6.0"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - - job.finalise(); - } - { - LOG_DEBUG(<< "*** testing non-out-of-phase count ***"); - core_t::TTime bucketSize = 100; - model::CLimits limits; - api::CFieldConfig fieldConfig; - api::CFieldConfig::TStrVec clauses; - clauses.push_back("count"); - fieldConfig.initFromClause(clauses); - model::CAnomalyDetectorModelConfig modelConfig = - model::CAnomalyDetectorModelConfig::defaultConfig(bucketSize); - std::stringstream outputStrm; - core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); - - api::CAnomalyJob job("job", limits, fieldConfig, modelConfig, wrappedOutputStream); - - api::CAnomalyJob::TStrStrUMap dataRows; - - 
CPPUNIT_ASSERT_EQUAL(core_t::TTime(99), job.m_ResultsQueue.latestBucketEnd()); - dataRows["time"] = "10000"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - CPPUNIT_ASSERT_EQUAL(core_t::TTime(99), job.m_ResultsQueue.latestBucketEnd()); - - dataRows["time"] = "10050"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - CPPUNIT_ASSERT_EQUAL(core_t::TTime(99), job.m_ResultsQueue.latestBucketEnd()); - - dataRows["time"] = "10100"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - dataRows["time"] = "10110"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - dataRows["time"] = "10120"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - CPPUNIT_ASSERT_EQUAL(core_t::TTime(10099), job.m_ResultsQueue.latestBucketEnd()); - { - CSingleResultVisitor visitor; - job.m_ResultsQueue.latest().topDownBreadthFirst(visitor); - CPPUNIT_ASSERT_DOUBLES_EQUAL(2.0, visitor.lastResults(), 0.0005); - } - - dataRows["time"] = "10200"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - { - CSingleResultVisitor visitor; - job.m_ResultsQueue.latest().topDownBreadthFirst(visitor); - CPPUNIT_ASSERT_DOUBLES_EQUAL(3.0, visitor.lastResults(), 0.0005); - } - - dataRows["time"] = "10300"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - dataRows["time"] = "10300"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - { - CSingleResultVisitor visitor; - job.m_ResultsQueue.latest().topDownBreadthFirst(visitor); - CPPUNIT_ASSERT_DOUBLES_EQUAL(1.0, visitor.lastResults(), 0.000005); - } - - dataRows["time"] = "10400"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - dataRows["time"] = "10401"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - dataRows["time"] = "10402"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - dataRows["time"] = "10403"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - { - CSingleResultVisitor visitor; - job.m_ResultsQueue.latest().topDownBreadthFirst(visitor); - CPPUNIT_ASSERT_DOUBLES_EQUAL(2.0, visitor.lastResults(), 0.0005); - } - - dataRows["time"] = "10500"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); 
- { - CSingleResultVisitor visitor; - job.m_ResultsQueue.latest().topDownBreadthFirst(visitor); - CPPUNIT_ASSERT_DOUBLES_EQUAL(4.0, visitor.lastResults(), 0.0005); - } - - // Bucket at 10600 not present - - dataRows["time"] = "10700"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - { - CSingleResultVisitor visitor; - job.m_ResultsQueue.latest().topDownBreadthFirst(visitor); - CPPUNIT_ASSERT_DOUBLES_EQUAL(0.0, visitor.lastResults(), 0.0005); - } - - dataRows["time"] = "10700"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - - dataRows["time"] = "10700"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - - dataRows["time"] = "10800"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - { - CSingleResultVisitor visitor; - job.m_ResultsQueue.latest().topDownBreadthFirst(visitor); - CPPUNIT_ASSERT_DOUBLES_EQUAL(3.0, visitor.lastResults(), 0.005); - } - - dataRows["time"] = "10895"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - CPPUNIT_ASSERT_EQUAL(core_t::TTime(10799), job.m_ResultsQueue.latestBucketEnd()); - { - CSingleResultVisitor visitor; - job.m_ResultsQueue.latest().topDownBreadthFirst(visitor); - CPPUNIT_ASSERT_DOUBLES_EQUAL(3.0, visitor.lastResults(), 0.005); - } - job.finalise(); - CPPUNIT_ASSERT_EQUAL(core_t::TTime(10799), job.m_ResultsQueue.latestBucketEnd()); - { - CSingleResultVisitor visitor; - job.m_ResultsQueue.latest().topDownBreadthFirst(visitor); - CPPUNIT_ASSERT_DOUBLES_EQUAL(3.0, visitor.lastResults(), 0.005); - } - } - { - LOG_DEBUG(<< "*** testing non-out-of-phase count ***"); - core_t::TTime bucketSize = 100; - model::CLimits limits; - api::CFieldConfig fieldConfig; - api::CFieldConfig::TStrVec clauses; - clauses.push_back("count"); - fieldConfig.initFromClause(clauses); - model::CAnomalyDetectorModelConfig modelConfig = - model::CAnomalyDetectorModelConfig::defaultConfig(bucketSize); - std::stringstream outputStrm; - - core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); - - api::CAnomalyJob job("job", limits, fieldConfig, modelConfig, 
wrappedOutputStream); - - api::CAnomalyJob::TStrStrUMap dataRows; - - CPPUNIT_ASSERT_EQUAL(core_t::TTime(99), job.m_ResultsQueue.latestBucketEnd()); - dataRows["time"] = "10088"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - CPPUNIT_ASSERT_EQUAL(core_t::TTime(99), job.m_ResultsQueue.latestBucketEnd()); - - dataRows["time"] = "10097"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - CPPUNIT_ASSERT_EQUAL(core_t::TTime(99), job.m_ResultsQueue.latestBucketEnd()); - - dataRows["time"] = "10100"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - dataRows["time"] = "10110"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - dataRows["time"] = "10120"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - CPPUNIT_ASSERT(job.m_ResultsQueue.latest().empty()); - - dataRows["time"] = "10200"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - { - CSingleResultVisitor visitor; - job.m_ResultsQueue.latest().topDownBreadthFirst(visitor); - CPPUNIT_ASSERT_DOUBLES_EQUAL(3.0, visitor.lastResults(), 0.0005); - } - - dataRows["time"] = "10300"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - dataRows["time"] = "10300"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - { - CSingleResultVisitor visitor; - job.m_ResultsQueue.latest().topDownBreadthFirst(visitor); - CPPUNIT_ASSERT_DOUBLES_EQUAL(1.0, visitor.lastResults(), 0.000005); - } - - dataRows["time"] = "10400"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - dataRows["time"] = "10401"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - dataRows["time"] = "10402"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - dataRows["time"] = "10403"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - { - CSingleResultVisitor visitor; - job.m_ResultsQueue.latest().topDownBreadthFirst(visitor); - CPPUNIT_ASSERT_DOUBLES_EQUAL(2.0, visitor.lastResults(), 0.0005); - } - - dataRows["time"] = "10500"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - { - CSingleResultVisitor visitor; - job.m_ResultsQueue.latest().topDownBreadthFirst(visitor); - 
CPPUNIT_ASSERT_DOUBLES_EQUAL(4.0, visitor.lastResults(), 0.0005); - } - - // Bucket at 10600 not present - - dataRows["time"] = "10700"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - { - CSingleResultVisitor visitor; - job.m_ResultsQueue.latest().topDownBreadthFirst(visitor); - CPPUNIT_ASSERT_DOUBLES_EQUAL(0.0, visitor.lastResults(), 0.0005); - } - - dataRows["time"] = "10700"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - - dataRows["time"] = "10700"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - - dataRows["time"] = "10800"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - { - CSingleResultVisitor visitor; - job.m_ResultsQueue.latest().topDownBreadthFirst(visitor); - CPPUNIT_ASSERT_DOUBLES_EQUAL(3.0, visitor.lastResults(), 0.005); - } - - dataRows["time"] = "10805"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - CPPUNIT_ASSERT_EQUAL(core_t::TTime(10799), job.m_ResultsQueue.latestBucketEnd()); - { - CSingleResultVisitor visitor; - job.m_ResultsQueue.latest().topDownBreadthFirst(visitor); - CPPUNIT_ASSERT_DOUBLES_EQUAL(3.0, visitor.lastResults(), 0.005); - } - job.finalise(); - CPPUNIT_ASSERT_EQUAL(core_t::TTime(10799), job.m_ResultsQueue.latestBucketEnd()); - { - CSingleResultVisitor visitor; - job.m_ResultsQueue.latest().topDownBreadthFirst(visitor); - CPPUNIT_ASSERT_DOUBLES_EQUAL(3.0, visitor.lastResults(), 0.005); - } - } - // Now we come to the real meat and potatoes of the test, the out-of-phase buckets - { - LOG_DEBUG(<< "*** testing out-of-phase metric ***"); - core_t::TTime bucketSize = 100; - model::CLimits limits; - api::CFieldConfig fieldConfig; - api::CFieldConfig::TStrVec clauses; - clauses.push_back("mean(value)"); - fieldConfig.initFromClause(clauses); - - // 2 delay buckets - model::CAnomalyDetectorModelConfig modelConfig = - model::CAnomalyDetectorModelConfig::defaultConfig( - bucketSize, model_t::E_None, "", 0, 2, false); - std::stringstream outputStrm; - - core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); - - api::CAnomalyJob 
job("job", limits, fieldConfig, modelConfig, wrappedOutputStream); - - api::CAnomalyJob::TStrStrUMap dataRows; - - // main bucket should start at 10000 -> 10100 - // out-of-phase bucket start at 10050 -> 10150 - CPPUNIT_ASSERT_EQUAL(core_t::TTime(49), job.m_ResultsQueue.latestBucketEnd()); - dataRows["time"] = "10000"; - dataRows["value"] = "5.0"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - CPPUNIT_ASSERT_EQUAL(core_t::TTime(49), job.m_ResultsQueue.latestBucketEnd()); - - dataRows["time"] = "10050"; - dataRows["value"] = "3.0"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - - dataRows["time"] = "10100"; - dataRows["value"] = "1.0"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - { - CSingleResultVisitor visitor; - job.m_ResultsQueue.latest().topDownBreadthFirst(visitor); - CPPUNIT_ASSERT_DOUBLES_EQUAL(4.0, visitor.lastResults(), 0.0005); - } - dataRows["time"] = "10150"; - dataRows["value"] = "4.0"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - CPPUNIT_ASSERT_EQUAL(core_t::TTime(10099), job.m_ResultsQueue.latestBucketEnd()); - { - CSingleResultVisitor visitor; - job.m_ResultsQueue.latest().topDownBreadthFirst(visitor); - CPPUNIT_ASSERT_DOUBLES_EQUAL(2.0, visitor.lastResults(), 0.0005); - } - - dataRows["time"] = "10200"; - dataRows["value"] = "0.0005"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - CPPUNIT_ASSERT_EQUAL(core_t::TTime(10149), job.m_ResultsQueue.latestBucketEnd()); - { - CSingleResultVisitor visitor; - job.m_ResultsQueue.latest().topDownBreadthFirst(visitor); - CPPUNIT_ASSERT_DOUBLES_EQUAL(2.5, visitor.lastResults(), 0.0005); - } - - dataRows["time"] = "10300"; - dataRows["value"] = "5.0"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - { - CSingleResultVisitor visitor; - job.m_ResultsQueue.latest().topDownBreadthFirst(visitor); - CPPUNIT_ASSERT_DOUBLES_EQUAL(0.0005, visitor.lastResults(), 0.000005); - } - - dataRows["time"] = "10499"; - dataRows["value"] = "5.0"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - LOG_DEBUG(<< "Result time is " 
<< (job.m_ResultsQueue.latestBucketEnd() - 49)); - { - CSingleResultVisitor visitor; - job.m_ResultsQueue.latest().topDownBreadthFirst(visitor); - CPPUNIT_ASSERT_DOUBLES_EQUAL(0.0, visitor.lastResults(), 0.0000005); - } - - dataRows["time"] = "10500"; - dataRows["value"] = "5.0"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - { - CSingleResultVisitor visitor; - job.m_ResultsQueue.latest().topDownBreadthFirst(visitor); - CPPUNIT_ASSERT_DOUBLES_EQUAL(5.0, visitor.lastResults(), 0.0005); - } - - // Bucket at 10600 not present - - dataRows["time"] = "10700"; - dataRows["value"] = "50"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - { - CSingleResultVisitor visitor; - job.m_ResultsQueue.latest().topDownBreadthFirst(visitor); - CPPUNIT_ASSERT_DOUBLES_EQUAL(0.0, visitor.lastResults(), 0.0005); - } - - dataRows["time"] = "10720"; - dataRows["value"] = "80"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - - dataRows["time"] = "10760"; - dataRows["value"] = "20"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - { - CSingleResultVisitor visitor; - job.m_ResultsQueue.latest().topDownBreadthFirst(visitor); - CPPUNIT_ASSERT_DOUBLES_EQUAL(65.0, visitor.lastResults(), 0.0005); - } - - dataRows["time"] = "10780"; - dataRows["value"] = "80"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); +void CAnomalyJobTest::testModelPlot() { + core_t::TTime bucketSize = 10000; + model::CLimits limits; + api::CFieldConfig fieldConfig; + api::CFieldConfig::TStrVec clauses; + clauses.push_back("mean(value)"); + clauses.push_back("by"); + clauses.push_back("animal"); + fieldConfig.initFromClause(clauses); - dataRows["time"] = "10800"; - dataRows["value"] = "5.0"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - { - CSingleResultVisitor visitor; - job.m_ResultsQueue.latest().topDownBreadthFirst(visitor); - CPPUNIT_ASSERT_DOUBLES_EQUAL(57.5, visitor.lastResults(), 0.005); - } + model::CAnomalyDetectorModelConfig modelConfig = + model::CAnomalyDetectorModelConfig::defaultConfig(bucketSize, 
model_t::E_None, + "", 0, false); + modelConfig.modelPlotBoundsPercentile(1.0); + std::stringstream outputStrm; - // 10895, triggers bucket 10750->10850 - dataRows["time"] = "10895"; - dataRows["value"] = "6.0"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - LOG_DEBUG(<< "Result time is " << (job.m_ResultsQueue.latestBucketEnd())); - CPPUNIT_ASSERT_EQUAL(core_t::TTime(10799), job.m_ResultsQueue.latestBucketEnd()); - { - CSingleResultVisitor visitor; - job.m_ResultsQueue.latest().topDownBreadthFirst(visitor); - CPPUNIT_ASSERT_DOUBLES_EQUAL(35.0, visitor.lastResults(), 0.005); - } - LOG_DEBUG(<< "Finalising job"); - job.finalise(); - CPPUNIT_ASSERT_EQUAL(core_t::TTime(10799), job.m_ResultsQueue.latestBucketEnd()); - { - CSingleResultVisitor visitor; - job.m_ResultsQueue.latest().topDownBreadthFirst(visitor); - CPPUNIT_ASSERT_DOUBLES_EQUAL(35.0, visitor.lastResults(), 0.005); - } - } { - LOG_DEBUG(<< "*** testing out-of-phase metric ***"); - core_t::TTime bucketSize = 100; - model::CLimits limits; - api::CFieldConfig fieldConfig; - api::CFieldConfig::TStrVec clauses; - clauses.push_back("mean(value)"); - fieldConfig.initFromClause(clauses); - - // 2 delay buckets - model::CAnomalyDetectorModelConfig modelConfig = - model::CAnomalyDetectorModelConfig::defaultConfig( - bucketSize, model_t::E_None, "", 0, 2, false); - std::stringstream outputStrm; - core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); api::CAnomalyJob job("job", limits, fieldConfig, modelConfig, wrappedOutputStream); api::CAnomalyJob::TStrStrUMap dataRows; - - CPPUNIT_ASSERT_EQUAL(core_t::TTime(49), job.m_ResultsQueue.latestBucketEnd()); - dataRows["time"] = "10045"; - dataRows["value"] = "5.0"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - CPPUNIT_ASSERT_EQUAL(core_t::TTime(49), job.m_ResultsQueue.latestBucketEnd()); - - dataRows["time"] = "10050"; - dataRows["value"] = "3.0"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - - // This is the first complete bucket - dataRows["time"] = 
"10100"; - dataRows["value"] = "1.0"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - { - CSingleResultVisitor visitor; - job.m_ResultsQueue.latest().topDownBreadthFirst(visitor); - CPPUNIT_ASSERT_DOUBLES_EQUAL(0.0, visitor.lastResults(), 0.0005); - } - dataRows["time"] = "10150"; - dataRows["value"] = "4.0"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - CPPUNIT_ASSERT_EQUAL(core_t::TTime(10099), job.m_ResultsQueue.latestBucketEnd()); - { - CSingleResultVisitor visitor; - job.m_ResultsQueue.latest().topDownBreadthFirst(visitor); - CPPUNIT_ASSERT_DOUBLES_EQUAL(0.0, visitor.lastResults(), 0.0005); - } - - dataRows["time"] = "10200"; - dataRows["value"] = "0.0005"; + dataRows["time"] = "10000000"; + dataRows["value"] = "2.0"; + dataRows["animal"] = "baboon"; CPPUNIT_ASSERT(job.handleRecord(dataRows)); - CPPUNIT_ASSERT_EQUAL(core_t::TTime(10149), job.m_ResultsQueue.latestBucketEnd()); - { - CSingleResultVisitor visitor; - job.m_ResultsQueue.latest().topDownBreadthFirst(visitor); - CPPUNIT_ASSERT_DOUBLES_EQUAL(2.5, visitor.lastResults(), 0.0005); - } - - dataRows["time"] = "10300"; dataRows["value"] = "5.0"; + dataRows["animal"] = "shark"; CPPUNIT_ASSERT(job.handleRecord(dataRows)); - { - CSingleResultVisitor visitor; - job.m_ResultsQueue.latest().topDownBreadthFirst(visitor); - CPPUNIT_ASSERT_DOUBLES_EQUAL(0.0005, visitor.lastResults(), 0.000005); - } - - dataRows["time"] = "10499"; - dataRows["value"] = "5.0"; + dataRows["time"] = "10010000"; + dataRows["value"] = "2.0"; + dataRows["animal"] = "baboon"; CPPUNIT_ASSERT(job.handleRecord(dataRows)); - LOG_DEBUG(<< "Result time is " << (job.m_ResultsQueue.latestBucketEnd() - 49)); - { - CSingleResultVisitor visitor; - job.m_ResultsQueue.latest().topDownBreadthFirst(visitor); - CPPUNIT_ASSERT_DOUBLES_EQUAL(0.0, visitor.lastResults(), 0.0000005); - } - - dataRows["time"] = "10500"; dataRows["value"] = "5.0"; + dataRows["animal"] = "shark"; CPPUNIT_ASSERT(job.handleRecord(dataRows)); - { - CSingleResultVisitor visitor; - 
job.m_ResultsQueue.latest().topDownBreadthFirst(visitor); - CPPUNIT_ASSERT_DOUBLES_EQUAL(5.0, visitor.lastResults(), 0.0005); - } - - // Bucket at 10600 not present - - dataRows["time"] = "10700"; - dataRows["value"] = "50"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - { - CSingleResultVisitor visitor; - job.m_ResultsQueue.latest().topDownBreadthFirst(visitor); - CPPUNIT_ASSERT_DOUBLES_EQUAL(0.0, visitor.lastResults(), 0.0005); - } - - dataRows["time"] = "10720"; - dataRows["value"] = "80"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - - dataRows["time"] = "10760"; - dataRows["value"] = "20"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - { - CSingleResultVisitor visitor; - job.m_ResultsQueue.latest().topDownBreadthFirst(visitor); - CPPUNIT_ASSERT_DOUBLES_EQUAL(65.0, visitor.lastResults(), 0.0005); - } - - dataRows["time"] = "10780"; - dataRows["value"] = "80"; + dataRows["time"] = "10020000"; + dataRows["value"] = "2.0"; + dataRows["animal"] = "baboon"; CPPUNIT_ASSERT(job.handleRecord(dataRows)); - - dataRows["time"] = "10800"; dataRows["value"] = "5.0"; + dataRows["animal"] = "shark"; CPPUNIT_ASSERT(job.handleRecord(dataRows)); - { - CSingleResultVisitor visitor; - job.m_ResultsQueue.latest().topDownBreadthFirst(visitor); - CPPUNIT_ASSERT_DOUBLES_EQUAL(57.5, visitor.lastResults(), 0.005); - } - - // 10895, triggers bucket 10750->10850 - dataRows["time"] = "10895"; - dataRows["value"] = "6.0"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - LOG_DEBUG(<< "Result time is " << (job.m_ResultsQueue.latestBucketEnd())); - CPPUNIT_ASSERT_EQUAL(core_t::TTime(10799), job.m_ResultsQueue.latestBucketEnd()); - { - CSingleResultVisitor visitor; - job.m_ResultsQueue.latest().topDownBreadthFirst(visitor); - CPPUNIT_ASSERT_DOUBLES_EQUAL(35.0, visitor.lastResults(), 0.005); - } - LOG_DEBUG(<< "Finalising job"); - job.finalise(); - CPPUNIT_ASSERT_EQUAL(core_t::TTime(10799), job.m_ResultsQueue.latestBucketEnd()); - { - CSingleResultVisitor visitor; - 
job.m_ResultsQueue.latest().topDownBreadthFirst(visitor); - CPPUNIT_ASSERT_DOUBLES_EQUAL(35.0, visitor.lastResults(), 0.005); - } - } - { - LOG_DEBUG(<< "*** testing out-of-phase eventrate ***"); - core_t::TTime bucketSize = 100; - model::CLimits limits; - api::CFieldConfig fieldConfig; - api::CFieldConfig::TStrVec clauses; - clauses.push_back("high_count"); - clauses.push_back("by"); - clauses.push_back("person"); - fieldConfig.initFromClause(clauses); - - // 2 delay buckets - model::CAnomalyDetectorModelConfig modelConfig = - model::CAnomalyDetectorModelConfig::defaultConfig( - bucketSize, model_t::E_None, "", 0, 2, false); - std::stringstream outputStrm; - - core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); - - api::CAnomalyJob job("job", limits, fieldConfig, modelConfig, wrappedOutputStream); - - api::CAnomalyJob::TStrStrUMap dataRows; - - // main bucket should start at 10000 -> 10100 - // out-of-phase bucket start at 10050 -> 10150 - CPPUNIT_ASSERT_EQUAL(core_t::TTime(49), job.m_ResultsQueue.latestBucketEnd()); - dataRows["time"] = "10000"; - dataRows["person"] = "Candice"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - dataRows["time"] = "10001"; - dataRows["person"] = "Behati"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - dataRows["time"] = "10002"; - dataRows["person"] = "Cara"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - dataRows["time"] = "10003"; - dataRows["person"] = "Kate"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - dataRows["time"] = "10004"; - dataRows["person"] = "Gisele"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - CPPUNIT_ASSERT_EQUAL(core_t::TTime(49), job.m_ResultsQueue.latestBucketEnd()); - - dataRows["time"] = "10050"; - dataRows["person"] = "Behati"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - - dataRows["time"] = "10070"; - dataRows["person"] = "Candice"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - - dataRows["time"] = "10101"; - dataRows["person"] = "Cara"; - 
CPPUNIT_ASSERT(job.handleRecord(dataRows)); - { - CMultiResultVisitor visitor; - job.m_ResultsQueue.latest().topDownBreadthFirst(visitor); - CPPUNIT_ASSERT_DOUBLES_EQUAL(7.0, visitor.lastResults(), 0.0005); - } - - dataRows["time"] = "10110"; - dataRows["person"] = "Kate"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - - dataRows["time"] = "10150"; - dataRows["person"] = "Gisele"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - CPPUNIT_ASSERT_EQUAL(core_t::TTime(10099), job.m_ResultsQueue.latestBucketEnd()); - { - CMultiResultVisitor visitor; - job.m_ResultsQueue.latest().topDownBreadthFirst(visitor); - CPPUNIT_ASSERT_DOUBLES_EQUAL(4.0, visitor.lastResults(), 0.0005); - } - - dataRows["time"] = "10201"; - dataRows["person"] = "Behati"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - dataRows["time"] = "10201"; - dataRows["person"] = "Candice"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - dataRows["time"] = "10201"; - dataRows["person"] = "Gisele"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - CPPUNIT_ASSERT_EQUAL(core_t::TTime(10149), job.m_ResultsQueue.latestBucketEnd()); - { - CMultiResultVisitor visitor; - job.m_ResultsQueue.latest().topDownBreadthFirst(visitor); - CPPUNIT_ASSERT_DOUBLES_EQUAL(3.0, visitor.lastResults(), 0.0005); - } - - dataRows["time"] = "10300"; - dataRows["person"] = "Cara"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - dataRows["time"] = "10300"; - dataRows["person"] = "Kate"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - dataRows["time"] = "10300"; - dataRows["person"] = "Gisele the imposter"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - dataRows["time"] = "10301"; - dataRows["person"] = "Cara"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - { - CMultiResultVisitor visitor; - job.m_ResultsQueue.latest().topDownBreadthFirst(visitor); - CPPUNIT_ASSERT_DOUBLES_EQUAL(3.0, visitor.lastResults(), 0.0005); - } - - dataRows["time"] = "10490"; - dataRows["person"] = "Gisele"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - 
dataRows["time"] = "10492"; - dataRows["person"] = "Kate"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - dataRows["time"] = "10494"; - dataRows["person"] = "Behati"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - dataRows["time"] = "10499"; - dataRows["person"] = "Cara"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - LOG_DEBUG(<< "Result time is " << (job.m_ResultsQueue.latestBucketEnd() - 49)); - { - CMultiResultVisitor visitor; - job.m_ResultsQueue.latest().topDownBreadthFirst(visitor); - CPPUNIT_ASSERT_DOUBLES_EQUAL(0.0, visitor.lastResults(), 0.0005); - } - - dataRows["time"] = "10500"; - dataRows["person"] = "Cara"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - { - CMultiResultVisitor visitor; - job.m_ResultsQueue.latest().topDownBreadthFirst(visitor); - CPPUNIT_ASSERT_DOUBLES_EQUAL(4.0, visitor.lastResults(), 0.0005); - } - - // Bucket at 10600 not present - - dataRows["time"] = "10700"; - dataRows["person"] = "Behati"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - { - CMultiResultVisitor visitor; - job.m_ResultsQueue.latest().topDownBreadthFirst(visitor); - CPPUNIT_ASSERT_DOUBLES_EQUAL(0.0, visitor.lastResults(), 0.0005); - } - - dataRows["time"] = "10720"; - dataRows["person"] = "Kate"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - - dataRows["time"] = "10760"; - dataRows["person"] = "Behati"; + dataRows["time"] = "10030000"; + dataRows["value"] = "2.0"; + dataRows["animal"] = "baboon"; CPPUNIT_ASSERT(job.handleRecord(dataRows)); - { - CMultiResultVisitor visitor; - job.m_ResultsQueue.latest().topDownBreadthFirst(visitor); - CPPUNIT_ASSERT_DOUBLES_EQUAL(2.0, visitor.lastResults(), 0.0005); - } - - dataRows["time"] = "10780"; - dataRows["person"] = "Cara"; + dataRows["value"] = "5.0"; + dataRows["animal"] = "shark"; CPPUNIT_ASSERT(job.handleRecord(dataRows)); - - dataRows["time"] = "10800"; - dataRows["person"] = "Candice"; + dataRows["time"] = "10040000"; + dataRows["value"] = "3.0"; + dataRows["animal"] = "baboon"; 
CPPUNIT_ASSERT(job.handleRecord(dataRows)); - { - CMultiResultVisitor visitor; - job.m_ResultsQueue.latest().topDownBreadthFirst(visitor); - CPPUNIT_ASSERT_DOUBLES_EQUAL(4.0, visitor.lastResults(), 0.005); - } - - // 10895, triggers bucket 10750->10850 - dataRows["time"] = "10895"; - dataRows["person"] = "Cara"; + dataRows["value"] = "5.0"; + dataRows["animal"] = "shark"; CPPUNIT_ASSERT(job.handleRecord(dataRows)); - LOG_DEBUG(<< "Result time is " << (job.m_ResultsQueue.latestBucketEnd())); - CPPUNIT_ASSERT_EQUAL(core_t::TTime(10799), job.m_ResultsQueue.latestBucketEnd()); - { - CMultiResultVisitor visitor; - job.m_ResultsQueue.latest().topDownBreadthFirst(visitor); - CPPUNIT_ASSERT_DOUBLES_EQUAL(3.0, visitor.lastResults(), 0.005); - } - LOG_DEBUG(<< "Finalising job"); job.finalise(); - CPPUNIT_ASSERT_EQUAL(core_t::TTime(10799), job.m_ResultsQueue.latestBucketEnd()); - { - CMultiResultVisitor visitor; - job.m_ResultsQueue.latest().topDownBreadthFirst(visitor); - CPPUNIT_ASSERT_DOUBLES_EQUAL(3.0, visitor.lastResults(), 0.005); - } - } -} - -void CAnomalyJobTest::testBucketSelection() { - core_t::TTime bucketSize = 100; - model::CLimits limits; - api::CFieldConfig fieldConfig; - api::CFieldConfig::TStrVec clauses; - clauses.push_back("mean(value)"); - fieldConfig.initFromClause(clauses); - - // 2 delay buckets - model::CAnomalyDetectorModelConfig modelConfig = - model::CAnomalyDetectorModelConfig::defaultConfig(bucketSize, model_t::E_None, - "", 0, 2, false); - std::stringstream outputStrm; - - core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); - - api::CAnomalyJob job("job", limits, fieldConfig, modelConfig, wrappedOutputStream); - - job.m_ResultsQueue.reset(950); - { - model::SAnnotatedProbability prob(1.0); - - model::CHierarchicalResults results; - results.addModelResult(0, false, "mean", model::function_t::E_IndividualMetricMean, - "", "", "", "", "value", prob, nullptr, 1000); - CResultsScoreVisitor visitor(10); - 
results.topDownBreadthFirst(visitor); - job.m_ResultsQueue.push(results, 1000); - LOG_DEBUG(<< "Adding 10 at 1000"); - } - { - model::SAnnotatedProbability prob(1.0); - - model::CHierarchicalResults results; - results.addModelResult(0, false, "mean", model::function_t::E_IndividualMetricMean, - "", "", "", "", "value", prob, nullptr, 1000); - CResultsScoreVisitor visitor(20); - results.topDownBreadthFirst(visitor); - job.m_ResultsQueue.push(results, 1050); - LOG_DEBUG(<< "Adding 20 at 1050"); - } - { - model::SAnnotatedProbability prob(1.0); - - model::CHierarchicalResults results; - results.addModelResult(0, false, "mean", model::function_t::E_IndividualMetricMean, - "", "", "", "", "value", prob, nullptr, 1000); - CResultsScoreVisitor visitor(15); - results.topDownBreadthFirst(visitor); - job.m_ResultsQueue.push(results, 1100); - LOG_DEBUG(<< "Adding 15 at 1100"); - CPPUNIT_ASSERT_EQUAL(core_t::TTime(0), job.m_ResultsQueue.chooseResultTime( - 1100, bucketSize, results)); - } - { - model::SAnnotatedProbability prob(1.0); - - model::CHierarchicalResults results; - results.addModelResult(0, false, "mean", model::function_t::E_IndividualMetricMean, - "", "", "", "", "value", prob, nullptr, 1000); - CResultsScoreVisitor visitor(20); - results.topDownBreadthFirst(visitor); - job.m_ResultsQueue.push(results, 1150); - LOG_DEBUG(<< "Adding 20 at 1150"); - CPPUNIT_ASSERT_EQUAL(core_t::TTime(0), job.m_ResultsQueue.chooseResultTime( - 1150, bucketSize, results)); - } - { - model::SAnnotatedProbability prob(1.0); - - model::CHierarchicalResults results; - results.addModelResult(0, false, "mean", model::function_t::E_IndividualMetricMean, - "", "", "", "", "value", prob, nullptr, 1000); - CResultsScoreVisitor visitor(25); - results.topDownBreadthFirst(visitor); - job.m_ResultsQueue.push(results, 1200); - LOG_DEBUG(<< "Adding 25 at 1200"); - CPPUNIT_ASSERT_EQUAL(core_t::TTime(1100), job.m_ResultsQueue.chooseResultTime( - 1200, bucketSize, results)); - } - { - 
model::SAnnotatedProbability prob(1.0); - - model::CHierarchicalResults results; - results.addModelResult(0, false, "mean", model::function_t::E_IndividualMetricMean, - "", "", "", "", "value", prob, nullptr, 1000); - CResultsScoreVisitor visitor(0); - results.topDownBreadthFirst(visitor); - job.m_ResultsQueue.push(results, 1250); - LOG_DEBUG(<< "Adding 0 at 1250"); - CPPUNIT_ASSERT_EQUAL(core_t::TTime(0), job.m_ResultsQueue.chooseResultTime( - 1250, bucketSize, results)); - } - { - model::SAnnotatedProbability prob(1.0); - - model::CHierarchicalResults results; - results.addModelResult(0, false, "mean", model::function_t::E_IndividualMetricMean, - "", "", "", "", "value", prob, nullptr, 1000); - CResultsScoreVisitor visitor(5); - results.topDownBreadthFirst(visitor); - job.m_ResultsQueue.push(results, 1300); - LOG_DEBUG(<< "Adding 5 at 1300"); - CPPUNIT_ASSERT_EQUAL(core_t::TTime(1200), job.m_ResultsQueue.chooseResultTime( - 1300, bucketSize, results)); - } - { - model::SAnnotatedProbability prob(1.0); - - model::CHierarchicalResults results; - results.addModelResult(0, false, "mean", model::function_t::E_IndividualMetricMean, - "", "", "", "", "value", prob, nullptr, 1000); - CResultsScoreVisitor visitor(5); - results.topDownBreadthFirst(visitor); - job.m_ResultsQueue.push(results, 1350); - LOG_DEBUG(<< "Adding 5 at 1350"); - CPPUNIT_ASSERT_EQUAL(core_t::TTime(0), job.m_ResultsQueue.chooseResultTime( - 1350, bucketSize, results)); - } - { - model::SAnnotatedProbability prob(1.0); - - model::CHierarchicalResults results; - results.addModelResult(0, false, "mean", model::function_t::E_IndividualMetricMean, - "", "", "", "", "value", prob, nullptr, 1000); - CResultsScoreVisitor visitor(1); - results.topDownBreadthFirst(visitor); - job.m_ResultsQueue.push(results, 1400); - LOG_DEBUG(<< "Adding 1 at 1400"); - CPPUNIT_ASSERT_EQUAL(core_t::TTime(1300), job.m_ResultsQueue.chooseResultTime( - 1400, bucketSize, results)); - } -} - -void CAnomalyJobTest::testModelPlot() { - 
{ - // Test non-overlapping buckets - core_t::TTime bucketSize = 10000; - model::CLimits limits; - api::CFieldConfig fieldConfig; - api::CFieldConfig::TStrVec clauses; - clauses.push_back("mean(value)"); - clauses.push_back("by"); - clauses.push_back("animal"); - fieldConfig.initFromClause(clauses); - - model::CAnomalyDetectorModelConfig modelConfig = - model::CAnomalyDetectorModelConfig::defaultConfig( - bucketSize, model_t::E_None, "", 0, 0, false); - modelConfig.modelPlotBoundsPercentile(1.0); - std::stringstream outputStrm; - - { - core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); - - api::CAnomalyJob job("job", limits, fieldConfig, modelConfig, wrappedOutputStream); - - api::CAnomalyJob::TStrStrUMap dataRows; - dataRows["time"] = "10000000"; - dataRows["value"] = "2.0"; - dataRows["animal"] = "baboon"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - dataRows["value"] = "5.0"; - dataRows["animal"] = "shark"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - dataRows["time"] = "10010000"; - dataRows["value"] = "2.0"; - dataRows["animal"] = "baboon"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - dataRows["value"] = "5.0"; - dataRows["animal"] = "shark"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - dataRows["time"] = "10020000"; - dataRows["value"] = "2.0"; - dataRows["animal"] = "baboon"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - dataRows["value"] = "5.0"; - dataRows["animal"] = "shark"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - dataRows["time"] = "10030000"; - dataRows["value"] = "2.0"; - dataRows["animal"] = "baboon"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - dataRows["value"] = "5.0"; - dataRows["animal"] = "shark"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - dataRows["time"] = "10040000"; - dataRows["value"] = "3.0"; - dataRows["animal"] = "baboon"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - dataRows["value"] = "5.0"; - dataRows["animal"] = "shark"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - job.finalise(); - } 
- - std::string output = outputStrm.str(); - LOG_TRACE(<< "Output has yielded: " << output); - core::CRegex regex; - regex.init("\n"); - core::CRegex::TStrVec lines; - regex.split(output, lines); - CPPUNIT_ASSERT(findLine("model_feature.*timestamp.*10000000.*baboon", lines)); - CPPUNIT_ASSERT(findLine("model_feature.*timestamp.*10000000.*shark", lines)); - CPPUNIT_ASSERT(findLine("model_feature.*timestamp.*10010000.*baboon", lines)); - CPPUNIT_ASSERT(findLine("model_feature.*timestamp.*10010000.*shark", lines)); - CPPUNIT_ASSERT(findLine("model_feature.*timestamp.*10020000.*baboon", lines)); - CPPUNIT_ASSERT(findLine("model_feature.*timestamp.*10020000.*shark", lines)); - CPPUNIT_ASSERT(findLine("model_feature.*timestamp.*10030000.*baboon", lines)); - CPPUNIT_ASSERT(findLine("model_feature.*timestamp.*10030000.*shark", lines)); } - { - // Test overlapping buckets - core_t::TTime bucketSize = 10000; - model::CLimits limits; - api::CFieldConfig fieldConfig; - api::CFieldConfig::TStrVec clauses; - clauses.push_back("max(value)"); - fieldConfig.initFromClause(clauses); - - // 2 delay buckets - model::CAnomalyDetectorModelConfig modelConfig = - model::CAnomalyDetectorModelConfig::defaultConfig( - bucketSize, model_t::E_None, "", 0, 2, false); - modelConfig.modelPlotBoundsPercentile(1.0); - - std::stringstream outputStrm; - { - core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); - - api::CAnomalyJob job("job", limits, fieldConfig, modelConfig, wrappedOutputStream); - - api::CAnomalyJob::TStrStrUMap dataRows; - - // Data contains 2 anomalies - dataRows["time"] = "10000000"; - dataRows["value"] = "2.0"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - dataRows["time"] = "10010000"; - dataRows["value"] = "2.1"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - dataRows["time"] = "10020000"; - dataRows["value"] = "2.0"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - dataRows["time"] = "10030000"; - dataRows["value"] = "2.3"; - 
CPPUNIT_ASSERT(job.handleRecord(dataRows)); - dataRows["time"] = "10040000"; - dataRows["value"] = "2.2"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - dataRows["time"] = "10055500"; - dataRows["value"] = "2.0"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - dataRows["time"] = "10060000"; - dataRows["value"] = "2.0"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - dataRows["time"] = "10077700"; - dataRows["value"] = "2.1"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - dataRows["time"] = "10080000"; - dataRows["value"] = "2.4"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - dataRows["time"] = "10090000"; - dataRows["value"] = "2.1"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - dataRows["time"] = "10094400"; - dataRows["value"] = "2.0003"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - dataRows["time"] = "10110000"; - dataRows["value"] = "2.01"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - dataRows["time"] = "10120000"; - dataRows["value"] = "2.03"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - dataRows["time"] = "10140000"; - dataRows["value"] = "2.001"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - dataRows["time"] = "10150000"; - dataRows["value"] = "2.1"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - dataRows["time"] = "10167000"; - dataRows["value"] = "200.0"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - dataRows["time"] = "10170000"; - dataRows["value"] = "2.0"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - dataRows["time"] = "10183000"; - dataRows["value"] = "400.0"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - dataRows["time"] = "10190000"; - dataRows["value"] = "2.0"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - dataRows["time"] = "10200000"; - dataRows["value"] = "2.0"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - dataRows["time"] = "10210000"; - dataRows["value"] = "2.0"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); - dataRows["time"] = "10230000"; - dataRows["value"] = "2.0"; - CPPUNIT_ASSERT(job.handleRecord(dataRows)); 
- - job.finalise(); - } - std::string output = outputStrm.str(); - LOG_TRACE(<< "Output has yielded: " << output); - core::CRegex regex; - regex.init("\n"); - core::CRegex::TStrVec lines; - regex.split(output, lines); - CPPUNIT_ASSERT(findLine("model_feature.*timestamp.*10000000000", lines)); - CPPUNIT_ASSERT(findLine("model_feature.*timestamp.*10010000000", lines)); - CPPUNIT_ASSERT(findLine("model_feature.*timestamp.*10020000000", lines)); - CPPUNIT_ASSERT(findLine("model_feature.*timestamp.*10160000000.*actual..200\\.0", lines)); - CPPUNIT_ASSERT(findLine("model_feature.*timestamp.*10175000000.*actual..400\\.0", lines)); - } + std::string output = outputStrm.str(); + LOG_TRACE(<< "Output has yielded: " << output); + core::CRegex regex; + regex.init("\n"); + core::CRegex::TStrVec lines; + regex.split(output, lines); + CPPUNIT_ASSERT(findLine("model_feature.*timestamp.*10000000.*baboon", lines)); + CPPUNIT_ASSERT(findLine("model_feature.*timestamp.*10000000.*shark", lines)); + CPPUNIT_ASSERT(findLine("model_feature.*timestamp.*10010000.*baboon", lines)); + CPPUNIT_ASSERT(findLine("model_feature.*timestamp.*10010000.*shark", lines)); + CPPUNIT_ASSERT(findLine("model_feature.*timestamp.*10020000.*baboon", lines)); + CPPUNIT_ASSERT(findLine("model_feature.*timestamp.*10020000.*shark", lines)); + CPPUNIT_ASSERT(findLine("model_feature.*timestamp.*10030000.*baboon", lines)); + CPPUNIT_ASSERT(findLine("model_feature.*timestamp.*10030000.*shark", lines)); } void CAnomalyJobTest::testInterimResultEdgeCases() { @@ -1854,10 +651,6 @@ CppUnit::Test* CAnomalyJobTest::suite() { suiteOfTests->addTest(new CppUnit::TestCaller( "CAnomalyJobTest::testSkipTimeControlMessage", &CAnomalyJobTest::testSkipTimeControlMessage)); - suiteOfTests->addTest(new CppUnit::TestCaller( - "CAnomalyJobTest::testOutOfPhase", &CAnomalyJobTest::testOutOfPhase)); - suiteOfTests->addTest(new CppUnit::TestCaller( - "CAnomalyJobTest::testBucketSelection", &CAnomalyJobTest::testBucketSelection)); 
suiteOfTests->addTest(new CppUnit::TestCaller( "CAnomalyJobTest::testModelPlot", &CAnomalyJobTest::testModelPlot)); suiteOfTests->addTest(new CppUnit::TestCaller( diff --git a/lib/api/unittest/CAnomalyJobTest.h b/lib/api/unittest/CAnomalyJobTest.h index 84fb840f98..a207a5898e 100644 --- a/lib/api/unittest/CAnomalyJobTest.h +++ b/lib/api/unittest/CAnomalyJobTest.h @@ -17,8 +17,6 @@ class CAnomalyJobTest : public CppUnit::TestFixture { void testOutOfSequence(); void testControlMessages(); void testSkipTimeControlMessage(); - void testOutOfPhase(); - void testBucketSelection(); void testModelPlot(); void testInterimResultEdgeCases(); void testRestoreFailsWithEmptyStream(); diff --git a/lib/api/unittest/CMultiFileDataAdderTest.cc b/lib/api/unittest/CMultiFileDataAdderTest.cc index 2894492db7..8f03070b4e 100644 --- a/lib/api/unittest/CMultiFileDataAdderTest.cc +++ b/lib/api/unittest/CMultiFileDataAdderTest.cc @@ -180,7 +180,7 @@ void CMultiFileDataAdderTest::detectorPersistHelper(const std::string& configFil ml::model::CAnomalyDetectorModelConfig modelConfig = ml::model::CAnomalyDetectorModelConfig::defaultConfig( - BUCKET_SIZE, ml::model_t::E_None, "", BUCKET_SIZE * latencyBuckets, 0, false); + BUCKET_SIZE, ml::model_t::E_None, "", BUCKET_SIZE * latencyBuckets, false); std::string origSnapshotId; std::size_t numOrigDocs(0); diff --git a/lib/api/unittest/CRestorePreviousStateTest.cc b/lib/api/unittest/CRestorePreviousStateTest.cc index ccb8ce0e90..48af5a78f9 100644 --- a/lib/api/unittest/CRestorePreviousStateTest.cc +++ b/lib/api/unittest/CRestorePreviousStateTest.cc @@ -206,7 +206,7 @@ void CRestorePreviousStateTest::anomalyDetectorRestoreHelper(const std::string& ml::model::CAnomalyDetectorModelConfig modelConfig = ml::model::CAnomalyDetectorModelConfig::defaultConfig( - BUCKET_SIZE, ml::model_t::E_None, "", BUCKET_SIZE * latencyBuckets, 0, false); + BUCKET_SIZE, ml::model_t::E_None, "", BUCKET_SIZE * latencyBuckets, false); std::ofstream 
outputStrm(ml::core::COsFileFuncs::NULL_FILENAME); CPPUNIT_ASSERT(outputStrm.is_open()); diff --git a/lib/api/unittest/CSingleStreamDataAdderTest.cc b/lib/api/unittest/CSingleStreamDataAdderTest.cc index 4f21132877..c873d56704 100644 --- a/lib/api/unittest/CSingleStreamDataAdderTest.cc +++ b/lib/api/unittest/CSingleStreamDataAdderTest.cc @@ -120,7 +120,7 @@ void CSingleStreamDataAdderTest::detectorPersistHelper(const std::string& config ml::model::CAnomalyDetectorModelConfig modelConfig = ml::model::CAnomalyDetectorModelConfig::defaultConfig( - BUCKET_SIZE, ml::model_t::E_None, "", BUCKET_SIZE * latencyBuckets, 0, false); + BUCKET_SIZE, ml::model_t::E_None, "", BUCKET_SIZE * latencyBuckets, false); ml::core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); ml::api::CJsonOutputWriter outputWriter(JOB_ID, wrappedOutputStream); diff --git a/lib/api/unittest/CStringStoreTest.cc b/lib/api/unittest/CStringStoreTest.cc index 35217b29d9..165561abb5 100644 --- a/lib/api/unittest/CStringStoreTest.cc +++ b/lib/api/unittest/CStringStoreTest.cc @@ -128,7 +128,6 @@ void CStringStoreTest::testPersonStringPruning() { model::CAnomalyDetectorModelConfig modelConfig = model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_SPAN); modelConfig.decayRate(0.001); - modelConfig.bucketResultsDelay(2); model::CLimits limits; @@ -153,9 +152,6 @@ void CStringStoreTest::testPersonStringPruning() { api::CAnomalyJob job("job", limits, fieldConfig, modelConfig, wrappedOutputStream); - // There will be one anomaly in this batch, which will be stuck in the - // results queue. 
- time = playData(time, BUCKET_SPAN, 100, 3, 2, 99, job); wrappedOutputStream.syncFlush(); @@ -211,9 +207,8 @@ void CStringStoreTest::testPersonStringPruning() { CPPUNIT_ASSERT_EQUAL(std::size_t(0), model::CStringStore::influencers().m_Strings.size()); - // "", "count", "max", "notes", "composer", "instrument", "Elgar", "Holst", "Delius", "flute", "tuba" + // "", "count", "notes", "composer", "instrument", "Elgar", "Holst", "Delius", "flute", "tuba" CPPUNIT_ASSERT(this->nameExists("count")); - CPPUNIT_ASSERT(this->nameExists("max")); CPPUNIT_ASSERT(this->nameExists("notes")); CPPUNIT_ASSERT(this->nameExists("composer")); CPPUNIT_ASSERT(this->nameExists("instrument")); @@ -256,7 +251,6 @@ void CStringStoreTest::testPersonStringPruning() { // While the 3 composers from the second partition should have been culled in the prune, // their names still exist in the first partition, so will still be in the string store CPPUNIT_ASSERT(this->nameExists("count")); - CPPUNIT_ASSERT(this->nameExists("max")); CPPUNIT_ASSERT(this->nameExists("notes")); CPPUNIT_ASSERT(this->nameExists("composer")); CPPUNIT_ASSERT(this->nameExists("instrument")); @@ -298,7 +292,6 @@ void CStringStoreTest::testPersonStringPruning() { // One composer should have been culled! 
CPPUNIT_ASSERT(this->nameExists("count")); - CPPUNIT_ASSERT(this->nameExists("max")); CPPUNIT_ASSERT(this->nameExists("notes")); CPPUNIT_ASSERT(this->nameExists("composer")); CPPUNIT_ASSERT(this->nameExists("instrument")); @@ -326,7 +319,6 @@ void CStringStoreTest::testAttributeStringPruning() { model::CAnomalyDetectorModelConfig modelConfig = model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_SPAN); modelConfig.decayRate(0.001); - modelConfig.bucketResultsDelay(2); model::CLimits limits; @@ -350,9 +342,6 @@ void CStringStoreTest::testAttributeStringPruning() { api::CAnomalyJob job("job", limits, fieldConfig, modelConfig, wrappedOutputStream); - // There will be one anomaly in this batch, which will be stuck in the - // results queue. - time = playData(time, BUCKET_SPAN, 100, 3, 2, 99, job); wrappedOutputStream.syncFlush(); CPPUNIT_ASSERT_EQUAL(std::size_t(0), @@ -409,7 +398,6 @@ void CStringStoreTest::testAttributeStringPruning() { // "", "count", "distinct_count", "notes", "composer", "instrument", "Elgar", "Holst", "Delius", "flute", "tuba" CPPUNIT_ASSERT(this->nameExists("count")); - CPPUNIT_ASSERT(this->nameExists("distinct_count")); CPPUNIT_ASSERT(this->nameExists("notes")); CPPUNIT_ASSERT(this->nameExists("composer")); CPPUNIT_ASSERT(this->nameExists("instrument")); @@ -453,7 +441,6 @@ void CStringStoreTest::testAttributeStringPruning() { // While the 3 composers from the second partition should have been culled in the prune, // their names still exist in the first partition, so will still be in the string store CPPUNIT_ASSERT(this->nameExists("count")); - CPPUNIT_ASSERT(this->nameExists("distinct_count")); CPPUNIT_ASSERT(this->nameExists("notes")); CPPUNIT_ASSERT(this->nameExists("composer")); CPPUNIT_ASSERT(this->nameExists("instrument")); @@ -496,7 +483,6 @@ void CStringStoreTest::testAttributeStringPruning() { // One composer should have been culled! 
CPPUNIT_ASSERT(this->nameExists("count")); - CPPUNIT_ASSERT(this->nameExists("distinct_count")); CPPUNIT_ASSERT(this->nameExists("notes")); CPPUNIT_ASSERT(this->nameExists("composer")); CPPUNIT_ASSERT(this->nameExists("instrument")); @@ -522,7 +508,6 @@ void CStringStoreTest::testInfluencerStringPruning() { model::CAnomalyDetectorModelConfig modelConfig = model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_SPAN); - modelConfig.bucketResultsDelay(2); model::CLimits limits; diff --git a/lib/model/CAnomalyDetector.cc b/lib/model/CAnomalyDetector.cc index c4928e0f85..7147e7fdc7 100644 --- a/lib/model/CAnomalyDetector.cc +++ b/lib/model/CAnomalyDetector.cc @@ -367,9 +367,6 @@ void CAnomalyDetector::buildResults(core_t::TTime bucketStartTime, core_t::TTime bucketEndTime, CHierarchicalResults& results) { core_t::TTime bucketLength = m_ModelConfig.bucketLength(); - if (m_ModelConfig.bucketResultsDelay()) { - bucketLength /= 2; - } bucketStartTime = maths::CIntegerTools::floor(bucketStartTime, bucketLength); bucketEndTime = maths::CIntegerTools::floor(bucketEndTime, bucketLength); if (bucketEndTime <= m_LastBucketEndTime) { diff --git a/lib/model/CAnomalyDetectorModelConfig.cc b/lib/model/CAnomalyDetectorModelConfig.cc index a9aaed9523..6a02a8857b 100644 --- a/lib/model/CAnomalyDetectorModelConfig.cc +++ b/lib/model/CAnomalyDetectorModelConfig.cc @@ -59,7 +59,6 @@ const std::size_t CAnomalyDetectorModelConfig::DEFAULT_SAMPLE_COUNT_FACTOR_NO_LA const std::size_t CAnomalyDetectorModelConfig::DEFAULT_SAMPLE_COUNT_FACTOR_WITH_LATENCY(10); const double CAnomalyDetectorModelConfig::DEFAULT_SAMPLE_QUEUE_GROWTH_FACTOR(0.1); const core_t::TTime CAnomalyDetectorModelConfig::STANDARD_BUCKET_LENGTH(1800); -const std::size_t CAnomalyDetectorModelConfig::DEFAULT_BUCKET_RESULTS_DELAY(0); const double CAnomalyDetectorModelConfig::DEFAULT_DECAY_RATE(0.0005); const double CAnomalyDetectorModelConfig::DEFAULT_INITIAL_DECAY_RATE_MULTIPLIER(4.0); const double 
CAnomalyDetectorModelConfig::DEFAULT_LEARN_RATE(1.0); @@ -105,7 +104,6 @@ CAnomalyDetectorModelConfig::defaultConfig(core_t::TTime bucketLength, model_t::ESummaryMode summaryMode, const std::string& summaryCountFieldName, core_t::TTime latency, - std::size_t bucketResultsDelay, bool multivariateByFields) { bucketLength = detail::validateBucketLength(bucketLength); @@ -117,7 +115,6 @@ CAnomalyDetectorModelConfig::defaultConfig(core_t::TTime bucketLength, params.s_DecayRate = decayRate; params.s_ExcludeFrequent = model_t::E_XF_None; params.configureLatency(latency, bucketLength); - params.s_BucketResultsDelay = bucketResultsDelay; TInterimBucketCorrectorPtr interimBucketCorrector = std::make_shared(bucketLength); @@ -139,7 +136,6 @@ CAnomalyDetectorModelConfig::defaultConfig(core_t::TTime bucketLength, CAnomalyDetectorModelConfig result; result.bucketLength(bucketLength); - result.bucketResultsDelay(bucketResultsDelay); result.interimBucketCorrector(interimBucketCorrector); result.multivariateByFields(multivariateByFields); result.factories(factories); @@ -164,9 +160,8 @@ double CAnomalyDetectorModelConfig::trendDecayRate(double modelDecayRate, } CAnomalyDetectorModelConfig::CAnomalyDetectorModelConfig() - : m_BucketLength(STANDARD_BUCKET_LENGTH), - m_BucketResultsDelay(DEFAULT_BUCKET_RESULTS_DELAY), - m_MultivariateByFields(false), m_ModelPlotBoundsPercentile(-1.0), + : m_BucketLength(STANDARD_BUCKET_LENGTH), m_MultivariateByFields(false), + m_ModelPlotBoundsPercentile(-1.0), m_MaximumAnomalousProbability(DEFAULT_MAXIMUM_ANOMALOUS_PROBABILITY), m_NoisePercentile(DEFAULT_NOISE_PERCENTILE), m_NoiseMultiplier(DEFAULT_NOISE_MULTIPLIER), @@ -187,10 +182,6 @@ void CAnomalyDetectorModelConfig::bucketLength(core_t::TTime length) { } } -void CAnomalyDetectorModelConfig::bucketResultsDelay(std::size_t delay) { - m_BucketResultsDelay = delay; -} - void CAnomalyDetectorModelConfig::interimBucketCorrector(const TInterimBucketCorrectorPtr& interimBucketCorrector) { 
m_InterimBucketCorrector = interimBucketCorrector; for (auto& factory : m_Factories) { @@ -626,7 +617,6 @@ CAnomalyDetectorModelConfig::factory(int identifier, result->useNull(useNull); result->excludeFrequent(excludeFrequent); result->features(features); - result->bucketResultsDelay(m_BucketResultsDelay); result->multivariateByFields(m_MultivariateByFields); TIntDetectionRuleVecUMapCItr rulesItr = m_DetectionRules.get().find(identifier); if (rulesItr != m_DetectionRules.get().end()) { @@ -659,10 +649,6 @@ std::size_t CAnomalyDetectorModelConfig::latencyBuckets() const { return m_Factories.begin()->second->modelParams().s_LatencyBuckets; } -std::size_t CAnomalyDetectorModelConfig::bucketResultsDelay() const { - return m_BucketResultsDelay; -} - const CInterimBucketCorrector& CAnomalyDetectorModelConfig::interimBucketCorrector() const { return *m_InterimBucketCorrector; } diff --git a/lib/model/CCountingModel.cc b/lib/model/CCountingModel.cc index 25ec6bec6c..a3c9138cab 100644 --- a/lib/model/CCountingModel.cc +++ b/lib/model/CCountingModel.cc @@ -171,12 +171,6 @@ void CCountingModel::currentBucketPersonIds(core_t::TTime time, TSizeVec& result result.assign(people.begin(), people.end()); } -void CCountingModel::sampleOutOfPhase(core_t::TTime startTime, - core_t::TTime endTime, - CResourceMonitor& resourceMonitor) { - this->sampleBucketStatistics(startTime, endTime, resourceMonitor); -} - void CCountingModel::sampleBucketStatistics(core_t::TTime startTime, core_t::TTime endTime, CResourceMonitor& /*resourceMonitor*/) { diff --git a/lib/model/CCountingModelFactory.cc b/lib/model/CCountingModelFactory.cc index 86e3d0d242..58612de387 100644 --- a/lib/model/CCountingModelFactory.cc +++ b/lib/model/CCountingModelFactory.cc @@ -28,8 +28,8 @@ CCountingModelFactory::CCountingModelFactory(const SModelParams& params, model_t::ESummaryMode summaryMode, const std::string& summaryCountFieldName) : CModelFactory(params, interimBucketCorrector), m_Identifier(), - 
m_SummaryMode(summaryMode), m_SummaryCountFieldName(summaryCountFieldName), - m_UseNull(false), m_BucketResultsDelay(0) { + m_SummaryMode(summaryMode), + m_SummaryCountFieldName(summaryCountFieldName), m_UseNull(false) { } CCountingModelFactory* CCountingModelFactory::clone() const { @@ -140,10 +140,6 @@ void CCountingModelFactory::features(const TFeatureVec& features) { m_SearchKeyCache.reset(); } -void CCountingModelFactory::bucketResultsDelay(std::size_t bucketResultsDelay) { - m_BucketResultsDelay = bucketResultsDelay; -} - CCountingModelFactory::TStrCRefVec CCountingModelFactory::partitioningFields() const { TStrCRefVec result; result.reserve(2); diff --git a/lib/model/CDataGatherer.cc b/lib/model/CDataGatherer.cc index 38d4249dd5..e17bc7e60d 100644 --- a/lib/model/CDataGatherer.cc +++ b/lib/model/CDataGatherer.cc @@ -191,21 +191,11 @@ CDataGatherer::CDataGatherer(model_t::EAnalysisCategory gathererType, stat_t::E_NumberNewAttributesNotAllowed, stat_t::E_NumberNewAttributesRecycled), m_Population(detail::isPopulation(gathererType)), m_UseNull(key.useNull()) { - // Constructor needs to create 1 bucket gatherer at the startTime - // and possibly 1 bucket gatherer at (startTime + bucketLength / 2). 
std::sort(m_Features.begin(), m_Features.end()); - core_t::TTime bucketLength = modelParams.s_BucketLength; - this->createBucketGatherer(gathererType, summaryCountFieldName, personFieldName, attributeFieldName, valueFieldName, influenceFieldNames, startTime, sampleCountOverride); - - if (modelParams.s_BucketResultsDelay > 0) { - this->createBucketGatherer(gathererType, summaryCountFieldName, personFieldName, - attributeFieldName, valueFieldName, influenceFieldNames, - startTime + (bucketLength / 2), sampleCountOverride); - } } CDataGatherer::CDataGatherer(model_t::EAnalysisCategory gathererType, @@ -250,9 +240,7 @@ CDataGatherer::CDataGatherer(bool isForPersistence, const CDataGatherer& other) if (!isForPersistence) { LOG_ABORT(<< "This constructor only creates clones for persistence"); } - for (const auto& gatherer : other.m_Gatherers) { - m_Gatherers.emplace_back(gatherer->cloneForPersistence()); - } + m_BucketGatherer.reset(other.m_BucketGatherer->cloneForPersistence()); if (other.m_SampleCounts) { m_SampleCounts.reset(other.m_SampleCounts->cloneForPersistence()); } @@ -278,7 +266,7 @@ bool CDataGatherer::isPopulation() const { } std::string CDataGatherer::description() const { - return m_Gatherers.front()->description(); + return m_BucketGatherer->description(); } std::size_t CDataGatherer::maxDimension() const { @@ -298,27 +286,27 @@ const CSearchKey& CDataGatherer::searchKey() const { } CDataGatherer::TStrVecCItr CDataGatherer::beginInfluencers() const { - return m_Gatherers.front()->beginInfluencers(); + return m_BucketGatherer->beginInfluencers(); } CDataGatherer::TStrVecCItr CDataGatherer::endInfluencers() const { - return m_Gatherers.front()->endInfluencers(); + return m_BucketGatherer->endInfluencers(); } const std::string& CDataGatherer::personFieldName() const { - return m_Gatherers.front()->personFieldName(); + return m_BucketGatherer->personFieldName(); } const std::string& CDataGatherer::attributeFieldName() const { - return 
m_Gatherers.front()->attributeFieldName(); + return m_BucketGatherer->attributeFieldName(); } const std::string& CDataGatherer::valueFieldName() const { - return m_Gatherers.front()->valueFieldName(); + return m_BucketGatherer->valueFieldName(); } const CDataGatherer::TStrVec& CDataGatherer::fieldsOfInterest() const { - return m_Gatherers.front()->fieldsOfInterest(); + return m_BucketGatherer->fieldsOfInterest(); } std::size_t CDataGatherer::numberByFieldValues() const { @@ -333,7 +321,7 @@ std::size_t CDataGatherer::numberOverFieldValues() const { bool CDataGatherer::processFields(const TStrCPtrVec& fieldValues, CEventData& result, CResourceMonitor& resourceMonitor) { - return m_Gatherers.front()->processFields(fieldValues, result, resourceMonitor); + return m_BucketGatherer->processFields(fieldValues, result, resourceMonitor); } bool CDataGatherer::addArrival(const TStrCPtrVec& fieldValues, @@ -342,31 +330,25 @@ bool CDataGatherer::addArrival(const TStrCPtrVec& fieldValues, // We process fields even if we are in the first partial bucket so that // we add enough extra memory to the resource monitor in order to control // the number of partitions created. - m_Gatherers.front()->processFields(fieldValues, data, resourceMonitor); + m_BucketGatherer->processFields(fieldValues, data, resourceMonitor); core_t::TTime time = data.time(); - if (time < m_Gatherers.front()->earliestBucketStartTime()) { + if (time < m_BucketGatherer->earliestBucketStartTime()) { // Ignore records that are out of the latency window. // Records in an incomplete first bucket will end up here, // but we don't want to model these. 
return false; } - bool result = true; - for (auto& gatherer : m_Gatherers) { - result &= gatherer->addEventData(data); - } - return result; + return m_BucketGatherer->addEventData(data); } void CDataGatherer::sampleNow(core_t::TTime sampleBucketStart) { - this->chooseBucketGatherer(sampleBucketStart).sampleNow(sampleBucketStart); + m_BucketGatherer->sampleNow(sampleBucketStart); } void CDataGatherer::skipSampleNow(core_t::TTime sampleBucketStart) { - for (auto& gatherer : m_Gatherers) { - gatherer->skipSampleNow(sampleBucketStart); - } + m_BucketGatherer->skipSampleNow(sampleBucketStart); } std::size_t CDataGatherer::numberFeatures() const { @@ -414,7 +396,7 @@ const std::string& CDataGatherer::personName(std::size_t pid, const std::string& } void CDataGatherer::personNonZeroCounts(core_t::TTime time, TSizeUInt64PrVec& result) const { - return this->chooseBucketGatherer(time).personNonZeroCounts(time, result); + return m_BucketGatherer->personNonZeroCounts(time, result); } void CDataGatherer::recyclePeople(const TSizeVec& peopleToRemove) { @@ -422,9 +404,7 @@ void CDataGatherer::recyclePeople(const TSizeVec& peopleToRemove) { return; } - for (auto& gatherer : m_Gatherers) { - gatherer->recyclePeople(peopleToRemove); - } + m_BucketGatherer->recyclePeople(peopleToRemove); if (!this->isPopulation() && m_SampleCounts) { m_SampleCounts->recycle(peopleToRemove); @@ -445,9 +425,7 @@ void CDataGatherer::removePeople(std::size_t lowestPersonToRemove) { m_SampleCounts->remove(lowestPersonToRemove); } - for (auto& gatherer : m_Gatherers) { - gatherer->removePeople(lowestPersonToRemove); - } + m_BucketGatherer->removePeople(lowestPersonToRemove); m_PeopleRegistry.removeNames(lowestPersonToRemove); } @@ -463,7 +441,7 @@ bool CDataGatherer::isPersonActive(std::size_t pid) const { std::size_t CDataGatherer::addPerson(const std::string& person, CResourceMonitor& resourceMonitor, bool& addedPerson) { - return m_PeopleRegistry.addName(person, 
this->chooseBucketGatherer(0).currentBucketStartTime(), + return m_PeopleRegistry.addName(person, m_BucketGatherer->currentBucketStartTime(), resourceMonitor, addedPerson); } @@ -501,9 +479,7 @@ void CDataGatherer::recycleAttributes(const TSizeVec& attributesToRemove) { m_SampleCounts->recycle(attributesToRemove); } - for (auto& gatherer : m_Gatherers) { - gatherer->recycleAttributes(attributesToRemove); - } + m_BucketGatherer->recycleAttributes(attributesToRemove); m_AttributesRegistry.recycleNames(attributesToRemove, DEFAULT_ATTRIBUTE_NAME); core::CStatistics::instance() @@ -520,9 +496,7 @@ void CDataGatherer::removeAttributes(std::size_t lowestAttributeToRemove) { m_SampleCounts->remove(lowestAttributeToRemove); } - for (auto& gatherer : m_Gatherers) { - gatherer->removeAttributes(lowestAttributeToRemove); - } + m_BucketGatherer->removeAttributes(lowestAttributeToRemove); m_AttributesRegistry.removeNames(lowestAttributeToRemove); } @@ -538,9 +512,9 @@ bool CDataGatherer::isAttributeActive(std::size_t cid) const { std::size_t CDataGatherer::addAttribute(const std::string& attribute, CResourceMonitor& resourceMonitor, bool& addedAttribute) { - return m_AttributesRegistry.addName( - attribute, this->chooseBucketGatherer(0).currentBucketStartTime(), - resourceMonitor, addedAttribute); + return m_AttributesRegistry.addName(attribute, + m_BucketGatherer->currentBucketStartTime(), + resourceMonitor, addedAttribute); } double CDataGatherer::sampleCount(std::size_t id) const { @@ -571,49 +545,41 @@ const CDataGatherer::TSampleCountsPtr& CDataGatherer::sampleCounts() const { return m_SampleCounts; } -// Be careful here! core_t::TTime CDataGatherer::currentBucketStartTime() const { - return m_Gatherers.front()->currentBucketStartTime(); + return m_BucketGatherer->currentBucketStartTime(); } -// Be careful here! 
void CDataGatherer::currentBucketStartTime(core_t::TTime bucketStart) { - m_Gatherers[0]->currentBucketStartTime(bucketStart); - if (m_Gatherers.size() > 1) { - m_Gatherers[1]->currentBucketStartTime( - bucketStart - (m_Gatherers[1]->bucketLength() / 2)); - } + m_BucketGatherer->currentBucketStartTime(bucketStart); } core_t::TTime CDataGatherer::bucketLength() const { - return m_Gatherers.front()->bucketLength(); + return m_BucketGatherer->bucketLength(); } bool CDataGatherer::dataAvailable(core_t::TTime time) const { - return this->chooseBucketGatherer(time).dataAvailable(time); + return m_BucketGatherer->dataAvailable(time); } bool CDataGatherer::validateSampleTimes(core_t::TTime& startTime, core_t::TTime endTime) const { - return this->chooseBucketGatherer(startTime).validateSampleTimes(startTime, endTime); + return m_BucketGatherer->validateSampleTimes(startTime, endTime); } void CDataGatherer::timeNow(core_t::TTime time) { - for (auto& gatherer : m_Gatherers) { - gatherer->timeNow(time); - } + m_BucketGatherer->timeNow(time); } -std::string CDataGatherer::printCurrentBucket(core_t::TTime time) const { - return this->chooseBucketGatherer(time).printCurrentBucket(); +std::string CDataGatherer::printCurrentBucket() const { + return m_BucketGatherer->printCurrentBucket(); } const CDataGatherer::TSizeSizePrUInt64UMap& CDataGatherer::bucketCounts(core_t::TTime time) const { - return this->chooseBucketGatherer(time).bucketCounts(time); + return m_BucketGatherer->bucketCounts(time); } const CDataGatherer::TSizeSizePrStoredStringPtrPrUInt64UMapVec& CDataGatherer::influencerCounts(core_t::TTime time) const { - return this->chooseBucketGatherer(time).influencerCounts(time); + return m_BucketGatherer->influencerCounts(time); } uint64_t CDataGatherer::checksum() const { @@ -624,9 +590,7 @@ uint64_t CDataGatherer::checksum() const { if (m_SampleCounts) { result = maths::CChecksum::calculate(result, m_SampleCounts->checksum(*this)); } - for (const auto& gatherer : 
m_Gatherers) { - result = maths::CChecksum::calculate(result, gatherer); - } + result = maths::CChecksum::calculate(result, m_BucketGatherer); LOG_TRACE(<< "checksum = " << result); @@ -639,9 +603,7 @@ void CDataGatherer::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) co core::CMemoryDebug::dynamicSize("m_PeopleRegistry", m_PeopleRegistry, mem); core::CMemoryDebug::dynamicSize("m_AttributesRegistry", m_AttributesRegistry, mem); core::CMemoryDebug::dynamicSize("m_SampleCounts", m_SampleCounts, mem); - for (const auto& gatherer : m_Gatherers) { - core::CMemoryDebug::dynamicSize("BucketGatherer", *gatherer, mem); - } + core::CMemoryDebug::dynamicSize("m_BucketGatherer", m_BucketGatherer, mem); } std::size_t CDataGatherer::memoryUsage() const { @@ -649,9 +611,7 @@ std::size_t CDataGatherer::memoryUsage() const { mem += core::CMemory::dynamicSize(m_PeopleRegistry); mem += core::CMemory::dynamicSize(m_AttributesRegistry); mem += core::CMemory::dynamicSize(m_SampleCounts); - for (const auto& gatherer : m_Gatherers) { - mem += core::CMemory::dynamicSize(*gatherer); - } + mem += core::CMemory::dynamicSize(m_BucketGatherer); return mem; } @@ -665,24 +625,18 @@ void CDataGatherer::clear() { if (m_SampleCounts) { m_SampleCounts->clear(); } - for (auto& gatherer : m_Gatherers) { - gatherer->clear(); + if (m_BucketGatherer) { + m_BucketGatherer->clear(); } } bool CDataGatherer::resetBucket(core_t::TTime bucketStart) { - bool result = true; - for (auto& gatherer : m_Gatherers) { - result &= gatherer->resetBucket(bucketStart); - } - return result; + return m_BucketGatherer->resetBucket(bucketStart); } void CDataGatherer::releaseMemory(core_t::TTime samplingCutoffTime) { if (this->isPopulation()) { - for (auto& gatherer : m_Gatherers) { - gatherer->releaseMemory(samplingCutoffTime); - } + m_BucketGatherer->releaseMemory(samplingCutoffTime); } } @@ -802,7 +756,7 @@ bool CDataGatherer::extractMetricFromField(const std::string& fieldName, } core_t::TTime 
CDataGatherer::earliestBucketStartTime() const { - return m_Gatherers.front()->earliestBucketStartTime(); + return m_BucketGatherer->earliestBucketStartTime(); } bool CDataGatherer::checkInvariants() const { @@ -813,19 +767,6 @@ bool CDataGatherer::checkInvariants() const { return result; } -const CBucketGatherer& CDataGatherer::chooseBucketGatherer(core_t::TTime time) const { - return const_cast(this)->chooseBucketGatherer(time); -} - -CBucketGatherer& CDataGatherer::chooseBucketGatherer(core_t::TTime time) { - core_t::TTime bucketLength = m_Gatherers.front()->bucketLength(); - if ((m_Gatherers.size() > 1) && (time % bucketLength != 0)) { - return *m_Gatherers[1]; - } else { - return *m_Gatherers[0]; - } -} - bool CDataGatherer::acceptRestoreTraverser(const std::string& summaryCountFieldName, const std::string& personFieldName, const std::string& attributeFieldName, @@ -877,23 +818,21 @@ bool CDataGatherer::restoreBucketGatherer(const std::string& summaryCountFieldNa do { const std::string& name = traverser.name(); if (name == CBucketGatherer::EVENTRATE_BUCKET_GATHERER_TAG) { - TBucketGathererPtr gatherer{boost::make_unique( + m_BucketGatherer = boost::make_unique( *this, summaryCountFieldName, personFieldName, attributeFieldName, - valueFieldName, influenceFieldNames, traverser)}; - if (gatherer == nullptr) { - LOG_ERROR(<< "Failed to create gatherer"); + valueFieldName, influenceFieldNames, traverser); + if (m_BucketGatherer == nullptr) { + LOG_ERROR(<< "Failed to create event rate bucket gatherer"); return false; } - m_Gatherers.push_back(std::move(gatherer)); } else if (name == CBucketGatherer::METRIC_BUCKET_GATHERER_TAG) { - TBucketGathererPtr gatherer{boost::make_unique( + m_BucketGatherer = boost::make_unique( *this, summaryCountFieldName, personFieldName, attributeFieldName, - valueFieldName, influenceFieldNames, traverser)}; - if (gatherer == nullptr) { - LOG_ERROR(<< "Failed to create gatherer"); + valueFieldName, influenceFieldNames, traverser); + if 
(m_BucketGatherer == nullptr) { + LOG_ERROR(<< "Failed to create metric bucket gatherer"); return false; } - m_Gatherers.push_back(std::move(gatherer)); } } while (traverser.next()); @@ -901,20 +840,9 @@ bool CDataGatherer::restoreBucketGatherer(const std::string& summaryCountFieldNa } void CDataGatherer::persistBucketGatherers(core::CStatePersistInserter& inserter) const { - for (const auto& gatherer : m_Gatherers) { - const std::string& tag = gatherer->persistenceTag(); - if (tag == CBucketGatherer::EVENTRATE_BUCKET_GATHERER_TAG) { - const CEventRateBucketGatherer* gatherer_ = - dynamic_cast(gatherer.get()); - inserter.insertLevel(tag, boost::bind(&CEventRateBucketGatherer::acceptPersistInserter, - boost::cref(gatherer_), _1)); - } else if (tag == CBucketGatherer::METRIC_BUCKET_GATHERER_TAG) { - const CMetricBucketGatherer* gatherer_ = - dynamic_cast(gatherer.get()); - inserter.insertLevel(tag, boost::bind(&CMetricBucketGatherer::acceptPersistInserter, - boost::cref(gatherer_), _1)); - } - } + inserter.insertLevel(m_BucketGatherer->persistenceTag(), + boost::bind(&CBucketGatherer::acceptPersistInserter, + m_BucketGatherer.get(), _1)); } void CDataGatherer::createBucketGatherer(model_t::EAnalysisCategory gathererType, @@ -929,17 +857,17 @@ void CDataGatherer::createBucketGatherer(model_t::EAnalysisCategory gathererType case model_t::E_EventRate: case model_t::E_PopulationEventRate: case model_t::E_PeersEventRate: - m_Gatherers.push_back(boost::make_unique( + m_BucketGatherer = boost::make_unique( *this, summaryCountFieldName, personFieldName, attributeFieldName, - valueFieldName, influenceFieldNames, startTime)); + valueFieldName, influenceFieldNames, startTime); break; case model_t::E_Metric: case model_t::E_PopulationMetric: case model_t::E_PeersMetric: m_SampleCounts = boost::make_unique(sampleCountOverride); - m_Gatherers.push_back(boost::make_unique( + m_BucketGatherer = boost::make_unique( *this, summaryCountFieldName, personFieldName, attributeFieldName, - 
valueFieldName, influenceFieldNames, startTime)); + valueFieldName, influenceFieldNames, startTime); break; } } diff --git a/lib/model/CEventRateModel.cc b/lib/model/CEventRateModel.cc index 7b8938cce5..5beee0f8ff 100644 --- a/lib/model/CEventRateModel.cc +++ b/lib/model/CEventRateModel.cc @@ -351,9 +351,7 @@ bool CEventRateModel::computeProbability(std::size_t pid, } if (pid >= this->firstBucketTimes().size()) { - // This is not necessarily an error: the person might have been added - // only in an out of phase bucket so far - LOG_TRACE(<< "No first time for person = " << gatherer.personName(pid)); + LOG_ERROR(<< "No first time for person = " << gatherer.personName(pid)); return false; } diff --git a/lib/model/CEventRateModelFactory.cc b/lib/model/CEventRateModelFactory.cc index 50e66f35da..f36fd2fda5 100644 --- a/lib/model/CEventRateModelFactory.cc +++ b/lib/model/CEventRateModelFactory.cc @@ -258,10 +258,6 @@ void CEventRateModelFactory::features(const TFeatureVec& features) { m_SearchKeyCache.reset(); } -void CEventRateModelFactory::bucketResultsDelay(std::size_t bucketResultsDelay) { - m_BucketResultsDelay = bucketResultsDelay; -} - double CEventRateModelFactory::minimumSeasonalVarianceScale() const { return 0.4; } diff --git a/lib/model/CEventRatePopulationModelFactory.cc b/lib/model/CEventRatePopulationModelFactory.cc index 714f831e8c..102769d330 100644 --- a/lib/model/CEventRatePopulationModelFactory.cc +++ b/lib/model/CEventRatePopulationModelFactory.cc @@ -257,10 +257,6 @@ void CEventRatePopulationModelFactory::features(const TFeatureVec& features) { m_SearchKeyCache.reset(); } -void CEventRatePopulationModelFactory::bucketResultsDelay(std::size_t bucketResultsDelay) { - m_BucketResultsDelay = bucketResultsDelay; -} - CEventRatePopulationModelFactory::TStrCRefVec CEventRatePopulationModelFactory::partitioningFields() const { TStrCRefVec result; diff --git a/lib/model/CHierarchicalResults.cc b/lib/model/CHierarchicalResults.cc index 099110a33a..f1abb9d5ce 
100644 --- a/lib/model/CHierarchicalResults.cc +++ b/lib/model/CHierarchicalResults.cc @@ -10,7 +10,6 @@ #include #include #include -#include #include @@ -32,45 +31,6 @@ namespace { using TNodeCPtr = SNode::TNodeCPtr; -//! CHierarchicalResults tags -const std::string NODES_1_TAG("a"); -const std::string NODES_2_TAG("b"); -const std::string PIVOT_NAME_TAG("c"); -const std::string PIVOT_VALUE_TAG("d"); -const std::string PIVOT_NODES_1_TAG("e"); -const std::string PIVOT_NODES_2_TAG("f"); -const std::string PIVOT_ROOT_NODES_1_TAG("g"); -const std::string PIVOT_ROOT_NODES_2_TAG("h"); - -//! SNode tags -const std::string PARENT_TAG("a"); -const std::string CHILD_TAG("b"); -const std::string SELF_TAG("c"); -const std::string SPEC_TAG("d"); -const std::string ANNOTATED_PROBABILITY_TAG("e"); -const std::string DETECTOR_TAG("f"); -const std::string AGGREGATION_STYLE_TAG("g"); -const std::string SMALLEST_CHILD_TAG("h"); -const std::string SMALLEST_DESCENDANT_TAG("i"); -const std::string RAW_ANOMALY_SCORE_TAG("j"); -const std::string NORMALIZED_ANOMALY_SCORE_TAG("k"); -const std::string BUCKET_START_TAG("l"); -const std::string BUCKET_LENGTH_TAG("m"); - -//! 
SResultSpec tags -const std::string DETECTOR_ID_TAG("a"); -const std::string SIMPLE_COUNT_TAG("b"); -const std::string POPULATION_TAG("c"); -const std::string PARTITION_FIELD_NAME_TAG("d"); -const std::string PARTITION_FIELD_VALUE_TAG("e"); -const std::string PERSON_FIELD_NAME_TAG("f"); -const std::string PERSON_FIELD_VALUE_TAG("g"); -const std::string VALUE_FIELD_NAME_TAG("h"); -const std::string USE_NULL_TAG("j"); -const std::string BY_FIELD_NAME_TAG("k"); -const std::string FUNCTION_NAME_TAG("i"); -const std::string FUNCTION_TAG("l"); - const std::string COUNT("count"); // This is intentionally NOT an empty string from the string store, but instead // a completely separate empty string, such that its pointer will be different @@ -248,66 +208,6 @@ std::string SResultSpec::print() const { *s_PersonFieldValue + '/' + *s_ValueFieldName + '\''; } -void SResultSpec::acceptPersistInserter(core::CStatePersistInserter& inserter) const { - inserter.insertValue(DETECTOR_ID_TAG, s_Detector); - inserter.insertValue(SIMPLE_COUNT_TAG, s_IsSimpleCount); - inserter.insertValue(POPULATION_TAG, s_IsPopulation); - inserter.insertValue(USE_NULL_TAG, s_UseNull); - core::CPersistUtils::persist(FUNCTION_TAG, s_Function, inserter); - if (!unset(s_PartitionFieldName)) { - inserter.insertValue(PARTITION_FIELD_NAME_TAG, *s_PartitionFieldName); - } - if (!unset(s_PartitionFieldValue)) { - inserter.insertValue(PARTITION_FIELD_VALUE_TAG, *s_PartitionFieldValue); - } - if (!unset(s_PersonFieldName)) { - inserter.insertValue(PERSON_FIELD_NAME_TAG, *s_PersonFieldName); - } - if (!unset(s_PersonFieldValue)) { - inserter.insertValue(PERSON_FIELD_VALUE_TAG, *s_PersonFieldValue); - } - if (!unset(s_ValueFieldName)) { - inserter.insertValue(VALUE_FIELD_NAME_TAG, *s_ValueFieldName); - } - if (!unset(s_FunctionName)) { - inserter.insertValue(FUNCTION_NAME_TAG, *s_FunctionName); - } - if (!unset(s_ByFieldName)) { - inserter.insertValue(BY_FIELD_NAME_TAG, *s_ByFieldName); - } -} - -bool 
SResultSpec::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { - do { - const std::string& name = traverser.name(); - RESTORE_BUILT_IN(DETECTOR_ID_TAG, s_Detector) - RESTORE_BUILT_IN(SIMPLE_COUNT_TAG, s_IsSimpleCount) - RESTORE_BUILT_IN(POPULATION_TAG, s_IsPopulation) - RESTORE_BUILT_IN(USE_NULL_TAG, s_UseNull) - RESTORE_SETUP_TEARDOWN(FUNCTION_TAG, int f = 0, - core::CPersistUtils::restore(FUNCTION_TAG, f, traverser), - s_Function = function_t::EFunction(f)) - RESTORE_NO_ERROR( - PARTITION_FIELD_NAME_TAG, - s_PartitionFieldName = CStringStore::names().get(traverser.value())) - RESTORE_NO_ERROR( - PARTITION_FIELD_VALUE_TAG, - s_PartitionFieldValue = CStringStore::names().get(traverser.value())) - RESTORE_NO_ERROR(PERSON_FIELD_NAME_TAG, - s_PersonFieldName = CStringStore::names().get(traverser.value())) - RESTORE_NO_ERROR( - PERSON_FIELD_VALUE_TAG, - s_PersonFieldValue = CStringStore::names().get(traverser.value())) - RESTORE_NO_ERROR(VALUE_FIELD_NAME_TAG, - s_ValueFieldName = CStringStore::names().get(traverser.value())) - RESTORE_NO_ERROR(FUNCTION_NAME_TAG, - s_FunctionName = CStringStore::names().get(traverser.value())) - RESTORE_NO_ERROR(BY_FIELD_NAME_TAG, - s_ByFieldName = CStringStore::names().get(traverser.value())) - } while (traverser.next()); - return true; -} - SNode::SNode() : s_Parent(nullptr), s_AnnotatedProbability(1.0), s_Detector(-3), s_AggregationStyle(-1), s_SmallestChildProbability(1.0), @@ -390,100 +290,6 @@ void SNode::swap(SNode& other) { std::swap(s_BucketLength, other.s_BucketLength); } -void SNode::acceptPersistInserter1(core::CStatePersistInserter& inserter, - TNodePtrSizeUMap& nodePointers) const { - std::size_t index = nodePointers.emplace(this, nodePointers.size()).first->second; - inserter.insertValue(SELF_TAG, index); - core::CPersistUtils::persist(SPEC_TAG, s_Spec, inserter); - core::CPersistUtils::persist(ANNOTATED_PROBABILITY_TAG, s_AnnotatedProbability, inserter); - inserter.insertValue(DETECTOR_TAG, s_Detector); - 
inserter.insertValue(AGGREGATION_STYLE_TAG, s_AggregationStyle); - inserter.insertValue(SMALLEST_CHILD_TAG, s_SmallestChildProbability); - inserter.insertValue(SMALLEST_DESCENDANT_TAG, s_SmallestDescendantProbability); - inserter.insertValue(RAW_ANOMALY_SCORE_TAG, s_RawAnomalyScore); - inserter.insertValue(NORMALIZED_ANOMALY_SCORE_TAG, s_NormalizedAnomalyScore); - inserter.insertValue(BUCKET_START_TAG, s_BucketStartTime); - inserter.insertValue(BUCKET_LENGTH_TAG, s_BucketLength); -} - -void SNode::acceptPersistInserter2(core::CStatePersistInserter& inserter, - const TNodePtrSizeUMap& nodePointers) const { - if (s_Parent != nullptr) { - auto found = nodePointers.find(s_Parent); - if (found == nodePointers.end()) { - LOG_ERROR(<< "Parent not in persistence hierarchy!"); - return; - } - core::CPersistUtils::persist(PARENT_TAG, found->second, inserter); - } - - for (const auto& child : s_Children) { - auto found = nodePointers.find(child); - if (found == nodePointers.end()) { - LOG_ERROR(<< "Child not in persistence hierarchy!"); - return; - } - core::CPersistUtils::persist(CHILD_TAG, found->second, inserter); - } -} - -bool SNode::acceptRestoreTraverser1(core::CStateRestoreTraverser& traverser, - TSizeNodePtrUMap& nodePointers) { - do { - const std::string& name = traverser.name(); - RESTORE_SETUP_TEARDOWN(SELF_TAG, std::size_t index = 0, - core::CStringUtils::stringToType(traverser.value(), index), - nodePointers.insert(std::make_pair(index, this))) - RESTORE(SPEC_TAG, core::CPersistUtils::restore(SPEC_TAG, s_Spec, traverser)) - RESTORE(ANNOTATED_PROBABILITY_TAG, - core::CPersistUtils::restore(ANNOTATED_PROBABILITY_TAG, - s_AnnotatedProbability, traverser)) - RESTORE_BUILT_IN(DETECTOR_TAG, s_Detector); - RESTORE_BUILT_IN(AGGREGATION_STYLE_TAG, s_AggregationStyle); - RESTORE_BUILT_IN(SMALLEST_CHILD_TAG, s_SmallestChildProbability) - RESTORE_BUILT_IN(SMALLEST_DESCENDANT_TAG, s_SmallestDescendantProbability) - RESTORE_BUILT_IN(RAW_ANOMALY_SCORE_TAG, s_RawAnomalyScore) - 
RESTORE_BUILT_IN(NORMALIZED_ANOMALY_SCORE_TAG, s_NormalizedAnomalyScore) - RESTORE_BUILT_IN(BUCKET_START_TAG, s_BucketStartTime) - RESTORE_BUILT_IN(BUCKET_LENGTH_TAG, s_BucketLength) - } while (traverser.next()); - return true; -} - -bool SNode::acceptRestoreTraverser2(core::CStateRestoreTraverser& traverser, - const TSizeNodePtrUMap& nodePointers) { - do { - const std::string& name = traverser.name(); - std::size_t index = 0; - if (name == PARENT_TAG) { - if (!core::CPersistUtils::restore(PARENT_TAG, index, traverser)) { - LOG_ERROR(<< "Restore error for " << traverser.name() << " / " - << traverser.value()); - return false; - } - auto found = nodePointers.find(index); - if (found == nodePointers.end()) { - LOG_ERROR(<< "Parent not in persistence hierarchy!"); - return false; - } - s_Parent = found->second; - } else if (name == CHILD_TAG) { - if (!core::CPersistUtils::restore(CHILD_TAG, index, traverser)) { - LOG_ERROR(<< "Restore error for " << traverser.name() << " / " - << traverser.value()); - return false; - } - auto found = nodePointers.find(index); - if (found == nodePointers.end()) { - LOG_ERROR(<< "Parent not in persistence hierarchy!"); - return false; - } - s_Children.push_back(found->second); - } - } while (traverser.next()); - return true; -} - void swap(SNode& node1, SNode& node2) { node1.swap(node2); } @@ -733,177 +539,6 @@ model_t::CResultType CHierarchicalResults::resultType() const { return m_ResultType; } -void CHierarchicalResults::acceptPersistInserter(core::CStatePersistInserter& inserter) const { - using TStoredStringPtrNodeMapCItr = TStoredStringPtrNodeMap::const_iterator; - using TStoredStringPtrNodeMapCItrVec = std::vector; - using TStoredStringPtrStoredStringPtrPrNodeMapCItr = - TStoredStringPtrStoredStringPtrPrNodeMap::const_iterator; - using TStoredStringPtrStoredStringPtrPrNodeMapCItrVec = - std::vector; - - TNodePtrSizeUMap nodePointers; - - for (const auto& node : m_Nodes) { - inserter.insertLevel(NODES_1_TAG, 
boost::bind(&SNode::acceptPersistInserter1, - boost::cref(node), _1, - boost::ref(nodePointers))); - } - - // Sort the keys by *value* order to ensure consistent persist state. - TStoredStringPtrStoredStringPtrPrNodeMapCItrVec pivotIterators; - pivotIterators.reserve(m_PivotNodes.size()); - for (auto i = m_PivotNodes.begin(); i != m_PivotNodes.end(); ++i) { - pivotIterators.push_back(i); - } - std::sort(pivotIterators.begin(), pivotIterators.end(), - core::CFunctional::SDereference()); - for (auto i : pivotIterators) { - core::CPersistUtils::persist(PIVOT_NAME_TAG, *i->first.first, inserter); - core::CPersistUtils::persist(PIVOT_VALUE_TAG, *i->first.second, inserter); - inserter.insertLevel(PIVOT_NODES_1_TAG, - boost::bind(&SNode::acceptPersistInserter1, - boost::cref(i->second), _1, - boost::ref(nodePointers))); - } - - // Sort the keys by *value* order to ensure consistent persist state. - TStoredStringPtrNodeMapCItrVec pivotRootIterators; - pivotRootIterators.reserve(m_PivotRootNodes.size()); - for (auto i = m_PivotRootNodes.begin(); i != m_PivotRootNodes.end(); ++i) { - pivotRootIterators.push_back(i); - } - std::sort(pivotRootIterators.begin(), pivotRootIterators.end(), - core::CFunctional::SDereference()); - for (auto i : pivotRootIterators) { - core::CPersistUtils::persist(PIVOT_NAME_TAG, *i->first, inserter); - inserter.insertLevel(PIVOT_ROOT_NODES_1_TAG, - boost::bind(&SNode::acceptPersistInserter1, - boost::cref(i->second), _1, - boost::ref(nodePointers))); - } - - for (const auto& node : m_Nodes) { - inserter.insertLevel(NODES_2_TAG, boost::bind(&SNode::acceptPersistInserter2, - boost::cref(node), _1, - boost::cref(nodePointers))); - } - - for (auto i : pivotIterators) { - core::CPersistUtils::persist(PIVOT_NAME_TAG, *i->first.first, inserter); - core::CPersistUtils::persist(PIVOT_VALUE_TAG, *i->first.second, inserter); - inserter.insertLevel(PIVOT_NODES_2_TAG, - boost::bind(&SNode::acceptPersistInserter2, - boost::cref(i->second), _1, - 
boost::cref(nodePointers))); - } - - for (auto i : pivotRootIterators) { - core::CPersistUtils::persist(PIVOT_NAME_TAG, *i->first, inserter); - inserter.insertLevel(PIVOT_ROOT_NODES_2_TAG, - boost::bind(&SNode::acceptPersistInserter2, - boost::cref(i->second), _1, - boost::cref(nodePointers))); - } -} - -bool CHierarchicalResults::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { - TSizeNodePtrUMap nodePointers; - core::CStoredStringPtr influencerName; - core::CStoredStringPtr influencerValue; - std::size_t nodesFullyRestored = 0; - - do { - const std::string& name = traverser.name(); - RESTORE_SETUP_TEARDOWN( - NODES_1_TAG, m_Nodes.push_back(SNode()), - traverser.traverseSubLevel(boost::bind(&SNode::acceptRestoreTraverser1, - boost::ref(m_Nodes.back()), - _1, boost::ref(nodePointers))), - /**/) - if (name == NODES_2_TAG) { - if (nodesFullyRestored > m_Nodes.size()) { - LOG_ERROR(<< "Invalid restore index for node: " << nodesFullyRestored); - } - if (traverser.traverseSubLevel(boost::bind( - &SNode::acceptRestoreTraverser2, boost::ref(m_Nodes[nodesFullyRestored]), - _1, boost::cref(nodePointers))) == false) { - LOG_ERROR(<< "Failed to restore node"); - return false; - } - ++nodesFullyRestored; - continue; - } - RESTORE_NO_ERROR( - PIVOT_NAME_TAG, - influencerName = CStringStore::influencers().get(traverser.value())) - RESTORE_NO_ERROR( - PIVOT_VALUE_TAG, - influencerValue = CStringStore::influencers().get(traverser.value())) - if (name == PIVOT_NODES_1_TAG) { - if (!influencerName || !influencerValue) { - LOG_ERROR(<< "Invalid influencers for node"); - return false; - } - SNode& node = m_PivotNodes[TStoredStringPtrStoredStringPtrPr(influencerName, influencerValue)]; - if (traverser.traverseSubLevel( - boost::bind(&SNode::acceptRestoreTraverser1, boost::ref(node), - _1, boost::ref(nodePointers))) == false) { - LOG_ERROR(<< "Failed to restore pivot node"); - return false; - } - influencerName = core::CStoredStringPtr(); - influencerValue = 
core::CStoredStringPtr(); - continue; - } else if (name == PIVOT_NODES_2_TAG) { - if (!influencerName || !influencerValue) { - LOG_ERROR(<< "Invalid influencers for node"); - return false; - } - SNode& node = m_PivotNodes[TStoredStringPtrStoredStringPtrPr(influencerName, influencerValue)]; - if (traverser.traverseSubLevel( - boost::bind(&SNode::acceptRestoreTraverser2, boost::ref(node), - _1, boost::cref(nodePointers))) == false) { - LOG_ERROR(<< "Failed to restore pivot node"); - return false; - } - influencerName = core::CStoredStringPtr(); - influencerValue = core::CStoredStringPtr(); - continue; - } - if (name == PIVOT_ROOT_NODES_1_TAG) { - if (!influencerName) { - LOG_ERROR(<< "Invalid influencer for node"); - return false; - } - SNode& node = m_PivotRootNodes[influencerName]; - if (traverser.traverseSubLevel( - boost::bind(&SNode::acceptRestoreTraverser1, boost::ref(node), - _1, boost::ref(nodePointers))) == false) { - LOG_ERROR(<< "Failed to restore pivot node"); - return false; - } - influencerName = core::CStoredStringPtr(); - continue; - } - if (name == PIVOT_ROOT_NODES_2_TAG) { - if (!influencerName) { - LOG_ERROR(<< "Invalid influencer for node"); - return false; - } - SNode& node = m_PivotRootNodes[influencerName]; - if (traverser.traverseSubLevel( - boost::bind(&SNode::acceptRestoreTraverser2, boost::ref(node), - _1, boost::cref(nodePointers))) == false) { - LOG_ERROR(<< "Failed to restore pivot node"); - return false; - } - influencerName = core::CStoredStringPtr(); - continue; - } - } while (traverser.next()); - return true; -} - std::string CHierarchicalResults::print() const { std::ostringstream ss; for (const auto& node : m_Nodes) { diff --git a/lib/model/CIndividualModel.cc b/lib/model/CIndividualModel.cc index 0c902b9479..39ffb69ca1 100644 --- a/lib/model/CIndividualModel.cc +++ b/lib/model/CIndividualModel.cc @@ -172,21 +172,6 @@ void CIndividualModel::sampleBucketStatistics(core_t::TTime startTime, } } -void 
CIndividualModel::sampleOutOfPhase(core_t::TTime startTime, - core_t::TTime endTime, - CResourceMonitor& resourceMonitor) { - CDataGatherer& gatherer = this->dataGatherer(); - if (!gatherer.dataAvailable(startTime)) { - return; - } - - for (core_t::TTime time = startTime, bucketLength = gatherer.bucketLength(); - time < endTime; time += bucketLength) { - gatherer.sampleNow(time); - this->sampleBucketStatistics(time, time + bucketLength, resourceMonitor); - } -} - void CIndividualModel::sample(core_t::TTime startTime, core_t::TTime endTime, CResourceMonitor& resourceMonitor) { diff --git a/lib/model/CMetricModel.cc b/lib/model/CMetricModel.cc index 2d62f13b1f..6c18a734ec 100644 --- a/lib/model/CMetricModel.cc +++ b/lib/model/CMetricModel.cc @@ -271,7 +271,7 @@ void CMetricModel::sample(core_t::TTime startTime, : 1.0; double ceff = emptyBucketWeight * count * this->learnRate(feature); - LOG_TRACE(<< "Bucket = " << gatherer.printCurrentBucket(time) + LOG_TRACE(<< "Bucket = " << gatherer.printCurrentBucket() << ", feature = " << model_t::print(feature) << ", samples = " << core::CContainerPrinter::print(samples) << ", isInteger = " << data_.second.s_IsInteger @@ -338,9 +338,7 @@ bool CMetricModel::computeProbability(const std::size_t pid, } if (pid >= this->firstBucketTimes().size()) { - // This is not necessarily an error: the person might have been added - // only in an out of phase bucket so far - LOG_TRACE(<< "No first time for person = " << gatherer.personName(pid)); + LOG_ERROR(<< "No first time for person = " << gatherer.personName(pid)); return false; } diff --git a/lib/model/CMetricModelFactory.cc b/lib/model/CMetricModelFactory.cc index 54c59b5c89..266fc79bf0 100644 --- a/lib/model/CMetricModelFactory.cc +++ b/lib/model/CMetricModelFactory.cc @@ -256,10 +256,6 @@ void CMetricModelFactory::bucketLength(core_t::TTime bucketLength) { m_BucketLength = bucketLength; } -void CMetricModelFactory::bucketResultsDelay(std::size_t bucketResultsDelay) { - 
m_BucketResultsDelay = bucketResultsDelay; -} - double CMetricModelFactory::minimumSeasonalVarianceScale() const { return 0.4; } diff --git a/lib/model/CMetricPopulationModelFactory.cc b/lib/model/CMetricPopulationModelFactory.cc index 492f0601a5..b65be7e089 100644 --- a/lib/model/CMetricPopulationModelFactory.cc +++ b/lib/model/CMetricPopulationModelFactory.cc @@ -255,10 +255,6 @@ void CMetricPopulationModelFactory::features(const TFeatureVec& features) { m_SearchKeyCache.reset(); } -void CMetricPopulationModelFactory::bucketResultsDelay(std::size_t bucketResultsDelay) { - m_BucketResultsDelay = bucketResultsDelay; -} - double CMetricPopulationModelFactory::minimumSeasonalVarianceScale() const { return 1.0; } diff --git a/lib/model/CModelParams.cc b/lib/model/CModelParams.cc index 03b447d608..b424c2e9ba 100644 --- a/lib/model/CModelParams.cc +++ b/lib/model/CModelParams.cc @@ -53,8 +53,7 @@ SModelParams::SModelParams(core_t::TTime bucketLength) s_PruneWindowScaleMaximum(CAnomalyDetectorModelConfig::DEFAULT_PRUNE_WINDOW_SCALE_MAXIMUM), s_DetectionRules(EMPTY_RULES), s_ScheduledEvents(EMPTY_SCHEDULED_EVENTS), s_InfluenceCutoff(CAnomalyDetectorModelConfig::DEFAULT_INFLUENCE_CUTOFF), - s_BucketResultsDelay(0), s_MinimumToFuzzyDeduplicate(10000), - s_CacheProbabilities(true) { + s_MinimumToFuzzyDeduplicate(10000), s_CacheProbabilities(true) { } void SModelParams::configureLatency(core_t::TTime latency, core_t::TTime bucketLength) { diff --git a/lib/model/CModelPlotData.cc b/lib/model/CModelPlotData.cc index 277652d603..83b2188575 100644 --- a/lib/model/CModelPlotData.cc +++ b/lib/model/CModelPlotData.cc @@ -5,28 +5,8 @@ */ #include -#include -#include -#include - -#include - namespace ml { namespace model { -namespace { - -const std::string DATA_PER_FEATURE_TAG("a"); -const std::string TIME_TAG("b"); -const std::string PARTITION_FIELD_NAME_TAG("c"); -const std::string PARTITION_FIELD_VALUE_TAG("d"); -const std::string OVER_FIELD_NAME_TAG("e"); -const std::string 
BY_FIELD_NAME_TAG("f"); - -const std::string LOWER_BOUND_TAG("a"); -const std::string UPPER_BOUND_TAG("b"); -const std::string MEDIAN_TAG("c"); -const std::string VALUES_PER_OVERFIELD_TAG("d"); -} CModelPlotData::CModelPlotData() : m_Time(0) { } @@ -53,92 +33,6 @@ CModelPlotData::SByFieldData::SByFieldData(double lowerBound, double upperBound, s_ValuesPerOverField() { } -void CModelPlotData::SByFieldData::acceptPersistInserter(core::CStatePersistInserter& inserter) const { - core::CPersistUtils::persist(LOWER_BOUND_TAG, s_LowerBound, inserter); - core::CPersistUtils::persist(UPPER_BOUND_TAG, s_UpperBound, inserter); - core::CPersistUtils::persist(MEDIAN_TAG, s_Median, inserter); - core::CPersistUtils::persist(VALUES_PER_OVERFIELD_TAG, s_ValuesPerOverField, inserter); -} - -bool CModelPlotData::SByFieldData::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { - do { - const std::string& name = traverser.name(); - if (name == LOWER_BOUND_TAG) { - if (!core::CPersistUtils::restore(LOWER_BOUND_TAG, s_LowerBound, traverser)) { - return false; - } - } else if (name == UPPER_BOUND_TAG) { - if (!core::CPersistUtils::restore(UPPER_BOUND_TAG, s_UpperBound, traverser)) { - return false; - } - } else if (name == MEDIAN_TAG) { - if (!core::CPersistUtils::restore(MEDIAN_TAG, s_Median, traverser)) { - return false; - } - } else if (name == VALUES_PER_OVERFIELD_TAG) { - if (!core::CPersistUtils::restore(VALUES_PER_OVERFIELD_TAG, - s_ValuesPerOverField, traverser)) { - return false; - } - } - } while (traverser.next()); - - return true; -} - -void CModelPlotData::acceptPersistInserter(core::CStatePersistInserter& inserter) const { - TIntStrByFieldDataUMapUMap data(m_DataPerFeature.begin(), m_DataPerFeature.end()); - core::CPersistUtils::persist(DATA_PER_FEATURE_TAG, data, inserter); - core::CPersistUtils::persist(TIME_TAG, m_Time, inserter); - core::CPersistUtils::persist(PARTITION_FIELD_NAME_TAG, m_PartitionFieldName, inserter); - 
core::CPersistUtils::persist(PARTITION_FIELD_VALUE_TAG, m_PartitionFieldValue, inserter); - core::CPersistUtils::persist(OVER_FIELD_NAME_TAG, m_OverFieldName, inserter); - core::CPersistUtils::persist(BY_FIELD_NAME_TAG, m_ByFieldName, inserter); -} - -bool CModelPlotData::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { - do { - const std::string& name = traverser.name(); - if (name == DATA_PER_FEATURE_TAG) { - TIntStrByFieldDataUMapUMap data; - if (!core::CPersistUtils::restore(DATA_PER_FEATURE_TAG, data, traverser)) { - return false; - } - m_DataPerFeature.clear(); - - for (TIntStrByFieldDataUMapUMap::const_iterator i = data.begin(); - i != data.end(); ++i) { - m_DataPerFeature.insert(TFeatureStrByFieldDataUMapPr( - model_t::EFeature(i->first), i->second)); - } - } else if (name == TIME_TAG) { - if (!core::CPersistUtils::restore(TIME_TAG, m_Time, traverser)) { - return false; - } - } else if (name == PARTITION_FIELD_NAME_TAG) { - if (!core::CPersistUtils::restore(PARTITION_FIELD_NAME_TAG, - m_PartitionFieldName, traverser)) { - return false; - } - } else if (name == PARTITION_FIELD_VALUE_TAG) { - if (!core::CPersistUtils::restore(PARTITION_FIELD_VALUE_TAG, - m_PartitionFieldValue, traverser)) { - return false; - } - } else if (name == OVER_FIELD_NAME_TAG) { - if (!core::CPersistUtils::restore(OVER_FIELD_NAME_TAG, m_OverFieldName, traverser)) { - return false; - } - } else if (name == BY_FIELD_NAME_TAG) { - if (!core::CPersistUtils::restore(BY_FIELD_NAME_TAG, m_ByFieldName, traverser)) { - return false; - } - } - } while (traverser.next()); - - return true; -} - const std::string& CModelPlotData::partitionFieldName() const { return m_PartitionFieldName; } diff --git a/lib/model/CPopulationModel.cc b/lib/model/CPopulationModel.cc index f048da5699..0a29d97153 100644 --- a/lib/model/CPopulationModel.cc +++ b/lib/model/CPopulationModel.cc @@ -161,22 +161,6 @@ void CPopulationModel::currentBucketPersonIds(core_t::TTime time, TSizeVec& resu } } -void 
CPopulationModel::sampleOutOfPhase(core_t::TTime startTime, - core_t::TTime endTime, - CResourceMonitor& resourceMonitor) { - CDataGatherer& gatherer = this->dataGatherer(); - - if (!gatherer.dataAvailable(startTime)) { - return; - } - - for (core_t::TTime time = startTime, bucketLength = gatherer.bucketLength(); - time < endTime; time += bucketLength) { - gatherer.sampleNow(time); - this->sampleBucketStatistics(time, time + bucketLength, resourceMonitor); - } -} - void CPopulationModel::sample(core_t::TTime startTime, core_t::TTime endTime, CResourceMonitor& resourceMonitor) { diff --git a/lib/model/CResultsQueue.cc b/lib/model/CResultsQueue.cc deleted file mode 100644 index 99e945e387..0000000000 --- a/lib/model/CResultsQueue.cc +++ /dev/null @@ -1,152 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License; - * you may not use this file except in compliance with the Elastic License. - */ -#include - -#include -#include - -#include - -namespace ml { -namespace model { - -namespace { -const std::string RESULTS_TAG("a"); -const std::string LAST_RESULTS_INDEX_TAG("b"); -const std::string INITIALISATION_TIME_TAG("c"); -} - -CResultsQueue::CResultsQueue(std::size_t delayBuckets, core_t::TTime bucketLength) - : m_Results(delayBuckets, bucketLength, 0), m_LastResultsIndex(2) { -} - -void CResultsQueue::push(const CHierarchicalResults& result, core_t::TTime time) { - if (m_Results.latestBucketEnd() + 1 - m_Results.bucketLength() == 0) { - m_Results.reset(time - m_Results.bucketLength()); - LOG_TRACE(<< "Resetting results queue. 
Queue's latestBucketEnd is " - << m_Results.latestBucketEnd()); - } - m_Results.push(result, time); -} - -void CResultsQueue::push(const CHierarchicalResults& result) { - m_Results.push(result); -} - -const CHierarchicalResults& CResultsQueue::get(core_t::TTime time) const { - return m_Results.get(time); -} - -CHierarchicalResults& CResultsQueue::get(core_t::TTime time) { - return m_Results.get(time); -} - -CHierarchicalResults& CResultsQueue::latest() { - return m_Results.latest(); -} - -core_t::TTime CResultsQueue::latestBucketEnd() const { - return m_Results.latestBucketEnd(); -} - -std::size_t CResultsQueue::size() const { - return m_Results.size(); -} - -void CResultsQueue::reset(core_t::TTime time) { - m_Results.reset(time); - m_LastResultsIndex = m_Results.size() - 1; -} - -bool CResultsQueue::hasInterimResults() const { - return m_Results.size() > 2 && m_LastResultsIndex == 0; -} - -core_t::TTime CResultsQueue::chooseResultTime(core_t::TTime bucketStartTime, - core_t::TTime bucketLength, - model::CHierarchicalResults& results) { - if (m_Results.size() == 1) { - return bucketStartTime; - } - - // Select the correct bucket to use - LOG_TRACE(<< "Asking for queue items at " << (bucketStartTime - bucketLength) - << " and " << (bucketStartTime - (bucketLength / 2))); - - core_t::TTime resultsTime = 0; - const model::CHierarchicalResults::TNode* node = - m_Results.get(bucketStartTime - bucketLength).root(); - double r1 = 0.0; - if (node) { - r1 = node->s_NormalizedAnomalyScore; - } - node = m_Results.get(bucketStartTime - (bucketLength / 2)).root(); - double r2 = 0.0; - if (node) { - r2 = node->s_NormalizedAnomalyScore; - } - double r3 = 0.0; - if (results.root()) { - r3 = results.root()->s_NormalizedAnomalyScore; - } - - LOG_TRACE(<< "Testing results " << r1 << ", " << r2 << ", " << r3); - - if (m_LastResultsIndex == 0) { - // With 3 clear buckets to look at, start choosing - if ((r3 > r2) && (r3 > r1)) { - // We want this guy, so choose r1 so that he can be 
selected next time - resultsTime = bucketStartTime - bucketLength; - m_LastResultsIndex = 2; - } else { - // Pick the bigger of 1 / 2 - if (r2 > r1) { - resultsTime = bucketStartTime - (bucketLength / 2); - m_LastResultsIndex = 3; - } else { - resultsTime = bucketStartTime - bucketLength; - m_LastResultsIndex = 2; - } - } - } - --m_LastResultsIndex; - return resultsTime; -} - -void CResultsQueue::acceptPersistInserter(core::CStatePersistInserter& inserter) const { - core_t::TTime initialisationTime = m_Results.latestBucketEnd() + 1 - - m_Results.bucketLength(); - core::CPersistUtils::persist(INITIALISATION_TIME_TAG, initialisationTime, inserter); - core::CPersistUtils::persist(RESULTS_TAG, m_Results, inserter); - core::CPersistUtils::persist(LAST_RESULTS_INDEX_TAG, m_LastResultsIndex, inserter); -} - -bool CResultsQueue::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { - do { - const std::string& name = traverser.name(); - if (name == RESULTS_TAG) { - if (!core::CPersistUtils::restore(RESULTS_TAG, m_Results, traverser)) { - return false; - } - } else if (name == LAST_RESULTS_INDEX_TAG) { - if (!core::CPersistUtils::restore(LAST_RESULTS_INDEX_TAG, - m_LastResultsIndex, traverser)) { - return false; - } - } else if (name == INITIALISATION_TIME_TAG) { - core_t::TTime initialisationTime = 0; - if (!core::CPersistUtils::restore(INITIALISATION_TIME_TAG, - initialisationTime, traverser)) { - return false; - } - m_Results.reset(initialisationTime); - } - } while (traverser.next()); - return true; -} - -} // model -} // ml diff --git a/lib/model/Makefile b/lib/model/Makefile index 6b097207c5..d1abb569fb 100644 --- a/lib/model/Makefile +++ b/lib/model/Makefile @@ -64,7 +64,6 @@ CPartitioningFields.cc \ CPopulationModel.cc \ CProbabilityAndInfluenceCalculator.cc \ CResourceMonitor.cc \ -CResultsQueue.cc \ CRuleCondition.cc \ CRuleScope.cc \ CSample.cc \ diff --git a/lib/model/unittest/CEventRateDataGathererTest.cc 
b/lib/model/unittest/CEventRateDataGathererTest.cc index 86c63b1385..b957e763a7 100644 --- a/lib/model/unittest/CEventRateDataGathererTest.cc +++ b/lib/model/unittest/CEventRateDataGathererTest.cc @@ -1529,7 +1529,6 @@ void CEventRateDataGathererTest::testInfluencerBucketStatistics() { void CEventRateDataGathererTest::testDistinctStrings() { using TStoredStringPtrVec = std::vector; - TSizeSizePr pair(0, 0); // Test the SUniqueStringFeatureData struct { diff --git a/lib/model/unittest/Mocks.cc b/lib/model/unittest/Mocks.cc index 7fddaadb53..19e818bd22 100644 --- a/lib/model/unittest/Mocks.cc +++ b/lib/model/unittest/Mocks.cc @@ -91,11 +91,6 @@ void CMockModel::sample(core_t::TTime /*startTime*/, CResourceMonitor& /*resourceMonitor*/) { } -void CMockModel::sampleOutOfPhase(core_t::TTime /*startTime*/, - core_t::TTime /*endTime*/, - CResourceMonitor& /*resourceMonitor*/) { -} - void CMockModel::prune(std::size_t /*maximumAge*/) { } diff --git a/lib/model/unittest/Mocks.h b/lib/model/unittest/Mocks.h index 2801c97573..8d2e501e5e 100644 --- a/lib/model/unittest/Mocks.h +++ b/lib/model/unittest/Mocks.h @@ -68,10 +68,6 @@ class CMockModel : public CAnomalyDetectorModel { virtual void sample(core_t::TTime startTime, core_t::TTime endTime, CResourceMonitor& resourceMonitor); - virtual void sampleOutOfPhase(core_t::TTime startTime, - core_t::TTime endTime, - CResourceMonitor& resourceMonitor); - virtual void prune(std::size_t maximumAge); virtual bool computeProbability(std::size_t pid,