From 78a91a823dece33c749b6b014b1fa6343dddaa43 Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Tue, 1 Dec 2020 16:14:07 +0000 Subject: [PATCH 1/3] [ML] Init AD model config from JSON config Initialise CAnomalyDetectorModelConfig with values from the JSON config file parsed by CAnomalyJobConfig. Relates to #1253 --- bin/autodetect/CCmdLineParser.cc | 16 ------- bin/autodetect/CCmdLineParser.h | 6 +-- bin/autodetect/Main.cc | 24 +++++----- include/api/CAnomalyJobConfig.h | 31 +++++++++---- lib/api/CAnomalyJobConfig.cc | 48 ++++++++++++------- lib/api/unittest/CAnomalyJobConfigTest.cc | 56 ++++++----------------- 6 files changed, 84 insertions(+), 97 deletions(-) diff --git a/bin/autodetect/CCmdLineParser.cc b/bin/autodetect/CCmdLineParser.cc index b7b69a780d..04b2c3e83b 100644 --- a/bin/autodetect/CCmdLineParser.cc +++ b/bin/autodetect/CCmdLineParser.cc @@ -29,9 +29,6 @@ bool CCmdLineParser::parse(int argc, std::string& modelPlotConfigFile, std::string& logProperties, std::string& logPipe, - core_t::TTime& bucketSpan, - core_t::TTime& latency, - std::string& summaryCountFieldName, char& delimiter, bool& lengthEncodedInput, std::string& timeField, @@ -53,7 +50,6 @@ bool CCmdLineParser::parse(int argc, bool& isPersistInForeground, std::size_t& maxAnomalyRecords, bool& memoryUsage, - bool& multivariateByFields, bool& stopCategorizationOnWarnStatus, TStrVec& clauseTokens) { try { @@ -177,15 +173,6 @@ bool CCmdLineParser::parse(int argc, if (vm.count("logPipe") > 0) { logPipe = vm["logPipe"].as(); } - if (vm.count("bucketspan") > 0) { - bucketSpan = vm["bucketspan"].as(); - } - if (vm.count("latency") > 0) { - latency = vm["latency"].as(); - } - if (vm.count("summarycountfield") > 0) { - summaryCountFieldName = vm["summarycountfield"].as(); - } if (vm.count("delimiter") > 0) { delimiter = vm["delimiter"].as(); } @@ -250,9 +237,6 @@ bool CCmdLineParser::parse(int argc, if (vm.count("memoryUsage") > 0) { memoryUsage = true; } - if (vm.count("multivariateByFields") > 0) { - multivariateByFields = true; - } if (vm.count("stopCategorizationOnWarnStatus") > 0) { stopCategorizationOnWarnStatus = true; } diff --git a/bin/autodetect/CCmdLineParser.h b/bin/autodetect/CCmdLineParser.h index 4f57339f79..3bdc5b9f0f 100644 --- a/bin/autodetect/CCmdLineParser.h +++ b/bin/autodetect/CCmdLineParser.h @@ -29,7 +29,7 @@ class CCmdLineParser { using TStrVec = std::vector; public: - //! Parse the arguments and return options if appropriate. Unamed + //! Parse the arguments and return options if appropriate. Unnamed //! options are placed in a vector for further processing/validation //! later on by the api::CFieldConfig class. 
static bool parse(int argc, @@ -41,9 +41,6 @@ class CCmdLineParser { std::string& modelPlotConfigFile, std::string& logProperties, std::string& logPipe, - core_t::TTime& bucketSpan, - core_t::TTime& latency, - std::string& summaryCountFieldName, char& delimiter, bool& lengthEncodedInput, std::string& timeField, @@ -65,7 +62,6 @@ class CCmdLineParser { bool& isPersistInForeground, std::size_t& maxAnomalyRecords, bool& memoryUsage, - bool& multivariateByFields, bool& stopCategorizationOnWarnStatus, TStrVec& clauseTokens); diff --git a/bin/autodetect/Main.cc b/bin/autodetect/Main.cc index c4b6a04a96..1610202825 100644 --- a/bin/autodetect/Main.cc +++ b/bin/autodetect/Main.cc @@ -91,9 +91,6 @@ int main(int argc, char** argv) { std::string modelPlotConfigFile; std::string logProperties; std::string logPipe; - ml::core_t::TTime bucketSpan{0}; - ml::core_t::TTime latency{0}; - std::string summaryCountFieldName; char delimiter{'\t'}; bool lengthEncodedInput{false}; std::string timeField{ml::api::CAnomalyJob::DEFAULT_TIME_FIELD_NAME}; @@ -116,19 +113,17 @@ int main(int argc, char** argv) { bool isPersistInForeground{false}; std::size_t maxAnomalyRecords{100}; bool memoryUsage{false}; - bool multivariateByFields{false}; bool stopCategorizationOnWarnStatus{false}; TStrVec clauseTokens; if (ml::autodetect::CCmdLineParser::parse( argc, argv, configFile, limitConfigFile, modelConfigFile, fieldConfigFile, - modelPlotConfigFile, logProperties, logPipe, bucketSpan, latency, - summaryCountFieldName, delimiter, lengthEncodedInput, timeField, - timeFormat, quantilesStateFile, deleteStateFiles, persistInterval, + modelPlotConfigFile, logProperties, logPipe, delimiter, lengthEncodedInput, + timeField, timeFormat, quantilesStateFile, deleteStateFiles, persistInterval, bucketPersistInterval, maxQuantileInterval, namedPipeConnectTimeout, inputFileName, isInputFileNamedPipe, outputFileName, isOutputFileNamedPipe, - restoreFileName, isRestoreFileNamedPipe, persistFileName, isPersistFileNamedPipe, - isPersistInForeground, maxAnomalyRecords, memoryUsage, multivariateByFields, - stopCategorizationOnWarnStatus, clauseTokens) == false) { + restoreFileName, isRestoreFileNamedPipe, persistFileName, + isPersistFileNamedPipe, isPersistInForeground, maxAnomalyRecords, + memoryUsage, stopCategorizationOnWarnStatus, clauseTokens) == false) { return EXIT_FAILURE; } @@ -212,12 +207,19 @@ int main(int argc, char** argv) { mutableFields.push_back(ml::api::CFieldDataCategorizer::MLCATEGORY_NAME); } + const ml::api::CAnomalyJobConfig::CAnalysisConfig& analysisConfig = + jobConfig.analysisConfig(); + const std::string& summaryCountFieldName = analysisConfig.summaryCountFieldName(); + ml::core_t::TTime bucketSpan = analysisConfig.bucketSpan(); + ml::core_t::TTime latency = analysisConfig.latency(); + bool multivariateByFields = analysisConfig.multivariateByFields(); + ml::model_t::ESummaryMode summaryMode{ summaryCountFieldName.empty() ? 
ml::model_t::E_None : ml::model_t::E_Manual}; ml::model::CAnomalyDetectorModelConfig modelConfig{ml::model::CAnomalyDetectorModelConfig::defaultConfig( bucketSpan, summaryMode, summaryCountFieldName, latency, multivariateByFields)}; modelConfig.detectionRules(ml::model::CAnomalyDetectorModelConfig::TIntDetectionRuleVecUMapCRef( - fieldConfig.detectionRules())); + analysisConfig.detectionRules())); modelConfig.scheduledEvents(ml::model::CAnomalyDetectorModelConfig::TStrDetectionRulePrVecCRef( fieldConfig.scheduledEvents())); diff --git a/include/api/CAnomalyJobConfig.h b/include/api/CAnomalyJobConfig.h index 103eed9971..dbd5d7cea5 100644 --- a/include/api/CAnomalyJobConfig.h +++ b/include/api/CAnomalyJobConfig.h @@ -34,6 +34,7 @@ class API_EXPORT CAnomalyJobConfig { static const std::string OVER_FIELD_NAME; static const std::string PARTITION_FIELD_NAME; static const std::string DETECTOR_DESCRIPTION; + static const std::string DETECTOR_INDEX; static const std::string EXCLUDE_FREQUENT; static const std::string CUSTOM_RULES; static const std::string USE_NULL; @@ -42,7 +43,8 @@ class API_EXPORT CAnomalyJobConfig { CDetectorConfig() {} void parse(const rapidjson::Value& detectorConfig, - const CDetectionRulesJsonParser::TStrPatternSetUMap& ruleFilters); + const CDetectionRulesJsonParser::TStrPatternSetUMap& ruleFilters, + CDetectionRulesJsonParser::TDetectionRuleVec& detectionRules); std::string function() const { return m_Function; } std::string fieldName() const { return m_FieldName; } @@ -55,9 +57,6 @@ class API_EXPORT CAnomalyJobConfig { std::string detectorDescription() const { return m_DetectorDescription; } - CDetectionRulesJsonParser::TDetectionRuleVec customRules() const { - return m_CustomRules; - } bool useNull() const { return m_UseNull; } private: @@ -68,7 +67,7 @@ class API_EXPORT CAnomalyJobConfig { std::string m_PartitionFieldName{}; std::string m_ExcludeFrequent{}; std::string m_DetectorDescription{}; - CDetectionRulesJsonParser::TDetectionRuleVec m_CustomRules{}; + std::size_t m_DetectorIndex{}; bool m_UseNull{false}; }; @@ -80,16 +79,21 @@ class API_EXPORT CAnomalyJobConfig { static const std::string DETECTORS; static const std::string INFLUENCERS; static const std::string LATENCY; + static const std::string MULTIVARIATE_BY_FIELDS; static const std::string PER_PARTITION_CATEGORIZATION; static const std::string ENABLED; static const std::string STOP_ON_WARN; static const core_t::TTime DEFAULT_BUCKET_SPAN; + static const core_t::TTime DEFAULT_LATENCY; public: using TStrVec = std::vector; using TDetectorConfigVec = std::vector; + using TIntDetectionRuleVecUMap = + boost::unordered_map; + public: //! 
Default constructor CAnalysisConfig() {} @@ -121,9 +125,16 @@ class API_EXPORT CAnomalyJobConfig { return m_Detectors; } const TStrVec& influencers() const { return m_Influencers; } - std::string latency() const { return m_Latency; } + core_t::TTime latency() const { return m_Latency; } + + bool multivariateByFields() const { return m_MultivariateByFields; } - static core_t::TTime bucketSpanSeconds(const std::string& bucketSpanString); + const TIntDetectionRuleVecUMap& detectionRules() const { + return m_DetectorRules; + } + + static core_t::TTime durationSeconds(const std::string& durationString, + core_t::TTime defaultDuration); private: core_t::TTime m_BucketSpan{DEFAULT_BUCKET_SPAN}; @@ -134,7 +145,11 @@ class API_EXPORT CAnomalyJobConfig { bool m_PerPartitionCategorizationStopOnWarn{false}; TDetectorConfigVec m_Detectors{}; TStrVec m_Influencers{}; - std::string m_Latency{}; + core_t::TTime m_Latency{}; + bool m_MultivariateByFields{false}; + + //! The detection rules per detector index. + TIntDetectionRuleVecUMap m_DetectorRules; //! The filters per id used by categorical rule conditions. CDetectionRulesJsonParser::TStrPatternSetUMap m_RuleFilters{}; diff --git a/lib/api/CAnomalyJobConfig.cc b/lib/api/CAnomalyJobConfig.cc index 627e4932eb..eeddf6328b 100644 --- a/lib/api/CAnomalyJobConfig.cc +++ b/lib/api/CAnomalyJobConfig.cc @@ -54,8 +54,10 @@ const std::string CAnomalyJobConfig::CAnalysisConfig::PER_PARTITION_CATEGORIZATI const std::string CAnomalyJobConfig::CAnalysisConfig::ENABLED{"enabled"}; const std::string CAnomalyJobConfig::CAnalysisConfig::STOP_ON_WARN{"stop_on_warn"}; const std::string CAnomalyJobConfig::CAnalysisConfig::LATENCY{"latency"}; +const std::string CAnomalyJobConfig::CAnalysisConfig::MULTIVARIATE_BY_FIELDS{"multivariate_by_fields"}; const core_t::TTime CAnomalyJobConfig::CAnalysisConfig::DEFAULT_BUCKET_SPAN{300}; +const core_t::TTime CAnomalyJobConfig::CAnalysisConfig::DEFAULT_LATENCY{0}; const std::string CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::FUNCTION{"function"}; const std::string CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::FIELD_NAME{"field_name"}; @@ -66,6 +68,7 @@ const std::string CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::PARTITION "partition_field_name"}; const std::string CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::DETECTOR_DESCRIPTION{ "detector_description"}; +const std::string CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::DETECTOR_INDEX{"detector_index"}; const std::string CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::EXCLUDE_FREQUENT{ "exclude_frequent"}; const std::string CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::USE_NULL{"use_null"}; @@ -147,6 +150,8 @@ const CAnomalyJobConfigReader DETECTOR_CONFIG_READER{[] { CAnomalyJobConfigReader::E_OptionalParameter); theReader.addParameter(CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::DETECTOR_DESCRIPTION, CAnomalyJobConfigReader::E_OptionalParameter); + theReader.addParameter(CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::DETECTOR_INDEX, + CAnomalyJobConfigReader::E_RequiredParameter); theReader.addParameter(CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::EXCLUDE_FREQUENT, CAnomalyJobConfigReader::E_OptionalParameter); theReader.addParameter(CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::USE_NULL, @@ -288,8 +293,9 @@ void CAnomalyJobConfig::CAnalysisConfig::parse(const rapidjson::Value& analysisC // We choose to ignore any errors here parsing the time duration string as // we assume that it has already been validated 
by ES. In the event that any // error _does_ occur an error is logged and a default value used. - const std::string bucketSpanString{parameters[BUCKET_SPAN].as()}; - m_BucketSpan = CAnomalyJobConfig::CAnalysisConfig::bucketSpanSeconds(bucketSpanString); + const std::string& bucketSpanString{parameters[BUCKET_SPAN].as()}; + m_BucketSpan = CAnomalyJobConfig::CAnalysisConfig::durationSeconds( + bucketSpanString, DEFAULT_BUCKET_SPAN); m_SummaryCountFieldName = parameters[SUMMARY_COUNT_FIELD_NAME].fallback(EMPTY_STRING); m_CategorizationFieldName = parameters[CATEGORIZATION_FIELD_NAME].fallback(EMPTY_STRING); @@ -310,33 +316,42 @@ void CAnomalyJobConfig::CAnalysisConfig::parse(const rapidjson::Value& analysisC if (detectorsConfig != nullptr && detectorsConfig->IsArray()) { m_Detectors.resize(detectorsConfig->Size()); for (std::size_t i = 0; i < detectorsConfig->Size(); ++i) { - m_Detectors[i].parse((*detectorsConfig)[static_cast(i)], m_RuleFilters); + m_Detectors[i].parse((*detectorsConfig)[static_cast(i)], m_RuleFilters, + m_DetectorRules[static_cast(i)]); } } m_Influencers = parameters[INFLUENCERS].fallback(TStrVec{}); - m_Latency = parameters[LATENCY].fallback(EMPTY_STRING); + + const std::string& latencyString{parameters[LATENCY].fallback(EMPTY_STRING)}; + m_Latency = CAnomalyJobConfig::CAnalysisConfig::durationSeconds(latencyString, DEFAULT_LATENCY); + + m_MultivariateByFields = parameters[MULTIVARIATE_BY_FIELDS].fallback(false); } -core_t::TTime CAnomalyJobConfig::CAnalysisConfig::bucketSpanSeconds(const std::string& bucketSpanString) { - core_t::TTime bucketSpanSeconds{0}; - std::tie(bucketSpanSeconds, std::ignore) = - core::CTimeUtils::timeDurationStringToSeconds(bucketSpanString, DEFAULT_BUCKET_SPAN); +core_t::TTime +CAnomalyJobConfig::CAnalysisConfig::durationSeconds(const std::string& durationString, + core_t::TTime defaultDuration) { + core_t::TTime durationSeconds{0}; + std::tie(durationSeconds, std::ignore) = + core::CTimeUtils::timeDurationStringToSeconds(durationString, defaultDuration); - if (bucketSpanSeconds == 0) { - LOG_ERROR(<< "Invalid bucket span value " << bucketSpanString + if (durationSeconds == 0) { + LOG_ERROR(<< "Invalid duration value " << durationString << ". Duration must have a minimum value of 1s. 
" - "Using default bucket span value " - << DEFAULT_BUCKET_SPAN); - bucketSpanSeconds = DEFAULT_BUCKET_SPAN; + "Using default duration value " + << defaultDuration); + durationSeconds = defaultDuration; } - return bucketSpanSeconds; + return durationSeconds; } void CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::parse( const rapidjson::Value& detectorConfig, - const CDetectionRulesJsonParser::TStrPatternSetUMap& ruleFilters) { + const CDetectionRulesJsonParser::TStrPatternSetUMap& ruleFilters, + CDetectionRulesJsonParser::TDetectionRuleVec& detectionRules) { + auto parameters = DETECTOR_CONFIG_READER.read(detectorConfig); m_Function = parameters[FUNCTION].as(); @@ -347,12 +362,13 @@ void CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::parse( m_PartitionFieldName = parameters[PARTITION_FIELD_NAME].fallback(EMPTY_STRING); m_ExcludeFrequent = parameters[EXCLUDE_FREQUENT].fallback(EMPTY_STRING); m_DetectorDescription = parameters[DETECTOR_DESCRIPTION].fallback(EMPTY_STRING); + m_DetectorIndex = parameters[DETECTOR_INDEX].as(); auto customRules = parameters[CUSTOM_RULES].jsonObject(); if (customRules != nullptr) { std::string errorString; CDetectionRulesJsonParser rulesParser(ruleFilters); - if (rulesParser.parseRules(*customRules, m_CustomRules, errorString) == false) { + if (rulesParser.parseRules(*customRules, detectionRules, errorString) == false) { LOG_ERROR(<< errorString << toString(*customRules)); throw CAnomalyJobConfigReader::CParseError( "Error parsing custom rules: " + toString(*customRules)); diff --git a/lib/api/unittest/CAnomalyJobConfigTest.cc b/lib/api/unittest/CAnomalyJobConfigTest.cc index c061762121..1831e5b457 100644 --- a/lib/api/unittest/CAnomalyJobConfigTest.cc +++ b/lib/api/unittest/CAnomalyJobConfigTest.cc @@ -15,6 +15,15 @@ BOOST_AUTO_TEST_SUITE(CAnomalyJobConfigTest) BOOST_AUTO_TEST_CASE(testParse) { + + using TAnalysisConfig = ml::api::CAnomalyJobConfig::CAnalysisConfig; + using TDataDescription = ml::api::CAnomalyJobConfig::CDataDescription; + using TDetectorConfigVec = ml::api::CAnomalyJobConfig::CAnalysisConfig::TDetectorConfigVec; + + using TStrVec = ml::api::CAnomalyJobConfig::CAnalysisConfig::TStrVec; + using TAnalysisLimits = ml::api::CAnomalyJobConfig::CAnalysisLimits; + using TModelPlotConfig = ml::api::CAnomalyJobConfig::CModelPlotConfig; + { const std::string inValidModelMemoryLimitBytes{ "[{\"job_id\":\"flight_event_rate\",\"job_type\":\"anomaly_detector\",\"job_version\":\"8.0.0\",\"create_time\":1603110779167," @@ -91,9 +100,6 @@ BOOST_AUTO_TEST_CASE(testParse) { BOOST_REQUIRE_EQUAL("anomaly_detector", jobConfig.jobType()); BOOST_REQUIRE_EQUAL("flight_event_rate", jobConfig.jobId()); - using TAnalysisConfig = ml::api::CAnomalyJobConfig::CAnalysisConfig; - using TDataDescription = ml::api::CAnomalyJobConfig::CDataDescription; - const TAnalysisConfig& analysisConfig = jobConfig.analysisConfig(); BOOST_REQUIRE_EQUAL(1800, analysisConfig.bucketSpan()); @@ -104,7 +110,6 @@ BOOST_AUTO_TEST_CASE(testParse) { BOOST_REQUIRE_EQUAL("timestamp", dataDescription.timeField()); - using TDetectorConfigVec = ml::api::CAnomalyJobConfig::CAnalysisConfig::TDetectorConfigVec; const TDetectorConfigVec& detectorsConfig = analysisConfig.detectorsConfig(); BOOST_REQUIRE_EQUAL(1, detectorsConfig.size()); BOOST_REQUIRE_EQUAL("count", detectorsConfig[0].detectorDescription()); @@ -114,21 +119,18 @@ BOOST_AUTO_TEST_CASE(testParse) { BOOST_REQUIRE_EQUAL("", detectorsConfig[0].overFieldName()); BOOST_REQUIRE_EQUAL("", detectorsConfig[0].partitionFieldName()); 
BOOST_REQUIRE_EQUAL("", detectorsConfig[0].excludeFrequent()); - BOOST_REQUIRE_EQUAL(0, detectorsConfig[0].customRules().size()); + BOOST_REQUIRE_EQUAL(0, analysisConfig.detectionRules().at(0).size()); BOOST_REQUIRE_EQUAL(false, detectorsConfig[0].useNull()); - using TStrVec = ml::api::CAnomalyJobConfig::CAnalysisConfig::TStrVec; const TStrVec& influencers = analysisConfig.influencers(); BOOST_REQUIRE_EQUAL(0, influencers.size()); - using TAnalysisLimits = ml::api::CAnomalyJobConfig::CAnalysisLimits; const TAnalysisLimits& analysisLimits = jobConfig.analysisLimits(); BOOST_REQUIRE_EQUAL(4, analysisLimits.categorizationExamplesLimit()); // Expect the model memory limit to be rounded down to the nearest whole number of megabytes BOOST_REQUIRE_EQUAL(4, analysisLimits.modelMemoryLimit()); - using TModelPlotConfig = ml::api::CAnomalyJobConfig::CModelPlotConfig; const TModelPlotConfig& modelPlotConfig = jobConfig.modelPlotConfig(); BOOST_REQUIRE_EQUAL(true, modelPlotConfig.enabled()); BOOST_REQUIRE_EQUAL(true, modelPlotConfig.annotationsEnabled()); @@ -151,9 +153,6 @@ BOOST_AUTO_TEST_CASE(testParse) { BOOST_REQUIRE_EQUAL("anomaly_detector", jobConfig.jobType()); BOOST_REQUIRE_EQUAL("logs_max_bytes_by_geo", jobConfig.jobId()); - using TAnalysisConfig = ml::api::CAnomalyJobConfig::CAnalysisConfig; - using TDataDescription = ml::api::CAnomalyJobConfig::CDataDescription; - const TAnalysisConfig& analysisConfig = jobConfig.analysisConfig(); // When the configured bucket span equates to less than 1s expect the default value @@ -167,7 +166,6 @@ BOOST_AUTO_TEST_CASE(testParse) { BOOST_REQUIRE_EQUAL("timestamp", dataDescription.timeField()); - using TDetectorConfigVec = ml::api::CAnomalyJobConfig::CAnalysisConfig::TDetectorConfigVec; const TDetectorConfigVec& detectorsConfig = analysisConfig.detectorsConfig(); BOOST_REQUIRE_EQUAL(1, detectorsConfig.size()); @@ -179,23 +177,20 @@ BOOST_AUTO_TEST_CASE(testParse) { BOOST_REQUIRE_EQUAL("", detectorsConfig[0].overFieldName()); BOOST_REQUIRE_EQUAL("host.keyword", detectorsConfig[0].partitionFieldName()); BOOST_REQUIRE_EQUAL("", detectorsConfig[0].excludeFrequent()); - BOOST_REQUIRE_EQUAL(0, detectorsConfig[0].customRules().size()); + BOOST_REQUIRE_EQUAL(0, analysisConfig.detectionRules().at(0).size()); BOOST_REQUIRE_EQUAL(false, detectorsConfig[0].useNull()); - using TStrVec = ml::api::CAnomalyJobConfig::CAnalysisConfig::TStrVec; const TStrVec& influencers = analysisConfig.influencers(); BOOST_REQUIRE_EQUAL(2, influencers.size()); BOOST_REQUIRE_EQUAL("geo.src", influencers[0]); BOOST_REQUIRE_EQUAL("host.keyword", influencers[1]); - using TAnalysisLimits = ml::api::CAnomalyJobConfig::CAnalysisLimits; const TAnalysisLimits& analysisLimits = jobConfig.analysisLimits(); BOOST_REQUIRE_EQUAL(4, analysisLimits.categorizationExamplesLimit()); // Expect the model memory limit to be rounded down to the nearest whole number of megabytes BOOST_REQUIRE_EQUAL(5, analysisLimits.modelMemoryLimit()); - using TModelPlotConfig = ml::api::CAnomalyJobConfig::CModelPlotConfig; const TModelPlotConfig& modelPlotConfig = jobConfig.modelPlotConfig(); BOOST_REQUIRE_EQUAL(true, modelPlotConfig.enabled()); BOOST_REQUIRE_EQUAL(true, modelPlotConfig.annotationsEnabled()); @@ -220,9 +215,6 @@ BOOST_AUTO_TEST_CASE(testParse) { BOOST_REQUIRE_EQUAL("anomaly_detector", jobConfig.jobType()); BOOST_REQUIRE_EQUAL("ecommerce_population", jobConfig.jobId()); - using TAnalysisConfig = ml::api::CAnomalyJobConfig::CAnalysisConfig; - using TDataDescription = ml::api::CAnomalyJobConfig::CDataDescription; 
- const TAnalysisConfig& analysisConfig = jobConfig.analysisConfig(); // When the configured bucket span equates to less than 1s expect the default value @@ -235,7 +227,6 @@ BOOST_AUTO_TEST_CASE(testParse) { BOOST_REQUIRE_EQUAL("order_date", dataDescription.timeField()); - using TDetectorConfigVec = ml::api::CAnomalyJobConfig::CAnalysisConfig::TDetectorConfigVec; const TDetectorConfigVec& detectorsConfig = analysisConfig.detectorsConfig(); BOOST_REQUIRE_EQUAL(2, detectorsConfig.size()); @@ -247,7 +238,7 @@ BOOST_AUTO_TEST_CASE(testParse) { BOOST_REQUIRE_EQUAL("category.keyword", detectorsConfig[0].overFieldName()); BOOST_REQUIRE_EQUAL("", detectorsConfig[0].partitionFieldName()); BOOST_REQUIRE_EQUAL("", detectorsConfig[0].excludeFrequent()); - BOOST_REQUIRE_EQUAL(0, detectorsConfig[0].customRules().size()); + BOOST_REQUIRE_EQUAL(0, analysisConfig.detectionRules().at(0).size()); BOOST_REQUIRE_EQUAL(false, detectorsConfig[0].useNull()); BOOST_REQUIRE_EQUAL("count over \"category.keyword\"", @@ -258,21 +249,18 @@ BOOST_AUTO_TEST_CASE(testParse) { BOOST_REQUIRE_EQUAL("category.keyword", detectorsConfig[1].overFieldName()); BOOST_REQUIRE_EQUAL("", detectorsConfig[1].partitionFieldName()); BOOST_REQUIRE_EQUAL("", detectorsConfig[1].excludeFrequent()); - BOOST_REQUIRE_EQUAL(0, detectorsConfig[1].customRules().size()); + BOOST_REQUIRE_EQUAL(0, analysisConfig.detectionRules().at(1).size()); BOOST_REQUIRE_EQUAL(false, detectorsConfig[1].useNull()); - using TStrVec = ml::api::CAnomalyJobConfig::CAnalysisConfig::TStrVec; const TStrVec& influencers = analysisConfig.influencers(); BOOST_REQUIRE_EQUAL(2, influencers.size()); BOOST_REQUIRE_EQUAL("category.keyword", influencers[0]); BOOST_REQUIRE_EQUAL("customer_id", influencers[1]); - using TAnalysisLimits = ml::api::CAnomalyJobConfig::CAnalysisLimits; const TAnalysisLimits& analysisLimits = jobConfig.analysisLimits(); BOOST_REQUIRE_EQUAL(4, analysisLimits.categorizationExamplesLimit()); BOOST_REQUIRE_EQUAL(17, analysisLimits.modelMemoryLimit()); - using TModelPlotConfig = ml::api::CAnomalyJobConfig::CModelPlotConfig; const TModelPlotConfig& modelPlotConfig = jobConfig.modelPlotConfig(); BOOST_REQUIRE_EQUAL(true, modelPlotConfig.enabled()); BOOST_REQUIRE_EQUAL(true, modelPlotConfig.annotationsEnabled()); @@ -292,9 +280,6 @@ BOOST_AUTO_TEST_CASE(testParse) { BOOST_REQUIRE_EQUAL("anomaly_detector", jobConfig.jobType()); BOOST_REQUIRE_EQUAL("count_with_range", jobConfig.jobId()); - using TAnalysisConfig = ml::api::CAnomalyJobConfig::CAnalysisConfig; - using TDataDescription = ml::api::CAnomalyJobConfig::CDataDescription; - const TAnalysisConfig& analysisConfig = jobConfig.analysisConfig(); // When the configured bucket span equates to less than 1s expect the default value @@ -308,7 +293,6 @@ BOOST_AUTO_TEST_CASE(testParse) { BOOST_REQUIRE_EQUAL("timestamp", dataDescription.timeField()); - using TDetectorConfigVec = ml::api::CAnomalyJobConfig::CAnalysisConfig::TDetectorConfigVec; const TDetectorConfigVec& detectorsConfig = analysisConfig.detectorsConfig(); BOOST_REQUIRE_EQUAL(1, detectorsConfig.size()); @@ -319,19 +303,16 @@ BOOST_AUTO_TEST_CASE(testParse) { BOOST_REQUIRE_EQUAL("", detectorsConfig[0].overFieldName()); BOOST_REQUIRE_EQUAL("", detectorsConfig[0].partitionFieldName()); BOOST_REQUIRE_EQUAL("", detectorsConfig[0].excludeFrequent()); - BOOST_REQUIRE_EQUAL(1, detectorsConfig[0].customRules().size()); + BOOST_REQUIRE_EQUAL(1, analysisConfig.detectionRules().at(0).size()); BOOST_REQUIRE_EQUAL(false, detectorsConfig[0].useNull()); - using TStrVec = 
ml::api::CAnomalyJobConfig::CAnalysisConfig::TStrVec; const TStrVec& influencers = analysisConfig.influencers(); BOOST_REQUIRE_EQUAL(0, influencers.size()); - using TAnalysisLimits = ml::api::CAnomalyJobConfig::CAnalysisLimits; const TAnalysisLimits& analysisLimits = jobConfig.analysisLimits(); BOOST_REQUIRE_EQUAL(5, analysisLimits.categorizationExamplesLimit()); BOOST_REQUIRE_EQUAL(11, analysisLimits.modelMemoryLimit()); - using TModelPlotConfig = ml::api::CAnomalyJobConfig::CModelPlotConfig; const TModelPlotConfig& modelPlotConfig = jobConfig.modelPlotConfig(); BOOST_REQUIRE_EQUAL(false, modelPlotConfig.enabled()); BOOST_REQUIRE_EQUAL(false, modelPlotConfig.annotationsEnabled()); @@ -350,9 +331,6 @@ BOOST_AUTO_TEST_CASE(testParse) { BOOST_REQUIRE_EQUAL("anomaly_detector", jobConfig.jobType()); BOOST_REQUIRE_EQUAL("unusual_message_counts", jobConfig.jobId()); - using TAnalysisConfig = ml::api::CAnomalyJobConfig::CAnalysisConfig; - using TDataDescription = ml::api::CAnomalyJobConfig::CDataDescription; - const TAnalysisConfig& analysisConfig = jobConfig.analysisConfig(); BOOST_REQUIRE_EQUAL(900, analysisConfig.bucketSpan()); @@ -363,7 +341,6 @@ BOOST_AUTO_TEST_CASE(testParse) { BOOST_REQUIRE_EQUAL("timestamp", dataDescription.timeField()); - using TDetectorConfigVec = ml::api::CAnomalyJobConfig::CAnalysisConfig::TDetectorConfigVec; const TDetectorConfigVec& detectorsConfig = analysisConfig.detectorsConfig(); BOOST_REQUIRE_EQUAL(1, detectorsConfig.size()); @@ -374,10 +351,9 @@ BOOST_AUTO_TEST_CASE(testParse) { BOOST_REQUIRE_EQUAL("", detectorsConfig[0].overFieldName()); BOOST_REQUIRE_EQUAL("", detectorsConfig[0].partitionFieldName()); BOOST_REQUIRE_EQUAL("", detectorsConfig[0].excludeFrequent()); - BOOST_REQUIRE_EQUAL(0, detectorsConfig[0].customRules().size()); + BOOST_REQUIRE_EQUAL(0, analysisConfig.detectionRules().at(0).size()); BOOST_REQUIRE_EQUAL(false, detectorsConfig[0].useNull()); - using TStrVec = ml::api::CAnomalyJobConfig::CAnalysisConfig::TStrVec; const TStrVec& influencers = analysisConfig.influencers(); BOOST_REQUIRE_EQUAL(1, influencers.size()); BOOST_REQUIRE_EQUAL("mlcategory", influencers[0]); @@ -388,12 +364,10 @@ BOOST_AUTO_TEST_CASE(testParse) { BOOST_REQUIRE_EQUAL("foo.*", categorizationFilters[0]); BOOST_REQUIRE_EQUAL("bar.*", categorizationFilters[1]); - using TAnalysisLimits = ml::api::CAnomalyJobConfig::CAnalysisLimits; const TAnalysisLimits& analysisLimits = jobConfig.analysisLimits(); BOOST_REQUIRE_EQUAL(4, analysisLimits.categorizationExamplesLimit()); BOOST_REQUIRE_EQUAL(26, analysisLimits.modelMemoryLimit()); - using TModelPlotConfig = ml::api::CAnomalyJobConfig::CModelPlotConfig; const TModelPlotConfig& modelPlotConfig = jobConfig.modelPlotConfig(); BOOST_REQUIRE_EQUAL(false, modelPlotConfig.enabled()); BOOST_REQUIRE_EQUAL(false, modelPlotConfig.annotationsEnabled()); From 6e044e6a9bc2759126ba1573866f2dcd7a552190 Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Thu, 3 Dec 2020 11:10:13 +0000 Subject: [PATCH 2/3] Handle updates to filter configuration Keep the Java REST integration tests happy by ensuring that the detector rules are correctly updated with changes to the filter configuration. 
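For reference, the ini-style update format is unchanged by this commit; only the destination of the parsed filters moves from CFieldConfig to CAnomalyJobConfig::CAnalysisConfig. A minimal sketch of the intended flow, based on the updated CConfigUpdaterTest in this patch (the filter id "filter_1" and the patterns are illustrative values only, not part of the production code):

    // Sketch only: mirrors testUpdateGivenFilters; example values throughout.
    ml::api::CAnomalyJobConfig jobConfig;
    ml::api::CFieldConfig fieldConfig;
    ml::model::CAnomalyDetectorModelConfig modelConfig =
        ml::model::CAnomalyDetectorModelConfig::defaultConfig();

    ml::api::CConfigUpdater configUpdater(jobConfig, fieldConfig, modelConfig);

    // Each entry under [filters] is keyed "filter.<filterId>" and carries a
    // JSON array of patterns; CAnalysisConfig::updateFilters() rebuilds the
    // corresponding core::CPatternSet in its rule filter map.
    const std::string configUpdate{
        "[filters]\nfilter.filter_1=[\"ccc\",\"ddd\"]\n"};
    if (configUpdater.update(configUpdate)) {
        const auto& ruleFilters = jobConfig.analysisConfig().ruleFilters();
        // "ccc" and "ddd" should now be members of filter_1's pattern set,
        // and any detector rules referencing filter_1 see the new patterns.
    }
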
--- bin/autodetect/Main.cc | 1 + include/api/CAnomalyJob.h | 6 +++ include/api/CAnomalyJobConfig.h | 12 ++++- include/api/CConfigUpdater.h | 6 ++- lib/api/CAnomalyJob.cc | 8 +-- lib/api/CAnomalyJobConfig.cc | 31 ++++++++++- lib/api/CConfigUpdater.cc | 8 +-- lib/api/dump_state/Main.cc | 10 ++-- lib/api/unittest/CAnomalyJobLimitTest.cc | 29 +++++++--- lib/api/unittest/CAnomalyJobTest.cc | 54 +++++++++++++------ lib/api/unittest/CConfigUpdaterTest.cc | 26 +++++---- lib/api/unittest/CForecastRunnerTest.cc | 13 +++-- lib/api/unittest/CMultiFileDataAdderTest.cc | 6 ++- lib/api/unittest/CPersistenceManagerTest.cc | 7 +++ lib/api/unittest/CRestorePreviousStateTest.cc | 4 +- .../unittest/CSingleStreamDataAdderTest.cc | 6 ++- lib/api/unittest/CStringStoreTest.cc | 25 +++++---- lib/api/unittest/CTestAnomalyJob.cc | 2 + lib/api/unittest/CTestAnomalyJob.h | 1 + 19 files changed, 192 insertions(+), 63 deletions(-) diff --git a/bin/autodetect/Main.cc b/bin/autodetect/Main.cc index 1610202825..c4c5af8959 100644 --- a/bin/autodetect/Main.cc +++ b/bin/autodetect/Main.cc @@ -297,6 +297,7 @@ int main(int argc, char** argv) { // The anomaly job knows how to detect anomalies ml::api::CAnomalyJob job{jobId, limits, + jobConfig, fieldConfig, modelConfig, wrappedOutputStream, diff --git a/include/api/CAnomalyJob.h b/include/api/CAnomalyJob.h index 3713d37b8c..57c162f156 100644 --- a/include/api/CAnomalyJob.h +++ b/include/api/CAnomalyJob.h @@ -50,6 +50,7 @@ class CHierarchicalResults; class CLimits; } namespace api { +class CAnomalyJobConfig; class CPersistenceManager; class CModelPlotDataJsonWriter; class CFieldConfig; @@ -142,6 +143,7 @@ class API_EXPORT CAnomalyJob : public CDataProcessor { public: CAnomalyJob(const std::string& jobId, model::CLimits& limits, + CAnomalyJobConfig& jobConfig, CFieldConfig& fieldConfig, model::CAnomalyDetectorModelConfig& modelConfig, core::CJsonOutputStreamWrapper& outputBuffer, @@ -424,6 +426,10 @@ class API_EXPORT CAnomalyJob : public CDataProcessor { //! Object to which the output is passed CJsonOutputWriter m_JsonOutputWriter; + //! Configuration settings for the analysis parsed from + //! JSON configuration file. + CAnomalyJobConfig& m_JobConfig; + //! Field names to use for the analysis CFieldConfig& m_FieldConfig; diff --git a/include/api/CAnomalyJobConfig.h b/include/api/CAnomalyJobConfig.h index dbd5d7cea5..8b02594ba0 100644 --- a/include/api/CAnomalyJobConfig.h +++ b/include/api/CAnomalyJobConfig.h @@ -104,6 +104,10 @@ class API_EXPORT CAnomalyJobConfig { void parse(const rapidjson::Value& json); + bool processFilter(const std::string& key, const std::string& value); + + bool updateFilters(const boost::property_tree::ptree& propTree); + core_t::TTime bucketSpan() const { return m_BucketSpan; } std::string summaryCountFieldName() const { @@ -133,6 +137,10 @@ class API_EXPORT CAnomalyJobConfig { return m_DetectorRules; } + const CDetectionRulesJsonParser::TStrPatternSetUMap& ruleFilters() const { + return m_RuleFilters; + } + static core_t::TTime durationSeconds(const std::string& durationString, core_t::TTime defaultDuration); @@ -145,7 +153,7 @@ class API_EXPORT CAnomalyJobConfig { bool m_PerPartitionCategorizationStopOnWarn{false}; TDetectorConfigVec m_Detectors{}; TStrVec m_Influencers{}; - core_t::TTime m_Latency{}; + core_t::TTime m_Latency{DEFAULT_LATENCY}; bool m_MultivariateByFields{false}; //! The detection rules per detector index. 
@@ -249,7 +257,7 @@ class API_EXPORT CAnomalyJobConfig { std::string jobId() const { return m_JobId; } std::string jobType() const { return m_JobType; } - const CAnalysisConfig& analysisConfig() const { return m_AnalysisConfig; } + CAnalysisConfig& analysisConfig() { return m_AnalysisConfig; } const CDataDescription& dataDescription() const { return m_DataDescription; } diff --git a/include/api/CConfigUpdater.h b/include/api/CConfigUpdater.h index c3a4573983..325967a3de 100644 --- a/include/api/CConfigUpdater.h +++ b/include/api/CConfigUpdater.h @@ -8,6 +8,7 @@ #include +#include #include #include @@ -34,7 +35,9 @@ namespace api { //! class API_EXPORT CConfigUpdater { public: - CConfigUpdater(CFieldConfig& fieldConfig, model::CAnomalyDetectorModelConfig& modelConfig); + CConfigUpdater(CAnomalyJobConfig& jobConfig, + CFieldConfig& fieldConfig, + model::CAnomalyDetectorModelConfig& modelConfig); //! Update from given config changes //! \param config the requested changes in an ini syntax @@ -49,6 +52,7 @@ class API_EXPORT CConfigUpdater { static const std::string SCHEDULED_EVENTS; private: + CAnomalyJobConfig& m_JobConfig; CFieldConfig& m_FieldConfig; model::CAnomalyDetectorModelConfig& m_ModelConfig; }; diff --git a/lib/api/CAnomalyJob.cc b/lib/api/CAnomalyJob.cc index afcfd73882..6835fd2b41 100644 --- a/lib/api/CAnomalyJob.cc +++ b/lib/api/CAnomalyJob.cc @@ -37,6 +37,7 @@ #include #include +#include #include #include #include @@ -127,6 +128,7 @@ const CAnomalyJob::TAnomalyDetectorPtr CAnomalyJob::NULL_DETECTOR; CAnomalyJob::CAnomalyJob(const std::string& jobId, model::CLimits& limits, + CAnomalyJobConfig& jobConfig, CFieldConfig& fieldConfig, model::CAnomalyDetectorModelConfig& modelConfig, core::CJsonOutputStreamWrapper& outputStream, @@ -139,8 +141,8 @@ CAnomalyJob::CAnomalyJob(const std::string& jobId, : CDataProcessor{timeFieldName, timeFieldFormat}, m_JobId{jobId}, m_Limits{limits}, m_OutputStream{outputStream}, m_ForecastRunner{m_JobId, m_OutputStream, limits.resourceMonitor()}, - m_JsonOutputWriter{m_JobId, m_OutputStream}, m_FieldConfig{fieldConfig}, - m_ModelConfig{modelConfig}, m_NumRecordsHandled{0}, + m_JsonOutputWriter{m_JobId, m_OutputStream}, m_JobConfig{jobConfig}, + m_FieldConfig{fieldConfig}, m_ModelConfig{modelConfig}, m_NumRecordsHandled{0}, m_LastFinalisedBucketEndTime{0}, m_PersistCompleteFunc{persistCompleteFunc}, m_MaxDetectors{std::numeric_limits::max()}, m_PersistenceManager{persistenceManager}, m_MaxQuantileInterval{maxQuantileInterval}, @@ -455,7 +457,7 @@ void CAnomalyJob::acknowledgeFlush(const std::string& flushId) { void CAnomalyJob::updateConfig(const std::string& config) { LOG_DEBUG(<< "Received update config request: " << config); - CConfigUpdater configUpdater(m_FieldConfig, m_ModelConfig); + CConfigUpdater configUpdater(m_JobConfig, m_FieldConfig, m_ModelConfig); if (configUpdater.update(config) == false) { LOG_ERROR(<< "Failed to update configuration"); } diff --git a/lib/api/CAnomalyJobConfig.cc b/lib/api/CAnomalyJobConfig.cc index eeddf6328b..58784294f2 100644 --- a/lib/api/CAnomalyJobConfig.cc +++ b/lib/api/CAnomalyJobConfig.cc @@ -324,11 +324,40 @@ void CAnomalyJobConfig::CAnalysisConfig::parse(const rapidjson::Value& analysisC m_Influencers = parameters[INFLUENCERS].fallback(TStrVec{}); const std::string& latencyString{parameters[LATENCY].fallback(EMPTY_STRING)}; - m_Latency = CAnomalyJobConfig::CAnalysisConfig::durationSeconds(latencyString, DEFAULT_LATENCY); + if (latencyString.empty() == false) { + m_Latency = 
CAnomalyJobConfig::CAnalysisConfig::durationSeconds( + latencyString, DEFAULT_LATENCY); + } m_MultivariateByFields = parameters[MULTIVARIATE_BY_FIELDS].fallback(false); } +// TODO: Process updates as JSON +bool CAnomalyJobConfig::CAnalysisConfig::processFilter(const std::string& key, + const std::string& value) { + // expected format is filter.=[json, array] + std::size_t sepPos{key.find('.')}; + if (sepPos == std::string::npos) { + LOG_ERROR(<< "Unrecognised filter key: " + key); + return false; + } + std::string filterId = key.substr(sepPos + 1); + core::CPatternSet& filter = m_RuleFilters[filterId]; + return filter.initFromJson(value); +} + +// TODO: Process updates as JSON +bool CAnomalyJobConfig::CAnalysisConfig::updateFilters(const boost::property_tree::ptree& propTree) { + for (const auto& filterEntry : propTree) { + const std::string& key = filterEntry.first; + const std::string& value = filterEntry.second.data(); + if (this->processFilter(key, value) == false) { + return false; + } + } + return true; +} + core_t::TTime CAnomalyJobConfig::CAnalysisConfig::durationSeconds(const std::string& durationString, core_t::TTime defaultDuration) { diff --git a/lib/api/CConfigUpdater.cc b/lib/api/CConfigUpdater.cc index a1a51b9471..4b38ea8c13 100644 --- a/lib/api/CConfigUpdater.cc +++ b/lib/api/CConfigUpdater.cc @@ -20,9 +20,10 @@ const std::string CConfigUpdater::RULES_JSON("rulesJson"); const std::string CConfigUpdater::FILTERS("filters"); const std::string CConfigUpdater::SCHEDULED_EVENTS("scheduledEvents"); -CConfigUpdater::CConfigUpdater(CFieldConfig& fieldConfig, +CConfigUpdater::CConfigUpdater(CAnomalyJobConfig& jobConfig, + CFieldConfig& fieldConfig, model::CAnomalyDetectorModelConfig& modelConfig) - : m_FieldConfig(fieldConfig), m_ModelConfig(modelConfig) { + : m_JobConfig(jobConfig), m_FieldConfig(fieldConfig), m_ModelConfig(modelConfig) { } bool CConfigUpdater::update(const std::string& config) { @@ -59,7 +60,8 @@ bool CConfigUpdater::update(const std::string& config) { return false; } } else if (stanzaName == FILTERS) { - if (m_FieldConfig.updateFilters(subTree) == false) { + // TODO: Move to JSON format for config updates. 
+ if (m_JobConfig.analysisConfig().updateFilters(subTree) == false) { LOG_ERROR(<< "Failed to update filters"); return false; } diff --git a/lib/api/dump_state/Main.cc b/lib/api/dump_state/Main.cc index c18711b206..a1a78e5ab6 100644 --- a/lib/api/dump_state/Main.cc +++ b/lib/api/dump_state/Main.cc @@ -31,6 +31,7 @@ #include #include +#include #include #include #include @@ -183,15 +184,18 @@ bool persistAnomalyDetectorStateToFile(const std::string& configFileName, return false; } + ml::api::CAnomalyJobConfig jobConfig; + ml::core_t::TTime bucketSize(3600); std::string jobId("foo"); ml::model::CAnomalyDetectorModelConfig modelConfig = ml::model::CAnomalyDetectorModelConfig::defaultConfig( bucketSize, ml::model_t::E_None, "", bucketSize * latencyBuckets, false); - ml::api::CAnomalyJob origJob(jobId, limits, fieldConfig, modelConfig, wrappedOutputStream, - std::bind(&reportPersistComplete, std::placeholders::_1), - nullptr, -1, "time", timeFormat, 0); + ml::api::CAnomalyJob origJob( + jobId, limits, jobConfig, fieldConfig, modelConfig, wrappedOutputStream, + std::bind(&reportPersistComplete, std::placeholders::_1), nullptr, -1, + "time", timeFormat, 0); using TInputParserUPtr = std::unique_ptr; const TInputParserUPtr parser{[&inputFilename, &inputStrm]() -> TInputParserUPtr { diff --git a/lib/api/unittest/CAnomalyJobLimitTest.cc b/lib/api/unittest/CAnomalyJobLimitTest.cc index 6600e2d87e..4ff7248596 100644 --- a/lib/api/unittest/CAnomalyJobLimitTest.cc +++ b/lib/api/unittest/CAnomalyJobLimitTest.cc @@ -11,6 +11,7 @@ #include #include +#include #include #include #include @@ -96,6 +97,7 @@ BOOST_AUTO_TEST_CASE(testAccuracy) { // Without limits, this data set should make the models around // 1230000 bytes // Run the data once to find out what the current platform uses + ml::api::CAnomalyJobConfig jobConfig; api::CFieldConfig fieldConfig; api::CFieldConfig::TStrVec clause; clause.push_back("value"); @@ -118,7 +120,8 @@ BOOST_AUTO_TEST_CASE(testAccuracy) { { LOG_TRACE(<< "Setting up job"); - CTestAnomalyJob job("job", limits, fieldConfig, modelConfig, wrappedOutputStream); + CTestAnomalyJob job("job", limits, jobConfig, fieldConfig, + modelConfig, wrappedOutputStream); std::ifstream inputStrm("testfiles/resource_accuracy.csv"); BOOST_TEST_REQUIRE(inputStrm.is_open()); @@ -139,6 +142,7 @@ BOOST_AUTO_TEST_CASE(testAccuracy) { } { // Now run the data with limiting + ml::api::CAnomalyJobConfig jobConfig; api::CFieldConfig fieldConfig; api::CFieldConfig::TStrVec clause; clause.push_back("value"); @@ -163,7 +167,8 @@ BOOST_AUTO_TEST_CASE(testAccuracy) { limits.resourceMonitor().m_ByteLimitHigh - 1024; LOG_TRACE(<< "Setting up job"); - CTestAnomalyJob job("job", limits, fieldConfig, modelConfig, wrappedOutputStream); + CTestAnomalyJob job("job", limits, jobConfig, fieldConfig, + modelConfig, wrappedOutputStream); std::ifstream inputStrm("testfiles/resource_accuracy.csv"); BOOST_TEST_REQUIRE(inputStrm.is_open()); @@ -199,6 +204,7 @@ BOOST_AUTO_TEST_CASE(testLimit) { // Run the data without any resource limits and check that // all the expected fields are in the results set model::CLimits limits; + ml::api::CAnomalyJobConfig jobConfig; api::CFieldConfig fieldConfig; api::CFieldConfig::TStrVec clause; clause.push_back("value"); @@ -214,7 +220,8 @@ BOOST_AUTO_TEST_CASE(testLimit) { model::CAnomalyDetectorModelConfig::defaultConfig(3600); LOG_TRACE(<< "Setting up job"); - CTestAnomalyJob job("job", limits, fieldConfig, modelConfig, wrappedOutputStream); + CTestAnomalyJob job("job", limits, jobConfig, 
fieldConfig, modelConfig, + wrappedOutputStream); std::ifstream inputStrm("testfiles/resource_limits_3_2over_3partition.csv"); BOOST_TEST_REQUIRE(inputStrm.is_open()); @@ -244,6 +251,7 @@ BOOST_AUTO_TEST_CASE(testLimit) { // Run the data with some resource limits after the first 4 records and // check that we get only anomalies from the first 2 partitions model::CLimits limits; + ml::api::CAnomalyJobConfig jobConfig; api::CFieldConfig fieldConfig; api::CFieldConfig::TStrVec clause; clause.push_back("value"); @@ -262,7 +270,8 @@ BOOST_AUTO_TEST_CASE(testLimit) { core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); LOG_TRACE(<< "Setting up job"); - CTestAnomalyJob job("job", limits, fieldConfig, modelConfig, wrappedOutputStream); + CTestAnomalyJob job("job", limits, jobConfig, fieldConfig, modelConfig, + wrappedOutputStream); std::ifstream inputStrm("testfiles/resource_limits_3_2over_3partition_first8.csv"); BOOST_TEST_REQUIRE(inputStrm.is_open()); @@ -371,12 +380,14 @@ BOOST_AUTO_TEST_CASE(testModelledEntityCountForFixedMemoryLimit) { std::size_t memoryLimit{10 /*MB*/}; model::CLimits limits; limits.resourceMonitor().memoryLimit(memoryLimit); + ml::api::CAnomalyJobConfig jobConfig; api::CFieldConfig fieldConfig; api::CFieldConfig::TStrVec clauses{"mean(foo)", "by", "bar"}; fieldConfig.initFromClause(clauses); model::CAnomalyDetectorModelConfig modelConfig = model::CAnomalyDetectorModelConfig::defaultConfig(testParam.s_BucketLength); - CTestAnomalyJob job("job", limits, fieldConfig, modelConfig, wrappedOutputStream); + CTestAnomalyJob job("job", limits, jobConfig, fieldConfig, + modelConfig, wrappedOutputStream); core_t::TTime startTime{1495110323}; core_t::TTime endTime{1495260323}; @@ -422,12 +433,14 @@ BOOST_AUTO_TEST_CASE(testModelledEntityCountForFixedMemoryLimit) { std::size_t memoryLimit{10 /*MB*/}; model::CLimits limits; limits.resourceMonitor().memoryLimit(memoryLimit); + ml::api::CAnomalyJobConfig jobConfig; api::CFieldConfig fieldConfig; api::CFieldConfig::TStrVec clauses{"mean(foo)", "partitionfield=bar"}; fieldConfig.initFromClause(clauses); model::CAnomalyDetectorModelConfig modelConfig = model::CAnomalyDetectorModelConfig::defaultConfig(testParam.s_BucketLength); - CTestAnomalyJob job("job", limits, fieldConfig, modelConfig, wrappedOutputStream); + CTestAnomalyJob job("job", limits, jobConfig, fieldConfig, + modelConfig, wrappedOutputStream); core_t::TTime startTime{1495110323}; core_t::TTime endTime{1495260323}; @@ -473,12 +486,14 @@ BOOST_AUTO_TEST_CASE(testModelledEntityCountForFixedMemoryLimit) { std::size_t memoryLimit{5 /*MB*/}; model::CLimits limits; limits.resourceMonitor().memoryLimit(memoryLimit); + ml::api::CAnomalyJobConfig jobConfig; api::CFieldConfig fieldConfig; api::CFieldConfig::TStrVec clauses{"mean(foo)", "over", "bar"}; fieldConfig.initFromClause(clauses); model::CAnomalyDetectorModelConfig modelConfig = model::CAnomalyDetectorModelConfig::defaultConfig(testParam.s_BucketLength); - CTestAnomalyJob job("job", limits, fieldConfig, modelConfig, wrappedOutputStream); + CTestAnomalyJob job("job", limits, jobConfig, fieldConfig, + modelConfig, wrappedOutputStream); core_t::TTime startTime{1495110323}; core_t::TTime endTime{1495230323}; diff --git a/lib/api/unittest/CAnomalyJobTest.cc b/lib/api/unittest/CAnomalyJobTest.cc index e517760075..8eade5d8b3 100644 --- a/lib/api/unittest/CAnomalyJobTest.cc +++ b/lib/api/unittest/CAnomalyJobTest.cc @@ -13,6 +13,7 @@ #include #include +#include #include #include #include @@ -184,6 +185,7 @@ 
BOOST_AUTO_TEST_CASE(testBadTimes) { { // Test with no time field model::CLimits limits; + api::CAnomalyJobConfig jobConfig; api::CFieldConfig fieldConfig; api::CFieldConfig::TStrVec clauses; clauses.push_back("value"); @@ -194,7 +196,8 @@ BOOST_AUTO_TEST_CASE(testBadTimes) { std::stringstream outputStrm; core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); - CTestAnomalyJob job("job", limits, fieldConfig, modelConfig, wrappedOutputStream); + CTestAnomalyJob job("job", limits, jobConfig, fieldConfig, modelConfig, + wrappedOutputStream); CTestAnomalyJob::TStrStrUMap dataRows; dataRows["wibble"] = "12345678"; @@ -207,6 +210,7 @@ BOOST_AUTO_TEST_CASE(testBadTimes) { { // Test with bad time field model::CLimits limits; + api::CAnomalyJobConfig jobConfig; api::CFieldConfig fieldConfig; api::CFieldConfig::TStrVec clauses; clauses.push_back("value"); @@ -217,7 +221,8 @@ BOOST_AUTO_TEST_CASE(testBadTimes) { std::stringstream outputStrm; core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); - CTestAnomalyJob job("job", limits, fieldConfig, modelConfig, wrappedOutputStream); + CTestAnomalyJob job("job", limits, jobConfig, fieldConfig, modelConfig, + wrappedOutputStream); CTestAnomalyJob::TStrStrUMap dataRows; dataRows["time"] = "hello"; @@ -230,6 +235,7 @@ BOOST_AUTO_TEST_CASE(testBadTimes) { { // Test with bad time field format model::CLimits limits; + api::CAnomalyJobConfig jobConfig; api::CFieldConfig fieldConfig; api::CFieldConfig::TStrVec clauses; clauses.push_back("value"); @@ -240,9 +246,9 @@ BOOST_AUTO_TEST_CASE(testBadTimes) { std::stringstream outputStrm; core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); - CTestAnomalyJob job("job", limits, fieldConfig, modelConfig, wrappedOutputStream, - CTestAnomalyJob::TPersistCompleteFunc(), nullptr, - -1, "time", "%Y%m%m%H%M%S"); + CTestAnomalyJob job("job", limits, jobConfig, fieldConfig, modelConfig, + wrappedOutputStream, CTestAnomalyJob::TPersistCompleteFunc(), + nullptr, -1, "time", "%Y%m%m%H%M%S"); CTestAnomalyJob::TStrStrUMap dataRows; dataRows["time"] = "hello world"; @@ -258,6 +264,7 @@ BOOST_AUTO_TEST_CASE(testOutOfSequence) { { // Test out of sequence record model::CLimits limits; + api::CAnomalyJobConfig jobConfig; api::CFieldConfig fieldConfig; api::CFieldConfig::TStrVec clauses; clauses.push_back("value"); @@ -268,7 +275,8 @@ BOOST_AUTO_TEST_CASE(testOutOfSequence) { std::stringstream outputStrm; core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); - CTestAnomalyJob job("job", limits, fieldConfig, modelConfig, wrappedOutputStream); + CTestAnomalyJob job("job", limits, jobConfig, fieldConfig, modelConfig, + wrappedOutputStream); job.description(); job.descriptionAndDebugMemoryUsage(); @@ -294,6 +302,7 @@ BOOST_AUTO_TEST_CASE(testControlMessages) { { // Test control messages model::CLimits limits; + api::CAnomalyJobConfig jobConfig; api::CFieldConfig fieldConfig; api::CFieldConfig::TStrVec clauses; clauses.push_back("value"); @@ -304,7 +313,8 @@ BOOST_AUTO_TEST_CASE(testControlMessages) { std::stringstream outputStrm; core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); - CTestAnomalyJob job("job", limits, fieldConfig, modelConfig, wrappedOutputStream); + CTestAnomalyJob job("job", limits, jobConfig, fieldConfig, modelConfig, + wrappedOutputStream); CTestAnomalyJob::TStrStrUMap dataRows; dataRows["."] = " "; @@ -326,6 +336,7 @@ BOOST_AUTO_TEST_CASE(testControlMessages) { { // Test reset bucket model::CLimits limits; + api::CAnomalyJobConfig jobConfig; api::CFieldConfig fieldConfig; 
api::CFieldConfig::TStrVec clauses; clauses.push_back("count"); @@ -341,7 +352,8 @@ BOOST_AUTO_TEST_CASE(testControlMessages) { std::stringstream outputStrm; { core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); - CTestAnomalyJob job("job", limits, fieldConfig, modelConfig, wrappedOutputStream); + CTestAnomalyJob job("job", limits, jobConfig, fieldConfig, + modelConfig, wrappedOutputStream); core_t::TTime time = 12345678; for (std::size_t i = 0; i < 50; i++, time += (BUCKET_SIZE / 2)) { @@ -389,7 +401,8 @@ BOOST_AUTO_TEST_CASE(testControlMessages) { std::stringstream outputStrm2; { core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm2); - CTestAnomalyJob job("job", limits, fieldConfig, modelConfig, wrappedOutputStream); + CTestAnomalyJob job("job", limits, jobConfig, fieldConfig, + modelConfig, wrappedOutputStream); core_t::TTime time = 12345678; for (std::size_t i = 0; i < 50; i++, time += (BUCKET_SIZE / 2)) { @@ -441,6 +454,7 @@ BOOST_AUTO_TEST_CASE(testControlMessages) { BOOST_AUTO_TEST_CASE(testSkipTimeControlMessage) { model::CLimits limits; + api::CAnomalyJobConfig jobConfig; api::CFieldConfig fieldConfig; api::CFieldConfig::TStrVec clauses; clauses.push_back("count"); @@ -451,7 +465,7 @@ BOOST_AUTO_TEST_CASE(testSkipTimeControlMessage) { std::stringstream outputStrm; core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); - CTestAnomalyJob job("job", limits, fieldConfig, modelConfig, wrappedOutputStream); + CTestAnomalyJob job("job", limits, jobConfig, fieldConfig, modelConfig, wrappedOutputStream); CTestAnomalyJob::TStrStrUMap dataRows; @@ -492,6 +506,7 @@ BOOST_AUTO_TEST_CASE(testSkipTimeControlMessage) { BOOST_AUTO_TEST_CASE(testIsPersistenceNeeded) { model::CLimits limits; + api::CAnomalyJobConfig jobConfig; api::CFieldConfig fieldConfig; api::CFieldConfig::TStrVec clauses; clauses.push_back("count"); @@ -506,7 +521,8 @@ BOOST_AUTO_TEST_CASE(testIsPersistenceNeeded) { std::stringstream outputStrm; core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); - CTestAnomalyJob job("job", limits, fieldConfig, modelConfig, wrappedOutputStream); + CTestAnomalyJob job("job", limits, jobConfig, fieldConfig, modelConfig, + wrappedOutputStream); BOOST_REQUIRE_EQUAL(false, job.isPersistenceNeeded("test state")); @@ -532,7 +548,8 @@ BOOST_AUTO_TEST_CASE(testIsPersistenceNeeded) { std::stringstream outputStrm; core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); - CTestAnomalyJob job("job", limits, fieldConfig, modelConfig, wrappedOutputStream); + CTestAnomalyJob job("job", limits, jobConfig, fieldConfig, modelConfig, + wrappedOutputStream); CTestAnomalyJob::TStrStrUMap dataRows; @@ -565,7 +582,8 @@ BOOST_AUTO_TEST_CASE(testIsPersistenceNeeded) { std::stringstream outputStrm; core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); - CTestAnomalyJob job("job", limits, fieldConfig, modelConfig, wrappedOutputStream); + CTestAnomalyJob job("job", limits, jobConfig, fieldConfig, modelConfig, + wrappedOutputStream); CTestAnomalyJob::TStrStrUMap dataRows; @@ -595,6 +613,7 @@ BOOST_AUTO_TEST_CASE(testIsPersistenceNeeded) { BOOST_AUTO_TEST_CASE(testModelPlot) { core_t::TTime bucketSize = 10000; model::CLimits limits; + api::CAnomalyJobConfig jobConfig; api::CFieldConfig fieldConfig; api::CFieldConfig::TStrVec clauses; clauses.push_back("mean(value)"); @@ -611,7 +630,8 @@ BOOST_AUTO_TEST_CASE(testModelPlot) { { core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); - CTestAnomalyJob job("job", limits, fieldConfig, modelConfig, 
wrappedOutputStream); + CTestAnomalyJob job("job", limits, jobConfig, fieldConfig, modelConfig, + wrappedOutputStream); CTestAnomalyJob::TStrStrUMap dataRows; dataRows["time"] = "10000000"; @@ -673,6 +693,7 @@ BOOST_AUTO_TEST_CASE(testInterimResultEdgeCases) { core_t::TTime bucketSize = 3600; model::CLimits limits; + api::CAnomalyJobConfig jobConfig; api::CFieldConfig fieldConfig; api::CFieldConfig::TStrVec clauses{"count", "by", "error"}; fieldConfig.initFromClause(clauses); @@ -684,7 +705,7 @@ BOOST_AUTO_TEST_CASE(testInterimResultEdgeCases) { core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); - CTestAnomalyJob job("job", limits, fieldConfig, modelConfig, wrappedOutputStream); + CTestAnomalyJob job("job", limits, jobConfig, fieldConfig, modelConfig, wrappedOutputStream); std::remove(logFile); BOOST_TEST_REQUIRE(ml::core::CLogger::instance().reconfigureFromFile( @@ -734,6 +755,7 @@ BOOST_AUTO_TEST_CASE(testInterimResultEdgeCases) { BOOST_AUTO_TEST_CASE(testRestoreFailsWithEmptyStream) { model::CLimits limits; + api::CAnomalyJobConfig jobConfig; api::CFieldConfig fieldConfig; api::CFieldConfig::TStrVec clauses; clauses.push_back("value"); @@ -744,7 +766,7 @@ BOOST_AUTO_TEST_CASE(testRestoreFailsWithEmptyStream) { std::ostringstream outputStrm; core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); - CTestAnomalyJob job("job", limits, fieldConfig, modelConfig, wrappedOutputStream); + CTestAnomalyJob job("job", limits, jobConfig, fieldConfig, modelConfig, wrappedOutputStream); core_t::TTime completeToTime(0); CEmptySearcher restoreSearcher; diff --git a/lib/api/unittest/CConfigUpdaterTest.cc b/lib/api/unittest/CConfigUpdaterTest.cc index 19c87e521e..0ba465d2e7 100644 --- a/lib/api/unittest/CConfigUpdaterTest.cc +++ b/lib/api/unittest/CConfigUpdaterTest.cc @@ -7,6 +7,7 @@ #include #include +#include #include #include @@ -25,22 +26,25 @@ using namespace ml; using namespace api; BOOST_AUTO_TEST_CASE(testUpdateGivenUpdateCannotBeParsed) { + CAnomalyJobConfig jobConfig; CFieldConfig fieldConfig; model::CAnomalyDetectorModelConfig modelConfig = model::CAnomalyDetectorModelConfig::defaultConfig(); - CConfigUpdater configUpdater(fieldConfig, modelConfig); + CConfigUpdater configUpdater(jobConfig, fieldConfig, modelConfig); BOOST_TEST_REQUIRE(configUpdater.update("this is invalid") == false); } BOOST_AUTO_TEST_CASE(testUpdateGivenUnknownStanzas) { + CAnomalyJobConfig jobConfig; CFieldConfig fieldConfig; model::CAnomalyDetectorModelConfig modelConfig = model::CAnomalyDetectorModelConfig::defaultConfig(); - CConfigUpdater configUpdater(fieldConfig, modelConfig); + CConfigUpdater configUpdater(jobConfig, fieldConfig, modelConfig); BOOST_TEST_REQUIRE(configUpdater.update("[unknown1]\na = 1\n[unknown2]\nb = 2\n") == false); } BOOST_AUTO_TEST_CASE(testUpdateGivenModelPlotConfig) { + CAnomalyJobConfig jobConfig; CFieldConfig fieldConfig; model::CAnomalyDetectorModelConfig modelConfig = model::CAnomalyDetectorModelConfig::defaultConfig(); @@ -52,7 +56,7 @@ BOOST_AUTO_TEST_CASE(testUpdateGivenModelPlotConfig) { std::string configUpdate("[modelPlotConfig]\nboundspercentile = 83.5\nterms = c,d\nannotations_enabled = false\n"); - CConfigUpdater configUpdater(fieldConfig, modelConfig); + CConfigUpdater configUpdater(jobConfig, fieldConfig, modelConfig); BOOST_TEST_REQUIRE(configUpdater.update(configUpdate)); BOOST_REQUIRE_EQUAL(83.5, modelConfig.modelPlotBoundsPercentile()); @@ -64,6 +68,7 @@ BOOST_AUTO_TEST_CASE(testUpdateGivenModelPlotConfig) { } 
BOOST_AUTO_TEST_CASE(testUpdateGivenDetectorRules) { + CAnomalyJobConfig jobConfig; CFieldConfig fieldConfig; std::string originalRules0("[{\"actions\":[\"skip_result\"],"); originalRules0 += "\"conditions\":[{\"applies_to\":\"actual\",\"operator\":\"lt\",\"value\": 5.0}]}]"; @@ -80,7 +85,7 @@ BOOST_AUTO_TEST_CASE(testUpdateGivenDetectorRules) { "[{\"actions\":[\"skip_result\"],\"conditions\":[{\"applies_to\":\"typical\"," "\"operator\":\"lt\",\"value\": 15.0}]}]"); - CConfigUpdater configUpdater(fieldConfig, modelConfig); + CConfigUpdater configUpdater(jobConfig, fieldConfig, modelConfig); BOOST_TEST_REQUIRE(configUpdater.update(configUpdate0)); BOOST_TEST_REQUIRE(configUpdater.update(configUpdate1)); @@ -95,6 +100,7 @@ BOOST_AUTO_TEST_CASE(testUpdateGivenDetectorRules) { } BOOST_AUTO_TEST_CASE(testUpdateGivenRulesWithInvalidDetectorIndex) { + CAnomalyJobConfig jobConfig; CFieldConfig fieldConfig; std::string originalRules("[{\"actions\":[\"skip_result\"],"); originalRules += "\"conditions\":[{\"applies_to\":\"actual\",\"operator\":\"lt\",\"value\": 5.0}]}]"; @@ -105,12 +111,13 @@ BOOST_AUTO_TEST_CASE(testUpdateGivenRulesWithInvalidDetectorIndex) { std::string configUpdate("[detectorRules]\ndetectorIndex = invalid\nrulesJson = []\n"); - CConfigUpdater configUpdater(fieldConfig, modelConfig); + CConfigUpdater configUpdater(jobConfig, fieldConfig, modelConfig); BOOST_TEST_REQUIRE(configUpdater.update(configUpdate) == false); } BOOST_AUTO_TEST_CASE(testUpdateGivenFilters) { + CAnomalyJobConfig jobConfig; CFieldConfig fieldConfig; fieldConfig.processFilter("filter.filter_1", "[\"aaa\",\"bbb\"]"); fieldConfig.processFilter("filter.filter_2", "[\"ccc\",\"ddd\"]"); @@ -134,11 +141,11 @@ BOOST_AUTO_TEST_CASE(testUpdateGivenFilters) { // Update existing ones std::string configUpdate("[filters]\nfilter.filter_1=[\"ccc\",\"ddd\"]\nfilter.filter_2=[\"aaa\",\"bbb\"]\n"); - CConfigUpdater configUpdater(fieldConfig, modelConfig); + CConfigUpdater configUpdater(jobConfig, fieldConfig, modelConfig); BOOST_TEST_REQUIRE(configUpdater.update(configUpdate)); - ruleFilters = fieldConfig.ruleFilters(); + ruleFilters = jobConfig.analysisConfig().ruleFilters(); BOOST_REQUIRE_EQUAL(std::size_t(2), ruleFilters.size()); BOOST_TEST_REQUIRE(ruleFilters["filter_1"].contains("aaa") == false); @@ -155,7 +162,7 @@ BOOST_AUTO_TEST_CASE(testUpdateGivenFilters) { configUpdate = "[filters]\nfilter.filter_3=[\"new\"]\n"; BOOST_TEST_REQUIRE(configUpdater.update(configUpdate)); - ruleFilters = fieldConfig.ruleFilters(); + ruleFilters = jobConfig.analysisConfig().ruleFilters(); BOOST_REQUIRE_EQUAL(std::size_t(3), ruleFilters.size()); BOOST_TEST_REQUIRE(ruleFilters["filter_3"].contains("new")); } @@ -170,6 +177,7 @@ BOOST_AUTO_TEST_CASE(testUpdateGivenScheduledEvents) { "\"conditions\":[{\"applies_to\":\"time\",\"operator\":\"gte\",\"value\": 3.0}," "{\"applies_to\":\"time\",\"operator\":\"lt\",\"value\": 4.0}]}]"; + CAnomalyJobConfig jobConfig; CFieldConfig fieldConfig; // Set up some events @@ -197,7 +205,7 @@ BOOST_AUTO_TEST_CASE(testUpdateGivenScheduledEvents) { model::CAnomalyDetectorModelConfig modelConfig = model::CAnomalyDetectorModelConfig::defaultConfig(); - CConfigUpdater configUpdater(fieldConfig, modelConfig); + CConfigUpdater configUpdater(jobConfig, fieldConfig, modelConfig); // Test an update that replaces the events { diff --git a/lib/api/unittest/CForecastRunnerTest.cc b/lib/api/unittest/CForecastRunnerTest.cc index be4c2469f4..243937a7dd 100644 --- a/lib/api/unittest/CForecastRunnerTest.cc +++ 
+++ b/lib/api/unittest/CForecastRunnerTest.cc
@@ -11,6 +11,7 @@
 #include
 #include
+#include
 #include
 #include "CTestAnomalyJob.h"
@@ -74,6 +75,7 @@ BOOST_AUTO_TEST_CASE(testSummaryCount) {
     ml::core::CJsonOutputStreamWrapper streamWrapper(outputStrm);
     ml::model::CLimits limits;
     ml::api::CFieldConfig fieldConfig;
+    ml::api::CAnomalyJobConfig jobConfig;
     ml::api::CFieldConfig::TStrVec clauses;
     clauses.push_back("count");
     clauses.push_back("summarycountfield=count");
@@ -81,7 +83,7 @@ BOOST_AUTO_TEST_CASE(testSummaryCount) {
     ml::model::CAnomalyDetectorModelConfig modelConfig =
         ml::model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_LENGTH);
-    CTestAnomalyJob job("job", limits, fieldConfig, modelConfig, streamWrapper);
+    CTestAnomalyJob job("job", limits, jobConfig, fieldConfig, modelConfig, streamWrapper);
     populateJob(generateRecordWithSummaryCount, job);
     CTestAnomalyJob::TStrStrUMap dataRows;
@@ -144,6 +146,7 @@ BOOST_AUTO_TEST_CASE(testPopulation) {
     {
     ml::core::CJsonOutputStreamWrapper streamWrapper(outputStrm);
     ml::model::CLimits limits;
+    ml::api::CAnomalyJobConfig jobConfig;
     ml::api::CFieldConfig fieldConfig;
     ml::api::CFieldConfig::TStrVec clauses;
     clauses.push_back("count");
@@ -153,7 +156,7 @@ BOOST_AUTO_TEST_CASE(testPopulation) {
     ml::model::CAnomalyDetectorModelConfig modelConfig =
         ml::model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_LENGTH);
-    CTestAnomalyJob job("job", limits, fieldConfig, modelConfig, streamWrapper);
+    CTestAnomalyJob job("job", limits, jobConfig, fieldConfig, modelConfig, streamWrapper);
     populateJob(generatePopulationRecord, job);
     CTestAnomalyJob::TStrStrUMap dataRows;
@@ -187,6 +190,7 @@ BOOST_AUTO_TEST_CASE(testRare) {
     {
     ml::core::CJsonOutputStreamWrapper streamWrapper(outputStrm);
     ml::model::CLimits limits;
+    ml::api::CAnomalyJobConfig jobConfig;
     ml::api::CFieldConfig fieldConfig;
     ml::api::CFieldConfig::TStrVec clauses;
     clauses.push_back("rare");
@@ -197,7 +201,7 @@ BOOST_AUTO_TEST_CASE(testRare) {
     ml::model::CAnomalyDetectorModelConfig modelConfig =
         ml::model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_LENGTH);
-    CTestAnomalyJob job("job", limits, fieldConfig, modelConfig, streamWrapper);
+    CTestAnomalyJob job("job", limits, jobConfig, fieldConfig, modelConfig, streamWrapper);
     populateJob(generateRecordWithStatus, job, 5000);
     CTestAnomalyJob::TStrStrUMap dataRows;
@@ -231,6 +235,7 @@ BOOST_AUTO_TEST_CASE(testInsufficientData) {
     {
     ml::core::CJsonOutputStreamWrapper streamWrapper(outputStrm);
     ml::model::CLimits limits;
+    ml::api::CAnomalyJobConfig jobConfig;
     ml::api::CFieldConfig fieldConfig;
     ml::api::CFieldConfig::TStrVec clauses;
     clauses.push_back("count");
@@ -238,7 +243,7 @@ BOOST_AUTO_TEST_CASE(testInsufficientData) {
     ml::model::CAnomalyDetectorModelConfig modelConfig =
         ml::model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_LENGTH);
-    CTestAnomalyJob job("job", limits, fieldConfig, modelConfig, streamWrapper);
+    CTestAnomalyJob job("job", limits, jobConfig, fieldConfig, modelConfig, streamWrapper);
     populateJob(generateRecord, job, 3);
     CTestAnomalyJob::TStrStrUMap dataRows;
diff --git a/lib/api/unittest/CMultiFileDataAdderTest.cc b/lib/api/unittest/CMultiFileDataAdderTest.cc
index 79e1c778ce..0f798e9a06 100644
--- a/lib/api/unittest/CMultiFileDataAdderTest.cc
+++ b/lib/api/unittest/CMultiFileDataAdderTest.cc
@@ -14,6 +14,7 @@
 #include
 #include
+#include
 #include
 #include
 #include
@@ -68,6 +69,7 @@ void detectorPersistHelper(const std::string& configFileName,
     ml::core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm);
     ml::model::CLimits limits;
+    ml::api::CAnomalyJobConfig jobConfig;
     ml::api::CFieldConfig fieldConfig;
     BOOST_TEST_REQUIRE(fieldConfig.initFromFile(configFileName));
@@ -77,7 +79,7 @@ void detectorPersistHelper(const std::string& configFileName,
     std::string origSnapshotId;
     std::size_t numOrigDocs(0);
-    CTestAnomalyJob origJob(JOB_ID, limits, fieldConfig, modelConfig, wrappedOutputStream,
+    CTestAnomalyJob origJob(JOB_ID, limits, jobConfig, fieldConfig, modelConfig, wrappedOutputStream,
                             std::bind(&reportPersistComplete, std::placeholders::_1,
                                       std::ref(origSnapshotId), std::ref(numOrigDocs)),
                             nullptr, -1, "time", timeFormat);
@@ -135,7 +137,7 @@ void detectorPersistHelper(const std::string& configFileName,
     std::string restoredSnapshotId;
     std::size_t numRestoredDocs(0);
     CTestAnomalyJob restoredJob(
-        JOB_ID, limits, fieldConfig, modelConfig, wrappedOutputStream,
+        JOB_ID, limits, jobConfig, fieldConfig, modelConfig, wrappedOutputStream,
         std::bind(&reportPersistComplete, std::placeholders::_1,
                   std::ref(restoredSnapshotId), std::ref(numRestoredDocs)));
diff --git a/lib/api/unittest/CPersistenceManagerTest.cc b/lib/api/unittest/CPersistenceManagerTest.cc
index 7e04881d23..5b7d7d9029 100644
--- a/lib/api/unittest/CPersistenceManagerTest.cc
+++ b/lib/api/unittest/CPersistenceManagerTest.cc
@@ -13,6 +13,7 @@
 #include
 #include
+#include
 #include
 #include
 #include
@@ -65,6 +66,7 @@ class CTestFixture {
         BOOST_TEST_REQUIRE(outputStrm.is_open());
         ml::model::CLimits limits;
+        ml::api::CAnomalyJobConfig jobConfig;
         ml::api::CFieldConfig fieldConfig;
         BOOST_TEST_REQUIRE(fieldConfig.initFromFile(configFileName));
@@ -104,6 +106,7 @@ class CTestFixture {
         CTestAnomalyJob job{JOB_ID,
                             limits,
+                            jobConfig,
                             fieldConfig,
                             modelConfig,
                             wrappedOutputStream,
@@ -200,6 +203,7 @@ class CTestFixture {
         BOOST_TEST_REQUIRE(outputStrm.is_open());
         ml::model::CLimits limits;
+        ml::api::CAnomalyJobConfig jobConfig;
         ml::api::CFieldConfig fieldConfig;
         BOOST_TEST_REQUIRE(fieldConfig.initFromFile(configFileName));
@@ -237,6 +241,7 @@ class CTestFixture {
         CTestAnomalyJob job{JOB_ID,
                             limits,
+                            jobConfig,
                             fieldConfig,
                             modelConfig,
                             wrappedOutputStream,
@@ -314,6 +319,7 @@ class CTestFixture {
         BOOST_TEST_REQUIRE(outputStrm.is_open());
         ml::model::CLimits limits;
+        ml::api::CAnomalyJobConfig jobConfig;
         ml::api::CFieldConfig fieldConfig;
         BOOST_TEST_REQUIRE(fieldConfig.initFromFile(configFileName));
@@ -346,6 +352,7 @@ class CTestFixture {
         CTestAnomalyJob job{
             JOB_ID,
             limits,
+            jobConfig,
             fieldConfig,
             modelConfig,
             wrappedOutputStream,
diff --git a/lib/api/unittest/CRestorePreviousStateTest.cc b/lib/api/unittest/CRestorePreviousStateTest.cc
index 019239f816..1f4c00b35e 100644
--- a/lib/api/unittest/CRestorePreviousStateTest.cc
+++ b/lib/api/unittest/CRestorePreviousStateTest.cc
@@ -12,6 +12,7 @@
 #include
 #include
+#include
 #include
 #include
 #include
@@ -156,6 +157,7 @@ void anomalyDetectorRestoreHelper(const std::string& stateFile,
     static const std::string JOB_ID("job");
     ml::model::CLimits limits;
+    ml::api::CAnomalyJobConfig jobConfig;
     ml::api::CFieldConfig fieldConfig;
     BOOST_TEST_REQUIRE(fieldConfig.initFromFile(configFileName));
@@ -171,7 +173,7 @@ void anomalyDetectorRestoreHelper(const std::string& stateFile,
     std::string restoredSnapshotId;
     std::size_t numRestoredDocs(0);
     CTestAnomalyJob restoredJob(
-        JOB_ID, limits, fieldConfig, modelConfig, wrappedOutputStream,
+        JOB_ID, limits, jobConfig, fieldConfig, modelConfig, wrappedOutputStream,
         std::bind(&reportPersistComplete, std::placeholders::_1, std::ref(restoredSnapshotId),
                   std::ref(numRestoredDocs)));
diff --git a/lib/api/unittest/CSingleStreamDataAdderTest.cc b/lib/api/unittest/CSingleStreamDataAdderTest.cc
index ef67adc9d2..a9c1babae5 100644
--- a/lib/api/unittest/CSingleStreamDataAdderTest.cc
+++ b/lib/api/unittest/CSingleStreamDataAdderTest.cc
@@ -14,6 +14,7 @@
 #include
 #include
+#include
 #include
 #include
 #include
@@ -60,6 +61,7 @@ void detectorPersistHelper(const std::string& configFileName,
     BOOST_TEST_REQUIRE(outputStrm.is_open());
     ml::model::CLimits limits;
+    ml::api::CAnomalyJobConfig jobConfig;
     ml::api::CFieldConfig fieldConfig;
     BOOST_TEST_REQUIRE(fieldConfig.initFromFile(configFileName));
@@ -75,7 +77,7 @@ void detectorPersistHelper(const std::string& configFileName,
     {
         CTestAnomalyJob origJob(
-            JOB_ID, limits, fieldConfig, modelConfig, wrappedOutputStream,
+            JOB_ID, limits, jobConfig, fieldConfig, modelConfig, wrappedOutputStream,
            std::bind(&reportPersistComplete, std::placeholders::_1,
                      std::ref(origSnapshotId), std::ref(numOrigDocs)),
            nullptr, -1, "time", timeFormat);
@@ -126,7 +128,7 @@ void detectorPersistHelper(const std::string& configFileName,
     {
         CTestAnomalyJob restoredJob(
-            JOB_ID, limits, fieldConfig, modelConfig, wrappedOutputStream,
+            JOB_ID, limits, jobConfig, fieldConfig, modelConfig, wrappedOutputStream,
            std::bind(&reportPersistComplete, std::placeholders::_1,
                      std::ref(restoredSnapshotId), std::ref(numRestoredDocs)));
diff --git a/lib/api/unittest/CStringStoreTest.cc b/lib/api/unittest/CStringStoreTest.cc
index 9e12263eb1..20fea7f434 100644
--- a/lib/api/unittest/CStringStoreTest.cc
+++ b/lib/api/unittest/CStringStoreTest.cc
@@ -11,6 +11,7 @@
 #include
 #include
+#include
 #include
 #include
 #include
@@ -124,6 +125,7 @@ BOOST_FIXTURE_TEST_CASE(testPersonStringPruning, CTestFixture) {
     core_t::TTime BUCKET_SPAN(10000);
     core_t::TTime time = 100000000;
+    api::CAnomalyJobConfig jobConfig;
     api::CFieldConfig fieldConfig;
     api::CFieldConfig::TStrVec clause;
     clause.push_back("max(notes)");
@@ -158,7 +160,8 @@ BOOST_FIXTURE_TEST_CASE(testPersonStringPruning, CTestFixture) {
         std::ostringstream outputStrm;
         ml::core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm);
-        CTestAnomalyJob job("job", limits, fieldConfig, modelConfig, wrappedOutputStream);
+        CTestAnomalyJob job("job", limits, jobConfig, fieldConfig, modelConfig,
+                            wrappedOutputStream);
         time = playData(time, BUCKET_SPAN, 100, 3, 2, 99, job);
         wrappedOutputStream.syncFlush();
@@ -204,7 +207,7 @@ BOOST_FIXTURE_TEST_CASE(testPersonStringPruning, CTestFixture) {
         std::ostringstream outputStrm;
         ml::core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm);
-        CTestAnomalyJob job("job", limits, fieldConfig, modelConfig, wrappedOutputStream,
+        CTestAnomalyJob job("job", limits, jobConfig, fieldConfig, modelConfig, wrappedOutputStream,
                             CTestAnomalyJob::TPersistCompleteFunc());
         core_t::TTime completeToTime(0);
@@ -245,7 +248,7 @@ BOOST_FIXTURE_TEST_CASE(testPersonStringPruning, CTestFixture) {
         std::ostringstream outputStrm;
         ml::core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm);
-        CTestAnomalyJob job("job", limits, fieldConfig, modelConfig, wrappedOutputStream,
+        CTestAnomalyJob job("job", limits, jobConfig, fieldConfig, modelConfig, wrappedOutputStream,
                             CTestAnomalyJob::TPersistCompleteFunc());
         core_t::TTime completeToTime(0);
@@ -287,7 +290,7 @@ BOOST_FIXTURE_TEST_CASE(testPersonStringPruning, CTestFixture) {
         std::ostringstream outputStrm;
         ml::core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm);
-        CTestAnomalyJob job("job", limits, fieldConfig, modelConfig, wrappedOutputStream,
+        CTestAnomalyJob job("job", limits, jobConfig, fieldConfig, modelConfig, wrappedOutputStream,
                             CTestAnomalyJob::TPersistCompleteFunc());
         core_t::TTime completeToTime(0);
@@ -315,6 +318,7 @@ BOOST_FIXTURE_TEST_CASE(testAttributeStringPruning, CTestFixture) {
     core_t::TTime BUCKET_SPAN(10000);
     core_t::TTime time = 100000000;
+    api::CAnomalyJobConfig jobConfig;
     api::CFieldConfig fieldConfig;
     api::CFieldConfig::TStrVec clause;
     clause.push_back("dc(notes)");
@@ -348,7 +352,8 @@ BOOST_FIXTURE_TEST_CASE(testAttributeStringPruning, CTestFixture) {
         std::ostringstream outputStrm;
         ml::core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm);
-        CTestAnomalyJob job("job", limits, fieldConfig, modelConfig, wrappedOutputStream);
+        CTestAnomalyJob job("job", limits, jobConfig, fieldConfig, modelConfig,
+                            wrappedOutputStream);
         time = playData(time, BUCKET_SPAN, 100, 3, 2, 99, job);
         wrappedOutputStream.syncFlush();
@@ -393,7 +398,7 @@ BOOST_FIXTURE_TEST_CASE(testAttributeStringPruning, CTestFixture) {
         std::ostringstream outputStrm;
         ml::core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm);
-        CTestAnomalyJob job("job", limits, fieldConfig, modelConfig, wrappedOutputStream,
+        CTestAnomalyJob job("job", limits, jobConfig, fieldConfig, modelConfig, wrappedOutputStream,
                             CTestAnomalyJob::TPersistCompleteFunc());
         core_t::TTime completeToTime(0);
@@ -435,7 +440,7 @@ BOOST_FIXTURE_TEST_CASE(testAttributeStringPruning, CTestFixture) {
         std::ostringstream outputStrm;
         ml::core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm);
-        CTestAnomalyJob job("job", limits, fieldConfig, modelConfig, wrappedOutputStream,
+        CTestAnomalyJob job("job", limits, jobConfig, fieldConfig, modelConfig, wrappedOutputStream,
                             CTestAnomalyJob::TPersistCompleteFunc());
         core_t::TTime completeToTime(0);
@@ -478,7 +483,7 @@ BOOST_FIXTURE_TEST_CASE(testAttributeStringPruning, CTestFixture) {
         std::ostringstream outputStrm;
         ml::core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm);
-        CTestAnomalyJob job("job", limits, fieldConfig, modelConfig, wrappedOutputStream,
+        CTestAnomalyJob job("job", limits, jobConfig, fieldConfig, modelConfig, wrappedOutputStream,
                             CTestAnomalyJob::TPersistCompleteFunc());
         core_t::TTime completeToTime(0);
@@ -506,6 +511,7 @@ BOOST_FIXTURE_TEST_CASE(testInfluencerStringPruning, CTestFixture) {
     core_t::TTime BUCKET_SPAN(10000);
     core_t::TTime time = 100000000;
+    api::CAnomalyJobConfig jobConfig;
     api::CFieldConfig fieldConfig;
     api::CFieldConfig::TStrVec clause;
     clause.push_back("max(notes)");
@@ -537,7 +543,8 @@ BOOST_FIXTURE_TEST_CASE(testInfluencerStringPruning, CTestFixture) {
         std::ostringstream outputStrm;
         ml::core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm);
-        CTestAnomalyJob job("job", limits, fieldConfig, modelConfig, wrappedOutputStream);
+        CTestAnomalyJob job("job", limits, jobConfig, fieldConfig, modelConfig,
+                            wrappedOutputStream);
         // Play in a few buckets with influencers, and see that they stick around for
         // 3 buckets
diff --git a/lib/api/unittest/CTestAnomalyJob.cc b/lib/api/unittest/CTestAnomalyJob.cc
index 826b7dfa0c..22362898fa 100644
--- a/lib/api/unittest/CTestAnomalyJob.cc
+++ b/lib/api/unittest/CTestAnomalyJob.cc
@@ -7,6 +7,7 @@
 CTestAnomalyJob::CTestAnomalyJob(const std::string& jobId,
                                  ml::model::CLimits& limits,
+                                 ml::api::CAnomalyJobConfig& jobConfig,
                                  ml::api::CFieldConfig& fieldConfig,
                                  ml::model::CAnomalyDetectorModelConfig& modelConfig,
                                  ml::core::CJsonOutputStreamWrapper& outputBuffer,
@@ -18,6 +19,7 @@ CTestAnomalyJob::CTestAnomalyJob(const std::string& jobId,
                                  std::size_t maxAnomalyRecords)
     : ml::api::CAnomalyJob(jobId,
                            limits,
+                           jobConfig,
                            fieldConfig,
                            modelConfig,
                            outputBuffer,
diff --git a/lib/api/unittest/CTestAnomalyJob.h b/lib/api/unittest/CTestAnomalyJob.h
index 960caa0e97..a4526f25f2 100644
--- a/lib/api/unittest/CTestAnomalyJob.h
+++ b/lib/api/unittest/CTestAnomalyJob.h
@@ -25,6 +25,7 @@ class CTestAnomalyJob : public ml::api::CAnomalyJob {
 public:
     CTestAnomalyJob(const std::string& jobId,
                     ml::model::CLimits& limits,
+                    ml::api::CAnomalyJobConfig& jobConfig,
                     ml::api::CFieldConfig& fieldConfig,
                     ml::model::CAnomalyDetectorModelConfig& modelConfig,
                     ml::core::CJsonOutputStreamWrapper& outputBuffer,

From d6449cdc27a74cb24469a82d90cb69cd7ca0c27e Mon Sep 17 00:00:00 2001
From: Ed Savage
Date: Thu, 3 Dec 2020 13:04:57 +0000
Subject: [PATCH 3/3] Attend to code review comments

---
 bin/autodetect/Main.cc          | 1 +
 include/api/CAnomalyJob.h       | 6 +++++-
 include/api/CAnomalyJobConfig.h | 3 ++-
 lib/api/CAnomalyJobConfig.cc    | 7 ++++++-
 4 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/bin/autodetect/Main.cc b/bin/autodetect/Main.cc
index c4c5af8959..8c50987eb6 100644
--- a/bin/autodetect/Main.cc
+++ b/bin/autodetect/Main.cc
@@ -207,6 +207,7 @@ int main(int argc, char** argv) {
         mutableFields.push_back(ml::api::CFieldDataCategorizer::MLCATEGORY_NAME);
     }
+    // TODO: Encapsulate the logic below into say CAnomalyJobConfig::makeModelConfig
     const ml::api::CAnomalyJobConfig::CAnalysisConfig& analysisConfig =
         jobConfig.analysisConfig();
     const std::string& summaryCountFieldName = analysisConfig.summaryCountFieldName();
diff --git a/include/api/CAnomalyJob.h b/include/api/CAnomalyJob.h
index 57c162f156..1799dd6471 100644
--- a/include/api/CAnomalyJob.h
+++ b/include/api/CAnomalyJob.h
@@ -428,9 +428,13 @@ class API_EXPORT CAnomalyJob : public CDataProcessor {
     //! Configuration settings for the analysis parsed from
     //! JSON configuration file.
+    //! Note that this is a non-const reference as it needs to be capable of
+    //! being modified by job updates (and those changes reflected wherever a
+    //! reference is held).
     CAnomalyJobConfig& m_JobConfig;

-    //! Field names to use for the analysis
+    //! Field names to use for the analysis.
+    //! This is a non-const reference for the same reasons as for m_JobConfig.
     CFieldConfig& m_FieldConfig;

     //! The model configuration
diff --git a/include/api/CAnomalyJobConfig.h b/include/api/CAnomalyJobConfig.h
index 8b02594ba0..2675ebcf1a 100644
--- a/include/api/CAnomalyJobConfig.h
+++ b/include/api/CAnomalyJobConfig.h
@@ -67,7 +67,7 @@ class API_EXPORT CAnomalyJobConfig {
         std::string m_PartitionFieldName{};
         std::string m_ExcludeFrequent{};
         std::string m_DetectorDescription{};
-        std::size_t m_DetectorIndex{};
+        int m_DetectorIndex{};
         bool m_UseNull{false};
     };
@@ -258,6 +258,7 @@ class API_EXPORT CAnomalyJobConfig {
     std::string jobId() const { return m_JobId; }
     std::string jobType() const { return m_JobType; }
     CAnalysisConfig& analysisConfig() { return m_AnalysisConfig; }
+    const CAnalysisConfig& analysisConfig() const { return m_AnalysisConfig; }
     const CDataDescription& dataDescription() const { return m_DataDescription; }
diff --git a/lib/api/CAnomalyJobConfig.cc b/lib/api/CAnomalyJobConfig.cc
index 58784294f2..58433390b9 100644
--- a/lib/api/CAnomalyJobConfig.cc
+++ b/lib/api/CAnomalyJobConfig.cc
@@ -391,7 +391,12 @@ void CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::parse(
     m_PartitionFieldName = parameters[PARTITION_FIELD_NAME].fallback(EMPTY_STRING);
     m_ExcludeFrequent = parameters[EXCLUDE_FREQUENT].fallback(EMPTY_STRING);
     m_DetectorDescription = parameters[DETECTOR_DESCRIPTION].fallback(EMPTY_STRING);
-    m_DetectorIndex = parameters[DETECTOR_INDEX].as();
+
+    // The detector index is of type int for historical reasons
+    // and for consistency across the code base.
+    // The explicit cast is required here as our JSON parser does not support
+    // 32 bit integer values.
+    m_DetectorIndex = static_cast(parameters[DETECTOR_INDEX].as());

     auto customRules = parameters[CUSTOM_RULES].jsonObject();
     if (customRules != nullptr) {
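
The TODO added to Main.cc in PATCH 3/3 points at folding the model-config construction into the job config class. A minimal sketch of such a helper follows; it is not part of this patch, and the defaultConfig() overload and the E_None/E_Manual summary-mode choice are assumptions that mirror what Main.cc currently does with the values read from the analysis config.

// Sketch only (hypothetical helper, not introduced by this patch).
#include <api/CAnomalyJobConfig.h>
#include <core/CoreTypes.h>
#include <model/CAnomalyDetectorModelConfig.h>
#include <model/ModelTypes.h>

ml::model::CAnomalyDetectorModelConfig
makeModelConfig(const ml::api::CAnomalyJobConfig& jobConfig) {
    // Gather the same values Main.cc reads from the parsed JSON config.
    const ml::api::CAnomalyJobConfig::CAnalysisConfig& analysisConfig =
        jobConfig.analysisConfig();
    const std::string& summaryCountFieldName = analysisConfig.summaryCountFieldName();
    ml::core_t::TTime bucketSpan = analysisConfig.bucketSpan();
    ml::core_t::TTime latency = analysisConfig.latency();
    bool multivariateByFields = analysisConfig.multivariateByFields();

    // Assumption: pre-summarised input is flagged by a non-empty summary count field.
    ml::model_t::ESummaryMode summaryMode{summaryCountFieldName.empty()
                                              ? ml::model_t::E_None
                                              : ml::model_t::E_Manual};

    // Assumed overload of defaultConfig(); forward whatever Main.cc currently passes.
    return ml::model::CAnomalyDetectorModelConfig::defaultConfig(
        bucketSpan, summaryMode, summaryCountFieldName, latency, multivariateByFields);
}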
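
The non-const reference comments added to CAnomalyJob.h tie in with the CConfigUpdaterTest changes earlier in the series: because the updater and the job share the same CAnomalyJobConfig reference, an update applied through CConfigUpdater is immediately visible through the job's config. A condensed illustration, lifted from the patterns in CConfigUpdaterTest.cc (the filter name and values are placeholders):

// Sketch based on the test code above; not part of the patch itself.
#include <api/CAnomalyJobConfig.h>
#include <api/CConfigUpdater.h>
#include <api/CFieldConfig.h>
#include <model/CAnomalyDetectorModelConfig.h>

void updateIsVisibleThroughSharedReference() {
    ml::api::CAnomalyJobConfig jobConfig;
    ml::api::CFieldConfig fieldConfig;
    ml::model::CAnomalyDetectorModelConfig modelConfig =
        ml::model::CAnomalyDetectorModelConfig::defaultConfig();

    // The updater holds references, so changes land in the same objects an
    // anomaly job would be reading from.
    ml::api::CConfigUpdater configUpdater(jobConfig, fieldConfig, modelConfig);
    configUpdater.update("[filters]\nfilter.filter_1=[\"ccc\",\"ddd\"]\n");

    // The new filter is now observable through the job config reference,
    // exactly as asserted in testUpdateGivenFilters.
    auto ruleFilters = jobConfig.analysisConfig().ruleFilters();
}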