From 85d0ca4de524d219e8ec6536ebc4bacd486a96d2 Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Wed, 13 Jan 2021 15:07:15 +0000 Subject: [PATCH] [ML] Remove redundant autodetect options (#1663) Remove the now redundant autodetect command line options and exit with error message on seeing unknown command line arguments Relates #1253 --- bin/autodetect/CCmdLineParser.cc | 59 ++++------------------- bin/autodetect/CCmdLineParser.h | 7 +-- bin/autodetect/Main.cc | 15 +++--- include/api/CAnomalyJobConfig.h | 12 ----- lib/api/unittest/CAnomalyJobConfigTest.cc | 15 ++++-- 5 files changed, 29 insertions(+), 79 deletions(-) diff --git a/bin/autodetect/CCmdLineParser.cc b/bin/autodetect/CCmdLineParser.cc index a4bcd5c075..ceed340a46 100644 --- a/bin/autodetect/CCmdLineParser.cc +++ b/bin/autodetect/CCmdLineParser.cc @@ -17,7 +17,7 @@ namespace ml { namespace autodetect { -const std::string CCmdLineParser::DESCRIPTION = "Usage: autodetect [options] [+ [by ]]\n" +const std::string CCmdLineParser::DESCRIPTION = "Usage: autodetect [options]]\n" "Options:"; bool CCmdLineParser::parse(int argc, @@ -30,7 +30,6 @@ bool CCmdLineParser::parse(int argc, std::string& logPipe, char& delimiter, bool& lengthEncodedInput, - std::string& timeField, std::string& timeFormat, std::string& quantilesState, bool& deleteStateFiles, @@ -46,47 +45,29 @@ bool CCmdLineParser::parse(int argc, bool& isPersistFileNamedPipe, bool& isPersistInForeground, std::size_t& maxAnomalyRecords, - bool& memoryUsage, - bool& stopCategorizationOnWarnStatus, - TStrVec& clauseTokens) { + bool& memoryUsage) { try { boost::program_options::options_description desc(DESCRIPTION); // clang-format off desc.add_options() ("help", "Display this information and exit") ("version", "Display version information and exit") - ("config", boost::program_options::value(), + ("config", boost::program_options::value()->required(), "The job configuration file") ("filtersconfig", boost::program_options::value(), - "The filters configuration file") + "The filters configuration file") ("eventsconfig", boost::program_options::value(), - "The scheduled events configuration file") - ("limitconfig", boost::program_options::value(), - "Optional limit config file") + "The scheduled events configuration file") ("modelconfig", boost::program_options::value(), "Optional model config file") - ("fieldconfig", boost::program_options::value(), - "Optional field config file") - ("modelplotconfig", boost::program_options::value(), - "Optional model plot config file") - ("jobid", boost::program_options::value(), - "ID of the job this process is associated with") ("logProperties", boost::program_options::value(), "Optional logger properties file") ("logPipe", boost::program_options::value(), "Optional log to named pipe") - ("bucketspan", boost::program_options::value(), - "Optional aggregation bucket span (in seconds) - default is 300") - ("latency", boost::program_options::value(), - "Optional maximum delay for out-of-order records (in seconds) - default is 0") - ("summarycountfield", boost::program_options::value(), - "Optional field to that contains counts for pre-summarized input - default is none") ("delimiter", boost::program_options::value(), "Optional delimiter character for delimited data formats - default is '\t' (tab separated)") ("lengthEncodedInput", "Take input in length encoded binary format - default is delimited") - ("timefield", boost::program_options::value(), - "Optional name of the field containing the timestamp - default is 'time'") ("timeformat", boost::program_options::value(), "Optional format of the date in the time field in strptime code - default is the epoch time in seconds") ("quantilesState", boost::program_options::value(), @@ -107,31 +88,19 @@ bool CCmdLineParser::parse(int argc, ("persist", boost::program_options::value(), "Optional file to persist state to - not present means no state persistence") ("persistIsPipe", "Specified persist file is a named pipe") - ("persistInterval", boost::program_options::value(), - "Optional time interval at which to periodically persist model state (Mutually exclusive with bucketPersistInterval)") ("persistInForeground", "Persistence occurs in the foreground. Defaults to background persistence.") ("bucketPersistInterval", boost::program_options::value(), - "Optional number of buckets after which to periodically persist model state (Mutually exclusive with persistInterval)") - ("maxQuantileInterval", boost::program_options::value(), - "Optional interval at which to periodically output quantiles if they have not been output due to an anomaly - if not specified then quantiles will only be output following a big anomaly") + "Optional number of buckets after which to periodically persist model state.") ("maxAnomalyRecords", boost::program_options::value(), "The maximum number of records to be outputted for each bucket. Defaults to 100, a value 0 removes the limit.") ("memoryUsage", "Log the model memory usage at the end of the job") - ("multivariateByFields", - "Optional flag to enable multi-variate analysis of correlated by fields") - ("stopCategorizationOnWarnStatus", - "Optional flag to stop categorization for partitions where the status is 'warn'.") ; // clang-format on - boost::program_options::variables_map vm; - boost::program_options::parsed_options parsed = - boost::program_options::command_line_parser(argc, argv) - .options(desc) - .allow_unregistered() - .run(); - boost::program_options::store(parsed, vm); + boost::program_options::store( + boost::program_options::parse_command_line(argc, argv, desc), vm); + boost::program_options::notify(vm); if (vm.count("help") > 0) { std::cerr << desc << std::endl; @@ -169,9 +138,6 @@ bool CCmdLineParser::parse(int argc, if (vm.count("lengthEncodedInput") > 0) { lengthEncodedInput = true; } - if (vm.count("timefield") > 0) { - timeField = vm["timefield"].as(); - } if (vm.count("timeformat") > 0) { timeFormat = vm["timeformat"].as(); } @@ -220,13 +186,6 @@ bool CCmdLineParser::parse(int argc, if (vm.count("memoryUsage") > 0) { memoryUsage = true; } - if (vm.count("stopCategorizationOnWarnStatus") > 0) { - stopCategorizationOnWarnStatus = true; - } - - boost::program_options::collect_unrecognized( - parsed.options, boost::program_options::include_positional) - .swap(clauseTokens); } catch (std::exception& e) { std::cerr << "Error processing command line: " << e.what() << std::endl; return false; diff --git a/bin/autodetect/CCmdLineParser.h b/bin/autodetect/CCmdLineParser.h index db87e7947a..eb74eb14cc 100644 --- a/bin/autodetect/CCmdLineParser.h +++ b/bin/autodetect/CCmdLineParser.h @@ -37,12 +37,11 @@ class CCmdLineParser { std::string& config, std::string& filtersConfig, std::string& eventsConfig, - std::string& modelPlotConfigFile, + std::string& modelConfigFile, std::string& logProperties, std::string& logPipe, char& delimiter, bool& lengthEncodedInput, - std::string& timeField, std::string& timeFormat, std::string& quantilesState, bool& deleteStateFiles, @@ -58,9 +57,7 @@ class CCmdLineParser { bool& isPersistFileNamedPipe, bool& isPersistInForeground, std::size_t& maxAnomalyRecords, - bool& memoryUsage, - bool& stopCategorizationOnWarnStatus, - TStrVec& clauseTokens); + bool& memoryUsage); private: static const std::string DESCRIPTION; diff --git a/bin/autodetect/Main.cc b/bin/autodetect/Main.cc index c1f2bdaccb..ce794cc661 100644 --- a/bin/autodetect/Main.cc +++ b/bin/autodetect/Main.cc @@ -92,7 +92,6 @@ int main(int argc, char** argv) { std::string logPipe; char delimiter{'\t'}; bool lengthEncodedInput{false}; - std::string timeField{ml::api::CAnomalyJob::DEFAULT_TIME_FIELD_NAME}; std::string timeFormat; std::string quantilesStateFile; bool deleteStateFiles{false}; @@ -110,16 +109,14 @@ int main(int argc, char** argv) { bool isPersistInForeground{false}; std::size_t maxAnomalyRecords{100}; bool memoryUsage{false}; - bool stopCategorizationOnWarnStatus{false}; - TStrVec clauseTokens; if (ml::autodetect::CCmdLineParser::parse( - argc, argv, configFile, filtersConfigFile, eventsConfigFile, modelConfigFile, - logProperties, logPipe, delimiter, lengthEncodedInput, timeField, + argc, argv, configFile, filtersConfigFile, eventsConfigFile, + modelConfigFile, logProperties, logPipe, delimiter, lengthEncodedInput, timeFormat, quantilesStateFile, deleteStateFiles, bucketPersistInterval, - namedPipeConnectTimeout, inputFileName, isInputFileNamedPipe, outputFileName, - isOutputFileNamedPipe, restoreFileName, isRestoreFileNamedPipe, persistFileName, - isPersistFileNamedPipe, isPersistInForeground, maxAnomalyRecords, - memoryUsage, stopCategorizationOnWarnStatus, clauseTokens) == false) { + namedPipeConnectTimeout, inputFileName, isInputFileNamedPipe, + outputFileName, isOutputFileNamedPipe, restoreFileName, + isRestoreFileNamedPipe, persistFileName, isPersistFileNamedPipe, + isPersistInForeground, maxAnomalyRecords, memoryUsage) == false) { return EXIT_FAILURE; } diff --git a/include/api/CAnomalyJobConfig.h b/include/api/CAnomalyJobConfig.h index 248e23c6d5..6e1c9fec08 100644 --- a/include/api/CAnomalyJobConfig.h +++ b/include/api/CAnomalyJobConfig.h @@ -219,12 +219,6 @@ class API_EXPORT CAnomalyJobConfig { CAnalysisConfig(const std::string& categorizationFieldName) : m_CategorizationFieldName{categorizationFieldName} {} - //! Constructor taking a map of detector rule filters keyed by filter_id & - //! a vector of scheduled events data - CAnalysisConfig(const CDetectionRulesJsonParser::TStrPatternSetUMap& ruleFilters, - const TStrDetectionRulePrVec& scheduledEvents) - : m_RuleFilters(ruleFilters), m_ScheduledEvents(scheduledEvents) {} - void init(const CDetectionRulesJsonParser::TStrPatternSetUMap& ruleFilters, const TStrDetectionRulePrVec& scheduledEvents) { m_RuleFilters = ruleFilters; @@ -488,12 +482,6 @@ class API_EXPORT CAnomalyJobConfig { explicit CAnomalyJobConfig(const std::string& categorizationFieldName) : m_AnalysisConfig(categorizationFieldName) {} - // This one is only needed for historical reasons. Once The Java side sends the - // scheduled events and filters config as JSON this can go. - CAnomalyJobConfig(const CDetectionRulesJsonParser::TStrPatternSetUMap& ruleFilters, - const TStrDetectionRulePrVec& scheduledEvents) - : m_AnalysisConfig(ruleFilters, scheduledEvents) {} - bool readFile(const std::string& fileName, std::string& fileContents); bool initFromFile(const std::string& configFile); diff --git a/lib/api/unittest/CAnomalyJobConfigTest.cc b/lib/api/unittest/CAnomalyJobConfigTest.cc index 0f6d70cb94..2053a4e7f8 100644 --- a/lib/api/unittest/CAnomalyJobConfigTest.cc +++ b/lib/api/unittest/CAnomalyJobConfigTest.cc @@ -1040,9 +1040,18 @@ BOOST_AUTO_TEST_CASE(testParse) { BOOST_TEST_REQUIRE(!jobConfigEmptyFilterMap.isInitialized()); // Expect parsing to succeed if the filter referenced by the custom rule can be found in the filter map. - ml::api::CDetectionRulesJsonParser::TStrPatternSetUMap filterMap{{"safe_ips", {}}}; - ml::api::CAnomalyJobConfig::TStrDetectionRulePrVec scheduledEvents{}; - ml::api::CAnomalyJobConfig jobConfig(filterMap, scheduledEvents); + const std::string filterConfigJson{"{\"filters\":[{\"filter_id\":\"safe_ips\",\"items\":[]}]}"}; + ml::api::CAnomalyJobConfig jobConfig; + BOOST_TEST_REQUIRE(jobConfig.parseFilterConfig(filterConfigJson)); + + const std::string validScheduledEventsConfigJson{"{\"events\":[" + "]}"}; + + BOOST_TEST_REQUIRE(jobConfig.parseEventConfig(validScheduledEventsConfigJson)); + + jobConfig.analysisConfig().init(jobConfig.ruleFilters(), + jobConfig.scheduledEvents()); + BOOST_REQUIRE_MESSAGE(jobConfig.parse(validAnomalyJobConfigWithCustomRuleFilter), "Cannot parse JSON job config!"); BOOST_TEST_REQUIRE(jobConfig.isInitialized());