Skip to content

Commit

Permalink
[7.x][ML] Remove redundant autodetect options (#1663) (#1666)
Browse files Browse the repository at this point in the history
Remove the now redundant autodetect command line options and
exit with error message on seeing unknown command line arguments

Relates #1253
Backports #1663
  • Loading branch information
edsavage authored Jan 13, 2021
1 parent a043c74 commit 3bc3e3a
Show file tree
Hide file tree
Showing 5 changed files with 29 additions and 79 deletions.
59 changes: 9 additions & 50 deletions bin/autodetect/CCmdLineParser.cc
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
namespace ml {
namespace autodetect {

const std::string CCmdLineParser::DESCRIPTION = "Usage: autodetect [options] [<fieldname>+ [by <fieldname>]]\n"
const std::string CCmdLineParser::DESCRIPTION = "Usage: autodetect [options]]\n"
"Options:";

bool CCmdLineParser::parse(int argc,
Expand All @@ -30,7 +30,6 @@ bool CCmdLineParser::parse(int argc,
std::string& logPipe,
char& delimiter,
bool& lengthEncodedInput,
std::string& timeField,
std::string& timeFormat,
std::string& quantilesState,
bool& deleteStateFiles,
Expand All @@ -46,47 +45,29 @@ bool CCmdLineParser::parse(int argc,
bool& isPersistFileNamedPipe,
bool& isPersistInForeground,
std::size_t& maxAnomalyRecords,
bool& memoryUsage,
bool& stopCategorizationOnWarnStatus,
TStrVec& clauseTokens) {
bool& memoryUsage) {
try {
boost::program_options::options_description desc(DESCRIPTION);
// clang-format off
desc.add_options()
("help", "Display this information and exit")
("version", "Display version information and exit")
("config", boost::program_options::value<std::string>(),
("config", boost::program_options::value<std::string>()->required(),
"The job configuration file")
("filtersconfig", boost::program_options::value<std::string>(),
"The filters configuration file")
"The filters configuration file")
("eventsconfig", boost::program_options::value<std::string>(),
"The scheduled events configuration file")
("limitconfig", boost::program_options::value<std::string>(),
"Optional limit config file")
"The scheduled events configuration file")
("modelconfig", boost::program_options::value<std::string>(),
"Optional model config file")
("fieldconfig", boost::program_options::value<std::string>(),
"Optional field config file")
("modelplotconfig", boost::program_options::value<std::string>(),
"Optional model plot config file")
("jobid", boost::program_options::value<std::string>(),
"ID of the job this process is associated with")
("logProperties", boost::program_options::value<std::string>(),
"Optional logger properties file")
("logPipe", boost::program_options::value<std::string>(),
"Optional log to named pipe")
("bucketspan", boost::program_options::value<core_t::TTime>(),
"Optional aggregation bucket span (in seconds) - default is 300")
("latency", boost::program_options::value<core_t::TTime>(),
"Optional maximum delay for out-of-order records (in seconds) - default is 0")
("summarycountfield", boost::program_options::value<std::string>(),
"Optional field to that contains counts for pre-summarized input - default is none")
("delimiter", boost::program_options::value<char>(),
"Optional delimiter character for delimited data formats - default is '\t' (tab separated)")
("lengthEncodedInput",
"Take input in length encoded binary format - default is delimited")
("timefield", boost::program_options::value<std::string>(),
"Optional name of the field containing the timestamp - default is 'time'")
("timeformat", boost::program_options::value<std::string>(),
"Optional format of the date in the time field in strptime code - default is the epoch time in seconds")
("quantilesState", boost::program_options::value<std::string>(),
Expand All @@ -107,31 +88,19 @@ bool CCmdLineParser::parse(int argc,
("persist", boost::program_options::value<std::string>(),
"Optional file to persist state to - not present means no state persistence")
("persistIsPipe", "Specified persist file is a named pipe")
("persistInterval", boost::program_options::value<core_t::TTime>(),
"Optional time interval at which to periodically persist model state (Mutually exclusive with bucketPersistInterval)")
("persistInForeground", "Persistence occurs in the foreground. Defaults to background persistence.")
("bucketPersistInterval", boost::program_options::value<std::size_t>(),
"Optional number of buckets after which to periodically persist model state (Mutually exclusive with persistInterval)")
("maxQuantileInterval", boost::program_options::value<core_t::TTime>(),
"Optional interval at which to periodically output quantiles if they have not been output due to an anomaly - if not specified then quantiles will only be output following a big anomaly")
"Optional number of buckets after which to periodically persist model state.")
("maxAnomalyRecords", boost::program_options::value<std::size_t>(),
"The maximum number of records to be outputted for each bucket. Defaults to 100, a value 0 removes the limit.")
("memoryUsage",
"Log the model memory usage at the end of the job")
("multivariateByFields",
"Optional flag to enable multi-variate analysis of correlated by fields")
("stopCategorizationOnWarnStatus",
"Optional flag to stop categorization for partitions where the status is 'warn'.")
;
// clang-format on

boost::program_options::variables_map vm;
boost::program_options::parsed_options parsed =
boost::program_options::command_line_parser(argc, argv)
.options(desc)
.allow_unregistered()
.run();
boost::program_options::store(parsed, vm);
boost::program_options::store(
boost::program_options::parse_command_line(argc, argv, desc), vm);
boost::program_options::notify(vm);

if (vm.count("help") > 0) {
std::cerr << desc << std::endl;
Expand Down Expand Up @@ -169,9 +138,6 @@ bool CCmdLineParser::parse(int argc,
if (vm.count("lengthEncodedInput") > 0) {
lengthEncodedInput = true;
}
if (vm.count("timefield") > 0) {
timeField = vm["timefield"].as<std::string>();
}
if (vm.count("timeformat") > 0) {
timeFormat = vm["timeformat"].as<std::string>();
}
Expand Down Expand Up @@ -220,13 +186,6 @@ bool CCmdLineParser::parse(int argc,
if (vm.count("memoryUsage") > 0) {
memoryUsage = true;
}
if (vm.count("stopCategorizationOnWarnStatus") > 0) {
stopCategorizationOnWarnStatus = true;
}

boost::program_options::collect_unrecognized(
parsed.options, boost::program_options::include_positional)
.swap(clauseTokens);
} catch (std::exception& e) {
std::cerr << "Error processing command line: " << e.what() << std::endl;
return false;
Expand Down
7 changes: 2 additions & 5 deletions bin/autodetect/CCmdLineParser.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,12 +37,11 @@ class CCmdLineParser {
std::string& config,
std::string& filtersConfig,
std::string& eventsConfig,
std::string& modelPlotConfigFile,
std::string& modelConfigFile,
std::string& logProperties,
std::string& logPipe,
char& delimiter,
bool& lengthEncodedInput,
std::string& timeField,
std::string& timeFormat,
std::string& quantilesState,
bool& deleteStateFiles,
Expand All @@ -58,9 +57,7 @@ class CCmdLineParser {
bool& isPersistFileNamedPipe,
bool& isPersistInForeground,
std::size_t& maxAnomalyRecords,
bool& memoryUsage,
bool& stopCategorizationOnWarnStatus,
TStrVec& clauseTokens);
bool& memoryUsage);

private:
static const std::string DESCRIPTION;
Expand Down
15 changes: 6 additions & 9 deletions bin/autodetect/Main.cc
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,6 @@ int main(int argc, char** argv) {
std::string logPipe;
char delimiter{'\t'};
bool lengthEncodedInput{false};
std::string timeField{ml::api::CAnomalyJob::DEFAULT_TIME_FIELD_NAME};
std::string timeFormat;
std::string quantilesStateFile;
bool deleteStateFiles{false};
Expand All @@ -110,16 +109,14 @@ int main(int argc, char** argv) {
bool isPersistInForeground{false};
std::size_t maxAnomalyRecords{100};
bool memoryUsage{false};
bool stopCategorizationOnWarnStatus{false};
TStrVec clauseTokens;
if (ml::autodetect::CCmdLineParser::parse(
argc, argv, configFile, filtersConfigFile, eventsConfigFile, modelConfigFile,
logProperties, logPipe, delimiter, lengthEncodedInput, timeField,
argc, argv, configFile, filtersConfigFile, eventsConfigFile,
modelConfigFile, logProperties, logPipe, delimiter, lengthEncodedInput,
timeFormat, quantilesStateFile, deleteStateFiles, bucketPersistInterval,
namedPipeConnectTimeout, inputFileName, isInputFileNamedPipe, outputFileName,
isOutputFileNamedPipe, restoreFileName, isRestoreFileNamedPipe, persistFileName,
isPersistFileNamedPipe, isPersistInForeground, maxAnomalyRecords,
memoryUsage, stopCategorizationOnWarnStatus, clauseTokens) == false) {
namedPipeConnectTimeout, inputFileName, isInputFileNamedPipe,
outputFileName, isOutputFileNamedPipe, restoreFileName,
isRestoreFileNamedPipe, persistFileName, isPersistFileNamedPipe,
isPersistInForeground, maxAnomalyRecords, memoryUsage) == false) {
return EXIT_FAILURE;
}

Expand Down
12 changes: 0 additions & 12 deletions include/api/CAnomalyJobConfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -219,12 +219,6 @@ class API_EXPORT CAnomalyJobConfig {
CAnalysisConfig(const std::string& categorizationFieldName)
: m_CategorizationFieldName{categorizationFieldName} {}

//! Constructor taking a map of detector rule filters keyed by filter_id &
//! a vector of scheduled events data
CAnalysisConfig(const CDetectionRulesJsonParser::TStrPatternSetUMap& ruleFilters,
const TStrDetectionRulePrVec& scheduledEvents)
: m_RuleFilters(ruleFilters), m_ScheduledEvents(scheduledEvents) {}

void init(const CDetectionRulesJsonParser::TStrPatternSetUMap& ruleFilters,
const TStrDetectionRulePrVec& scheduledEvents) {
m_RuleFilters = ruleFilters;
Expand Down Expand Up @@ -488,12 +482,6 @@ class API_EXPORT CAnomalyJobConfig {
explicit CAnomalyJobConfig(const std::string& categorizationFieldName)
: m_AnalysisConfig(categorizationFieldName) {}

// This one is only needed for historical reasons. Once The Java side sends the
// scheduled events and filters config as JSON this can go.
CAnomalyJobConfig(const CDetectionRulesJsonParser::TStrPatternSetUMap& ruleFilters,
const TStrDetectionRulePrVec& scheduledEvents)
: m_AnalysisConfig(ruleFilters, scheduledEvents) {}

bool readFile(const std::string& fileName, std::string& fileContents);

bool initFromFile(const std::string& configFile);
Expand Down
15 changes: 12 additions & 3 deletions lib/api/unittest/CAnomalyJobConfigTest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1040,9 +1040,18 @@ BOOST_AUTO_TEST_CASE(testParse) {
BOOST_TEST_REQUIRE(!jobConfigEmptyFilterMap.isInitialized());

// Expect parsing to succeed if the filter referenced by the custom rule can be found in the filter map.
ml::api::CDetectionRulesJsonParser::TStrPatternSetUMap filterMap{{"safe_ips", {}}};
ml::api::CAnomalyJobConfig::TStrDetectionRulePrVec scheduledEvents{};
ml::api::CAnomalyJobConfig jobConfig(filterMap, scheduledEvents);
const std::string filterConfigJson{"{\"filters\":[{\"filter_id\":\"safe_ips\",\"items\":[]}]}"};
ml::api::CAnomalyJobConfig jobConfig;
BOOST_TEST_REQUIRE(jobConfig.parseFilterConfig(filterConfigJson));

const std::string validScheduledEventsConfigJson{"{\"events\":["
"]}"};

BOOST_TEST_REQUIRE(jobConfig.parseEventConfig(validScheduledEventsConfigJson));

jobConfig.analysisConfig().init(jobConfig.ruleFilters(),
jobConfig.scheduledEvents());

BOOST_REQUIRE_MESSAGE(jobConfig.parse(validAnomalyJobConfigWithCustomRuleFilter),
"Cannot parse JSON job config!");
BOOST_TEST_REQUIRE(jobConfig.isInitialized());
Expand Down

0 comments on commit 3bc3e3a

Please sign in to comment.