Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ML] Remove redundant autodetect options #1663

Merged
merged 3 commits into from
Jan 13, 2021
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 9 additions & 50 deletions bin/autodetect/CCmdLineParser.cc
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
namespace ml {
namespace autodetect {

const std::string CCmdLineParser::DESCRIPTION = "Usage: autodetect [options] [<fieldname>+ [by <fieldname>]]\n"
// Caption handed to boost::program_options::options_description in parse();
// it is printed ahead of the option list when the description is streamed
// for --help. Positional field-name clauses are no longer part of the usage,
// so only "[options]" remains (with balanced brackets).
const std::string CCmdLineParser::DESCRIPTION = "Usage: autodetect [options]\n"
"Options:";

bool CCmdLineParser::parse(int argc,
Expand All @@ -30,7 +30,6 @@ bool CCmdLineParser::parse(int argc,
std::string& logPipe,
char& delimiter,
bool& lengthEncodedInput,
std::string& timeField,
std::string& timeFormat,
std::string& quantilesState,
bool& deleteStateFiles,
Expand All @@ -46,47 +45,29 @@ bool CCmdLineParser::parse(int argc,
bool& isPersistFileNamedPipe,
bool& isPersistInForeground,
std::size_t& maxAnomalyRecords,
bool& memoryUsage,
bool& stopCategorizationOnWarnStatus,
TStrVec& clauseTokens) {
bool& memoryUsage) {
try {
boost::program_options::options_description desc(DESCRIPTION);
// clang-format off
desc.add_options()
("help", "Display this information and exit")
("version", "Display version information and exit")
("config", boost::program_options::value<std::string>(),
("config", boost::program_options::value<std::string>()->required(),
"The job configuration file")
("filtersconfig", boost::program_options::value<std::string>(),
"The filters configuration file")
"The filters configuration file")
("eventsconfig", boost::program_options::value<std::string>(),
"The scheduled events configuration file")
("limitconfig", boost::program_options::value<std::string>(),
"Optional limit config file")
"The scheduled events configuration file")
("modelconfig", boost::program_options::value<std::string>(),
"Optional model config file")
("fieldconfig", boost::program_options::value<std::string>(),
"Optional field config file")
("modelplotconfig", boost::program_options::value<std::string>(),
"Optional model plot config file")
("jobid", boost::program_options::value<std::string>(),
"ID of the job this process is associated with")
("logProperties", boost::program_options::value<std::string>(),
"Optional logger properties file")
("logPipe", boost::program_options::value<std::string>(),
"Optional log to named pipe")
("bucketspan", boost::program_options::value<core_t::TTime>(),
"Optional aggregation bucket span (in seconds) - default is 300")
("latency", boost::program_options::value<core_t::TTime>(),
"Optional maximum delay for out-of-order records (in seconds) - default is 0")
("summarycountfield", boost::program_options::value<std::string>(),
"Optional field to that contains counts for pre-summarized input - default is none")
("delimiter", boost::program_options::value<char>(),
"Optional delimiter character for delimited data formats - default is '\t' (tab separated)")
("lengthEncodedInput",
"Take input in length encoded binary format - default is delimited")
("timefield", boost::program_options::value<std::string>(),
"Optional name of the field containing the timestamp - default is 'time'")
("timeformat", boost::program_options::value<std::string>(),
"Optional format of the date in the time field in strptime code - default is the epoch time in seconds")
("quantilesState", boost::program_options::value<std::string>(),
Expand All @@ -107,31 +88,19 @@ bool CCmdLineParser::parse(int argc,
("persist", boost::program_options::value<std::string>(),
"Optional file to persist state to - not present means no state persistence")
("persistIsPipe", "Specified persist file is a named pipe")
("persistInterval", boost::program_options::value<core_t::TTime>(),
"Optional time interval at which to periodically persist model state (Mutually exclusive with bucketPersistInterval)")
("persistInForeground", "Persistence occurs in the foreground. Defaults to background persistence.")
("bucketPersistInterval", boost::program_options::value<std::size_t>(),
"Optional number of buckets after which to periodically persist model state (Mutually exclusive with persistInterval)")
("maxQuantileInterval", boost::program_options::value<core_t::TTime>(),
"Optional interval at which to periodically output quantiles if they have not been output due to an anomaly - if not specified then quantiles will only be output following a big anomaly")
"Optional number of buckets after which to periodically persist model state.")
edsavage marked this conversation as resolved.
Show resolved Hide resolved
("maxAnomalyRecords", boost::program_options::value<std::size_t>(),
"The maximum number of records to be outputted for each bucket. Defaults to 100, a value 0 removes the limit.")
("memoryUsage",
"Log the model memory usage at the end of the job")
("multivariateByFields",
"Optional flag to enable multi-variate analysis of correlated by fields")
("stopCategorizationOnWarnStatus",
"Optional flag to stop categorization for partitions where the status is 'warn'.")
;
// clang-format on

boost::program_options::variables_map vm;
boost::program_options::parsed_options parsed =
boost::program_options::command_line_parser(argc, argv)
.options(desc)
.allow_unregistered()
.run();
boost::program_options::store(parsed, vm);
boost::program_options::store(
boost::program_options::parse_command_line(argc, argv, desc), vm);
boost::program_options::notify(vm);

if (vm.count("help") > 0) {
std::cerr << desc << std::endl;
Expand Down Expand Up @@ -169,9 +138,6 @@ bool CCmdLineParser::parse(int argc,
if (vm.count("lengthEncodedInput") > 0) {
lengthEncodedInput = true;
}
if (vm.count("timefield") > 0) {
timeField = vm["timefield"].as<std::string>();
}
if (vm.count("timeformat") > 0) {
timeFormat = vm["timeformat"].as<std::string>();
}
Expand Down Expand Up @@ -220,13 +186,6 @@ bool CCmdLineParser::parse(int argc,
if (vm.count("memoryUsage") > 0) {
memoryUsage = true;
}
if (vm.count("stopCategorizationOnWarnStatus") > 0) {
stopCategorizationOnWarnStatus = true;
}

boost::program_options::collect_unrecognized(
parsed.options, boost::program_options::include_positional)
.swap(clauseTokens);
} catch (std::exception& e) {
std::cerr << "Error processing command line: " << e.what() << std::endl;
return false;
Expand Down
7 changes: 2 additions & 5 deletions bin/autodetect/CCmdLineParser.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,12 +37,11 @@ class CCmdLineParser {
std::string& config,
std::string& filtersConfig,
std::string& eventsConfig,
std::string& modelPlotConfigFile,
std::string& modelConfigFile,
std::string& logProperties,
std::string& logPipe,
char& delimiter,
bool& lengthEncodedInput,
std::string& timeField,
std::string& timeFormat,
std::string& quantilesState,
bool& deleteStateFiles,
Expand All @@ -58,9 +57,7 @@ class CCmdLineParser {
bool& isPersistFileNamedPipe,
bool& isPersistInForeground,
std::size_t& maxAnomalyRecords,
bool& memoryUsage,
bool& stopCategorizationOnWarnStatus,
TStrVec& clauseTokens);
bool& memoryUsage);

private:
static const std::string DESCRIPTION;
Expand Down
16 changes: 7 additions & 9 deletions bin/autodetect/Main.cc
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,6 @@ int main(int argc, char** argv) {
std::string logPipe;
char delimiter{'\t'};
bool lengthEncodedInput{false};
std::string timeField{ml::api::CAnomalyJob::DEFAULT_TIME_FIELD_NAME};
std::string timeFormat;
std::string quantilesStateFile;
bool deleteStateFiles{false};
Expand All @@ -110,16 +109,15 @@ int main(int argc, char** argv) {
bool isPersistInForeground{false};
std::size_t maxAnomalyRecords{100};
bool memoryUsage{false};
bool stopCategorizationOnWarnStatus{false};
TStrVec clauseTokens;
TStrVec unknownTokens;
edsavage marked this conversation as resolved.
Show resolved Hide resolved
if (ml::autodetect::CCmdLineParser::parse(
argc, argv, configFile, filtersConfigFile, eventsConfigFile, modelConfigFile,
logProperties, logPipe, delimiter, lengthEncodedInput, timeField,
argc, argv, configFile, filtersConfigFile, eventsConfigFile,
modelConfigFile, logProperties, logPipe, delimiter, lengthEncodedInput,
timeFormat, quantilesStateFile, deleteStateFiles, bucketPersistInterval,
namedPipeConnectTimeout, inputFileName, isInputFileNamedPipe, outputFileName,
isOutputFileNamedPipe, restoreFileName, isRestoreFileNamedPipe, persistFileName,
isPersistFileNamedPipe, isPersistInForeground, maxAnomalyRecords,
memoryUsage, stopCategorizationOnWarnStatus, clauseTokens) == false) {
namedPipeConnectTimeout, inputFileName, isInputFileNamedPipe,
outputFileName, isOutputFileNamedPipe, restoreFileName,
isRestoreFileNamedPipe, persistFileName, isPersistFileNamedPipe,
isPersistInForeground, maxAnomalyRecords, memoryUsage) == false) {
return EXIT_FAILURE;
}

Expand Down
12 changes: 0 additions & 12 deletions include/api/CAnomalyJobConfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -219,12 +219,6 @@ class API_EXPORT CAnomalyJobConfig {
CAnalysisConfig(const std::string& categorizationFieldName)
: m_CategorizationFieldName{categorizationFieldName} {}

//! Constructor taking a map of detector rule filters keyed by filter_id &
//! a vector of scheduled events data
CAnalysisConfig(const CDetectionRulesJsonParser::TStrPatternSetUMap& ruleFilters,
const TStrDetectionRulePrVec& scheduledEvents)
: m_RuleFilters(ruleFilters), m_ScheduledEvents(scheduledEvents) {}

void init(const CDetectionRulesJsonParser::TStrPatternSetUMap& ruleFilters,
const TStrDetectionRulePrVec& scheduledEvents) {
m_RuleFilters = ruleFilters;
Expand Down Expand Up @@ -488,12 +482,6 @@ class API_EXPORT CAnomalyJobConfig {
explicit CAnomalyJobConfig(const std::string& categorizationFieldName)
: m_AnalysisConfig(categorizationFieldName) {}

// This one is only needed for historical reasons. Once The Java side sends the
// scheduled events and filters config as JSON this can go.
CAnomalyJobConfig(const CDetectionRulesJsonParser::TStrPatternSetUMap& ruleFilters,
const TStrDetectionRulePrVec& scheduledEvents)
: m_AnalysisConfig(ruleFilters, scheduledEvents) {}

bool readFile(const std::string& fileName, std::string& fileContents);

bool initFromFile(const std::string& configFile);
Expand Down
15 changes: 12 additions & 3 deletions lib/api/unittest/CAnomalyJobConfigTest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1040,9 +1040,18 @@ BOOST_AUTO_TEST_CASE(testParse) {
BOOST_TEST_REQUIRE(!jobConfigEmptyFilterMap.isInitialized());

// Expect parsing to succeed if the filter referenced by the custom rule can be found in the filter map.
ml::api::CDetectionRulesJsonParser::TStrPatternSetUMap filterMap{{"safe_ips", {}}};
ml::api::CAnomalyJobConfig::TStrDetectionRulePrVec scheduledEvents{};
ml::api::CAnomalyJobConfig jobConfig(filterMap, scheduledEvents);
const std::string filterConfigJson{"{\"filters\":[{\"filter_id\":\"safe_ips\",\"items\":[]}]}"};
ml::api::CAnomalyJobConfig jobConfig;
BOOST_TEST_REQUIRE(jobConfig.parseFilterConfig(filterConfigJson));

const std::string validScheduledEventsConfigJson{"{\"events\":["
"]}"};

BOOST_TEST_REQUIRE(jobConfig.parseEventConfig(validScheduledEventsConfigJson));

jobConfig.analysisConfig().init(jobConfig.ruleFilters(),
jobConfig.scheduledEvents());

BOOST_REQUIRE_MESSAGE(jobConfig.parse(validAnomalyJobConfigWithCustomRuleFilter),
"Cannot parse JSON job config!");
BOOST_TEST_REQUIRE(jobConfig.isInitialized());
Expand Down