Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[7.x][ML] Initialize detectors parser with filters map (#1562) #1564

Merged
merged 1 commit into from
Nov 9, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 16 additions & 15 deletions bin/autodetect/Main.cc
Original file line number Diff line number Diff line change
Expand Up @@ -175,21 +175,6 @@ int main(int argc, char** argv) {
// hence is done before reducing CPU priority.
ml::core::CProcessPriority::reduceCpuPriority();

std::string anomalyJobConfigJson;
bool couldReadConfigFile;
std::tie(anomalyJobConfigJson, couldReadConfigFile) =
ml::core::CStringUtils::readFileToString(configFile);
if (couldReadConfigFile == false) {
LOG_FATAL(<< "Failed to read config file '" << configFile << "'");
return EXIT_FAILURE;
}

ml::api::CAnomalyJobConfig jobConfig;
if (jobConfig.parse(anomalyJobConfigJson) == false) {
LOG_FATAL(<< "Failed to parse anomaly job config: '" << anomalyJobConfigJson << "'");
return EXIT_FAILURE;
}

ml::model::CLimits limits{isPersistInForeground};
if (!limitConfigFile.empty() && limits.init(limitConfigFile) == false) {
LOG_FATAL(<< "ML limit config file '" << limitConfigFile << "' could not be loaded");
Expand All @@ -203,6 +188,22 @@ int main(int argc, char** argv) {
return EXIT_FAILURE;
}

std::string anomalyJobConfigJson;
bool couldReadConfigFile;
std::tie(anomalyJobConfigJson, couldReadConfigFile) =
ml::core::CStringUtils::readFileToString(configFile);
if (couldReadConfigFile == false) {
LOG_FATAL(<< "Failed to read config file '" << configFile << "'");
return EXIT_FAILURE;
}
// For now we need to reference the rule filters parsed by the old-style
// field config.
ml::api::CAnomalyJobConfig jobConfig{fieldConfig.ruleFilters()};
if (jobConfig.parse(anomalyJobConfigJson) == false) {
LOG_FATAL(<< "Failed to parse anomaly job config: '" << anomalyJobConfigJson << "'");
return EXIT_FAILURE;
}

bool doingCategorization{fieldConfig.fieldNameSuperset().count(
ml::api::CFieldDataCategorizer::MLCATEGORY_NAME) > 0};
TStrVec mutableFields;
Expand Down
15 changes: 13 additions & 2 deletions include/api/CAnomalyJobConfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,8 @@ class API_EXPORT CAnomalyJobConfig {
public:
CDetectorConfig() {}

void parse(const rapidjson::Value& json);
void parse(const rapidjson::Value& detectorConfig,
const CDetectionRulesJsonParser::TStrPatternSetUMap& ruleFilters);

std::string function() const { return m_Function; }
std::string fieldName() const { return m_FieldName; }
Expand Down Expand Up @@ -90,8 +91,13 @@ class API_EXPORT CAnomalyJobConfig {
using TDetectorConfigVec = std::vector<CDetectorConfig>;

public:
//! Default constructor
CAnalysisConfig() {}

//! Constructor taking a map of detector rule filters keyed by filter_id.
explicit CAnalysisConfig(const CDetectionRulesJsonParser::TStrPatternSetUMap& ruleFilters)
: m_RuleFilters(ruleFilters) {}

void parse(const rapidjson::Value& json);

core_t::TTime bucketSpan() const { return m_BucketSpan; }
Expand Down Expand Up @@ -120,7 +126,7 @@ class API_EXPORT CAnomalyJobConfig {
static core_t::TTime bucketSpanSeconds(const std::string& bucketSpanString);

private:
std::size_t m_BucketSpan{300}; // 5m
core_t::TTime m_BucketSpan{DEFAULT_BUCKET_SPAN};
std::string m_SummaryCountFieldName{};
std::string m_CategorizationFieldName{};
TStrVec m_CategorizationFilters{};
Expand All @@ -129,6 +135,9 @@ class API_EXPORT CAnomalyJobConfig {
TDetectorConfigVec m_Detectors{};
TStrVec m_Influencers{};
std::string m_Latency{};

//! The filters per id used by categorical rule conditions.
CDetectionRulesJsonParser::TStrPatternSetUMap m_RuleFilters{};
};

class API_EXPORT CDataDescription {
Expand Down Expand Up @@ -218,6 +227,8 @@ class API_EXPORT CAnomalyJobConfig {
public:
//! Default constructor
CAnomalyJobConfig() {}
explicit CAnomalyJobConfig(const CDetectionRulesJsonParser::TStrPatternSetUMap& rulesFilter)
: m_AnalysisConfig(rulesFilter) {}

bool parse(const std::string& json);

Expand Down
2 changes: 1 addition & 1 deletion include/api/CAnomalyJobConfigReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ class API_EXPORT CAnomalyJobConfigReader {

private:
CParameter(const std::string& name, SArrayElementTag);
void handleFatal() const;
[[noreturn]] void handleFatal() const;

private:
std::string m_Name;
Expand Down
4 changes: 2 additions & 2 deletions include/api/CDetectionRulesJsonParser.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ class API_EXPORT CDetectionRulesJsonParser {

public:
//! Default constructor
CDetectionRulesJsonParser(TStrPatternSetUMap& filtersByIdMap);
CDetectionRulesJsonParser(const TStrPatternSetUMap& filtersByIdMap);

//! Parses a string expected to contain a JSON array with
//! detection rules and adds the rule objects into the given vector.
Expand Down Expand Up @@ -62,7 +62,7 @@ class API_EXPORT CDetectionRulesJsonParser {

private:
//! The filters per id used by categorical rule conditions.
TStrPatternSetUMap& m_FiltersByIdMap;
const TStrPatternSetUMap& m_FiltersByIdMap;
};
}
}
Expand Down
20 changes: 8 additions & 12 deletions lib/api/CAnomalyJobConfig.cc
Original file line number Diff line number Diff line change
Expand Up @@ -297,7 +297,7 @@ void CAnomalyJobConfig::CAnalysisConfig::parse(const rapidjson::Value& analysisC
if (detectorsConfig != nullptr && detectorsConfig->IsArray()) {
m_Detectors.resize(detectorsConfig->Size());
for (std::size_t i = 0; i < detectorsConfig->Size(); ++i) {
m_Detectors[i].parse((*detectorsConfig)[static_cast<int>(i)]);
m_Detectors[i].parse((*detectorsConfig)[static_cast<int>(i)], m_RuleFilters);
}
}

Expand All @@ -321,7 +321,9 @@ core_t::TTime CAnomalyJobConfig::CAnalysisConfig::bucketSpanSeconds(const std::s
return bucketSpanSeconds;
}

void CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::parse(const rapidjson::Value& detectorConfig) {
void CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::parse(
const rapidjson::Value& detectorConfig,
const CDetectionRulesJsonParser::TStrPatternSetUMap& ruleFilters) {
auto parameters = DETECTOR_CONFIG_READER.read(detectorConfig);

m_Function = parameters[FUNCTION].as<std::string>();
Expand All @@ -336,17 +338,11 @@ void CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::parse(const rapidjson:
auto customRules = parameters[CUSTOM_RULES].jsonObject();
if (customRules != nullptr) {
std::string errorString;
using TStrPatternSetUMap = CDetectionRulesJsonParser::TStrPatternSetUMap;
// TODO How to populate the filters by Id map passed to the detection rules JSON parser?
// Pass an empty map as a placeholder
TStrPatternSetUMap emptyMap{};
CDetectionRulesJsonParser rulesParser(emptyMap);
CDetectionRulesJsonParser rulesParser(ruleFilters);
if (rulesParser.parseRules(*customRules, m_CustomRules, errorString) == false) {
// TODO Re-enable this error handling when the CDetectionRulesJsonParser instance
// has been constructed with a valid filter map.
// LOG_ERROR(<< errorString << toString(*customRules));
// throw CAnomalyJobConfigReader::CParseError(
// "Error parsing custom rules: " + toString(*customRules));
LOG_ERROR(<< errorString << toString(*customRules));
throw CAnomalyJobConfigReader::CParseError(
"Error parsing custom rules: " + toString(*customRules));
}
}

Expand Down
5 changes: 0 additions & 5 deletions lib/api/CAnomalyJobConfigReader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,6 @@ bool CAnomalyJobConfigReader::CParameter::fallback(bool value) const {
}
if (m_Value->IsBool() == false) {
this->handleFatal();
return value;
}
return m_Value->GetBool();
}
Expand All @@ -84,7 +83,6 @@ std::size_t CAnomalyJobConfigReader::CParameter::fallback(std::size_t value) con
}
if (m_Value->IsUint64() == false) {
this->handleFatal();
return value;
}
return m_Value->GetUint64();
}
Expand All @@ -95,7 +93,6 @@ std::ptrdiff_t CAnomalyJobConfigReader::CParameter::fallback(std::ptrdiff_t valu
}
if (m_Value->IsInt64() == false) {
this->handleFatal();
return value;
}
return m_Value->GetInt64();
}
Expand All @@ -109,7 +106,6 @@ double CAnomalyJobConfigReader::CParameter::fallback(double value) const {
}
if (m_Value->IsDouble() == false) {
this->handleFatal();
return value;
}
return m_Value->GetDouble();
}
Expand All @@ -120,7 +116,6 @@ std::string CAnomalyJobConfigReader::CParameter::fallback(const std::string& val
}
if (m_Value->IsString() == false) {
this->handleFatal();
return value;
}
return m_Value->GetString();
}
Expand Down
2 changes: 1 addition & 1 deletion lib/api/CDetectionRulesJsonParser.cc
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ const std::string TYPICAL("typical");
const std::string VALUE("value");
}

CDetectionRulesJsonParser::CDetectionRulesJsonParser(TStrPatternSetUMap& filtersByIdMap)
CDetectionRulesJsonParser::CDetectionRulesJsonParser(const TStrPatternSetUMap& filtersByIdMap)
: m_FiltersByIdMap(filtersByIdMap) {
}

Expand Down
19 changes: 19 additions & 0 deletions lib/api/unittest/CAnomalyJobConfigTest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -398,6 +398,25 @@ BOOST_AUTO_TEST_CASE(testParse) {
BOOST_REQUIRE_EQUAL(false, modelPlotConfig.enabled());
BOOST_REQUIRE_EQUAL(false, modelPlotConfig.annotationsEnabled());
}
{
const std::string validAnomalyJobConfigWithCustomRuleFilter{
"{\"job_id\":\"mean_bytes_by_clientip\",\"job_type\":\"anomaly_detector\",\"job_version\":\"8.0.0\",\"create_time\":1604671135245,\"description\":\"mean bytes by clientip\","
"\"analysis_config\":{\"bucket_span\":\"3h\",\"detectors\":[{\"detector_description\":\"mean(bytes) by clientip\",\"function\":\"mean\",\"field_name\":\"bytes\",\"by_field_name\":\"clientip\","
"\"custom_rules\":[{\"actions\":[\"skip_result\"],\"scope\":{\"clientip\":{\"filter_id\":\"safe_ips\",\"filter_type\":\"include\"}},\"conditions\":[{\"applies_to\":\"actual\",\"operator\":\"lt\",\"value\":10.0}]}],"
"\"detector_index\":0}],\"influencers\":[\"clientip\"]},\"analysis_limits\":{\"model_memory_limit\":\"42mb\",\"categorization_examples_limit\":4},"
"\"data_description\":{\"time_field\":\"timestamp\",\"time_format\":\"epoch_ms\"},\"model_plot_config\":{\"enabled\":false,\"annotations_enabled\":false},"
"\"model_snapshot_retention_days\":10,\"daily_model_snapshot_retention_after_days\":1,\"results_index_name\":\"shared\",\"allow_lazy_open\":false}"};

// Expect parsing to fail if the filter referenced by the custom rule cannot be found
ml::api::CAnomalyJobConfig jobConfigEmptyFilterMap;
BOOST_TEST_REQUIRE(!jobConfigEmptyFilterMap.parse(validAnomalyJobConfigWithCustomRuleFilter));

// Expect parsing to succeed if the filter referenced by the custom rule can be found in the filter map.
ml::api::CDetectionRulesJsonParser::TStrPatternSetUMap filterMap{{"safe_ips", {}}};
ml::api::CAnomalyJobConfig jobConfig(filterMap);
BOOST_REQUIRE_MESSAGE(jobConfig.parse(validAnomalyJobConfigWithCustomRuleFilter),
"Cannot parse JSON job config!");
}
}

BOOST_AUTO_TEST_SUITE_END()