Skip to content

Commit

Permalink
[ML] Initialize detectors parser with filters map (#1562)
Browse files Browse the repository at this point in the history
Detector rules may possibly refer to rule filters that are defined
separately, outside of the anomaly job configuration. As an interim
solution pass the rule filters parsed from the old-style field config to
the new-style JSON parser.

Relates to #1253
  • Loading branch information
edsavage authored Nov 9, 2020
1 parent ccd255e commit f466bd8
Show file tree
Hide file tree
Showing 8 changed files with 61 additions and 39 deletions.
31 changes: 16 additions & 15 deletions bin/autodetect/Main.cc
Original file line number Diff line number Diff line change
Expand Up @@ -175,21 +175,6 @@ int main(int argc, char** argv) {
// hence is done before reducing CPU priority.
ml::core::CProcessPriority::reduceCpuPriority();

std::string anomalyJobConfigJson;
bool couldReadConfigFile;
std::tie(anomalyJobConfigJson, couldReadConfigFile) =
ml::core::CStringUtils::readFileToString(configFile);
if (couldReadConfigFile == false) {
LOG_FATAL(<< "Failed to read config file '" << configFile << "'");
return EXIT_FAILURE;
}

ml::api::CAnomalyJobConfig jobConfig;
if (jobConfig.parse(anomalyJobConfigJson) == false) {
LOG_FATAL(<< "Failed to parse anomaly job config: '" << anomalyJobConfigJson << "'");
return EXIT_FAILURE;
}

ml::model::CLimits limits{isPersistInForeground};
if (!limitConfigFile.empty() && limits.init(limitConfigFile) == false) {
LOG_FATAL(<< "ML limit config file '" << limitConfigFile << "' could not be loaded");
Expand All @@ -203,6 +188,22 @@ int main(int argc, char** argv) {
return EXIT_FAILURE;
}

std::string anomalyJobConfigJson;
bool couldReadConfigFile;
std::tie(anomalyJobConfigJson, couldReadConfigFile) =
ml::core::CStringUtils::readFileToString(configFile);
if (couldReadConfigFile == false) {
LOG_FATAL(<< "Failed to read config file '" << configFile << "'");
return EXIT_FAILURE;
}
// For now we need to reference the rule filters parsed by the old-style
// field config.
ml::api::CAnomalyJobConfig jobConfig{fieldConfig.ruleFilters()};
if (jobConfig.parse(anomalyJobConfigJson) == false) {
LOG_FATAL(<< "Failed to parse anomaly job config: '" << anomalyJobConfigJson << "'");
return EXIT_FAILURE;
}

bool doingCategorization{fieldConfig.fieldNameSuperset().count(
ml::api::CFieldDataCategorizer::MLCATEGORY_NAME) > 0};
TStrVec mutableFields;
Expand Down
15 changes: 13 additions & 2 deletions include/api/CAnomalyJobConfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,8 @@ class API_EXPORT CAnomalyJobConfig {
public:
CDetectorConfig() {}

void parse(const rapidjson::Value& json);
void parse(const rapidjson::Value& detectorConfig,
const CDetectionRulesJsonParser::TStrPatternSetUMap& ruleFilters);

std::string function() const { return m_Function; }
std::string fieldName() const { return m_FieldName; }
Expand Down Expand Up @@ -90,8 +91,13 @@ class API_EXPORT CAnomalyJobConfig {
using TDetectorConfigVec = std::vector<CDetectorConfig>;

public:
//! Default constructor
CAnalysisConfig() {}

//! Constructor taking a map of detector rule filters keyed by filter_id.
explicit CAnalysisConfig(const CDetectionRulesJsonParser::TStrPatternSetUMap& ruleFilters)
: m_RuleFilters(ruleFilters) {}

void parse(const rapidjson::Value& json);

core_t::TTime bucketSpan() const { return m_BucketSpan; }
Expand Down Expand Up @@ -120,7 +126,7 @@ class API_EXPORT CAnomalyJobConfig {
static core_t::TTime bucketSpanSeconds(const std::string& bucketSpanString);

private:
std::size_t m_BucketSpan{300}; // 5m
core_t::TTime m_BucketSpan{DEFAULT_BUCKET_SPAN};
std::string m_SummaryCountFieldName{};
std::string m_CategorizationFieldName{};
TStrVec m_CategorizationFilters{};
Expand All @@ -129,6 +135,9 @@ class API_EXPORT CAnomalyJobConfig {
TDetectorConfigVec m_Detectors{};
TStrVec m_Influencers{};
std::string m_Latency{};

//! The filters per id used by categorical rule conditions.
CDetectionRulesJsonParser::TStrPatternSetUMap m_RuleFilters{};
};

class API_EXPORT CDataDescription {
Expand Down Expand Up @@ -218,6 +227,8 @@ class API_EXPORT CAnomalyJobConfig {
public:
//! Default constructor
CAnomalyJobConfig() {}
explicit CAnomalyJobConfig(const CDetectionRulesJsonParser::TStrPatternSetUMap& rulesFilter)
: m_AnalysisConfig(rulesFilter) {}

bool parse(const std::string& json);

Expand Down
4 changes: 2 additions & 2 deletions include/api/CAnomalyJobConfigReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ class API_EXPORT CAnomalyJobConfigReader {
public:
explicit CParseError(const std::string& what)
: std::runtime_error{what} {}
virtual ~CParseError() throw() = default;
virtual ~CParseError() noexcept = default;
};

public:
Expand Down Expand Up @@ -142,7 +142,7 @@ class API_EXPORT CAnomalyJobConfigReader {

private:
CParameter(const std::string& name, SArrayElementTag);
void handleFatal() const;
[[noreturn]] void handleFatal() const;

private:
std::string m_Name;
Expand Down
4 changes: 2 additions & 2 deletions include/api/CDetectionRulesJsonParser.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ class API_EXPORT CDetectionRulesJsonParser {

public:
//! Default constructor
CDetectionRulesJsonParser(TStrPatternSetUMap& filtersByIdMap);
CDetectionRulesJsonParser(const TStrPatternSetUMap& filtersByIdMap);

//! Parses a string expected to contain a JSON array with
//! detection rules and adds the rule objects into the given vector.
Expand Down Expand Up @@ -62,7 +62,7 @@ class API_EXPORT CDetectionRulesJsonParser {

private:
//! The filters per id used by categorical rule conditions.
TStrPatternSetUMap& m_FiltersByIdMap;
const TStrPatternSetUMap& m_FiltersByIdMap;
};
}
}
Expand Down
20 changes: 8 additions & 12 deletions lib/api/CAnomalyJobConfig.cc
Original file line number Diff line number Diff line change
Expand Up @@ -297,7 +297,7 @@ void CAnomalyJobConfig::CAnalysisConfig::parse(const rapidjson::Value& analysisC
if (detectorsConfig != nullptr && detectorsConfig->IsArray()) {
m_Detectors.resize(detectorsConfig->Size());
for (std::size_t i = 0; i < detectorsConfig->Size(); ++i) {
m_Detectors[i].parse((*detectorsConfig)[static_cast<int>(i)]);
m_Detectors[i].parse((*detectorsConfig)[static_cast<int>(i)], m_RuleFilters);
}
}

Expand All @@ -321,7 +321,9 @@ core_t::TTime CAnomalyJobConfig::CAnalysisConfig::bucketSpanSeconds(const std::s
return bucketSpanSeconds;
}

void CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::parse(const rapidjson::Value& detectorConfig) {
void CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::parse(
const rapidjson::Value& detectorConfig,
const CDetectionRulesJsonParser::TStrPatternSetUMap& ruleFilters) {
auto parameters = DETECTOR_CONFIG_READER.read(detectorConfig);

m_Function = parameters[FUNCTION].as<std::string>();
Expand All @@ -336,17 +338,11 @@ void CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::parse(const rapidjson:
auto customRules = parameters[CUSTOM_RULES].jsonObject();
if (customRules != nullptr) {
std::string errorString;
using TStrPatternSetUMap = CDetectionRulesJsonParser::TStrPatternSetUMap;
// TODO How to populate the filters by Id map passed to the detection rules JSON parser?
// Pass an empty map as a placeholder
TStrPatternSetUMap emptyMap{};
CDetectionRulesJsonParser rulesParser(emptyMap);
CDetectionRulesJsonParser rulesParser(ruleFilters);
if (rulesParser.parseRules(*customRules, m_CustomRules, errorString) == false) {
// TODO Re-enable this error handling when the CDetectionRulesJsonParser instance
// has been constructed with a valid filter map.
// LOG_ERROR(<< errorString << toString(*customRules));
// throw CAnomalyJobConfigReader::CParseError(
// "Error parsing custom rules: " + toString(*customRules));
LOG_ERROR(<< errorString << toString(*customRules));
throw CAnomalyJobConfigReader::CParseError(
"Error parsing custom rules: " + toString(*customRules));
}
}

Expand Down
5 changes: 0 additions & 5 deletions lib/api/CAnomalyJobConfigReader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,6 @@ bool CAnomalyJobConfigReader::CParameter::fallback(bool value) const {
}
if (m_Value->IsBool() == false) {
this->handleFatal();
return value;
}
return m_Value->GetBool();
}
Expand All @@ -84,7 +83,6 @@ std::size_t CAnomalyJobConfigReader::CParameter::fallback(std::size_t value) con
}
if (m_Value->IsUint64() == false) {
this->handleFatal();
return value;
}
return m_Value->GetUint64();
}
Expand All @@ -95,7 +93,6 @@ std::ptrdiff_t CAnomalyJobConfigReader::CParameter::fallback(std::ptrdiff_t valu
}
if (m_Value->IsInt64() == false) {
this->handleFatal();
return value;
}
return m_Value->GetInt64();
}
Expand All @@ -109,7 +106,6 @@ double CAnomalyJobConfigReader::CParameter::fallback(double value) const {
}
if (m_Value->IsDouble() == false) {
this->handleFatal();
return value;
}
return m_Value->GetDouble();
}
Expand All @@ -120,7 +116,6 @@ std::string CAnomalyJobConfigReader::CParameter::fallback(const std::string& val
}
if (m_Value->IsString() == false) {
this->handleFatal();
return value;
}
return m_Value->GetString();
}
Expand Down
2 changes: 1 addition & 1 deletion lib/api/CDetectionRulesJsonParser.cc
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ const std::string TYPICAL("typical");
const std::string VALUE("value");
}

CDetectionRulesJsonParser::CDetectionRulesJsonParser(TStrPatternSetUMap& filtersByIdMap)
CDetectionRulesJsonParser::CDetectionRulesJsonParser(const TStrPatternSetUMap& filtersByIdMap)
: m_FiltersByIdMap(filtersByIdMap) {
}

Expand Down
19 changes: 19 additions & 0 deletions lib/api/unittest/CAnomalyJobConfigTest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -398,6 +398,25 @@ BOOST_AUTO_TEST_CASE(testParse) {
BOOST_REQUIRE_EQUAL(false, modelPlotConfig.enabled());
BOOST_REQUIRE_EQUAL(false, modelPlotConfig.annotationsEnabled());
}
{
const std::string validAnomalyJobConfigWithCustomRuleFilter{
"{\"job_id\":\"mean_bytes_by_clientip\",\"job_type\":\"anomaly_detector\",\"job_version\":\"8.0.0\",\"create_time\":1604671135245,\"description\":\"mean bytes by clientip\","
"\"analysis_config\":{\"bucket_span\":\"3h\",\"detectors\":[{\"detector_description\":\"mean(bytes) by clientip\",\"function\":\"mean\",\"field_name\":\"bytes\",\"by_field_name\":\"clientip\","
"\"custom_rules\":[{\"actions\":[\"skip_result\"],\"scope\":{\"clientip\":{\"filter_id\":\"safe_ips\",\"filter_type\":\"include\"}},\"conditions\":[{\"applies_to\":\"actual\",\"operator\":\"lt\",\"value\":10.0}]}],"
"\"detector_index\":0}],\"influencers\":[\"clientip\"]},\"analysis_limits\":{\"model_memory_limit\":\"42mb\",\"categorization_examples_limit\":4},"
"\"data_description\":{\"time_field\":\"timestamp\",\"time_format\":\"epoch_ms\"},\"model_plot_config\":{\"enabled\":false,\"annotations_enabled\":false},"
"\"model_snapshot_retention_days\":10,\"daily_model_snapshot_retention_after_days\":1,\"results_index_name\":\"shared\",\"allow_lazy_open\":false}"};

// Expect parsing to fail if the filter referenced by the custom rule cannot be found
ml::api::CAnomalyJobConfig jobConfigEmptyFilterMap;
BOOST_TEST_REQUIRE(!jobConfigEmptyFilterMap.parse(validAnomalyJobConfigWithCustomRuleFilter));

// Expect parsing to succeed if the filter referenced by the custom rule can be found in the filter map.
ml::api::CDetectionRulesJsonParser::TStrPatternSetUMap filterMap{{"safe_ips", {}}};
ml::api::CAnomalyJobConfig jobConfig(filterMap);
BOOST_REQUIRE_MESSAGE(jobConfig.parse(validAnomalyJobConfigWithCustomRuleFilter),
"Cannot parse JSON job config!");
}
}

BOOST_AUTO_TEST_SUITE_END()

0 comments on commit f466bd8

Please sign in to comment.