-
Notifications
You must be signed in to change notification settings - Fork 62
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[ML] Init AD model config from JSON config #1602
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -50,6 +50,7 @@ class CHierarchicalResults; | |
class CLimits; | ||
} | ||
namespace api { | ||
class CAnomalyJobConfig; | ||
class CPersistenceManager; | ||
class CModelPlotDataJsonWriter; | ||
class CFieldConfig; | ||
|
@@ -142,6 +143,7 @@ class API_EXPORT CAnomalyJob : public CDataProcessor { | |
public: | ||
CAnomalyJob(const std::string& jobId, | ||
model::CLimits& limits, | ||
CAnomalyJobConfig& jobConfig, | ||
CFieldConfig& fieldConfig, | ||
model::CAnomalyDetectorModelConfig& modelConfig, | ||
core::CJsonOutputStreamWrapper& outputBuffer, | ||
|
@@ -424,7 +426,15 @@ class API_EXPORT CAnomalyJob : public CDataProcessor { | |
//! Object to which the output is passed | ||
CJsonOutputWriter m_JsonOutputWriter; | ||
|
||
//! Field names to use for the analysis | ||
//! Configuration settings for the analysis parsed from | ||
//! JSON configuration file. | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It would be good to say why this is non-const. It's because it gets updated by job updates, right? And of course, since it's a reference, any update will also affect everywhere else that holds a reference. |
||
//! Note that this is a non-const reference as it needs to be capable of | ||
//! being modified by job updates (and those changes reflected wherever a | ||
//! reference is held). | ||
CAnomalyJobConfig& m_JobConfig; | ||
|
||
//! Field names to use for the analysis. | ||
//! This is a non-const reference for the same reasons as for m_JobConfig. | ||
CFieldConfig& m_FieldConfig; | ||
|
||
//! The model configuration | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -34,6 +34,7 @@ class API_EXPORT CAnomalyJobConfig { | |
static const std::string OVER_FIELD_NAME; | ||
static const std::string PARTITION_FIELD_NAME; | ||
static const std::string DETECTOR_DESCRIPTION; | ||
static const std::string DETECTOR_INDEX; | ||
static const std::string EXCLUDE_FREQUENT; | ||
static const std::string CUSTOM_RULES; | ||
static const std::string USE_NULL; | ||
|
@@ -42,7 +43,8 @@ class API_EXPORT CAnomalyJobConfig { | |
CDetectorConfig() {} | ||
|
||
void parse(const rapidjson::Value& detectorConfig, | ||
const CDetectionRulesJsonParser::TStrPatternSetUMap& ruleFilters); | ||
const CDetectionRulesJsonParser::TStrPatternSetUMap& ruleFilters, | ||
CDetectionRulesJsonParser::TDetectionRuleVec& detectionRules); | ||
|
||
std::string function() const { return m_Function; } | ||
std::string fieldName() const { return m_FieldName; } | ||
|
@@ -55,9 +57,6 @@ class API_EXPORT CAnomalyJobConfig { | |
std::string detectorDescription() const { | ||
return m_DetectorDescription; | ||
} | ||
CDetectionRulesJsonParser::TDetectionRuleVec customRules() const { | ||
return m_CustomRules; | ||
} | ||
bool useNull() const { return m_UseNull; } | ||
|
||
private: | ||
|
@@ -68,7 +67,7 @@ class API_EXPORT CAnomalyJobConfig { | |
std::string m_PartitionFieldName{}; | ||
std::string m_ExcludeFrequent{}; | ||
std::string m_DetectorDescription{}; | ||
CDetectionRulesJsonParser::TDetectionRuleVec m_CustomRules{}; | ||
int m_DetectorIndex{}; | ||
bool m_UseNull{false}; | ||
}; | ||
|
||
|
@@ -80,16 +79,21 @@ class API_EXPORT CAnomalyJobConfig { | |
static const std::string DETECTORS; | ||
static const std::string INFLUENCERS; | ||
static const std::string LATENCY; | ||
static const std::string MULTIVARIATE_BY_FIELDS; | ||
static const std::string PER_PARTITION_CATEGORIZATION; | ||
static const std::string ENABLED; | ||
static const std::string STOP_ON_WARN; | ||
|
||
static const core_t::TTime DEFAULT_BUCKET_SPAN; | ||
static const core_t::TTime DEFAULT_LATENCY; | ||
|
||
public: | ||
using TStrVec = std::vector<std::string>; | ||
using TDetectorConfigVec = std::vector<CDetectorConfig>; | ||
|
||
using TIntDetectionRuleVecUMap = | ||
boost::unordered_map<int, CDetectionRulesJsonParser::TDetectionRuleVec>; | ||
|
||
public: | ||
//! Default constructor | ||
CAnalysisConfig() {} | ||
|
@@ -100,6 +104,10 @@ class API_EXPORT CAnomalyJobConfig { | |
|
||
void parse(const rapidjson::Value& json); | ||
|
||
bool processFilter(const std::string& key, const std::string& value); | ||
|
||
bool updateFilters(const boost::property_tree::ptree& propTree); | ||
|
||
core_t::TTime bucketSpan() const { return m_BucketSpan; } | ||
|
||
std::string summaryCountFieldName() const { | ||
|
@@ -121,9 +129,20 @@ class API_EXPORT CAnomalyJobConfig { | |
return m_Detectors; | ||
} | ||
const TStrVec& influencers() const { return m_Influencers; } | ||
std::string latency() const { return m_Latency; } | ||
core_t::TTime latency() const { return m_Latency; } | ||
|
||
bool multivariateByFields() const { return m_MultivariateByFields; } | ||
|
||
static core_t::TTime bucketSpanSeconds(const std::string& bucketSpanString); | ||
const TIntDetectionRuleVecUMap& detectionRules() const { | ||
return m_DetectorRules; | ||
} | ||
|
||
const CDetectionRulesJsonParser::TStrPatternSetUMap& ruleFilters() const { | ||
return m_RuleFilters; | ||
} | ||
|
||
static core_t::TTime durationSeconds(const std::string& durationString, | ||
core_t::TTime defaultDuration); | ||
|
||
private: | ||
core_t::TTime m_BucketSpan{DEFAULT_BUCKET_SPAN}; | ||
|
@@ -134,7 +153,11 @@ class API_EXPORT CAnomalyJobConfig { | |
bool m_PerPartitionCategorizationStopOnWarn{false}; | ||
TDetectorConfigVec m_Detectors{}; | ||
TStrVec m_Influencers{}; | ||
std::string m_Latency{}; | ||
core_t::TTime m_Latency{DEFAULT_LATENCY}; | ||
bool m_MultivariateByFields{false}; | ||
|
||
//! The detection rules per detector index. | ||
TIntDetectionRuleVecUMap m_DetectorRules; | ||
|
||
//! The filters per id used by categorical rule conditions. | ||
CDetectionRulesJsonParser::TStrPatternSetUMap m_RuleFilters{}; | ||
|
@@ -234,6 +257,7 @@ class API_EXPORT CAnomalyJobConfig { | |
|
||
std::string jobId() const { return m_JobId; } | ||
std::string jobType() const { return m_JobType; } | ||
CAnalysisConfig& analysisConfig() { return m_AnalysisConfig; } | ||
const CAnalysisConfig& analysisConfig() const { return m_AnalysisConfig; } | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. You could provide both const and non-const accessors. In future, as this class gets used more widely, some places may hold a const reference to the `CAnomalyJobConfig`. |
||
const CDataDescription& dataDescription() const { | ||
return m_DataDescription; | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In the long term this could be encapsulated in a method of `CAnomalyJobConfig`, say `CAnomalyJobConfig::makeModelConfig()`. But since you're doing a series of PRs you can save this for the next one.