From c6200ea43e701a383205bcfc0131a70471985e3e Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Tue, 13 Oct 2020 11:38:08 +0100 Subject: [PATCH] [7.x][ML] Provide factory setup for creating models (#1527) (#1532) Move boilerplate code for creating models to a base class method. This goes some way to reducing duplicated code and standardizing how models are created in the tests. Backports #1527 --- lib/model/unittest/CCountingModelTest.cc | 86 +++--- lib/model/unittest/CEventRateModelTest.cc | 155 +++++------ .../unittest/CEventRatePopulationModelTest.cc | 243 +++++++---------- lib/model/unittest/CMetricModelTest.cc | 100 +++---- .../unittest/CMetricPopulationModelTest.cc | 255 ++++++++---------- lib/model/unittest/CModelTestFixtureBase.h | 42 +++ 6 files changed, 386 insertions(+), 495 deletions(-) diff --git a/lib/model/unittest/CCountingModelTest.cc b/lib/model/unittest/CCountingModelTest.cc index 4743c1dadb..c8aa309184 100644 --- a/lib/model/unittest/CCountingModelTest.cc +++ b/lib/model/unittest/CCountingModelTest.cc @@ -31,7 +31,7 @@ using namespace model; class CTestFixture : public CModelTestFixtureBase { protected: - SModelParams::TStrDetectionRulePr + static SModelParams::TStrDetectionRulePr makeScheduledEvent(const std::string& description, double start, double end) { CRuleCondition conditionGte; conditionGte.appliesTo(CRuleCondition::E_Time); @@ -50,6 +50,13 @@ class CTestFixture : public CModelTestFixtureBase { SModelParams::TStrDetectionRulePr event = std::make_pair(description, rule); return event; } + + void makeModel(const SModelParams& params, + const model_t::TFeatureVec& features, + core_t::TTime startTime) { + this->makeModelT( + params, features, startTime, model_t::E_Counting, m_Gatherer, m_Model); + } }; BOOST_FIXTURE_TEST_CASE(testSkipSampling, CTestFixture) { @@ -66,14 +73,11 @@ BOOST_FIXTURE_TEST_CASE(testSkipSampling, CTestFixture) { // Model where gap is not skipped { - CModelFactory::SGathererInitializationData gathererNoGapInitData(startTime); - CModelFactory::TDataGathererPtr gathererNoGap( - factory.makeDataGatherer(gathererNoGapInitData)); - BOOST_REQUIRE_EQUAL(std::size_t(0), this->addPerson("p", gathererNoGap)); - CModelFactory::SModelInitializationData modelNoGapInitData(gathererNoGap); - CAnomalyDetectorModel::TModelPtr modelHolderNoGap(factory.makeModel(modelNoGapInitData)); - CCountingModel* modelNoGap = - dynamic_cast(modelHolderNoGap.get()); + CModelFactory::TDataGathererPtr gathererNoGap; + CModelFactory::TModelPtr modelNoGap; + this->makeModelT( + params, features, startTime, model_t::E_Counting, gathererNoGap, modelNoGap); + BOOST_REQUIRE_EQUAL(0, this->addPerson("p", gathererNoGap)); // |2|2|0|0|1| -> 1.0 mean count this->addArrival(*gathererNoGap, 100, "p"); @@ -90,15 +94,12 @@ BOOST_FIXTURE_TEST_CASE(testSkipSampling, CTestFixture) { // Model where gap is skipped { - CModelFactory::SGathererInitializationData gathererWithGapInitData(startTime); - CModelFactory::TDataGathererPtr gathererWithGap( - factory.makeDataGatherer(gathererWithGapInitData)); - BOOST_REQUIRE_EQUAL(std::size_t(0), this->addPerson("p", gathererWithGap)); - CModelFactory::SModelInitializationData modelWithGapInitData(gathererWithGap); - CAnomalyDetectorModel::TModelPtr modelHolderWithGap( - factory.makeModel(modelWithGapInitData)); - CCountingModel* modelWithGap = - dynamic_cast(modelHolderWithGap.get()); + CModelFactory::TDataGathererPtr gathererWithGap; + CModelFactory::TModelPtr modelWithGap; + this->makeModelT(params, features, startTime, + model_t::E_Counting, + gathererWithGap, modelWithGap); + BOOST_REQUIRE_EQUAL(0, this->addPerson("p", gathererWithGap)); // |2|2|0|0|1| // |2|X|X|X|1| -> 1.5 mean count where X means skipped bucket @@ -109,7 +110,7 @@ BOOST_FIXTURE_TEST_CASE(testSkipSampling, CTestFixture) { this->addArrival(*gathererWithGap, 280, "p"); modelWithGap->skipSampling(500); modelWithGap->prune(maxAgeBuckets); - BOOST_REQUIRE_EQUAL(std::size_t(1), gathererWithGap->numberActivePeople()); + BOOST_REQUIRE_EQUAL(1, gathererWithGap->numberActivePeople()); this->addArrival(*gathererWithGap, 500, "p"); modelWithGap->sample(500, 600, m_ResourceMonitor); @@ -137,14 +138,10 @@ BOOST_FIXTURE_TEST_CASE(testCheckScheduledEvents, CTestFixture) { factory.features(features); { - CModelFactory::SGathererInitializationData gathererNoGapInitData(startTime); - CModelFactory::TDataGathererPtr gatherer(factory.makeDataGatherer(gathererNoGapInitData)); - CModelFactory::SModelInitializationData modelNoGapInitData(gatherer); - this->addArrival(*gatherer, 200, "p"); - - CAnomalyDetectorModel::TModelPtr modelHolderNoGap(factory.makeModel(modelNoGapInitData)); - CCountingModel* modelNoGap = - dynamic_cast(modelHolderNoGap.get()); + this->makeModel(params, features, startTime); + CCountingModel* modelNoGap = dynamic_cast(m_Model.get()); + BOOST_TEST_REQUIRE(modelNoGap); + BOOST_REQUIRE_EQUAL(0, this->addPerson("p", m_Gatherer)); SModelParams::TStrDetectionRulePrVec matchedEvents = modelNoGap->checkScheduledEvents(50); @@ -186,14 +183,10 @@ BOOST_FIXTURE_TEST_CASE(testCheckScheduledEvents, CTestFixture) { // Test sampleBucketStatistics { - CModelFactory::SGathererInitializationData gathererNoGapInitData(startTime); - CModelFactory::TDataGathererPtr gatherer(factory.makeDataGatherer(gathererNoGapInitData)); - CModelFactory::SModelInitializationData modelNoGapInitData(gatherer); - this->addArrival(*gatherer, 100, "p"); - - CAnomalyDetectorModel::TModelPtr modelHolderNoGap(factory.makeModel(modelNoGapInitData)); - CCountingModel* modelNoGap = - dynamic_cast(modelHolderNoGap.get()); + this->makeModel(params, features, startTime); + CCountingModel* modelNoGap = dynamic_cast(m_Model.get()); + BOOST_TEST_REQUIRE(modelNoGap); + BOOST_REQUIRE_EQUAL(0, this->addPerson("p", m_Gatherer)); // There are no events at this time modelNoGap->sampleBucketStatistics(0, 100, m_ResourceMonitor); @@ -226,18 +219,13 @@ BOOST_FIXTURE_TEST_CASE(testInterimBucketCorrector, CTestFixture) { SModelParams params(bucketLength); params.s_DecayRate = 0.001; - auto interimBucketCorrector = std::make_shared(bucketLength); - CCountingModelFactory factory(params, interimBucketCorrector); - model_t::TFeatureVec features{model_t::E_IndividualCountByBucketAndPerson}; - factory.features(features); - CModelFactory::SGathererInitializationData gathererInitData(time); - CModelFactory::TDataGathererPtr gatherer(factory.makeDataGatherer(gathererInitData)); - BOOST_REQUIRE_EQUAL(std::size_t(0), this->addPerson("p1", gatherer)); - BOOST_REQUIRE_EQUAL(std::size_t(1), this->addPerson("p2", gatherer)); - CModelFactory::SModelInitializationData modelInitData(gatherer); - CAnomalyDetectorModel::TModelPtr modelHolder(factory.makeModel(modelInitData)); - CCountingModel* model{dynamic_cast(modelHolder.get())}; + this->makeModel(params, {model_t::E_IndividualCountByBucketAndPerson}, time); + CCountingModel* model = dynamic_cast(m_Model.get()); + BOOST_TEST_REQUIRE(model); + + BOOST_REQUIRE_EQUAL(0, this->addPerson("p1", m_Gatherer)); + BOOST_REQUIRE_EQUAL(1, this->addPerson("p2", m_Gatherer)); test::CRandomNumbers rng; @@ -249,7 +237,7 @@ BOOST_FIXTURE_TEST_CASE(testInterimBucketCorrector, CTestFixture) { std::sort(offsets.begin(), offsets.end()); for (auto offset : offsets) { rng.generateUniformSamples(0.0, 1.0, 1, uniform01); - this->addArrival(*gatherer, time + static_cast(offset), + this->addArrival(*m_Gatherer, time + static_cast(offset), uniform01[0] < 0.5 ? "p1" : "p2"); } model->sample(time, time + bucketLength, m_ResourceMonitor); @@ -260,11 +248,11 @@ BOOST_FIXTURE_TEST_CASE(testInterimBucketCorrector, CTestFixture) { for (std::size_t i = 0u; i < offsets.size(); ++i) { rng.generateUniformSamples(0.0, 1.0, 1, uniform01); - this->addArrival(*gatherer, time + static_cast(offsets[i]), + this->addArrival(*m_Gatherer, time + static_cast(offsets[i]), uniform01[0] < 0.5 ? "p1" : "p2"); model->sampleBucketStatistics(time, time + bucketLength, m_ResourceMonitor); BOOST_REQUIRE_EQUAL(static_cast(i + 1) / 10.0, - interimBucketCorrector->completeness()); + m_InterimBucketCorrector->completeness()); } } diff --git a/lib/model/unittest/CEventRateModelTest.cc b/lib/model/unittest/CEventRateModelTest.cc index eea54e1f22..706a3ebbbc 100644 --- a/lib/model/unittest/CEventRateModelTest.cc +++ b/lib/model/unittest/CEventRateModelTest.cc @@ -187,53 +187,20 @@ class CTestFixture : public CModelTestFixtureBase { core_t::TTime startTime, std::size_t numberPeople, const std::string& summaryCountField = EMPTY_STRING) { - this->makeModel(params, features, startTime, numberPeople, m_Gatherer, - m_Model, summaryCountField); - } + this->makeModelT(params, features, startTime, + model_t::E_EventRateOnline, m_Gatherer, + m_Model, {}, summaryCountField); - void makeModel(const SModelParams& params, - const model_t::TFeatureVec& features, - core_t::TTime startTime, - std::size_t numberPeople, - CModelFactory::TDataGathererPtr& gatherer, - CModelFactory::TModelPtr& model, - const std::string& summaryCountField = EMPTY_STRING) { - if (m_InterimBucketCorrector == nullptr) { - m_InterimBucketCorrector = - std::make_shared(params.s_BucketLength); - } - if (m_Factory == nullptr) { - m_Factory.reset(new CEventRateModelFactory( - params, m_InterimBucketCorrector, - summaryCountField.empty() ? model_t::E_None : model_t::E_Manual, - summaryCountField)); - m_Factory->features(features); - } - gatherer.reset(m_Factory->makeDataGatherer({startTime})); - model.reset(m_Factory->makeModel({gatherer})); - BOOST_TEST_REQUIRE(model); - BOOST_REQUIRE_EQUAL(model_t::E_EventRateOnline, model->category()); - BOOST_REQUIRE_EQUAL(params.s_BucketLength, model->bucketLength()); for (std::size_t i = 0u; i < numberPeople; ++i) { BOOST_REQUIRE_EQUAL( - std::size_t(i), - this->addPerson("p" + core::CStringUtils::typeToString(i + 1), gatherer)); + i, this->addPerson("p" + core::CStringUtils::typeToString(i + 1), m_Gatherer)); } } protected: - using TInterimBucketCorrectorPtr = std::shared_ptr; - using TEventRateModelFactoryPtr = std::shared_ptr; - using TDoubleSizeStrTr = core::CTriple; using TMinAccumulator = maths::CBasicStatistics::COrderStatisticsHeap; using TMinAccumulatorVec = std::vector; - -protected: - TInterimBucketCorrectorPtr m_InterimBucketCorrector; - TEventRateModelFactoryPtr m_Factory; - ml::model::CModelFactory::TDataGathererPtr m_Gatherer; - ml::model::CModelFactory::TModelPtr m_Model; }; BOOST_FIXTURE_TEST_CASE(testCountSample, CTestFixture) { @@ -441,7 +408,7 @@ BOOST_FIXTURE_TEST_CASE(testRare, CTestFixture) { } // We expect "p1 = p2 > p3 = p4 >> p5". - BOOST_REQUIRE_EQUAL(std::size_t(5), probabilities.size()); + BOOST_REQUIRE_EQUAL(5, probabilities.size()); BOOST_REQUIRE_EQUAL(probabilities[0], probabilities[1]); BOOST_TEST_REQUIRE(probabilities[1] > probabilities[2]); BOOST_REQUIRE_EQUAL(probabilities[2], probabilities[3]); @@ -598,7 +565,7 @@ BOOST_FIXTURE_TEST_CASE(testProbabilityCalculationForLowNonZeroCount, CTestFixtu } LOG_DEBUG(<< "probabilities = " << core::CContainerPrinter::print(probabilities)); - BOOST_REQUIRE_EQUAL(std::size_t(11), probabilities.size()); + BOOST_REQUIRE_EQUAL(11, probabilities.size()); BOOST_TEST_REQUIRE(probabilities[lowNonZeroCountBucket] < 0.06); BOOST_TEST_REQUIRE(probabilities[highNonZeroCountBucket] > 0.9); } @@ -642,7 +609,7 @@ BOOST_FIXTURE_TEST_CASE(testProbabilityCalculationForHighNonZeroCount, CTestFixt } LOG_DEBUG(<< "probabilities = " << core::CContainerPrinter::print(probabilities)); - BOOST_REQUIRE_EQUAL(std::size_t(11), probabilities.size()); + BOOST_REQUIRE_EQUAL(11, probabilities.size()); BOOST_TEST_REQUIRE(probabilities[lowNonZeroCountBucket] < 0.06); BOOST_TEST_REQUIRE(probabilities[highNonZeroCountBucket] > 0.9); } @@ -986,12 +953,15 @@ BOOST_FIXTURE_TEST_CASE(testPrune, CTestFixture) { features.push_back(model_t::E_IndividualTotalBucketCountByPerson); CModelFactory::TDataGathererPtr gatherer; CModelFactory::TModelPtr model_; - this->makeModel(params, features, startTime, 0, gatherer, model_); + this->makeModelT( + params, features, startTime, model_t::E_EventRateOnline, gatherer, model_); CEventRateModel* model = dynamic_cast(model_.get()); BOOST_TEST_REQUIRE(model); CModelFactory::TDataGathererPtr expectedGatherer; CModelFactory::TModelPtr expectedModel_; - this->makeModel(params, features, startTime, 0, expectedGatherer, expectedModel_); + this->makeModelT(params, features, startTime, + model_t::E_EventRateOnline, + expectedGatherer, expectedModel_); CEventRateModel* expectedModel = dynamic_cast(expectedModel_.get()); BOOST_TEST_REQUIRE(expectedModel); @@ -1245,7 +1215,7 @@ BOOST_FIXTURE_TEST_CASE(testCountProbabilityCalculationWithInfluence, CTestFixtu factory.fieldNames("", "", "", "", influenceFieldNames); factory.features({model_t::E_IndividualCountByBucketAndPerson}); CModelFactory::TDataGathererPtr gatherer(factory.makeDataGatherer(startTime)); - BOOST_REQUIRE_EQUAL(std::size_t(0), this->addPerson("p", gatherer, 1)); + BOOST_REQUIRE_EQUAL(0, this->addPerson("p", gatherer, 1)); CModelFactory::TModelPtr modelHolder(factory.makeModel(gatherer)); CEventRateModel* model = dynamic_cast(modelHolder.get()); BOOST_TEST_REQUIRE(model); @@ -1301,7 +1271,7 @@ BOOST_FIXTURE_TEST_CASE(testCountProbabilityCalculationWithInfluence, CTestFixtu factory.fieldNames("", "", "", "", influenceFieldNames); factory.features({model_t::E_IndividualCountByBucketAndPerson}); CModelFactory::TDataGathererPtr gatherer(factory.makeDataGatherer(startTime)); - BOOST_REQUIRE_EQUAL(std::size_t(0), this->addPerson("p", gatherer, 1)); + BOOST_REQUIRE_EQUAL(0, this->addPerson("p", gatherer, 1)); CModelFactory::TModelPtr modelHolder(factory.makeModel(gatherer)); CEventRateModel* model = dynamic_cast(modelHolder.get()); BOOST_TEST_REQUIRE(model); @@ -1349,7 +1319,7 @@ BOOST_FIXTURE_TEST_CASE(testCountProbabilityCalculationWithInfluence, CTestFixtu // We expect equal influence since the influencers share the count. // Also the count would be fairly normal if either influencer were // removed so their influence is high. - BOOST_REQUIRE_EQUAL(std::size_t(2), lastInfluencersResult.size()); + BOOST_REQUIRE_EQUAL(2, lastInfluencersResult.size()); BOOST_REQUIRE_CLOSE_ABSOLUTE(lastInfluencersResult[0].second, lastInfluencersResult[1].second, 0.05); BOOST_TEST_REQUIRE(lastInfluencersResult[0].second > 0.75); @@ -1364,7 +1334,7 @@ BOOST_FIXTURE_TEST_CASE(testCountProbabilityCalculationWithInfluence, CTestFixtu factory.fieldNames("", "", "", "", influenceFieldNames); factory.features({model_t::E_IndividualCountByBucketAndPerson}); CModelFactory::TDataGathererPtr gatherer(factory.makeDataGatherer(startTime)); - BOOST_REQUIRE_EQUAL(std::size_t(0), this->addPerson("p", gatherer, 1)); + BOOST_REQUIRE_EQUAL(0, this->addPerson("p", gatherer, 1)); CModelFactory::TModelPtr modelHolder(factory.makeModel(gatherer)); CEventRateModel* model = dynamic_cast(modelHolder.get()); BOOST_TEST_REQUIRE(model); @@ -1412,7 +1382,7 @@ BOOST_FIXTURE_TEST_CASE(testCountProbabilityCalculationWithInfluence, CTestFixtu // We expect equal influence since the influencers share the count. // However, the bucket is still significantly anomalous omitting // the records from either influencer so their influence is smaller. - BOOST_REQUIRE_EQUAL(std::size_t(2), lastInfluencersResult.size()); + BOOST_REQUIRE_EQUAL(2, lastInfluencersResult.size()); BOOST_REQUIRE_CLOSE_ABSOLUTE(lastInfluencersResult[0].second, lastInfluencersResult[1].second, 0.05); BOOST_TEST_REQUIRE(lastInfluencersResult[0].second > 0.5); @@ -1428,7 +1398,7 @@ BOOST_FIXTURE_TEST_CASE(testCountProbabilityCalculationWithInfluence, CTestFixtu factory.fieldNames("", "", "", "", influenceFieldNames); factory.features({model_t::E_IndividualCountByBucketAndPerson}); CModelFactory::TDataGathererPtr gatherer(factory.makeDataGatherer(startTime)); - BOOST_REQUIRE_EQUAL(std::size_t(0), this->addPerson("p", gatherer, 1)); + BOOST_REQUIRE_EQUAL(0, this->addPerson("p", gatherer, 1)); CModelFactory::TModelPtr modelHolder(factory.makeModel(gatherer)); CEventRateModel* model = dynamic_cast(modelHolder.get()); BOOST_TEST_REQUIRE(model); @@ -1478,7 +1448,7 @@ BOOST_FIXTURE_TEST_CASE(testCountProbabilityCalculationWithInfluence, CTestFixtu } // The influence should be dominated by the first influencer, and the // _extra influencers should be dropped by the cutoff threshold - BOOST_REQUIRE_EQUAL(std::size_t(1), lastInfluencersResult.size()); + BOOST_REQUIRE_EQUAL(1, lastInfluencersResult.size()); BOOST_TEST_REQUIRE(lastInfluencersResult[0].second > 0.99); } { @@ -1491,7 +1461,7 @@ BOOST_FIXTURE_TEST_CASE(testCountProbabilityCalculationWithInfluence, CTestFixtu factory.fieldNames("", "", "", "", influenceFieldNames); factory.features({model_t::E_IndividualCountByBucketAndPerson}); CModelFactory::TDataGathererPtr gatherer(factory.makeDataGatherer(startTime)); - BOOST_REQUIRE_EQUAL(std::size_t(0), this->addPerson("p", gatherer, 2)); + BOOST_REQUIRE_EQUAL(0, this->addPerson("p", gatherer, 2)); CModelFactory::TModelPtr modelHolder(factory.makeModel(gatherer)); CEventRateModel* model = dynamic_cast(modelHolder.get()); BOOST_TEST_REQUIRE(model); @@ -1544,7 +1514,7 @@ BOOST_FIXTURE_TEST_CASE(testCountProbabilityCalculationWithInfluence, CTestFixtu } // The influence should be dominated by the first influencer for both fields, // and the _extra influencers should be dropped by the cutoff threshold - BOOST_REQUIRE_EQUAL(std::size_t(2), lastInfluencersResult.size()); + BOOST_REQUIRE_EQUAL(2, lastInfluencersResult.size()); BOOST_REQUIRE_EQUAL(std::string("IF1"), *lastInfluencersResult[0].first.first); BOOST_REQUIRE_EQUAL(std::string("inf"), *lastInfluencersResult[0].first.second); BOOST_REQUIRE_EQUAL(std::string("IF2"), *lastInfluencersResult[1].first.first); @@ -1564,7 +1534,7 @@ BOOST_FIXTURE_TEST_CASE(testCountProbabilityCalculationWithInfluence, CTestFixtu factory.fieldNames("", "", byFieldName, "", {byFieldName}); factory.features({model_t::E_IndividualCountByBucketAndPerson}); CModelFactory::TDataGathererPtr gatherer(factory.makeDataGatherer(startTime)); - BOOST_REQUIRE_EQUAL(std::size_t(0), this->addPerson("p", gatherer, 1)); + BOOST_REQUIRE_EQUAL(0, this->addPerson("p", gatherer, 1)); CModelFactory::TModelPtr modelHolder(factory.makeModel(gatherer)); CEventRateModel* model = dynamic_cast(modelHolder.get()); BOOST_TEST_REQUIRE(model); @@ -1624,8 +1594,7 @@ BOOST_FIXTURE_TEST_CASE(testDistinctCountProbabilityCalculationWithInfluence, CT factory.fieldNames("", "", "", "foo", influenceFieldNames); factory.features({model_t::E_IndividualUniqueCountByBucketAndPerson}); CModelFactory::TDataGathererPtr gatherer(factory.makeDataGatherer(startTime)); - BOOST_REQUIRE_EQUAL(std::size_t(0), - this->addPerson("p", gatherer, 1, TOptionalStr("v"))); + BOOST_REQUIRE_EQUAL(0, this->addPerson("p", gatherer, 1, TOptionalStr("v"))); CModelFactory::TModelPtr modelHolder(factory.makeModel(gatherer)); CEventRateModel* model = dynamic_cast(modelHolder.get()); BOOST_TEST_REQUIRE(model); @@ -1692,8 +1661,7 @@ BOOST_FIXTURE_TEST_CASE(testDistinctCountProbabilityCalculationWithInfluence, CT factory.fieldNames("", "", "", "foo", influenceFieldNames); factory.features({model_t::E_IndividualUniqueCountByBucketAndPerson}); CModelFactory::TDataGathererPtr gatherer(factory.makeDataGatherer(startTime)); - BOOST_REQUIRE_EQUAL(std::size_t(0), - this->addPerson("p", gatherer, 1, TOptionalStr("v"))); + BOOST_REQUIRE_EQUAL(0, this->addPerson("p", gatherer, 1, TOptionalStr("v"))); CModelFactory::TModelPtr modelHolder(factory.makeModel(gatherer)); CEventRateModel* model = dynamic_cast(modelHolder.get()); BOOST_TEST_REQUIRE(model); @@ -1755,7 +1723,7 @@ BOOST_FIXTURE_TEST_CASE(testDistinctCountProbabilityCalculationWithInfluence, CT // The influence should be shared by the two influencers, and as the anomaly // is about twice the regular count, each influencer contributes a lot to // the anomaly - BOOST_REQUIRE_EQUAL(std::size_t(2), lastInfluencersResult.size()); + BOOST_REQUIRE_EQUAL(2, lastInfluencersResult.size()); BOOST_REQUIRE_CLOSE_ABSOLUTE(lastInfluencersResult[0].second, lastInfluencersResult[1].second, 0.05); BOOST_TEST_REQUIRE(lastInfluencersResult[0].second > 0.6); @@ -1770,8 +1738,7 @@ BOOST_FIXTURE_TEST_CASE(testDistinctCountProbabilityCalculationWithInfluence, CT factory.fieldNames("", "", "", "foo", influenceFieldNames); factory.features({model_t::E_IndividualUniqueCountByBucketAndPerson}); CModelFactory::TDataGathererPtr gatherer(factory.makeDataGatherer(startTime)); - BOOST_REQUIRE_EQUAL(std::size_t(0), - this->addPerson("p", gatherer, 1, TOptionalStr("v"))); + BOOST_REQUIRE_EQUAL(0, this->addPerson("p", gatherer, 1, TOptionalStr("v"))); CModelFactory::TModelPtr modelHolder(factory.makeModel(gatherer)); CEventRateModel* model = dynamic_cast(modelHolder.get()); BOOST_TEST_REQUIRE(model); @@ -1831,7 +1798,7 @@ BOOST_FIXTURE_TEST_CASE(testDistinctCountProbabilityCalculationWithInfluence, CT } // The influence should be dominated by the first influencer, and the // _extra influencer should be dropped by the cutoff threshold - BOOST_REQUIRE_EQUAL(std::size_t(1), lastInfluencersResult.size()); + BOOST_REQUIRE_EQUAL(1, lastInfluencersResult.size()); BOOST_TEST_REQUIRE(lastInfluencersResult[0].second > 0.8); } { @@ -1844,8 +1811,7 @@ BOOST_FIXTURE_TEST_CASE(testDistinctCountProbabilityCalculationWithInfluence, CT factory.fieldNames("", "", "", "foo", influenceFieldNames); factory.features({model_t::E_IndividualUniqueCountByBucketAndPerson}); CModelFactory::TDataGathererPtr gatherer(factory.makeDataGatherer(startTime)); - BOOST_REQUIRE_EQUAL(std::size_t(0), - this->addPerson("p", gatherer, 2, TOptionalStr("v"))); + BOOST_REQUIRE_EQUAL(0, this->addPerson("p", gatherer, 2, TOptionalStr("v"))); CModelFactory::TModelPtr modelHolder(factory.makeModel(gatherer)); CEventRateModel* model = dynamic_cast(modelHolder.get()); BOOST_TEST_REQUIRE(model); @@ -1912,7 +1878,7 @@ BOOST_FIXTURE_TEST_CASE(testDistinctCountProbabilityCalculationWithInfluence, CT } // The influence should be dominated by the first influencer for both fields, and the // _extra influencers should be dropped by the cutoff threshold - BOOST_REQUIRE_EQUAL(std::size_t(2), lastInfluencersResult.size()); + BOOST_REQUIRE_EQUAL(2, lastInfluencersResult.size()); BOOST_REQUIRE_EQUAL(std::string("IF1"), *lastInfluencersResult[0].first.first); BOOST_REQUIRE_EQUAL(std::string("inf"), *lastInfluencersResult[0].first.second); BOOST_REQUIRE_EQUAL(std::string("IF2"), *lastInfluencersResult[1].first.first); @@ -1934,11 +1900,11 @@ BOOST_FIXTURE_TEST_CASE(testRareWithInfluence, CTestFixture) { factory.fieldNames("", "", "", "", influenceFieldNames); factory.features(function_t::features(function_t::E_IndividualRare)); CModelFactory::TDataGathererPtr gatherer(factory.makeDataGatherer(startTime)); - BOOST_REQUIRE_EQUAL(std::size_t(0), this->addPerson("p1", gatherer, 1)); - BOOST_REQUIRE_EQUAL(std::size_t(1), this->addPerson("p2", gatherer, 1)); - BOOST_REQUIRE_EQUAL(std::size_t(2), this->addPerson("p3", gatherer, 1)); - BOOST_REQUIRE_EQUAL(std::size_t(3), this->addPerson("p4", gatherer, 1)); - BOOST_REQUIRE_EQUAL(std::size_t(4), this->addPerson("p5", gatherer, 1)); + BOOST_REQUIRE_EQUAL(0, this->addPerson("p1", gatherer, 1)); + BOOST_REQUIRE_EQUAL(1, this->addPerson("p2", gatherer, 1)); + BOOST_REQUIRE_EQUAL(2, this->addPerson("p3", gatherer, 1)); + BOOST_REQUIRE_EQUAL(3, this->addPerson("p4", gatherer, 1)); + BOOST_REQUIRE_EQUAL(4, this->addPerson("p5", gatherer, 1)); CModelFactory::TModelPtr modelHolder(factory.makeModel(gatherer)); CEventRateModel* model = dynamic_cast(modelHolder.get()); BOOST_TEST_REQUIRE(model); @@ -1978,7 +1944,7 @@ BOOST_FIXTURE_TEST_CASE(testRareWithInfluence, CTestFixture) { } // We expect "p1 = p2 = p3 = p4 >> p5". - BOOST_REQUIRE_EQUAL(std::size_t(5), probabilities.size()); + BOOST_REQUIRE_EQUAL(5, probabilities.size()); BOOST_REQUIRE_EQUAL(probabilities[0], probabilities[1]); BOOST_REQUIRE_EQUAL(probabilities[1], probabilities[2]); BOOST_REQUIRE_EQUAL(probabilities[2], probabilities[3]); @@ -1986,7 +1952,7 @@ BOOST_FIXTURE_TEST_CASE(testRareWithInfluence, CTestFixture) { // Expect the influence for this anomaly to be "INF1":"inf2" LOG_DEBUG(<< core::CContainerPrinter::print(lastInfluencersResult)); - BOOST_REQUIRE_EQUAL(std::size_t(1), lastInfluencersResult.size()); + BOOST_REQUIRE_EQUAL(1, lastInfluencersResult.size()); BOOST_TEST_REQUIRE(lastInfluencersResult[0].second > 0.75); BOOST_REQUIRE_EQUAL(std::string("IF1"), *lastInfluencersResult[0].first.first); BOOST_REQUIRE_EQUAL(std::string("inf2"), *lastInfluencersResult[0].first.second); @@ -2026,8 +1992,14 @@ BOOST_FIXTURE_TEST_CASE(testSkipSampling, CTestFixture) { model_t::TFeatureVec features{feature}; CModelFactory::TDataGathererPtr gathererNoGap; CModelFactory::TModelPtr modelNoGap_; - this->makeModel(params, features, startTime, 2, gathererNoGap, modelNoGap_); + this->makeModelT(params, features, startTime, + model_t::E_EventRateOnline, + gathererNoGap, modelNoGap_); CEventRateModel* modelNoGap = dynamic_cast(modelNoGap_.get()); + for (std::size_t i = 0u; i < 2; ++i) { + BOOST_REQUIRE_EQUAL( + i, this->addPerson("p" + core::CStringUtils::typeToString(i + 1), gathererNoGap)); + } // p1: |1|1|1| // p2: |1|0|0| @@ -2041,8 +2013,14 @@ BOOST_FIXTURE_TEST_CASE(testSkipSampling, CTestFixture) { CModelFactory::TDataGathererPtr gathererWithGap; CModelFactory::TModelPtr modelWithGap_; - this->makeModel(params, features, startTime, 2, gathererWithGap, modelWithGap_); + this->makeModelT(params, features, startTime, + model_t::E_EventRateOnline, + gathererWithGap, modelWithGap_); CEventRateModel* modelWithGap = dynamic_cast(modelWithGap_.get()); + for (std::size_t i = 0u; i < 2; ++i) { + BOOST_REQUIRE_EQUAL( + i, this->addPerson("p" + core::CStringUtils::typeToString(i + 1), gathererWithGap)); + } // p1: |1|1|0|0|0|0|0|0|0|0|1|1| // p1: |1|X|X|X|X|X|X|X|X|X|1|1| -> equal to |1|1|1| @@ -2061,7 +2039,7 @@ BOOST_FIXTURE_TEST_CASE(testSkipSampling, CTestFixture) { // Check prune does not remove people because last seen times are updated by adding gap duration modelWithGap->prune(maxAgeBuckets); - BOOST_REQUIRE_EQUAL(std::size_t(2), gathererWithGap->numberActivePeople()); + BOOST_REQUIRE_EQUAL(2, gathererWithGap->numberActivePeople()); this->addArrival(*gathererWithGap, 1000, "p1"); modelWithGap->sample(1000, 1100, m_ResourceMonitor); @@ -2090,11 +2068,11 @@ BOOST_FIXTURE_TEST_CASE(testSkipSampling, CTestFixture) { modelWithGap->sample(1200, 1500, m_ResourceMonitor); modelWithGap->prune(maxAgeBuckets); // Age at this point will be 500 and since it's equal to maxAge it should still be here - BOOST_REQUIRE_EQUAL(std::size_t(2), gathererWithGap->numberActivePeople()); + BOOST_REQUIRE_EQUAL(2, gathererWithGap->numberActivePeople()); modelWithGap->sample(1500, 1600, m_ResourceMonitor); modelWithGap->prune(maxAgeBuckets); // Age at this point will be 600 so it should get pruned - BOOST_REQUIRE_EQUAL(std::size_t(1), gathererWithGap->numberActivePeople()); + BOOST_REQUIRE_EQUAL(1, gathererWithGap->numberActivePeople()); } BOOST_FIXTURE_TEST_CASE(testExplicitNulls, CTestFixture) { @@ -2108,8 +2086,9 @@ BOOST_FIXTURE_TEST_CASE(testExplicitNulls, CTestFixture) { model_t::TFeatureVec features{feature}; CModelFactory::TDataGathererPtr gathererSkipGap; CModelFactory::TModelPtr modelSkipGap_; - this->makeModel(params, features, startTime, 0, gathererSkipGap, - modelSkipGap_, summaryCountField); + this->makeModelT(params, features, startTime, + model_t::E_EventRateOnline, gathererSkipGap, + modelSkipGap_, {}, summaryCountField); CEventRateModel* modelSkipGap = dynamic_cast(modelSkipGap_.get()); // The idea here is to compare a model that has a gap skipped against a model @@ -2137,8 +2116,9 @@ BOOST_FIXTURE_TEST_CASE(testExplicitNulls, CTestFixture) { CModelFactory::TDataGathererPtr gathererExNull; CModelFactory::TModelPtr modelExNullGap_; - this->makeModel(params, features, startTime, 0, gathererExNull, - modelExNullGap_, summaryCountField); + this->makeModelT(params, features, startTime, + model_t::E_EventRateOnline, gathererExNull, + modelExNullGap_, {}, summaryCountField); CEventRateModel* modelExNullGap = dynamic_cast(modelExNullGap_.get()); @@ -2211,7 +2191,7 @@ BOOST_FIXTURE_TEST_CASE(testInterimCorrections, CTestFixture) { core_t::TTime now{startTime}; TDoubleVec samples(3, 0.0); while (now < endTime) { - rng.generateUniformSamples(50.0, 70.0, std::size_t(3), samples); + rng.generateUniformSamples(50.0, 70.0, 3, samples); for (std::size_t i = 0; i < static_cast(samples[0] + 0.5); ++i) { this->addArrival(*m_Gatherer, now, "p1"); } @@ -2336,7 +2316,7 @@ BOOST_FIXTURE_TEST_CASE(testInterimCorrectionsWithCorrelations, CTestFixture) { test::CRandomNumbers rng; TDoubleVec samples(1, 0.0); while (now < endTime) { - rng.generateUniformSamples(80.0, 100.0, std::size_t(1), samples); + rng.generateUniformSamples(80.0, 100.0, 1, samples); for (std::size_t i = 0; i < static_cast(samples[0] + 0.5); ++i) { this->addArrival(*m_Gatherer, now, "p1"); } @@ -2413,14 +2393,17 @@ BOOST_FIXTURE_TEST_CASE(testSummaryCountZeroRecordsAreIgnored, CTestFixture) { CModelFactory::TDataGathererPtr gathererWithZeros; CModelFactory::TModelPtr modelWithZerosPtr; - this->makeModel(params, {model_t::E_IndividualCountByBucketAndPerson}, startTime, - 0, gathererWithZeros, modelWithZerosPtr, summaryCountField); + this->makeModelT( + params, {model_t::E_IndividualCountByBucketAndPerson}, startTime, + model_t::E_EventRateOnline, gathererWithZeros, modelWithZerosPtr, {}, + summaryCountField); CEventRateModel& modelWithZeros = static_cast(*modelWithZerosPtr); CModelFactory::TDataGathererPtr gathererNoZeros; CModelFactory::TModelPtr modelNoZerosPtr; - this->makeModel(params, {model_t::E_IndividualCountByBucketAndPerson}, startTime, - 0, gathererNoZeros, modelNoZerosPtr, summaryCountField); + this->makeModelT( + params, {model_t::E_IndividualCountByBucketAndPerson}, startTime, + model_t::E_EventRateOnline, gathererNoZeros, modelNoZerosPtr, {}, summaryCountField); CEventRateModel& modelNoZeros = static_cast(*modelNoZerosPtr); // The idea here is to compare a model that has records with summary count of zero @@ -2477,7 +2460,7 @@ BOOST_FIXTURE_TEST_CASE(testComputeProbabilityGivenDetectionRule, CTestFixture) core_t::TTime now = startTime; TDoubleVec samples(1, 0.0); while (now < endTime) { - rng.generateUniformSamples(50.0, 70.0, std::size_t(1), samples); + rng.generateUniformSamples(50.0, 70.0, 1, samples); for (std::size_t i = 0; i < static_cast(samples[0] + 0.5); ++i) { this->addArrival(*m_Gatherer, now, "p1"); } diff --git a/lib/model/unittest/CEventRatePopulationModelTest.cc b/lib/model/unittest/CEventRatePopulationModelTest.cc index a04066b676..99255da9e1 100644 --- a/lib/model/unittest/CEventRatePopulationModelTest.cc +++ b/lib/model/unittest/CEventRatePopulationModelTest.cc @@ -34,7 +34,6 @@ #include "CModelTestFixtureBase.h" -#include #include #include #include @@ -45,6 +44,7 @@ #include #include #include +#include #include #include @@ -85,12 +85,12 @@ class CTestFixture : public CModelTestFixtureBase { TStrVec attributes; for (std::size_t i = 0u; i < numberAttributes; ++i) { - attributes.push_back("c" + boost::lexical_cast(i)); + attributes.push_back("c" + std::to_string(i)); } TStrVec people; for (std::size_t i = 0u; i < numberPeople; ++i) { - people.push_back("p" + boost::lexical_cast(i)); + people.push_back("p" + std::to_string(i)); } TSizeVecVec attributePeople{ @@ -141,6 +141,13 @@ class CTestFixture : public CModelTestFixtureBase { std::sort(messages.begin(), messages.end()); } + + void makeModel(const SModelParams& params, + const model_t::TFeatureVec& features, + core_t::TTime startTime) { + this->makeModelT( + params, features, startTime, model_t::E_EventRateOnline, m_Gatherer, m_Model); + } }; BOOST_FIXTURE_TEST_CASE(testBasicAccessors, CTestFixture) { @@ -158,16 +165,10 @@ BOOST_FIXTURE_TEST_CASE(testBasicAccessors, CTestFixture) { SModelParams params(bucketLength); params.s_DecayRate = 0.001; - auto interimBucketCorrector = std::make_shared(bucketLength); - CEventRatePopulationModelFactory factory(params, interimBucketCorrector); - factory.features({model_t::E_PopulationCountByBucketPersonAndAttribute}); - CModelFactory::SGathererInitializationData gathererInitData(startTime); - CModelFactory::TDataGathererPtr gatherer( - dynamic_cast(factory.makeDataGatherer(gathererInitData))); - CModelFactory::SModelInitializationData modelInitData(gatherer); - CAnomalyDetectorModel::TModelPtr model(factory.makeModel(modelInitData)); - - BOOST_REQUIRE_EQUAL(model_t::E_EventRateOnline, model->category()); + this->makeModel(params, {model_t::E_PopulationCountByBucketPersonAndAttribute}, startTime); + CEventRatePopulationModel* model = + dynamic_cast(m_Model.get()); + BOOST_TEST_REQUIRE(model); TSizeUInt64Map expectedBucketPersonCounts; TSizeSizePrUInt64Map expectedBucketPersonAttributeCounts; @@ -180,16 +181,16 @@ BOOST_FIXTURE_TEST_CASE(testBasicAccessors, CTestFixture) { << startTime + bucketLength << ")"); // Test the person and attribute invariants. - for (std::size_t j = 0u; j < gatherer->numberActivePeople(); ++j) { + for (std::size_t j = 0u; j < m_Gatherer->numberActivePeople(); ++j) { const std::string& name = model->personName(j); std::size_t pid; - BOOST_TEST_REQUIRE(gatherer->personId(name, pid)); + BOOST_TEST_REQUIRE(m_Gatherer->personId(name, pid)); BOOST_REQUIRE_EQUAL(j, pid); } - for (std::size_t j = 0u; j < gatherer->numberActiveAttributes(); ++j) { + for (std::size_t j = 0u; j < m_Gatherer->numberActiveAttributes(); ++j) { const std::string& name = model->attributeName(j); std::size_t cid; - BOOST_TEST_REQUIRE(gatherer->attributeId(name, cid)); + BOOST_TEST_REQUIRE(m_Gatherer->attributeId(name, cid)); BOOST_REQUIRE_EQUAL(j, cid); } @@ -227,11 +228,11 @@ BOOST_FIXTURE_TEST_CASE(testBasicAccessors, CTestFixture) { startTime += bucketLength; } - this->addArrival(message, gatherer); + this->addArrival(message, m_Gatherer); std::size_t pid, cid; - BOOST_TEST_REQUIRE(gatherer->personId(message.s_Person, pid)); - BOOST_TEST_REQUIRE(gatherer->attributeId(message.s_Attribute, cid)); + BOOST_TEST_REQUIRE(m_Gatherer->personId(message.s_Person, pid)); + BOOST_TEST_REQUIRE(m_Gatherer->attributeId(message.s_Attribute, cid)); ++expectedBucketPersonCounts[pid]; ++expectedBucketPersonAttributeCounts[{pid, cid}]; } @@ -269,14 +270,14 @@ BOOST_FIXTURE_TEST_CASE(testFeatures, CTestFixture) { TDouble2VecWeightsAryVec& weights() { return m_Weights; } private: - using TDoubleSizeUMap = boost::unordered_map; + using TDoubleSizeUMap = std::unordered_map; private: TDoubleSizeUMap m_Uniques; TDouble2VecVec m_Values; TDouble2VecWeightsAryVec m_Weights; }; - using TSizeUniqueValuesUMap = boost::unordered_map; + using TSizeUniqueValuesUMap = std::unordered_map; core_t::TTime startTime = 1367280000; const core_t::TTime bucketLength = 3600; @@ -288,22 +289,16 @@ BOOST_FIXTURE_TEST_CASE(testFeatures, CTestFixture) { // Bucket non-zero count unique person count. SModelParams params(bucketLength); params.s_InitialDecayRateMultiplier = 1.0; - auto interimBucketCorrector = std::make_shared(bucketLength); - CEventRatePopulationModelFactory factory(params, interimBucketCorrector); model_t::TFeatureVec features{model_t::E_PopulationCountByBucketPersonAndAttribute, model_t::E_PopulationUniquePersonCountByAttribute}; - factory.features(features); - CModelFactory::SGathererInitializationData gathererInitData(startTime); - CModelFactory::TDataGathererPtr gatherer( - dynamic_cast(factory.makeDataGatherer(gathererInitData))); - CModelFactory::SModelInitializationData modelInitData(gatherer); - CAnomalyDetectorModel::TModelPtr modelHolder(factory.makeModel(modelInitData)); + this->makeModel(params, features, startTime); CEventRatePopulationModel* model = - dynamic_cast(modelHolder.get()); + dynamic_cast(m_Model.get()); + BOOST_TEST_REQUIRE(model); model::CModelFactory::TFeatureMathsModelPtrPrVec models{ - factory.defaultFeatureModels(features, bucketLength, 1.0, false)}; - BOOST_REQUIRE_EQUAL(std::size_t(1), models.size()); + m_Factory->defaultFeatureModels(features, bucketLength, 1.0, false)}; + BOOST_REQUIRE_EQUAL(1, models.size()); BOOST_REQUIRE_EQUAL(model_t::E_PopulationCountByBucketPersonAndAttribute, models[0].first); @@ -399,11 +394,11 @@ BOOST_FIXTURE_TEST_CASE(testFeatures, CTestFixture) { expectedCounts.clear(); } - this->addArrival(message, gatherer); + this->addArrival(message, m_Gatherer); std::size_t pid, cid; - BOOST_TEST_REQUIRE(gatherer->personId(message.s_Person, pid)); - BOOST_TEST_REQUIRE(gatherer->attributeId(message.s_Attribute, cid)); + BOOST_TEST_REQUIRE(m_Gatherer->personId(message.s_Person, pid)); + BOOST_TEST_REQUIRE(m_Gatherer->attributeId(message.s_Attribute, cid)); ++expectedCounts[{pid, cid}]; } } @@ -418,16 +413,12 @@ BOOST_FIXTURE_TEST_CASE(testComputeProbability, CTestFixture) { SModelParams params(bucketLength); params.s_DecayRate = 0.001; - auto interimBucketCorrector = std::make_shared(bucketLength); - CEventRatePopulationModelFactory factory(params, interimBucketCorrector); - factory.features({model_t::E_PopulationCountByBucketPersonAndAttribute, - model_t::E_PopulationUniquePersonCountByAttribute}); - CModelFactory::SGathererInitializationData gathererInitData(startTime); - CModelFactory::TDataGathererPtr gatherer(factory.makeDataGatherer(gathererInitData)); - CModelFactory::SModelInitializationData modelInitData(gatherer); - CAnomalyDetectorModel::TModelPtr modelHolder(factory.makeModel(modelInitData)); + model_t::TFeatureVec features{model_t::E_PopulationCountByBucketPersonAndAttribute, + model_t::E_PopulationUniquePersonCountByAttribute}; + this->makeModel(params, features, startTime); CEventRatePopulationModel* model = - dynamic_cast(modelHolder.get()); + dynamic_cast(m_Model.get()); + BOOST_TEST_REQUIRE(model); TStrVec expectedAnomalies{"[10, p1, c0]", "[15, p11, c0]", "[30, p4, c2]", "[35, p5, c2]", "[50, p11, c0]", "[75, p5, c2]"}; @@ -435,7 +426,7 @@ BOOST_FIXTURE_TEST_CASE(testComputeProbability, CTestFixture) { TAnomalyVec orderedAnomalies; this->generateOrderedAnomalies(6u, startTime, bucketLength, messages, - gatherer, *model, orderedAnomalies); + m_Gatherer, *model, orderedAnomalies); BOOST_REQUIRE_EQUAL(expectedAnomalies.size(), orderedAnomalies.size()); for (std::size_t i = 0u; i < orderedAnomalies.size(); ++i) { @@ -506,18 +497,15 @@ BOOST_FIXTURE_TEST_CASE(testPrune, CTestFixture) { SModelParams params(bucketLength); params.s_DecayRate = 0.01; - auto interimBucketCorrector = std::make_shared(bucketLength); - CEventRatePopulationModelFactory factory(params, interimBucketCorrector); - factory.features({model_t::E_PopulationCountByBucketPersonAndAttribute, - model_t::E_PopulationUniquePersonCountByAttribute}); - CModelFactory::SGathererInitializationData gathererInitData(startTime); - CModelFactory::TDataGathererPtr gatherer(factory.makeDataGatherer(gathererInitData)); - CModelFactory::SModelInitializationData modelInitData(gatherer); - CAnomalyDetectorModel::TModelPtr model(factory.makeModel(modelInitData)); + model_t::TFeatureVec features{model_t::E_PopulationCountByBucketPersonAndAttribute, + model_t::E_PopulationUniquePersonCountByAttribute}; + this->makeModel(params, features, startTime); + CEventRatePopulationModel* model = + dynamic_cast(m_Model.get()); BOOST_TEST_REQUIRE(model); - CModelFactory::TDataGathererPtr expectedGatherer(factory.makeDataGatherer(gathererInitData)); - CModelFactory::SModelInitializationData expectedModelInitData(expectedGatherer); - CAnomalyDetectorModel::TModelPtr expectedModel(factory.makeModel(expectedModelInitData)); + + CModelFactory::TDataGathererPtr expectedGatherer(m_Factory->makeDataGatherer({startTime})); + CAnomalyDetectorModel::TModelPtr expectedModel(m_Factory->makeModel({expectedGatherer})); BOOST_TEST_REQUIRE(expectedModel); TMessageVec messages; @@ -558,11 +546,11 @@ BOOST_FIXTURE_TEST_CASE(testPrune, CTestFixture) { model->sample(bucketStart, bucketStart + bucketLength, m_ResourceMonitor); bucketStart += bucketLength; } - this->addArrival(message, gatherer); + this->addArrival(message, m_Gatherer); } model->sample(bucketStart, bucketStart + bucketLength, m_ResourceMonitor); size_t maxDimensionBeforePrune(model->dataGatherer().maxDimension()); - model->prune(); + model->prune(model->defaultPruneWindow()); size_t maxDimensionAfterPrune(model->dataGatherer().maxDimension()); BOOST_REQUIRE_EQUAL(maxDimensionBeforePrune, maxDimensionAfterPrune); @@ -582,14 +570,14 @@ BOOST_FIXTURE_TEST_CASE(testPrune, CTestFixture) { // Now check that we recycle the person and attribute slots. - bucketStart = gatherer->currentBucketStartTime() + bucketLength; + bucketStart = m_Gatherer->currentBucketStartTime() + bucketLength; TMessageVec newMessages{{bucketStart + 10, "p1", "c2"}, {bucketStart + 200, "p5", "c6"}, {bucketStart + 2100, "p5", "c6"}}; for (const auto& newMessage : newMessages) { - this->addArrival(newMessage, gatherer); + this->addArrival(newMessage, m_Gatherer); this->addArrival(newMessage, expectedGatherer); } model->sample(bucketStart, bucketStart + bucketLength, m_ResourceMonitor); @@ -668,20 +656,12 @@ BOOST_FIXTURE_TEST_CASE(testFrequency, CTestFixture) { SModelParams params(bucketLength); params.s_DecayRate = 0.001; - auto interimBucketCorrector = std::make_shared(bucketLength); - CEventRatePopulationModelFactory factory(params, interimBucketCorrector); - factory.features({model_t::E_PopulationCountByBucketPersonAndAttribute, - model_t::E_PopulationUniquePersonCountByAttribute}); - CModelFactory::SGathererInitializationData gathererInitData(startTime); - CModelFactory::TDataGathererPtr gatherer(factory.makeDataGatherer(gathererInitData)); - const model::CDataGatherer& populationGatherer( - dynamic_cast(*gatherer)); - - CModelFactory::SModelInitializationData modelInitData(gatherer); - CAnomalyDetectorModel::TModelPtr model(factory.makeModel(modelInitData)); + model_t::TFeatureVec features{model_t::E_PopulationCountByBucketPersonAndAttribute, + model_t::E_PopulationUniquePersonCountByAttribute}; + this->makeModel(params, features, startTime); CEventRatePopulationModel* populationModel = - dynamic_cast(model.get()); + dynamic_cast(m_Model.get()); BOOST_TEST_REQUIRE(populationModel); core_t::TTime time{startTime}; @@ -690,7 +670,7 @@ BOOST_FIXTURE_TEST_CASE(testFrequency, CTestFixture) { populationModel->sample(time, time + bucketLength, m_ResourceMonitor); time += bucketLength; } - this->addArrival(message, gatherer); + this->addArrival(message, m_Gatherer); } { @@ -698,7 +678,7 @@ BOOST_FIXTURE_TEST_CASE(testFrequency, CTestFixture) { for (auto& datum : data) { LOG_DEBUG(<< "*** person = " << datum.s_Person << " ***"); std::size_t pid; - BOOST_TEST_REQUIRE(gatherer->personId(datum.s_Person, pid)); + BOOST_TEST_REQUIRE(m_Gatherer->personId(datum.s_Person, pid)); LOG_DEBUG(<< "frequency = " << populationModel->personFrequency(pid)); LOG_DEBUG(<< "expected frequency = " << 1.0 / static_cast(datum.s_Period)); @@ -716,7 +696,7 @@ BOOST_FIXTURE_TEST_CASE(testFrequency, CTestFixture) { for (auto& datum : data) { LOG_DEBUG(<< "*** attribute = " << datum.s_Attribute << " ***"); std::size_t cid; - BOOST_TEST_REQUIRE(populationGatherer.attributeId(datum.s_Attribute, cid)); + BOOST_TEST_REQUIRE(m_Gatherer->attributeId(datum.s_Attribute, cid)); LOG_DEBUG(<< "frequency = " << populationModel->attributeFrequency(cid)); LOG_DEBUG(<< "expected frequency = " << (10.0 - static_cast(i)) / 10.0); BOOST_REQUIRE_EQUAL((10.0 - static_cast(i)) / 10.0, @@ -781,18 +761,12 @@ BOOST_FIXTURE_TEST_CASE(testSampleRateWeight, CTestFixture) { SModelParams params(bucketLength); params.s_DecayRate = 0.001; - auto interimBucketCorrector = std::make_shared(bucketLength); - CEventRatePopulationModelFactory factory(params, interimBucketCorrector); - factory.features({model_t::E_PopulationCountByBucketPersonAndAttribute, - model_t::E_PopulationUniquePersonCountByAttribute}); - - CModelFactory::SGathererInitializationData gathererInitData(startTime); - CModelFactory::TDataGathererPtr gatherer(factory.makeDataGatherer(gathererInitData)); + model_t::TFeatureVec features{model_t::E_PopulationCountByBucketPersonAndAttribute, + model_t::E_PopulationUniquePersonCountByAttribute}; + this->makeModel(params, features, startTime); - CModelFactory::SModelInitializationData modelInitData(gatherer); - CAnomalyDetectorModel::TModelPtr model(factory.makeModel(modelInitData)); CEventRatePopulationModel* populationModel = - dynamic_cast(model.get()); + dynamic_cast(m_Model.get()); BOOST_TEST_REQUIRE(populationModel); core_t::TTime time{startTime}; @@ -801,7 +775,7 @@ BOOST_FIXTURE_TEST_CASE(testSampleRateWeight, CTestFixture) { populationModel->sample(time, time + bucketLength, m_ResourceMonitor); time += bucketLength; } - this->addArrival(message, gatherer); + this->addArrival(message, m_Gatherer); } // The heavy hitters generate one value per attribute per bucket. @@ -820,7 +794,7 @@ BOOST_FIXTURE_TEST_CASE(testSampleRateWeight, CTestFixture) { for (auto& heavyHitter : heavyHitters) { LOG_DEBUG(<< "*** person = " << people[heavyHitter] << " ***"); std::size_t pid; - BOOST_TEST_REQUIRE(gatherer->personId(people[heavyHitter], pid)); + BOOST_TEST_REQUIRE(m_Gatherer->personId(people[heavyHitter], pid)); for (std::size_t cid = 0u; cid < attributes.size(); ++cid) { double sampleRateWeight = populationModel->sampleRateWeight(pid, cid); LOG_DEBUG(<< "attribute = " << populationModel->attributeName(cid) @@ -833,7 +807,7 @@ BOOST_FIXTURE_TEST_CASE(testSampleRateWeight, CTestFixture) { for (auto& norm : normal) { LOG_DEBUG(<< "*** person = " << people[norm] << " ***"); std::size_t pid; - BOOST_TEST_REQUIRE(gatherer->personId(people[norm], pid)); + BOOST_TEST_REQUIRE(m_Gatherer->personId(people[norm], pid)); for (std::size_t cid = 0u; cid < attributes.size(); ++cid) { double sampleRateWeight = populationModel->sampleRateWeight(pid, cid); LOG_DEBUG(<< "attribute = " << populationModel->attributeName(cid) @@ -888,19 +862,9 @@ BOOST_FIXTURE_TEST_CASE(testPeriodicity, CTestFixture) { SModelParams params(bucketLength); params.s_DecayRate = 0.001; params.s_MinimumModeCount = 24.0; - auto interimBucketCorrector = std::make_shared(bucketLength); - CEventRatePopulationModelFactory factory(params, interimBucketCorrector); - factory.features({model_t::E_PopulationCountByBucketPersonAndAttribute, - model_t::E_PopulationUniquePersonCountByAttribute}); - - CModelFactory::SGathererInitializationData gathererInitData(startTime); - CModelFactory::TDataGathererPtr gatherer(factory.makeDataGatherer(gathererInitData)); - - CModelFactory::SModelInitializationData modelInitData(gatherer); - CAnomalyDetectorModel::TModelPtr model(factory.makeModel(modelInitData)); - CEventRatePopulationModel* populationModel = - dynamic_cast(model.get()); - BOOST_TEST_REQUIRE(populationModel); + model_t::TFeatureVec features{model_t::E_PopulationCountByBucketPersonAndAttribute, + model_t::E_PopulationUniquePersonCountByAttribute}; + this->makeModel(params, features, startTime); TStrDoubleMap personProbabilitiesWithoutPeriodicity; TStrDoubleMap personProbabilitiesWithPeriodicity; @@ -908,19 +872,18 @@ BOOST_FIXTURE_TEST_CASE(testPeriodicity, CTestFixture) { core_t::TTime time{startTime}; for (const auto& message : messages) { if (message.s_Time >= time + bucketLength) { - populationModel->sample(time, time + bucketLength, m_ResourceMonitor); + m_Model->sample(time, time + bucketLength, m_ResourceMonitor); for (const auto& person : people) { std::size_t pid; - if (!gatherer->personId(person, pid)) { + if (!m_Gatherer->personId(person, pid)) { continue; } CPartitioningFields partitioningFields(EMPTY_STRING, EMPTY_STRING); SAnnotatedProbability annotatedProbability; - if (populationModel->computeProbability( - pid, time, time + bucketLength, partitioningFields, 1, - annotatedProbability) == false) { + if (m_Model->computeProbability(pid, time, time + bucketLength, partitioningFields, + 1, annotatedProbability) == false) { continue; } @@ -943,7 +906,7 @@ BOOST_FIXTURE_TEST_CASE(testPeriodicity, CTestFixture) { time += bucketLength; } - this->addArrival(message, gatherer); + this->addArrival(message, m_Gatherer); } double totalw{0.0}; @@ -1009,8 +972,8 @@ BOOST_FIXTURE_TEST_CASE(testSkipSampling, CTestFixture) { // Check prune does not remove people because last seen times are updated by adding gap duration modelWithGap->prune(maxAgeBuckets); - BOOST_REQUIRE_EQUAL(std::size_t(2), gathererWithGap->numberActivePeople()); - BOOST_REQUIRE_EQUAL(std::size_t(2), gathererWithGap->numberActiveAttributes()); + BOOST_REQUIRE_EQUAL(2, gathererWithGap->numberActivePeople()); + BOOST_REQUIRE_EQUAL(2, gathererWithGap->numberActiveAttributes()); this->addArrival(SMessage(1000, "p1", "a1"), gathererWithGap); modelWithGap->sample(1000, 1100, m_ResourceMonitor); @@ -1041,12 +1004,12 @@ BOOST_FIXTURE_TEST_CASE(testSkipSampling, CTestFixture) { modelWithGap->sample(1200, 1500, m_ResourceMonitor); modelWithGap->prune(maxAgeBuckets); // Age at this point will be 500 and since it's equal to maxAge it should still be here - BOOST_REQUIRE_EQUAL(std::size_t(2), gathererWithGap->numberActiveAttributes()); + BOOST_REQUIRE_EQUAL(2, gathererWithGap->numberActiveAttributes()); modelWithGap->sample(1500, 1600, m_ResourceMonitor); modelWithGap->prune(maxAgeBuckets); // Age at this point will be 600 so it should get pruned - BOOST_REQUIRE_EQUAL(std::size_t(1), gathererWithGap->numberActivePeople()); - BOOST_REQUIRE_EQUAL(std::size_t(1), gathererWithGap->numberActiveAttributes()); + BOOST_REQUIRE_EQUAL(1, gathererWithGap->numberActivePeople()); + BOOST_REQUIRE_EQUAL(1, gathererWithGap->numberActiveAttributes()); } BOOST_FIXTURE_TEST_CASE(testInterimCorrections, CTestFixture) { @@ -1054,42 +1017,41 @@ BOOST_FIXTURE_TEST_CASE(testInterimCorrections, CTestFixture) { std::size_t bucketLength{3600}; SModelParams params(bucketLength); - auto interimBucketCorrector = std::make_shared(bucketLength); - CEventRatePopulationModelFactory factory(params, interimBucketCorrector); - factory.features({model_t::E_PopulationCountByBucketPersonAndAttribute}); - CModelFactory::TDataGathererPtr gatherer(factory.makeDataGatherer(startTime)); - CAnomalyDetectorModel::TModelPtr modelHolder(factory.makeModel(gatherer)); + model_t::TFeatureVec features{model_t::E_PopulationCountByBucketPersonAndAttribute}; + this->makeModel(params, features, startTime); CEventRatePopulationModel* model = - dynamic_cast(modelHolder.get()); - CCountingModel countingModel(params, gatherer, interimBucketCorrector); + dynamic_cast(m_Model.get()); + BOOST_TEST_REQUIRE(model); + + CCountingModel countingModel(params, m_Gatherer, m_InterimBucketCorrector); test::CRandomNumbers rng; core_t::TTime now{startTime}; core_t::TTime endTime = now + 2 * 24 * bucketLength; TDoubleVec samples(3, 0.0); while (now < endTime) { - rng.generateUniformSamples(50.0, 70.0, std::size_t(3), samples); + rng.generateUniformSamples(50.0, 70.0, 3, samples); for (std::size_t i = 0; i < static_cast(samples[0] + 0.5); ++i) { - this->addArrival(SMessage(now, "p1", "a1"), gatherer); + this->addArrival(SMessage(now, "p1", "a1"), m_Gatherer); } for (std::size_t i = 0; i < static_cast(samples[1] + 0.5); ++i) { - this->addArrival(SMessage(now, "p2", "a1"), gatherer); + this->addArrival(SMessage(now, "p2", "a1"), m_Gatherer); } for (std::size_t i = 0; i < static_cast(samples[2] + 0.5); ++i) { - this->addArrival(SMessage(now, "p3", "a2"), gatherer); + this->addArrival(SMessage(now, "p3", "a2"), m_Gatherer); } countingModel.sample(now, now + bucketLength, m_ResourceMonitor); model->sample(now, now + bucketLength, m_ResourceMonitor); now += bucketLength; } for (std::size_t i = 0; i < 35; ++i) { - this->addArrival(SMessage(now, "p1", "a1"), gatherer); + this->addArrival(SMessage(now, "p1", "a1"), m_Gatherer); } for (std::size_t i = 0; i < 1; ++i) { - this->addArrival(SMessage(now, "p2", "a1"), gatherer); + this->addArrival(SMessage(now, "p2", "a1"), m_Gatherer); } for (std::size_t i = 0; i < 100; ++i) { - this->addArrival(SMessage(now, "p3", "a2"), gatherer); + this->addArrival(SMessage(now, "p3", "a2"), m_Gatherer); } countingModel.sampleBucketStatistics(now, now + bucketLength, m_ResourceMonitor); model->sampleBucketStatistics(now, now + bucketLength, m_ResourceMonitor); @@ -1144,32 +1106,26 @@ BOOST_FIXTURE_TEST_CASE(testPersistence, CTestFixture) { SModelParams params(bucketLength); params.s_DecayRate = 0.001; - auto interimBucketCorrector = std::make_shared(bucketLength); - CEventRatePopulationModelFactory factory(params, interimBucketCorrector); - factory.features({model_t::E_PopulationCountByBucketPersonAndAttribute, - model_t::E_PopulationUniquePersonCountByAttribute}); - CModelFactory::SGathererInitializationData gathererInitData(startTime); - CModelFactory::TDataGathererPtr gatherer(factory.makeDataGatherer(gathererInitData)); - - CModelFactory::SModelInitializationData modelInitData(gatherer); - CAnomalyDetectorModel::TModelPtr origModel(factory.makeModel(modelInitData)); + model_t::TFeatureVec features{model_t::E_PopulationCountByBucketPersonAndAttribute, + model_t::E_PopulationUniquePersonCountByAttribute}; + this->makeModel(params, features, startTime); CEventRatePopulationModel* populationModel = - dynamic_cast(origModel.get()); + dynamic_cast(m_Model.get()); BOOST_TEST_REQUIRE(populationModel); for (const auto& message : messages) { if (message.s_Time >= startTime + bucketLength) { - origModel->sample(startTime, startTime + bucketLength, m_ResourceMonitor); + m_Model->sample(startTime, startTime + bucketLength, m_ResourceMonitor); startTime += bucketLength; } - this->addArrival(message, gatherer); + this->addArrival(message, m_Gatherer); } std::string origXml; { core::CRapidXmlStatePersistInserter inserter("root"); - origModel->acceptPersistInserter(inserter); + m_Model->acceptPersistInserter(inserter); inserter.toXml(origXml); } @@ -1181,7 +1137,8 @@ BOOST_FIXTURE_TEST_CASE(testPersistence, CTestFixture) { BOOST_TEST_REQUIRE(parser.parseStringIgnoreCdata(origXml)); core::CRapidXmlStateRestoreTraverser traverser(parser); - CAnomalyDetectorModel::TModelPtr restoredModel(factory.makeModel(modelInitData, traverser)); + CAnomalyDetectorModel::TModelPtr restoredModel( + m_Factory->makeModel({m_Gatherer}, traverser)); populationModel = dynamic_cast(restoredModel.get()); BOOST_TEST_REQUIRE(populationModel); @@ -1195,9 +1152,9 @@ BOOST_FIXTURE_TEST_CASE(testPersistence, CTestFixture) { inserter.toXml(newXml); } - LOG_DEBUG(<< "original checksum = " << origModel->checksum(false)); + LOG_DEBUG(<< "original checksum = " << m_Model->checksum(false)); LOG_DEBUG(<< "restored checksum = " << restoredModel->checksum(false)); - BOOST_REQUIRE_EQUAL(origModel->checksum(false), restoredModel->checksum(false)); + BOOST_REQUIRE_EQUAL(m_Model->checksum(false), restoredModel->checksum(false)); BOOST_REQUIRE_EQUAL(origXml, newXml); } diff --git a/lib/model/unittest/CMetricModelTest.cc b/lib/model/unittest/CMetricModelTest.cc index c0c8f6ab10..eb1d117763 100644 --- a/lib/model/unittest/CMetricModelTest.cc +++ b/lib/model/unittest/CMetricModelTest.cc @@ -76,44 +76,11 @@ class CTestFixture : public CModelTestFixtureBase { void makeModel(const SModelParams& params, const model_t::TFeatureVec& features, core_t::TTime startTime, - unsigned int* sampleCount = nullptr) { - this->makeModel(params, features, startTime, m_Gatherer, m_Model, sampleCount); + TOptionalUInt sampleCount = TOptionalUInt()) { + this->makeModelT(params, features, startTime, + model_t::E_MetricOnline, + m_Gatherer, m_Model, sampleCount); } - - void makeModel(const SModelParams& params, - const model_t::TFeatureVec& features, - core_t::TTime startTime, - CModelFactory::TDataGathererPtr& gatherer, - CModelFactory::TModelPtr& model, - unsigned int* sampleCount = nullptr) { - if (m_InterimBucketCorrector == nullptr) { - m_InterimBucketCorrector = - std::make_shared(params.s_BucketLength); - } - if (m_Factory == nullptr) { - m_Factory.reset(new CMetricModelFactory(params, m_InterimBucketCorrector)); - m_Factory->features(features); - } - CModelFactory::SGathererInitializationData initData(startTime); - if (sampleCount) { - initData.s_SampleOverrideCount = *sampleCount; - } - gatherer.reset(m_Factory->makeDataGatherer(initData)); - model.reset(m_Factory->makeModel({gatherer})); - BOOST_TEST_REQUIRE(model); - BOOST_REQUIRE_EQUAL(model_t::E_MetricOnline, model->category()); - BOOST_REQUIRE_EQUAL(params.s_BucketLength, model->bucketLength()); - } - -protected: - using TInterimBucketCorrectorPtr = std::shared_ptr; - using TMetricModelFactoryPtr = boost::shared_ptr; - -protected: - TInterimBucketCorrectorPtr m_InterimBucketCorrector; - TMetricModelFactoryPtr m_Factory; - ml::model::CModelFactory::TDataGathererPtr m_Gatherer; - ml::model::CModelFactory::TModelPtr m_Model; }; BOOST_FIXTURE_TEST_CASE(testSample, CTestFixture) { @@ -138,9 +105,9 @@ BOOST_FIXTURE_TEST_CASE(testSample, CTestFixture) { model_t::E_IndividualMinByPerson, model_t::E_IndividualMaxByPerson}; - this->makeModel(params, features, startTime, &sampleCount); + this->makeModel(params, features, startTime, sampleCount); CMetricModel& model = static_cast(*m_Model); - BOOST_REQUIRE_EQUAL(std::size_t(0), this->addPerson("p", m_Gatherer)); + BOOST_REQUIRE_EQUAL(0, this->addPerson("p", m_Gatherer)); // Bucket values. uint64_t expectedCount{0}; @@ -233,14 +200,11 @@ BOOST_FIXTURE_TEST_CASE(testSample, CTestFixture) { core_t::TTime sampleTime{static_cast( expectedSampleTimes[k] + 0.5)}; expectedMeanSamples_.emplace_back( - sampleTime, TDouble2Vec{expectedMeanSamples[k]}, - std::size_t(0)); + sampleTime, TDouble2Vec{expectedMeanSamples[k]}, 0); expectedMinSamples_.emplace_back( - sampleTime, TDouble2Vec{expectedMinSamples[k]}, - std::size_t(0)); + sampleTime, TDouble2Vec{expectedMinSamples[k]}, 0); expectedMaxSamples_.emplace_back( - sampleTime, TDouble2Vec{expectedMaxSamples[k]}, - std::size_t(0)); + sampleTime, TDouble2Vec{expectedMaxSamples[k]}, 0); } expectedMeanModel->addSamples(params_, expectedMeanSamples_); expectedMinModel->addSamples(params_, expectedMinSamples_); @@ -387,9 +351,9 @@ BOOST_FIXTURE_TEST_CASE(testMultivariateSample, CTestFixture) { LOG_DEBUG(<< "*** sample count = " << sampleCount << " ***"); this->makeModel(params, {model_t::E_IndividualMeanLatLongByPerson}, - startTime, &sampleCount); + startTime, sampleCount); CMetricModel& model = static_cast(*m_Model); - BOOST_REQUIRE_EQUAL(std::size_t(0), this->addPerson("p", m_Gatherer)); + BOOST_REQUIRE_EQUAL(0, this->addPerson("p", m_Gatherer)); // Bucket values. uint64_t expectedCount{0}; @@ -553,7 +517,7 @@ BOOST_FIXTURE_TEST_CASE(testProbabilityCalculationForMetric, CTestFixture) { this->makeModel(params, features, startTime); CMetricModel& model = static_cast(*m_Model); - BOOST_REQUIRE_EQUAL(std::size_t(0), this->addPerson("p", m_Gatherer)); + BOOST_REQUIRE_EQUAL(0, this->addPerson("p", m_Gatherer)); maths::CBasicStatistics::COrderStatisticsHeap minProbabilities(2u); test::CRandomNumbers rng; @@ -604,7 +568,7 @@ BOOST_FIXTURE_TEST_CASE(testProbabilityCalculationForMedian, CTestFixture) { SModelParams params(bucketLength); this->makeModel(params, {model_t::E_IndividualMedianByPerson}, startTime); CMetricModel& model = static_cast(*m_Model); - BOOST_REQUIRE_EQUAL(std::size_t(0), this->addPerson("p", m_Gatherer)); + BOOST_REQUIRE_EQUAL(0, this->addPerson("p", m_Gatherer)); maths::CBasicStatistics::COrderStatisticsHeap minProbabilities(2u); test::CRandomNumbers rng; @@ -658,7 +622,7 @@ BOOST_FIXTURE_TEST_CASE(testProbabilityCalculationForMedian, CTestFixture) { // assert there is only 1 value in the last bucket and its the median BOOST_REQUIRE_EQUAL(fd->s_BucketValue->value()[0], mean * 3.0); - BOOST_REQUIRE_EQUAL(fd->s_BucketValue->value().size(), std::size_t(1)); + BOOST_REQUIRE_EQUAL(fd->s_BucketValue->value().size(), 1); } BOOST_FIXTURE_TEST_CASE(testProbabilityCalculationForLowMean, CTestFixture) { @@ -676,7 +640,7 @@ BOOST_FIXTURE_TEST_CASE(testProbabilityCalculationForLowMean, CTestFixture) { SModelParams params(bucketLength); this->makeModel(params, {model_t::E_IndividualLowMeanByPerson}, startTime); CMetricModel& model = static_cast(*m_Model); - BOOST_REQUIRE_EQUAL(std::size_t(0), this->addPerson("p", m_Gatherer)); + BOOST_REQUIRE_EQUAL(0, this->addPerson("p", m_Gatherer)); TOptionalDoubleVec probabilities; test::CRandomNumbers rng; @@ -732,7 +696,7 @@ BOOST_FIXTURE_TEST_CASE(testProbabilityCalculationForHighMean, CTestFixture) { SModelParams params(bucketLength); this->makeModel(params, {model_t::E_IndividualHighMeanByPerson}, startTime); CMetricModel& model = static_cast(*m_Model); - BOOST_REQUIRE_EQUAL(std::size_t(0), this->addPerson("p", m_Gatherer)); + BOOST_REQUIRE_EQUAL(0, this->addPerson("p", m_Gatherer)); TOptionalDoubleVec probabilities; test::CRandomNumbers rng; @@ -786,7 +750,7 @@ BOOST_FIXTURE_TEST_CASE(testProbabilityCalculationForLowSum, CTestFixture) { SModelParams params(bucketLength); this->makeModel(params, {model_t::E_IndividualLowSumByBucketAndPerson}, startTime); CMetricModel& model = static_cast(*m_Model); - BOOST_REQUIRE_EQUAL(std::size_t(0), this->addPerson("p", m_Gatherer)); + BOOST_REQUIRE_EQUAL(0, this->addPerson("p", m_Gatherer)); TOptionalDoubleVec probabilities; test::CRandomNumbers rng; @@ -839,7 +803,7 @@ BOOST_FIXTURE_TEST_CASE(testProbabilityCalculationForHighSum, CTestFixture) { SModelParams params(bucketLength); this->makeModel(params, {model_t::E_IndividualHighSumByBucketAndPerson}, startTime); CMetricModel& model = static_cast(*m_Model); - BOOST_REQUIRE_EQUAL(std::size_t(0), this->addPerson("p", m_Gatherer)); + BOOST_REQUIRE_EQUAL(0, this->addPerson("p", m_Gatherer)); TOptionalDoubleVec probabilities; test::CRandomNumbers rng; @@ -908,7 +872,7 @@ BOOST_FIXTURE_TEST_CASE(testInfluence, CTestFixture) { factory.bucketLength(bucketLength); factory.fieldNames("", "", "P", "V", TStrVec{"I"}); CModelFactory::TDataGathererPtr gatherer(factory.makeDataGatherer(startTime)); - BOOST_REQUIRE_EQUAL(std::size_t(0), this->addPerson("p", gatherer)); + BOOST_REQUIRE_EQUAL(0, this->addPerson("p", gatherer)); CModelFactory::TModelPtr model_(factory.makeModel(gatherer)); BOOST_TEST_REQUIRE(model_); BOOST_REQUIRE_EQUAL(model_t::E_MetricOnline, model_->category()); @@ -974,7 +938,7 @@ BOOST_FIXTURE_TEST_CASE(testInfluence, CTestFixture) { factory.fieldNames("", "", "P", "V", TStrVec(1, "I")); CModelFactory::SGathererInitializationData gathererInitData(startTime); CModelFactory::TDataGathererPtr gatherer(factory.makeDataGatherer(gathererInitData)); - BOOST_REQUIRE_EQUAL(std::size_t(0), this->addPerson("p", gatherer)); + BOOST_REQUIRE_EQUAL(0, this->addPerson("p", gatherer)); CModelFactory::TModelPtr model_(factory.makeModel(gatherer)); BOOST_TEST_REQUIRE(model_); BOOST_REQUIRE_EQUAL(model_t::E_MetricOnline, model_->category()); @@ -1178,12 +1142,14 @@ BOOST_FIXTURE_TEST_CASE(testPrune, CTestFixture) { CModelFactory::TDataGathererPtr gatherer; CModelFactory::TModelPtr model_; - this->makeModel(params, features, startTime, gatherer, model_); + this->makeModelT(params, features, startTime, + model_t::E_MetricOnline, gatherer, model_); CMetricModel* model = dynamic_cast(model_.get()); BOOST_TEST_REQUIRE(model); CModelFactory::TDataGathererPtr expectedGatherer; CModelFactory::TModelPtr expectedModel_; - this->makeModel(params, features, startTime, expectedGatherer, expectedModel_); + this->makeModelT(params, features, startTime, model_t::E_MetricOnline, + expectedGatherer, expectedModel_); CMetricModel* expectedModel = dynamic_cast(expectedModel_.get()); BOOST_TEST_REQUIRE(expectedModel); @@ -1321,7 +1287,7 @@ BOOST_FIXTURE_TEST_CASE(testSkipSampling, CTestFixture) { factory.fieldNames("", "", "P", "V", TStrVec(1, "I")); CModelFactory::TDataGathererPtr gathererNoGap(factory.makeDataGatherer(startTime)); - BOOST_REQUIRE_EQUAL(std::size_t(0), this->addPerson("p", gathererNoGap)); + BOOST_REQUIRE_EQUAL(0, this->addPerson("p", gathererNoGap)); CModelFactory::TModelPtr modelNoGapPtr(factory.makeModel(gathererNoGap)); BOOST_TEST_REQUIRE(modelNoGapPtr); BOOST_REQUIRE_EQUAL(model_t::E_MetricOnline, modelNoGapPtr->category()); @@ -1349,7 +1315,7 @@ BOOST_FIXTURE_TEST_CASE(testSkipSampling, CTestFixture) { } CModelFactory::TDataGathererPtr gathererWithGap(factory.makeDataGatherer(startTime)); - BOOST_REQUIRE_EQUAL(std::size_t(0), this->addPerson("p", gathererWithGap)); + BOOST_REQUIRE_EQUAL(0, this->addPerson("p", gathererWithGap)); CModelFactory::TModelPtr modelWithGapPtr(factory.makeModel(gathererWithGap)); BOOST_TEST_REQUIRE(modelWithGapPtr); BOOST_REQUIRE_EQUAL(model_t::E_MetricOnline, modelWithGapPtr->category()); @@ -1493,8 +1459,8 @@ BOOST_FIXTURE_TEST_CASE(testVarp, CTestFixture) { factory.fieldNames("", "", "P", "V", TStrVec()); CModelFactory::TDataGathererPtr gatherer(factory.makeDataGatherer(startTime)); BOOST_TEST_REQUIRE(!gatherer->isPopulation()); - BOOST_REQUIRE_EQUAL(std::size_t(0), this->addPerson("p", gatherer)); - BOOST_REQUIRE_EQUAL(std::size_t(1), this->addPerson("q", gatherer)); + BOOST_REQUIRE_EQUAL(0, this->addPerson("p", gatherer)); + BOOST_REQUIRE_EQUAL(1, this->addPerson("q", gatherer)); CModelFactory::TModelPtr model_(factory.makeModel(gatherer)); BOOST_TEST_REQUIRE(model_); BOOST_REQUIRE_EQUAL(model_t::E_MetricOnline, model_->category()); @@ -1614,7 +1580,7 @@ BOOST_FIXTURE_TEST_CASE(testInterimCorrections, CTestFixture) { factory.fieldNames("", "", "P", "V", TStrVec(1, "I")); CModelFactory::TDataGathererPtr gatherer(factory.makeDataGatherer(startTime)); - BOOST_REQUIRE_EQUAL(std::size_t(0), this->addPerson("p", gatherer)); + BOOST_REQUIRE_EQUAL(0, this->addPerson("p", gatherer)); CModelFactory::TModelPtr model_(factory.makeModel(gatherer)); BOOST_TEST_REQUIRE(model_); BOOST_REQUIRE_EQUAL(model_t::E_MetricOnline, model_->category()); @@ -1630,7 +1596,7 @@ BOOST_FIXTURE_TEST_CASE(testInterimCorrections, CTestFixture) { test::CRandomNumbers rng; TDoubleVec samples(3, 0.0); while (now < endTime) { - rng.generateUniformSamples(50.0, 70.0, std::size_t(3), samples); + rng.generateUniformSamples(50.0, 70.0, 3, samples); for (std::size_t i = 0; i < static_cast(samples[0] + 0.5); ++i) { this->addArrival(*gatherer, now, "p1", 1.0, TOptionalStr("i1")); } @@ -1724,7 +1690,7 @@ BOOST_FIXTURE_TEST_CASE(testInterimCorrectionsWithCorrelations, CTestFixture) { test::CRandomNumbers rng; TDoubleVec samples(1, 0.0); while (now < endTime) { - rng.generateUniformSamples(80.0, 100.0, std::size_t(1), samples); + rng.generateUniformSamples(80.0, 100.0, 1, samples); for (std::size_t i = 0; i < static_cast(samples[0] + 0.5); ++i) { this->addArrival(*gatherer, now, "p1", 1.0, TOptionalStr("i1")); } @@ -2135,7 +2101,7 @@ BOOST_FIXTURE_TEST_CASE(testProbabilityCalculationForLowMedian, CTestFixture) { SModelParams params(bucketLength); this->makeModel(params, {model_t::E_IndividualLowMedianByPerson}, startTime); CMetricModel& model = static_cast(*m_Model); - BOOST_REQUIRE_EQUAL(std::size_t(0), this->addPerson("p", m_Gatherer)); + BOOST_REQUIRE_EQUAL(0, this->addPerson("p", m_Gatherer)); TOptionalDoubleVec probabilities; test::CRandomNumbers rng; @@ -2189,7 +2155,7 @@ BOOST_FIXTURE_TEST_CASE(testProbabilityCalculationForHighMedian, CTestFixture) { SModelParams params(bucketLength); makeModel(params, {model_t::E_IndividualHighMeanByPerson}, startTime); CMetricModel& model = static_cast(*m_Model); - BOOST_REQUIRE_EQUAL(std::size_t(0), this->addPerson("p", m_Gatherer)); + BOOST_REQUIRE_EQUAL(0, this->addPerson("p", m_Gatherer)); TOptionalDoubleVec probabilities; test::CRandomNumbers rng; diff --git a/lib/model/unittest/CMetricPopulationModelTest.cc b/lib/model/unittest/CMetricPopulationModelTest.cc index 2a09263720..4f999095d3 100644 --- a/lib/model/unittest/CMetricPopulationModelTest.cc +++ b/lib/model/unittest/CMetricPopulationModelTest.cc @@ -151,6 +151,13 @@ class CTestFixture : public CModelTestFixtureBase { std::sort(messages.begin(), messages.end()); } + void makeModel(const SModelParams& params, + const model_t::TFeatureVec& features, + core_t::TTime startTime) { + this->makeModelT( + params, features, startTime, model_t::E_MetricOnline, m_Gatherer, m_Model); + } + private: double roundToNearestPersisted(double value) { std::string valueAsString{core::CStringUtils::typeToStringPrecise( @@ -177,19 +184,15 @@ BOOST_FIXTURE_TEST_CASE(testBasicAccessors, CTestFixture) { LOG_DEBUG(<< "# messages = " << messages.size()); SModelParams params(bucketLength); - auto interimBucketCorrector = std::make_shared(bucketLength); - CMetricPopulationModelFactory factory(params, interimBucketCorrector); + model_t::TFeatureVec features{model_t::E_PopulationMeanByPersonAndAttribute, model_t::E_PopulationMinByPersonAndAttribute, model_t::E_PopulationMaxByPersonAndAttribute}; - factory.features(features); - CModelFactory::SGathererInitializationData gathererInitData(startTime); - CModelFactory::TDataGathererPtr gatherer( - dynamic_cast(factory.makeDataGatherer(gathererInitData))); - CModelFactory::SModelInitializationData modelInitData(gatherer); - CAnomalyDetectorModel::TModelPtr model(factory.makeModel(modelInitData)); - BOOST_REQUIRE_EQUAL(model_t::E_MetricOnline, model->category()); + this->makeModel(params, features, startTime); + CMetricPopulationModel* model = + dynamic_cast(m_Model.get()); + BOOST_TEST_REQUIRE(model); TStrUInt64Map expectedBucketPersonCounts; TMeanAccumulatorVec expectedBucketMeans(numberPeople * numberAttributes); @@ -203,20 +206,20 @@ BOOST_FIXTURE_TEST_CASE(testBasicAccessors, CTestFixture) { LOG_DEBUG(<< "Testing bucket = [" << startTime << "," << startTime + bucketLength << ")"); - BOOST_REQUIRE_EQUAL(numberPeople, gatherer->numberActivePeople()); - BOOST_REQUIRE_EQUAL(numberAttributes, gatherer->numberActiveAttributes()); + BOOST_REQUIRE_EQUAL(numberPeople, m_Gatherer->numberActivePeople()); + BOOST_REQUIRE_EQUAL(numberAttributes, m_Gatherer->numberActiveAttributes()); // Test the person and attribute invariants. - for (std::size_t j = 0u; j < gatherer->numberActivePeople(); ++j) { + for (std::size_t j = 0u; j < m_Gatherer->numberActivePeople(); ++j) { const std::string& name = model->personName(j); std::size_t pid; - BOOST_TEST_REQUIRE(gatherer->personId(name, pid)); + BOOST_TEST_REQUIRE(m_Gatherer->personId(name, pid)); BOOST_REQUIRE_EQUAL(j, pid); } - for (std::size_t j = 0u; j < gatherer->numberActiveAttributes(); ++j) { + for (std::size_t j = 0u; j < m_Gatherer->numberActiveAttributes(); ++j) { const std::string& name = model->attributeName(j); std::size_t cid; - BOOST_TEST_REQUIRE(gatherer->attributeId(name, cid)); + BOOST_TEST_REQUIRE(m_Gatherer->attributeId(name, cid)); BOOST_REQUIRE_EQUAL(j, cid); } @@ -228,7 +231,7 @@ BOOST_FIXTURE_TEST_CASE(testBasicAccessors, CTestFixture) { // Test the person counts. for (const auto& count_ : expectedBucketPersonCounts) { std::size_t pid; - BOOST_TEST_REQUIRE(gatherer->personId(count_.first, pid)); + BOOST_TEST_REQUIRE(m_Gatherer->personId(count_.first, pid)); expectedCurrentBucketPersonIds.push_back(pid); TOptionalUInt64 count = model->currentBucketCount(pid, startTime); BOOST_TEST_REQUIRE(count); @@ -297,7 +300,7 @@ BOOST_FIXTURE_TEST_CASE(testBasicAccessors, CTestFixture) { startTime += bucketLength; } - CEventData eventData = this->addArrival(message, gatherer); + CEventData eventData = this->addArrival(message, m_Gatherer); std::size_t pid = *eventData.personId(); std::size_t cid = *eventData.attributeId(); ++expectedBucketPersonCounts[message.s_Person]; @@ -337,22 +340,18 @@ BOOST_FIXTURE_TEST_CASE(testMinMaxAndMean, CTestFixture) { SModelParams params(bucketLength); params.s_InitialDecayRateMultiplier = 1.0; params.s_MaximumUpdatesPerBucket = 0.0; - auto interimBucketCorrector = std::make_shared(bucketLength); - CMetricPopulationModelFactory factory(params, interimBucketCorrector); + model_t::TFeatureVec features{model_t::E_PopulationMeanByPersonAndAttribute, model_t::E_PopulationMinByPersonAndAttribute, model_t::E_PopulationMaxByPersonAndAttribute}; - factory.features(features); - CModelFactory::SGathererInitializationData gathererInitData(startTime); - CModelFactory::TDataGathererPtr gatherer( - dynamic_cast(factory.makeDataGatherer(gathererInitData))); - CModelFactory::SModelInitializationData modelInitData(gatherer); - CAnomalyDetectorModel::TModelPtr modelHolder(factory.makeModel(modelInitData)); + + this->makeModel(params, features, startTime); CMetricPopulationModel* model = - dynamic_cast(modelHolder.get()); + dynamic_cast(m_Model.get()); + BOOST_TEST_REQUIRE(model); CModelFactory::TFeatureMathsModelPtrPrVec models{ - factory.defaultFeatureModels(features, bucketLength, 1.0, false)}; + m_Factory->defaultFeatureModels(features, bucketLength, 1.0, false)}; BOOST_REQUIRE_EQUAL(features.size(), models.size()); BOOST_REQUIRE_EQUAL(features[0], models[0].first); BOOST_REQUIRE_EQUAL(features[1], models[1].first); @@ -382,7 +381,7 @@ BOOST_FIXTURE_TEST_CASE(testMinMaxAndMean, CTestFixture) { populationWeightedSamples[feature][cid].second; TMathsModelPtr& model_ = expectedPopulationModels[feature][cid]; if (!model_) { - model_ = factory.defaultFeatureModel( + model_ = m_Factory->defaultFeatureModel( features[feature], bucketLength, 1.0, false); } for (std::size_t j = 0u; j < samples_.second.size(); ++j) { @@ -438,12 +437,12 @@ BOOST_FIXTURE_TEST_CASE(testMinMaxAndMean, CTestFixture) { startTime += bucketLength; } - CEventData eventData = this->addArrival(message, gatherer); + CEventData eventData = this->addArrival(message, m_Gatherer); std::size_t pid = *eventData.personId(); std::size_t cid = *eventData.attributeId(); nonNegative &= message.s_Value[0] < 0.0; - double sampleCount = gatherer->sampleCount(cid); + double sampleCount = m_Gatherer->sampleCount(cid); if (sampleCount > 0.0) { TSizeSizePr key{pid, cid}; sampleTimes[key].add(static_cast(message.s_Time)); @@ -470,18 +469,18 @@ BOOST_FIXTURE_TEST_CASE(testVarp, CTestFixture) { core_t::TTime startTime{3600}; core_t::TTime bucketLength{3600}; SModelParams params(bucketLength); - auto interimBucketCorrector = std::make_shared(bucketLength); - CMetricPopulationModelFactory factory(params, interimBucketCorrector); - factory.features({model_t::E_PopulationVarianceByPersonAndAttribute}); - factory.fieldNames("", "P", "", "V", TStrVec{1, "I"}); - CModelFactory::SGathererInitializationData gathererInitData(startTime); - CModelFactory::TDataGathererPtr gatherer(factory.makeDataGatherer(gathererInitData)); - BOOST_TEST_REQUIRE(gatherer->isPopulation()); - CModelFactory::SModelInitializationData initData(gatherer); - CAnomalyDetectorModel::TModelPtr model_(factory.makeModel(initData)); - BOOST_TEST_REQUIRE(model_); - BOOST_REQUIRE_EQUAL(model_t::E_MetricOnline, model_->category()); - CMetricPopulationModel& model = static_cast(*model_.get()); + + model_t::TFeatureVec features{model_t::E_PopulationVarianceByPersonAndAttribute}; + + m_InterimBucketCorrector = std::make_shared(bucketLength); + m_Factory.reset(new CMetricPopulationModelFactory(params, m_InterimBucketCorrector)); + m_Factory->features({model_t::E_PopulationVarianceByPersonAndAttribute}); + m_Factory->fieldNames("", "P", "", "V", TStrVec{1, "I"}); + + this->makeModel(params, features, startTime); + + CMetricPopulationModel& model = + static_cast(*m_Model.get()); TDoubleStrPrVec b1{{1.0, "i1"}, {1.1, "i1"}, {1.01, "i2"}, {1.02, "i2"}}; TDoubleStrPrVec b2{{10.0, "i1"}}; @@ -501,45 +500,45 @@ BOOST_FIXTURE_TEST_CASE(testVarp, CTestFixture) { SAnnotatedProbability annotatedProbability; core_t::TTime time = startTime; - processBucket(time, bucketLength, b1, *gatherer, model, annotatedProbability); + processBucket(time, bucketLength, b1, *m_Gatherer, model, annotatedProbability); BOOST_TEST_REQUIRE(annotatedProbability.s_Probability > 0.8); time += bucketLength; - processBucket(time, bucketLength, b2, *gatherer, model, annotatedProbability); + processBucket(time, bucketLength, b2, *m_Gatherer, model, annotatedProbability); BOOST_TEST_REQUIRE(annotatedProbability.s_Probability > 0.8); time += bucketLength; - processBucket(time, bucketLength, b3, *gatherer, model, annotatedProbability); + processBucket(time, bucketLength, b3, *m_Gatherer, model, annotatedProbability); BOOST_TEST_REQUIRE(annotatedProbability.s_Probability > 0.8); time += bucketLength; - processBucket(time, bucketLength, b4, *gatherer, model, annotatedProbability); + processBucket(time, bucketLength, b4, *m_Gatherer, model, annotatedProbability); BOOST_TEST_REQUIRE(annotatedProbability.s_Probability > 0.8); time += bucketLength; - processBucket(time, bucketLength, b5, *gatherer, model, annotatedProbability); + processBucket(time, bucketLength, b5, *m_Gatherer, model, annotatedProbability); BOOST_TEST_REQUIRE(annotatedProbability.s_Probability > 0.8); time += bucketLength; - processBucket(time, bucketLength, b6, *gatherer, model, annotatedProbability); + processBucket(time, bucketLength, b6, *m_Gatherer, model, annotatedProbability); BOOST_TEST_REQUIRE(annotatedProbability.s_Probability > 0.8); time += bucketLength; - processBucket(time, bucketLength, b7, *gatherer, model, annotatedProbability); + processBucket(time, bucketLength, b7, *m_Gatherer, model, annotatedProbability); BOOST_TEST_REQUIRE(annotatedProbability.s_Probability > 0.8); time += bucketLength; - processBucket(time, bucketLength, b8, *gatherer, model, annotatedProbability); + processBucket(time, bucketLength, b8, *m_Gatherer, model, annotatedProbability); BOOST_TEST_REQUIRE(annotatedProbability.s_Probability > 0.8); time += bucketLength; - processBucket(time, bucketLength, b9, *gatherer, model, annotatedProbability); + processBucket(time, bucketLength, b9, *m_Gatherer, model, annotatedProbability); BOOST_TEST_REQUIRE(annotatedProbability.s_Probability < 0.85); time += bucketLength; - processBucket(time, bucketLength, b10, *gatherer, model, annotatedProbability); + processBucket(time, bucketLength, b10, *m_Gatherer, model, annotatedProbability); BOOST_TEST_REQUIRE(annotatedProbability.s_Probability < 0.1); - BOOST_REQUIRE_EQUAL(std::size_t(1), annotatedProbability.s_Influences.size()); + BOOST_REQUIRE_EQUAL(1, annotatedProbability.s_Influences.size()); BOOST_REQUIRE_EQUAL(std::string("I"), *annotatedProbability.s_Influences[0].first.first); BOOST_REQUIRE_EQUAL(std::string("i2"), @@ -566,15 +565,10 @@ BOOST_FIXTURE_TEST_CASE(testComputeProbability, CTestFixture) { generateTestMessages(model_t::dimension(feature), startTime, bucketLength, messages); SModelParams params(bucketLength); - auto interimBucketCorrector = std::make_shared(bucketLength); - CMetricPopulationModelFactory factory(params, interimBucketCorrector); - factory.features({feature}); - CModelFactory::SGathererInitializationData gathererInitData(startTime); - CModelFactory::TDataGathererPtr gatherer(factory.makeDataGatherer(gathererInitData)); - CModelFactory::SModelInitializationData modelInitData(gatherer); - CAnomalyDetectorModel::TModelPtr modelHolder(factory.makeModel(modelInitData)); + this->makeModel(params, {feature}, startTime); CMetricPopulationModel* model = - dynamic_cast(modelHolder.get()); + static_cast(m_Model.get()); + BOOST_TEST_REQUIRE(model); TStrVec expectedAnomalies{ "[12, p2, c0 c3]", "[15, p3, c0]", "[30, p5, c2]", "[40, p6, c0]", @@ -583,7 +577,7 @@ BOOST_FIXTURE_TEST_CASE(testComputeProbability, CTestFixture) { TAnomalyVec orderedAnomalies; this->generateOrderedAnomalies(7u, startTime, bucketLength, messages, - gatherer, *model, orderedAnomalies); + m_Gatherer, *model, orderedAnomalies); BOOST_REQUIRE_EQUAL(expectedAnomalies.size(), orderedAnomalies.size()); for (std::size_t j = 0u; j < orderedAnomalies.size(); ++j) { @@ -663,20 +657,15 @@ BOOST_FIXTURE_TEST_CASE(testPrune, CTestFixture) { SModelParams params(bucketLength); params.s_DecayRate = 0.01; - auto interimBucketCorrector = std::make_shared(bucketLength); - CMetricPopulationModelFactory factory(params, interimBucketCorrector); model_t::TFeatureVec features{model_t::E_PopulationMeanByPersonAndAttribute, model_t::E_PopulationMinByPersonAndAttribute, model_t::E_PopulationMaxByPersonAndAttribute}; - factory.features(features); - CModelFactory::SGathererInitializationData gathererInitData(startTime); - CModelFactory::TDataGathererPtr gatherer(factory.makeDataGatherer(gathererInitData)); - CModelFactory::SModelInitializationData modelInitData(gatherer); - CAnomalyDetectorModel::TModelPtr model(factory.makeModel(modelInitData)); - BOOST_TEST_REQUIRE(model); - CModelFactory::TDataGathererPtr expectedGatherer(factory.makeDataGatherer(gathererInitData)); + + this->makeModel(params, features, startTime); + + CModelFactory::TDataGathererPtr expectedGatherer(m_Factory->makeDataGatherer({startTime})); CModelFactory::SModelInitializationData expectedModelInitData(expectedGatherer); - CAnomalyDetectorModel::TModelPtr expectedModel(factory.makeModel(expectedModelInitData)); + CAnomalyDetectorModel::TModelPtr expectedModel(m_Factory->makeModel(expectedModelInitData)); BOOST_TEST_REQUIRE(expectedModel); test::CRandomNumbers rng; @@ -722,15 +711,15 @@ BOOST_FIXTURE_TEST_CASE(testPrune, CTestFixture) { core_t::TTime bucketStart{startTime}; for (const auto& message : messages) { if (message.s_Time >= bucketStart + bucketLength) { - model->sample(bucketStart, bucketStart + bucketLength, m_ResourceMonitor); + m_Model->sample(bucketStart, bucketStart + bucketLength, m_ResourceMonitor); bucketStart += bucketLength; } - this->addArrival(message, gatherer); + this->addArrival(message, m_Gatherer); } - model->sample(bucketStart, bucketStart + bucketLength, m_ResourceMonitor); - size_t maxDimensionBeforePrune(model->dataGatherer().maxDimension()); - model->prune(); - size_t maxDimensionAfterPrune(model->dataGatherer().maxDimension()); + m_Model->sample(bucketStart, bucketStart + bucketLength, m_ResourceMonitor); + size_t maxDimensionBeforePrune(m_Model->dataGatherer().maxDimension()); + m_Model->prune(); + size_t maxDimensionAfterPrune(m_Model->dataGatherer().maxDimension()); BOOST_REQUIRE_EQUAL(maxDimensionBeforePrune, maxDimensionAfterPrune); bucketStart = startTime; @@ -743,31 +732,31 @@ BOOST_FIXTURE_TEST_CASE(testPrune, CTestFixture) { } expectedModel->sample(bucketStart, bucketStart + bucketLength, m_ResourceMonitor); - LOG_DEBUG(<< "checksum = " << model->checksum()); + LOG_DEBUG(<< "checksum = " << m_Model->checksum()); LOG_DEBUG(<< "expected checksum = " << expectedModel->checksum()); - BOOST_REQUIRE_EQUAL(expectedModel->checksum(), model->checksum()); + BOOST_REQUIRE_EQUAL(expectedModel->checksum(), m_Model->checksum()); // Now check that we recycle the person and attribute slots. - bucketStart = gatherer->currentBucketStartTime() + bucketLength; + bucketStart = m_Gatherer->currentBucketStartTime() + bucketLength; TMessageVec newMessages{{bucketStart + 10, "p1", "c2", TDouble1Vec(1, 20.0)}, {bucketStart + 200, "p5", "c6", TDouble1Vec(1, 10.0)}, {bucketStart + 2100, "p5", "c6", TDouble1Vec(1, 15.0)}}; for (auto& newMessage : newMessages) { - this->addArrival(newMessage, gatherer); + this->addArrival(newMessage, m_Gatherer); this->addArrival(newMessage, expectedGatherer); } - model->sample(bucketStart, bucketStart + bucketLength, m_ResourceMonitor); + m_Model->sample(bucketStart, bucketStart + bucketLength, m_ResourceMonitor); expectedModel->sample(bucketStart, bucketStart + bucketLength, m_ResourceMonitor); - LOG_DEBUG(<< "checksum = " << model->checksum()); + LOG_DEBUG(<< "checksum = " << m_Model->checksum()); LOG_DEBUG(<< "expected checksum = " << expectedModel->checksum()); - BOOST_REQUIRE_EQUAL(expectedModel->checksum(), model->checksum()); + BOOST_REQUIRE_EQUAL(expectedModel->checksum(), m_Model->checksum()); // Test that calling prune on a cloned model which has seen no new data does nothing - CAnomalyDetectorModel::TModelPtr clonedModelHolder(model->cloneForPersistence()); + CAnomalyDetectorModel::TModelPtr clonedModelHolder(m_Model->cloneForPersistence()); std::size_t numberOfPeopleBeforePrune( clonedModelHolder->dataGatherer().numberActivePeople()); BOOST_TEST_REQUIRE(numberOfPeopleBeforePrune > 0); @@ -831,28 +820,16 @@ BOOST_FIXTURE_TEST_CASE(testFrequency, CTestFixture) { SModelParams params(bucketLength); params.s_DecayRate = 0.001; - auto interimBucketCorrector = std::make_shared(bucketLength); - CMetricPopulationModelFactory factory(params, interimBucketCorrector); - factory.features({model_t::E_PopulationMeanByPersonAndAttribute}); - CModelFactory::SGathererInitializationData gathererInitData(startTime); - CModelFactory::TDataGathererPtr gatherer(factory.makeDataGatherer(gathererInitData)); - const model::CDataGatherer& populationGatherer( - dynamic_cast(*gatherer)); - CModelFactory::SModelInitializationData modelInitData(gatherer); - CAnomalyDetectorModel::TModelPtr model(factory.makeModel(modelInitData)); - - CMetricPopulationModel* populationModel = - dynamic_cast(model.get()); - BOOST_TEST_REQUIRE(populationModel); + this->makeModel(params, {model_t::E_PopulationMeanByPersonAndAttribute}, startTime); core_t::TTime time{startTime}; for (const auto& message : messages) { if (message.s_Time >= time + bucketLength) { - populationModel->sample(time, time + bucketLength, m_ResourceMonitor); + m_Model->sample(time, time + bucketLength, m_ResourceMonitor); time += bucketLength; } - this->addArrival(message, gatherer); + this->addArrival(message, m_Gatherer); } { @@ -860,14 +837,14 @@ BOOST_FIXTURE_TEST_CASE(testFrequency, CTestFixture) { for (auto& datum : data) { LOG_DEBUG(<< "*** person = " << datum.s_Person << " ***"); std::size_t pid; - BOOST_TEST_REQUIRE(gatherer->personId(datum.s_Person, pid)); - LOG_DEBUG(<< "frequency = " << populationModel->personFrequency(pid)); + BOOST_TEST_REQUIRE(m_Gatherer->personId(datum.s_Person, pid)); + LOG_DEBUG(<< "frequency = " << m_Model->personFrequency(pid)); LOG_DEBUG(<< "expected frequency = " << 1.0 / static_cast(datum.s_Period)); BOOST_REQUIRE_CLOSE_ABSOLUTE(1.0 / static_cast(datum.s_Period), - populationModel->personFrequency(pid), + m_Model->personFrequency(pid), 0.1 / static_cast(datum.s_Period)); - meanError.add(std::fabs(populationModel->personFrequency(pid) - + meanError.add(std::fabs(m_Model->personFrequency(pid) - 1.0 / static_cast(datum.s_Period))); } LOG_DEBUG(<< "error = " << maths::CBasicStatistics::mean(meanError)); @@ -878,11 +855,11 @@ BOOST_FIXTURE_TEST_CASE(testFrequency, CTestFixture) { for (auto& datum : data) { LOG_DEBUG(<< "*** attributes = " << datum.s_Attribute << " ***"); std::size_t cid; - BOOST_TEST_REQUIRE(populationGatherer.attributeId(datum.s_Attribute, cid)); - LOG_DEBUG(<< "frequency = " << populationModel->attributeFrequency(cid)); + BOOST_TEST_REQUIRE(m_Gatherer->attributeId(datum.s_Attribute, cid)); + LOG_DEBUG(<< "frequency = " << m_Model->attributeFrequency(cid)); LOG_DEBUG(<< "expected frequency = " << (10.0 - static_cast(i)) / 10.0); BOOST_REQUIRE_EQUAL((10.0 - static_cast(i)) / 10.0, - populationModel->attributeFrequency(cid)); + m_Model->attributeFrequency(cid)); ++i; } } @@ -945,17 +922,11 @@ BOOST_FIXTURE_TEST_CASE(testSampleRateWeight, CTestFixture) { SModelParams params(bucketLength); params.s_DecayRate = 0.001; - auto interimBucketCorrector = std::make_shared(bucketLength); - CMetricPopulationModelFactory factory(params, interimBucketCorrector); - factory.features({model_t::E_PopulationSumByBucketPersonAndAttribute}); - CModelFactory::SGathererInitializationData gathererInitData(startTime); - CModelFactory::TDataGathererPtr gatherer(factory.makeDataGatherer(gathererInitData)); - CModelFactory::SModelInitializationData modelInitData(gatherer); - CAnomalyDetectorModel::TModelPtr model(factory.makeModel(modelInitData)); + this->makeModel(params, {model_t::E_PopulationSumByBucketPersonAndAttribute}, startTime); CMetricPopulationModel* populationModel = - dynamic_cast(model.get()); + dynamic_cast(m_Model.get()); BOOST_TEST_REQUIRE(populationModel); core_t::TTime time{startTime}; @@ -964,7 +935,7 @@ BOOST_FIXTURE_TEST_CASE(testSampleRateWeight, CTestFixture) { populationModel->sample(time, time + bucketLength, m_ResourceMonitor); time += bucketLength; } - this->addArrival(message, gatherer); + this->addArrival(message, m_Gatherer); } // The heavy hitters generate one value per attribute per bucket. @@ -983,7 +954,7 @@ BOOST_FIXTURE_TEST_CASE(testSampleRateWeight, CTestFixture) { for (auto& heavyHitter : heavyHitters) { LOG_DEBUG(<< "*** person = " << people[heavyHitter] << " ***"); std::size_t pid; - BOOST_TEST_REQUIRE(gatherer->personId(people[heavyHitter], pid)); + BOOST_TEST_REQUIRE(m_Gatherer->personId(people[heavyHitter], pid)); for (std::size_t cid = 0u; cid < attributes.size(); ++cid) { double sampleRateWeight = populationModel->sampleRateWeight(pid, cid); LOG_DEBUG(<< "attribute = " << populationModel->attributeName(cid) @@ -996,7 +967,7 @@ BOOST_FIXTURE_TEST_CASE(testSampleRateWeight, CTestFixture) { for (auto& norm : normal) { LOG_DEBUG(<< "*** person = " << people[norm] << " ***"); std::size_t pid; - BOOST_TEST_REQUIRE(gatherer->personId(people[norm], pid)); + BOOST_TEST_REQUIRE(m_Gatherer->personId(people[norm], pid)); for (std::size_t cid = 0u; cid < attributes.size(); ++cid) { double sampleRateWeight = populationModel->sampleRateWeight(pid, cid); LOG_DEBUG(<< "attribute = " << populationModel->attributeName(cid) @@ -1050,18 +1021,8 @@ BOOST_FIXTURE_TEST_CASE(testPeriodicity, CTestFixture) { SModelParams params(bucketLength); params.s_DecayRate = 0.001; - auto interimBucketCorrector = std::make_shared(bucketLength); - CMetricPopulationModelFactory factory(params, interimBucketCorrector); - factory.features({model_t::E_PopulationMeanByPersonAndAttribute}); - CModelFactory::SGathererInitializationData gathererInitData(startTime); - CModelFactory::TDataGathererPtr gatherer(factory.makeDataGatherer(gathererInitData)); - - CModelFactory::SModelInitializationData modelInitData(gatherer); - CAnomalyDetectorModel::TModelPtr model(factory.makeModel(modelInitData)); - CMetricPopulationModel* populationModel = - dynamic_cast(model.get()); - BOOST_TEST_REQUIRE(populationModel); + this->makeModel(params, {model_t::E_PopulationMeanByPersonAndAttribute}, startTime); TStrDoubleMap personProbabilitiesWithoutPeriodicity; TStrDoubleMap personProbabilitiesWithPeriodicity; @@ -1069,19 +1030,18 @@ BOOST_FIXTURE_TEST_CASE(testPeriodicity, CTestFixture) { core_t::TTime time = startTime; for (const auto& message : messages) { if (message.s_Time >= time + bucketLength) { - populationModel->sample(time, time + bucketLength, m_ResourceMonitor); + m_Model->sample(time, time + bucketLength, m_ResourceMonitor); for (const auto& person : people) { std::size_t pid; - if (!gatherer->personId(person, pid)) { + if (!m_Gatherer->personId(person, pid)) { continue; } CPartitioningFields partitioningFields(EMPTY_STRING, EMPTY_STRING); SAnnotatedProbability annotatedProbability; - if (populationModel->computeProbability( - pid, time, time + bucketLength, partitioningFields, 1, - annotatedProbability) == false) { + if (m_Model->computeProbability(pid, time, time + bucketLength, partitioningFields, + 1, annotatedProbability) == false) { continue; } @@ -1102,7 +1062,7 @@ BOOST_FIXTURE_TEST_CASE(testPeriodicity, CTestFixture) { time += bucketLength; } - this->addArrival(message, gatherer); + this->addArrival(message, m_Gatherer); } double totalw{0.0}; @@ -1132,34 +1092,28 @@ BOOST_FIXTURE_TEST_CASE(testPersistence, CTestFixture) { SModelParams params(bucketLength); params.s_DecayRate = 0.001; - auto interimBucketCorrector = std::make_shared(bucketLength); - CMetricPopulationModelFactory factory(params, interimBucketCorrector); model_t::TFeatureVec features{model_t::E_PopulationMeanByPersonAndAttribute, model_t::E_PopulationMinByPersonAndAttribute, model_t::E_PopulationMaxByPersonAndAttribute}; - factory.features(features); - CModelFactory::SGathererInitializationData gathererInitData(startTime); - CModelFactory::TDataGathererPtr gatherer(factory.makeDataGatherer(gathererInitData)); - CModelFactory::SModelInitializationData modelInitData(gatherer); - CAnomalyDetectorModel::TModelPtr origModel(factory.makeModel(modelInitData)); + this->makeModel(params, features, startTime); CMetricPopulationModel* populationModel = - dynamic_cast(origModel.get()); + dynamic_cast(m_Model.get()); BOOST_TEST_REQUIRE(populationModel); for (auto& message : messages) { if (message.s_Time >= startTime + bucketLength) { - origModel->sample(startTime, startTime + bucketLength, m_ResourceMonitor); + m_Model->sample(startTime, startTime + bucketLength, m_ResourceMonitor); startTime += bucketLength; } - this->addArrival(message, gatherer); + this->addArrival(message, m_Gatherer); } std::string origXml; { core::CRapidXmlStatePersistInserter inserter("root"); - origModel->acceptPersistInserter(inserter); + m_Model->acceptPersistInserter(inserter); inserter.toXml(origXml); } @@ -1170,7 +1124,8 @@ BOOST_FIXTURE_TEST_CASE(testPersistence, CTestFixture) { BOOST_TEST_REQUIRE(parser.parseStringIgnoreCdata(origXml)); core::CRapidXmlStateRestoreTraverser traverser(parser); - CAnomalyDetectorModel::TModelPtr restoredModel(factory.makeModel(modelInitData, traverser)); + CAnomalyDetectorModel::TModelPtr restoredModel( + m_Factory->makeModel({m_Gatherer}, traverser)); populationModel = dynamic_cast(restoredModel.get()); BOOST_TEST_REQUIRE(populationModel); @@ -1184,9 +1139,9 @@ BOOST_FIXTURE_TEST_CASE(testPersistence, CTestFixture) { inserter.toXml(newXml); } - LOG_DEBUG(<< "original checksum = " << origModel->checksum(false)); + LOG_DEBUG(<< "original checksum = " << m_Model->checksum(false)); LOG_DEBUG(<< "restored checksum = " << restoredModel->checksum(false)); - BOOST_REQUIRE_EQUAL(origModel->checksum(false), restoredModel->checksum(false)); + BOOST_REQUIRE_EQUAL(m_Model->checksum(false), restoredModel->checksum(false)); BOOST_REQUIRE_EQUAL(origXml, newXml); } diff --git a/lib/model/unittest/CModelTestFixtureBase.h b/lib/model/unittest/CModelTestFixtureBase.h index 919eb502e2..8bd017a1e2 100644 --- a/lib/model/unittest/CModelTestFixtureBase.h +++ b/lib/model/unittest/CModelTestFixtureBase.h @@ -22,6 +22,7 @@ #include #include +#include #include #include @@ -54,6 +55,7 @@ class CModelTestFixtureBase { using TOptionalDouble = boost::optional; using TOptionalDoubleVec = std::vector; using TOptionalStr = boost::optional; + using TOptionalUInt = boost::optional; using TOptionalUInt64 = boost::optional; using TPriorPtr = std::shared_ptr; @@ -220,8 +222,48 @@ class CModelTestFixtureBase { const std::string& overFieldName, TKeyCompareFunc keyCompare); + template + void makeModelT(const ml::model::SModelParams& params, + const ml::model_t::TFeatureVec& features, + ml::core_t::TTime startTime, + ml::model_t::EModelType modelType, + ml::model::CModelFactory::TDataGathererPtr& gatherer, + ml::model::CModelFactory::TModelPtr& model, + TOptionalUInt sampleCount = TOptionalUInt(), + const std::string& summaryCountField = EMPTY_STRING) { + if (m_InterimBucketCorrector == nullptr) { + m_InterimBucketCorrector = + std::make_shared(params.s_BucketLength); + } + if (m_Factory == nullptr) { + m_Factory.reset(new T(params, m_InterimBucketCorrector, + summaryCountField.empty() ? ml::model_t::E_None + : ml::model_t::E_Manual, + summaryCountField)); + m_Factory->features(features); + } + ml::model::CModelFactory::SGathererInitializationData initData(startTime); + if (sampleCount) { + initData.s_SampleOverrideCount = *sampleCount; + } + gatherer.reset(m_Factory->makeDataGatherer(initData)); + model.reset(m_Factory->makeModel({gatherer})); + + BOOST_TEST_REQUIRE(model); + BOOST_REQUIRE_EQUAL(modelType, model->category()); + BOOST_REQUIRE_EQUAL(params.s_BucketLength, model->bucketLength()); + } + +protected: + using TInterimBucketCorrectorPtr = std::shared_ptr; + using TModelFactoryPtr = std::shared_ptr; + protected: ml::model::CResourceMonitor m_ResourceMonitor; + TInterimBucketCorrectorPtr m_InterimBucketCorrector; + TModelFactoryPtr m_Factory; + ml::model::CModelFactory::TDataGathererPtr m_Gatherer; + ml::model::CModelFactory::TModelPtr m_Model; }; #endif //INCLUDED_CModelTestFixtureBase_h