From 2b68d39ca1121670cab1365ef2301ad450fe8bbb Mon Sep 17 00:00:00 2001 From: Tom Veasey Date: Wed, 5 Dec 2018 11:01:20 +0000 Subject: [PATCH] [ML] Correct query times for model plot and forecast (#327) --- docs/CHANGELOG.asciidoc | 8 ++++++++ include/api/CForecastRunner.h | 2 +- include/model/CForecastDataSink.h | 28 ++++++++++++++++++--------- lib/api/CForecastRunner.cc | 15 ++++----------- lib/model/CForecastDataSink.cc | 32 +++++++++++++++++++++++++------ lib/model/CModelDetailsView.cc | 3 ++- 6 files changed, 60 insertions(+), 28 deletions(-) diff --git a/docs/CHANGELOG.asciidoc b/docs/CHANGELOG.asciidoc index db56d0c1f5..f59c429bf8 100644 --- a/docs/CHANGELOG.asciidoc +++ b/docs/CHANGELOG.asciidoc @@ -28,6 +28,14 @@ //=== Regressions + == {es} version 6.5.3 + +=== Bug Fixes + +Correct query times for model plot and forecast in the bucket to match the times we assign +the samples we add to the model for each bucket. For long bucket lengths, this could result +in apparently shifted model plot with respect to the data and increased errors in forecasts. + == {es} version 6.5.0 //=== Breaking Changes diff --git a/include/api/CForecastRunner.h b/include/api/CForecastRunner.h index 2d4ea182fa..2d2ddc8293 100644 --- a/include/api/CForecastRunner.h +++ b/include/api/CForecastRunner.h @@ -112,7 +112,7 @@ class API_EXPORT CForecastRunner final : private core::CNonCopyable { using TAnomalyDetectorPtr = std::shared_ptr; using TAnomalyDetectorPtrVec = std::vector; - using TForecastModelWrapper = model::CForecastDataSink::SForecastModelWrapper; + using TForecastModelWrapper = model::CForecastDataSink::CForecastModelWrapper; using TForecastResultSeries = model::CForecastDataSink::SForecastResultSeries; using TForecastResultSeriesVec = std::vector; using TMathsModelPtr = std::unique_ptr; diff --git a/include/model/CForecastDataSink.h b/include/model/CForecastDataSink.h index b3430a2c8c..a994f9b6da 100644 --- a/include/model/CForecastDataSink.h +++ b/include/model/CForecastDataSink.h @@ -40,21 +40,31 @@ class MODEL_EXPORT CForecastDataSink final : private core::CNonCopyable { public: using TMathsModelPtr = std::shared_ptr; using TStrUMap = boost::unordered_set; + struct SForecastResultSeries; //! Wrapper for 1 timeseries model, its feature and by Field - struct MODEL_EXPORT SForecastModelWrapper { - SForecastModelWrapper(model_t::EFeature feature, + class MODEL_EXPORT CForecastModelWrapper { + public: + CForecastModelWrapper(model_t::EFeature feature, TMathsModelPtr&& forecastModel, const std::string& byFieldValue); - SForecastModelWrapper(SForecastModelWrapper&& other); + CForecastModelWrapper(CForecastModelWrapper&& other); - SForecastModelWrapper(const SForecastModelWrapper& that) = delete; - SForecastModelWrapper& operator=(const SForecastModelWrapper&) = delete; + CForecastModelWrapper(const CForecastModelWrapper& that) = delete; + CForecastModelWrapper& operator=(const CForecastModelWrapper&) = delete; - model_t::EFeature s_Feature; - TMathsModelPtr s_ForecastModel; - std::string s_ByFieldValue; + bool forecast(const SForecastResultSeries& series, + core_t::TTime startTime, + core_t::TTime endTime, + double boundsPercentile, + CForecastDataSink& sink, + std::string& message) const; + + private: + model_t::EFeature m_Feature; + TMathsModelPtr m_ForecastModel; + std::string m_ByFieldValue; }; //! Everything that defines 1 series of forecasts @@ -68,7 +78,7 @@ class MODEL_EXPORT CForecastDataSink final : private core::CNonCopyable { SModelParams s_ModelParams; int s_DetectorIndex; - std::vector s_ToForecast; + std::vector s_ToForecast; std::string s_ToForecastPersisted; std::string s_PartitionFieldName; std::string s_PartitionFieldValue; diff --git a/lib/api/CForecastRunner.cc b/lib/api/CForecastRunner.cc index 899c301d01..c440739372 100644 --- a/lib/api/CForecastRunner.cc +++ b/lib/api/CForecastRunner.cc @@ -169,17 +169,10 @@ void CForecastRunner::forecastWorker() { } } - const TForecastModelWrapper& model = series.s_ToForecast.back(); - model_t::TDouble1VecDouble1VecPr support = - model_t::support(model.s_Feature); - bool success = model.s_ForecastModel->forecast( - forecastJob.s_StartTime, forecastJob.forecastEnd(), - forecastJob.s_BoundsPercentile, support.first, support.second, - boost::bind(&model::CForecastDataSink::push, &sink, _1, - model_t::print(model.s_Feature), series.s_PartitionFieldName, - series.s_PartitionFieldValue, series.s_ByFieldName, - model.s_ByFieldValue, series.s_DetectorIndex), - message); + const TForecastModelWrapper& model{series.s_ToForecast.back()}; + bool success{model.forecast( + series, forecastJob.s_StartTime, forecastJob.forecastEnd(), + forecastJob.s_BoundsPercentile, sink, message)}; series.s_ToForecast.pop_back(); if (success == false) { diff --git a/lib/model/CForecastDataSink.cc b/lib/model/CForecastDataSink.cc index a82dcd7aa8..8e5818755d 100644 --- a/lib/model/CForecastDataSink.cc +++ b/lib/model/CForecastDataSink.cc @@ -9,6 +9,8 @@ #include #include +#include + #include namespace ml { @@ -54,16 +56,34 @@ const std::string CForecastDataSink::STATUS("forecast_status"); using TScopedAllocator = core::CScopedRapidJsonPoolAllocator; -CForecastDataSink::SForecastModelWrapper::SForecastModelWrapper(model_t::EFeature feature, +CForecastDataSink::CForecastModelWrapper::CForecastModelWrapper(model_t::EFeature feature, TMathsModelPtr&& forecastModel, const std::string& byFieldValue) - : s_Feature(feature), s_ForecastModel(std::move(forecastModel)), - s_ByFieldValue(byFieldValue) { + : m_Feature(feature), m_ForecastModel(std::move(forecastModel)), + m_ByFieldValue(byFieldValue) { +} + +CForecastDataSink::CForecastModelWrapper::CForecastModelWrapper(CForecastModelWrapper&& other) + : m_Feature(other.m_Feature), m_ForecastModel(std::move(other.m_ForecastModel)), + m_ByFieldValue(std::move(other.m_ByFieldValue)) { } -CForecastDataSink::SForecastModelWrapper::SForecastModelWrapper(SForecastModelWrapper&& other) - : s_Feature(other.s_Feature), s_ForecastModel(std::move(other.s_ForecastModel)), - s_ByFieldValue(std::move(other.s_ByFieldValue)) { +bool CForecastDataSink::CForecastModelWrapper::forecast(const SForecastResultSeries& series, + core_t::TTime startTime, + core_t::TTime endTime, + double boundsPercentile, + CForecastDataSink& sink, + std::string& message) const { + core_t::TTime bucketLength{m_ForecastModel->params().bucketLength()}; + startTime = model_t::sampleTime(m_Feature, startTime, bucketLength); + endTime = model_t::sampleTime(m_Feature, endTime, bucketLength); + model_t::TDouble1VecDouble1VecPr support{model_t::support(m_Feature)}; + return m_ForecastModel->forecast( + startTime, endTime, boundsPercentile, support.first, support.second, + boost::bind(&model::CForecastDataSink::push, &sink, _1, model_t::print(m_Feature), + series.s_PartitionFieldName, series.s_PartitionFieldValue, + series.s_ByFieldName, m_ByFieldValue, series.s_DetectorIndex), + message); } CForecastDataSink::SForecastResultSeries::SForecastResultSeries(const SModelParams& modelParams) diff --git a/lib/model/CModelDetailsView.cc b/lib/model/CModelDetailsView.cc index 22e723edab..be4ea231ac 100644 --- a/lib/model/CModelDetailsView.cc +++ b/lib/model/CModelDetailsView.cc @@ -77,11 +77,12 @@ void CModelDetailsView::modelPlotForByFieldId(core_t::TTime time, if (this->isByFieldIdActive(byFieldId)) { const maths::CModel* model = this->model(feature, byFieldId); - if (!model) { + if (model == nullptr) { return; } std::size_t dimension = model_t::dimension(feature); + time = model_t::sampleTime(feature, time, model->params().bucketLength()); maths_t::TDouble2VecWeightsAry weights( maths_t::CUnitWeights::unit(dimension));