Skip to content

Commit

Permalink
TimeSeries - fix confidence parameter type for some detectors (#4058) (
Browse files Browse the repository at this point in the history
…#5623)

* TimeSeries - fix confidence parameter type for some detectors.

- The public API exposed confidence parameters as int even though they are internally implemented as double
- There was no workaround since all classes where double is used are internal
- This caused major issues for software requiring high precision predictions
- This change to the API should be backwards compatible, since an int can be passed to a parameter of type double

* TimeSeries - reintroduce original methods with confidence parameter of type int (to not break the API).

* TimeSeries - make catalog API methods with int confidence parameter deprecated.

- Tests adjusted to not use the deprecated methods
  • Loading branch information
esso23 authored Feb 18, 2021
1 parent 05ef676 commit 5ec4472
Show file tree
Hide file tree
Showing 15 changed files with 128 additions and 21 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ public static void Example()
// Setup SsaChangePointDetector arguments
var inputColumnName = nameof(TimeSeriesData.Value);
var outputColumnName = nameof(ChangePointPrediction.Prediction);
int confidence = 95;
double confidence = 95;
int changeHistoryLength = 8;

// Train the change point detector.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ public static void Example()

// The transformed data.
var transformedData = ml.Transforms.DetectChangePointBySsa(
outputColumnName, inputColumnName, 95, 8, TrainingSize,
outputColumnName, inputColumnName, 95.0d, 8, TrainingSize,
SeasonalitySize + 1).Fit(dataView).Transform(dataView);

// Getting the data of the newly created column as an IEnumerable of
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ public static void Example()
// Setup SsaChangePointDetector arguments
var inputColumnName = nameof(TimeSeriesData.Value);
var outputColumnName = nameof(ChangePointPrediction.Prediction);
int confidence = 95;
double confidence = 95;
int changeHistoryLength = 8;

// Train the change point detector.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ public static void Example()

// Time Series model.
ITransformer model = ml.Transforms.DetectIidChangePoint(
outputColumnName, inputColumnName, 95, Size / 4).Fit(dataView);
outputColumnName, inputColumnName, 95.0d, Size / 4).Fit(dataView);

// Create a time series prediction engine from the model.
var engine = model.CreateTimeSeriesEngine<TimeSeriesData,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ public static void Example()

// The transformed data.
var transformedData = ml.Transforms.DetectIidChangePoint(
outputColumnName, inputColumnName, 95, Size / 4).Fit(dataView)
outputColumnName, inputColumnName, 95.0d, Size / 4).Fit(dataView)
.Transform(dataView);

// Getting the data of the newly created column as an IEnumerable of
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ public static void Example()

// The transformed model.
ITransformer model = ml.Transforms.DetectIidSpike(outputColumnName,
inputColumnName, 95, Size).Fit(dataView);
inputColumnName, 95.0d, Size).Fit(dataView);

// Create a time series prediction engine from the model.
var engine = model.CreateTimeSeriesEngine<TimeSeriesData,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ public static void Example()

// The transformed data.
var transformedData = ml.Transforms.DetectIidSpike(outputColumnName,
inputColumnName, 95, Size / 4).Fit(dataView).Transform(dataView);
inputColumnName, 95.0d, Size / 4).Fit(dataView).Transform(dataView);

// Getting the data of the newly created column as an IEnumerable of
// IidSpikePrediction.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ public static void Example()

// Train the change point detector.
ITransformer model = ml.Transforms.DetectSpikeBySsa(outputColumnName,
inputColumnName, 95, 8, TrainingSize, SeasonalitySize + 1).Fit(
inputColumnName, 95.0d, 8, TrainingSize, SeasonalitySize + 1).Fit(
dataView);

// Create a prediction engine from the model for feeding new data.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ public static void Example()

// The transformed data.
var transformedData = ml.Transforms.DetectSpikeBySsa(outputColumnName,
inputColumnName, 95, 8, TrainingSize, SeasonalitySize + 1).Fit(
inputColumnName, 95.0d, 8, TrainingSize, SeasonalitySize + 1).Fit(
dataView).Transform(dataView);

// Getting the data of the newly created column as an IEnumerable of
Expand Down
107 changes: 107 additions & 0 deletions src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System;
using System.Collections.Generic;
using Microsoft.ML.Data;
using Microsoft.ML.Runtime;
Expand Down Expand Up @@ -32,8 +33,33 @@ public static class TimeSeriesCatalog
/// ]]>
/// </format>
/// </example>
[Obsolete("This API method is deprecated, please use the overload with confidence parameter of type double.")]
public static IidChangePointEstimator DetectIidChangePoint(this TransformsCatalog catalog, string outputColumnName, string inputColumnName,
    int confidence, int changeHistoryLength, MartingaleType martingale = MartingaleType.Power, double eps = 0.1)
{
    // Widen the integer confidence first so that the call below binds to the
    // double-based overload instead of recursing into this deprecated one.
    double widenedConfidence = confidence;
    return DetectIidChangePoint(catalog, outputColumnName, inputColumnName, widenedConfidence, changeHistoryLength, martingale, eps);
}

/// <summary>
/// Builds an <see cref="IidChangePointEstimator"/> for predicting change points in an
/// <a href="https://en.wikipedia.org/wiki/Independent_and_identically_distributed_random_variables">independent identically distributed (i.i.d.)</a>
/// time series, based on adaptive kernel density estimations and martingale scores.
/// </summary>
/// <param name="catalog">The transform's catalog.</param>
/// <param name="outputColumnName">Name of the column produced by transforming <paramref name="inputColumnName"/>.
/// The column holds a vector of <see cref="System.Double"/> with 4 elements: alert (non-zero value means a change point), raw score, p-Value and martingale score.</param>
/// <param name="inputColumnName">Name of the column to transform; its data must be <see cref="System.Single"/>. When <see langword="null"/>, the value of <paramref name="outputColumnName"/> is used as the source.</param>
/// <param name="confidence">The confidence for change point detection, in the range [0, 100].</param>
/// <param name="changeHistoryLength">The length of the sliding window on p-values used to compute the martingale score.</param>
/// <param name="martingale">The martingale used for scoring.</param>
/// <param name="eps">The epsilon parameter for the Power martingale.</param>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
/// [!code-csharp[DetectIidChangePoint](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectIidChangePointBatchPrediction.cs)]
/// ]]>
/// </format>
/// </example>
public static IidChangePointEstimator DetectIidChangePoint(this TransformsCatalog catalog, string outputColumnName, string inputColumnName,
    double confidence, int changeHistoryLength, MartingaleType martingale = MartingaleType.Power, double eps = 0.1)
{
    // Resolve the hosting environment from the catalog, then hand everything to the estimator.
    var env = CatalogUtils.GetEnvironment(catalog);
    return new IidChangePointEstimator(env, outputColumnName, confidence, changeHistoryLength, inputColumnName, martingale, eps);
}

/// <summary>
Expand All @@ -56,8 +82,33 @@ public static IidChangePointEstimator DetectIidChangePoint(this TransformsCatalo
/// ]]>
/// </format>
/// </example>
[Obsolete("This API method is deprecated, please use the overload with confidence parameter of type double.")]
public static IidSpikeEstimator DetectIidSpike(this TransformsCatalog catalog, string outputColumnName, string inputColumnName,
    int confidence, int pvalueHistoryLength, AnomalySide side = AnomalySide.TwoSided)
{
    // Widen the integer confidence first so that the call below binds to the
    // double-based overload instead of recursing into this deprecated one.
    double widenedConfidence = confidence;
    return DetectIidSpike(catalog, outputColumnName, inputColumnName, widenedConfidence, pvalueHistoryLength, side);
}

/// <summary>
/// Builds an <see cref="IidSpikeEstimator"/> for predicting spikes in an
/// <a href="https://en.wikipedia.org/wiki/Independent_and_identically_distributed_random_variables"> independent identically distributed (i.i.d.)</a>
/// time series, based on adaptive kernel density estimations and martingale scores.
/// </summary>
/// <param name="catalog">The transform's catalog.</param>
/// <param name="outputColumnName">Name of the column produced by transforming <paramref name="inputColumnName"/>.
/// The column holds a vector of <see cref="System.Double"/> with 3 elements: alert (non-zero value means a spike), raw score, and p-value.</param>
/// <param name="inputColumnName">Name of the column to transform; its data must be <see cref="System.Single"/>.
/// When <see langword="null"/>, the value of <paramref name="outputColumnName"/> is used as the source.</param>
/// <param name="confidence">The confidence for spike detection, in the range [0, 100].</param>
/// <param name="pvalueHistoryLength">The size of the sliding window used to compute the p-value.</param>
/// <param name="side">Determines whether positive anomalies, negative anomalies, or both are detected.</param>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
/// [!code-csharp[DetectIidSpike](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectIidSpikeBatchPrediction.cs)]
/// ]]>
/// </format>
/// </example>
public static IidSpikeEstimator DetectIidSpike(this TransformsCatalog catalog, string outputColumnName, string inputColumnName,
    double confidence, int pvalueHistoryLength, AnomalySide side = AnomalySide.TwoSided)
{
    // Resolve the hosting environment from the catalog, then hand everything to the estimator.
    var env = CatalogUtils.GetEnvironment(catalog);
    return new IidSpikeEstimator(env, outputColumnName, confidence, pvalueHistoryLength, inputColumnName, side);
}

/// <summary>
Expand All @@ -83,9 +134,38 @@ public static IidSpikeEstimator DetectIidSpike(this TransformsCatalog catalog, s
/// ]]>
/// </format>
/// </example>
[Obsolete("This API method is deprecated, please use the overload with confidence parameter of type double.")]
public static SsaChangePointEstimator DetectChangePointBySsa(this TransformsCatalog catalog, string outputColumnName, string inputColumnName,
    int confidence, int changeHistoryLength, int trainingWindowSize, int seasonalityWindowSize, ErrorFunction errorFunction = ErrorFunction.SignedDifference,
    MartingaleType martingale = MartingaleType.Power, double eps = 0.1)
{
    // Widen the integer confidence first so that the call below binds to the
    // double-based overload instead of recursing into this deprecated one.
    double widenedConfidence = confidence;
    return DetectChangePointBySsa(catalog, outputColumnName, inputColumnName, widenedConfidence, changeHistoryLength,
        trainingWindowSize, seasonalityWindowSize, errorFunction, martingale, eps);
}

/// <summary>
/// Create <see cref="SsaChangePointEstimator"/>, which predicts change points in time series
/// using <a href="https://en.wikipedia.org/wiki/Singular_spectrum_analysis">Singular Spectrum Analysis (SSA)</a>.
/// </summary>
/// <param name="catalog">The transform's catalog.</param>
/// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>.
/// The column data is a vector of <see cref="System.Double"/>. The vector contains 4 elements: alert (non-zero value means a change point), raw score, p-Value and martingale score.</param>
/// <param name="inputColumnName">Name of column to transform. The column data must be <see cref="System.Single"/>.
/// If set to <see langword="null"/>, the value of the <paramref name="outputColumnName"/> will be used as source.</param>
/// <param name="confidence">The confidence for change point detection in the range [0, 100].</param>
/// <param name="trainingWindowSize">The number of points from the beginning of the sequence used for training.</param>
/// <param name="changeHistoryLength">The length of the sliding window on p-values for computing the martingale score.</param>
/// <param name="seasonalityWindowSize">An upper bound on the largest relevant seasonality in the input time-series.</param>
/// <param name="errorFunction">The function used to compute the error between the expected and the observed value.</param>
/// <param name="martingale">The martingale used for scoring.</param>
/// <param name="eps">The epsilon parameter for the Power martingale.</param>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
/// [!code-csharp[DetectChangePointBySsa](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectChangePointBySsaBatchPrediction.cs)]
/// ]]>
/// </format>
/// </example>
public static SsaChangePointEstimator DetectChangePointBySsa(this TransformsCatalog catalog, string outputColumnName, string inputColumnName,
double confidence, int changeHistoryLength, int trainingWindowSize, int seasonalityWindowSize, ErrorFunction errorFunction = ErrorFunction.SignedDifference,
MartingaleType martingale = MartingaleType.Power, double eps = 0.1)
=> new SsaChangePointEstimator(CatalogUtils.GetEnvironment(catalog), new SsaChangePointDetector.Options
{
Name = outputColumnName,
Expand Down Expand Up @@ -121,7 +201,34 @@ public static SsaChangePointEstimator DetectChangePointBySsa(this TransformsCata
/// ]]>
/// </format>
/// </example>
[Obsolete("This API method is deprecated, please use the overload with confidence parameter of type double.")]
public static SsaSpikeEstimator DetectSpikeBySsa(this TransformsCatalog catalog, string outputColumnName, string inputColumnName, int confidence, int pvalueHistoryLength,
    int trainingWindowSize, int seasonalityWindowSize, AnomalySide side = AnomalySide.TwoSided, ErrorFunction errorFunction = ErrorFunction.SignedDifference)
{
    // Widen the integer confidence first so that the call below binds to the
    // double-based overload instead of recursing into this deprecated one.
    double widenedConfidence = confidence;
    return DetectSpikeBySsa(catalog, outputColumnName, inputColumnName, widenedConfidence, pvalueHistoryLength,
        trainingWindowSize, seasonalityWindowSize, side, errorFunction);
}

/// <summary>
/// Builds an <see cref="SsaSpikeEstimator"/> for predicting spikes in time series
/// using <a href="https://en.wikipedia.org/wiki/Singular_spectrum_analysis">Singular Spectrum Analysis (SSA)</a>.
/// </summary>
/// <param name="catalog">The transform's catalog.</param>
/// <param name="outputColumnName">Name of the column produced by transforming <paramref name="inputColumnName"/>.
/// The column holds a vector of <see cref="System.Double"/> with 3 elements: alert (non-zero value means a spike), raw score, and p-value.</param>
/// <param name="inputColumnName">Name of the column to transform; its data must be <see cref="System.Single"/>.
/// When <see langword="null"/>, the value of <paramref name="outputColumnName"/> is used as the source.</param>
/// <param name="confidence">The confidence for spike detection, in the range [0, 100].</param>
/// <param name="pvalueHistoryLength">The size of the sliding window used to compute the p-value.</param>
/// <param name="trainingWindowSize">The number of points from the beginning of the sequence used for training.</param>
/// <param name="seasonalityWindowSize">An upper bound on the largest relevant seasonality in the input time-series.</param>
/// <param name="side">Determines whether positive anomalies, negative anomalies, or both are detected.</param>
/// <param name="errorFunction">The function used to compute the error between the expected and the observed value.</param>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
/// [!code-csharp[DetectSpikeBySsa](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectSpikeBySsaBatchPrediction.cs)]
/// ]]>
/// </format>
/// </example>
public static SsaSpikeEstimator DetectSpikeBySsa(this TransformsCatalog catalog, string outputColumnName, string inputColumnName, double confidence, int pvalueHistoryLength,
    int trainingWindowSize, int seasonalityWindowSize, AnomalySide side = AnomalySide.TwoSided, ErrorFunction errorFunction = ErrorFunction.SignedDifference)
{
    // Resolve the hosting environment from the catalog, then hand everything to the estimator.
    var env = CatalogUtils.GetEnvironment(catalog);
    return new SsaSpikeEstimator(env, outputColumnName, confidence, pvalueHistoryLength, trainingWindowSize,
        seasonalityWindowSize, inputColumnName, side, errorFunction);
}

Expand Down
4 changes: 2 additions & 2 deletions src/Microsoft.ML.TimeSeries/IidChangePointDetector.cs
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,7 @@ private static IRowMapper Create(IHostEnvironment env, ModelLoadContext ctx, Dat
/// ]]>
/// </format>
/// </remarks>
/// <seealso cref="Microsoft.ML.TimeSeriesCatalog.DetectIidChangePoint(Microsoft.ML.TransformsCatalog,System.String,System.String,System.Int32,System.Int32,Microsoft.ML.Transforms.TimeSeries.MartingaleType,System.Double)" />
/// <seealso cref="Microsoft.ML.TimeSeriesCatalog.DetectIidChangePoint(Microsoft.ML.TransformsCatalog,System.String,System.String,System.Double,System.Int32,Microsoft.ML.Transforms.TimeSeries.MartingaleType,System.Double)" />
public sealed class IidChangePointEstimator : TrivialEstimator<IidChangePointDetector>
{
/// <summary>
Expand All @@ -233,7 +233,7 @@ public sealed class IidChangePointEstimator : TrivialEstimator<IidChangePointDet
/// <param name="inputColumnName">Name of column to transform. If set to <see langword="null"/>, the value of the <paramref name="outputColumnName"/> will be used as source.</param>
/// <param name="martingale">The martingale used for scoring.</param>
/// <param name="eps">The epsilon parameter for the Power martingale.</param>
internal IidChangePointEstimator(IHostEnvironment env, string outputColumnName, int confidence,
internal IidChangePointEstimator(IHostEnvironment env, string outputColumnName, double confidence,
int changeHistoryLength, string inputColumnName, MartingaleType martingale = MartingaleType.Power, double eps = 0.1)
: base(Contracts.CheckRef(env, nameof(env)).Register(nameof(IidChangePointEstimator)),
new IidChangePointDetector(env, new IidChangePointDetector.Options
Expand Down
4 changes: 2 additions & 2 deletions src/Microsoft.ML.TimeSeries/IidSpikeDetector.cs
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ private static IRowMapper Create(IHostEnvironment env, ModelLoadContext ctx, Dat
/// ]]>
/// </format>
/// </remarks>
/// <seealso cref="Microsoft.ML.TimeSeriesCatalog.DetectIidSpike(Microsoft.ML.TransformsCatalog,System.String,System.String,System.Int32,System.Int32,Microsoft.ML.Transforms.TimeSeries.AnomalySide)" />
/// <seealso cref="Microsoft.ML.TimeSeriesCatalog.DetectIidSpike(Microsoft.ML.TransformsCatalog,System.String,System.String,System.Double,System.Int32,Microsoft.ML.Transforms.TimeSeries.AnomalySide)" />
public sealed class IidSpikeEstimator : TrivialEstimator<IidSpikeDetector>
{
/// <summary>
Expand All @@ -212,7 +212,7 @@ public sealed class IidSpikeEstimator : TrivialEstimator<IidSpikeDetector>
/// <param name="pvalueHistoryLength">The size of the sliding window for computing the p-value.</param>
/// <param name="inputColumnName">Name of column to transform. If set to <see langword="null"/>, the value of the <paramref name="outputColumnName"/> will be used as source.</param>
/// <param name="side">The argument that determines whether to detect positive or negative anomalies, or both.</param>
internal IidSpikeEstimator(IHostEnvironment env, string outputColumnName, int confidence, int pvalueHistoryLength, string inputColumnName, AnomalySide side = AnomalySide.TwoSided)
internal IidSpikeEstimator(IHostEnvironment env, string outputColumnName, double confidence, int pvalueHistoryLength, string inputColumnName, AnomalySide side = AnomalySide.TwoSided)
: base(Contracts.CheckRef(env, nameof(env)).Register(nameof(IidSpikeDetector)),
new IidSpikeDetector(env, new IidSpikeDetector.Options
{
Expand Down
Loading

0 comments on commit 5ec4472

Please sign in to comment.