From 2058a36667ebf2f2d457320383c19edd0ce0f1df Mon Sep 17 00:00:00 2001 From: "yuyi@microsoft.com" Date: Tue, 13 Oct 2020 16:44:07 +0800 Subject: [PATCH 01/10] adjust expected value --- .../SrCnnEntireAnomalyDetector.cs | 29 +++++++- .../TimeSeriesDirectApi.cs | 62 ++++++++++++++++ test/data/Timeseries/non_negative_case.csv | 70 +++++++++++++++++++ 3 files changed, 158 insertions(+), 3 deletions(-) create mode 100644 test/data/Timeseries/non_negative_case.csv diff --git a/src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs b/src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs index 7cca7f893a..d418146edf 100644 --- a/src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs +++ b/src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs @@ -387,6 +387,8 @@ internal sealed class SrCnnEntireModeler private readonly IDeseasonality _deseasonalityFunction; //used in all modes + private double _minimumOriginValue; + private double _maximumOriginValue; private readonly double[] _predictArray; private double[] _backAddArray; private double[] _fftRe; @@ -449,10 +451,15 @@ public void Train(double[] values, ref double[][] results) Array.Resize(ref results, values.Length); } + _minimumOriginValue = Double.MaxValue; + _maximumOriginValue = Double.MinValue; + Array.Resize(ref _seriesToDetect, values.Length); for (int i = 0; i < values.Length; ++i) { _seriesToDetect[i] = values[i]; + _minimumOriginValue = Math.Min(_minimumOriginValue, values[i]); + _maximumOriginValue = Math.Max(_maximumOriginValue, values[i]); } if (_period > 0) @@ -641,7 +648,7 @@ private void GetExpectedValue(double[] values, double[][] results) for (int i = 0; i < results.Length; ++i) { - results[i][3] = _ifftRe[i]; + results[i][3] = AdjustExpectedValueBasedOnOriginalDataRange(_ifftRe[i]); } } @@ -650,7 +657,7 @@ private void GetExpectedValuePeriod(double[] values, double[][] results, IReadOn //Step 8: Calculate Expected Value for (int i = 0; i < values.Length; ++i) { - results[i][3] = values[i] - residual[i]; + results[i][3] = AdjustExpectedValueBasedOnOriginalDataRange(values[i] - residual[i]); } } @@ -762,7 +769,8 @@ private void GetMargin(double[] values, double[][] results, double sensitivity) { //Step 10: Calculate UpperBound and LowerBound var margin = CalculateMargin(_units[i], sensitivity); - results[i][3] = _ifftRe[i]; + results[i][3] = AdjustExpectedValueBasedOnOriginalDataRange(_ifftRe[i]); + results[i][4] = _units[i]; results[i][5] = _ifftRe[i] + margin; results[i][6] = _ifftRe[i] - margin; @@ -783,6 +791,21 @@ private void GetMargin(double[] values, double[][] results, double sensitivity) } } + // Adjust the expected value if original data range is non-negative or non-positive + private double AdjustExpectedValueBasedOnOriginalDataRange(double expectedValue) + { + if (_minimumOriginValue >= 0 && expectedValue < 0) + { + expectedValue = 0; + } + else if (_maximumOriginValue <= 0 && expectedValue > 0) + { + expectedValue = 0; + } + + return expectedValue; + } + // Adjust the expected value so that it is within the bound margin of value private double AdjustExpectedValueBasedOnBound(double value, double expectedValue, double unit) { diff --git a/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs b/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs index 1ffdf2ba96..a97a6111fb 100644 --- a/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs +++ b/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs @@ -776,6 +776,68 @@ public void TestSrCnnAnomalyDetectorWithSeasonalAnomalyData( } } + [Theory, CombinatorialData] + public void TestNonnegativeData( + [CombinatorialValues(true, false)] bool isPositive) + { + var ml = new MLContext(1); + IDataView dataView; + List data; + + // Load data from file into the dataView + var dataPath = GetDataPath("Timeseries", "non_negative_case.csv"); + + // Load data from file into the dataView + dataView = ml.Data.LoadFromTextFile(dataPath, hasHeader: true); + data = ml.Data.CreateEnumerable(dataView, reuseRowObject: false).ToList(); + + if (!isPositive) + { + for (int i = 0; i < data.Count; ++i) + { + data[i].Value = - data[i].Value; + } + } + + dataView = ml.Data.LoadFromEnumerable(data); + + // Setup the detection arguments + string outputColumnName = nameof(SrCnnAnomalyDetection.Prediction); + string inputColumnName = nameof(TimeSeriesDataDouble.Value); + + // Do batch anomaly detection + var options = new SrCnnEntireAnomalyDetectorOptions() + { + Threshold = 0.10, + BatchSize = -1, + Sensitivity = 99.0, + DetectMode = SrCnnDetectMode.AnomalyAndMargin, + Period = 0, + DeseasonalityMode = SrCnnDeseasonalityMode.Stl + }; + + var outputDataView = ml.AnomalyDetection.DetectEntireAnomalyBySrCnn(dataView, outputColumnName, inputColumnName, options); + + // Getting the data of the newly created column as an IEnumerable of SrCnnAnomalyDetection. + var predictionColumn = ml.Data.CreateEnumerable( + outputDataView, reuseRowObject: false); + + if (isPositive) + { + foreach (var prediction in predictionColumn) + { + Assert.True(prediction.Prediction[3] >= 0); + } + } + else + { + foreach (var prediction in predictionColumn) + { + Assert.True(prediction.Prediction[3] <= 0); + } + } + } + [Fact] public void RootCauseLocalization() { diff --git a/test/data/Timeseries/non_negative_case.csv b/test/data/Timeseries/non_negative_case.csv new file mode 100644 index 0000000000..2dbf4b3fe5 --- /dev/null +++ b/test/data/Timeseries/non_negative_case.csv @@ -0,0 +1,70 @@ +Value +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +1 +0 +5 +1 +4 +1 +4 +3 +3 +6 +3 +6 +4 +7 +9 +11 +28 +16 +35 +59 +42 +77 +91 +132 +189 +264 +258 +359 +493 +352 +550 From cf2940834b7e5b9e0f3b04d4c29bb1a5b379c04e Mon Sep 17 00:00:00 2001 From: "yuyi@microsoft.com" Date: Wed, 14 Oct 2020 15:42:22 +0800 Subject: [PATCH 02/10] update boundary calculation --- .../SrCnnEntireAnomalyDetector.cs | 161 ++++++++++++------ .../TimeSeriesDirectApi.cs | 6 +- 2 files changed, 113 insertions(+), 54 deletions(-) diff --git a/src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs b/src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs index 7cca7f893a..89baa05543 100644 --- a/src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs +++ b/src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs @@ -351,34 +351,52 @@ internal sealed class SrCnnEntireModeler private static readonly double _deanomalyThreshold = 0.35; private static readonly double _boundSensitivity = 70.0; - // A fixed lookup table which returns factor using sensitivity as index. - // Since Margin = BoundaryUnit * factor, this factor is calculated to make sure Margin == Boundary when sensitivity is 50, - // and increases/decreases exponentially as sensitivity increases/decreases. - // The factor array is generated by formula: - // f(x)=1, if x=50; - // f(x)=f(x+1)*(1.25+0.001*x), if 0<=x<50; - // f(x)=f(x+1)/(1.25+0.001*(x-50)), if 50 0, values[i]); results[i][4] = _units[i]; - results[i][5] = results[i][3] + margin; - results[i][6] = results[i][3] - margin; + results[i][5] = results[i][3] + margin.Item1; + results[i][6] = results[i][3] - margin.Item2; // update anomaly result according to the boundary results[i][0] = results[i][0] > 0 && (values[i] < results[i][6] || results[i][5] < values[i]) ? 1 : 0; @@ -761,11 +779,11 @@ private void GetMargin(double[] values, double[][] results, double sensitivity) for (int i = 0; i < results.Length; ++i) { //Step 10: Calculate UpperBound and LowerBound - var margin = CalculateMargin(_units[i], sensitivity); + var margin = CalculateMargin(_units[i], sensitivity, results[i][3], results[i][0] > 0, values[i]); results[i][3] = _ifftRe[i]; results[i][4] = _units[i]; - results[i][5] = _ifftRe[i] + margin; - results[i][6] = _ifftRe[i] - margin; + results[i][5] = _ifftRe[i] + margin.Item1; + results[i][6] = _ifftRe[i] - margin.Item2; //Step 11: Update Anomaly Score results[i][1] = CalculateAnomalyScore(values[i], _ifftRe[i], _units[i], results[i][0] > 0); @@ -777,8 +795,8 @@ private void GetMargin(double[] values, double[][] results, double sensitivity) if (results[i][0] == 0) { results[i][3] = AdjustExpectedValueBasedOnBound(values[i], results[i][3], _units[i]); - results[i][5] = results[i][3] + margin; - results[i][6] = results[i][3] - margin; + results[i][5] = results[i][3] + margin.Item1; + results[i][6] = results[i][3] - margin.Item2; } } } @@ -786,8 +804,8 @@ private void GetMargin(double[] values, double[][] results, double sensitivity) // Adjust the expected value so that it is within the bound margin of value private double AdjustExpectedValueBasedOnBound(double value, double expectedValue, double unit) { - var boundMargin = CalculateMargin(unit, _boundSensitivity); - return Math.Max(Math.Min(expectedValue, value + boundMargin), value - boundMargin); + var boundMargin = CalculateMargin(unit, _boundSensitivity, expectedValue, true, value); + return Math.Max(Math.Min(expectedValue, value + boundMargin.Item1), value - boundMargin.Item2); } private int[] GetAnomalyIndex(double[] scores) @@ -1009,16 +1027,62 @@ private double SortedMedian(double[] sortedValues, int begin, int end) } } - private double CalculateMargin(double unit, double sensitivity) + private Tuple CalculateMarginCore(double unit, double sensitivity, double expectedValue, bool isAnomaly, double value) + { + double percent = 0.5; + double delta = unit * _factors[(int)sensitivity]; + double ignoreRatio = 0.0001; + + double lowerMargin = delta; + double upperMargin = delta; + if (!isAnomaly) + { + if (value < expectedValue - delta) + { + lowerMargin = expectedValue - value + delta * percent; + upperMargin = delta; + } + else if (value > expectedValue + delta) + { + lowerMargin = delta; + upperMargin = value - expectedValue + delta * percent; + } + } + else + { + if (value > expectedValue - delta && value < expectedValue + delta && Math.Abs(value - expectedValue) > ignoreRatio * unit && sensitivity == 99) + { + if (value > expectedValue) + { + lowerMargin = percent * (value - expectedValue); + upperMargin = percent * (value - expectedValue); + } + else + { + lowerMargin = percent * (expectedValue - value); + upperMargin = percent * (expectedValue - value); + } + } + } + + return new Tuple(upperMargin, lowerMargin); + } + + private Tuple CalculateMargin(double unit, double sensitivity, double expectedValue, bool isAnomaly, double value) { if (Math.Floor(sensitivity) == sensitivity) { - return unit * _factors[(int)sensitivity]; + return CalculateMarginCore(unit, sensitivity, expectedValue, isAnomaly, value); } else { int lb = (int)sensitivity; - return (_factors[lb + 1] + (_factors[lb] - _factors[lb + 1]) * (1 - sensitivity + lb)) * unit; + var tightMargin = CalculateMargin(unit, lb + 1, expectedValue, isAnomaly, value); + var looseMargin = CalculateMargin(unit, lb, expectedValue, isAnomaly, value); + double upper = tightMargin.Item1 + (1 - sensitivity + lb) * (looseMargin.Item1 - tightMargin.Item1); + double lower = tightMargin.Item2 + (1 - sensitivity + lb) * (looseMargin.Item2 - tightMargin.Item2); + + return new Tuple(upper, lower); } } @@ -1031,19 +1095,14 @@ private double CalculateAnomalyScore(double value, double exp, double unit, bool return anomalyScore; } - double distance = Math.Abs(exp - value); - List margins = new List(); - for (int i = 100; i >= 0; --i) - { - margins.Add(CalculateMargin(unit, i)); - } + double distanceFactor = Math.Abs(exp - value) / unit; int lb = 0; int ub = 100; while (lb < ub) { int mid = (lb + ub) / 2; - if (margins[mid] < distance) + if (_factors[100 - mid] < distanceFactor) { lb = mid + 1; } @@ -1053,15 +1112,15 @@ private double CalculateAnomalyScore(double value, double exp, double unit, bool } } - if (Math.Abs(margins[lb] - distance) < _eps || lb == 0) + if (_factors[100 - lb] == distanceFactor || lb == 0) { anomalyScore = lb; } else { - double lowerMargin = margins[lb - 1]; - double upperMargin = margins[lb]; - anomalyScore = lb - 1 + (distance - lowerMargin) / (upperMargin - lowerMargin); + double lowerMargin = _factors[101 - lb]; + double upperMargin = _factors[101 - lb]; + anomalyScore = lb - 1 + (distanceFactor - lowerMargin) / (upperMargin - lowerMargin); } return anomalyScore / 100.0f; diff --git a/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs b/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs index 1ffdf2ba96..f3aac33f34 100644 --- a/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs +++ b/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs @@ -621,7 +621,7 @@ public void TestSrCnnBatchAnomalyDetector( // Do batch anomaly detection var outputDataView = ml.AnomalyDetection.DetectEntireAnomalyBySrCnn(dataView, outputColumnName, inputColumnName, - threshold: 0.35, batchSize: batchSize, sensitivity: 90.0, mode); + threshold: 0.35, batchSize: batchSize, sensitivity: 98.0, mode); // Getting the data of the newly created column as an IEnumerable of SrCnnAnomalyDetection. var predictionColumn = ml.Data.CreateEnumerable( @@ -694,7 +694,7 @@ public void TestSrCnnAnomalyDetectorWithSeasonalData( { Threshold = 0.3, BatchSize = -1, - Sensitivity = 53.0, + Sensitivity = 64.0, DetectMode = SrCnnDetectMode.AnomalyAndMargin, Period = 288, DeseasonalityMode = mode @@ -741,7 +741,7 @@ public void TestSrCnnAnomalyDetectorWithSeasonalAnomalyData( { Threshold = 0.23, BatchSize = -1, - Sensitivity = 53.0, + Sensitivity = 63.0, DetectMode = SrCnnDetectMode.AnomalyAndMargin, Period = 288, DeseasonalityMode = mode From 5eaa0a07f1aef765afef5759035e24c55630fca5 Mon Sep 17 00:00:00 2001 From: "yuyi@microsoft.com" Date: Wed, 14 Oct 2020 17:37:28 +0800 Subject: [PATCH 03/10] fix boundary --- .../SrCnnEntireAnomalyDetector.cs | 72 ++++--------------- 1 file changed, 13 insertions(+), 59 deletions(-) diff --git a/src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs b/src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs index 89baa05543..1741859f78 100644 --- a/src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs +++ b/src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs @@ -683,10 +683,10 @@ private void GetMarginPeriod(double[] values, double[][] results, IReadOnlyList< for (int i = 0; i < results.Length; ++i) { //Step 10: Calculate UpperBound and LowerBound - var margin = CalculateMargin(_units[i], sensitivity, results[i][3], results[i][0] > 0, values[i]); + var margin = CalculateMargin(_units[i], sensitivity); results[i][4] = _units[i]; - results[i][5] = results[i][3] + margin.Item1; - results[i][6] = results[i][3] - margin.Item2; + results[i][5] = results[i][3] + margin; + results[i][6] = results[i][3] - margin; // update anomaly result according to the boundary results[i][0] = results[i][0] > 0 && (values[i] < results[i][6] || results[i][5] < values[i]) ? 1 : 0; @@ -779,11 +779,11 @@ private void GetMargin(double[] values, double[][] results, double sensitivity) for (int i = 0; i < results.Length; ++i) { //Step 10: Calculate UpperBound and LowerBound - var margin = CalculateMargin(_units[i], sensitivity, results[i][3], results[i][0] > 0, values[i]); + var margin = CalculateMargin(_units[i], sensitivity); results[i][3] = _ifftRe[i]; results[i][4] = _units[i]; - results[i][5] = _ifftRe[i] + margin.Item1; - results[i][6] = _ifftRe[i] - margin.Item2; + results[i][5] = _ifftRe[i] + margin; + results[i][6] = _ifftRe[i] - margin; //Step 11: Update Anomaly Score results[i][1] = CalculateAnomalyScore(values[i], _ifftRe[i], _units[i], results[i][0] > 0); @@ -795,8 +795,8 @@ private void GetMargin(double[] values, double[][] results, double sensitivity) if (results[i][0] == 0) { results[i][3] = AdjustExpectedValueBasedOnBound(values[i], results[i][3], _units[i]); - results[i][5] = results[i][3] + margin.Item1; - results[i][6] = results[i][3] - margin.Item2; + results[i][5] = results[i][3] + margin; + results[i][6] = results[i][3] - margin; } } } @@ -804,8 +804,8 @@ private void GetMargin(double[] values, double[][] results, double sensitivity) // Adjust the expected value so that it is within the bound margin of value private double AdjustExpectedValueBasedOnBound(double value, double expectedValue, double unit) { - var boundMargin = CalculateMargin(unit, _boundSensitivity, expectedValue, true, value); - return Math.Max(Math.Min(expectedValue, value + boundMargin.Item1), value - boundMargin.Item2); + var boundMargin = CalculateMargin(unit, _boundSensitivity); + return Math.Max(Math.Min(expectedValue, value + boundMargin), value - boundMargin); } private int[] GetAnomalyIndex(double[] scores) @@ -1027,62 +1027,16 @@ private double SortedMedian(double[] sortedValues, int begin, int end) } } - private Tuple CalculateMarginCore(double unit, double sensitivity, double expectedValue, bool isAnomaly, double value) - { - double percent = 0.5; - double delta = unit * _factors[(int)sensitivity]; - double ignoreRatio = 0.0001; - - double lowerMargin = delta; - double upperMargin = delta; - if (!isAnomaly) - { - if (value < expectedValue - delta) - { - lowerMargin = expectedValue - value + delta * percent; - upperMargin = delta; - } - else if (value > expectedValue + delta) - { - lowerMargin = delta; - upperMargin = value - expectedValue + delta * percent; - } - } - else - { - if (value > expectedValue - delta && value < expectedValue + delta && Math.Abs(value - expectedValue) > ignoreRatio * unit && sensitivity == 99) - { - if (value > expectedValue) - { - lowerMargin = percent * (value - expectedValue); - upperMargin = percent * (value - expectedValue); - } - else - { - lowerMargin = percent * (expectedValue - value); - upperMargin = percent * (expectedValue - value); - } - } - } - - return new Tuple(upperMargin, lowerMargin); - } - - private Tuple CalculateMargin(double unit, double sensitivity, double expectedValue, bool isAnomaly, double value) + private double CalculateMargin(double unit, double sensitivity) { if (Math.Floor(sensitivity) == sensitivity) { - return CalculateMarginCore(unit, sensitivity, expectedValue, isAnomaly, value); + return unit * _factors[(int)sensitivity]; } else { int lb = (int)sensitivity; - var tightMargin = CalculateMargin(unit, lb + 1, expectedValue, isAnomaly, value); - var looseMargin = CalculateMargin(unit, lb, expectedValue, isAnomaly, value); - double upper = tightMargin.Item1 + (1 - sensitivity + lb) * (looseMargin.Item1 - tightMargin.Item1); - double lower = tightMargin.Item2 + (1 - sensitivity + lb) * (looseMargin.Item2 - tightMargin.Item2); - - return new Tuple(upper, lower); + return (_factors[lb + 1] + (_factors[lb] - _factors[lb + 1]) * (1 - sensitivity + lb)) * unit; } } From a9dfaec16bfa457af98873cf036ab43752ed8ad8 Mon Sep 17 00:00:00 2001 From: "yuyi@microsoft.com" Date: Wed, 14 Oct 2020 17:55:01 +0800 Subject: [PATCH 04/10] adjust default values --- src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs b/src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs index b4855d0d14..30b5cff7a7 100644 --- a/src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs +++ b/src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs @@ -84,7 +84,7 @@ internal static class Defaults { public const double Threshold = 0.3; public const int BatchSize = 2000; - public const double Sensitivity = 55; + public const double Sensitivity = 70; public const SrCnnDetectMode DetectMode = SrCnnDetectMode.AnomalyOnly; public const int Period = 0; public const SrCnnDeseasonalityMode DeseasonalityMode = SrCnnDeseasonalityMode.Stl; @@ -349,7 +349,7 @@ internal sealed class SrCnnEntireModeler private static readonly int _judgementWindowSize = 40; private static readonly double _eps = 1e-8; private static readonly double _deanomalyThreshold = 0.35; - private static readonly double _boundSensitivity = 70.0; + private static readonly double _boundSensitivity = 93.0; // pseudo-code to generate the factors. // factors = [] From d8a0d9bff950afd162c244c45fdb9df46225cbff Mon Sep 17 00:00:00 2001 From: "yuyi@microsoft.com" Date: Thu, 15 Oct 2020 17:08:04 +0800 Subject: [PATCH 05/10] fix percent case --- .../SrCnnEntireAnomalyDetector.cs | 20 ++++++++++++++----- .../TimeSeriesDirectApi.cs | 2 +- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs b/src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs index 30b5cff7a7..c8703f44f8 100644 --- a/src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs +++ b/src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs @@ -350,6 +350,7 @@ internal sealed class SrCnnEntireModeler private static readonly double _eps = 1e-8; private static readonly double _deanomalyThreshold = 0.35; private static readonly double _boundSensitivity = 93.0; + private static readonly double _unitForZero = 0.3; // pseudo-code to generate the factors. // factors = [] @@ -921,18 +922,20 @@ private void CalculateExpectedValueByFft(double[] data) FftUtils.ComputeBackwardFft(_fftRe, _fftIm, _ifftRe, _ifftIm, length); } - private void CalculateBoundaryUnit(double[] data, bool[] isAnomalys) + private void CalculateBoundaryUnit(double[] data, bool[] isAnomalies) { int window = Math.Min(data.Length / 3, 512); double trendFraction = 0.5; // mix trend and average of trend double trendSum = 0; int calculationSize = 0; + bool closeToZero = true; MedianFilter(data, window, true); for (int i = 0; i < _trends.Length; ++i) { - if (!isAnomalys[i]) + if (!isAnomalies[i]) { + closeToZero = closeToZero && _trends[i] < _eps; trendSum += Math.Abs(_trends[i]); ++calculationSize; } @@ -951,10 +954,17 @@ private void CalculateBoundaryUnit(double[] data, bool[] isAnomalys) Array.Resize(ref _units, _trends.Length); for (int i = 0; i < _units.Length; ++i) { - _units[i] = Math.Max(1, averageTrendPart + Math.Abs(_trends[i]) * trendFraction); - if (double.IsInfinity(_units[i])) + if (closeToZero) { - throw new ArithmeticException("Not finite unit value"); + _units[i] = _unitForZero; + } + else + { + _units[i] = averageTrendPart + Math.Abs(_trends[i]) * trendFraction; + if (double.IsInfinity(_units[i])) + { + throw new ArithmeticException("Not finite unit value"); + } } } } diff --git a/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs b/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs index 60a8aa51f9..3c81426021 100644 --- a/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs +++ b/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs @@ -777,7 +777,7 @@ public void TestSrCnnAnomalyDetectorWithSeasonalAnomalyData( } [Theory, CombinatorialData] - public void TestNonnegativeData( + public void TestSrcnnEntireDetectNonnegativeData( [CombinatorialValues(true, false)] bool isPositive) { var ml = new MLContext(1); From 411fdea97fe41710922c2382cf764ce90db50bde Mon Sep 17 00:00:00 2001 From: "yuyi@microsoft.com" Date: Fri, 16 Oct 2020 15:03:43 +0800 Subject: [PATCH 06/10] fix error in anomaly score calculation --- src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs b/src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs index c8703f44f8..ddcbfd79d3 100644 --- a/src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs +++ b/src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs @@ -1106,7 +1106,7 @@ private double CalculateAnomalyScore(double value, double exp, double unit, bool else { double lowerMargin = _factors[101 - lb]; - double upperMargin = _factors[101 - lb]; + double upperMargin = _factors[100 - lb]; anomalyScore = lb - 1 + (distanceFactor - lowerMargin) / (upperMargin - lowerMargin); } From 79a87ce8cef218fbd7fec37d9fe50eb60fd614f0 Mon Sep 17 00:00:00 2001 From: "yuyi@microsoft.com" Date: Fri, 20 Nov 2020 15:38:07 +0800 Subject: [PATCH 07/10] adjust score calculation for first & second points --- .../SrCnnEntireAnomalyDetector.cs | 23 +++++++++++++------ 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs b/src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs index ddcbfd79d3..eb4f9fe42c 100644 --- a/src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs +++ b/src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs @@ -351,6 +351,10 @@ internal sealed class SrCnnEntireModeler private static readonly double _deanomalyThreshold = 0.35; private static readonly double _boundSensitivity = 93.0; private static readonly double _unitForZero = 0.3; + private static readonly double _minimumScore = 0.0; + private static readonly double _maximumScore = 1.0; + // If the score window is smaller than this value, the anomaly score is tend to be small. + private static readonly int _minimumScoreWindowSize = (int)(_maximumScore * 10) + 1; // pseudo-code to generate the factors. // factors = [] @@ -438,6 +442,7 @@ public SrCnnEntireModeler(double threshold, double sensitivity, SrCnnDetectMode _sensitivity = sensitivity; _detectMode = detectMode; _period = period; + // it will reduce the probability of a point detected as anomaly if its score window is too short _predictArray = new double[_lookaheadWindowSize + 1]; switch (deseasonalityMode) @@ -577,15 +582,19 @@ private void SpectralResidual(double[] values, double[][] results, double thresh { _ifftMagList[i] = Math.Sqrt(_ifftRe[i] * _ifftRe[i] + _ifftIm[i] * _ifftIm[i]); } - AverageFilter(_ifftMagList, Math.Min(_ifftMagList.Length, _judgementWindowSize)); + AverageFilter(_ifftMagList, Math.Min(_ifftMagList.Length, _judgementWindowSize), true); + for (int i = 1; i <= Math.Min(length, _minimumScoreWindowSize); ++i) + { + _cumSumList[i] = _cumSumList[Math.Min(length - 1, _minimumScoreWindowSize + 1)]; + } // Step 7: Calculate raw score and set result for (int i = 0; i < results.GetLength(0); ++i) { var score = CalculateScore(_ifftMagList[i], _cumSumList[i]); score /= 10.0f; - score = Math.Min(score, 1); - score = Math.Max(score, 0); + score = Math.Min(score, _maximumScore); + score = Math.Max(score, _minimumScore); var detres = score > threshold ? 1 : 0; @@ -625,7 +634,7 @@ private double PredictNext(double[] data) return (data[1] + slopeSum); } - private void AverageFilter(double[] data, int n) + private void AverageFilter(double[] data, int n, bool ignoreFirst=false) { double cumsum = 0.0f; int length = data.Length; @@ -639,13 +648,13 @@ private void AverageFilter(double[] data, int n) _cumSumList[i] = cumsum; _cumSumShift[i] = cumsum; } - for (int i = n; i < length; ++i) + for (int i = n + 1; i < length; ++i) { _cumSumList[i] = (_cumSumList[i] - _cumSumShift[i - n]) / n; } - for (int i = 1; i < n; ++i) + for (int i = 1; i <= n && i < length; ++i) { - _cumSumList[i] /= (i + 1); + _cumSumList[i] = ignoreFirst ? (_cumSumShift[i] - _cumSumShift[0]) / i : _cumSumList[i] / (i + 1); } } From 0f516fc9ad4fff42c32fd84301ab98da98d2bd84 Mon Sep 17 00:00:00 2001 From: "yuyi@microsoft.com" Date: Fri, 20 Nov 2020 16:39:37 +0800 Subject: [PATCH 08/10] fix sr do not report anomaly at beginning --- .../SrCnnEntireAnomalyDetector.cs | 5 ++ .../TimeSeriesDirectApi.cs | 58 +++++++++++++++++++ test/data/Timeseries/anomaly_at_beginning.csv | 39 +++++++++++++ 3 files changed, 102 insertions(+) create mode 100644 test/data/Timeseries/anomaly_at_beginning.csv diff --git a/src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs b/src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs index eb4f9fe42c..20925d6ca6 100644 --- a/src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs +++ b/src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs @@ -354,6 +354,11 @@ internal sealed class SrCnnEntireModeler private static readonly double _minimumScore = 0.0; private static readonly double _maximumScore = 1.0; // If the score window is smaller than this value, the anomaly score is tend to be small. + // Proof: For each point, the SR anomaly score is calculated as (w is average window size): + // (mag - avg_mag) / avg_mag + // = max (w * mag_{a} - sum_{i=0 to w-1} mag_{a - i}) / sum_{i=0 to w-1} mag_{a - i} + // = max ((w - 1) * mag_{a} + C) / (mag_{a} + C) + // <= w - 1 private static readonly int _minimumScoreWindowSize = (int)(_maximumScore * 10) + 1; // pseudo-code to generate the factors. diff --git a/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs b/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs index 3c81426021..2877c3150f 100644 --- a/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs +++ b/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs @@ -776,6 +776,64 @@ public void TestSrCnnAnomalyDetectorWithSeasonalAnomalyData( } } + [Theory, CombinatorialData] + public void TestSrCnnAnomalyDetectorWithAnomalyAtBeginning( + [CombinatorialValues(SrCnnDeseasonalityMode.Stl, SrCnnDeseasonalityMode.Mean, SrCnnDeseasonalityMode.Median)] SrCnnDeseasonalityMode mode + ) + { + var ml = new MLContext(1); + IDataView dataView; + List data; + + var dataPath = GetDataPath("Timeseries", "anomaly_at_beginning.csv"); + + // Load data from file into the dataView + dataView = ml.Data.LoadFromTextFile(dataPath, hasHeader: true); + data = ml.Data.CreateEnumerable(dataView, reuseRowObject: false).ToList(); + + // Setup the detection arguments + string outputColumnName = nameof(SrCnnAnomalyDetection.Prediction); + string inputColumnName = nameof(TimeSeriesDataDouble.Value); + + // Do batch anomaly detection + var options = new SrCnnEntireAnomalyDetectorOptions() + { + Threshold = 0.30, + BatchSize = -1, + Sensitivity = 80.0, + DetectMode = SrCnnDetectMode.AnomalyAndMargin, + Period = 0, + DeseasonalityMode = mode + }; + + var outputDataView = ml.AnomalyDetection.DetectEntireAnomalyBySrCnn(dataView, outputColumnName, inputColumnName, options); + + // Getting the data of the newly created column as an IEnumerable of SrCnnAnomalyDetection. + var predictionColumn = ml.Data.CreateEnumerable( + outputDataView, reuseRowObject: false); + + var anomalyIndex = 1; + + int k = 0; + foreach (var prediction in predictionColumn) + { + Assert.Equal(7, prediction.Prediction.Length); + if (anomalyIndex == k) + { + Assert.Equal(1, prediction.Prediction[0]); + Assert.True(prediction.Prediction[6] > data[k].Value || data[k].Value > prediction.Prediction[5]); + } + else + { + Assert.Equal(0, prediction.Prediction[0]); + Assert.True(prediction.Prediction[6] <= data[k].Value); + Assert.True(data[k].Value <= prediction.Prediction[5]); + } + + ++k; + } + } + [Theory, CombinatorialData] public void TestSrcnnEntireDetectNonnegativeData( [CombinatorialValues(true, false)] bool isPositive) diff --git a/test/data/Timeseries/anomaly_at_beginning.csv b/test/data/Timeseries/anomaly_at_beginning.csv new file mode 100644 index 0000000000..609c3a5f1c --- /dev/null +++ b/test/data/Timeseries/anomaly_at_beginning.csv @@ -0,0 +1,39 @@ +Value +181.944 +37.176 +57.14 +67.128 +72.12 +77.112 +82.104 +83.1 +87.09 +92.088 +92.01 +97.08 +102.072 +107.05 +107.06 +117.048 +122.04 +132.024 +147 +151.82 +151.992 +151.72 +151.94 +156.969 +156.984 +156.92 +161.976 +161.94 +161.97 +166.968 +176.952 +181.94 +186.936 +201.91 +201.912 +201.9 +206.904 +216.88 From 17303e0774eab7bde15093d618d53718c5e163bd Mon Sep 17 00:00:00 2001 From: "yuyi@microsoft.com" Date: Fri, 20 Nov 2020 19:31:49 +0800 Subject: [PATCH 09/10] fix a issue in batch process --- .../SrCnnEntireAnomalyDetector.cs | 24 +++++++++++++------ 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs b/src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs index 20925d6ca6..0ec11eee6e 100644 --- a/src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs +++ b/src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs @@ -309,6 +309,15 @@ public void Process() _previousBatch = _previousBatch.GetRange(_batch.Count, _bLen); _previousBatch.AddRange(_batch); _modeler.Train(_previousBatch.ToArray(), ref _results); + + // move the values to front + for (int i = 0; i < _batch.Count; ++i) + { + for (int j = 0; j < _outputLength; ++j) + { + _results[i][j] = _results[_bLen + i][j]; + } + } } else { @@ -334,7 +343,7 @@ public ValueGetter> CreateGetter(DataViewRowCursor input, string double src = default; srcGetter(ref src); var result = VBufferEditor.Create(ref dst, _outputLength); - _results[input.Position % _batchSize + _bLen].CopyTo(result.Values); + _results[input.Position % _batchSize].CopyTo(result.Values); dst = result.Commit(); }; return getter; @@ -587,10 +596,11 @@ private void SpectralResidual(double[] values, double[][] results, double thresh { _ifftMagList[i] = Math.Sqrt(_ifftRe[i] * _ifftRe[i] + _ifftIm[i] * _ifftIm[i]); } - AverageFilter(_ifftMagList, Math.Min(_ifftMagList.Length, _judgementWindowSize), true); - for (int i = 1; i <= Math.Min(length, _minimumScoreWindowSize); ++i) + + AverageFilter(_ifftMagList, Math.Min(_ifftMagList.Length, _judgementWindowSize)); + for (int i = 0; i <= Math.Min(length, _minimumScoreWindowSize); ++i) { - _cumSumList[i] = _cumSumList[Math.Min(length - 1, _minimumScoreWindowSize + 1)]; + _cumSumList[i] = _cumSumList[Math.Min(length, _minimumScoreWindowSize) - 1]; } // Step 7: Calculate raw score and set result @@ -653,13 +663,13 @@ private void AverageFilter(double[] data, int n, bool ignoreFirst=false) _cumSumList[i] = cumsum; _cumSumShift[i] = cumsum; } - for (int i = n + 1; i < length; ++i) + for (int i = n; i < length; ++i) { _cumSumList[i] = (_cumSumList[i] - _cumSumShift[i - n]) / n; } - for (int i = 1; i <= n && i < length; ++i) + for (int i = 1; i < n; ++i) { - _cumSumList[i] = ignoreFirst ? (_cumSumShift[i] - _cumSumShift[0]) / i : _cumSumList[i] / (i + 1); + _cumSumList[i] /= (i + 1); } } From fc780bc724742016fe8751d2cb732046c381b057 Mon Sep 17 00:00:00 2001 From: "yuyi@microsoft.com" Date: Thu, 26 Nov 2020 10:24:23 +0800 Subject: [PATCH 10/10] remove a unused parameter --- src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs b/src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs index 0ec11eee6e..45505d5ece 100644 --- a/src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs +++ b/src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs @@ -456,7 +456,6 @@ public SrCnnEntireModeler(double threshold, double sensitivity, SrCnnDetectMode _sensitivity = sensitivity; _detectMode = detectMode; _period = period; - // it will reduce the probability of a point detected as anomaly if its score window is too short _predictArray = new double[_lookaheadWindowSize + 1]; switch (deseasonalityMode) @@ -649,7 +648,7 @@ private double PredictNext(double[] data) return (data[1] + slopeSum); } - private void AverageFilter(double[] data, int n, bool ignoreFirst=false) + private void AverageFilter(double[] data, int n) { double cumsum = 0.0f; int length = data.Length;