Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix SR anomaly score calculation at beginning #5502

Merged
merged 12 commits into from
Dec 2, 2020
29 changes: 26 additions & 3 deletions src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs
Original file line number Diff line number Diff line change
Expand Up @@ -309,6 +309,15 @@ public void Process()
_previousBatch = _previousBatch.GetRange(_batch.Count, _bLen);
_previousBatch.AddRange(_batch);
_modeler.Train(_previousBatch.ToArray(), ref _results);

// move the values to front
for (int i = 0; i < _batch.Count; ++i)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is only for the first batch, right?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This code snippet is for another bug, in which the results are not aligned for the last batch. It is not related to the reported case.


In reply to: 527820005 [](ancestors = 527820005)

{
for (int j = 0; j < _outputLength; ++j)
{
_results[i][j] = _results[_bLen + i][j];
}
}
}
else
{
Expand All @@ -334,7 +343,7 @@ public ValueGetter<VBuffer<double>> CreateGetter(DataViewRowCursor input, string
double src = default;
srcGetter(ref src);
var result = VBufferEditor.Create(ref dst, _outputLength);
_results[input.Position % _batchSize + _bLen].CopyTo(result.Values);
_results[input.Position % _batchSize].CopyTo(result.Values);
dst = result.Commit();
};
return getter;
Expand All @@ -351,6 +360,15 @@ internal sealed class SrCnnEntireModeler
private static readonly double _deanomalyThreshold = 0.35;
private static readonly double _boundSensitivity = 93.0;
private static readonly double _unitForZero = 0.3;
private static readonly double _minimumScore = 0.0;
private static readonly double _maximumScore = 1.0;
// If the score window is smaller than this value, the anomaly score tends to be small.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm still a bit confused about the score window: is it the same as batchSize?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just curious: is the score window the same as batchSize?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is not the same as batchSize, it is the _judgementWindowSize.


In reply to: 527820502 [](ancestors = 527820502)

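// Proof: For each point, the SR anomaly score is calculated as follows (w is the average window size):
// (mag - avg_mag) / avg_mag
// = (w * mag_{a} - sum_{i=0 to w-1} mag_{a - i}) / sum_{i=0 to w-1} mag_{a - i}
// = ((w - 1) * mag_{a} - C) / (mag_{a} + C), where C = sum_{i=1 to w-1} mag_{a - i} >= 0
// <= w - 1 (the maximum, reached when C = 0)
// The raw score is divided by 10 before it is clamped, so the window needs at least
// _maximumScore * 10 + 1 points for the score to be able to reach _maximumScore.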
private static readonly int _minimumScoreWindowSize = (int)(_maximumScore * 10) + 1;

// pseudo-code to generate the factors.
// factors = []
Expand Down Expand Up @@ -577,15 +595,20 @@ private void SpectralResidual(double[] values, double[][] results, double thresh
{
_ifftMagList[i] = Math.Sqrt(_ifftRe[i] * _ifftRe[i] + _ifftIm[i] * _ifftIm[i]);
}

AverageFilter(_ifftMagList, Math.Min(_ifftMagList.Length, _judgementWindowSize));
for (int i = 0; i <= Math.Min(length, _minimumScoreWindowSize); ++i)
{
_cumSumList[i] = _cumSumList[Math.Min(length, _minimumScoreWindowSize) - 1];
}

// Step 7: Calculate raw score and set result
for (int i = 0; i < results.GetLength(0); ++i)
{
var score = CalculateScore(_ifftMagList[i], _cumSumList[i]);
score /= 10.0f;
score = Math.Min(score, 1);
score = Math.Max(score, 0);
score = Math.Min(score, _maximumScore);
score = Math.Max(score, _minimumScore);

var detres = score > threshold ? 1 : 0;

Expand Down
58 changes: 58 additions & 0 deletions test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs
Original file line number Diff line number Diff line change
Expand Up @@ -776,6 +776,64 @@ public void TestSrCnnAnomalyDetectorWithSeasonalAnomalyData(
}
}

// Runs entire-series SR-CNN anomaly detection over the "anomaly_at_beginning.csv" fixture,
// once per deseasonality mode, and verifies that exactly one row (index 1) is flagged.
// For every other row the observed value must lie between output slot 6 and output slot 5,
// which the assertions below treat as the lower and upper margin respectively.
[Theory, CombinatorialData]
public void TestSrCnnAnomalyDetectorWithAnomalyAtBeginning(
    [CombinatorialValues(SrCnnDeseasonalityMode.Stl, SrCnnDeseasonalityMode.Mean, SrCnnDeseasonalityMode.Median)] SrCnnDeseasonalityMode mode)
{
    // The only row expected to be reported as an anomaly, and the expected width of each output vector.
    const int expectedAnomalyRow = 1;
    const int expectedOutputLength = 7;

    var mlContext = new MLContext(1);

    // Load the fixture once as a data view (fed to the detector) and once as a
    // materialized list (used to compare each observation against its margins).
    var csvPath = GetDataPath("Timeseries", "anomaly_at_beginning.csv");
    IDataView series = mlContext.Data.LoadFromTextFile<TimeSeriesDataDouble>(csvPath, hasHeader: true);
    List<TimeSeriesDataDouble> observations = mlContext.Data
        .CreateEnumerable<TimeSeriesDataDouble>(series, reuseRowObject: false)
        .ToList();

    // BatchSize = -1 together with AnomalyAndMargin mode; the deseasonality mode is the theory parameter.
    var detectorOptions = new SrCnnEntireAnomalyDetectorOptions()
    {
        Threshold = 0.30,
        BatchSize = -1,
        Sensitivity = 80.0,
        DetectMode = SrCnnDetectMode.AnomalyAndMargin,
        Period = 0,
        DeseasonalityMode = mode
    };

    // Batch detection: reads the Value column and writes the Prediction column.
    var scored = mlContext.AnomalyDetection.DetectEntireAnomalyBySrCnn(
        series,
        nameof(SrCnnAnomalyDetection.Prediction),
        nameof(TimeSeriesDataDouble.Value),
        detectorOptions);

    // Read the newly created column back as strongly typed rows.
    var predictions = mlContext.Data.CreateEnumerable<SrCnnAnomalyDetection>(scored, reuseRowObject: false);

    int row = 0;
    foreach (var prediction in predictions)
    {
        var output = prediction.Prediction;
        Assert.Equal(expectedOutputLength, output.Length);

        if (row != expectedAnomalyRow)
        {
            // Ordinary point: not flagged, and the observation sits inside [slot 6, slot 5].
            Assert.Equal(0, output[0]);
            Assert.True(output[6] <= observations[row].Value);
            Assert.True(observations[row].Value <= output[5]);
        }
        else
        {
            // Anomalous point: flagged, and the observation falls outside the margin on one side.
            Assert.Equal(1, output[0]);
            bool outsideMargin = output[6] > observations[row].Value || observations[row].Value > output[5];
            Assert.True(outsideMargin);
        }

        row++;
    }
}

[Theory, CombinatorialData]
public void TestSrcnnEntireDetectNonnegativeData(
[CombinatorialValues(true, false)] bool isPositive)
Expand Down
39 changes: 39 additions & 0 deletions test/data/Timeseries/anomaly_at_beginning.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
Value
181.944
37.176
57.14
67.128
72.12
77.112
82.104
83.1
87.09
92.088
92.01
97.08
102.072
107.05
107.06
117.048
122.04
132.024
147
151.82
151.992
151.72
151.94
156.969
156.984
156.92
161.976
161.94
161.97
166.968
176.952
181.94
186.936
201.91
201.912
201.9
206.904
216.88