-
Notifications
You must be signed in to change notification settings - Fork 1.9k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add an example of random PCA using in-memory data structure (#2780)
- Loading branch information
Showing
6 changed files
with
283 additions
and
2 deletions.
There are no files selected for viewing
86 changes: 86 additions & 0 deletions
86
docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/AnomalyDetection/RandomizedPcaSample.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
using System; | ||
using System.Collections.Generic; | ||
using System.Linq; | ||
using Microsoft.ML.Data; | ||
|
||
namespace Microsoft.ML.Samples.Dynamic.Trainers.AnomalyDetection | ||
{ | ||
/// <summary>
/// Sample that trains a randomized-PCA anomaly detector on a small in-memory data set
/// and prints the prediction produced for every training example.
/// </summary>
public static class RandomizedPcaSample
{
    /// <summary>
    /// Builds six in-memory examples, fits the anomaly detector on them, scores the same
    /// examples with the trained model and writes one line per example to the console.
    /// </summary>
    public static void Example()
    {
        // Create a new context for ML.NET operations. It can be used for exception tracking and logging,
        // as a catalog of available operations and as the source of randomness.
        // Setting the seed to a fixed number in this example to make outputs deterministic.
        var mlContext = new MLContext(seed: 0);

        // Training data.
        var samples = new List<DataPoint>()
        {
            new DataPoint(){ Features = new float[3] {1, 0, 0} },
            new DataPoint(){ Features = new float[3] {0, 2, 1} },
            new DataPoint(){ Features = new float[3] {1, 2, 3} },
            new DataPoint(){ Features = new float[3] {0, 1, 0} },
            new DataPoint(){ Features = new float[3] {0, 2, 1} },
            new DataPoint(){ Features = new float[3] {-100, 50, -100} }
        };

        // Convert the List<DataPoint> to IDataView, a format consumable by ML.NET functions.
        var data = mlContext.Data.LoadFromEnumerable(samples);

        // Create an anomaly detector. Its underlying algorithm is randomized PCA.
        var pipeline = mlContext.AnomalyDetection.Trainers.RandomizedPca(featureColumnName: nameof(DataPoint.Features), rank: 1, center: false);

        // Train the anomaly detector.
        var model = pipeline.Fit(data);

        // Apply the trained model on the training data.
        var transformed = model.Transform(data);

        // Read ML.NET predictions into a List<Result>.
        var results = mlContext.Data.CreateEnumerable<Result>(transformed, reuseRowObject: false).ToList();

        // Let's go through all predictions.
        for (int i = 0; i < samples.Count; ++i)
        {
            // The i-th example's prediction result.
            var result = results[i];

            // The i-th example's feature vector in text format. A string separator is used because
            // the char-separator overloads of string.Join do not exist on .NET Framework / .NET Standard 2.0.
            var featuresInText = string.Join(",", samples[i].Features);

            // PredictedLabel is true for an inlier and false for an outlier; only this word differs between the two messages.
            var verdict = result.PredictedLabel ? "an inlier" : "an outlier";

            Console.WriteLine("The {0}-th example with features [{1}] is {2} with a score of being inlier {3}",
                i, featuresInText, verdict, result.Score);
        }
        // Lines printed out should be
        // The 0-th example with features [1,0,0] is an inlier with a score of being inlier 0.7453707
        // The 1-th example with features [0,2,1] is an inlier with a score of being inlier 0.9999999
        // The 2-th example with features [1,2,3] is an inlier with a score of being inlier 0.8450122
        // The 3-th example with features [0,1,0] is an inlier with a score of being inlier 0.9428905
        // The 4-th example with features [0,2,1] is an inlier with a score of being inlier 0.9999999
        // The 5-th example with features [-100,50,-100] is an outlier with a score of being inlier 0
    }

    // Example with 3 feature values. A training data set is a collection of such examples.
    private class DataPoint
    {
        // Declared as a vector of size 3 for ML.NET.
        [VectorType(3)]
        public float[] Features { get; set; }
    }

    // Class used to capture prediction of DataPoint.
    private class Result
    {
        // Outlier gets false while inlier has true.
        public bool PredictedLabel { get; set; }
        // Outlier gets smaller score.
        public float Score { get; set; }
    }
}
} |
93 changes: 93 additions & 0 deletions
93
.../Microsoft.ML.Samples/Dynamic/Trainers/AnomalyDetection/RandomizedPcaSampleWithOptions.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
using System; | ||
using System.Collections.Generic; | ||
using System.Linq; | ||
using Microsoft.ML.Data; | ||
|
||
namespace Microsoft.ML.Samples.Dynamic.Trainers.AnomalyDetection | ||
{ | ||
/// <summary>
/// Sample that trains a randomized-PCA anomaly detector configured through an options object
/// on a small in-memory data set and prints the prediction produced for every training example.
/// </summary>
public static class RandomizedPcaSampleWithOptions
{
    /// <summary>
    /// Builds six in-memory examples, fits the anomaly detector on them, scores the same
    /// examples with the trained model and writes one line per example to the console.
    /// </summary>
    public static void Example()
    {
        // Create a new context for ML.NET operations. It can be used for exception tracking and logging,
        // as a catalog of available operations and as the source of randomness.
        // Setting the seed to a fixed number in this example to make outputs deterministic.
        var mlContext = new MLContext(seed: 0);

        // Training data.
        var samples = new List<DataPoint>()
        {
            new DataPoint(){ Features = new float[3] {1, 0, 0} },
            new DataPoint(){ Features = new float[3] {0, 2, 1} },
            new DataPoint(){ Features = new float[3] {1, 2, 3} },
            new DataPoint(){ Features = new float[3] {0, 1, 0} },
            new DataPoint(){ Features = new float[3] {0, 2, 1} },
            new DataPoint(){ Features = new float[3] {-100, 50, -100} }
        };

        // Convert the List<DataPoint> to IDataView, a format consumable by ML.NET functions.
        var data = mlContext.Data.LoadFromEnumerable(samples);

        // Configure the trainer through its options object instead of individual arguments.
        var options = new ML.Trainers.RandomizedPcaTrainer.Options()
        {
            FeatureColumnName = nameof(DataPoint.Features),
            Rank = 1,
            Seed = 10,
        };

        // Create an anomaly detector. Its underlying algorithm is randomized PCA.
        var pipeline = mlContext.AnomalyDetection.Trainers.RandomizedPca(options);

        // Train the anomaly detector.
        var model = pipeline.Fit(data);

        // Apply the trained model on the training data.
        var transformed = model.Transform(data);

        // Read ML.NET predictions into a List<Result>.
        var results = mlContext.Data.CreateEnumerable<Result>(transformed, reuseRowObject: false).ToList();

        // Let's go through all predictions.
        for (int i = 0; i < samples.Count; ++i)
        {
            // The i-th example's prediction result.
            var result = results[i];

            // The i-th example's feature vector in text format. A string separator is used because
            // the char-separator overloads of string.Join do not exist on .NET Framework / .NET Standard 2.0.
            var featuresInText = string.Join(",", samples[i].Features);

            // PredictedLabel is true for an inlier and false for an outlier; only this word differs between the two messages.
            var verdict = result.PredictedLabel ? "an inlier" : "an outlier";

            Console.WriteLine("The {0}-th example with features [{1}] is {2} with a score of being inlier {3}",
                i, featuresInText, verdict, result.Score);
        }
        // Lines printed out should be
        // NOTE(review): the scores below are identical to the ones listed for RandomizedPcaSample, although this
        // sample sets Seed = 10 and does not pass center: false - confirm them against an actual run.
        // The 0-th example with features [1,0,0] is an inlier with a score of being inlier 0.7453707
        // The 1-th example with features [0,2,1] is an inlier with a score of being inlier 0.9999999
        // The 2-th example with features [1,2,3] is an inlier with a score of being inlier 0.8450122
        // The 3-th example with features [0,1,0] is an inlier with a score of being inlier 0.9428905
        // The 4-th example with features [0,2,1] is an inlier with a score of being inlier 0.9999999
        // The 5-th example with features [-100,50,-100] is an outlier with a score of being inlier 0
    }

    // Example with 3 feature values. A training data set is a collection of such examples.
    private class DataPoint
    {
        // Declared as a vector of size 3 for ML.NET.
        [VectorType(3)]
        public float[] Features { get; set; }
    }

    // Class used to capture prediction of DataPoint.
    private class Result
    {
        // Outlier gets false while inlier has true.
        public bool PredictedLabel { get; set; }
        // Outlier gets smaller score.
        public float Score { get; set; }
    }
}
} |
1 change: 0 additions & 1 deletion
1
docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Recommendation/MatrixFactorization.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
1 change: 0 additions & 1 deletion
1
...es/Microsoft.ML.Samples/Dynamic/Trainers/Recommendation/MatrixFactorizationWithOptions.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters