From d2a0db33cbcda5db4f62aefe7ccee0b8ee6acbe5 Mon Sep 17 00:00:00 2001 From: Senja Filipi Date: Wed, 31 Oct 2018 23:38:28 -0700 Subject: [PATCH 1/3] Adding more transform extensions --- .../Transforms/CategoricalCatalog.cs | 47 +++++++++++++++++++ src/Microsoft.ML.OnnxTransform/OnnxCatalog.cs | 34 ++++++++++++++ src/Microsoft.ML.PCA/PCACatalog.cs | 40 ++++++++++++++++ .../ExtensionsCatalog.cs | 25 ++++++++++ .../ProjectionCatalog.cs | 36 ++++++++++++++ 5 files changed, 182 insertions(+) create mode 100644 src/Microsoft.ML.Data/Transforms/CategoricalCatalog.cs create mode 100644 src/Microsoft.ML.OnnxTransform/OnnxCatalog.cs create mode 100644 src/Microsoft.ML.PCA/PCACatalog.cs create mode 100644 src/Microsoft.ML.Transforms/ProjectionCatalog.cs diff --git a/src/Microsoft.ML.Data/Transforms/CategoricalCatalog.cs b/src/Microsoft.ML.Data/Transforms/CategoricalCatalog.cs new file mode 100644 index 0000000000..c2657da11b --- /dev/null +++ b/src/Microsoft.ML.Data/Transforms/CategoricalCatalog.cs @@ -0,0 +1,47 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using Microsoft.ML.Runtime; +using Microsoft.ML.Runtime.Data; +using Microsoft.ML.Transforms.Categorical; + +namespace Microsoft.ML +{ + /// + /// Extensions for the ValueToKeyMappingEstimator + /// + public static class ValueToKeyCatalog + { + /// + /// Initializes a new instance of . + /// + /// The categorical transform's catalog. + /// Name of the column to be transformed. + /// Name of the output column. If this is null '' will be used. + /// Maximum number of keys to keep per column when auto-training. + /// How items should be ordered when vectorized. By default, they will be in the order encountered. 
+ /// If by value items are sorted according to their default comparison, for example, text sorting will be case sensitive (for example, 'A' then 'Z' then 'a'). + public static ValueToKeyMappingEstimator MapValueToKey(this TransformsCatalog.CategoricalTransforms catalog, + string inputColumn, + string outputColumn = null, + int maxNumTerms = ValueToKeyMappingEstimator.Defaults.MaxNumTerms, + TermTransform.SortOrder sort = ValueToKeyMappingEstimator.Defaults.Sort) + => new ValueToKeyMappingEstimator(CatalogUtils.GetEnvironment(catalog), inputColumn, outputColumn, maxNumTerms, sort); + + /// + /// Initializes a new instance of loading the terms to use from . + /// + /// The categorical transform's catalog. + /// The data columns to map to keys. + /// The path of the file containing the terms. + /// + /// + public static ValueToKeyMappingEstimator MapValueToKey(this TransformsCatalog.CategoricalTransforms catalog, + TermTransform.ColumnInfo[] columns, + string file = null, + string termsColumn = null, + IComponentFactory loaderFactory = null) + => new ValueToKeyMappingEstimator(CatalogUtils.GetEnvironment(catalog), columns, file, termsColumn, loaderFactory); + } +} diff --git a/src/Microsoft.ML.OnnxTransform/OnnxCatalog.cs b/src/Microsoft.ML.OnnxTransform/OnnxCatalog.cs new file mode 100644 index 0000000000..102b4e2a80 --- /dev/null +++ b/src/Microsoft.ML.OnnxTransform/OnnxCatalog.cs @@ -0,0 +1,34 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using Microsoft.ML.Runtime; +using Microsoft.ML.Runtime.Data; +using Microsoft.ML.Transforms; + +namespace Microsoft.ML +{ + public static class OnnxCatalog + { + /// + /// Initializes a new instance of . + /// + /// The transform's catalog. + /// The path of the file containing the ONNX model. + /// The input column. 
+ /// The output column resulting from the transformation. + public static OnnxScoringEstimator ApplyOnnxModel(this TransformsCatalog.CategoricalTransforms catalog, + string modelFile, + string inputColumn, + string outputColumn) + => new OnnxScoringEstimator(CatalogUtils.GetEnvironment(catalog), modelFile, inputColumn, outputColumn); + + /// + /// Initializes a new instance of . + /// + /// The transform's catalog. + /// The ONNX transformer. + public static OnnxScoringEstimator ApplyOnnxModel(this TransformsCatalog.CategoricalTransforms catalog, OnnxTransform transformer) + => new OnnxScoringEstimator(CatalogUtils.GetEnvironment(catalog), transformer); + } +} diff --git a/src/Microsoft.ML.PCA/PCACatalog.cs b/src/Microsoft.ML.PCA/PCACatalog.cs new file mode 100644 index 0000000000..2be4218784 --- /dev/null +++ b/src/Microsoft.ML.PCA/PCACatalog.cs @@ -0,0 +1,40 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using Microsoft.ML.Runtime; +using Microsoft.ML.Runtime.Data; +using Microsoft.ML.Transforms.PCA; + +namespace Microsoft.ML +{ + public static class PcaCatalog + { + + /// Initializes a new instance of . + /// The transform's catalog. + /// Input column to apply PrincipalComponentAnalysis on. + /// Optional output column. Null means is replaced. + /// The name of the weight column. + /// The number of principal components. + /// Oversampling parameter for randomized PrincipalComponentAnalysis training. + /// If enabled, data is centered to be zero mean. + /// The seed for random number generation. 
+ public static PrincipalComponentAnalysisEstimator ProjectToPrincipalComponents(this TransformsCatalog.ProjectionTransforms catalog, + string inputColumn, + string outputColumn = null, + string weightColumn = PrincipalComponentAnalysisEstimator.Defaults.WeightColumn, + int rank = PrincipalComponentAnalysisEstimator.Defaults.Rank, + int overSampling = PrincipalComponentAnalysisEstimator.Defaults.Oversampling, + bool center = PrincipalComponentAnalysisEstimator.Defaults.Center, + int? seed = null) + => new PrincipalComponentAnalysisEstimator(CatalogUtils.GetEnvironment(catalog), + inputColumn, outputColumn, weightColumn, rank, overSampling, center, seed); + + /// Initializes a new instance of . + /// The transform's catalog. + /// Input columns to apply PrincipalComponentAnalysis on. + public static PrincipalComponentAnalysisEstimator ProjectToPrincipalComponents(this TransformsCatalog.ProjectionTransforms catalog, params PcaTransform.ColumnInfo[] columns) + => new PrincipalComponentAnalysisEstimator(CatalogUtils.GetEnvironment(catalog), columns); + } +} diff --git a/src/Microsoft.ML.Transforms/ExtensionsCatalog.cs b/src/Microsoft.ML.Transforms/ExtensionsCatalog.cs index c699ffbb58..2ece80109a 100644 --- a/src/Microsoft.ML.Transforms/ExtensionsCatalog.cs +++ b/src/Microsoft.ML.Transforms/ExtensionsCatalog.cs @@ -32,6 +32,31 @@ public static MissingValueIndicatorEstimator IndicateMissingValues(this Transfor => new MissingValueIndicatorEstimator(CatalogUtils.GetEnvironment(catalog), inputColumn, outputColumn); } + public static class MissingValueReplacerCatalog + { + /// + /// Initializes a new instance of + /// + /// The transform's catalog. + /// The name of the input column. + /// The optional name of the output column, + /// If not provided, the will be replaced with the results of the transforms. 
+ /// The type of replacement to use as specified in + public static MissingValueReplacingEstimator ReplaceMissingValues(this TransformsCatalog catalog, + string inputColumn, + string outputColumn = null, + NAReplaceTransform.ColumnInfo.ReplacementMode replacementKind = MissingValueReplacingEstimator.Defaults.ReplacementMode) + => new MissingValueReplacingEstimator(CatalogUtils.GetEnvironment(catalog), inputColumn, outputColumn, replacementKind); + + /// + /// Initializes a new instance of + /// + /// The transform's catalog. + /// The names of the columns to use, and per-column transformation configuration. + public static MissingValueReplacingEstimator ReplaceMissingValues(this TransformsCatalog catalog, params NAReplaceTransform.ColumnInfo[] columns) + => new MissingValueReplacingEstimator(CatalogUtils.GetEnvironment(catalog), columns); + } + /// /// Extensions for KeyToVectorMappingEstimator. /// diff --git a/src/Microsoft.ML.Transforms/ProjectionCatalog.cs b/src/Microsoft.ML.Transforms/ProjectionCatalog.cs new file mode 100644 index 0000000000..58403b5685 --- /dev/null +++ b/src/Microsoft.ML.Transforms/ProjectionCatalog.cs @@ -0,0 +1,36 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using Microsoft.ML.Runtime; +using Microsoft.ML.Runtime.Data; +using Microsoft.ML.Transforms; + +namespace Microsoft.ML +{ + public static class ProjectionCatalog + { + /// + /// Initializes a new instance of . + /// + /// The transform's catalog. + /// Name of the column to be transformed. + /// Name of the output column. If this is null '' will be used. + /// The number of random Fourier features to create. + /// Create two features for every random Fourier frequency? (one for cos and one for sin). 
+ public static RandomFourierFeaturizingEstimator CreateRandomFourierFeatures(this TransformsCatalog.ProjectionTransforms catalog, + string inputColumn, + string outputColumn = null, + int newDim = RandomFourierFeaturizingEstimator.Defaults.NewDim, + bool useSin = RandomFourierFeaturizingEstimator.Defaults.UseSin) + => new RandomFourierFeaturizingEstimator(CatalogUtils.GetEnvironment(catalog), inputColumn, outputColumn, newDim, useSin); + + /// + /// Initializes a new instance of . + /// + /// The transform's catalog. + /// The input columns to use for the transformation. + public static RandomFourierFeaturizingEstimator CreateRandomFourierFeatures(this TransformsCatalog.ProjectionTransforms catalog, params RffTransform.ColumnInfo[] columns) + => new RandomFourierFeaturizingEstimator(CatalogUtils.GetEnvironment(catalog), columns); + } +} From 32869cbc2e8d64f835e36a415b8c50666c755970 Mon Sep 17 00:00:00 2001 From: Senja Filipi Date: Wed, 31 Oct 2018 23:45:43 -0700 Subject: [PATCH 2/3] bringing master in --- src/Microsoft.ML.PCA/PCACatalog.cs | 2 +- src/Microsoft.ML.Transforms/ProjectionCatalog.cs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Microsoft.ML.PCA/PCACatalog.cs b/src/Microsoft.ML.PCA/PCACatalog.cs index 2be4218784..005638421f 100644 --- a/src/Microsoft.ML.PCA/PCACatalog.cs +++ b/src/Microsoft.ML.PCA/PCACatalog.cs @@ -4,7 +4,7 @@ using Microsoft.ML.Runtime; using Microsoft.ML.Runtime.Data; -using Microsoft.ML.Transforms.PCA; +using Microsoft.ML.Transforms.Projections; namespace Microsoft.ML { diff --git a/src/Microsoft.ML.Transforms/ProjectionCatalog.cs b/src/Microsoft.ML.Transforms/ProjectionCatalog.cs index 58403b5685..ff182e46d6 100644 --- a/src/Microsoft.ML.Transforms/ProjectionCatalog.cs +++ b/src/Microsoft.ML.Transforms/ProjectionCatalog.cs @@ -4,7 +4,7 @@ using Microsoft.ML.Runtime; using Microsoft.ML.Runtime.Data; -using Microsoft.ML.Transforms; +using Microsoft.ML.Transforms.Projections; namespace Microsoft.ML { From 
360d06618bd8a0180ee7c01d9de446c54aa5971f Mon Sep 17 00:00:00 2001 From: Senja Filipi Date: Thu, 1 Nov 2018 00:09:21 -0700 Subject: [PATCH 3/3] name standartizing --- .../Transforms/ConversionsCatalog.cs | 16 ++++++++-------- .../Transforms/TransformsCatalog.cs | 12 ++++++------ src/Microsoft.ML.OnnxTransform/OnnxCatalog.cs | 4 ++-- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/src/Microsoft.ML.Data/Transforms/ConversionsCatalog.cs b/src/Microsoft.ML.Data/Transforms/ConversionsCatalog.cs index ef7d6044e1..2d589f8e58 100644 --- a/src/Microsoft.ML.Data/Transforms/ConversionsCatalog.cs +++ b/src/Microsoft.ML.Data/Transforms/ConversionsCatalog.cs @@ -24,7 +24,7 @@ public static class HashingEstimatorCatalog /// Name of the column to be transformed. If this is null '' will be used. /// Number of bits to hash into. Must be between 1 and 31, inclusive. /// Limit the number of keys used to generate the slot name to this many. 0 means no invert hashing, -1 means no limit. - public static HashingEstimator Hash(this TransformsCatalog.Conversions catalog, string inputColumn, string outputColumn = null, + public static HashingEstimator Hash(this TransformsCatalog.ConversionTransforms catalog, string inputColumn, string outputColumn = null, int hashBits = HashDefaults.HashBits, int invertHash = HashDefaults.InvertHash) => new HashingEstimator(CatalogUtils.GetEnvironment(catalog), inputColumn, outputColumn, hashBits, invertHash); @@ -33,7 +33,7 @@ public static HashingEstimator Hash(this TransformsCatalog.Conversions catalog, /// /// The transform's catalog. /// Description of dataset columns and how to process them. 
- public static HashingEstimator Hash(this TransformsCatalog.Conversions catalog, params HashTransformer.ColumnInfo[] columns) + public static HashingEstimator Hash(this TransformsCatalog.ConversionTransforms catalog, params HashTransformer.ColumnInfo[] columns) => new HashingEstimator(CatalogUtils.GetEnvironment(catalog), columns); /// @@ -43,7 +43,7 @@ public static HashingEstimator Hash(this TransformsCatalog.Conversions catalog, /// Name of the input column. /// Name of the column to be transformed. If this is null '' will be used. /// Number of bits to hash into. Must be between 1 and 31, inclusive. - public static ConvertingEstimator ConvertTo(this TransformsCatalog.Conversions catalog, string inputColumn, string outputColumn = null, + public static ConvertingEstimator ConvertTo(this TransformsCatalog.ConversionTransforms catalog, string inputColumn, string outputColumn = null, DataKind outputKind = ConvertDefaults.DefaultOutputKind) => new ConvertingEstimator(CatalogUtils.GetEnvironment(catalog), inputColumn, outputColumn, outputKind); @@ -52,7 +52,7 @@ public static ConvertingEstimator ConvertTo(this TransformsCatalog.Conversions c /// /// The transform's catalog. /// Description of dataset columns and how to process them. - public static ConvertingEstimator ConvertTo(this TransformsCatalog.Conversions catalog, params ConvertingTransform.ColumnInfo[] columns) + public static ConvertingEstimator ConvertTo(this TransformsCatalog.ConversionTransforms catalog, params ConvertingTransform.ColumnInfo[] columns) => new ConvertingEstimator(CatalogUtils.GetEnvironment(catalog), columns); } @@ -63,7 +63,7 @@ public static class ToValueCatalog /// /// The categorical transform's catalog. /// Name of the input column. 
- public static KeyToValueEstimator MapKeyToValue(this TransformsCatalog.Conversions catalog, string inputColumn) + public static KeyToValueEstimator MapKeyToValue(this TransformsCatalog.ConversionTransforms catalog, string inputColumn) => new KeyToValueEstimator(CatalogUtils.GetEnvironment(catalog), inputColumn); /// @@ -72,7 +72,7 @@ public static KeyToValueEstimator MapKeyToValue(this TransformsCatalog.Conversio /// /// The categorical transform's catalog /// The pairs of input and output columns. - public static KeyToValueEstimator MapKeyToValue(this TransformsCatalog.Conversions catalog, params (string input, string output)[] columns) + public static KeyToValueEstimator MapKeyToValue(this TransformsCatalog.ConversionTransforms catalog, params (string input, string output)[] columns) => new KeyToValueEstimator(CatalogUtils.GetEnvironment(catalog), columns); } @@ -86,7 +86,7 @@ public static class ToVectorCatalog /// /// The categorical transform's catalog. /// The input column to map back to vectors. - public static KeyToVectorMappingEstimator MapKeyToVector(this TransformsCatalog.Conversions catalog, + public static KeyToVectorMappingEstimator MapKeyToVector(this TransformsCatalog.ConversionTransforms catalog, params KeyToVectorTransform.ColumnInfo[] columns) => new KeyToVectorMappingEstimator(CatalogUtils.GetEnvironment(catalog), columns); @@ -97,7 +97,7 @@ public static KeyToVectorMappingEstimator MapKeyToVector(this TransformsCatalog. /// The name of the input column. /// The name of the output column. /// Whether bagging is used for the conversion. 
- public static KeyToVectorMappingEstimator MapKeyToVector(this TransformsCatalog.Conversions catalog, + public static KeyToVectorMappingEstimator MapKeyToVector(this TransformsCatalog.ConversionTransforms catalog, string inputColumn, string outputColumn = null, bool bag = KeyToVectorMappingEstimator.Defaults.Bag) => new KeyToVectorMappingEstimator(CatalogUtils.GetEnvironment(catalog), inputColumn, outputColumn, bag); } diff --git a/src/Microsoft.ML.Data/Transforms/TransformsCatalog.cs b/src/Microsoft.ML.Data/Transforms/TransformsCatalog.cs index 22706e7421..f9464d85ce 100644 --- a/src/Microsoft.ML.Data/Transforms/TransformsCatalog.cs +++ b/src/Microsoft.ML.Data/Transforms/TransformsCatalog.cs @@ -13,9 +13,9 @@ public sealed class TransformsCatalog internal IHostEnvironment Environment { get; } public CategoricalTransforms Categorical { get; } - public Conversions Conversion { get; } + public ConversionTransforms Conversion { get; } public TextTransforms Text { get; } - public ProjectionTransforms Projections { get; } + public ProjectionTransforms Projection { get; } internal TransformsCatalog(IHostEnvironment env) { @@ -23,9 +23,9 @@ internal TransformsCatalog(IHostEnvironment env) Environment = env; Categorical = new CategoricalTransforms(this); - Conversion = new Conversions(this); + Conversion = new ConversionTransforms(this); Text = new TextTransforms(this); - Projections = new ProjectionTransforms(this); + Projection = new ProjectionTransforms(this); } public abstract class SubCatalogBase @@ -52,9 +52,9 @@ internal CategoricalTransforms(TransformsCatalog owner) : base(owner) /// /// The catalog of rescaling operations. 
/// - public sealed class Conversions : SubCatalogBase + public sealed class ConversionTransforms : SubCatalogBase { - public Conversions(TransformsCatalog owner) : base(owner) + public ConversionTransforms(TransformsCatalog owner) : base(owner) { } } diff --git a/src/Microsoft.ML.OnnxTransform/OnnxCatalog.cs b/src/Microsoft.ML.OnnxTransform/OnnxCatalog.cs index 102b4e2a80..054900776d 100644 --- a/src/Microsoft.ML.OnnxTransform/OnnxCatalog.cs +++ b/src/Microsoft.ML.OnnxTransform/OnnxCatalog.cs @@ -17,7 +17,7 @@ public static class OnnxCatalog /// The path of the file containing the ONNX model. /// The input column. /// The output column resulting from the transformation. - public static OnnxScoringEstimator ApplyOnnxModel(this TransformsCatalog.CategoricalTransforms catalog, + public static OnnxScoringEstimator ApplyOnnxModel(this TransformsCatalog catalog, string modelFile, string inputColumn, string outputColumn) @@ -28,7 +28,7 @@ public static OnnxScoringEstimator ApplyOnnxModel(this TransformsCatalog.Categor /// /// The transform's catalog. /// The ONNX transformer. - public static OnnxScoringEstimator ApplyOnnxModel(this TransformsCatalog.CategoricalTransforms catalog, OnnxTransform transformer) + public static OnnxScoringEstimator ApplyOnnxModel(this TransformsCatalog catalog, OnnxTransform transformer) => new OnnxScoringEstimator(CatalogUtils.GetEnvironment(catalog), transformer); } }