From 4c91252a2b7f3d1bbdce0870bee99cf24d1e79c5 Mon Sep 17 00:00:00 2001 From: Senja Filipi Date: Tue, 30 Oct 2018 12:21:42 -0700 Subject: [PATCH 1/3] more namespace move and make it build --- .../SubsetSelector/BootstrapSelector.cs | 1 + src/Microsoft.ML.Legacy/CSharpApi.cs | 88 ------------------- .../CategoricalTransform.cs | 1 + .../KeyToBinaryVectorTransform.cs | 3 +- .../LearnerFeatureSelection.cs | 2 +- src/Microsoft.ML.Transforms/LoadTransform.cs | 9 +- .../MissingValueIndicatorTransform.cs | 3 +- .../MutualInformationFeatureSelection.cs | 4 +- .../OptionalColumnTransform.cs | 4 +- .../TermLookupTransform.cs | 11 ++- .../UngroupTransform.cs | 7 +- .../WhiteningTransform.cs | 3 +- .../WrappedFeatureSelectionTransformers.cs | 2 +- .../Common/EntryPoints/core_ep-list.tsv | 5 +- .../Common/EntryPoints/core_manifest.json | 64 -------------- .../UnitTests/TestEntryPoints.cs | 2 +- .../KeyToBinaryVectorEstimatorTest.cs | 1 + 17 files changed, 32 insertions(+), 178 deletions(-) diff --git a/src/Microsoft.ML.Ensemble/Selector/SubsetSelector/BootstrapSelector.cs b/src/Microsoft.ML.Ensemble/Selector/SubsetSelector/BootstrapSelector.cs index 97dda8aeba..102b9d10d5 100644 --- a/src/Microsoft.ML.Ensemble/Selector/SubsetSelector/BootstrapSelector.cs +++ b/src/Microsoft.ML.Ensemble/Selector/SubsetSelector/BootstrapSelector.cs @@ -8,6 +8,7 @@ using Microsoft.ML.Runtime.Ensemble.Selector; using Microsoft.ML.Runtime.Ensemble.Selector.SubsetSelector; using Microsoft.ML.Runtime.EntryPoints; +using Microsoft.ML.Transforms; [assembly: LoadableClass(typeof(BootstrapSelector), typeof(BootstrapSelector.Arguments), typeof(SignatureEnsembleDataSelector), BootstrapSelector.UserName, BootstrapSelector.LoadName)] diff --git a/src/Microsoft.ML.Legacy/CSharpApi.cs b/src/Microsoft.ML.Legacy/CSharpApi.cs index 9f5b2e8791..c17ae9168a 100644 --- a/src/Microsoft.ML.Legacy/CSharpApi.cs +++ b/src/Microsoft.ML.Legacy/CSharpApi.cs @@ -1582,18 +1582,6 @@ public void 
Add(Microsoft.ML.Legacy.Transforms.Scorer input, Microsoft.ML.Legacy _jsonNodes.Add(Serialize("Transforms.Scorer", input, output)); } - public Microsoft.ML.Legacy.Transforms.Segregator.Output Add(Microsoft.ML.Legacy.Transforms.Segregator input) - { - var output = new Microsoft.ML.Legacy.Transforms.Segregator.Output(); - Add(input, output); - return output; - } - - public void Add(Microsoft.ML.Legacy.Transforms.Segregator input, Microsoft.ML.Legacy.Transforms.Segregator.Output output) - { - _jsonNodes.Add(Serialize("Transforms.Segregator", input, output)); - } - public Microsoft.ML.Legacy.Transforms.SentimentAnalyzer.Output Add(Microsoft.ML.Legacy.Transforms.SentimentAnalyzer input) { var output = new Microsoft.ML.Legacy.Transforms.SentimentAnalyzer.Output(); @@ -16431,82 +16419,6 @@ public sealed class Output } } - namespace Legacy.Transforms - { - public enum UngroupTransformUngroupMode - { - Inner = 0, - Outer = 1, - First = 2 - } - - - /// - /// - public sealed partial class Segregator : Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - - /// - /// Columns to unroll, or 'pivot' - /// - public string[] Column { get; set; } - - /// - /// Specifies how to unroll multiple pivot columns of different size. 
- /// - public UngroupTransformUngroupMode Mode { get; set; } = UngroupTransformUngroupMode.Inner; - - /// - /// Input dataset - /// - public Var Data { get; set; } = new Var(); - - - public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.ITransformOutput - { - /// - /// Transformed dataset - /// - public Var OutputData { get; set; } = new Var(); - - /// - /// Transform model - /// - public Var Model { get; set; } = new Var(); - - } - public Var GetInputData() => Data; - - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(Segregator)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - Data = dataStep.Data; - } - Output output = experiment.Add(this); - return new SegregatorPipelineStep(output); - } - - private class SegregatorPipelineStep : ILearningPipelineDataStep - { - public SegregatorPipelineStep(Output output) - { - Data = output.OutputData; - Model = output.Model; - } - - public Var Data { get; } - public Var Model { get; } - } - } - } - namespace Legacy.Transforms { diff --git a/src/Microsoft.ML.Transforms/CategoricalTransform.cs b/src/Microsoft.ML.Transforms/CategoricalTransform.cs index 07240890a0..14c000a085 100644 --- a/src/Microsoft.ML.Transforms/CategoricalTransform.cs +++ b/src/Microsoft.ML.Transforms/CategoricalTransform.cs @@ -13,6 +13,7 @@ using Microsoft.ML.StaticPipe; using Microsoft.ML.StaticPipe.Runtime; using Microsoft.ML.Transforms.Categorical; +using Microsoft.ML.Transforms.Conversions; using System; using System.Collections.Generic; using System.Linq; diff --git a/src/Microsoft.ML.Transforms/KeyToBinaryVectorTransform.cs b/src/Microsoft.ML.Transforms/KeyToBinaryVectorTransform.cs index 41f205ef9a..dcd78bd397 100644 --- a/src/Microsoft.ML.Transforms/KeyToBinaryVectorTransform.cs +++ 
b/src/Microsoft.ML.Transforms/KeyToBinaryVectorTransform.cs @@ -11,6 +11,7 @@ using Microsoft.ML.StaticPipe; using Microsoft.ML.StaticPipe.Runtime; using Microsoft.ML.Transforms.Categorical; +using Microsoft.ML.Transforms.Conversions; using System; using System.Collections.Generic; using System.Linq; @@ -28,7 +29,7 @@ [assembly: LoadableClass(typeof(IRowMapper), typeof(KeyToBinaryVectorTransform), null, typeof(SignatureLoadRowMapper), KeyToBinaryVectorTransform.UserName, KeyToBinaryVectorTransform.LoaderSignature)] -namespace Microsoft.ML.Runtime.Data +namespace Microsoft.ML.Transforms.Conversions { public sealed class KeyToBinaryVectorTransform : OneToOneTransformerBase { diff --git a/src/Microsoft.ML.Transforms/LearnerFeatureSelection.cs b/src/Microsoft.ML.Transforms/LearnerFeatureSelection.cs index c8a90bc390..a75cec4a19 100644 --- a/src/Microsoft.ML.Transforms/LearnerFeatureSelection.cs +++ b/src/Microsoft.ML.Transforms/LearnerFeatureSelection.cs @@ -14,7 +14,7 @@ [assembly: LoadableClass(LearnerFeatureSelectionTransform.Summary, typeof(IDataTransform), typeof(LearnerFeatureSelectionTransform), typeof(LearnerFeatureSelectionTransform.Arguments), typeof(SignatureDataTransform), "Learner Feature Selection Transform", "LearnerFeatureSelectionTransform", "LearnerFeatureSelection")] -namespace Microsoft.ML.Runtime.Data +namespace Microsoft.ML.Transforms { /// /// Selects the slots for which the absolute value of the corresponding weight in a linear learner diff --git a/src/Microsoft.ML.Transforms/LoadTransform.cs b/src/Microsoft.ML.Transforms/LoadTransform.cs index 28eb3cb2f3..55a8ebf359 100644 --- a/src/Microsoft.ML.Transforms/LoadTransform.cs +++ b/src/Microsoft.ML.Transforms/LoadTransform.cs @@ -2,19 +2,20 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. 
-using System; -using System.Collections.Generic; -using System.IO; using Microsoft.ML.Runtime; using Microsoft.ML.Runtime.CommandLine; using Microsoft.ML.Runtime.Data; using Microsoft.ML.Runtime.Internal.Utilities; using Microsoft.ML.Runtime.Model; +using Microsoft.ML.Transforms; +using System; +using System.Collections.Generic; +using System.IO; [assembly: LoadableClass(LoadTransform.Summary, typeof(IDataTransform), typeof(LoadTransform), typeof(LoadTransform.Arguments), typeof(SignatureDataTransform), "Load Transform", "LoadTransform", "Load")] -namespace Microsoft.ML.Runtime.Data +namespace Microsoft.ML.Transforms { /// /// Load specific transforms from the specified model file. Allows one to 'cherry pick' transforms from diff --git a/src/Microsoft.ML.Transforms/MissingValueIndicatorTransform.cs b/src/Microsoft.ML.Transforms/MissingValueIndicatorTransform.cs index 41412e6e26..5e42acfedb 100644 --- a/src/Microsoft.ML.Transforms/MissingValueIndicatorTransform.cs +++ b/src/Microsoft.ML.Transforms/MissingValueIndicatorTransform.cs @@ -11,6 +11,7 @@ using Microsoft.ML.Runtime.Data; using Microsoft.ML.Runtime.Internal.Utilities; using Microsoft.ML.Runtime.Model; +using Microsoft.ML.Transforms; [assembly: LoadableClass(typeof(MissingValueIndicatorTransform), typeof(MissingValueIndicatorTransform.Arguments), typeof(SignatureDataTransform), "", "MissingValueIndicatorTransform", "MissingValueTransform", "MissingTransform", "Missing")] @@ -18,7 +19,7 @@ [assembly: LoadableClass(typeof(MissingValueIndicatorTransform), null, typeof(SignatureLoadDataTransform), "Missing Value Indicator Transform", MissingValueIndicatorTransform.LoaderSignature, "MissingFeatureFunction")] -namespace Microsoft.ML.Runtime.Data +namespace Microsoft.ML.Transforms { public sealed class MissingValueIndicatorTransform : OneToOneTransformBase { diff --git a/src/Microsoft.ML.Transforms/MutualInformationFeatureSelection.cs b/src/Microsoft.ML.Transforms/MutualInformationFeatureSelection.cs index 
2caf226d92..635ed63f38 100644 --- a/src/Microsoft.ML.Transforms/MutualInformationFeatureSelection.cs +++ b/src/Microsoft.ML.Transforms/MutualInformationFeatureSelection.cs @@ -19,7 +19,7 @@ [assembly: LoadableClass(MutualInformationFeatureSelectionTransform.Summary, typeof(IDataTransform), typeof(MutualInformationFeatureSelectionTransform), typeof(MutualInformationFeatureSelectionTransform.Arguments), typeof(SignatureDataTransform), MutualInformationFeatureSelectionTransform.UserName, "MutualInformationFeatureSelection", "MutualInformationFeatureSelectionTransform", MutualInformationFeatureSelectionTransform.ShortName)] -namespace Microsoft.ML.Runtime.Data +namespace Microsoft.ML.Transforms { /// public static class MutualInformationFeatureSelectionTransform @@ -647,7 +647,7 @@ private void FillTable(ref VBuffer features, int offset, int numFeatures) /// private static ValueMapper, VBuffer> BinKeys(ColumnType colType) { - var conv = Conversion.Conversions.Instance.GetStandardConversion(colType, NumberType.U4, out bool identity); + var conv = Runtime.Data.Conversion.Conversions.Instance.GetStandardConversion(colType, NumberType.U4, out bool identity); ValueMapper mapper; if (identity) { diff --git a/src/Microsoft.ML.Transforms/OptionalColumnTransform.cs b/src/Microsoft.ML.Transforms/OptionalColumnTransform.cs index 57bdf158fb..8139f3da01 100644 --- a/src/Microsoft.ML.Transforms/OptionalColumnTransform.cs +++ b/src/Microsoft.ML.Transforms/OptionalColumnTransform.cs @@ -10,7 +10,7 @@ using Microsoft.ML.Runtime.CommandLine; using Microsoft.ML.Runtime.Data; using Microsoft.ML.Runtime.Data.IO; -using Microsoft.ML.Runtime.DataPipe; +using Microsoft.ML.Transforms; using Microsoft.ML.Runtime.EntryPoints; using Microsoft.ML.Runtime.Internal.Utilities; using Microsoft.ML.Runtime.Model; @@ -24,7 +24,7 @@ [assembly: EntryPointModule(typeof(OptionalColumnTransform))] -namespace Microsoft.ML.Runtime.DataPipe +namespace Microsoft.ML.Transforms { /// public class 
OptionalColumnTransform : RowToRowMapperTransformBase diff --git a/src/Microsoft.ML.Transforms/TermLookupTransform.cs b/src/Microsoft.ML.Transforms/TermLookupTransform.cs index f34be7c2d7..80b8f92299 100644 --- a/src/Microsoft.ML.Transforms/TermLookupTransform.cs +++ b/src/Microsoft.ML.Transforms/TermLookupTransform.cs @@ -5,11 +5,10 @@ using Microsoft.ML.Runtime; using Microsoft.ML.Runtime.CommandLine; using Microsoft.ML.Runtime.Data; -using Microsoft.ML.Runtime.Data.Conversion; using Microsoft.ML.Runtime.Data.IO; using Microsoft.ML.Runtime.Internal.Utilities; using Microsoft.ML.Runtime.Model; -using Microsoft.ML.Transforms; +using Microsoft.ML.Transforms.Text; using System; using System.Collections.Generic; using System.IO; @@ -22,7 +21,7 @@ [assembly: LoadableClass(TermLookupTransform.Summary, typeof(TermLookupTransform), null, typeof(SignatureLoadDataTransform), "Term Lookup Transform", TermLookupTransform.LoaderSignature)] -namespace Microsoft.ML.Runtime.Data +namespace Microsoft.ML.Transforms.Text { using Conditional = System.Diagnostics.ConditionalAttribute; @@ -223,7 +222,7 @@ public OneValueMap(PrimitiveType type) // We should probably have a mapping from type to its bad value somewhere, perhaps in Conversions. bool identity; ValueMapper, TRes> conv; - if (Conversions.Instance.TryGetStandardConversion, TRes>(TextType.Instance, type, + if (Runtime.Data.Conversion.Conversions.Instance.TryGetStandardConversion, TRes>(TextType.Instance, type, out conv, out identity)) { //Empty string will map to NA for R4 and R8, the only two types that can @@ -386,7 +385,7 @@ private static IComponentFactory GetLoaderFacto // Try to parse the text as a key value between 1 and ulong.MaxValue. If this succeeds and res>0, // we update max and min accordingly. If res==0 it means the value is missing, in which case we ignore it for // computing max and min. 
- if (Conversions.Instance.TryParseKey(in txt, 1, ulong.MaxValue, out res)) + if (Runtime.Data.Conversion.Conversions.Instance.TryParseKey(in txt, 1, ulong.MaxValue, out res)) { if (res < min && res != 0) min = res; @@ -395,7 +394,7 @@ private static IComponentFactory GetLoaderFacto } // If parsing as key did not succeed, the value can still be 0, so we try parsing it as a ulong. If it succeeds, // then the value is 0, and we update min accordingly. - else if (Conversions.Instance.TryParse(in txt, out res)) + else if (Runtime.Data.Conversion.Conversions.Instance.TryParse(in txt, out res)) { ch.Assert(res == 0); min = 0; diff --git a/src/Microsoft.ML.Transforms/UngroupTransform.cs b/src/Microsoft.ML.Transforms/UngroupTransform.cs index ad2de0e30a..5601ce0ee9 100644 --- a/src/Microsoft.ML.Transforms/UngroupTransform.cs +++ b/src/Microsoft.ML.Transforms/UngroupTransform.cs @@ -13,6 +13,7 @@ using Microsoft.ML.Runtime.EntryPoints; using Microsoft.ML.Runtime.Internal.Utilities; using Microsoft.ML.Runtime.Model; +using Microsoft.ML.Transforms; [assembly: LoadableClass(UngroupTransform.Summary, typeof(UngroupTransform), typeof(UngroupTransform.Arguments), typeof(SignatureDataTransform), UngroupTransform.UserName, UngroupTransform.ShortName)] @@ -20,7 +21,7 @@ [assembly: LoadableClass(UngroupTransform.Summary, typeof(UngroupTransform), null, typeof(SignatureLoadDataTransform), UngroupTransform.UserName, UngroupTransform.LoaderSignature)] -namespace Microsoft.ML.Runtime.Data +namespace Microsoft.ML.Transforms { // This can be thought of as an inverse of GroupTransform. 
For all specified vector columns @@ -267,7 +268,7 @@ public SchemaImpl(IExceptionContext ectx, Schema inputSchema, UngroupMode mode, _pivotIndex[info.Index] = i; } - AsSchema = Data.Schema.Create(this); + AsSchema = Runtime.Data.Schema.Create(this); } private static void CheckAndBind(IExceptionContext ectx, ISchema inputSchema, @@ -613,7 +614,7 @@ private ValueGetter MakeGetter(int col, PrimitiveType itemType) // cachedIndex == row.Count || _pivotColPosition <= row.Indices[cachedIndex]. int cachedIndex = 0; VBuffer row = default(VBuffer); - T naValue = Conversions.Instance.GetNAOrDefault(itemType); + T naValue = Runtime.Data.Conversion.Conversions.Instance.GetNAOrDefault(itemType); return (ref T value) => { diff --git a/src/Microsoft.ML.Transforms/WhiteningTransform.cs b/src/Microsoft.ML.Transforms/WhiteningTransform.cs index d5a5ca1455..b4ec9990f6 100644 --- a/src/Microsoft.ML.Transforms/WhiteningTransform.cs +++ b/src/Microsoft.ML.Transforms/WhiteningTransform.cs @@ -16,6 +16,7 @@ using Microsoft.ML.Runtime.Internal.Utilities; using Microsoft.ML.Runtime.Model; using Microsoft.ML.Runtime.Internal.Internallearn; +using Microsoft.ML.Transforms; [assembly: LoadableClass(WhiteningTransform.Summary, typeof(WhiteningTransform), typeof(WhiteningTransform.Arguments), typeof(SignatureDataTransform), "Whitening Transform", "WhiteningTransform", "Whitening")] @@ -23,7 +24,7 @@ [assembly: LoadableClass(WhiteningTransform.Summary, typeof(WhiteningTransform), null, typeof(SignatureLoadDataTransform), "Whitening Transform", WhiteningTransform.LoaderSignature, WhiteningTransform.LoaderSignatureOld)] -namespace Microsoft.ML.Runtime.Data +namespace Microsoft.ML.Transforms { public enum WhiteningKind { diff --git a/src/Microsoft.ML.Transforms/WrappedFeatureSelectionTransformers.cs b/src/Microsoft.ML.Transforms/WrappedFeatureSelectionTransformers.cs index c960068fd4..3fa2b55a37 100644 --- a/src/Microsoft.ML.Transforms/WrappedFeatureSelectionTransformers.cs +++ 
b/src/Microsoft.ML.Transforms/WrappedFeatureSelectionTransformers.cs @@ -9,7 +9,7 @@ using Microsoft.ML.StaticPipe.Runtime; using System.Collections.Generic; using System.Linq; -using static Microsoft.ML.Runtime.Data.MutualInformationFeatureSelectionTransform; +using static Microsoft.ML.Transforms.MutualInformationFeatureSelectionTransform; namespace Microsoft.ML.Transforms { diff --git a/test/BaselineOutput/Common/EntryPoints/core_ep-list.tsv b/test/BaselineOutput/Common/EntryPoints/core_ep-list.tsv index 327f4ce556..2a02355613 100644 --- a/test/BaselineOutput/Common/EntryPoints/core_ep-list.tsv +++ b/test/BaselineOutput/Common/EntryPoints/core_ep-list.tsv @@ -93,7 +93,7 @@ Transforms.DatasetTransformScorer Score a dataset with a transform model Microso Transforms.Dictionarizer Converts input values (words, numbers, etc.) to index in a dictionary. Microsoft.ML.Transforms.Text.TextAnalytics TermTransform Microsoft.ML.Transforms.Categorical.TermTransform+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput Transforms.FeatureCombiner Combines all the features into one feature column. Microsoft.ML.Runtime.EntryPoints.FeatureCombiner PrepareFeatures Microsoft.ML.Runtime.EntryPoints.FeatureCombiner+FeatureCombinerInput Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput Transforms.FeatureSelectorByCount Selects the slots for which the count of non-default values is greater than or equal to a threshold. Microsoft.ML.Transforms.SelectFeatures CountSelect Microsoft.ML.Transforms.CountFeatureSelectionTransform+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput -Transforms.FeatureSelectorByMutualInformation Selects the top k slots across all specified columns ordered by their mutual information with the label column. 
Microsoft.ML.Transforms.SelectFeatures MutualInformationSelect Microsoft.ML.Runtime.Data.MutualInformationFeatureSelectionTransform+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput +Transforms.FeatureSelectorByMutualInformation Selects the top k slots across all specified columns ordered by their mutual information with the label column. Microsoft.ML.Transforms.SelectFeatures MutualInformationSelect Microsoft.ML.Transforms.MutualInformationFeatureSelectionTransform+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput Transforms.GlobalContrastNormalizer Performs a global contrast normalization on input values: Y = (s * X - M) / D, where s is a scale, M is mean and D is either L2 norm or standard deviation. Microsoft.ML.Runtime.Data.LpNormalization GcNormalize Microsoft.ML.Runtime.Data.LpNormNormalizerTransform+GcnArguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput Transforms.HashConverter Converts column values into hashes. This transform accepts both numeric and text inputs, both single and vector-valued columns. Microsoft.ML.Runtime.Data.HashJoin Apply Microsoft.ML.Runtime.Data.HashJoinTransform+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput Transforms.ImageGrayscale Convert image into grayscale. 
Microsoft.ML.Runtime.ImageAnalytics.EntryPoints.ImageAnalytics ImageGrayscale Microsoft.ML.Runtime.ImageAnalytics.ImageGrayscaleTransform+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput @@ -118,7 +118,7 @@ Transforms.MissingValueSubstitutor Create an output column of the same type and Transforms.ModelCombiner Combines a sequence of TransformModels into a single model Microsoft.ML.Runtime.EntryPoints.ModelOperations CombineTransformModels Microsoft.ML.Runtime.EntryPoints.ModelOperations+CombineTransformModelsInput Microsoft.ML.Runtime.EntryPoints.ModelOperations+CombineTransformModelsOutput Transforms.NGramTranslator Produces a bag of counts of ngrams (sequences of consecutive values of length 1-n) in a given vector of keys. It does so by building a dictionary of ngrams and using the id in the dictionary as the index in the bag. Microsoft.ML.Transforms.Text.TextAnalytics NGramTransform Microsoft.ML.Runtime.Data.NgramTransform+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput Transforms.NoOperation Does nothing. Microsoft.ML.Runtime.Data.NopTransform Nop Microsoft.ML.Runtime.Data.NopTransform+NopInput Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput -Transforms.OptionalColumnCreator If the source column does not exist after deserialization, create a column with the right type and default values. Microsoft.ML.Runtime.DataPipe.OptionalColumnTransform MakeOptional Microsoft.ML.Runtime.DataPipe.OptionalColumnTransform+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput +Transforms.OptionalColumnCreator If the source column does not exist after deserialization, create a column with the right type and default values. 
Microsoft.ML.Transforms.OptionalColumnTransform MakeOptional Microsoft.ML.Transforms.OptionalColumnTransform+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput Transforms.PcaCalculator PCA is a dimensionality-reduction transform which computes the projection of a numeric vector onto a low-rank subspace. Microsoft.ML.Transforms.PCA.PcaTransform Calculate Microsoft.ML.Transforms.PCA.PcaTransform+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput Transforms.PredictedLabelColumnOriginalValueConverter Transforms a predicted label column to its original values, unless it is of type bool. Microsoft.ML.Runtime.EntryPoints.FeatureCombiner ConvertPredictedLabel Microsoft.ML.Runtime.EntryPoints.FeatureCombiner+PredictedLabelInput Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput Transforms.RandomNumberGenerator Adds a column with a generated number sequence. Microsoft.ML.Transforms.RandomNumberGenerator Generate Microsoft.ML.Transforms.GenerateNumberTransform+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput @@ -128,7 +128,6 @@ Transforms.RowSkipFilter Allows limiting input to a subset of rows by skipping a Transforms.RowTakeFilter Allows limiting input to a subset of rows by taking N first rows. Microsoft.ML.Runtime.EntryPoints.SelectRows TakeFilter Microsoft.ML.Transforms.SkipTakeFilter+TakeArguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput Transforms.ScoreColumnSelector Selects only the last score columns and the extra columns specified in the arguments. 
Microsoft.ML.Runtime.EntryPoints.ScoreModel SelectColumns Microsoft.ML.Runtime.EntryPoints.ScoreModel+ScoreColumnSelectorInput Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput Transforms.Scorer Turn the predictor model into a transform model Microsoft.ML.Runtime.EntryPoints.ScoreModel MakeScoringTransform Microsoft.ML.Runtime.EntryPoints.ScoreModel+ModelInput Microsoft.ML.Runtime.EntryPoints.ScoreModel+Output -Transforms.Segregator Un-groups vector columns into sequences of rows, inverse of Group transform Microsoft.ML.Runtime.Data.GroupingOperations Ungroup Microsoft.ML.Runtime.Data.UngroupTransform+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput Transforms.SentimentAnalyzer Uses a pretrained sentiment model to score input strings Microsoft.ML.Transforms.Text.TextAnalytics AnalyzeSentiment Microsoft.ML.Runtime.TextAnalytics.SentimentAnalyzingTransform+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput Transforms.TensorFlowScorer Transforms the data using the TensorFlow model. Microsoft.ML.Transforms.TensorFlowTransform TensorFlowScorer Microsoft.ML.Transforms.TensorFlowTransform+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput Transforms.TextFeaturizer A transform that turns a collection of text documents into numerical feature vectors. The feature vectors are normalized counts of (word and/or character) ngrams in a given tokenized text. 
Microsoft.ML.Transforms.Text.TextAnalytics TextTransform Microsoft.ML.Transforms.Text.TextFeaturizingEstimator+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput diff --git a/test/BaselineOutput/Common/EntryPoints/core_manifest.json b/test/BaselineOutput/Common/EntryPoints/core_manifest.json index 3c174d996e..52c449f55d 100644 --- a/test/BaselineOutput/Common/EntryPoints/core_manifest.json +++ b/test/BaselineOutput/Common/EntryPoints/core_manifest.json @@ -22449,70 +22449,6 @@ } ] }, - { - "Name": "Transforms.Segregator", - "Desc": "Un-groups vector columns into sequences of rows, inverse of Group transform", - "FriendlyName": "Un-group Transform", - "ShortName": "Ungroup", - "Inputs": [ - { - "Name": "Data", - "Type": "DataView", - "Desc": "Input dataset", - "Required": true, - "SortOrder": 1.0, - "IsNullable": false - }, - { - "Name": "Column", - "Type": { - "Kind": "Array", - "ItemType": "String" - }, - "Desc": "Columns to unroll, or 'pivot'", - "Aliases": [ - "col" - ], - "Required": true, - "SortOrder": 150.0, - "IsNullable": false - }, - { - "Name": "Mode", - "Type": { - "Kind": "Enum", - "Values": [ - "Inner", - "Outer", - "First" - ] - }, - "Desc": "Specifies how to unroll multiple pivot columns of different size.", - "Required": false, - "SortOrder": 150.0, - "IsNullable": false, - "Default": "Inner" - } - ], - "Outputs": [ - { - "Name": "OutputData", - "Type": "DataView", - "Desc": "Transformed dataset" - }, - { - "Name": "Model", - "Type": "TransformModel", - "Desc": "Transform model" - } - ], - "InputKind": [ - "ITransformInput" - ], - "OutputKind": [ - "ITransformOutput" - ] - }, { "Name": "Transforms.SentimentAnalyzer", "Desc": "Uses a pretrained sentiment model to score input strings", diff --git a/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs b/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs index 863d280fca..68ecdc0832 100644 --- a/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs +++ 
b/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs @@ -248,7 +248,7 @@ private string GetBuildPrefix() #endif } - [Fact(Skip = "Execute this test if you want to regenerate CSharpApi file")] + [Fact(Skip = "Execute this test if you want to regenerate the core_manifest and core_ep_list files")] public void RegenerateEntryPointCatalog() { var (epListContents, jObj) = BuildManifests(); diff --git a/test/Microsoft.ML.Tests/Transformers/KeyToBinaryVectorEstimatorTest.cs b/test/Microsoft.ML.Tests/Transformers/KeyToBinaryVectorEstimatorTest.cs index a350f4a5e0..45243f93ae 100644 --- a/test/Microsoft.ML.Tests/Transformers/KeyToBinaryVectorEstimatorTest.cs +++ b/test/Microsoft.ML.Tests/Transformers/KeyToBinaryVectorEstimatorTest.cs @@ -9,6 +9,7 @@ using Microsoft.ML.Runtime.RunTests; using Microsoft.ML.Runtime.Tools; using Microsoft.ML.Transforms.Categorical; +using Microsoft.ML.Transforms.Conversions; using System; using System.IO; using System.Linq; From 8ba8ec3729b6bf83cdbf96b4f4cfc7f4fdef367b Mon Sep 17 00:00:00 2001 From: Senja Filipi Date: Tue, 30 Oct 2018 13:11:20 -0700 Subject: [PATCH 2/3] TermLookup moved to Categoricals namespace. 
--- src/Microsoft.ML.Transforms/TermLookupTransform.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Microsoft.ML.Transforms/TermLookupTransform.cs b/src/Microsoft.ML.Transforms/TermLookupTransform.cs index 80b8f92299..cda2f086dc 100644 --- a/src/Microsoft.ML.Transforms/TermLookupTransform.cs +++ b/src/Microsoft.ML.Transforms/TermLookupTransform.cs @@ -8,7 +8,7 @@ using Microsoft.ML.Runtime.Data.IO; using Microsoft.ML.Runtime.Internal.Utilities; using Microsoft.ML.Runtime.Model; -using Microsoft.ML.Transforms.Text; +using Microsoft.ML.Transforms.Categoricals; using System; using System.Collections.Generic; using System.IO; @@ -21,7 +21,7 @@ [assembly: LoadableClass(TermLookupTransform.Summary, typeof(TermLookupTransform), null, typeof(SignatureLoadDataTransform), "Term Lookup Transform", TermLookupTransform.LoaderSignature)] -namespace Microsoft.ML.Transforms.Text +namespace Microsoft.ML.Transforms.Categoricals { using Conditional = System.Diagnostics.ConditionalAttribute; From a058aedf13fab0cea92d8662163d9fad9ad626e0 Mon Sep 17 00:00:00 2001 From: Senja Filipi Date: Tue, 30 Oct 2018 14:15:34 -0700 Subject: [PATCH 3/3] regenerating the ep --- src/Microsoft.ML.Legacy/CSharpApi.cs | 88 +++++++++++++++++++ src/Microsoft.ML.Transforms/GroupTransform.cs | 22 +++-- .../Common/EntryPoints/core_ep-list.tsv | 3 +- .../Common/EntryPoints/core_manifest.json | 64 ++++++++++++++ 4 files changed, 168 insertions(+), 9 deletions(-) diff --git a/src/Microsoft.ML.Legacy/CSharpApi.cs b/src/Microsoft.ML.Legacy/CSharpApi.cs index c17ae9168a..9f5b2e8791 100644 --- a/src/Microsoft.ML.Legacy/CSharpApi.cs +++ b/src/Microsoft.ML.Legacy/CSharpApi.cs @@ -1582,6 +1582,18 @@ public void Add(Microsoft.ML.Legacy.Transforms.Scorer input, Microsoft.ML.Legacy _jsonNodes.Add(Serialize("Transforms.Scorer", input, output)); } + public Microsoft.ML.Legacy.Transforms.Segregator.Output Add(Microsoft.ML.Legacy.Transforms.Segregator input) + { + var output = new 
Microsoft.ML.Legacy.Transforms.Segregator.Output(); + Add(input, output); + return output; + } + + public void Add(Microsoft.ML.Legacy.Transforms.Segregator input, Microsoft.ML.Legacy.Transforms.Segregator.Output output) + { + _jsonNodes.Add(Serialize("Transforms.Segregator", input, output)); + } + public Microsoft.ML.Legacy.Transforms.SentimentAnalyzer.Output Add(Microsoft.ML.Legacy.Transforms.SentimentAnalyzer input) { var output = new Microsoft.ML.Legacy.Transforms.SentimentAnalyzer.Output(); @@ -16419,6 +16431,82 @@ public sealed class Output } } + namespace Legacy.Transforms + { + public enum UngroupTransformUngroupMode + { + Inner = 0, + Outer = 1, + First = 2 + } + + + /// + /// + public sealed partial class Segregator : Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.Legacy.ILearningPipelineItem + { + + + /// + /// Columns to unroll, or 'pivot' + /// + public string[] Column { get; set; } + + /// + /// Specifies how to unroll multiple pivot columns of different size. 
+ /// + public UngroupTransformUngroupMode Mode { get; set; } = UngroupTransformUngroupMode.Inner; + + /// + /// Input dataset + /// + public Var Data { get; set; } = new Var(); + + + public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.ITransformOutput + { + /// + /// Transformed dataset + /// + public Var OutputData { get; set; } = new Var(); + + /// + /// Transform model + /// + public Var Model { get; set; } = new Var(); + + } + public Var GetInputData() => Data; + + public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) + { + if (previousStep != null) + { + if (!(previousStep is ILearningPipelineDataStep dataStep)) + { + throw new InvalidOperationException($"{ nameof(Segregator)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); + } + + Data = dataStep.Data; + } + Output output = experiment.Add(this); + return new SegregatorPipelineStep(output); + } + + private class SegregatorPipelineStep : ILearningPipelineDataStep + { + public SegregatorPipelineStep(Output output) + { + Data = output.OutputData; + Model = output.Model; + } + + public Var Data { get; } + public Var Model { get; } + } + } + } + namespace Legacy.Transforms { diff --git a/src/Microsoft.ML.Transforms/GroupTransform.cs b/src/Microsoft.ML.Transforms/GroupTransform.cs index 75224c52e3..a64ad33d30 100644 --- a/src/Microsoft.ML.Transforms/GroupTransform.cs +++ b/src/Microsoft.ML.Transforms/GroupTransform.cs @@ -12,6 +12,7 @@ using Microsoft.ML.Runtime.EntryPoints; using Microsoft.ML.Runtime.Internal.Utilities; using Microsoft.ML.Runtime.Model; +using Microsoft.ML.Transforms; [assembly: LoadableClass(GroupTransform.Summary, typeof(GroupTransform), typeof(GroupTransform.Arguments), typeof(SignatureDataTransform), GroupTransform.UserName, GroupTransform.ShortName)] @@ -21,21 +22,25 @@ [assembly: EntryPointModule(typeof(GroupingOperations))] -namespace Microsoft.ML.Runtime.Data +namespace Microsoft.ML.Transforms { /// - 
/// This transform essentially performs the following SQL-like operation: - /// SELECT GroupKey1, GroupKey2, ... GroupKeyK, LIST(Value1), LIST(Value2), ... LIST(ValueN) + /// A transform that groups values of a scalar column into a vector, by a contiguous group ID. + /// + /// + ///

This transform essentially performs the following SQL-like operation:

+ ///

SELECT GroupKey1, GroupKey2, ... GroupKeyK, LIST(Value1), LIST(Value2), ... LIST(ValueN) /// FROM Data - /// GROUP BY GroupKey1, GroupKey2, ... GroupKeyK. + /// GROUP BY GroupKey1, GroupKey2, ... GroupKeyK.

/// - /// It assumes that the group keys are contiguous (if a new group key sequence is encountered, the group is over). + ///

It assumes that the group keys are contiguous (if a new group key sequence is encountered, the group is over). /// The GroupKeyN and ValueN columns can be of any primitive types. The code requires that every raw type T of the group key column /// is an , which is currently true for all existing primitive types. - /// The produced ValueN columns will be variable-length vectors of the original value column types. + /// The produced ValueN columns will be variable-length vectors of the original value column types.

/// - /// The order of ValueN entries in the lists is preserved. + ///

The order of ValueN entries in the lists is preserved.

/// + /// /// Example: /// User Item /// Pete Book @@ -49,7 +54,8 @@ namespace Microsoft.ML.Runtime.Data /// Pete [Book] /// Tom [Table, Kitten] /// Pete [Chair, Cup] - ///
+ /// + /// public sealed class GroupTransform : TransformBase { public const string Summary = "Groups values of a scalar column into a vector, by a contiguous group ID"; diff --git a/test/BaselineOutput/Common/EntryPoints/core_ep-list.tsv b/test/BaselineOutput/Common/EntryPoints/core_ep-list.tsv index 2a02355613..589cc05c38 100644 --- a/test/BaselineOutput/Common/EntryPoints/core_ep-list.tsv +++ b/test/BaselineOutput/Common/EntryPoints/core_ep-list.tsv @@ -85,7 +85,7 @@ Transforms.ColumnConcatenator Concatenates one or more columns of the same item Transforms.ColumnCopier Duplicates columns from the dataset Microsoft.ML.Runtime.EntryPoints.SchemaManipulation CopyColumns Microsoft.ML.Transforms.CopyColumnsTransform+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput Transforms.ColumnSelector Selects a set of columns, dropping all others Microsoft.ML.Runtime.EntryPoints.SchemaManipulation SelectColumns Microsoft.ML.Transforms.SelectColumnsTransform+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput Transforms.ColumnTypeConverter Converts a column to a different type, using standard conversions. 
Microsoft.ML.Transforms.TypeConversion Convert Microsoft.ML.Transforms.ConvertTransform+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput -Transforms.CombinerByContiguousGroupId Groups values of a scalar column into a vector, by a contiguous group ID Microsoft.ML.Runtime.Data.GroupingOperations Group Microsoft.ML.Runtime.Data.GroupTransform+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput +Transforms.CombinerByContiguousGroupId Groups values of a scalar column into a vector, by a contiguous group ID Microsoft.ML.Transforms.GroupingOperations Group Microsoft.ML.Transforms.GroupTransform+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput Transforms.ConditionalNormalizer Normalize the columns only if needed Microsoft.ML.Runtime.Data.Normalize IfNeeded Microsoft.ML.Transforms.Normalizers.NormalizeTransform+MinMaxArguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+MacroOutput`1[Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput] Transforms.DataCache Caches using the specified cache option. Microsoft.ML.Runtime.EntryPoints.Cache CacheData Microsoft.ML.Runtime.EntryPoints.Cache+CacheInput Microsoft.ML.Runtime.EntryPoints.Cache+CacheOutput Transforms.DatasetScorer Score a dataset with a predictor model Microsoft.ML.Runtime.EntryPoints.ScoreModel Score Microsoft.ML.Runtime.EntryPoints.ScoreModel+Input Microsoft.ML.Runtime.EntryPoints.ScoreModel+Output @@ -128,6 +128,7 @@ Transforms.RowSkipFilter Allows limiting input to a subset of rows by skipping a Transforms.RowTakeFilter Allows limiting input to a subset of rows by taking N first rows. Microsoft.ML.Runtime.EntryPoints.SelectRows TakeFilter Microsoft.ML.Transforms.SkipTakeFilter+TakeArguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput Transforms.ScoreColumnSelector Selects only the last score columns and the extra columns specified in the arguments. 
Microsoft.ML.Runtime.EntryPoints.ScoreModel SelectColumns Microsoft.ML.Runtime.EntryPoints.ScoreModel+ScoreColumnSelectorInput Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput Transforms.Scorer Turn the predictor model into a transform model Microsoft.ML.Runtime.EntryPoints.ScoreModel MakeScoringTransform Microsoft.ML.Runtime.EntryPoints.ScoreModel+ModelInput Microsoft.ML.Runtime.EntryPoints.ScoreModel+Output +Transforms.Segregator Un-groups vector columns into sequences of rows, inverse of Group transform Microsoft.ML.Transforms.GroupingOperations Ungroup Microsoft.ML.Transforms.UngroupTransform+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput Transforms.SentimentAnalyzer Uses a pretrained sentiment model to score input strings Microsoft.ML.Transforms.Text.TextAnalytics AnalyzeSentiment Microsoft.ML.Runtime.TextAnalytics.SentimentAnalyzingTransform+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput Transforms.TensorFlowScorer Transforms the data using the TensorFlow model. Microsoft.ML.Transforms.TensorFlowTransform TensorFlowScorer Microsoft.ML.Transforms.TensorFlowTransform+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput Transforms.TextFeaturizer A transform that turns a collection of text documents into numerical feature vectors. The feature vectors are normalized counts of (word and/or character) ngrams in a given tokenized text. 
Microsoft.ML.Transforms.Text.TextAnalytics TextTransform Microsoft.ML.Transforms.Text.TextFeaturizingEstimator+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput diff --git a/test/BaselineOutput/Common/EntryPoints/core_manifest.json b/test/BaselineOutput/Common/EntryPoints/core_manifest.json index 52c449f55d..3c174d996e 100644 --- a/test/BaselineOutput/Common/EntryPoints/core_manifest.json +++ b/test/BaselineOutput/Common/EntryPoints/core_manifest.json @@ -22449,6 +22449,70 @@ } ] }, + { + "Name": "Transforms.Segregator", + "Desc": "Un-groups vector columns into sequences of rows, inverse of Group transform", + "FriendlyName": "Un-group Transform", + "ShortName": "Ungroup", + "Inputs": [ + { + "Name": "Data", + "Type": "DataView", + "Desc": "Input dataset", + "Required": true, + "SortOrder": 1.0, + "IsNullable": false + }, + { + "Name": "Column", + "Type": { + "Kind": "Array", + "ItemType": "String" + }, + "Desc": "Columns to unroll, or 'pivot'", + "Aliases": [ + "col" + ], + "Required": true, + "SortOrder": 150.0, + "IsNullable": false + }, + { + "Name": "Mode", + "Type": { + "Kind": "Enum", + "Values": [ + "Inner", + "Outer", + "First" + ] + }, + "Desc": "Specifies how to unroll multiple pivot columns of different size.", + "Required": false, + "SortOrder": 150.0, + "IsNullable": false, + "Default": "Inner" + } + ], + "Outputs": [ + { + "Name": "OutputData", + "Type": "DataView", + "Desc": "Transformed dataset" + }, + { + "Name": "Model", + "Type": "TransformModel", + "Desc": "Transform model" + } + ], + "InputKind": [ + "ITransformInput" + ], + "OutputKind": [ + "ITransformOutput" + ] + }, { "Name": "Transforms.SentimentAnalyzer", "Desc": "Uses a pretrained sentiment model to score input strings",