Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding another handful transform's extensions #1494

Merged
merged 4 commits into from
Nov 1, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 47 additions & 0 deletions src/Microsoft.ML.Data/Transforms/CategoricalCatalog.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using Microsoft.ML.Runtime;
using Microsoft.ML.Runtime.Data;
using Microsoft.ML.Transforms.Categorical;

namespace Microsoft.ML
{
/// <summary>
/// Extension methods on the categorical transforms catalog that create
/// <see cref="ValueToKeyMappingEstimator"/> instances.
/// </summary>
public static class ValueToKeyCatalog
{
    /// <summary>
    /// Creates a <see cref="ValueToKeyMappingEstimator"/> for a single input/output column pair.
    /// </summary>
    /// <param name="catalog">The categorical transform's catalog.</param>
    /// <param name="inputColumn">Name of the column to be transformed.</param>
    /// <param name="outputColumn">Name of the output column. If this is null '<paramref name="inputColumn"/>' will be used.</param>
    /// <param name="maxNumTerms">Maximum number of keys to keep per column when auto-training.</param>
    /// <param name="sort">How items should be ordered when vectorized. By default, they will be in the order encountered.
    /// If by value items are sorted according to their default comparison, for example, text sorting will be case sensitive (for example, 'A' then 'Z' then 'a').</param>
    /// <returns>The newly constructed estimator.</returns>
    public static ValueToKeyMappingEstimator MapValueToKey(this TransformsCatalog.CategoricalTransforms catalog,
        string inputColumn,
        string outputColumn = null,
        int maxNumTerms = ValueToKeyMappingEstimator.Defaults.MaxNumTerms,
        TermTransform.SortOrder sort = ValueToKeyMappingEstimator.Defaults.Sort)
    {
        // The estimator is built on the environment obtained from the catalog.
        var env = CatalogUtils.GetEnvironment(catalog);
        return new ValueToKeyMappingEstimator(env, inputColumn, outputColumn, maxNumTerms, sort);
    }

    /// <summary>
    /// Creates a <see cref="ValueToKeyMappingEstimator"/> for several columns, loading the terms to use from <paramref name="file"/>.
    /// </summary>
    /// <param name="catalog">The categorical transform's catalog.</param>
    /// <param name="columns">The data columns to map to keys.</param>
    /// <param name="file">The path of the file containing the terms.</param>
    /// <param name="termsColumn">Passed through unchanged to the estimator's constructor.
    /// Presumably the name of the column in <paramref name="file"/> that holds the terms; confirm against <see cref="ValueToKeyMappingEstimator"/>.</param>
    /// <param name="loaderFactory">Passed through unchanged to the estimator's constructor.
    /// Presumably the factory for the loader used to read <paramref name="file"/>; confirm against <see cref="ValueToKeyMappingEstimator"/>.</param>
    /// <returns>The newly constructed estimator.</returns>
    public static ValueToKeyMappingEstimator MapValueToKey(this TransformsCatalog.CategoricalTransforms catalog,
        TermTransform.ColumnInfo[] columns,
        string file = null,
        string termsColumn = null,
        IComponentFactory<IMultiStreamSource, IDataLoader> loaderFactory = null)
    {
        // The estimator is built on the environment obtained from the catalog.
        var env = CatalogUtils.GetEnvironment(catalog);
        return new ValueToKeyMappingEstimator(env, columns, file, termsColumn, loaderFactory);
    }
}
}
16 changes: 8 additions & 8 deletions src/Microsoft.ML.Data/Transforms/ConversionsCatalog.cs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ public static class HashingEstimatorCatalog
/// <param name="outputColumn">Name of the output column. If this is null '<paramref name="inputColumn"/>' will be used.</param>
/// <param name="hashBits">Number of bits to hash into. Must be between 1 and 31, inclusive.</param>
/// <param name="invertHash">Limit the number of keys used to generate the slot name to this many. 0 means no invert hashing, -1 means no limit.</param>
public static HashingEstimator Hash(this TransformsCatalog.Conversions catalog, string inputColumn, string outputColumn = null,
public static HashingEstimator Hash(this TransformsCatalog.ConversionTransforms catalog, string inputColumn, string outputColumn = null,
singlis marked this conversation as resolved.
Show resolved Hide resolved
int hashBits = HashDefaults.HashBits, int invertHash = HashDefaults.InvertHash)
=> new HashingEstimator(CatalogUtils.GetEnvironment(catalog), inputColumn, outputColumn, hashBits, invertHash);

Expand All @@ -33,7 +33,7 @@ public static HashingEstimator Hash(this TransformsCatalog.Conversions catalog,
/// </summary>
/// <param name="catalog">The transform's catalog.</param>
/// <param name="columns">Description of dataset columns and how to process them.</param>
public static HashingEstimator Hash(this TransformsCatalog.Conversions catalog, params HashTransformer.ColumnInfo[] columns)
public static HashingEstimator Hash(this TransformsCatalog.ConversionTransforms catalog, params HashTransformer.ColumnInfo[] columns)
=> new HashingEstimator(CatalogUtils.GetEnvironment(catalog), columns);

/// <summary>
Expand All @@ -43,7 +43,7 @@ public static HashingEstimator Hash(this TransformsCatalog.Conversions catalog,
/// <param name="inputColumn">Name of the input column.</param>
/// <param name="outputColumn">Name of the output column. If this is null '<paramref name="inputColumn"/>' will be used.</param>
/// <param name="outputKind">The data kind to convert the column to.</param>
public static ConvertingEstimator ConvertTo(this TransformsCatalog.Conversions catalog, string inputColumn, string outputColumn = null,
public static ConvertingEstimator ConvertTo(this TransformsCatalog.ConversionTransforms catalog, string inputColumn, string outputColumn = null,
DataKind outputKind = ConvertDefaults.DefaultOutputKind)
=> new ConvertingEstimator(CatalogUtils.GetEnvironment(catalog), inputColumn, outputColumn, outputKind);

Expand All @@ -52,7 +52,7 @@ public static ConvertingEstimator ConvertTo(this TransformsCatalog.Conversions c
/// </summary>
/// <param name="catalog">The transform's catalog.</param>
/// <param name="columns">Description of dataset columns and how to process them.</param>
public static ConvertingEstimator ConvertTo(this TransformsCatalog.Conversions catalog, params ConvertingTransform.ColumnInfo[] columns)
public static ConvertingEstimator ConvertTo(this TransformsCatalog.ConversionTransforms catalog, params ConvertingTransform.ColumnInfo[] columns)
=> new ConvertingEstimator(CatalogUtils.GetEnvironment(catalog), columns);
}

Expand All @@ -63,7 +63,7 @@ public static class ToValueCatalog
/// </summary>
/// <param name="catalog">The categorical transform's catalog.</param>
/// <param name="inputColumn">Name of the input column.</param>
public static KeyToValueEstimator MapKeyToValue(this TransformsCatalog.Conversions catalog, string inputColumn)
public static KeyToValueEstimator MapKeyToValue(this TransformsCatalog.ConversionTransforms catalog, string inputColumn)
=> new KeyToValueEstimator(CatalogUtils.GetEnvironment(catalog), inputColumn);

/// <summary>
Expand All @@ -72,7 +72,7 @@ public static KeyToValueEstimator MapKeyToValue(this TransformsCatalog.Conversio
/// </summary>
/// <param name="catalog">The categorical transform's catalog</param>
/// <param name="columns">The pairs of input and output columns.</param>
public static KeyToValueEstimator MapKeyToValue(this TransformsCatalog.Conversions catalog, params (string input, string output)[] columns)
public static KeyToValueEstimator MapKeyToValue(this TransformsCatalog.ConversionTransforms catalog, params (string input, string output)[] columns)
=> new KeyToValueEstimator(CatalogUtils.GetEnvironment(catalog), columns);
}

Expand All @@ -86,7 +86,7 @@ public static class ToVectorCatalog
/// </summary>
/// <param name="catalog">The categorical transform's catalog.</param>
/// <param name="columns">The input column to map back to vectors.</param>
public static KeyToVectorMappingEstimator MapKeyToVector(this TransformsCatalog.Conversions catalog,
public static KeyToVectorMappingEstimator MapKeyToVector(this TransformsCatalog.ConversionTransforms catalog,
params KeyToVectorTransform.ColumnInfo[] columns)
=> new KeyToVectorMappingEstimator(CatalogUtils.GetEnvironment(catalog), columns);

Expand All @@ -97,7 +97,7 @@ public static KeyToVectorMappingEstimator MapKeyToVector(this TransformsCatalog.
/// <param name="inputColumn">The name of the input column.</param>
/// <param name="outputColumn">The name of the output column.</param>
/// <param name="bag">Whether bagging is used for the conversion. </param>
public static KeyToVectorMappingEstimator MapKeyToVector(this TransformsCatalog.Conversions catalog,
public static KeyToVectorMappingEstimator MapKeyToVector(this TransformsCatalog.ConversionTransforms catalog,
string inputColumn, string outputColumn = null, bool bag = KeyToVectorMappingEstimator.Defaults.Bag)
=> new KeyToVectorMappingEstimator(CatalogUtils.GetEnvironment(catalog), inputColumn, outputColumn, bag);
}
Expand Down
12 changes: 6 additions & 6 deletions src/Microsoft.ML.Data/Transforms/TransformsCatalog.cs
Original file line number Diff line number Diff line change
Expand Up @@ -13,19 +13,19 @@ public sealed class TransformsCatalog
internal IHostEnvironment Environment { get; }

public CategoricalTransforms Categorical { get; }
public Conversions Conversion { get; }
public ConversionTransforms Conversion { get; }
public TextTransforms Text { get; }
public ProjectionTransforms Projections { get; }
public ProjectionTransforms Projection { get; }

internal TransformsCatalog(IHostEnvironment env)
{
Contracts.AssertValue(env);
Environment = env;

Categorical = new CategoricalTransforms(this);
Conversion = new Conversions(this);
Conversion = new ConversionTransforms(this);
Text = new TextTransforms(this);
Projections = new ProjectionTransforms(this);
Projection = new ProjectionTransforms(this);
}

public abstract class SubCatalogBase
Expand All @@ -52,9 +52,9 @@ internal CategoricalTransforms(TransformsCatalog owner) : base(owner)
/// <summary>
/// The catalog of type conversion operations.
/// </summary>
public sealed class Conversions : SubCatalogBase
public sealed class ConversionTransforms : SubCatalogBase
{
public Conversions(TransformsCatalog owner) : base(owner)
public ConversionTransforms(TransformsCatalog owner) : base(owner)
{
}
}
Expand Down
34 changes: 34 additions & 0 deletions src/Microsoft.ML.OnnxTransform/OnnxCatalog.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using Microsoft.ML.Runtime;
using Microsoft.ML.Runtime.Data;
using Microsoft.ML.Transforms;

namespace Microsoft.ML
{
/// <summary>
/// Extension methods on <see cref="TransformsCatalog"/> that create <see cref="OnnxScoringEstimator"/> instances.
/// </summary>
public static class OnnxCatalog
{
    /// <summary>
    /// Creates an <see cref="OnnxScoringEstimator"/> from a model file and a single input/output column pair.
    /// </summary>
    /// <param name="catalog">The transform's catalog.</param>
    /// <param name="modelFile">The path of the file containing the ONNX model.</param>
    /// <param name="inputColumn">The input column.</param>
    /// <param name="outputColumn">The output column resulting from the transformation.</param>
    /// <returns>The newly constructed estimator.</returns>
    public static OnnxScoringEstimator ApplyOnnxModel(this TransformsCatalog catalog,
        string modelFile,
        string inputColumn,
        string outputColumn)
    {
        // The estimator is built on the environment obtained from the catalog.
        var env = CatalogUtils.GetEnvironment(catalog);
        return new OnnxScoringEstimator(env, modelFile, inputColumn, outputColumn);
    }

    /// <summary>
    /// Creates an <see cref="OnnxScoringEstimator"/> that wraps an existing <see cref="OnnxTransform"/>.
    /// </summary>
    /// <param name="catalog">The transform's catalog.</param>
    /// <param name="transformer">The ONNX transformer.</param>
    /// <returns>The newly constructed estimator.</returns>
    public static OnnxScoringEstimator ApplyOnnxModel(this TransformsCatalog catalog, OnnxTransform transformer)
    {
        var env = CatalogUtils.GetEnvironment(catalog);
        return new OnnxScoringEstimator(env, transformer);
    }
}
}
40 changes: 40 additions & 0 deletions src/Microsoft.ML.PCA/PCACatalog.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using Microsoft.ML.Runtime;
using Microsoft.ML.Runtime.Data;
using Microsoft.ML.Transforms.Projections;

namespace Microsoft.ML
{
public static class PcaCatalog
{

/// <summary>Initializes a new instance of <see cref="PrincipalComponentAnalysisEstimator"/>.</summary>
Copy link
Member

@singlis singlis Nov 1, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[](start = 101, length = 10)

Not needed. #Pending

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why? All the examples close the tag:

https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/xmldoc/xml-documentation-comments


In reply to: 230154186 [](ancestors = 230154186)

/// <param name="catalog">The transform's catalog.</param>
/// <param name="inputColumn">Input column to apply PrincipalComponentAnalysis on.</param>
/// <param name="outputColumn">Optional output column. Null means <paramref name="inputColumn"/> is replaced.</param>
/// <param name="weightColumn">The name of the weight column.</param>
/// <param name="rank">The number of principal components.</param>
/// <param name="overSampling">Oversampling parameter for randomized PrincipalComponentAnalysis training.</param>
/// <param name="center">If enabled, data is centered to be zero mean.</param>
/// <param name="seed">The seed for random number generation.</param>
/// <returns>The newly constructed <see cref="PrincipalComponentAnalysisEstimator"/>.</returns>
public static PrincipalComponentAnalysisEstimator ProjectToPrincipalComponents(this TransformsCatalog.ProjectionTransforms catalog,
    string inputColumn,
    string outputColumn = null,
    string weightColumn = PrincipalComponentAnalysisEstimator.Defaults.WeightColumn,
    int rank = PrincipalComponentAnalysisEstimator.Defaults.Rank,
    int overSampling = PrincipalComponentAnalysisEstimator.Defaults.Oversampling,
    bool center = PrincipalComponentAnalysisEstimator.Defaults.Center,
    int? seed = null)
{
    // The estimator is built on the environment obtained from the catalog;
    // every other argument is forwarded to the constructor unchanged.
    var env = CatalogUtils.GetEnvironment(catalog);
    return new PrincipalComponentAnalysisEstimator(
        env,
        inputColumn,
        outputColumn,
        weightColumn,
        rank,
        overSampling,
        center,
        seed);
}

/// <summary>
/// Creates a <see cref="PrincipalComponentAnalysisEstimator"/> from per-column settings.
/// </summary>
/// <param name="catalog">The transform's catalog.</param>
/// <param name="columns">Input columns to apply PrincipalComponentAnalysis on.</param>
/// <returns>The newly constructed estimator.</returns>
public static PrincipalComponentAnalysisEstimator ProjectToPrincipalComponents(this TransformsCatalog.ProjectionTransforms catalog, params PcaTransform.ColumnInfo[] columns)
{
    // The estimator is built on the environment obtained from the catalog.
    var env = CatalogUtils.GetEnvironment(catalog);
    return new PrincipalComponentAnalysisEstimator(env, columns);
}
}
}
25 changes: 25 additions & 0 deletions src/Microsoft.ML.Transforms/ExtensionsCatalog.cs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,31 @@ public static MissingValueIndicatorEstimator IndicateMissingValues(this Transfor
=> new MissingValueIndicatorEstimator(CatalogUtils.GetEnvironment(catalog), inputColumn, outputColumn);
}

public static class MissingValueReplacerCatalog
{
/// <summary>
/// Initializes a new instance of <see cref="MissingValueReplacingEstimator"/>
/// </summary>
/// <param name="catalog">The transform's catalog.</param>
/// <param name="inputColumn">The name of the input column.</param>
/// <param name="outputColumn">The optional name of the output column.
/// If not provided, the <paramref name="inputColumn"/> will be replaced with the results of the transforms.</param>
singlis marked this conversation as resolved.
Show resolved Hide resolved
/// <param name="replacementKind">The type of replacement to use, as specified in <see cref="NAReplaceTransform.ColumnInfo.ReplacementMode"/>.</param>
public static MissingValueReplacingEstimator ReplaceMissingValues(this TransformsCatalog catalog,
    string inputColumn,
    string outputColumn = null,
    NAReplaceTransform.ColumnInfo.ReplacementMode replacementKind = MissingValueReplacingEstimator.Defaults.ReplacementMode)
{
    // The estimator is built on the environment obtained from the catalog.
    var env = CatalogUtils.GetEnvironment(catalog);
    return new MissingValueReplacingEstimator(env, inputColumn, outputColumn, replacementKind);
}
singlis marked this conversation as resolved.
Show resolved Hide resolved

/// <summary>
/// Creates a <see cref="MissingValueReplacingEstimator"/> from per-column settings.
/// </summary>
/// <param name="catalog">The transform's catalog.</param>
/// <param name="columns">The names of the columns to use, and the per-column transformation configuration.</param>
/// <returns>The newly constructed estimator.</returns>
public static MissingValueReplacingEstimator ReplaceMissingValues(this TransformsCatalog catalog, params NAReplaceTransform.ColumnInfo[] columns)
{
    // The estimator is built on the environment obtained from the catalog.
    var env = CatalogUtils.GetEnvironment(catalog);
    return new MissingValueReplacingEstimator(env, columns);
}
}

/// <summary>
/// Extensions for KeyToVectorMappingEstimator.
/// </summary>
Expand Down
36 changes: 36 additions & 0 deletions src/Microsoft.ML.Transforms/ProjectionCatalog.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using Microsoft.ML.Runtime;
using Microsoft.ML.Runtime.Data;
using Microsoft.ML.Transforms.Projections;

namespace Microsoft.ML
{
public static class ProjectionCatalog
{
/// <summary>
/// Initializes a new instance of <see cref="RandomFourierFeaturizingEstimator"/>.
/// </summary>
/// <param name="catalog">The transform's catalog.</param>
/// <param name="inputColumn">Name of the column to be transformed.</param>
/// <param name="outputColumn">Name of the output column. If this is null '<paramref name="inputColumn"/>' will be used.</param>
/// <param name="newDim">The number of random Fourier features to create.</param>
/// <param name="useSin">Create two features for every random Fourier frequency? (one for cos and one for sin).</param>
singlis marked this conversation as resolved.
Show resolved Hide resolved
public static RandomFourierFeaturizingEstimator CreateRandomFourierFeatures(this TransformsCatalog.ProjectionTransforms catalog,
    string inputColumn,
    string outputColumn = null,
    int newDim = RandomFourierFeaturizingEstimator.Defaults.NewDim,
    bool useSin = RandomFourierFeaturizingEstimator.Defaults.UseSin)
{
    // The estimator is built on the environment obtained from the catalog;
    // the column names and options are forwarded to the constructor unchanged.
    var env = CatalogUtils.GetEnvironment(catalog);
    return new RandomFourierFeaturizingEstimator(env, inputColumn, outputColumn, newDim, useSin);
}

/// <summary>
/// Creates a <see cref="RandomFourierFeaturizingEstimator"/> from per-column settings.
/// </summary>
/// <param name="catalog">The transform's catalog.</param>
/// <param name="columns">The input columns to use for the transformation.</param>
/// <returns>The newly constructed estimator.</returns>
public static RandomFourierFeaturizingEstimator CreateRandomFourierFeatures(this TransformsCatalog.ProjectionTransforms catalog, params RffTransform.ColumnInfo[] columns)
{
    // The estimator is built on the environment obtained from the catalog.
    var env = CatalogUtils.GetEnvironment(catalog);
    return new RandomFourierFeaturizingEstimator(env, columns);
}
}
}