diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Ranking/LightGbm.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Ranking/LightGbm.cs
index c3bd9d604e..576b98cbee 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Ranking/LightGbm.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Ranking/LightGbm.cs
@@ -20,10 +20,10 @@ public static void Example()
// Create the Estimator pipeline. For simplicity, we will train a small tree with 4 leaves and 2 boosting iterations.
var pipeline = mlContext.Ranking.Trainers.LightGbm(
- numLeaves: 4,
- minDataPerLeaf: 10,
+ leafCount: 4,
+ minimumDataPerLeaf: 10,
learningRate: 0.1,
- numBoostRound: 2);
+ numberOfIterations: 2);
// Fit this Pipeline to the Training Data.
var model = pipeline.Fit(split.TrainSet);
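For reference, the updated ranking sample call, read end to end, comes out roughly as follows (a minimal sketch assuming the mlContext and split variables created earlier in the sample file):

    // Renamed hyperparameters: numLeaves -> leafCount, minDataPerLeaf -> minimumDataPerLeaf,
    // numBoostRound -> numberOfIterations.
    var pipeline = mlContext.Ranking.Trainers.LightGbm(
        leafCount: 4,
        minimumDataPerLeaf: 10,
        learningRate: 0.1,
        numberOfIterations: 2);
    var model = pipeline.Fit(split.TrainSet);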
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Ranking/LightGbmWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Ranking/LightGbmWithOptions.cs
index dc898fb4d3..235d30e078 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Ranking/LightGbmWithOptions.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Ranking/LightGbmWithOptions.cs
@@ -23,10 +23,10 @@ public static void Example()
var pipeline = mlContext.Ranking.Trainers.LightGbm(
new Options
{
- NumLeaves = 4,
- MinDataPerLeaf = 10,
+ NumberOfLeaves = 4,
+ MinimumDataPerLeaf = 10,
LearningRate = 0.1,
- NumBoostRound = 2,
+ NumberOfIterations = 2,
Booster = new TreeBooster.Options
{
FeatureFraction = 0.9
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGbm.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGbm.cs
index ce9e27a0fc..5f43a78c25 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGbm.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGbm.cs
@@ -35,8 +35,8 @@ public static void Example()
var pipeline = mlContext.Transforms.Concatenate("Features", featureNames)
.Append(mlContext.Regression.Trainers.LightGbm(
labelColumnName: labelName,
- numLeaves: 4,
- minDataPerLeaf: 6,
+ leafCount: 4,
+ minimumDataPerLeaf: 6,
learningRate: 0.001));
// Fit this pipeline to the training data.
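Similarly, the renamed regression sample pipeline reads roughly as below (a sketch assuming the featureNames and labelName variables from the surrounding sample; the trained model is obtained by fitting to the training data as in the sample):

    // Renamed hyperparameters: numLeaves -> leafCount, minDataPerLeaf -> minimumDataPerLeaf.
    var pipeline = mlContext.Transforms.Concatenate("Features", featureNames)
        .Append(mlContext.Regression.Trainers.LightGbm(
            labelColumnName: labelName,
            leafCount: 4,
            minimumDataPerLeaf: 6,
            learningRate: 0.001));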
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGbmWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGbmWithOptions.cs
index e93eeb3f96..c2255554fb 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGbmWithOptions.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGbmWithOptions.cs
@@ -39,8 +39,8 @@ public static void Example()
.Append(mlContext.Regression.Trainers.LightGbm(new Options
{
LabelColumnName = labelName,
- NumLeaves = 4,
- MinDataPerLeaf = 6,
+ NumberOfLeaves = 4,
+ MinimumDataPerLeaf = 6,
LearningRate = 0.001,
Booster = new GossBooster.Options
{
diff --git a/docs/samples/Microsoft.ML.Samples/Static/LightGBMRegression.cs b/docs/samples/Microsoft.ML.Samples/Static/LightGBMRegression.cs
index 61225fe1e9..aa2f2e65fc 100644
--- a/docs/samples/Microsoft.ML.Samples/Static/LightGBMRegression.cs
+++ b/docs/samples/Microsoft.ML.Samples/Static/LightGBMRegression.cs
@@ -38,8 +38,8 @@ public static void LightGbmRegression()
.Append(r => (r.label, score: mlContext.Regression.Trainers.LightGbm(
r.label,
r.features,
- numLeaves: 4,
- minDataPerLeaf: 6,
+ numberOfLeaves: 4,
+ minimumDataPerLeaf: 6,
learningRate: 0.001,
onFit: p => pred = p)
)
diff --git a/src/Microsoft.ML.LightGBM.StaticPipe/LightGbmStaticExtensions.cs b/src/Microsoft.ML.LightGBM.StaticPipe/LightGbmStaticExtensions.cs
index 581bbc0b59..9fc5c33c95 100644
--- a/src/Microsoft.ML.LightGBM.StaticPipe/LightGbmStaticExtensions.cs
+++ b/src/Microsoft.ML.LightGBM.StaticPipe/LightGbmStaticExtensions.cs
@@ -21,10 +21,10 @@ public static class LightGbmStaticExtensions
/// The label column.
/// The features column.
/// The weights column.
- /// The number of leaves to use.
- /// Number of iterations.
- /// The minimal number of documents allowed in a leaf of the tree, out of the subsampled data.
+ /// The number of leaves to use.
+ /// The minimal number of documents allowed in a leaf of the tree, out of the subsampled data.
/// The learning rate.
+ /// Number of iterations.
/// A delegate that is called every time the
/// method is called on the
/// instance created out of this. This delegate will receive
@@ -39,19 +39,19 @@ public static class LightGbmStaticExtensions
///
public static Scalar LightGbm(this RegressionCatalog.RegressionTrainers catalog,
Scalar label, Vector features, Scalar weights = null,
- int? numLeaves = null,
- int? minDataPerLeaf = null,
+ int? numberOfLeaves = null,
+ int? minimumDataPerLeaf = null,
double? learningRate = null,
- int numBoostRound = Options.Defaults.NumBoostRound,
+ int numberOfIterations = Options.Defaults.NumberOfIterations,
Action onFit = null)
{
- CheckUserValues(label, features, weights, numLeaves, minDataPerLeaf, learningRate, numBoostRound, onFit);
+ CheckUserValues(label, features, weights, numberOfLeaves, minimumDataPerLeaf, learningRate, numberOfIterations, onFit);
var rec = new TrainerEstimatorReconciler.Regression(
(env, labelName, featuresName, weightsName) =>
{
- var trainer = new LightGbmRegressorTrainer(env, labelName, featuresName, weightsName, numLeaves,
- minDataPerLeaf, learningRate, numBoostRound);
+ var trainer = new LightGbmRegressorTrainer(env, labelName, featuresName, weightsName, numberOfLeaves,
+ minimumDataPerLeaf, learningRate, numberOfIterations);
if (onFit != null)
return trainer.WithOnFitDelegate(trans => onFit(trans.Model));
return trainer;
@@ -122,11 +122,13 @@ public static Scalar LightGbm(this RegressionCatalog.RegressionTrainers c
/// ]]>
///
public static (Scalar score, Scalar probability, Scalar predictedLabel) LightGbm(this BinaryClassificationCatalog.BinaryClassificationTrainers catalog,
- Scalar label, Vector features, Scalar weights = null,
+ Scalar label,
+ Vector features,
+ Scalar weights = null,
int? numLeaves = null,
int? minDataPerLeaf = null,
double? learningRate = null,
- int numBoostRound = Options.Defaults.NumBoostRound,
+ int numBoostRound = Options.Defaults.NumberOfIterations,
Action> onFit = null)
{
CheckUserValues(label, features, weights, numLeaves, minDataPerLeaf, learningRate, numBoostRound, onFit);
@@ -194,9 +196,9 @@ public static (Scalar score, Scalar probability, Scalar pred
/// The features column.
/// The groupId column.
/// The weights column.
- /// The number of leaves to use.
- /// Number of iterations.
- /// The minimal number of documents allowed in a leaf of the tree, out of the subsampled data.
+ /// The number of leaves to use.
+ /// Number of iterations.
+ /// The minimal number of documents allowed in a leaf of the tree, out of the subsampled data.
/// The learning rate.
/// A delegate that is called every time the
/// method is called on the
@@ -206,21 +208,24 @@ public static (Scalar score, Scalar probability, Scalar pred
/// The set of output columns including in order the predicted binary classification score (which will range
/// from negative to positive infinity), the calibrated prediction (from 0 to 1), and the predicted label.
public static Scalar LightGbm(this RankingCatalog.RankingTrainers catalog,
- Scalar label, Vector features, Key groupId, Scalar weights = null,
- int? numLeaves = null,
- int? minDataPerLeaf = null,
+ Scalar label,
+ Vector features,
+ Key groupId,
+ Scalar weights = null,
+ int? numberOfLeaves = null,
+ int? minimumDataPerLeaf = null,
double? learningRate = null,
- int numBoostRound = Options.Defaults.NumBoostRound,
+ int numberOfIterations = Options.Defaults.NumberOfIterations,
Action onFit = null)
{
- CheckUserValues(label, features, weights, numLeaves, minDataPerLeaf, learningRate, numBoostRound, onFit);
+ CheckUserValues(label, features, weights, numberOfLeaves, minimumDataPerLeaf, learningRate, numberOfIterations, onFit);
Contracts.CheckValue(groupId, nameof(groupId));
var rec = new TrainerEstimatorReconciler.Ranker(
(env, labelName, featuresName, groupIdName, weightsName) =>
{
- var trainer = new LightGbmRankingTrainer(env, labelName, featuresName, groupIdName, weightsName, numLeaves,
- minDataPerLeaf, learningRate, numBoostRound);
+ var trainer = new LightGbmRankingTrainer(env, labelName, featuresName, groupIdName, weightsName, numberOfLeaves,
+ minimumDataPerLeaf, learningRate, numberOfIterations);
if (onFit != null)
return trainer.WithOnFitDelegate(trans => onFit(trans.Model));
@@ -279,10 +284,10 @@ public static Scalar LightGbm(this RankingCatalog.RankingTrainers c
/// The label, or dependent variable.
/// The features, or independent variables.
/// The weights column.
- /// The number of leaves to use.
- /// Number of iterations.
- /// The minimal number of documents allowed in a leaf of the tree, out of the subsampled data.
+ /// The number of leaves to use.
+ /// The minimal number of documents allowed in a leaf of the tree, out of the subsampled data.
/// The learning rate.
+ /// Number of iterations.
/// A delegate that is called every time the
/// method is called on the
/// instance created out of this. This delegate will receive
@@ -301,19 +306,19 @@ public static (Vector score, Key predictedLabel)
Key label,
Vector features,
Scalar weights = null,
- int? numLeaves = null,
- int? minDataPerLeaf = null,
+ int? numberOfLeaves = null,
+ int? minimumDataPerLeaf = null,
double? learningRate = null,
- int numBoostRound = Options.Defaults.NumBoostRound,
+ int numberOfIterations = Options.Defaults.NumberOfIterations,
Action onFit = null)
{
- CheckUserValues(label, features, weights, numLeaves, minDataPerLeaf, learningRate, numBoostRound, onFit);
+ CheckUserValues(label, features, weights, numberOfLeaves, minimumDataPerLeaf, learningRate, numberOfIterations, onFit);
var rec = new TrainerEstimatorReconciler.MulticlassClassifier(
(env, labelName, featuresName, weightsName) =>
{
- var trainer = new LightGbmMulticlassTrainer(env, labelName, featuresName, weightsName, numLeaves,
- minDataPerLeaf, learningRate, numBoostRound);
+ var trainer = new LightGbmMulticlassTrainer(env, labelName, featuresName, weightsName, numberOfLeaves,
+ minimumDataPerLeaf, learningRate, numberOfIterations);
if (onFit != null)
return trainer.WithOnFitDelegate(trans => onFit(trans.Model));
diff --git a/src/Microsoft.ML.LightGBM/LightGbmArguments.cs b/src/Microsoft.ML.LightGBM/LightGbmArguments.cs
index 0b635bfaed..047c19b88d 100644
--- a/src/Microsoft.ML.LightGBM/LightGbmArguments.cs
+++ b/src/Microsoft.ML.LightGBM/LightGbmArguments.cs
@@ -66,14 +66,14 @@ internal virtual void UpdateParameters(Dictionary res)
if (attribute == null)
continue;
- res[GetArgName(field.Name)] = field.GetValue(BoosterParameterOptions);
+ res[GetOptionName(field.Name)] = field.GetValue(BoosterParameterOptions);
}
}
void IBoosterParameter.UpdateParameters(Dictionary res) => UpdateParameters(res);
}
- private static string GetArgName(string name)
+ private static string GetOptionName(string name)
{
StringBuilder strBuf = new StringBuilder();
bool first = true;
@@ -96,7 +96,7 @@ private static string GetArgName(string name)
[BestFriend]
internal static class Defaults
{
- public const int NumBoostRound = 100;
+ public const int NumberOfIterations = 100;
}
public sealed class TreeBooster : BoosterParameter
@@ -107,7 +107,7 @@ public sealed class TreeBooster : BoosterParameter
[TlcModule.Component(Name = Name, FriendlyName = FriendlyName, Desc = "Traditional Gradient Boosting Decision Tree.")]
public class Options : ISupportBoosterParameterFactory
{
- [Argument(ArgumentType.AtMostOnce, HelpText = "Use for binary classification when classes are not balanced.", ShortName = "us")]
+ [Argument(ArgumentType.AtMostOnce, HelpText = "Use for binary classification when training data is not balanced.", ShortName = "us")]
public bool UnbalancedSets = false;
[Argument(ArgumentType.AtMostOnce,
@@ -129,7 +129,7 @@ public class Options : ISupportBoosterParameterFactory
public double MinChildWeight = 0.1;
[Argument(ArgumentType.AtMostOnce,
- HelpText = "Subsample frequency. 0 means no subsample. "
+ HelpText = "Subsample frequency for bagging. 0 means no subsample. "
+ "If subsampleFreq > 0, it will use a subset(ratio=subsample) to train. And the subset will be updated on every Subsample iteratinos.")]
[TlcModule.Range(Min = 0, Max = int.MaxValue)]
public int SubsampleFreq = 0;
@@ -179,7 +179,9 @@ internal TreeBooster(Options options)
Contracts.CheckUserArg(BoosterParameterOptions.MinChildWeight >= 0, nameof(BoosterParameterOptions.MinChildWeight), "must be >= 0.");
Contracts.CheckUserArg(BoosterParameterOptions.Subsample > 0 && BoosterParameterOptions.Subsample <= 1, nameof(BoosterParameterOptions.Subsample), "must be in (0,1].");
Contracts.CheckUserArg(BoosterParameterOptions.FeatureFraction > 0 && BoosterParameterOptions.FeatureFraction <= 1, nameof(BoosterParameterOptions.FeatureFraction), "must be in (0,1].");
- Contracts.CheckUserArg(BoosterParameterOptions.ScalePosWeight > 0 && BoosterParameterOptions.ScalePosWeight <= 1, nameof(BoosterParameterOptions.ScalePosWeight), "must be in (0,1].");
+ Contracts.CheckUserArg(BoosterParameterOptions.RegLambda >= 0, nameof(BoosterParameterOptions.RegLambda), "must be >= 0.");
+ Contracts.CheckUserArg(BoosterParameterOptions.RegAlpha >= 0, nameof(BoosterParameterOptions.RegAlpha), "must be >= 0.");
+            Contracts.CheckUserArg(BoosterParameterOptions.ScalePosWeight > 0, nameof(BoosterParameterOptions.ScalePosWeight), "must be > 0.");
}
internal override void UpdateParameters(Dictionary res)
@@ -197,15 +199,15 @@ public sealed class DartBooster : BoosterParameter
[TlcModule.Component(Name = Name, FriendlyName = FriendlyName, Desc = "Dropouts meet Multiple Additive Regresion Trees. See https://arxiv.org/abs/1505.01866")]
public sealed class Options : TreeBooster.Options
{
- [Argument(ArgumentType.AtMostOnce, HelpText = "Drop ratio for trees. Range:(0,1).")]
+ [Argument(ArgumentType.AtMostOnce, HelpText = "The drop ratio for trees. Range:(0,1).")]
[TlcModule.Range(Inf = 0.0, Max = 1.0)]
public double DropRate = 0.1;
- [Argument(ArgumentType.AtMostOnce, HelpText = "Max number of dropped tree in a boosting round.")]
+ [Argument(ArgumentType.AtMostOnce, HelpText = "Maximum number of dropped tree in a boosting round.")]
[TlcModule.Range(Inf = 0, Max = int.MaxValue)]
public int MaxDrop = 1;
- [Argument(ArgumentType.AtMostOnce, HelpText = "Probability for not perform dropping in a boosting round.")]
+ [Argument(ArgumentType.AtMostOnce, HelpText = "Probability for not dropping in a boosting round.")]
[TlcModule.Range(Inf = 0.0, Max = 1.0)]
public double SkipDrop = 0.5;
@@ -222,7 +224,6 @@ internal DartBooster(Options options)
: base(options)
{
Contracts.CheckUserArg(BoosterParameterOptions.DropRate > 0 && BoosterParameterOptions.DropRate < 1, nameof(BoosterParameterOptions.DropRate), "must be in (0,1).");
- Contracts.CheckUserArg(BoosterParameterOptions.MaxDrop > 0, nameof(BoosterParameterOptions.MaxDrop), "must be > 0.");
Contracts.CheckUserArg(BoosterParameterOptions.SkipDrop >= 0 && BoosterParameterOptions.SkipDrop < 1, nameof(BoosterParameterOptions.SkipDrop), "must be in [0,1).");
}
@@ -241,14 +242,11 @@ public sealed class GossBooster : BoosterParameter
[TlcModule.Component(Name = Name, FriendlyName = FriendlyName, Desc = "Gradient-based One-Side Sampling.")]
public sealed class Options : TreeBooster.Options
{
- [Argument(ArgumentType.AtMostOnce,
- HelpText = "Retain ratio for large gradient instances.")]
+ [Argument(ArgumentType.AtMostOnce, HelpText = "Retain ratio for large gradient instances.")]
[TlcModule.Range(Inf = 0.0, Max = 1.0)]
public double TopRate = 0.2;
- [Argument(ArgumentType.AtMostOnce,
- HelpText =
- "Retain ratio for small gradient instances.")]
+ [Argument(ArgumentType.AtMostOnce, HelpText = "Retain ratio for small gradient instances.")]
[TlcModule.Range(Inf = 0.0, Max = 1.0)]
public double OtherRate = 0.1;
@@ -287,7 +285,7 @@ public enum EvalMetricType
[Argument(ArgumentType.AtMostOnce, HelpText = "Number of iterations.", SortOrder = 1, ShortName = "iter")]
[TGUI(Label = "Number of boosting iterations", SuggestedSweeps = "10,20,50,100,150,200")]
[TlcModule.SweepableDiscreteParam("NumBoostRound", new object[] { 10, 20, 50, 100, 150, 200 })]
- public int NumBoostRound = Defaults.NumBoostRound;
+ public int NumberOfIterations = Defaults.NumberOfIterations;
[Argument(ArgumentType.AtMostOnce,
HelpText = "Shrinkage rate for trees, used to prevent over-fitting. Range: (0,1].",
@@ -300,37 +298,37 @@ public enum EvalMetricType
SortOrder = 2, ShortName = "nl", NullName = "")]
[TGUI(Description = "The maximum number of leaves per tree", SuggestedSweeps = "2-128;log;inc:4")]
[TlcModule.SweepableLongParamAttribute("NumLeaves", 2, 128, isLogScale: true, stepSize: 4)]
- public int? NumLeaves;
+ public int? NumberOfLeaves;
[Argument(ArgumentType.AtMostOnce, HelpText = "Minimum number of instances needed in a child.",
SortOrder = 2, ShortName = "mil", NullName = "")]
[TGUI(Label = "Min Documents In Leaves", SuggestedSweeps = "1,10,20,50 ")]
[TlcModule.SweepableDiscreteParamAttribute("MinDataPerLeaf", new object[] { 1, 10, 20, 50 })]
- public int? MinDataPerLeaf;
+ public int? MinimumDataPerLeaf;
- [Argument(ArgumentType.AtMostOnce, HelpText = "Max number of bucket bin for features.", ShortName = "mb")]
- public int MaxBin = 255;
+ [Argument(ArgumentType.AtMostOnce, HelpText = "Maximum number of bucket bin for features.", ShortName = "mb")]
+ public int MaximumBin = 255;
[Argument(ArgumentType.Multiple, HelpText = "Which booster to use, can be gbtree, gblinear or dart. gbtree and dart use tree based model while gblinear uses linear function.", SortOrder = 3)]
public ISupportBoosterParameterFactory Booster = new TreeBooster.Options();
[Argument(ArgumentType.AtMostOnce, HelpText = "Verbose", ShortName = "v")]
- public bool VerboseEval = false;
+ public bool Verbose = false;
[Argument(ArgumentType.AtMostOnce, HelpText = "Printing running messages.")]
public bool Silent = true;
[Argument(ArgumentType.AtMostOnce, HelpText = "Number of parallel threads used to run LightGBM.", ShortName = "nt")]
- public int? NThread;
+ public int? NumberOfThreads;
[Argument(ArgumentType.AtMostOnce,
HelpText = "Evaluation metrics.",
ShortName = "em")]
- public EvalMetricType EvalMetric = EvalMetricType.DefaultMetric;
+ public EvalMetricType EvaluationMetric = EvalMetricType.DefaultMetric;
[Argument(ArgumentType.AtMostOnce, HelpText = "Use softmax loss for the multi classification.")]
[TlcModule.SweepableDiscreteParam("UseSoftmax", new object[] { true, false })]
- public bool? UseSoftmax;
+ public bool? UseSoftMaximum;
[Argument(ArgumentType.AtMostOnce, HelpText = "Rounds of early stopping, 0 will disable it.",
ShortName = "es")]
@@ -350,31 +348,31 @@ public enum EvalMetricType
[Argument(ArgumentType.AtMostOnce, HelpText = "Enable categorical split or not.", ShortName = "cat")]
[TlcModule.SweepableDiscreteParam("UseCat", new object[] { true, false })]
- public bool? UseCat;
+ public bool? UseCategoricalSplit;
- [Argument(ArgumentType.AtMostOnce, HelpText = "Enable missing value auto infer or not.")]
+ [Argument(ArgumentType.AtMostOnce, HelpText = "Enable special handling of missing value or not.")]
[TlcModule.SweepableDiscreteParam("UseMissing", new object[] { true, false })]
public bool UseMissing = false;
- [Argument(ArgumentType.AtMostOnce, HelpText = "Min number of instances per categorical group.", ShortName = "mdpg")]
+ [Argument(ArgumentType.AtMostOnce, HelpText = "Minimum number of instances per categorical group.", ShortName = "mdpg")]
[TlcModule.Range(Inf = 0, Max = int.MaxValue)]
[TlcModule.SweepableDiscreteParam("MinDataPerGroup", new object[] { 10, 50, 100, 200 })]
- public int MinDataPerGroup = 100;
+ public int MinimumDataPerGroup = 100;
[Argument(ArgumentType.AtMostOnce, HelpText = "Max number of categorical thresholds.", ShortName = "maxcat")]
[TlcModule.Range(Inf = 0, Max = int.MaxValue)]
[TlcModule.SweepableDiscreteParam("MaxCatThreshold", new object[] { 8, 16, 32, 64 })]
- public int MaxCatThreshold = 32;
+ public int MaximumCategoricalThreshold = 32;
[Argument(ArgumentType.AtMostOnce, HelpText = "Lapalace smooth term in categorical feature spilt. Avoid the bias of small categories.")]
[TlcModule.Range(Min = 0.0)]
[TlcModule.SweepableDiscreteParam("CatSmooth", new object[] { 1, 10, 20 })]
- public double CatSmooth = 10;
+ public double CategoricalSmoothing = 10;
[Argument(ArgumentType.AtMostOnce, HelpText = "L2 Regularization for categorical split.")]
[TlcModule.Range(Min = 0.0)]
[TlcModule.SweepableDiscreteParam("CatL2", new object[] { 0.1, 0.5, 1, 5, 10 })]
- public double CatL2 = 10;
+ public double L2Categorical = 10;
[Argument(ArgumentType.AtMostOnce, HelpText = "Sets the random seed for LightGBM to use.")]
public int? Seed;
@@ -385,23 +383,23 @@ public enum EvalMetricType
internal Dictionary ToDictionary(IHost host)
{
Contracts.CheckValue(host, nameof(host));
- Contracts.CheckUserArg(MaxBin > 0, nameof(MaxBin), "must be > 0.");
+ Contracts.CheckUserArg(MaximumBin > 0, nameof(MaximumBin), "must be > 0.");
Contracts.CheckUserArg(Sigmoid > 0, nameof(Sigmoid), "must be > 0.");
Dictionary res = new Dictionary();
var boosterParams = Booster.CreateComponent(host);
boosterParams.UpdateParameters(res);
- res[GetArgName(nameof(MaxBin))] = MaxBin;
+ res["max_bin"] = MaximumBin;
res["verbose"] = Silent ? "-1" : "1";
- if (NThread.HasValue)
- res["nthread"] = NThread.Value;
+ if (NumberOfThreads.HasValue)
+ res["nthread"] = NumberOfThreads.Value;
res["seed"] = (Seed.HasValue) ? Seed : host.Rand.Next();
string metric = null;
- switch (EvalMetric)
+ switch (EvaluationMetric)
{
case EvalMetricType.DefaultMetric:
break;
@@ -424,18 +422,18 @@ internal Dictionary ToDictionary(IHost host)
case EvalMetricType.Auc:
case EvalMetricType.Ndcg:
case EvalMetricType.Map:
- metric = EvalMetric.ToString().ToLower();
+ metric = EvaluationMetric.ToString().ToLower();
break;
}
if (!string.IsNullOrEmpty(metric))
res["metric"] = metric;
res["sigmoid"] = Sigmoid;
res["label_gain"] = CustomGains;
- res[GetArgName(nameof(UseMissing))] = UseMissing;
- res[GetArgName(nameof(MinDataPerGroup))] = MinDataPerGroup;
- res[GetArgName(nameof(MaxCatThreshold))] = MaxCatThreshold;
- res[GetArgName(nameof(CatSmooth))] = CatSmooth;
- res[GetArgName(nameof(CatL2))] = CatL2;
+ res["use_missing"] = UseMissing;
+ res["min_data_per_group"] = MinimumDataPerGroup;
+ res["max_cat_threshold"] = MaximumCategoricalThreshold;
+ res["cat_smooth"] = CategoricalSmoothing;
+ res["cat_l2"] = L2Categorical;
return res;
}
}
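Taken together, the renamed public options map onto a trainer call roughly like this (an illustrative sketch, not taken verbatim from the samples; property values are arbitrary and only the names reflect the renames above):

    var options = new Options
    {
        NumberOfIterations = 2,                            // was NumBoostRound
        LearningRate = 0.1,
        NumberOfLeaves = 4,                                // was NumLeaves
        MinimumDataPerLeaf = 10,                           // was MinDataPerLeaf
        MaximumBin = 255,                                  // was MaxBin
        Verbose = false,                                   // was VerboseEval
        NumberOfThreads = 1,                               // was NThread
        EvaluationMetric = EvalMetricType.DefaultMetric,   // was EvalMetric
        UseCategoricalSplit = false,                       // was UseCat
        Booster = new TreeBooster.Options { FeatureFraction = 0.9 }
    };
    var pipeline = mlContext.Ranking.Trainers.LightGbm(options);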
diff --git a/src/Microsoft.ML.LightGBM/LightGbmBinaryTrainer.cs b/src/Microsoft.ML.LightGBM/LightGbmBinaryTrainer.cs
index 5df67201f8..21693a6882 100644
--- a/src/Microsoft.ML.LightGBM/LightGbmBinaryTrainer.cs
+++ b/src/Microsoft.ML.LightGBM/LightGbmBinaryTrainer.cs
@@ -102,22 +102,22 @@ internal LightGbmBinaryTrainer(IHostEnvironment env, Options options)
/// Initializes a new instance of
///
/// The private instance of .
- /// The name of The label column.
- /// The name of the feature column.
+        /// The name of the label column.
+ /// The name of the feature column.
/// The name for the column containing the initial weight.
- /// The number of leaves to use.
- /// Number of iterations.
- /// The minimal number of documents allowed in a leaf of the tree, out of the subsampled data.
+ /// The number of leaves to use.
+ /// The minimal number of documents allowed in a leaf of the tree, out of the subsampled data.
/// The learning rate.
+ /// Number of iterations.
internal LightGbmBinaryTrainer(IHostEnvironment env,
- string labelColumn = DefaultColumnNames.Label,
- string featureColumn = DefaultColumnNames.Features,
+ string labelColumnName = DefaultColumnNames.Label,
+ string featureColumnName = DefaultColumnNames.Features,
string weights = null,
- int? numLeaves = null,
- int? minDataPerLeaf = null,
+ int? leafCount = null,
+ int? minimumDataPerLeaf = null,
double? learningRate = null,
- int numBoostRound = LightGBM.Options.Defaults.NumBoostRound)
- : base(env, LoadNameValue, TrainerUtils.MakeBoolScalarLabel(labelColumn), featureColumn, weights, null, numLeaves, minDataPerLeaf, learningRate, numBoostRound)
+ int numberOfIterations = LightGBM.Options.Defaults.NumberOfIterations)
+ : base(env, LoadNameValue, TrainerUtils.MakeBoolScalarLabel(labelColumnName), featureColumnName, weights, null, leafCount, minimumDataPerLeaf, learningRate, numberOfIterations)
{
}
@@ -138,7 +138,7 @@ private protected override void CheckDataValid(IChannel ch, RoleMappedData data)
if (!(labelType is BooleanDataViewType || labelType is KeyType || labelType == NumberDataViewType.Single))
{
throw ch.ExceptParam(nameof(data),
- $"Label column '{data.Schema.Label.Value.Name}' is of type '{labelType}', but must be key, boolean or R4.");
+ $"Label column '{data.Schema.Label.Value.Name}' is of type '{labelType.RawType}', but must be unsigned int, boolean or float.");
}
}
diff --git a/src/Microsoft.ML.LightGBM/LightGbmCatalog.cs b/src/Microsoft.ML.LightGBM/LightGbmCatalog.cs
index 3e38fa248a..03988ad45f 100644
--- a/src/Microsoft.ML.LightGBM/LightGbmCatalog.cs
+++ b/src/Microsoft.ML.LightGBM/LightGbmCatalog.cs
@@ -20,10 +20,10 @@ public static class LightGbmExtensions
/// The name of the label column.
/// The name of the feature column.
/// The name of the example weight column (optional).
- /// The number of leaves to use.
- /// Number of iterations.
- /// The minimal number of documents allowed in a leaf of the tree, out of the subsampled data.
+ /// The number of leaves to use.
+ /// The minimal number of documents allowed in a leaf of the tree, out of the subsampled data.
/// The learning rate.
+ /// The number of iterations to use.
///
///
///
@@ -72,10 +72,10 @@ public static LightGbmRegressorTrainer LightGbm(this RegressionCatalog.Regressio
/// The name of the label column.
/// The name of the feature column.
/// The name of the example weight column (optional).
- /// The number of leaves to use.
- /// Number of iterations.
- /// The minimal number of documents allowed in a leaf of the tree, out of the subsampled data.
+ /// The number of leaves to use.
+ /// The minimal number of documents allowed in a leaf of the tree, out of the subsampled data.
/// The learning rate.
+ /// The number of iterations to use.
///
///
///
@@ -125,23 +125,23 @@ public static LightGbmBinaryTrainer LightGbm(this BinaryClassificationCatalog.Bi
/// The name of the feature column.
/// The name of the group column.
/// The name of the example weight column (optional).
- /// The number of leaves to use.
- /// Number of iterations.
- /// The minimal number of documents allowed in a leaf of the tree, out of the subsampled data.
+ /// The number of leaves to use.
+ /// The minimal number of documents allowed in a leaf of the tree, out of the subsampled data.
/// The learning rate.
+ /// The number of iterations to use.
public static LightGbmRankingTrainer LightGbm(this RankingCatalog.RankingTrainers catalog,
string labelColumnName = DefaultColumnNames.Label,
string featureColumnName = DefaultColumnNames.Features,
string rowGroupColumnName = DefaultColumnNames.GroupId,
string exampleWeightColumnName = null,
- int? numLeaves = null,
- int? minDataPerLeaf = null,
+ int? leafCount = null,
+ int? minimumDataPerLeaf = null,
double? learningRate = null,
- int numBoostRound = Options.Defaults.NumBoostRound)
+ int numberOfIterations = Options.Defaults.NumberOfIterations)
{
Contracts.CheckValue(catalog, nameof(catalog));
var env = CatalogUtils.GetEnvironment(catalog);
- return new LightGbmRankingTrainer(env, labelColumnName, featureColumnName, rowGroupColumnName, exampleWeightColumnName, numLeaves, minDataPerLeaf, learningRate, numBoostRound);
+ return new LightGbmRankingTrainer(env, labelColumnName, featureColumnName, rowGroupColumnName, exampleWeightColumnName, leafCount, minimumDataPerLeaf, learningRate, numberOfIterations);
}
///
@@ -164,10 +164,10 @@ public static LightGbmRankingTrainer LightGbm(this RankingCatalog.RankingTrainer
/// The name of the label column.
/// The name of the feature column.
/// The name of the example weight column (optional).
- /// The number of leaves to use.
- /// Number of iterations.
- /// The minimal number of documents allowed in a leaf of the tree, out of the subsampled data.
+ /// The number of leaves to use.
+ /// The minimal number of documents allowed in a leaf of the tree, out of the subsampled data.
/// The learning rate.
+ /// The number of iterations to use.
///
///
///
diff --git a/src/Microsoft.ML.LightGBM/LightGbmMulticlassTrainer.cs b/src/Microsoft.ML.LightGBM/LightGbmMulticlassTrainer.cs
index 67e709c5c6..4ddfd845e4 100644
--- a/src/Microsoft.ML.LightGBM/LightGbmMulticlassTrainer.cs
+++ b/src/Microsoft.ML.LightGBM/LightGbmMulticlassTrainer.cs
@@ -43,22 +43,22 @@ internal LightGbmMulticlassTrainer(IHostEnvironment env, Options options)
/// Initializes a new instance of
///
/// The private instance of .
- /// The name of The label column.
- /// The name of the feature column.
+        /// The name of the label column.
+ /// The name of the feature column.
/// The name for the column containing the initial weight.
/// The number of leaves to use.
- /// Number of iterations.
/// The minimal number of documents allowed in a leaf of the tree, out of the subsampled data.
/// The learning rate.
+ /// The number of iterations to use.
internal LightGbmMulticlassTrainer(IHostEnvironment env,
- string labelColumn = DefaultColumnNames.Label,
- string featureColumn = DefaultColumnNames.Features,
+ string labelColumnName = DefaultColumnNames.Label,
+ string featureColumnName = DefaultColumnNames.Features,
string weights = null,
int? numLeaves = null,
int? minDataPerLeaf = null,
double? learningRate = null,
- int numBoostRound = LightGBM.Options.Defaults.NumBoostRound)
- : base(env, LoadNameValue, TrainerUtils.MakeU4ScalarColumn(labelColumn), featureColumn, weights, null, numLeaves, minDataPerLeaf, learningRate, numBoostRound)
+ int numberOfIterations = LightGBM.Options.Defaults.NumberOfIterations)
+ : base(env, LoadNameValue, TrainerUtils.MakeU4ScalarColumn(labelColumnName), featureColumnName, weights, null, numLeaves, minDataPerLeaf, learningRate, numberOfIterations)
{
_numClass = -1;
}
@@ -110,7 +110,7 @@ private protected override void CheckDataValid(IChannel ch, RoleMappedData data)
if (!(labelType is BooleanDataViewType || labelType is KeyType || labelType == NumberDataViewType.Single))
{
throw ch.ExceptParam(nameof(data),
- $"Label column '{data.Schema.Label.Value.Name}' is of type '{labelType}', but must be key, boolean or R4.");
+ $"Label column '{data.Schema.Label.Value.Name}' is of type '{labelType.RawType}', but must be of unsigned int, boolean or float.");
}
}
@@ -132,9 +132,9 @@ private protected override void ConvertNaNLabels(IChannel ch, RoleMappedData dat
maxLabel = Math.Max(maxLabel, labelColumn);
}
}
- ch.CheckParam(minLabel >= 0, nameof(data), "min labelColumn cannot be negative");
+ ch.CheckParam(minLabel >= 0, nameof(data), "Minimum value in label column cannot be negative");
if (maxLabel >= _maxNumClass)
- throw ch.ExceptParam(nameof(data), $"max labelColumn cannot exceed {_maxNumClass}");
+ throw ch.ExceptParam(nameof(data), $"Maximum value {maxLabel} in label column exceeds {_maxNumClass}");
if (data.Schema.Label.Value.Type is KeyType keyType)
{
@@ -163,16 +163,16 @@ protected override void GetDefaultParameters(IChannel ch, int numRow, bool hasCa
{
base.GetDefaultParameters(ch, numRow, hasCategorical, totalCats, true);
int numLeaves = (int)Options["num_leaves"];
- int minDataPerLeaf = LightGbmTrainerOptions.MinDataPerLeaf ?? DefaultMinDataPerLeaf(numRow, numLeaves, _numClass);
+ int minDataPerLeaf = LightGbmTrainerOptions.MinimumDataPerLeaf ?? DefaultMinDataPerLeaf(numRow, numLeaves, _numClass);
Options["min_data_per_leaf"] = minDataPerLeaf;
if (!hiddenMsg)
{
if (!LightGbmTrainerOptions.LearningRate.HasValue)
ch.Info("Auto-tuning parameters: " + nameof(LightGbmTrainerOptions.LearningRate) + " = " + Options["learning_rate"]);
- if (!LightGbmTrainerOptions.NumLeaves.HasValue)
- ch.Info("Auto-tuning parameters: " + nameof(LightGbmTrainerOptions.NumLeaves) + " = " + numLeaves);
- if (!LightGbmTrainerOptions.MinDataPerLeaf.HasValue)
- ch.Info("Auto-tuning parameters: " + nameof(LightGbmTrainerOptions.MinDataPerLeaf) + " = " + minDataPerLeaf);
+ if (!LightGbmTrainerOptions.NumberOfLeaves.HasValue)
+ ch.Info("Auto-tuning parameters: " + nameof(LightGbmTrainerOptions.NumberOfLeaves) + " = " + numLeaves);
+ if (!LightGbmTrainerOptions.MinimumDataPerLeaf.HasValue)
+ ch.Info("Auto-tuning parameters: " + nameof(LightGbmTrainerOptions.MinimumDataPerLeaf) + " = " + minDataPerLeaf);
}
}
@@ -184,14 +184,14 @@ private protected override void CheckAndUpdateParametersBeforeTraining(IChannel
Options["num_class"] = _numClass;
bool useSoftmax = false;
- if (LightGbmTrainerOptions.UseSoftmax.HasValue)
- useSoftmax = LightGbmTrainerOptions.UseSoftmax.Value;
+ if (LightGbmTrainerOptions.UseSoftMaximum.HasValue)
+ useSoftmax = LightGbmTrainerOptions.UseSoftMaximum.Value;
else
{
if (labels.Length >= _minDataToUseSoftmax)
useSoftmax = true;
- ch.Info("Auto-tuning parameters: " + nameof(LightGbmTrainerOptions.UseSoftmax) + " = " + useSoftmax);
+ ch.Info("Auto-tuning parameters: " + nameof(LightGbmTrainerOptions.UseSoftMaximum) + " = " + useSoftmax);
}
if (useSoftmax)
diff --git a/src/Microsoft.ML.LightGBM/LightGbmRankingTrainer.cs b/src/Microsoft.ML.LightGBM/LightGbmRankingTrainer.cs
index 482a04f0d9..d9870205a3 100644
--- a/src/Microsoft.ML.LightGBM/LightGbmRankingTrainer.cs
+++ b/src/Microsoft.ML.LightGBM/LightGbmRankingTrainer.cs
@@ -22,7 +22,6 @@
namespace Microsoft.ML.LightGBM
{
-
public sealed class LightGbmRankingModelParameters : TreeEnsembleModelParametersBasedOnRegressionTree
{
internal const string LoaderSignature = "LightGBMRankerExec";
@@ -89,26 +88,28 @@ internal LightGbmRankingTrainer(IHostEnvironment env, Options options)
/// Initializes a new instance of
///
/// The private instance of .
- /// The name of the label column.
- /// The name of the feature column.
- /// The name of the column containing the group ID.
- /// The name of the optional column containing the initial weights.
- /// The number of leaves to use.
- /// Number of iterations.
- /// The minimal number of documents allowed in a leaf of the tree, out of the subsampled data.
+ /// The name of the label column.
+ /// The name of the feature column.
+ /// The name of the column containing the group ID.
+ /// The name of the optional column containing the initial weights.
+ /// The number of leaves to use.
/// The learning rate.
+ /// The minimal number of documents allowed in a leaf of the tree, out of the subsampled data.
+ /// The number of iterations to use.
internal LightGbmRankingTrainer(IHostEnvironment env,
- string labelColumn = DefaultColumnNames.Label,
- string featureColumn = DefaultColumnNames.Features,
- string groupId = DefaultColumnNames.GroupId,
- string weights = null,
- int? numLeaves = null,
- int? minDataPerLeaf = null,
+ string labelColumnName = DefaultColumnNames.Label,
+ string featureColumnName = DefaultColumnNames.Features,
+ string groupIdColumnName = DefaultColumnNames.GroupId,
+ string weightsColumnName = null,
+ int? leafCount = null,
+ int? minimumDataPerLeaf = null,
double? learningRate = null,
- int numBoostRound = LightGBM.Options.Defaults.NumBoostRound)
- : base(env, LoadNameValue, TrainerUtils.MakeR4ScalarColumn(labelColumn), featureColumn, weights, groupId, numLeaves, minDataPerLeaf, learningRate, numBoostRound)
+ int numberOfIterations = LightGBM.Options.Defaults.NumberOfIterations)
+ : base(env, LoadNameValue, TrainerUtils.MakeR4ScalarColumn(labelColumnName),
+ featureColumnName, weightsColumnName, groupIdColumnName, leafCount,
+ minimumDataPerLeaf, learningRate, numberOfIterations)
{
- Host.CheckNonEmpty(groupId, nameof(groupId));
+ Host.CheckNonEmpty(groupIdColumnName, nameof(groupIdColumnName));
}
private protected override void CheckDataValid(IChannel ch, RoleMappedData data)
@@ -121,7 +122,7 @@ private protected override void CheckDataValid(IChannel ch, RoleMappedData data)
if (!(labelType is KeyType || labelType == NumberDataViewType.Single))
{
throw ch.ExceptParam(nameof(data),
- $"Label column '{labelCol.Name}' is of type '{labelType}', but must be key or R4.");
+ $"Label column '{labelCol.Name}' is of type '{labelType.RawType}', but must be unsigned int or float.");
}
// Check group types.
ch.CheckParam(data.Schema.Group.HasValue, nameof(data), "Need a group column.");
@@ -130,7 +131,7 @@ private protected override void CheckDataValid(IChannel ch, RoleMappedData data)
if (!(groupType == NumberDataViewType.UInt32 || groupType is KeyType))
{
throw ch.ExceptParam(nameof(data),
- $"Group column '{groupCol.Name}' is of type '{groupType}', but must be U4 or a Key.");
+ $"Group column '{groupCol.Name}' is of type '{groupType.RawType}', but must be unsigned int.");
}
}
@@ -139,7 +140,7 @@ private protected override void CheckLabelCompatible(SchemaShape.Column labelCol
Contracts.Assert(labelCol.IsValid);
Action error =
- () => throw Host.ExceptSchemaMismatch(nameof(labelCol), "label", labelCol.Name, "float or KeyType", labelCol.GetTypeString());
+ () => throw Host.ExceptSchemaMismatch(nameof(labelCol), "label", labelCol.Name, "float or unsigned int", labelCol.GetTypeString());
if (labelCol.Kind != SchemaShape.Column.VectorKind.Scalar)
error();
diff --git a/src/Microsoft.ML.LightGBM/LightGbmRegressionTrainer.cs b/src/Microsoft.ML.LightGBM/LightGbmRegressionTrainer.cs
index 3729154cbd..f93e2126c6 100644
--- a/src/Microsoft.ML.LightGBM/LightGbmRegressionTrainer.cs
+++ b/src/Microsoft.ML.LightGBM/LightGbmRegressionTrainer.cs
@@ -87,22 +87,22 @@ public sealed class LightGbmRegressorTrainer : LightGbmTrainerBase
///
/// The private instance of .
- /// The name of the label column.
- /// The name of the feature column.
- /// The name for the column containing the initial weight.
- /// The number of leaves to use.
- /// Number of iterations.
- /// The minimal number of documents allowed in a leaf of the tree, out of the subsampled data.
+ /// The name of the label column.
+ /// The name of the feature column.
+ /// The name for the column containing the initial weight.
+ /// The number of leaves to use.
+ /// The minimal number of documents allowed in a leaf of the tree, out of the subsampled data.
/// The learning rate.
+ /// Number of iterations.
internal LightGbmRegressorTrainer(IHostEnvironment env,
- string labelColumn = DefaultColumnNames.Label,
- string featureColumn = DefaultColumnNames.Features,
- string weights = null,
- int? numLeaves = null,
- int? minDataPerLeaf = null,
+ string labelColumnName = DefaultColumnNames.Label,
+ string featureColumnName = DefaultColumnNames.Features,
+ string weightsColumnName = null,
+ int? numberOfLeaves = null,
+ int? minimumDataPerLeaf = null,
double? learningRate = null,
- int numBoostRound = LightGBM.Options.Defaults.NumBoostRound)
- : base(env, LoadNameValue, TrainerUtils.MakeR4ScalarColumn(labelColumn), featureColumn, weights, null, numLeaves, minDataPerLeaf, learningRate, numBoostRound)
+ int numberOfIterations = LightGBM.Options.Defaults.NumberOfIterations)
+ : base(env, LoadNameValue, TrainerUtils.MakeR4ScalarColumn(labelColumnName), featureColumnName, weightsColumnName, null, numberOfLeaves, minimumDataPerLeaf, learningRate, numberOfIterations)
{
}
@@ -127,7 +127,7 @@ private protected override void CheckDataValid(IChannel ch, RoleMappedData data)
if (!(labelType is BooleanDataViewType || labelType is KeyType || labelType == NumberDataViewType.Single))
{
throw ch.ExceptParam(nameof(data),
- $"Label column '{data.Schema.Label.Value.Name}' is of type '{labelType}', but must be key, boolean or R4.");
+ $"Label column '{data.Schema.Label.Value.Name}' is of type '{labelType.RawType}', but must be an unsigned int, boolean or float.");
}
}
diff --git a/src/Microsoft.ML.LightGBM/LightGbmTrainerBase.cs b/src/Microsoft.ML.LightGBM/LightGbmTrainerBase.cs
index 614251c117..7ae79c09fc 100644
--- a/src/Microsoft.ML.LightGBM/LightGbmTrainerBase.cs
+++ b/src/Microsoft.ML.LightGBM/LightGbmTrainerBase.cs
@@ -58,27 +58,28 @@ private sealed class CategoricalMetaData
private protected LightGbmTrainerBase(IHostEnvironment env,
string name,
- SchemaShape.Column label,
- string featureColumn,
- string weightColumn,
- string groupIdColumn,
- int? numLeaves,
- int? minDataPerLeaf,
+ SchemaShape.Column labelColumn,
+ string featureColumnName,
+ string weightColumnName,
+ string groupIdColumnName,
+ int? leafCount,
+ int? minimumDataPerLeaf,
double? learningRate,
- int numBoostRound)
- : base(Contracts.CheckRef(env, nameof(env)).Register(name), TrainerUtils.MakeR4VecFeature(featureColumn), label, TrainerUtils.MakeR4ScalarWeightColumn(weightColumn), TrainerUtils.MakeU4ScalarColumn(groupIdColumn))
+ int numberOfIterations)
+ : base(Contracts.CheckRef(env, nameof(env)).Register(name), TrainerUtils.MakeR4VecFeature(featureColumnName),
+ labelColumn, TrainerUtils.MakeR4ScalarWeightColumn(weightColumnName), TrainerUtils.MakeU4ScalarColumn(groupIdColumnName))
{
LightGbmTrainerOptions = new Options();
- LightGbmTrainerOptions.NumLeaves = numLeaves;
- LightGbmTrainerOptions.MinDataPerLeaf = minDataPerLeaf;
+ LightGbmTrainerOptions.NumberOfLeaves = leafCount;
+ LightGbmTrainerOptions.MinimumDataPerLeaf = minimumDataPerLeaf;
LightGbmTrainerOptions.LearningRate = learningRate;
- LightGbmTrainerOptions.NumBoostRound = numBoostRound;
+ LightGbmTrainerOptions.NumberOfIterations = numberOfIterations;
- LightGbmTrainerOptions.LabelColumnName = label.Name;
- LightGbmTrainerOptions.FeatureColumnName = featureColumn;
- LightGbmTrainerOptions.ExampleWeightColumnName = weightColumn;
- LightGbmTrainerOptions.RowGroupColumnName = groupIdColumn;
+ LightGbmTrainerOptions.LabelColumnName = labelColumn.Name;
+ LightGbmTrainerOptions.FeatureColumnName = featureColumnName;
+ LightGbmTrainerOptions.ExampleWeightColumnName = weightColumnName;
+ LightGbmTrainerOptions.RowGroupColumnName = groupIdColumnName;
InitParallelTraining();
}
@@ -167,8 +168,8 @@ private protected virtual void CheckDataValid(IChannel ch, RoleMappedData data)
protected virtual void GetDefaultParameters(IChannel ch, int numRow, bool hasCategarical, int totalCats, bool hiddenMsg = false)
{
double learningRate = LightGbmTrainerOptions.LearningRate ?? DefaultLearningRate(numRow, hasCategarical, totalCats);
- int numLeaves = LightGbmTrainerOptions.NumLeaves ?? DefaultNumLeaves(numRow, hasCategarical, totalCats);
- int minDataPerLeaf = LightGbmTrainerOptions.MinDataPerLeaf ?? DefaultMinDataPerLeaf(numRow, numLeaves, 1);
+ int numLeaves = LightGbmTrainerOptions.NumberOfLeaves ?? DefaultNumLeaves(numRow, hasCategarical, totalCats);
+ int minDataPerLeaf = LightGbmTrainerOptions.MinimumDataPerLeaf ?? DefaultMinDataPerLeaf(numRow, numLeaves, 1);
Options["learning_rate"] = learningRate;
Options["num_leaves"] = numLeaves;
Options["min_data_per_leaf"] = minDataPerLeaf;
@@ -176,10 +177,10 @@ protected virtual void GetDefaultParameters(IChannel ch, int numRow, bool hasCat
{
if (!LightGbmTrainerOptions.LearningRate.HasValue)
ch.Info("Auto-tuning parameters: " + nameof(LightGbmTrainerOptions.LearningRate) + " = " + learningRate);
- if (!LightGbmTrainerOptions.NumLeaves.HasValue)
- ch.Info("Auto-tuning parameters: " + nameof(LightGbmTrainerOptions.NumLeaves) + " = " + numLeaves);
- if (!LightGbmTrainerOptions.MinDataPerLeaf.HasValue)
- ch.Info("Auto-tuning parameters: " + nameof(LightGbmTrainerOptions.MinDataPerLeaf) + " = " + minDataPerLeaf);
+ if (!LightGbmTrainerOptions.NumberOfLeaves.HasValue)
+ ch.Info("Auto-tuning parameters: " + nameof(LightGbmTrainerOptions.NumberOfLeaves) + " = " + numLeaves);
+ if (!LightGbmTrainerOptions.MinimumDataPerLeaf.HasValue)
+ ch.Info("Auto-tuning parameters: " + nameof(LightGbmTrainerOptions.MinimumDataPerLeaf) + " = " + minDataPerLeaf);
}
}
@@ -274,9 +275,9 @@ private CategoricalMetaData GetCategoricalMetaData(IChannel ch, RoleMappedData t
int[] categoricalFeatures = null;
const int useCatThreshold = 50000;
// Disable cat when data is too small, reduce the overfitting.
- bool useCat = LightGbmTrainerOptions.UseCat ?? numRow > useCatThreshold;
- if (!LightGbmTrainerOptions.UseCat.HasValue)
- ch.Info("Auto-tuning parameters: " + nameof(LightGbmTrainerOptions.UseCat) + " = " + useCat);
+ bool useCat = LightGbmTrainerOptions.UseCategoricalSplit ?? numRow > useCatThreshold;
+ if (!LightGbmTrainerOptions.UseCategoricalSplit.HasValue)
+ ch.Info("Auto-tuning parameters: " + nameof(LightGbmTrainerOptions.UseCategoricalSplit) + " = " + useCat);
if (useCat)
{
var featureCol = trainData.Schema.Schema[DefaultColumnNames.Features];
@@ -369,8 +370,8 @@ private void TrainCore(IChannel ch, IProgressChannel pch, Dataset dtrain, Catego
{
ch.Info("LightGBM objective={0}", Options["objective"]);
using (Booster bst = WrappedLightGbmTraining.Train(ch, pch, Options, dtrain,
- dvalid: dvalid, numIteration: LightGbmTrainerOptions.NumBoostRound,
- verboseEval: LightGbmTrainerOptions.VerboseEval, earlyStoppingRound: LightGbmTrainerOptions.EarlyStoppingRound))
+ dvalid: dvalid, numIteration: LightGbmTrainerOptions.NumberOfIterations,
+ verboseEval: LightGbmTrainerOptions.Verbose, earlyStoppingRound: LightGbmTrainerOptions.EarlyStoppingRound))
{
TrainedEnsemble = bst.GetModel(catMetaData.CategoricalBoudaries);
}
diff --git a/src/Microsoft.ML.LightGBM/doc.xml b/src/Microsoft.ML.LightGBM/doc.xml
index 1fcd38dd7a..dfa4ccfad7 100644
--- a/src/Microsoft.ML.LightGBM/doc.xml
+++ b/src/Microsoft.ML.LightGBM/doc.xml
@@ -16,10 +16,10 @@
new LightGbmBinaryClassifier
{
- NumBoostRound = 200,
+ NumberOfIterations = 200,
LearningRate = 0.5f,
- NumLeaves = 32,
- MinDataPerLeaf = 20
+ NumberOfLeaves = 32,
+ MinimumDataPerLeaf = 20
}
@@ -29,10 +29,10 @@
new LightGbmClassifier
{
- NumBoostRound = 200,
+ NumberOfIterations = 200,
LearningRate = 0.5f,
- NumLeaves = 32,
- MinDataPerLeaf = 20
+ NumberOfLeaves = 32,
+ MinimumDataPerLeaf = 20
}
@@ -42,10 +42,10 @@
new LightGbmRegressor
{
- NumBoostRound = 100,
+ NumberOfIterations = 100,
LearningRate = 0.5f,
- NumLeaves = 32,
- MinDataPerLeaf = 20,
+ NumberOfLeaves = 32,
+ MinimumDataPerLeaf = 20,
Booster = new DartBoosterParameterFunction
{
XgboostDartMode = true,
@@ -60,10 +60,10 @@
new LightGbmRanker
{
- NumBoostRound = 100,
+ NumberOfIterations = 100,
LearningRate = 0.5f,
- NumLeaves = 32,
- MinDataPerLeaf = 20,
+ NumberOfLeaves = 32,
+ MinimumDataPerLeaf = 20,
Booster = new GbdtBoosterParameterFunction
{
MinSplitGain = 3,
diff --git a/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedPerceptron.cs b/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedPerceptron.cs
index c91fae0244..3fe119d98a 100644
--- a/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedPerceptron.cs
+++ b/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedPerceptron.cs
@@ -121,15 +121,15 @@ internal AveragedPerceptronTrainer(IHostEnvironment env, Options options)
///
/// The local instance of the
/// The classification loss function.
- /// The name of the label column.
- /// The name of the feature column.
+ /// The name of the label column.
+ /// The name of the feature column.
/// The learning rate.
/// Whether to decrease learning rate as iterations progress.
/// L2 Regularization Weight.
/// The number of training iterations.
internal AveragedPerceptronTrainer(IHostEnvironment env,
- string labelColumn = DefaultColumnNames.Label,
- string featureColumn = DefaultColumnNames.Features,
+ string labelColumnName = DefaultColumnNames.Label,
+ string featureColumnName = DefaultColumnNames.Features,
IClassificationLoss lossFunction = null,
float learningRate = Options.AveragedDefault.LearningRate,
bool decreaseLearningRate = Options.AveragedDefault.DecreaseLearningRate,
@@ -137,8 +137,8 @@ internal AveragedPerceptronTrainer(IHostEnvironment env,
int numIterations = Options.AveragedDefault.NumIterations)
: this(env, new Options
{
- LabelColumnName = labelColumn,
- FeatureColumnName = featureColumn,
+ LabelColumnName = labelColumnName,
+ FeatureColumnName = featureColumnName,
LearningRate = learningRate,
DecreaseLearningRate = decreaseLearningRate,
L2RegularizerWeight = l2RegularizerWeight,
diff --git a/test/BaselineOutput/Common/EntryPoints/core_manifest.json b/test/BaselineOutput/Common/EntryPoints/core_manifest.json
index d3e0180dbd..8654c4c032 100644
--- a/test/BaselineOutput/Common/EntryPoints/core_manifest.json
+++ b/test/BaselineOutput/Common/EntryPoints/core_manifest.json
@@ -11129,7 +11129,7 @@
"ShortName": "LightGBM",
"Inputs": [
{
- "Name": "NumBoostRound",
+ "Name": "NumberOfIterations",
"Type": "Int",
"Desc": "Number of iterations.",
"Aliases": [
@@ -11181,7 +11181,7 @@
}
},
{
- "Name": "NumLeaves",
+ "Name": "NumberOfLeaves",
"Type": "Int",
"Desc": "Maximum leaves for trees.",
"Aliases": [
@@ -11200,7 +11200,7 @@
}
},
{
- "Name": "MinDataPerLeaf",
+ "Name": "MinimumDataPerLeaf",
"Type": "Int",
"Desc": "Minimum number of instances needed in a child.",
"Aliases": [
@@ -11322,9 +11322,9 @@
"Default": "Auto"
},
{
- "Name": "MaxBin",
+ "Name": "MaximumBin",
"Type": "Int",
- "Desc": "Max number of bucket bin for features.",
+ "Desc": "Maximum number of bucket bin for features.",
"Aliases": [
"mb"
],
@@ -11334,7 +11334,7 @@
"Default": 255
},
{
- "Name": "VerboseEval",
+ "Name": "Verbose",
"Type": "Bool",
"Desc": "Verbose",
"Aliases": [
@@ -11355,7 +11355,7 @@
"Default": true
},
{
- "Name": "NThread",
+ "Name": "NumberOfThreads",
"Type": "Int",
"Desc": "Number of parallel threads used to run LightGBM.",
"Aliases": [
@@ -11367,7 +11367,7 @@
"Default": null
},
{
- "Name": "EvalMetric",
+ "Name": "EvaluationMetric",
"Type": {
"Kind": "Enum",
"Values": [
@@ -11393,7 +11393,7 @@
"Default": "DefaultMetric"
},
{
- "Name": "UseSoftmax",
+ "Name": "UseSoftMaximum",
"Type": "Bool",
"Desc": "Use softmax loss for the multi classification.",
"Required": false,
@@ -11454,7 +11454,7 @@
"Default": 1048576
},
{
- "Name": "UseCat",
+ "Name": "UseCategoricalSplit",
"Type": "Bool",
"Desc": "Enable categorical split or not.",
"Aliases": [
@@ -11475,7 +11475,7 @@
{
"Name": "UseMissing",
"Type": "Bool",
- "Desc": "Enable missing value auto infer or not.",
+ "Desc": "Enable special handling of missing value or not.",
"Required": false,
"SortOrder": 150.0,
"IsNullable": false,
@@ -11489,9 +11489,9 @@
}
},
{
- "Name": "MinDataPerGroup",
+ "Name": "MinimumDataPerGroup",
"Type": "Int",
- "Desc": "Min number of instances per categorical group.",
+ "Desc": "Minimum number of instances per categorical group.",
"Aliases": [
"mdpg"
],
@@ -11514,7 +11514,7 @@
}
},
{
- "Name": "MaxCatThreshold",
+ "Name": "MaximumCategoricalThreshold",
"Type": "Int",
"Desc": "Max number of categorical thresholds.",
"Aliases": [
@@ -11539,7 +11539,7 @@
}
},
{
- "Name": "CatSmooth",
+ "Name": "CategoricalSmoothing",
"Type": "Float",
"Desc": "Lapalace smooth term in categorical feature spilt. Avoid the bias of small categories.",
"Required": false,
@@ -11559,7 +11559,7 @@
}
},
{
- "Name": "CatL2",
+ "Name": "L2Categorical",
"Type": "Float",
"Desc": "L2 Regularization for categorical split.",
"Required": false,
@@ -11632,7 +11632,7 @@
"ShortName": "LightGBMMC",
"Inputs": [
{
- "Name": "NumBoostRound",
+ "Name": "NumberOfIterations",
"Type": "Int",
"Desc": "Number of iterations.",
"Aliases": [
@@ -11684,7 +11684,7 @@
}
},
{
- "Name": "NumLeaves",
+ "Name": "NumberOfLeaves",
"Type": "Int",
"Desc": "Maximum leaves for trees.",
"Aliases": [
@@ -11703,7 +11703,7 @@
}
},
{
- "Name": "MinDataPerLeaf",
+ "Name": "MinimumDataPerLeaf",
"Type": "Int",
"Desc": "Minimum number of instances needed in a child.",
"Aliases": [
@@ -11825,9 +11825,9 @@
"Default": "Auto"
},
{
- "Name": "MaxBin",
+ "Name": "MaximumBin",
"Type": "Int",
- "Desc": "Max number of bucket bin for features.",
+ "Desc": "Maximum number of bucket bin for features.",
"Aliases": [
"mb"
],
@@ -11837,7 +11837,7 @@
"Default": 255
},
{
- "Name": "VerboseEval",
+ "Name": "Verbose",
"Type": "Bool",
"Desc": "Verbose",
"Aliases": [
@@ -11858,7 +11858,7 @@
"Default": true
},
{
- "Name": "NThread",
+ "Name": "NumberOfThreads",
"Type": "Int",
"Desc": "Number of parallel threads used to run LightGBM.",
"Aliases": [
@@ -11870,7 +11870,7 @@
"Default": null
},
{
- "Name": "EvalMetric",
+ "Name": "EvaluationMetric",
"Type": {
"Kind": "Enum",
"Values": [
@@ -11896,7 +11896,7 @@
"Default": "DefaultMetric"
},
{
- "Name": "UseSoftmax",
+ "Name": "UseSoftMaximum",
"Type": "Bool",
"Desc": "Use softmax loss for the multi classification.",
"Required": false,
@@ -11957,7 +11957,7 @@
"Default": 1048576
},
{
- "Name": "UseCat",
+ "Name": "UseCategoricalSplit",
"Type": "Bool",
"Desc": "Enable categorical split or not.",
"Aliases": [
@@ -11978,7 +11978,7 @@
{
"Name": "UseMissing",
"Type": "Bool",
- "Desc": "Enable missing value auto infer or not.",
+ "Desc": "Enable special handling of missing value or not.",
"Required": false,
"SortOrder": 150.0,
"IsNullable": false,
@@ -11992,9 +11992,9 @@
}
},
{
- "Name": "MinDataPerGroup",
+ "Name": "MinimumDataPerGroup",
"Type": "Int",
- "Desc": "Min number of instances per categorical group.",
+ "Desc": "Minimum number of instances per categorical group.",
"Aliases": [
"mdpg"
],
@@ -12017,7 +12017,7 @@
}
},
{
- "Name": "MaxCatThreshold",
+ "Name": "MaximumCategoricalThreshold",
"Type": "Int",
"Desc": "Max number of categorical thresholds.",
"Aliases": [
@@ -12042,7 +12042,7 @@
}
},
{
- "Name": "CatSmooth",
+ "Name": "CategoricalSmoothing",
"Type": "Float",
"Desc": "Lapalace smooth term in categorical feature spilt. Avoid the bias of small categories.",
"Required": false,
@@ -12062,7 +12062,7 @@
}
},
{
- "Name": "CatL2",
+ "Name": "L2Categorical",
"Type": "Float",
"Desc": "L2 Regularization for categorical split.",
"Required": false,
@@ -12135,7 +12135,7 @@
"ShortName": "LightGBMRank",
"Inputs": [
{
- "Name": "NumBoostRound",
+ "Name": "NumberOfIterations",
"Type": "Int",
"Desc": "Number of iterations.",
"Aliases": [
@@ -12187,7 +12187,7 @@
}
},
{
- "Name": "NumLeaves",
+ "Name": "NumberOfLeaves",
"Type": "Int",
"Desc": "Maximum leaves for trees.",
"Aliases": [
@@ -12206,7 +12206,7 @@
}
},
{
- "Name": "MinDataPerLeaf",
+ "Name": "MinimumDataPerLeaf",
"Type": "Int",
"Desc": "Minimum number of instances needed in a child.",
"Aliases": [
@@ -12328,9 +12328,9 @@
"Default": "Auto"
},
{
- "Name": "MaxBin",
+ "Name": "MaximumBin",
"Type": "Int",
- "Desc": "Max number of bucket bin for features.",
+ "Desc": "Maximum number of bucket bin for features.",
"Aliases": [
"mb"
],
@@ -12340,7 +12340,7 @@
"Default": 255
},
{
- "Name": "VerboseEval",
+ "Name": "Verbose",
"Type": "Bool",
"Desc": "Verbose",
"Aliases": [
@@ -12361,7 +12361,7 @@
"Default": true
},
{
- "Name": "NThread",
+ "Name": "NumberOfThreads",
"Type": "Int",
"Desc": "Number of parallel threads used to run LightGBM.",
"Aliases": [
@@ -12373,7 +12373,7 @@
"Default": null
},
{
- "Name": "EvalMetric",
+ "Name": "EvaluationMetric",
"Type": {
"Kind": "Enum",
"Values": [
@@ -12399,7 +12399,7 @@
"Default": "DefaultMetric"
},
{
- "Name": "UseSoftmax",
+ "Name": "UseSoftMaximum",
"Type": "Bool",
"Desc": "Use softmax loss for the multi classification.",
"Required": false,
@@ -12460,7 +12460,7 @@
"Default": 1048576
},
{
- "Name": "UseCat",
+ "Name": "UseCategoricalSplit",
"Type": "Bool",
"Desc": "Enable categorical split or not.",
"Aliases": [
@@ -12481,7 +12481,7 @@
{
"Name": "UseMissing",
"Type": "Bool",
- "Desc": "Enable missing value auto infer or not.",
+ "Desc": "Enable special handling of missing value or not.",
"Required": false,
"SortOrder": 150.0,
"IsNullable": false,
@@ -12495,9 +12495,9 @@
}
},
{
- "Name": "MinDataPerGroup",
+ "Name": "MinimumDataPerGroup",
"Type": "Int",
- "Desc": "Min number of instances per categorical group.",
+ "Desc": "Minimum number of instances per categorical group.",
"Aliases": [
"mdpg"
],
@@ -12520,7 +12520,7 @@
}
},
{
- "Name": "MaxCatThreshold",
+ "Name": "MaximumCategoricalThreshold",
"Type": "Int",
"Desc": "Max number of categorical thresholds.",
"Aliases": [
@@ -12545,7 +12545,7 @@
}
},
{
- "Name": "CatSmooth",
+ "Name": "CategoricalSmoothing",
"Type": "Float",
"Desc": "Lapalace smooth term in categorical feature spilt. Avoid the bias of small categories.",
"Required": false,
@@ -12565,7 +12565,7 @@
}
},
{
- "Name": "CatL2",
+ "Name": "L2Categorical",
"Type": "Float",
"Desc": "L2 Regularization for categorical split.",
"Required": false,
@@ -12638,7 +12638,7 @@
"ShortName": "LightGBMR",
"Inputs": [
{
- "Name": "NumBoostRound",
+ "Name": "NumberOfIterations",
"Type": "Int",
"Desc": "Number of iterations.",
"Aliases": [
@@ -12690,7 +12690,7 @@
}
},
{
- "Name": "NumLeaves",
+ "Name": "NumberOfLeaves",
"Type": "Int",
"Desc": "Maximum leaves for trees.",
"Aliases": [
@@ -12709,7 +12709,7 @@
}
},
{
- "Name": "MinDataPerLeaf",
+ "Name": "MinimumDataPerLeaf",
"Type": "Int",
"Desc": "Minimum number of instances needed in a child.",
"Aliases": [
@@ -12831,9 +12831,9 @@
"Default": "Auto"
},
{
- "Name": "MaxBin",
+ "Name": "MaximumBin",
"Type": "Int",
- "Desc": "Max number of bucket bin for features.",
+ "Desc": "Maximum number of bucket bin for features.",
"Aliases": [
"mb"
],
@@ -12843,7 +12843,7 @@
"Default": 255
},
{
- "Name": "VerboseEval",
+ "Name": "Verbose",
"Type": "Bool",
"Desc": "Verbose",
"Aliases": [
@@ -12864,7 +12864,7 @@
"Default": true
},
{
- "Name": "NThread",
+ "Name": "NumberOfThreads",
"Type": "Int",
"Desc": "Number of parallel threads used to run LightGBM.",
"Aliases": [
@@ -12876,7 +12876,7 @@
"Default": null
},
{
- "Name": "EvalMetric",
+ "Name": "EvaluationMetric",
"Type": {
"Kind": "Enum",
"Values": [
@@ -12902,7 +12902,7 @@
"Default": "DefaultMetric"
},
{
- "Name": "UseSoftmax",
+ "Name": "UseSoftMaximum",
"Type": "Bool",
"Desc": "Use softmax loss for the multi classification.",
"Required": false,
@@ -12963,7 +12963,7 @@
"Default": 1048576
},
{
- "Name": "UseCat",
+ "Name": "UseCategoricalSplit",
"Type": "Bool",
"Desc": "Enable categorical split or not.",
"Aliases": [
@@ -12984,7 +12984,7 @@
{
"Name": "UseMissing",
"Type": "Bool",
- "Desc": "Enable missing value auto infer or not.",
+ "Desc": "Enable special handling of missing value or not.",
"Required": false,
"SortOrder": 150.0,
"IsNullable": false,
@@ -12998,9 +12998,9 @@
}
},
{
- "Name": "MinDataPerGroup",
+ "Name": "MinimumDataPerGroup",
"Type": "Int",
- "Desc": "Min number of instances per categorical group.",
+ "Desc": "Minimum number of instances per categorical group.",
"Aliases": [
"mdpg"
],
@@ -13023,7 +13023,7 @@
}
},
{
- "Name": "MaxCatThreshold",
+ "Name": "MaximumCategoricalThreshold",
"Type": "Int",
"Desc": "Max number of categorical thresholds.",
"Aliases": [
@@ -13048,7 +13048,7 @@
}
},
{
- "Name": "CatSmooth",
+ "Name": "CategoricalSmoothing",
"Type": "Float",
"Desc": "Lapalace smooth term in categorical feature spilt. Avoid the bias of small categories.",
"Required": false,
@@ -13068,7 +13068,7 @@
}
},
{
- "Name": "CatL2",
+ "Name": "L2Categorical",
"Type": "Float",
"Desc": "L2 Regularization for categorical split.",
"Required": false,
@@ -23538,7 +23538,7 @@
{
"Name": "DropRate",
"Type": "Float",
- "Desc": "Drop ratio for trees. Range:(0,1).",
+ "Desc": "The drop ratio for trees. Range:(0,1).",
"Required": false,
"SortOrder": 150.0,
"IsNullable": false,
@@ -23551,7 +23551,7 @@
{
"Name": "MaxDrop",
"Type": "Int",
- "Desc": "Max number of dropped tree in a boosting round.",
+ "Desc": "Maximum number of dropped tree in a boosting round.",
"Required": false,
"SortOrder": 150.0,
"IsNullable": false,
@@ -23564,7 +23564,7 @@
{
"Name": "SkipDrop",
"Type": "Float",
- "Desc": "Probability for not perform dropping in a boosting round.",
+ "Desc": "Probability for not dropping in a boosting round.",
"Required": false,
"SortOrder": 150.0,
"IsNullable": false,
@@ -23595,7 +23595,7 @@
{
"Name": "UnbalancedSets",
"Type": "Bool",
- "Desc": "Use for binary classification when classes are not balanced.",
+ "Desc": "Use for binary classification when training data is not balanced.",
"Aliases": [
"us"
],
@@ -23644,7 +23644,7 @@
{
"Name": "SubsampleFreq",
"Type": "Int",
- "Desc": "Subsample frequency. 0 means no subsample. If subsampleFreq > 0, it will use a subset(ratio=subsample) to train. And the subset will be updated on every Subsample iteratinos.",
+ "Desc": "Subsample frequency for bagging. 0 means no subsample. If subsampleFreq > 0, it will use a subset(ratio=subsample) to train. And the subset will be updated on every Subsample iteratinos.",
"Required": false,
"SortOrder": 150.0,
"IsNullable": false,
@@ -23748,7 +23748,7 @@
{
"Name": "UnbalancedSets",
"Type": "Bool",
- "Desc": "Use for binary classification when classes are not balanced.",
+ "Desc": "Use for binary classification when training data is not balanced.",
"Aliases": [
"us"
],
@@ -23797,7 +23797,7 @@
{
"Name": "SubsampleFreq",
"Type": "Int",
- "Desc": "Subsample frequency. 0 means no subsample. If subsampleFreq > 0, it will use a subset(ratio=subsample) to train. And the subset will be updated on every Subsample iteratinos.",
+ "Desc": "Subsample frequency for bagging. 0 means no subsample. If subsampleFreq > 0, it will use a subset(ratio=subsample) to train. And the subset will be updated on every Subsample iteratinos.",
"Required": false,
"SortOrder": 150.0,
"IsNullable": false,
@@ -23927,7 +23927,7 @@
{
"Name": "UnbalancedSets",
"Type": "Bool",
- "Desc": "Use for binary classification when classes are not balanced.",
+ "Desc": "Use for binary classification when training data is not balanced.",
"Aliases": [
"us"
],
@@ -23976,7 +23976,7 @@
{
"Name": "SubsampleFreq",
"Type": "Int",
- "Desc": "Subsample frequency. 0 means no subsample. If subsampleFreq > 0, it will use a subset(ratio=subsample) to train. And the subset will be updated on every Subsample iteratinos.",
+ "Desc": "Subsample frequency for bagging. 0 means no subsample. If subsampleFreq > 0, it will use a subset(ratio=subsample) to train. And the subset will be updated on every Subsample iteratinos.",
"Required": false,
"SortOrder": 150.0,
"IsNullable": false,
diff --git a/test/BaselineOutput/Common/LightGBM/LightGBMDart-CV-breast-cancer.dart-out.txt b/test/BaselineOutput/Common/LightGBM/LightGBMDart-CV-breast-cancer.dart-out.txt
index 44635aa70a..e79b7a61e7 100644
--- a/test/BaselineOutput/Common/LightGBM/LightGBMDart-CV-breast-cancer.dart-out.txt
+++ b/test/BaselineOutput/Common/LightGBM/LightGBMDart-CV-breast-cancer.dart-out.txt
@@ -1,10 +1,10 @@
maml.exe CV tr=LightGBM{nt=1 iter=10 booster=dart lr=0.2 mil=10 nl=20} threads=- cache=- dout=%Output% loader=Text{sparse- col=Attr:TX:6 col=Label:0 col=Features:1-5,6,7-9} data=%Data% seed=1
Not adding a normalizer.
-Auto-tuning parameters: UseCat = False
+Auto-tuning parameters: UseCategoricalSplit = False
LightGBM objective=binary
Not training a calibrator because it is not needed.
Not adding a normalizer.
-Auto-tuning parameters: UseCat = False
+Auto-tuning parameters: UseCategoricalSplit = False
LightGBM objective=binary
Not training a calibrator because it is not needed.
TEST POSITIVE RATIO: 0.3702 (134.0/(134.0+228.0))
diff --git a/test/BaselineOutput/Common/LightGBM/LightGBMDart-TrainTest-breast-cancer.dart-out.txt b/test/BaselineOutput/Common/LightGBM/LightGBMDart-TrainTest-breast-cancer.dart-out.txt
index 232f6326d4..bfa0bf3f97 100644
--- a/test/BaselineOutput/Common/LightGBM/LightGBMDart-TrainTest-breast-cancer.dart-out.txt
+++ b/test/BaselineOutput/Common/LightGBM/LightGBMDart-TrainTest-breast-cancer.dart-out.txt
@@ -1,6 +1,6 @@
maml.exe TrainTest test=%Data% tr=LightGBM{nt=1 iter=10 booster=dart lr=0.2 mil=10 nl=20} cache=- dout=%Output% loader=Text{sparse- col=Attr:TX:6 col=Label:0 col=Features:1-5,6,7-9} data=%Data% out=%Output% seed=1
Not adding a normalizer.
-Auto-tuning parameters: UseCat = False
+Auto-tuning parameters: UseCategoricalSplit = False
LightGBM objective=binary
Not training a calibrator because it is not needed.
TEST POSITIVE RATIO: 0.3448 (241.0/(241.0+458.0))
diff --git a/test/BaselineOutput/Common/LightGBM/LightGBMGoss-CV-breast-cancer.goss-out.txt b/test/BaselineOutput/Common/LightGBM/LightGBMGoss-CV-breast-cancer.goss-out.txt
index 8bd89002c1..a331a81b7e 100644
--- a/test/BaselineOutput/Common/LightGBM/LightGBMGoss-CV-breast-cancer.goss-out.txt
+++ b/test/BaselineOutput/Common/LightGBM/LightGBMGoss-CV-breast-cancer.goss-out.txt
@@ -1,10 +1,10 @@
maml.exe CV tr=LightGBM{nt=1 iter=10 v=+ booster=goss lr=0.2 mil=10 nl=20} threads=- cache=- dout=%Output% loader=Text{sparse- col=Attr:TX:6 col=Label:0 col=Features:1-5,6,7-9} data=%Data% seed=1
Not adding a normalizer.
-Auto-tuning parameters: UseCat = False
+Auto-tuning parameters: UseCategoricalSplit = False
LightGBM objective=binary
Not training a calibrator because it is not needed.
Not adding a normalizer.
-Auto-tuning parameters: UseCat = False
+Auto-tuning parameters: UseCategoricalSplit = False
LightGBM objective=binary
Not training a calibrator because it is not needed.
TEST POSITIVE RATIO: 0.3702 (134.0/(134.0+228.0))
diff --git a/test/BaselineOutput/Common/LightGBM/LightGBMGoss-TrainTest-breast-cancer.goss-out.txt b/test/BaselineOutput/Common/LightGBM/LightGBMGoss-TrainTest-breast-cancer.goss-out.txt
index ba50420a1d..d249f34e1b 100644
--- a/test/BaselineOutput/Common/LightGBM/LightGBMGoss-TrainTest-breast-cancer.goss-out.txt
+++ b/test/BaselineOutput/Common/LightGBM/LightGBMGoss-TrainTest-breast-cancer.goss-out.txt
@@ -1,6 +1,6 @@
maml.exe TrainTest test=%Data% tr=LightGBM{nt=1 iter=10 v=+ booster=goss lr=0.2 mil=10 nl=20} cache=- dout=%Output% loader=Text{sparse- col=Attr:TX:6 col=Label:0 col=Features:1-5,6,7-9} data=%Data% out=%Output% seed=1
Not adding a normalizer.
-Auto-tuning parameters: UseCat = False
+Auto-tuning parameters: UseCategoricalSplit = False
LightGBM objective=binary
Not training a calibrator because it is not needed.
TEST POSITIVE RATIO: 0.3448 (241.0/(241.0+458.0))
diff --git a/test/BaselineOutput/Common/LightGBMBinary/LightGBM-TrainTest-breast-cancer-out.txt b/test/BaselineOutput/Common/LightGBMBinary/LightGBM-TrainTest-breast-cancer-out.txt
index 2496917e04..391a8665ce 100644
--- a/test/BaselineOutput/Common/LightGBMBinary/LightGBM-TrainTest-breast-cancer-out.txt
+++ b/test/BaselineOutput/Common/LightGBMBinary/LightGBM-TrainTest-breast-cancer-out.txt
@@ -1,6 +1,6 @@
maml.exe TrainTest test=%Data% tr=LightGBMBinary{nt=1 nl=5 mil=5 lr=0.25 iter=20 mb=255} cache=- dout=%Output% loader=Text{sparse- col=Attr:TX:6 col=Label:0 col=Features:1-5,6,7-9} data=%Data% out=%Output% seed=1
Not adding a normalizer.
-Auto-tuning parameters: UseCat = False
+Auto-tuning parameters: UseCategoricalSplit = False
LightGBM objective=binary
Not training a calibrator because it is not needed.
TEST POSITIVE RATIO: 0.3448 (241.0/(241.0+458.0))
diff --git a/test/BaselineOutput/Common/LightGBMMC/LightGBMMC-CV-iris.key-out.txt b/test/BaselineOutput/Common/LightGBMMC/LightGBMMC-CV-iris.key-out.txt
index dcd0b08107..a9a5af5543 100644
--- a/test/BaselineOutput/Common/LightGBMMC/LightGBMMC-CV-iris.key-out.txt
+++ b/test/BaselineOutput/Common/LightGBMMC/LightGBMMC-CV-iris.key-out.txt
@@ -1,12 +1,12 @@
maml.exe CV tr=LightGBMMC{nt=1 iter=10 v=- lr=0.2 mil=10 nl=20} threads=- dout=%Output% loader=Text{col=Label:TX:0 col=Features:1-*} data=%Data% seed=1 xf=Term{col=Label}
Not adding a normalizer.
-Auto-tuning parameters: UseCat = False
-Auto-tuning parameters: UseSoftmax = False
+Auto-tuning parameters: UseCategoricalSplit = False
+Auto-tuning parameters: UseSoftMaximum = False
LightGBM objective=multiclassova
Not training a calibrator because it is not needed.
Not adding a normalizer.
-Auto-tuning parameters: UseCat = False
-Auto-tuning parameters: UseSoftmax = False
+Auto-tuning parameters: UseCategoricalSplit = False
+Auto-tuning parameters: UseSoftMaximum = False
LightGBM objective=multiclassova
Not training a calibrator because it is not needed.
diff --git a/test/BaselineOutput/Common/LightGBMMC/LightGBMMC-CV-iris.keyU404-out.txt b/test/BaselineOutput/Common/LightGBMMC/LightGBMMC-CV-iris.keyU404-out.txt
index db69b4b0d8..9958fa93b8 100644
--- a/test/BaselineOutput/Common/LightGBMMC/LightGBMMC-CV-iris.keyU404-out.txt
+++ b/test/BaselineOutput/Common/LightGBMMC/LightGBMMC-CV-iris.keyU404-out.txt
@@ -1,12 +1,12 @@
maml.exe CV tr=LightGBMMC{nt=1 iter=10 v=- lr=0.2 mil=10 nl=20} threads=- dout=%Output% loader=Text{col=Label:U4[0-2]:0 col=Features:1-4} data=%Data% seed=1
Not adding a normalizer.
-Auto-tuning parameters: UseCat = False
-Auto-tuning parameters: UseSoftmax = False
+Auto-tuning parameters: UseCategoricalSplit = False
+Auto-tuning parameters: UseSoftMaximum = False
LightGBM objective=multiclassova
Not training a calibrator because it is not needed.
Not adding a normalizer.
-Auto-tuning parameters: UseCat = False
-Auto-tuning parameters: UseSoftmax = False
+Auto-tuning parameters: UseCategoricalSplit = False
+Auto-tuning parameters: UseSoftMaximum = False
LightGBM objective=multiclassova
Not training a calibrator because it is not needed.
diff --git a/test/BaselineOutput/Common/LightGBMMC/LightGBMMC-TrainTest-iris.key-out.txt b/test/BaselineOutput/Common/LightGBMMC/LightGBMMC-TrainTest-iris.key-out.txt
index 1c4cb95912..a92727951e 100644
--- a/test/BaselineOutput/Common/LightGBMMC/LightGBMMC-TrainTest-iris.key-out.txt
+++ b/test/BaselineOutput/Common/LightGBMMC/LightGBMMC-TrainTest-iris.key-out.txt
@@ -1,7 +1,7 @@
maml.exe TrainTest test=%Data% tr=LightGBMMC{nt=1 iter=10 v=- lr=0.2 mil=10 nl=20} dout=%Output% loader=Text{col=Label:TX:0 col=Features:1-*} data=%Data% out=%Output% seed=1 xf=Term{col=Label}
Not adding a normalizer.
-Auto-tuning parameters: UseCat = False
-Auto-tuning parameters: UseSoftmax = False
+Auto-tuning parameters: UseCategoricalSplit = False
+Auto-tuning parameters: UseSoftMaximum = False
LightGBM objective=multiclassova
Not training a calibrator because it is not needed.
diff --git a/test/BaselineOutput/Common/LightGBMMC/LightGBMMC-TrainTest-iris.keyU404-out.txt b/test/BaselineOutput/Common/LightGBMMC/LightGBMMC-TrainTest-iris.keyU404-out.txt
index 1de8c3d919..e0001f3d38 100644
--- a/test/BaselineOutput/Common/LightGBMMC/LightGBMMC-TrainTest-iris.keyU404-out.txt
+++ b/test/BaselineOutput/Common/LightGBMMC/LightGBMMC-TrainTest-iris.keyU404-out.txt
@@ -1,7 +1,7 @@
maml.exe TrainTest test=%Data% tr=LightGBMMC{nt=1 iter=10 v=- lr=0.2 mil=10 nl=20} dout=%Output% loader=Text{col=Label:U4[0-2]:0 col=Features:1-4} data=%Data% out=%Output% seed=1
Not adding a normalizer.
-Auto-tuning parameters: UseCat = False
-Auto-tuning parameters: UseSoftmax = False
+Auto-tuning parameters: UseCategoricalSplit = False
+Auto-tuning parameters: UseSoftMaximum = False
LightGBM objective=multiclassova
Not training a calibrator because it is not needed.
diff --git a/test/BaselineOutput/Common/LightGBMR/LightGBMReg-CV-generatedRegressionDataset-out.txt b/test/BaselineOutput/Common/LightGBMR/LightGBMReg-CV-generatedRegressionDataset-out.txt
index afa867d488..1fc6084997 100644
--- a/test/BaselineOutput/Common/LightGBMR/LightGBMReg-CV-generatedRegressionDataset-out.txt
+++ b/test/BaselineOutput/Common/LightGBMR/LightGBMReg-CV-generatedRegressionDataset-out.txt
@@ -1,10 +1,10 @@
maml.exe CV tr=LightGBMR{nt=1 iter=50 v=+ booster=gbdt{l1=0.2 l2=0.2} lr=0.2 mil=10 nl=20} threads=- dout=%Output% loader=Text{col=Label:R4:11 col=Features:R4:0-10 sep=; header+} data=%Data% seed=1
Not adding a normalizer.
-Auto-tuning parameters: UseCat = False
+Auto-tuning parameters: UseCategoricalSplit = False
LightGBM objective=regression
Not training a calibrator because it is not needed.
Not adding a normalizer.
-Auto-tuning parameters: UseCat = False
+Auto-tuning parameters: UseCategoricalSplit = False
LightGBM objective=regression
Not training a calibrator because it is not needed.
L1(avg): 27.477977
diff --git a/test/BaselineOutput/Common/LightGBMR/LightGBMReg-TrainTest-generatedRegressionDataset-out.txt b/test/BaselineOutput/Common/LightGBMR/LightGBMReg-TrainTest-generatedRegressionDataset-out.txt
index f15a4bb020..909d9f0012 100644
--- a/test/BaselineOutput/Common/LightGBMR/LightGBMReg-TrainTest-generatedRegressionDataset-out.txt
+++ b/test/BaselineOutput/Common/LightGBMR/LightGBMReg-TrainTest-generatedRegressionDataset-out.txt
@@ -1,6 +1,6 @@
maml.exe TrainTest test=%Data% tr=LightGBMR{nt=1 iter=50 v=+ booster=gbdt{l1=0.2 l2=0.2} lr=0.2 mil=10 nl=20} dout=%Output% loader=Text{col=Label:R4:11 col=Features:R4:0-10 sep=; header+} data=%Data% out=%Output% seed=1
Not adding a normalizer.
-Auto-tuning parameters: UseCat = False
+Auto-tuning parameters: UseCategoricalSplit = False
LightGBM objective=regression
Not training a calibrator because it is not needed.
L1(avg): 3.472291
diff --git a/test/BaselineOutput/Common/LightGBMR/LightGBMRegMae-CV-generatedRegressionDataset.MAE-out.txt b/test/BaselineOutput/Common/LightGBMR/LightGBMRegMae-CV-generatedRegressionDataset.MAE-out.txt
index c2530555e1..4550a80d3c 100644
--- a/test/BaselineOutput/Common/LightGBMR/LightGBMRegMae-CV-generatedRegressionDataset.MAE-out.txt
+++ b/test/BaselineOutput/Common/LightGBMR/LightGBMRegMae-CV-generatedRegressionDataset.MAE-out.txt
@@ -1,10 +1,10 @@
maml.exe CV tr=LightGBMR{nt=1 iter=50 em=mae v=+ lr=0.2 mil=10 nl=20} threads=- dout=%Output% loader=Text{col=Label:R4:11 col=Features:R4:0-10 sep=; header+} data=%Data% seed=1
Not adding a normalizer.
-Auto-tuning parameters: UseCat = False
+Auto-tuning parameters: UseCategoricalSplit = False
LightGBM objective=regression
Not training a calibrator because it is not needed.
Not adding a normalizer.
-Auto-tuning parameters: UseCat = False
+Auto-tuning parameters: UseCategoricalSplit = False
LightGBM objective=regression
Not training a calibrator because it is not needed.
L1(avg): 27.482854
diff --git a/test/BaselineOutput/Common/LightGBMR/LightGBMRegMae-TrainTest-generatedRegressionDataset.MAE-out.txt b/test/BaselineOutput/Common/LightGBMR/LightGBMRegMae-TrainTest-generatedRegressionDataset.MAE-out.txt
index aaad5d20e5..59d2ceaa05 100644
--- a/test/BaselineOutput/Common/LightGBMR/LightGBMRegMae-TrainTest-generatedRegressionDataset.MAE-out.txt
+++ b/test/BaselineOutput/Common/LightGBMR/LightGBMRegMae-TrainTest-generatedRegressionDataset.MAE-out.txt
@@ -1,6 +1,6 @@
maml.exe TrainTest test=%Data% tr=LightGBMR{nt=1 iter=50 em=mae v=+ lr=0.2 mil=10 nl=20} dout=%Output% loader=Text{col=Label:R4:11 col=Features:R4:0-10 sep=; header+} data=%Data% out=%Output% seed=1
Not adding a normalizer.
-Auto-tuning parameters: UseCat = False
+Auto-tuning parameters: UseCategoricalSplit = False
LightGBM objective=regression
Not training a calibrator because it is not needed.
L1(avg): 3.428896
diff --git a/test/BaselineOutput/Common/LightGBMR/LightGBMRegRmse-CV-generatedRegressionDataset.RMSE-out.txt b/test/BaselineOutput/Common/LightGBMR/LightGBMRegRmse-CV-generatedRegressionDataset.RMSE-out.txt
index 483c724038..71d131bb5a 100644
--- a/test/BaselineOutput/Common/LightGBMR/LightGBMRegRmse-CV-generatedRegressionDataset.RMSE-out.txt
+++ b/test/BaselineOutput/Common/LightGBMR/LightGBMRegRmse-CV-generatedRegressionDataset.RMSE-out.txt
@@ -1,10 +1,10 @@
maml.exe CV tr=LightGBMR{nt=1 iter=50 em=rmse v=+ lr=0.2 mil=10 nl=20} threads=- dout=%Output% loader=Text{col=Label:R4:11 col=Features:R4:0-10 sep=; header+} data=%Data% seed=1
Not adding a normalizer.
-Auto-tuning parameters: UseCat = False
+Auto-tuning parameters: UseCategoricalSplit = False
LightGBM objective=regression
Not training a calibrator because it is not needed.
Not adding a normalizer.
-Auto-tuning parameters: UseCat = False
+Auto-tuning parameters: UseCategoricalSplit = False
LightGBM objective=regression
Not training a calibrator because it is not needed.
L1(avg): 27.482854
diff --git a/test/BaselineOutput/Common/LightGBMR/LightGBMRegRmse-TrainTest-generatedRegressionDataset.RMSE-out.txt b/test/BaselineOutput/Common/LightGBMR/LightGBMRegRmse-TrainTest-generatedRegressionDataset.RMSE-out.txt
index 1ed592dd87..c919475347 100644
--- a/test/BaselineOutput/Common/LightGBMR/LightGBMRegRmse-TrainTest-generatedRegressionDataset.RMSE-out.txt
+++ b/test/BaselineOutput/Common/LightGBMR/LightGBMRegRmse-TrainTest-generatedRegressionDataset.RMSE-out.txt
@@ -1,6 +1,6 @@
maml.exe TrainTest test=%Data% tr=LightGBMR{nt=1 iter=50 em=rmse v=+ lr=0.2 mil=10 nl=20} dout=%Output% loader=Text{col=Label:R4:11 col=Features:R4:0-10 sep=; header+} data=%Data% out=%Output% seed=1
Not adding a normalizer.
-Auto-tuning parameters: UseCat = False
+Auto-tuning parameters: UseCategoricalSplit = False
LightGBM objective=regression
Not training a calibrator because it is not needed.
L1(avg): 3.428896
diff --git a/test/Microsoft.ML.Predictor.Tests/TestPredictors.cs b/test/Microsoft.ML.Predictor.Tests/TestPredictors.cs
index 731b013189..cc2257fe71 100644
--- a/test/Microsoft.ML.Predictor.Tests/TestPredictors.cs
+++ b/test/Microsoft.ML.Predictor.Tests/TestPredictors.cs
@@ -777,8 +777,8 @@ public void TestMultiClassEnsembleCombiner()
LightGbm.TrainMultiClass(Env, new Options
{
FeatureColumnName = "Features",
- NumBoostRound = 5,
- NumLeaves = 4,
+ NumberOfIterations = 5,
+ NumberOfLeaves = 4,
LabelColumnName = DefaultColumnNames.Label,
TrainingData = dataView
}).PredictorModel,
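For comparison with the entry-point call above, a hypothetical sketch of the same multiclass configuration through the MLContext catalog, with the softmax choice pinned explicitly instead of being auto-tuned (property names are taken from the renamed manifest entries and not verified against the final API; mlContext is an assumed MLContext instance):

    // Hypothetical: assumes the shared LightGBM Options class exposes the renamed properties.
    var multiclassTrainer = mlContext.MulticlassClassification.Trainers.LightGbm(new Options
    {
        FeatureColumnName = "Features",
        LabelColumnName = "Label",
        NumberOfIterations = 5,   // previously NumBoostRound
        NumberOfLeaves = 4,       // previously NumLeaves
        UseSoftMaximum = true     // previously UseSoftmax; auto-tuned when left unset
    });
    var multiclassModel = multiclassTrainer.Fit(dataView);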
diff --git a/test/Microsoft.ML.StaticPipelineTesting/Training.cs b/test/Microsoft.ML.StaticPipelineTesting/Training.cs
index 3daaa1f3c2..cfdd9851d6 100644
--- a/test/Microsoft.ML.StaticPipelineTesting/Training.cs
+++ b/test/Microsoft.ML.StaticPipelineTesting/Training.cs
@@ -576,8 +576,8 @@ public void LightGbmRegression()
var est = reader.MakeNewEstimator()
.Append(r => (r.label, score: catalog.Trainers.LightGbm(r.label, r.features,
- numBoostRound: 10,
- numLeaves: 5,
+ numberOfIterations: 10,
+ numberOfLeaves: 5,
onFit: (p) => { pred = p; })));
var pipe = reader.Append(est);
diff --git a/test/Microsoft.ML.Tests/OnnxConversionTest.cs b/test/Microsoft.ML.Tests/OnnxConversionTest.cs
index 492a71a501..0983dbef3d 100644
--- a/test/Microsoft.ML.Tests/OnnxConversionTest.cs
+++ b/test/Microsoft.ML.Tests/OnnxConversionTest.cs
@@ -345,7 +345,7 @@ public void LightGbmBinaryClassificationOnnxConversionTest()
var dynamicPipeline =
mlContext.Transforms.Normalize("FeatureVector")
.AppendCacheCheckpoint(mlContext)
- .Append(mlContext.Regression.Trainers.LightGbm(labelColumnName: "Target", featureColumnName: "FeatureVector", numBoostRound: 3, numLeaves: 16, minDataPerLeaf: 100));
+ .Append(mlContext.Regression.Trainers.LightGbm(labelColumnName: "Target", featureColumnName: "FeatureVector", numberOfIterations: 3, leafCount: 16, minimumDataPerLeaf: 100));
var model = dynamicPipeline.Fit(data);
// Step 2: Convert ML.NET model to ONNX format and save it as a file.
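Where the comment above mentions the export step, a minimal, hedged sketch of what it could look like (assuming the mlContext.Model.ConvertToOnnx extension is available in this build and that System.IO is imported; the output path is illustrative, and model/data refer to the fitted pipeline and IDataView from the surrounding test):

    // Illustrative sketch of the ONNX export step referenced above.
    using (var stream = File.Create("lightgbm-regression.onnx"))
    {
        mlContext.Model.ConvertToOnnx(model, data, stream);
    }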
diff --git a/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/TensorflowTests.cs b/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/TensorflowTests.cs
index 8e6823739c..c495ae0428 100644
--- a/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/TensorflowTests.cs
+++ b/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/TensorflowTests.cs
@@ -685,8 +685,8 @@ private void ExecuteTFTransformMNISTConvTrainingTest(bool shuffle, int? shuffleS
LabelColumnName = "Label",
FeatureColumnName = "Features",
Seed = 1,
- NThread = 1,
- NumBoostRound = 1
+ NumberOfThreads = 1,
+ NumberOfIterations = 1
}));
var trainedModel = pipe.Fit(preprocessedTrainData);
diff --git a/test/Microsoft.ML.Tests/TrainerEstimators/TreeEstimators.cs b/test/Microsoft.ML.Tests/TrainerEstimators/TreeEstimators.cs
index 444db573da..db58372e6e 100644
--- a/test/Microsoft.ML.Tests/TrainerEstimators/TreeEstimators.cs
+++ b/test/Microsoft.ML.Tests/TrainerEstimators/TreeEstimators.cs
@@ -50,9 +50,9 @@ public void LightGBMBinaryEstimator()
var trainer = ML.BinaryClassification.Trainers.LightGbm(new Options
{
- NumLeaves = 10,
- NThread = 1,
- MinDataPerLeaf = 2,
+ NumberOfLeaves = 10,
+ NumberOfThreads = 1,
+ MinimumDataPerLeaf = 2,
});
var pipeWithTrainer = pipe.Append(trainer);
@@ -169,9 +169,9 @@ public void LightGBMRegressorEstimator()
var dataView = GetRegressionPipeline();
var trainer = ML.Regression.Trainers.LightGbm(new Options
{
- NThread = 1,
+ NumberOfThreads = 1,
NormalizeFeatures = NormalizeOption.Warn,
- CatL2 = 5,
+ L2Categorical = 5,
});
TestEstimatorCore(trainer, dataView);
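The regression estimator test above exercises L2Categorical on its own; a hedged sketch of tuning the renamed categorical-split regularizers together (values are arbitrary, MaximumCategoricalThreshold and CategoricalSmoothing are assumed to sit on the same Options class as in the manifest, and mlContext/dataView stand in for an MLContext and the regression data used above):

    var categoricalTrainer = mlContext.Regression.Trainers.LightGbm(new Options
    {
        NumberOfThreads = 1,                // previously NThread
        UseCategoricalSplit = true,         // previously UseCat
        MaximumCategoricalThreshold = 32,   // previously MaxCatThreshold
        CategoricalSmoothing = 10,          // previously CatSmooth
        L2Categorical = 5                   // previously CatL2
    });
    var categoricalModel = categoricalTrainer.Fit(dataView);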
@@ -295,10 +295,10 @@ private void LightGbmHelper(bool useSoftmax, out string modelString, out List