Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[AutoML] CLI telemetry rev #3789

Merged
merged 11 commits into from
Jun 17, 2019
18 changes: 18 additions & 0 deletions src/Microsoft.ML.AutoML/ColumnInference/ColumnInformationUtil.cs
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,24 @@ public static IEnumerable<string> GetColumnNames(ColumnInformation columnInforma
AddStringsToListIfNotNull(columnNames, columnInformation.TextColumnNames);
return columnNames;
}

public static IDictionary<ColumnPurpose, int> CountColumnsByPurpose(ColumnInformation columnInformation)
{
var result = new Dictionary<ColumnPurpose, int>();
var columnNames = GetColumnNames(columnInformation);
foreach (var columnName in columnNames)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks to me like this could be implemented more tersely using LINQ grouping. Are we anti-LINQ in this repo? :)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I love LINQ! Here, I think the existing way is more readable, but I see where you're coming from. Style is so idiosyncratic

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is my version; do you think it isn't as readable? Fair warning: I haven't tested whether it works :)

        var columnNames = ColumnInformationUtil.GetColumnNames(this.ColumnInformation);
        var columnPurposes = columnNames.Select(c => (Name: c, Purpose: ColumnInformation.GetColumnPurpose(c))).Where(c => c.Purpose != null);
        var purposeCounts = columnPurposes.GroupBy(c => c.Purpose).ToDictionary(c => c.Key, c => c.Count());

{
var purpose = columnInformation.GetColumnPurpose(columnName);
if (purpose == null)
{
continue;
}

result.TryGetValue(purpose.Value, out int count);
result[purpose.Value] = ++count;
}
return result;
}

private static void AddStringsToListIfNotNull(List<string> list, IEnumerable<string> strings)
{
Expand Down
29 changes: 29 additions & 0 deletions src/Microsoft.ML.AutoML/TrainerExtensions/SweepableParams.cs
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,11 @@ private static IEnumerable<SweepableParam> BuildLbfgsArgsParams()
};
}

/// <summary>
/// The names of every hyperparameter swept across all trainers.
/// </summary>
/// <remarks>
/// Populated once from <see cref="GetAllSweepableParameterNames"/> when the static fields of the
/// containing type are initialized. The field is <c>readonly</c> so the reference cannot be
/// reassigned by callers; the set instance itself is still exposed as a mutable <c>ISet</c>.
/// </remarks>
public static readonly ISet<string> AllHyperparameterNames = GetAllSweepableParameterNames();
Dmitry-A marked this conversation as resolved.
Show resolved Hide resolved

public static IEnumerable<SweepableParam> BuildAveragePerceptronParams()
{
return BuildAveragedLinearArgsParams().Concat(BuildOnlineLinearArgsParams());
Expand Down Expand Up @@ -172,5 +177,29 @@ public static IEnumerable<SweepableParam> BuildSymSgdLogisticRegressionParams()
new SweepableDiscreteParam("UpdateFrequency", new object[] { "<Auto>", 5, 20 })
};
}

/// <summary>
/// Gets the name of every hyperparameter swept across all trainers.
/// </summary>
/// <returns>
/// A new set holding the distinct <c>Name</c> values of the parameters returned by the
/// trainer-specific builder methods called below.
/// </returns>
public static ISet<string> GetAllSweepableParameterNames()
{
    var sweepableParams = new List<SweepableParam>();

    // Each builder is invoked exactly once. Names shared between trainers are
    // collapsed by the HashSet constructed at the end.
    sweepableParams.AddRange(BuildAveragePerceptronParams());
    sweepableParams.AddRange(BuildFastForestParams());
    sweepableParams.AddRange(BuildFastTreeParams());
    sweepableParams.AddRange(BuildFastTreeTweedieParams());
    sweepableParams.AddRange(BuildLightGbmParamsMulticlass());
    sweepableParams.AddRange(BuildLightGbmParams());
    sweepableParams.AddRange(BuildLinearSvmParams());
    sweepableParams.AddRange(BuildLbfgsLogisticRegressionParams());
    sweepableParams.AddRange(BuildOnlineGradientDescentParams());
    sweepableParams.AddRange(BuildLbfgsPoissonRegressionParams());
    sweepableParams.AddRange(BuildSdcaParams());
    sweepableParams.AddRange(BuildOlsParams());
    sweepableParams.AddRange(BuildSgdParams());
    sweepableParams.AddRange(BuildSymSgdLogisticRegressionParams());

    return new HashSet<string>(sweepableParams.Select(p => p.Name));
}
}
}
9 changes: 9 additions & 0 deletions src/mlnet/CodeGenerator/CodeGenerationHelper.cs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
using Microsoft.ML.CLI.CodeGenerator.CSharp;
using Microsoft.ML.CLI.Data;
using Microsoft.ML.CLI.ShellProgressBar;
using Microsoft.ML.CLI.Telemetry.Events;
using Microsoft.ML.CLI.Utilities;
using Microsoft.ML.Data;
using NLog;
Expand Down Expand Up @@ -51,7 +52,9 @@ public void GenerateCode()
{
inputColumnInformation.IgnoredColumnNames.Add(value);
}
var inferColumnsStopwatch = Stopwatch.StartNew();
columnInference = automlEngine.InferColumns(context, inputColumnInformation);
InferColumnsEvent.TrackEvent(columnInference.ColumnInformation, inferColumnsStopwatch.Elapsed);
Dmitry-A marked this conversation as resolved.
Show resolved Hide resolved
}
catch (Exception)
{
Expand All @@ -74,6 +77,9 @@ public void GenerateCode()
// We define three separate result lists because the AutoML API offers no common
// class/interface to handle all three tasks together.

// Start a timer for the experiment
var stopwatch = Stopwatch.StartNew();

List<RunDetail<BinaryClassificationMetrics>> completedBinaryRuns = new List<RunDetail<BinaryClassificationMetrics>>();
List<RunDetail<MulticlassClassificationMetrics>> completedMulticlassRuns = new List<RunDetail<MulticlassClassificationMetrics>>();
List<RunDetail<RegressionMetrics>> completedRegressionRuns = new List<RunDetail<RegressionMetrics>>();
Expand Down Expand Up @@ -236,6 +242,7 @@ public void GenerateCode()
{
var binaryMetric = new BinaryExperimentSettings().OptimizingMetric;
var bestBinaryIteration = BestResultUtil.GetBestRun(completedBinaryRuns, binaryMetric);
ExperimentCompletedEvent.TrackEvent(bestBinaryIteration, completedBinaryRuns, TaskKind.BinaryClassification, stopwatch.Elapsed);
bestPipeline = bestBinaryIteration.Pipeline;
bestModel = bestBinaryIteration.Model;
ConsolePrinter.ExperimentResultsHeader(LogLevel.Info, settings.MlTask, settings.Dataset.Name, columnInformation.LabelColumnName, elapsedTime.ToString("F2"), completedBinaryRuns.Count());
Expand All @@ -253,6 +260,7 @@ public void GenerateCode()
{
var regressionMetric = new RegressionExperimentSettings().OptimizingMetric;
var bestRegressionIteration = BestResultUtil.GetBestRun(completedRegressionRuns, regressionMetric);
ExperimentCompletedEvent.TrackEvent(bestRegressionIteration, completedRegressionRuns, TaskKind.Regression, stopwatch.Elapsed);
bestPipeline = bestRegressionIteration.Pipeline;
bestModel = bestRegressionIteration.Model;
ConsolePrinter.ExperimentResultsHeader(LogLevel.Info, settings.MlTask, settings.Dataset.Name, columnInformation.LabelColumnName, elapsedTime.ToString("F2"), completedRegressionRuns.Count());
Expand All @@ -270,6 +278,7 @@ public void GenerateCode()
{
var muliclassMetric = new MulticlassExperimentSettings().OptimizingMetric;
var bestMulticlassIteration = BestResultUtil.GetBestRun(completedMulticlassRuns, muliclassMetric);
ExperimentCompletedEvent.TrackEvent(bestMulticlassIteration, completedMulticlassRuns, TaskKind.MulticlassClassification, stopwatch.Elapsed);
bestPipeline = bestMulticlassIteration.Pipeline;
bestModel = bestMulticlassIteration.Model;
ConsolePrinter.ExperimentResultsHeader(LogLevel.Info, settings.MlTask, settings.Dataset.Name, columnInformation.LabelColumnName, elapsedTime.ToString("F2"), completedMulticlassRuns.Count());
Expand Down
7 changes: 1 addition & 6 deletions src/mlnet/Commands/New/NewCommandHandler.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using Microsoft.DotNet.Cli.Telemetry;
using Microsoft.ML.CLI.CodeGenerator;
using Microsoft.ML.CLI.Data;

Expand All @@ -11,18 +10,14 @@ namespace Microsoft.ML.CLI.Commands.New
internal class NewCommand : ICommand
{
private readonly NewCommandSettings settings;
private readonly MlTelemetry telemetry;

internal NewCommand(NewCommandSettings settings, MlTelemetry telemetry)
internal NewCommand(NewCommandSettings settings)
{
this.settings = settings;
this.telemetry = telemetry;
}

public void Execute()
{
telemetry.LogAutoTrainMlCommand(settings.Dataset.Name, settings.MlTask.ToString(), settings.Dataset.Length);

CodeGenerationHelper codeGenerationHelper = new CodeGenerationHelper(new AutoMLEngine(settings), settings); // Needs to be improved.
codeGenerationHelper.GenerateCode();
}
Expand Down
35 changes: 28 additions & 7 deletions src/mlnet/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,13 @@
using System;
using System.CommandLine.Builder;
using System.CommandLine.Invocation;
using System.Diagnostics;
using System.IO;
using System.Linq;
using Microsoft.DotNet.Cli.Telemetry;
using Microsoft.ML.CLI.Commands;
using Microsoft.ML.CLI.Commands.New;
using Microsoft.ML.CLI.Data;
using Microsoft.ML.CLI.Telemetry.Events;
using Microsoft.ML.CLI.Utilities;
using NLog;
using NLog.Targets;
Expand All @@ -20,24 +21,33 @@ namespace Microsoft.ML.CLI
class Program
{
private static Logger logger = LogManager.GetCurrentClassLogger();

public static void Main(string[] args)
{
var telemetry = new MlTelemetry();
Telemetry.Telemetry.Initialize();
int exitCode = 1;
Exception ex = null;
var stopwatch = Stopwatch.StartNew();

var mlNetCommandEvent = new MLNetCommandEvent();

// Create the handler outside so that the command line and the handler are decoupled and testable.
var handler = CommandHandler.Create<NewCommandSettings>(
(options) =>
{
try
{
// Send telemetry event for command issued
mlNetCommandEvent.AutoTrainCommandSettings = options;
Dmitry-A marked this conversation as resolved.
Show resolved Hide resolved
mlNetCommandEvent.TrackEvent();

// Map the verbosity to internal levels
var verbosity = Utils.GetVerbosity(options.Verbosity);

// Build the output path
string outputBaseDir = string.Empty;
if (options.Name == null)
{

options.Name = "Sample" + Utils.GetTaskKind(options.MlTask).ToString();
outputBaseDir = Path.Combine(options.OutputPath.FullName, options.Name);
}
Expand All @@ -50,7 +60,7 @@ public static void Main(string[] args)
options.OutputPath = new DirectoryInfo(outputBaseDir);

// Instantiate the command
var command = new NewCommand(options, telemetry);
var command = new NewCommand(options);

// Override the Logger Configuration
var logconsole = LogManager.Configuration.FindTargetByName("logconsole");
Expand All @@ -67,11 +77,14 @@ public static void Main(string[] args)
}
catch (Exception e)
{
ex = e;
logger.Log(LogLevel.Error, e.Message);
logger.Log(LogLevel.Debug, e.ToString());
logger.Log(LogLevel.Info, Strings.LookIntoLogFile);
logger.Log(LogLevel.Error, Strings.Exiting);
}

MLNetCommandEndEvent.TrackEvent(stopwatch.Elapsed, ex);
});

var parser = new CommandLineBuilder()
Expand All @@ -82,7 +95,8 @@ public static void Main(string[] args)

var parseResult = parser.Parse(args);

if (parseResult.Errors.Count == 0)
var commandParseSucceeded = !parseResult.Errors.Any();
if (commandParseSucceeded)
{
if (parseResult.RootCommandResult.Children.Count > 0)
{
Expand All @@ -95,13 +109,20 @@ public static void Main(string[] args)

var explicitlySpecifiedOptions = options.Where(opt => !opt.IsImplicit).Select(opt => opt.Name);

telemetry.SetCommandAndParameters(command.Name, explicitlySpecifiedOptions);
mlNetCommandEvent.CommandLineParametersUsed = explicitlySpecifiedOptions;
}
}
}

// Send system info telemetry
SystemInfoEvent.TrackEvent();

parser.InvokeAsync(parseResult).Wait();
// Send exit telemetry
ApplicationExitEvent.TrackEvent(exitCode, commandParseSucceeded, stopwatch.Elapsed, ex);
// Flush pending telemetry logs
Telemetry.Telemetry.Flush(TimeSpan.FromSeconds(5));
Environment.Exit(exitCode);
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
// Licensed under the MIT license. See LICENSE file in the project root for full license information.

using System.IO;
using Microsoft.DotNet.AutoML;
using Microsoft.Extensions.EnvironmentAbstractions;
using Microsoft.ML.CLI.Telemetry;

namespace Microsoft.DotNet.Configurer
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@

using System;
using System.Collections.Generic;
using Microsoft.DotNet.AutoML;
using System.IO;
using Microsoft.DotNet.Configurer;
using Microsoft.ML.CLI.Telemetry;
using RuntimeEnvironment = Microsoft.DotNet.PlatformAbstractions.RuntimeEnvironment;
using RuntimeInformation = System.Runtime.InteropServices.RuntimeInformation;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@

using System;
using System.IO;
using Microsoft.DotNet.AutoML;
using Microsoft.Extensions.EnvironmentAbstractions;
using Microsoft.ML.CLI.Telemetry;

namespace Microsoft.DotNet.Configurer
{
Expand Down
28 changes: 28 additions & 0 deletions src/mlnet/Telemetry/Events/ApplicationExitEvent.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System;
using System.Collections.Generic;
using System.Diagnostics;

namespace Microsoft.ML.CLI.Telemetry.Events
{
/// <summary>
/// Telemetry event for CLI application exit.
/// </summary>
internal class ApplicationExitEvent
{
    /// <summary>
    /// Sends the "application-exit" telemetry event.
    /// </summary>
    /// <param name="exitCode">Value reported in the "ExitCode" property.</param>
    /// <param name="commandParseSucceeded">Value reported in the "CommandParseSucceeded" property.</param>
    /// <param name="duration">Duration forwarded unchanged to the telemetry call.</param>
    /// <param name="ex">Exception forwarded unchanged to the telemetry call.</param>
    public static void TrackEvent(int exitCode, bool commandParseSucceeded, TimeSpan duration, Exception ex)
    {
        // Telemetry properties are machine-readable, so numbers are formatted culture-invariantly.
        var invariantCulture = System.Globalization.CultureInfo.InvariantCulture;

        // Process.GetCurrentProcess() returns a new disposable Process component; release it when done.
        using (var currentProcess = Process.GetCurrentProcess())
        {
            Telemetry.TrackEvent("application-exit",
                new Dictionary<string, string>
                {
                    { "CommandParseSucceeded", commandParseSucceeded.ToString() },
                    { "ExitCode", exitCode.ToString(invariantCulture) },
                    { "PeakMemory", currentProcess.PeakWorkingSet64.ToString(invariantCulture) },
                },
                duration, ex);
        }
    }
}
}
36 changes: 36 additions & 0 deletions src/mlnet/Telemetry/Events/ExperimentCompletedEvent.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using Microsoft.ML.AutoML;

namespace Microsoft.ML.CLI.Telemetry.Events
{
/// <summary>
/// Telemetry event for AutoML experiment completion.
/// </summary>
internal static class ExperimentCompletedEvent
{
    /// <summary>
    /// Sends the "experiment-completed" telemetry event.
    /// </summary>
    /// <typeparam name="TMetrics">Metrics type carried by the run details.</typeparam>
    /// <param name="bestRun">
    /// Run whose pipeline and trainer name are reported. "BestIterationNum" is its 1-based
    /// position in <paramref name="allRuns"/> (0 when it is not found in the list).
    /// </param>
    /// <param name="allRuns">All runs of the experiment; its size is reported as "NumIterations".</param>
    /// <param name="machineLearningTask">Value reported in the "MachineLearningTask" property.</param>
    /// <param name="duration">Duration forwarded unchanged to the telemetry call.</param>
    public static void TrackEvent<TMetrics>(RunDetail<TMetrics> bestRun,
        List<RunDetail<TMetrics>> allRuns,
        TaskKind machineLearningTask,
        TimeSpan duration)
    {
        // Telemetry properties are machine-readable, so numbers are formatted culture-invariantly.
        var invariantCulture = System.Globalization.CultureInfo.InvariantCulture;

        // Process.GetCurrentProcess() returns a new disposable Process component; release it when done.
        using (var currentProcess = Process.GetCurrentProcess())
        {
            Telemetry.TrackEvent("experiment-completed",
                new Dictionary<string, string>()
                {
                    { "BestIterationNum", (allRuns.IndexOf(bestRun) + 1).ToString(invariantCulture) },
                    { "BestPipeline", Telemetry.GetSanitizedPipelineStr(bestRun.Pipeline) },
                    { "BestTrainer", bestRun.TrainerName },
                    { "MachineLearningTask", machineLearningTask.ToString() },
                    // List<T>.Count property instead of the LINQ Count() extension.
                    { "NumIterations", allRuns.Count.ToString(invariantCulture) },
                    { "PeakMemory", currentProcess.PeakWorkingSet64.ToString(invariantCulture) },
                },
                duration);
        }
    }
}
}
Loading