From a51fe70c30f8e07b078165c964815c0a7c5ba792 Mon Sep 17 00:00:00 2001 From: Tim Date: Tue, 8 Nov 2022 08:54:11 -0500 Subject: [PATCH] Added memory diagnoser option to measure survived memory from the first benchmark run. --- .../Attributes/MemoryDiagnoserAttribute.cs | 5 +- src/BenchmarkDotNet/Code/CodeGenerator.cs | 1 + .../Configs/ImmutableConfig.cs | 2 + .../ConsoleArguments/CommandLineOptions.cs | 3 + .../ConsoleArguments/ConfigParser.cs | 5 +- .../Diagnosers/MemoryDiagnoser.cs | 19 +++ .../Diagnosers/MemoryDiagnoserConfig.cs | 5 +- src/BenchmarkDotNet/Engines/Consumer.cs | 8 + src/BenchmarkDotNet/Engines/Engine.cs | 112 +++++++++++-- src/BenchmarkDotNet/Engines/EngineFactory.cs | 8 +- .../Engines/EngineParameters.cs | 2 + src/BenchmarkDotNet/Engines/GcStats.cs | 37 +++-- .../Exporters/Csv/CsvMeasurementsExporter.cs | 4 + .../Templates/BenchmarkType.txt | 5 + .../Emitters/ConsumableConsumeEmitter.cs | 8 + .../Runnable/RunnableReuse.cs | 1 + .../InProcess.NoEmit/InProcessNoEmitRunner.cs | 1 + .../Toolchains/InProcess/InProcessRunner.cs | 1 + .../MemoryDiagnoserTests.cs | 147 +++++++++++++++++- 19 files changed, 342 insertions(+), 32 deletions(-) diff --git a/src/BenchmarkDotNet/Attributes/MemoryDiagnoserAttribute.cs b/src/BenchmarkDotNet/Attributes/MemoryDiagnoserAttribute.cs index 52e2151441..6d488c217b 100644 --- a/src/BenchmarkDotNet/Attributes/MemoryDiagnoserAttribute.cs +++ b/src/BenchmarkDotNet/Attributes/MemoryDiagnoserAttribute.cs @@ -10,9 +10,10 @@ public class MemoryDiagnoserAttribute : Attribute, IConfigSource public IConfig Config { get; } /// Display Garbage Collections per Generation columns (Gen 0, Gen 1, Gen 2). True by default. - public MemoryDiagnoserAttribute(bool displayGenColumns = true) + /// If true, monitoring will be enabled and survived memory will be measured on the first benchmark run. 
+ public MemoryDiagnoserAttribute(bool displayGenColumns = true, bool includeSurvived = false) { - Config = ManualConfig.CreateEmpty().AddDiagnoser(new MemoryDiagnoser(new MemoryDiagnoserConfig(displayGenColumns))); + Config = ManualConfig.CreateEmpty().AddDiagnoser(new MemoryDiagnoser(new MemoryDiagnoserConfig(displayGenColumns, includeSurvived))); } } } \ No newline at end of file diff --git a/src/BenchmarkDotNet/Code/CodeGenerator.cs b/src/BenchmarkDotNet/Code/CodeGenerator.cs index 52ae0e5478..4e60f8e8f3 100644 --- a/src/BenchmarkDotNet/Code/CodeGenerator.cs +++ b/src/BenchmarkDotNet/Code/CodeGenerator.cs @@ -63,6 +63,7 @@ internal static string Generate(BuildPartition buildPartition) .Replace("$PassArguments$", passArguments) .Replace("$EngineFactoryType$", GetEngineFactoryTypeName(benchmark)) .Replace("$MeasureExtraStats$", buildInfo.Config.HasExtraStatsDiagnoser() ? "true" : "false") + .Replace("$MeasureSurvivedMemory$", buildInfo.Config.HasSurvivedMemoryDiagnoser() ? "true" : "false") .Replace("$DisassemblerEntryMethodName$", DisassemblerConstants.DisassemblerEntryMethodName) .Replace("$WorkloadMethodCall$", provider.GetWorkloadMethodCall(passArguments)) .RemoveRedundantIfDefines(compilationId); diff --git a/src/BenchmarkDotNet/Configs/ImmutableConfig.cs b/src/BenchmarkDotNet/Configs/ImmutableConfig.cs index 185aa03f0e..5d3cb28632 100644 --- a/src/BenchmarkDotNet/Configs/ImmutableConfig.cs +++ b/src/BenchmarkDotNet/Configs/ImmutableConfig.cs @@ -104,6 +104,8 @@ internal ImmutableConfig( public bool HasMemoryDiagnoser() => diagnosers.OfType().Any(); + public bool HasSurvivedMemoryDiagnoser() => diagnosers.Any(diagnoser => diagnoser is MemoryDiagnoser md && md.Config.IncludeSurvived); + public bool HasThreadingDiagnoser() => diagnosers.Contains(ThreadingDiagnoser.Default); public bool HasExceptionDiagnoser() => diagnosers.Contains(ExceptionDiagnoser.Default); diff --git a/src/BenchmarkDotNet/ConsoleArguments/CommandLineOptions.cs 
b/src/BenchmarkDotNet/ConsoleArguments/CommandLineOptions.cs index e3aed1fedd..3c4e1f2e26 100644 --- a/src/BenchmarkDotNet/ConsoleArguments/CommandLineOptions.cs +++ b/src/BenchmarkDotNet/ConsoleArguments/CommandLineOptions.cs @@ -62,6 +62,9 @@ public bool UseDisassemblyDiagnoser [Option('a', "artifacts", Required = false, HelpText = "Valid path to accessible directory")] public DirectoryInfo ArtifactsDirectory { get; set; } + [Option("memorySurvived", Required = false, Default = false, HelpText = "Measures survived memory.")] + public bool UseSurvivedMemoryDiagnoser { get; set; } + [Option("outliers", Required = false, Default = OutlierMode.RemoveUpper, HelpText = "DontRemove/RemoveUpper/RemoveLower/RemoveAll")] public OutlierMode Outliers { get; set; } diff --git a/src/BenchmarkDotNet/ConsoleArguments/ConfigParser.cs b/src/BenchmarkDotNet/ConsoleArguments/ConfigParser.cs index 004639181c..6677733c2b 100644 --- a/src/BenchmarkDotNet/ConsoleArguments/ConfigParser.cs +++ b/src/BenchmarkDotNet/ConsoleArguments/ConfigParser.cs @@ -210,8 +210,11 @@ private static IConfig CreateConfig(CommandLineOptions options, IConfig globalCo .Select(counterName => (HardwareCounter)Enum.Parse(typeof(HardwareCounter), counterName, ignoreCase: true)) .ToArray()); - if (options.UseMemoryDiagnoser) + if (options.UseSurvivedMemoryDiagnoser) + config.AddDiagnoser(new MemoryDiagnoser(new MemoryDiagnoserConfig(includeSurvived: true))); + else if (options.UseMemoryDiagnoser) config.AddDiagnoser(MemoryDiagnoser.Default); + if (options.UseThreadingDiagnoser) config.AddDiagnoser(ThreadingDiagnoser.Default); if (options.UseExceptionDiagnoser) diff --git a/src/BenchmarkDotNet/Diagnosers/MemoryDiagnoser.cs b/src/BenchmarkDotNet/Diagnosers/MemoryDiagnoser.cs index b6c2eb12bb..349c58116b 100644 --- a/src/BenchmarkDotNet/Diagnosers/MemoryDiagnoser.cs +++ b/src/BenchmarkDotNet/Diagnosers/MemoryDiagnoser.cs @@ -42,6 +42,25 @@ public IEnumerable ProcessResults(DiagnoserResults diagnoserResults) yield 
return new Metric(GarbageCollectionsMetricDescriptor.Gen2, diagnoserResults.GcStats.Gen2Collections / (double)diagnoserResults.GcStats.TotalOperations * 1000); yield return new Metric(AllocatedMemoryMetricDescriptor.Instance, diagnoserResults.GcStats.GetBytesAllocatedPerOperation(diagnoserResults.BenchmarkCase)); + + if (Config.IncludeSurvived) + { + yield return new Metric(SurvivedMemoryMetricDescriptor.Instance, diagnoserResults.GcStats.SurvivedBytes); + } + } + + private class SurvivedMemoryMetricDescriptor : IMetricDescriptor + { + internal static readonly IMetricDescriptor Instance = new SurvivedMemoryMetricDescriptor(); + + public string Id => "Survived Memory"; + public string DisplayName => "Survived"; + public string Legend => "Memory survived after the first operation (managed only, inclusive, 1KB = 1024B)"; + public string NumberFormat => "N0"; + public UnitType UnitType => UnitType.Size; + public string Unit => SizeUnit.B.Name; + public bool TheGreaterTheBetter => false; + public int PriorityInCategory { get; } = AllocatedMemoryMetricDescriptor.Instance.PriorityInCategory + 1; } private class GarbageCollectionsMetricDescriptor : IMetricDescriptor diff --git a/src/BenchmarkDotNet/Diagnosers/MemoryDiagnoserConfig.cs b/src/BenchmarkDotNet/Diagnosers/MemoryDiagnoserConfig.cs index cb5eb7221e..e7fd17a645 100644 --- a/src/BenchmarkDotNet/Diagnosers/MemoryDiagnoserConfig.cs +++ b/src/BenchmarkDotNet/Diagnosers/MemoryDiagnoserConfig.cs @@ -5,12 +5,15 @@ namespace BenchmarkDotNet.Diagnosers public class MemoryDiagnoserConfig { /// Display Garbage Collections per Generation columns (Gen 0, Gen 1, Gen 2). True by default. + /// If true, monitoring will be enabled and survived memory will be measured on the first benchmark run. 
[PublicAPI] - public MemoryDiagnoserConfig(bool displayGenColumns = true) + public MemoryDiagnoserConfig(bool displayGenColumns = true, bool includeSurvived = false) { DisplayGenColumns = displayGenColumns; + IncludeSurvived = includeSurvived; } public bool DisplayGenColumns { get; } + public bool IncludeSurvived { get; } } } \ No newline at end of file diff --git a/src/BenchmarkDotNet/Engines/Consumer.cs b/src/BenchmarkDotNet/Engines/Consumer.cs index 015b3a952d..435b22eee4 100644 --- a/src/BenchmarkDotNet/Engines/Consumer.cs +++ b/src/BenchmarkDotNet/Engines/Consumer.cs @@ -35,6 +35,14 @@ private static readonly HashSet SupportedTypes private IntPtr ptrHolder; private UIntPtr uptrHolder; + [MethodImpl(MethodImplOptions.AggressiveInlining)] + [PublicAPI] + public void Clear() + { + Volatile.Write(ref stringHolder, null); + Volatile.Write(ref objectHolder, null); + } + [MethodImpl(MethodImplOptions.AggressiveInlining)] [PublicAPI] public void Consume(byte byteValue) => byteHolder = byteValue; diff --git a/src/BenchmarkDotNet/Engines/Engine.cs b/src/BenchmarkDotNet/Engines/Engine.cs index cdf8adcebc..0c75ee0538 100644 --- a/src/BenchmarkDotNet/Engines/Engine.cs +++ b/src/BenchmarkDotNet/Engines/Engine.cs @@ -20,6 +20,7 @@ public class Engine : IEngine [PublicAPI] public IHost Host { get; } [PublicAPI] public Action WorkloadAction { get; } + [PublicAPI] public Action WorkloadActionNoUnroll { get; } [PublicAPI] public Action Dummy1Action { get; } [PublicAPI] public Action Dummy2Action { get; } [PublicAPI] public Action Dummy3Action { get; } @@ -45,19 +46,23 @@ public class Engine : IEngine private readonly EnginePilotStage pilotStage; private readonly EngineWarmupStage warmupStage; private readonly EngineActualStage actualStage; - private readonly bool includeExtraStats; private readonly Random random; + private readonly bool includeExtraStats, includeSurvivedMemory; + + private long survivedBytes; + private bool survivedBytesMeasured; + private static Func 
GetTotalBytes { get; set; } internal Engine( IHost host, IResolver resolver, - Action dummy1Action, Action dummy2Action, Action dummy3Action, Action overheadAction, Action workloadAction, Job targetJob, + Action dummy1Action, Action dummy2Action, Action dummy3Action, Action overheadAction, Action workloadAction, Action workloadActionNoUnroll, Job targetJob, Action globalSetupAction, Action globalCleanupAction, Action iterationSetupAction, Action iterationCleanupAction, long operationsPerInvoke, - bool includeExtraStats, string benchmarkName) + bool includeExtraStats, bool includeSurvivedMemory, string benchmarkName) { - Host = host; OverheadAction = overheadAction; + WorkloadActionNoUnroll = workloadActionNoUnroll; Dummy1Action = dummy1Action; Dummy2Action = dummy2Action; Dummy3Action = dummy3Action; @@ -70,6 +75,7 @@ internal Engine( OperationsPerInvoke = operationsPerInvoke; this.includeExtraStats = includeExtraStats; BenchmarkName = benchmarkName; + this.includeSurvivedMemory = includeSurvivedMemory; Resolver = resolver; @@ -85,6 +91,55 @@ internal Engine( actualStage = new EngineActualStage(this); random = new Random(12345); // we are using constant seed to try to get repeatable results + + if (includeSurvivedMemory && GetTotalBytes is null) + { + // CreateGetTotalBytesFunc enables monitoring, so we only call it if we need to measure survived memory. + GetTotalBytes = CreateGetTotalBytesFunc(); + + // Necessary for CORE runtimes. + // Measure bytes to allow GC monitor to make its allocations. + GetTotalBytes(); + // Run the clock once to allow it to make its allocations. + MeasureAction(_ => { }, 0); + GetTotalBytes(); + } + } + + private static Func CreateGetTotalBytesFunc() + { + // Don't try to measure in Mono, Monitoring is not available, and GC.GetTotalMemory is very inaccurate. + if (RuntimeInformation.IsMono) + return () => 0; + try + { + // Docs say this should be available in .NET Core 2.1, but it throws an exception. 
+ // Just try this on all non-Mono runtimes, and fall back to GC.GetTotalMemory if it throws. + AppDomain.MonitoringIsEnabled = true; + return () => + { + // Enforce GC.Collect here to make sure we get accurate results. + ForceGcCollect(); + return AppDomain.CurrentDomain.MonitoringSurvivedMemorySize; + }; + } + catch + { + return () => + { + // Enforce GC.Collect here to make sure we get accurate results. + ForceGcCollect(); + return GC.GetTotalMemory(true); + }; + } + } + + internal Engine WithInitialData(Engine other) + { + // Copy the survived-bytes measurement from the other engine so it is only taken once. + survivedBytes = other.survivedBytes; + survivedBytesMeasured = other.survivedBytesMeasured; + return this; } public void Dispose() @@ -160,7 +215,9 @@ public Measurement RunIteration(IterationData data) var action = isOverhead ? OverheadAction : WorkloadAction; if (!isOverhead) + { IterationSetupAction(); + } GcCollect(); @@ -169,10 +226,36 @@ public Measurement RunIteration(IterationData data) Span stackMemory = randomizeMemory ? stackalloc byte[random.Next(32)] : Span.Empty; - // Measure - var clock = Clock.Start(); - action(invokeCount / unrollFactor); - var clockSpan = clock.GetElapsed(); + bool needsSurvivedMeasurement = includeSurvivedMemory && !isOverhead && !survivedBytesMeasured; + double nanoseconds; + if (needsSurvivedMeasurement) + { + // Measure survived bytes for only the first invocation. + survivedBytesMeasured = true; + if (totalOperations == 1) + { + // Measure normal invocation for both survived memory and time. + long beforeBytes = GetTotalBytes(); + nanoseconds = MeasureAction(action, invokeCount / unrollFactor); + long afterBytes = GetTotalBytes(); + survivedBytes = afterBytes - beforeBytes; + } + else + { + // Measure a single invocation for survived memory, plus normal invocations for time.
+ ++totalOperations; + long beforeBytes = GetTotalBytes(); + nanoseconds = MeasureAction(WorkloadActionNoUnroll, 1); + long afterBytes = GetTotalBytes(); + survivedBytes = afterBytes - beforeBytes; + nanoseconds += MeasureAction(action, invokeCount / unrollFactor); + } + } + else + { + // Measure time normally. + nanoseconds = MeasureAction(action, invokeCount / unrollFactor); + } if (EngineEventSource.Log.IsEnabled()) EngineEventSource.Log.IterationStop(data.IterationMode, data.IterationStage, totalOperations); @@ -186,7 +269,7 @@ public Measurement RunIteration(IterationData data) GcCollect(); // Results - var measurement = new Measurement(0, data.IterationMode, data.IterationStage, data.Index, totalOperations, clockSpan.GetNanoseconds()); + var measurement = new Measurement(0, data.IterationMode, data.IterationStage, data.Index, totalOperations, nanoseconds); WriteLine(measurement.ToString()); if (measurement.IterationStage == IterationStage.Jitting) jittingMeasurements.Add(measurement); @@ -196,6 +279,15 @@ public Measurement RunIteration(IterationData data) return measurement; } + // Kept as a separate non-inlined method: this is necessary for the .NET Core runtime to clean up the memory from the clock.
+ [MethodImpl(MethodImplOptions.NoInlining)] + private double MeasureAction(Action action, long arg) + { + var clock = Clock.Start(); + action(arg); + return clock.GetElapsed().GetNanoseconds(); + } + private (GcStats, ThreadingStats, double) GetExtraStats(IterationData data) { // we enable monitoring after main target run, for this single iteration which is executed at the end @@ -219,7 +311,7 @@ public Measurement RunIteration(IterationData data) IterationCleanupAction(); // we run iteration cleanup after collecting GC stats var totalOperationsCount = data.InvokeCount * OperationsPerInvoke; - GcStats gcStats = (finalGcStats - initialGcStats).WithTotalOperations(totalOperationsCount); + GcStats gcStats = (finalGcStats - initialGcStats).WithTotalOperationsAndSurvivedBytes(data.InvokeCount * OperationsPerInvoke, survivedBytes); ThreadingStats threadingStats = (finalThreadingStats - initialThreadingStats).WithTotalOperations(data.InvokeCount * OperationsPerInvoke); return (gcStats, threadingStats, exceptionsStats.ExceptionsCount / (double)totalOperationsCount); diff --git a/src/BenchmarkDotNet/Engines/EngineFactory.cs b/src/BenchmarkDotNet/Engines/EngineFactory.cs index 0588218522..633d722544 100644 --- a/src/BenchmarkDotNet/Engines/EngineFactory.cs +++ b/src/BenchmarkDotNet/Engines/EngineFactory.cs @@ -69,10 +69,12 @@ public IEngine CreateReadyToRun(EngineParameters engineParameters) .WithMinInvokeCount(2) // the minimum is 2 (not the default 4 which can be too much and not 1 which we already know is not enough) .WithEvaluateOverhead(false); // it's something very time consuming, it overhead is too small compared to total time - return CreateEngine(engineParameters, needsPilot, engineParameters.OverheadActionNoUnroll, engineParameters.WorkloadActionNoUnroll); + return CreateEngine(engineParameters, needsPilot, engineParameters.OverheadActionNoUnroll, engineParameters.WorkloadActionNoUnroll) + .WithInitialData(singleActionEngine); } - var multiActionEngine = 
CreateMultiActionEngine(engineParameters); + var multiActionEngine = CreateMultiActionEngine(engineParameters) + .WithInitialData(singleActionEngine); DeadCodeEliminationHelper.KeepAliveWithoutBoxing(Jit(multiActionEngine, ++jitIndex, invokeCount: defaultUnrollFactor, unrollFactor: defaultUnrollFactor)); @@ -118,6 +120,7 @@ private static Engine CreateEngine(EngineParameters engineParameters, Job job, A engineParameters.Dummy3Action, idle, main, + engineParameters.WorkloadActionNoUnroll, job, engineParameters.GlobalSetupAction, engineParameters.GlobalCleanupAction, @@ -125,6 +128,7 @@ private static Engine CreateEngine(EngineParameters engineParameters, Job job, A engineParameters.IterationCleanupAction, engineParameters.OperationsPerInvoke, engineParameters.MeasureExtraStats, + engineParameters.MeasureSurvivedMemory, engineParameters.BenchmarkName); } } diff --git a/src/BenchmarkDotNet/Engines/EngineParameters.cs b/src/BenchmarkDotNet/Engines/EngineParameters.cs index ec61582529..337a5a2acd 100644 --- a/src/BenchmarkDotNet/Engines/EngineParameters.cs +++ b/src/BenchmarkDotNet/Engines/EngineParameters.cs @@ -27,6 +27,8 @@ public class EngineParameters public Action IterationCleanupAction { get; set; } public bool MeasureExtraStats { get; set; } + public bool MeasureSurvivedMemory { get; set; } + [PublicAPI] public string BenchmarkName { get; set; } public bool NeedsJitting => TargetJob.ResolveValue(RunMode.RunStrategyCharacteristic, DefaultResolver).NeedsJitting(); diff --git a/src/BenchmarkDotNet/Engines/GcStats.cs b/src/BenchmarkDotNet/Engines/GcStats.cs index ae39c4718e..199004a440 100644 --- a/src/BenchmarkDotNet/Engines/GcStats.cs +++ b/src/BenchmarkDotNet/Engines/GcStats.cs @@ -16,15 +16,16 @@ public struct GcStats : IEquatable private static readonly Func GetAllocatedBytesForCurrentThreadDelegate = CreateGetAllocatedBytesForCurrentThreadDelegate(); private static readonly Func GetTotalAllocatedBytesDelegate = CreateGetTotalAllocatedBytesDelegate(); - public 
static readonly GcStats Empty = new GcStats(0, 0, 0, 0, 0); + public static readonly GcStats Empty = new GcStats(0, 0, 0, 0, 0, 0); - private GcStats(int gen0Collections, int gen1Collections, int gen2Collections, long allocatedBytes, long totalOperations) + private GcStats(int gen0Collections, int gen1Collections, int gen2Collections, long allocatedBytes, long totalOperations, long survivedBytes) { Gen0Collections = gen0Collections; Gen1Collections = gen1Collections; Gen2Collections = gen2Collections; AllocatedBytes = allocatedBytes; TotalOperations = totalOperations; + SurvivedBytes = survivedBytes; } // did not use array here just to avoid heap allocation @@ -38,6 +39,7 @@ private GcStats(int gen0Collections, int gen1Collections, int gen2Collections, l private long AllocatedBytes { get; } public long TotalOperations { get; } + public long SurvivedBytes { get; } public long GetBytesAllocatedPerOperation(BenchmarkCase benchmarkCase) { @@ -57,7 +59,8 @@ public long GetBytesAllocatedPerOperation(BenchmarkCase benchmarkCase) left.Gen1Collections + right.Gen1Collections, left.Gen2Collections + right.Gen2Collections, left.AllocatedBytes + right.AllocatedBytes, - left.TotalOperations + right.TotalOperations); + left.TotalOperations + right.TotalOperations, + left.SurvivedBytes + right.SurvivedBytes); } public static GcStats operator -(GcStats left, GcStats right) @@ -67,11 +70,12 @@ public long GetBytesAllocatedPerOperation(BenchmarkCase benchmarkCase) Math.Max(0, left.Gen1Collections - right.Gen1Collections), Math.Max(0, left.Gen2Collections - right.Gen2Collections), Math.Max(0, left.AllocatedBytes - right.AllocatedBytes), - Math.Max(0, left.TotalOperations - right.TotalOperations)); + Math.Max(0, left.TotalOperations - right.TotalOperations), + Math.Max(0, left.SurvivedBytes - right.SurvivedBytes)); } - public GcStats WithTotalOperations(long totalOperationsCount) - => this + new GcStats(0, 0, 0, 0, totalOperationsCount); + public GcStats 
WithTotalOperationsAndSurvivedBytes(long totalOperationsCount, long survivedBytes) + => this + new GcStats(0, 0, 0, 0, totalOperationsCount, survivedBytes); public int GetCollectionsCount(int generation) { @@ -109,6 +113,7 @@ public static GcStats ReadInitial() GC.CollectionCount(1), GC.CollectionCount(2), allocatedBytes, + 0, 0); } @@ -122,12 +127,13 @@ public static GcStats ReadFinal() // this will force GC.Collect, so we want to do this after collecting collections counts // to exclude this single full forced collection from results GetAllocatedBytes(), + 0, 0); } [PublicAPI] public static GcStats FromForced(int forcedFullGarbageCollections) - => new GcStats(forcedFullGarbageCollections, forcedFullGarbageCollections, forcedFullGarbageCollections, 0, 0); + => new GcStats(forcedFullGarbageCollections, forcedFullGarbageCollections, forcedFullGarbageCollections, 0, 0, 0); private static long GetAllocatedBytes() { @@ -168,7 +174,7 @@ private static Func CreateGetTotalAllocatedBytesDelegate() } public string ToOutputLine() - => $"{ResultsLinePrefix} {Gen0Collections} {Gen1Collections} {Gen2Collections} {AllocatedBytes} {TotalOperations}"; + => $"{ResultsLinePrefix} {Gen0Collections} {Gen1Collections} {Gen2Collections} {AllocatedBytes} {TotalOperations} {SurvivedBytes}"; public static GcStats Parse(string line) { @@ -180,12 +186,13 @@ public static GcStats Parse(string line) || !int.TryParse(measurementSplit[1], out int gen1) || !int.TryParse(measurementSplit[2], out int gen2) || !long.TryParse(measurementSplit[3], out long allocatedBytes) - || !long.TryParse(measurementSplit[4], out long totalOperationsCount)) + || !long.TryParse(measurementSplit[4], out long totalOperationsCount) + || !long.TryParse(measurementSplit[5], out long survivedBytes)) { throw new NotSupportedException("Invalid string"); } - return new GcStats(gen0, gen1, gen2, allocatedBytes, totalOperationsCount); + return new GcStats(gen0, gen1, gen2, allocatedBytes, totalOperationsCount, survivedBytes); 
} public override string ToString() => ToOutputLine(); @@ -219,10 +226,16 @@ private static long CalculateAllocationQuantumSize() return result; } - public bool Equals(GcStats other) => Gen0Collections == other.Gen0Collections && Gen1Collections == other.Gen1Collections && Gen2Collections == other.Gen2Collections && AllocatedBytes == other.AllocatedBytes && TotalOperations == other.TotalOperations; + public bool Equals(GcStats other) => + Gen0Collections == other.Gen0Collections + && Gen1Collections == other.Gen1Collections + && Gen2Collections == other.Gen2Collections + && AllocatedBytes == other.AllocatedBytes + && TotalOperations == other.TotalOperations + && SurvivedBytes == other.SurvivedBytes; public override bool Equals(object obj) => obj is GcStats other && Equals(other); - public override int GetHashCode() => HashCode.Combine(Gen0Collections, Gen1Collections, Gen2Collections, AllocatedBytes, TotalOperations); + public override int GetHashCode() => HashCode.Combine(Gen0Collections, Gen1Collections, Gen2Collections, AllocatedBytes, TotalOperations, SurvivedBytes); } } \ No newline at end of file diff --git a/src/BenchmarkDotNet/Exporters/Csv/CsvMeasurementsExporter.cs b/src/BenchmarkDotNet/Exporters/Csv/CsvMeasurementsExporter.cs index a02953fe63..e3c51869d6 100644 --- a/src/BenchmarkDotNet/Exporters/Csv/CsvMeasurementsExporter.cs +++ b/src/BenchmarkDotNet/Exporters/Csv/CsvMeasurementsExporter.cs @@ -71,6 +71,10 @@ private static MeasurementColumn[] GetColumns(Summary summary) new MeasurementColumn("Gen_2", (_, report, __) => report.GcStats.Gen2Collections.ToString(summary.GetCultureInfo())), new MeasurementColumn("Allocated_Bytes", (_, report, __) => report.GcStats.GetBytesAllocatedPerOperation(report.BenchmarkCase).ToString(summary.GetCultureInfo())) }; + if (summary.BenchmarksCases.Any(benchmark => benchmark.Config.HasSurvivedMemoryDiagnoser())) + { + columns.Add(new MeasurementColumn("Survived_Bytes", (_, report, __) => 
report.GcStats.SurvivedBytes.ToString(summary.GetCultureInfo()))); + } return columns.ToArray(); } diff --git a/src/BenchmarkDotNet/Templates/BenchmarkType.txt b/src/BenchmarkDotNet/Templates/BenchmarkType.txt index d8f15f9138..d6fc965f05 100644 --- a/src/BenchmarkDotNet/Templates/BenchmarkType.txt +++ b/src/BenchmarkDotNet/Templates/BenchmarkType.txt @@ -38,6 +38,7 @@ TargetJob = job, OperationsPerInvoke = $OperationsPerInvoke$, MeasureExtraStats = $MeasureExtraStats$, + MeasureSurvivedMemory = $MeasureSurvivedMemory$, BenchmarkName = benchmarkName }; @@ -123,6 +124,7 @@ { consumer.Consume(overheadDelegate($PassArguments$));@Unroll@ } + consumer.Clear(); // Necessary for survived memory diagnoser. } #if NETCOREAPP3_0_OR_GREATER @@ -135,6 +137,7 @@ { consumer.Consume(overheadDelegate($PassArguments$)); } + consumer.Clear(); // Necessary for survived memory diagnoser. } #if NETCOREAPP3_0_OR_GREATER @@ -147,6 +150,7 @@ { consumer.Consume(workloadDelegate($PassArguments$)$ConsumeField$);@Unroll@ } + consumer.Clear(); // Necessary for survived memory diagnoser. } #if NETCOREAPP3_0_OR_GREATER @@ -159,6 +163,7 @@ { consumer.Consume(workloadDelegate($PassArguments$)$ConsumeField$); } + consumer.Clear(); // Necessary for survived memory diagnoser. 
} [System.Runtime.CompilerServices.MethodImpl(System.Runtime.CompilerServices.MethodImplOptions.NoOptimization | System.Runtime.CompilerServices.MethodImplOptions.NoInlining)] diff --git a/src/BenchmarkDotNet/Toolchains/InProcess.Emit.Implementation/Emitters/ConsumableConsumeEmitter.cs b/src/BenchmarkDotNet/Toolchains/InProcess.Emit.Implementation/Emitters/ConsumableConsumeEmitter.cs index 76a2a5f505..42b5c4fc4a 100644 --- a/src/BenchmarkDotNet/Toolchains/InProcess.Emit.Implementation/Emitters/ConsumableConsumeEmitter.cs +++ b/src/BenchmarkDotNet/Toolchains/InProcess.Emit.Implementation/Emitters/ConsumableConsumeEmitter.cs @@ -131,5 +131,13 @@ protected override void EmitActionAfterCallOverride(ILGenerator ilBuilder) } } } + + protected override void EmitActionAfterLoopOverride(ILGenerator ilBuilder) + { + var clearMethod = typeof(Consumer).GetMethod(nameof(Consumer.Clear)); + ilBuilder.Emit(OpCodes.Ldarg_0); + ilBuilder.Emit(OpCodes.Ldfld, consumerField); + ilBuilder.Emit(OpCodes.Callvirt, clearMethod); + } } } \ No newline at end of file diff --git a/src/BenchmarkDotNet/Toolchains/InProcess.Emit.Implementation/Runnable/RunnableReuse.cs b/src/BenchmarkDotNet/Toolchains/InProcess.Emit.Implementation/Runnable/RunnableReuse.cs index 7067650ec1..30802d242e 100644 --- a/src/BenchmarkDotNet/Toolchains/InProcess.Emit.Implementation/Runnable/RunnableReuse.cs +++ b/src/BenchmarkDotNet/Toolchains/InProcess.Emit.Implementation/Runnable/RunnableReuse.cs @@ -102,6 +102,7 @@ private static EngineParameters CreateEngineParameters( TargetJob = benchmarkCase.Job, OperationsPerInvoke = benchmarkCase.Descriptor.OperationsPerInvoke, MeasureExtraStats = benchmarkCase.Config.HasExtraStatsDiagnoser(), + MeasureSurvivedMemory = benchmarkCase.Config.HasSurvivedMemoryDiagnoser(), BenchmarkName = FullNameProvider.GetBenchmarkName(benchmarkCase) }; return engineParameters; diff --git a/src/BenchmarkDotNet/Toolchains/InProcess.NoEmit/InProcessNoEmitRunner.cs 
b/src/BenchmarkDotNet/Toolchains/InProcess.NoEmit/InProcessNoEmitRunner.cs index e890000683..34466a24e5 100644 --- a/src/BenchmarkDotNet/Toolchains/InProcess.NoEmit/InProcessNoEmitRunner.cs +++ b/src/BenchmarkDotNet/Toolchains/InProcess.NoEmit/InProcessNoEmitRunner.cs @@ -151,6 +151,7 @@ public static void RunCore(IHost host, BenchmarkCase benchmarkCase) TargetJob = job, OperationsPerInvoke = target.OperationsPerInvoke, MeasureExtraStats = benchmarkCase.Config.HasExtraStatsDiagnoser(), + MeasureSurvivedMemory = benchmarkCase.Config.HasSurvivedMemoryDiagnoser(), BenchmarkName = FullNameProvider.GetBenchmarkName(benchmarkCase) }; diff --git a/src/BenchmarkDotNet/Toolchains/InProcess/InProcessRunner.cs b/src/BenchmarkDotNet/Toolchains/InProcess/InProcessRunner.cs index aedd8f6788..97145c6386 100644 --- a/src/BenchmarkDotNet/Toolchains/InProcess/InProcessRunner.cs +++ b/src/BenchmarkDotNet/Toolchains/InProcess/InProcessRunner.cs @@ -150,6 +150,7 @@ public static void RunCore(IHost host, BenchmarkCase benchmarkCase, BenchmarkAct TargetJob = job, OperationsPerInvoke = target.OperationsPerInvoke, MeasureExtraStats = benchmarkCase.Config.HasExtraStatsDiagnoser(), + MeasureSurvivedMemory = benchmarkCase.Config.HasSurvivedMemoryDiagnoser(), BenchmarkName = FullNameProvider.GetBenchmarkName(benchmarkCase) }; diff --git a/tests/BenchmarkDotNet.IntegrationTests/MemoryDiagnoserTests.cs b/tests/BenchmarkDotNet.IntegrationTests/MemoryDiagnoserTests.cs index 7e4ea811c7..3853592151 100755 --- a/tests/BenchmarkDotNet.IntegrationTests/MemoryDiagnoserTests.cs +++ b/tests/BenchmarkDotNet.IntegrationTests/MemoryDiagnoserTests.cs @@ -9,6 +9,7 @@ using BenchmarkDotNet.Columns; using BenchmarkDotNet.Configs; using BenchmarkDotNet.Diagnosers; +using BenchmarkDotNet.Engines; using BenchmarkDotNet.Environments; using BenchmarkDotNet.Extensions; using BenchmarkDotNet.IntegrationTests.Xunit; @@ -19,6 +20,7 @@ using BenchmarkDotNet.Tests.Loggers; using BenchmarkDotNet.Tests.XUnit; using 
BenchmarkDotNet.Toolchains; +using BenchmarkDotNet.Toolchains.CsProj; using BenchmarkDotNet.Toolchains.NativeAot; using BenchmarkDotNet.Toolchains.InProcess.Emit; using Xunit; @@ -65,6 +67,67 @@ public void MemoryDiagnoserIsAccurate(IToolchain toolchain) }); } + public class AccurateSurvived + { + [Benchmark] public byte[] EightBytesArray() => new byte[8]; + [Benchmark] public byte[] SixtyFourBytesArray() => new byte[64]; + [Benchmark] public Task AllocateTask() => Task.FromResult(-12345); + + + public byte[] bytes8; + public byte[] bytes64; + public Task task; + + [GlobalSetup(Targets = new string[] { nameof(EightBytesArrayNoAllocate), nameof(SixtyFourBytesArrayNoAllocate), nameof(TaskNoAllocate) })] + public void SetupNoAllocate() + { + bytes8 = new byte[8]; + bytes64 = new byte[64]; + task = Task.FromResult(-12345); + } + + [Benchmark] public byte[] EightBytesArrayNoAllocate() => bytes8; + [Benchmark] public byte[] SixtyFourBytesArrayNoAllocate() => bytes64; + [Benchmark] public Task TaskNoAllocate() => task; + + + [Benchmark] public void EightBytesArraySurvive() => bytes8 = new byte[8]; + [Benchmark] public void SixtyFourBytesArraySurvive() => bytes64 = new byte[64]; + [Benchmark] public void AllocateTaskSurvive() => task = Task.FromResult(-12345); + + + [Benchmark] public void EightBytesArrayAllocateNoSurvive() => DeadCodeEliminationHelper.KeepAliveWithoutBoxing(new byte[8]); + [Benchmark] public void SixtyFourBytesArrayAllocateNoSurvive() => DeadCodeEliminationHelper.KeepAliveWithoutBoxing(new byte[64]); + [Benchmark] public void TaskAllocateNoSurvive() => DeadCodeEliminationHelper.KeepAliveWithoutBoxing(Task.FromResult(-12345)); + } + + [Theory, MemberData(nameof(GetToolchains))] + [Trait(Constants.Category, Constants.BackwardCompatibilityCategory)] + public void MemoryDiagnoserSurvivedIsAccurate(IToolchain toolchain) + { + long objectAllocationOverhead = IntPtr.Size * 2; // pointer to method table + object header word + long arraySizeOverhead = IntPtr.Size; 
// array length + + AssertSurvived(toolchain, typeof(AccurateSurvived), new Dictionary + { + { nameof(AccurateSurvived.EightBytesArray), 0 }, + { nameof(AccurateSurvived.SixtyFourBytesArray), 0 }, + { nameof(AccurateSurvived.AllocateTask), 0 }, + + { nameof(AccurateSurvived.EightBytesArrayNoAllocate), 0 }, + { nameof(AccurateSurvived.SixtyFourBytesArrayNoAllocate), 0 }, + { nameof(AccurateSurvived.TaskNoAllocate), 0 }, + + { nameof(AccurateSurvived.EightBytesArraySurvive), 8 + objectAllocationOverhead + arraySizeOverhead }, + { nameof(AccurateSurvived.SixtyFourBytesArraySurvive), 64 + objectAllocationOverhead + arraySizeOverhead }, + { nameof(AccurateSurvived.AllocateTaskSurvive), CalculateRequiredSpace>() }, + + { nameof(AccurateSurvived.EightBytesArrayAllocateNoSurvive), 0 }, + { nameof(AccurateSurvived.SixtyFourBytesArrayAllocateNoSurvive), 0 }, + { nameof(AccurateSurvived.TaskAllocateNoSurvive), 0 }, + }); + } + [FactDotNetCoreOnly("We don't want to test NativeAOT twice (for .NET Framework 4.6.2 and .NET 6.0)")] public void MemoryDiagnoserSupportsNativeAOT() { @@ -114,6 +177,16 @@ public void MemoryDiagnoserDoesNotIncludeAllocationsFromSetupAndCleanup(IToolcha }); } + [Theory, MemberData(nameof(GetToolchains))] + [Trait(Constants.Category, Constants.BackwardCompatibilityCategory)] + public void MemoryDiagnoserDoesNotIncludeSurvivedFromSetupAndCleanup(IToolchain toolchain) + { + AssertSurvived(toolchain, typeof(AllocatingGlobalSetupAndCleanup), new Dictionary + { + { nameof(AllocatingGlobalSetupAndCleanup.AllocateNothing), 0 } + }); + } + public class NoAllocationsAtAll { [Benchmark] public void EmptyMethod() { } @@ -129,6 +202,16 @@ public void EngineShouldNotInterfereAllocationResults(IToolchain toolchain) }); } + [Theory, MemberData(nameof(GetToolchains))] + [Trait(Constants.Category, Constants.BackwardCompatibilityCategory)] + public void EngineShouldNotInterfereSurvivedResults(IToolchain toolchain) + { + AssertSurvived(toolchain, typeof(NoAllocationsAtAll), 
new Dictionary + { + { nameof(NoAllocationsAtAll.EmptyMethod), 0 } + }); + } + public class NoBoxing { [Benchmark] public ValueTuple ReturnsValueType() => new ValueTuple(0); @@ -144,9 +227,28 @@ public void EngineShouldNotIntroduceBoxing(IToolchain toolchain) }); } + [Theory, MemberData(nameof(GetToolchains))] + [Trait(Constants.Category, Constants.BackwardCompatibilityCategory)] + public void EngineShouldNotIntroduceBoxingSurvived(IToolchain toolchain) + { + AssertSurvived(toolchain, typeof(NoBoxing), new Dictionary + { + { nameof(NoBoxing.ReturnsValueType), 0 } + }); + } + public class NonAllocatingAsynchronousBenchmarks { - private readonly Task completedTaskOfT = Task.FromResult(default(int)); // we store it in the field, because Task is reference type so creating it allocates heap memory + private readonly Task completedTaskOfT = Task.FromResult(-12345); // we store it in the field, because Task is reference type so creating it allocates heap memory + + [GlobalSetup] + public void Setup() + { + // Run once to set static memory. 
+ DeadCodeEliminationHelper.KeepAliveWithoutBoxing(CompletedTask()); + DeadCodeEliminationHelper.KeepAliveWithoutBoxing(CompletedTaskOfT()); + DeadCodeEliminationHelper.KeepAliveWithoutBoxing(CompletedValueTaskOfT()); + } [Benchmark] public Task CompletedTask() => Task.CompletedTask; @@ -172,6 +274,18 @@ public void AwaitingTasksShouldNotInterfereAllocationResults(IToolchain toolchai }); } + [Theory, MemberData(nameof(GetToolchains))] + [Trait(Constants.Category, Constants.BackwardCompatibilityCategory)] + public void AwaitingTasksShouldNotInterfereSurvivedResults(IToolchain toolchain) + { + AssertSurvived(toolchain, typeof(NonAllocatingAsynchronousBenchmarks), new Dictionary + { + { nameof(NonAllocatingAsynchronousBenchmarks.CompletedTask), 0 }, + { nameof(NonAllocatingAsynchronousBenchmarks.CompletedTaskOfT), 0 }, + { nameof(NonAllocatingAsynchronousBenchmarks.CompletedValueTaskOfT), 0 } + }); + } + public class WithOperationsPerInvokeBenchmarks { [Benchmark(OperationsPerInvoke = 4)] @@ -275,7 +389,7 @@ public void MemoryDiagnoserIsAccurateForMultiThreadedBenchmarks(IToolchain toolc private void AssertAllocations(IToolchain toolchain, Type benchmarkType, Dictionary benchmarksAllocationsValidators) { - var config = CreateConfig(toolchain); + var config = CreateConfig(toolchain, MemoryDiagnoser.Default); var benchmarks = BenchmarkConverter.TypeToBenchmarks(benchmarkType, config); var summary = BenchmarkRunner.Run(benchmarks); @@ -303,7 +417,32 @@ private void AssertAllocations(IToolchain toolchain, Type benchmarkType, Diction } } - private IConfig CreateConfig(IToolchain toolchain) + private void AssertSurvived(IToolchain toolchain, Type benchmarkType, Dictionary benchmarkSurvivedValidators) + { + // Core has survived memory measurement problems. 
+ // See https://github.com/dotnet/runtime/issues/45446 + if (toolchain is CsProjCoreToolchain || (toolchain.IsInProcess && RuntimeInformation.IsNetCore)) + return; + + var config = CreateConfig(toolchain, new MemoryDiagnoser(new MemoryDiagnoserConfig(includeSurvived: true))); + var benchmarks = BenchmarkConverter.TypeToBenchmarks(benchmarkType, config); + + var summary = BenchmarkRunner.Run(benchmarks); + + foreach (var benchmarkSurvivedValidator in benchmarkSurvivedValidators) + { + var survivedBenchmarks = benchmarks.BenchmarksCases.Where(benchmark => benchmark.Descriptor.WorkloadMethodDisplayInfo == benchmarkSurvivedValidator.Key); + + foreach (var benchmark in survivedBenchmarks) + { + var benchmarkReport = summary.Reports.Single(report => report.BenchmarkCase == benchmark); + + Assert.Equal(benchmarkSurvivedValidator.Value, benchmarkReport.GcStats.SurvivedBytes); + } + } + } + + private IConfig CreateConfig(IToolchain toolchain, MemoryDiagnoser memoryDiagnoser) => ManualConfig.CreateEmpty() .AddJob(Job.ShortRun .WithEvaluateOverhead(false) // no need to run idle for this test @@ -313,7 +452,7 @@ private IConfig CreateConfig(IToolchain toolchain) .WithEnvironmentVariable("COMPlus_TieredCompilation", "0") // Tiered JIT can allocate some memory on a background thread, let's disable it to make our tests less flaky (#1542) .WithToolchain(toolchain)) .AddColumnProvider(DefaultColumnProviders.Instance) - .AddDiagnoser(MemoryDiagnoser.Default) + .AddDiagnoser(memoryDiagnoser) .AddLogger(toolchain.IsInProcess ? ConsoleLogger.Default : new OutputLogger(output)); // we can't use OutputLogger for the InProcess toolchains because it allocates memory on the same thread // note: don't copy, never use in production systems (it should work but I am not 100% sure)