Skip to content

Commit

Permalink
Allow per provider interval specification (#3591)
Browse files Browse the repository at this point in the history
* Allow per provider interval specification

* Update src/Microsoft.Diagnostics.Monitoring.WebApi/Metrics/MetricsSettingsFactory.cs

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>

* Update src/Tests/Microsoft.Diagnostics.Monitoring.Tool.UnitTests/MetricsSettingsTests.cs

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>

* Apply suggestions from code review

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>

* Pr feedback

* pr feedback

* pr feedback

---------

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
  • Loading branch information
wiktork and github-actions[bot] authored Feb 11, 2023
1 parent 70bacb5 commit 92217cd
Show file tree
Hide file tree
Showing 19 changed files with 395 additions and 17 deletions.
44 changes: 44 additions & 0 deletions documentation/configuration/metrics-configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
Due to limitations in event counters, `dotnet monitor` supports only **one** refresh interval when collecting metrics. This interval is used for
Prometheus metrics, livemetrics, triggers, traces, and trigger actions that collect traces. The default interval is 5 seconds, but can be changed in configuration.

[7.1+] For EventCounter providers, it is possible to specify a different interval for each provider. See [Per provider intervals](#per-provider-intervals-71).

<details>
<summary>JSON</summary>

Expand Down Expand Up @@ -37,6 +39,48 @@ Prometheus metrics, livemetrics, triggers, traces, and trigger actions that coll
```
</details>
## Per provider intervals (7.1+)
It is possible to override the global interval on a per-provider basis. Note that this forces all scenarios (triggers, live metrics, Prometheus metrics, traces) that use a particular provider to use that interval. Metrics that are `System.Diagnostics.Metrics`-based always use the global interval.

<details>
<summary>JSON</summary>

```json
{
"GlobalCounter": {
"IntervalSeconds": 5,
"Providers": {
"System.Runtime": {
"IntervalSeconds": 10
}
}
}
}
```
</details>

<details>
<summary>Kubernetes ConfigMap</summary>

```yaml
GlobalCounter__IntervalSeconds: "5"
GlobalCounter__Providers__System.Runtime__IntervalSeconds: "10"
```
</details>

<details>
<summary>Kubernetes Environment Variables</summary>

```yaml
- name: DotnetMonitor_GlobalCounter__IntervalSeconds
value: "5"
- name: DotnetMonitor_GlobalCounter__Providers__System.Runtime__IntervalSeconds
value: "10"
```
</details>

## Metrics Urls

In addition to the ordinary diagnostics urls that `dotnet monitor` binds to, it also binds to metric urls that only expose the `/metrics` endpoint. Unlike the other endpoints, the metrics urls do not require authentication. Unless you enable collection of custom providers that may contain sensitive business logic, it is generally considered safe to expose metrics endpoints.
Expand Down
25 changes: 25 additions & 0 deletions documentation/schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -899,6 +899,31 @@
"default": 1000,
"maximum": 2147483647.0,
"minimum": 1.0
},
"Providers": {
"type": [
"null",
"object"
],
"description": "Dictionary of provider names and their global configuration.",
"additionalProperties": {
"$ref": "#/definitions/GlobalProviderOptions"
}
}
}
},
"GlobalProviderOptions": {
"type": "object",
"additionalProperties": false,
"properties": {
"IntervalSeconds": {
"type": [
"null",
"number"
],
"format": "float",
"maximum": 86400.0,
"minimum": 1.0
}
}
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,15 @@
// The .NET Foundation licenses this file to you under the MIT license.

using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.ComponentModel.DataAnnotations;
using System.Globalization;
using System.Linq;

namespace Microsoft.Diagnostics.Monitoring.WebApi
{
public class GlobalCounterOptions
public partial class GlobalCounterOptions
{
public const float IntervalMinSeconds = 1;
public const float IntervalMaxSeconds = 60 * 60 * 24; // One day
Expand All @@ -32,6 +35,38 @@ public class GlobalCounterOptions
[DefaultValue(GlobalCounterOptionsDefaults.MaxTimeSeries)]
[Range(1, int.MaxValue)]
public int? MaxTimeSeries { get; set; }

/// <summary>
/// Per-provider global settings, keyed by provider name.
/// Initialized to an empty dictionary whose keys compare with <see cref="StringComparer.OrdinalIgnoreCase"/>.
/// </summary>
[Display(
    Description = nameof(OptionsDisplayStrings.DisplayAttributeDescription_GlobalCounterOptions_Providers),
    ResourceType = typeof(OptionsDisplayStrings))]
public IDictionary<string, GlobalProviderOptions> Providers { get; set; } =
    new Dictionary<string, GlobalProviderOptions>(StringComparer.OrdinalIgnoreCase);
}

/// <summary>
/// Global settings for a single counter provider; instances are the values of
/// <see cref="GlobalCounterOptions.Providers"/>, keyed by provider name.
/// </summary>
public class GlobalProviderOptions
{
/// <summary>
/// Optional interval, in seconds, for this provider. Must lie between
/// <see cref="GlobalCounterOptions.IntervalMinSeconds"/> and
/// <see cref="GlobalCounterOptions.IntervalMaxSeconds"/> (one day) when set.
/// When left <c>null</c>, <c>GetProviderSpecificInterval</c> falls back to the global interval.
/// </summary>
[Range(GlobalCounterOptions.IntervalMinSeconds, GlobalCounterOptions.IntervalMaxSeconds)]
public float? IntervalSeconds { get; set; }
}

partial class GlobalCounterOptions : IValidatableObject
{
    /// <summary>
    /// Validates each entry of <see cref="Providers"/> with its data-annotation attributes and
    /// reports every failure prefixed with the name of the provider it belongs to.
    /// </summary>
    /// <param name="validationContext">Context of the validation request; not consulted by this implementation.</param>
    /// <returns>
    /// One <see cref="ValidationResult"/> per failed provider setting; empty when all entries are valid
    /// or when there are no provider entries.
    /// </returns>
    public IEnumerable<ValidationResult> Validate(ValidationContext validationContext)
    {
        var results = new List<ValidationResult>();

        // Providers has a public setter, so it can be replaced with null; there is nothing to validate then.
        if (Providers is null)
        {
            return results;
        }

        var providerResults = new List<ValidationResult>();
        foreach ((string provider, GlobalProviderOptions options) in Providers)
        {
            // ValidationContext rejects a null instance; a null entry has no settings to validate.
            if (options is null)
            {
                continue;
            }

            // The scratch list is reused across providers, so reset it before each validation pass.
            providerResults.Clear();
            if (!Validator.TryValidateObject(options, new ValidationContext(options), providerResults, validateAllProperties: true))
            {
                // We prefix the validation error with the provider.
                results.AddRange(providerResults.Select(r => new ValidationResult(
                    string.Format(CultureInfo.CurrentCulture, OptionsDisplayStrings.ErrorMessage_NestedProviderValidationError, provider, r.ErrorMessage))));
            }
        }

        return results;
    }
}

internal static class GlobalCounterOptionsExtensions
Expand All @@ -44,5 +79,8 @@ public static int GetMaxHistograms(this GlobalCounterOptions options) =>

public static int GetMaxTimeSeries(this GlobalCounterOptions options) =>
options.MaxTimeSeries.GetValueOrDefault(GlobalCounterOptionsDefaults.MaxTimeSeries);

/// <summary>
/// Resolves the counter interval, in seconds, to use for a given provider.
/// </summary>
/// <param name="options">The global counter options to read from.</param>
/// <param name="providerName">Name of the provider to look up in <see cref="GlobalCounterOptions.Providers"/>.</param>
/// <returns>
/// The provider's own <see cref="GlobalProviderOptions.IntervalSeconds"/> when an entry exists and sets one;
/// otherwise the global interval from <c>GetIntervalSeconds</c>.
/// </returns>
public static float GetProviderSpecificInterval(this GlobalCounterOptions options, string providerName)
{
    // Providers is settable and its values may be null, so guard both before reading the override.
    if (options.Providers?.TryGetValue(providerName, out GlobalProviderOptions providerOptions) == true &&
        providerOptions?.IntervalSeconds is float providerInterval)
    {
        return providerInterval;
    }

    return options.GetIntervalSeconds();
}
}
}

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -811,4 +811,10 @@
<value>The type of metrics this provider consumes</value>
<comment>The description provided for the MetricType parameter on MetricProvider.</comment>
</data>
<data name="DisplayAttributeDescription_GlobalCounterOptions_Providers" xml:space="preserve">
<value>Dictionary of provider names and their global configuration.</value>
</data>
<data name="ErrorMessage_NestedProviderValidationError" xml:space="preserve">
<value>Provider '{0}' validation error: '{1}'</value>
</data>
</root>
Original file line number Diff line number Diff line change
Expand Up @@ -337,7 +337,7 @@ public Task<ActionResult> CaptureTrace(
{
TimeSpan duration = Utilities.ConvertSecondsToTimeSpan(durationSeconds);

var aggregateConfiguration = TraceUtilities.GetTraceConfiguration(profile, _counterOptions.CurrentValue.GetIntervalSeconds());
var aggregateConfiguration = TraceUtilities.GetTraceConfiguration(profile, _counterOptions.CurrentValue);

return StartTrace(processInfo, aggregateConfiguration, duration, egressProvider, tags);
}, processKey, Utilities.ArtifactType_Trace);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
using Microsoft.Diagnostics.Monitoring.EventPipe;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Threading;

Expand All @@ -20,6 +21,7 @@ public static MetricsPipelineSettings CreateSettings(GlobalCounterOptions counte
return CreateSettings(includeDefaults,
durationSeconds,
counterOptions.GetIntervalSeconds(),
counterOptions.Providers,
counterOptions.GetMaxHistograms(),
counterOptions.GetMaxTimeSeries(),
() => new List<EventPipeCounterGroup>(0));
Expand All @@ -29,6 +31,7 @@ public static MetricsPipelineSettings CreateSettings(GlobalCounterOptions counte
{
return CreateSettings(options.IncludeDefaultProviders.GetValueOrDefault(MetricsOptionsDefaults.IncludeDefaultProviders),
Timeout.Infinite, counterOptions.GetIntervalSeconds(),
counterOptions.Providers,
counterOptions.GetMaxHistograms(),
counterOptions.GetMaxTimeSeries(),
() => ConvertCounterGroups(options.Providers));
Expand All @@ -40,6 +43,7 @@ public static MetricsPipelineSettings CreateSettings(GlobalCounterOptions counte
return CreateSettings(configuration.IncludeDefaultProviders,
durationSeconds,
counterOptions.GetIntervalSeconds(),
counterOptions.Providers,
counterOptions.GetMaxHistograms(),
counterOptions.GetMaxTimeSeries(),
() => ConvertCounterGroups(configuration.Providers));
Expand All @@ -48,6 +52,7 @@ public static MetricsPipelineSettings CreateSettings(GlobalCounterOptions counte
private static MetricsPipelineSettings CreateSettings(bool includeDefaults,
int durationSeconds,
float counterInterval,
IDictionary<string, GlobalProviderOptions> intervalMap,
int maxHistograms,
int maxTimeSeries,
Func<List<EventPipeCounterGroup>> createCounterGroups)
Expand All @@ -61,6 +66,15 @@ private static MetricsPipelineSettings CreateSettings(bool includeDefaults,
eventPipeCounterGroups.Add(new EventPipeCounterGroup { ProviderName = MonitoringSourceConfiguration.GrpcAspNetCoreServer, Type = CounterGroupType.EventCounter });
}

foreach (EventPipeCounterGroup counterGroup in eventPipeCounterGroups)
{
if (intervalMap.TryGetValue(counterGroup.ProviderName, out GlobalProviderOptions providerInterval))
{
Debug.Assert(counterGroup.IntervalSeconds == null, "Unexpected value for provider interval");
counterGroup.IntervalSeconds = providerInterval.IntervalSeconds;
}
}

return new MetricsPipelineSettings
{
CounterGroups = eventPipeCounterGroups.ToArray(),
Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion src/Microsoft.Diagnostics.Monitoring.WebApi/Strings.resx
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@
<comment>Gets a string similar to "Invalid metric count.".</comment>
</data>
<data name="ErrorMessage_InvalidMetricInterval" xml:space="preserve">
<value>Custom trace metric provider '{0}' must use the global counter interval '{1}'</value>
<value>Custom trace metric provider '{0}' must use the expected counter interval '{1}'.</value>
</data>
<data name="ErrorMessage_MetricsDisabled" xml:space="preserve">
<value>Metrics was not enabled.</value>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ namespace Microsoft.Diagnostics.Monitoring.WebApi
{
internal static class TraceUtilities
{
public static MonitoringSourceConfiguration GetTraceConfiguration(Models.TraceProfile profile, float metricsIntervalSeconds)
public static MonitoringSourceConfiguration GetTraceConfiguration(Models.TraceProfile profile, GlobalCounterOptions options)
{
var configurations = new List<MonitoringSourceConfiguration>();
if (profile.HasFlag(Models.TraceProfile.Cpu))
Expand All @@ -34,7 +34,14 @@ public static MonitoringSourceConfiguration GetTraceConfiguration(Models.TracePr
}
if (profile.HasFlag(Models.TraceProfile.Metrics))
{
configurations.Add(new MetricSourceConfiguration(metricsIntervalSeconds, Enumerable.Empty<string>()));
IEnumerable<MetricEventPipeProvider> defaultProviders = MonitoringSourceConfiguration.DefaultMetricProviders.Select(provider => new MetricEventPipeProvider
{
Provider = provider,
IntervalSeconds = options.GetProviderSpecificInterval(provider),
Type = MetricType.EventCounter
});

configurations.Add(new MetricSourceConfiguration(options.GetIntervalSeconds(), defaultProviders));
}

return new AggregateSourceConfiguration(configurations.ToArray());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,12 @@ public static bool ValidateProvider(GlobalCounterOptions counterOptions,
if (provider.Arguments?.TryGetValue("EventCounterIntervalSec", out string intervalValue) == true)
{
if (float.TryParse(intervalValue, out float intervalSeconds) &&
intervalSeconds != counterOptions.GetIntervalSeconds())
intervalSeconds != counterOptions.GetProviderSpecificInterval(provider.Name))
{
errorMessage = string.Format(CultureInfo.CurrentCulture,
Strings.ErrorMessage_InvalidMetricInterval,
provider.Name,
counterOptions.GetIntervalSeconds());
counterOptions.GetProviderSpecificInterval(provider.Name));
return false;
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
using System.Security.Cryptography;
using System.Text.Json;
using System.Text.Json.Serialization;
using Xunit;

namespace Microsoft.Diagnostics.Monitoring.TestCommon.Options
{
Expand Down Expand Up @@ -46,6 +47,15 @@ public static RootOptions AddGlobalCounter(this RootOptions options, int interva
return options;
}

/// <summary>
/// Test helper that adds a per-provider interval entry to the global counter options.
/// Asserts that <c>GlobalCounter</c> has already been set on <paramref name="options"/>.
/// </summary>
/// <param name="options">The root options to modify.</param>
/// <param name="name">Provider name used as the dictionary key.</param>
/// <param name="intervalSeconds">Interval, in seconds, to assign to the provider.</param>
/// <returns>The same <paramref name="options"/> instance, to allow call chaining.</returns>
public static RootOptions AddProviderInterval(this RootOptions options, string name, int intervalSeconds)
{
    Assert.NotNull(options.GlobalCounter);

    GlobalProviderOptions providerOptions = new() { IntervalSeconds = intervalSeconds };
    options.GlobalCounter.Providers.Add(name, providerOptions);

    return options;
}

public static CollectionRuleOptions CreateCollectionRule(this RootOptions rootOptions, string name)
{
CollectionRuleOptions options = new();
Expand Down
Loading

0 comments on commit 92217cd

Please sign in to comment.