diff --git a/src/OpenTelemetry.Exporter.Geneva/CHANGELOG.md b/src/OpenTelemetry.Exporter.Geneva/CHANGELOG.md index 47b1b1f495..e1fbedc7df 100644 --- a/src/OpenTelemetry.Exporter.Geneva/CHANGELOG.md +++ b/src/OpenTelemetry.Exporter.Geneva/CHANGELOG.md @@ -5,6 +5,15 @@ * Drop support for .NET 6 as this target is no longer supported. ([#2117](https://github.com/open-telemetry/opentelemetry-dotnet-contrib/pull/2117)) +* Added support for exporting metrics via + [user_events](https://docs.kernel.org/trace/user_events.html) on Linux when + OTLP protobuf encoding is enabled via the + `PrivatePreviewEnableOtlpProtobufEncoding=true` connection string switch. With + this, `PrivatePreviewEnableOtlpProtobufEncoding=true` is now supported on both + Windows and Linux. Windows uses ETW as transport, while Linux uses user_events + as transport. + ([#2113](https://github.com/open-telemetry/opentelemetry-dotnet-contrib/pull/2113)) + ## 1.9.0 Released 2024-Jun-21 diff --git a/src/OpenTelemetry.Exporter.Geneva/Internal/ExporterEventSource.cs b/src/OpenTelemetry.Exporter.Geneva/Internal/ExporterEventSource.cs index 107acaf52d..d7dacb2383 100644 --- a/src/OpenTelemetry.Exporter.Geneva/Internal/ExporterEventSource.cs +++ b/src/OpenTelemetry.Exporter.Geneva/Internal/ExporterEventSource.cs @@ -18,6 +18,9 @@ internal sealed class ExporterEventSource : EventSource private const int EVENT_ID_ERROR = 4; // Other common exporter exceptions private const int EVENT_ID_OTLP_PROTOBUF_METRIC = 5; // Failed to serialize metric private const int EVENT_ID_COMPLETED_EXPORT = 6; // Completed export + private const int EVENT_ID_TRANSPORT_ERROR = 7; // Transport error + private const int EVENT_ID_TRANSPORT_EXCEPTION = 8; // Transport exception + private const int EVENT_ID_TRANSPORT_INFO = 9; // Transport info [NonEvent] public void FailedToSendTraceData(Exception ex) @@ -76,6 +79,16 @@ public void FailedToSerializeMetric(string metricName, Exception ex) } } + [NonEvent] + public void 
TransportException(string transportType, string message, Exception ex) + { + if (Log.IsEnabled(EventLevel.Error, EventKeywords.All)) + { + // TODO: Do not hit ETW size limit even for external library exception stack. + this.TransportException(transportType, message, ex.ToInvariantString()); + } + } + [Event(EVENT_ID_TRACE, Message = "Exporter failed to send trace data. Exception: {0}", Level = EventLevel.Error)] public void FailedToSendTraceData(string error) { @@ -111,4 +124,22 @@ public void ExportCompleted(string exporterName) { this.WriteEvent(EVENT_ID_COMPLETED_EXPORT, exporterName); } + + [Event(EVENT_ID_TRANSPORT_ERROR, Message = "Transport '{0}' error. Message: {1}", Level = EventLevel.Error)] + public void TransportError(string transportType, string error) + { + this.WriteEvent(EVENT_ID_TRANSPORT_ERROR, transportType, error); + } + + [Event(EVENT_ID_TRANSPORT_EXCEPTION, Message = "Transport '{0}' error. Message: {1}, Exception: {2}", Level = EventLevel.Error)] + public void TransportException(string transportType, string error, string ex) + { + this.WriteEvent(EVENT_ID_TRANSPORT_EXCEPTION, transportType, error, ex); + } + + [Event(EVENT_ID_TRANSPORT_INFO, Message = "Transport '{0}' information. 
Message: {1}", Level = EventLevel.Informational)] + public void TransportInformation(string transportType, string error) + { + this.WriteEvent(EVENT_ID_TRANSPORT_INFO, transportType, error); + } } diff --git a/src/OpenTelemetry.Exporter.Geneva/Internal/Transports/UnixDomainSocketDataTransport.cs b/src/OpenTelemetry.Exporter.Geneva/Internal/Transports/UnixDomainSocketDataTransport.cs index 741daa7516..8a2c4e036f 100644 --- a/src/OpenTelemetry.Exporter.Geneva/Internal/Transports/UnixDomainSocketDataTransport.cs +++ b/src/OpenTelemetry.Exporter.Geneva/Internal/Transports/UnixDomainSocketDataTransport.cs @@ -73,7 +73,7 @@ private bool Connect() } catch (Exception ex) { - ExporterEventSource.Log.ExporterException("UDS Connect failed.", ex); + ExporterEventSource.Log.TransportException(nameof(UnixDomainSocketDataTransport), "Attempt to connect to socket failed. Connection will be retried periodically until established.", ex); return false; } diff --git a/src/OpenTelemetry.Exporter.Geneva/Metrics/OtlpProtobuf/OtlpProtobufMetricExporter.cs b/src/OpenTelemetry.Exporter.Geneva/Metrics/OtlpProtobuf/OtlpProtobufMetricExporter.cs index a6bdffad6c..fc6f67e4f8 100644 --- a/src/OpenTelemetry.Exporter.Geneva/Metrics/OtlpProtobuf/OtlpProtobufMetricExporter.cs +++ b/src/OpenTelemetry.Exporter.Geneva/Metrics/OtlpProtobuf/OtlpProtobufMetricExporter.cs @@ -1,6 +1,7 @@ // Copyright The OpenTelemetry Authors // SPDX-License-Identifier: Apache-2.0 +using System.Diagnostics; using System.Runtime.InteropServices; using OpenTelemetry.Metrics; using OpenTelemetry.Resources; @@ -15,17 +16,32 @@ internal sealed class OtlpProtobufMetricExporter : IDisposable private readonly Func getResource; - public OtlpProtobufMetricExporter(Func getResource, ConnectionStringBuilder connectionStringBuilder, IReadOnlyDictionary prepopulatedMetricDimensions) + public OtlpProtobufMetricExporter( + Func getResource, + ConnectionStringBuilder connectionStringBuilder, + IReadOnlyDictionary 
prepopulatedMetricDimensions) { + Debug.Assert(getResource != null, "getResource was null"); + + this.getResource = getResource; + +#if NET6_0_OR_GREATER + IMetricDataTransport transport = !RuntimeInformation.IsOSPlatform(OSPlatform.Windows) + ? MetricUnixUserEventsDataTransport.Instance + : MetricWindowsEventTracingDataTransport.Instance; +#else if (!RuntimeInformation.IsOSPlatform(OSPlatform.Windows)) { - // Temporary until we add support for user_events. throw new NotSupportedException("Exporting data in protobuf format is not supported on Linux."); } - this.getResource = getResource; + var transport = MetricWindowsEventTracingDataTransport.Instance; +#endif - this.otlpProtobufSerializer = new OtlpProtobufSerializer(MetricWindowsEventTracingDataTransport.Instance, connectionStringBuilder, prepopulatedMetricDimensions); + this.otlpProtobufSerializer = new OtlpProtobufSerializer( + transport, + connectionStringBuilder, + prepopulatedMetricDimensions); } public ExportResult Export(in Batch batch) diff --git a/src/OpenTelemetry.Exporter.Geneva/Metrics/Transport/MetricUnixUserEventsDataTransport.cs b/src/OpenTelemetry.Exporter.Geneva/Metrics/Transport/MetricUnixUserEventsDataTransport.cs new file mode 100644 index 0000000000..ea1810201b --- /dev/null +++ b/src/OpenTelemetry.Exporter.Geneva/Metrics/Transport/MetricUnixUserEventsDataTransport.cs @@ -0,0 +1,70 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +#if NET + +#nullable enable + +using System.Text; +using Microsoft.LinuxTracepoints.Provider; + +namespace OpenTelemetry.Exporter.Geneva; + +internal sealed class MetricUnixUserEventsDataTransport : IMetricDataTransport +{ + public const uint MetricsProtocol = 0U; + public const string MetricsVersion = "v0.19.00"; + public const string MetricsTracepointName = "otlp_metrics"; + public const string MetricsTracepointNameArgs = $"{MetricsTracepointName} u32 protocol;char[8] version;__rel_loc u8[] buffer"; + + private static readonly 
ReadOnlyMemory MetricsVersionUtf8 = Encoding.UTF8.GetBytes(MetricsVersion); + private readonly PerfTracepoint metricsTracepoint; + + private MetricUnixUserEventsDataTransport() + { + this.metricsTracepoint = new PerfTracepoint(MetricsTracepointNameArgs); + if (this.metricsTracepoint.RegisterResult != 0) + { + // ENOENT (2): No such file or directory + if (this.metricsTracepoint.RegisterResult == 2) + { + throw new NotSupportedException( + $"Tracepoint registration for 'otlp_metrics' failed with result: '{this.metricsTracepoint.RegisterResult}'. Verify your distribution/kernel supports user_events: https://docs.kernel.org/trace/user_events.html."); + } + + ExporterEventSource.Log.TransportInformation( + nameof(MetricUnixUserEventsDataTransport), + $"Tracepoint registration operation for 'otlp_metrics' returned result '{this.metricsTracepoint.RegisterResult}' which is considered recoverable. Entering running state."); + } + } + + public static MetricUnixUserEventsDataTransport Instance { get; } = new(); + + public void Send(MetricEventType eventType, byte[] body, int size) + { + throw new NotSupportedException(); + } + + public void SendOtlpProtobufEvent(byte[] body, int size) + { + if (this.metricsTracepoint.IsEnabled) + { + var buffer = new ReadOnlySpan(body, 0, size); + + var bufferRelLoc = 0u | ((uint)buffer.Length << 16); + + this.metricsTracepoint.Write( + [MetricsProtocol], + MetricsVersionUtf8.Span, + [bufferRelLoc], + buffer); + } + } + + public void Dispose() + { + this.metricsTracepoint.Dispose(); + } +} + +#endif diff --git a/src/OpenTelemetry.Exporter.Geneva/README.md b/src/OpenTelemetry.Exporter.Geneva/README.md index 5d7e1efdb1..2ba0725906 100644 --- a/src/OpenTelemetry.Exporter.Geneva/README.md +++ b/src/OpenTelemetry.Exporter.Geneva/README.md @@ -268,14 +268,25 @@ On Linux provide an `Endpoint` in addition to the `Account` and `Namespace`. For example: `Endpoint=unix:{UDS Path};Account={MetricAccount};Namespace={MetricNamespace}`. 
-Set `PrivatePreviewEnableOtlpProtobufEncoding=true` to opt-in to the -experimental feature for changing the underlying serialization format to binary -protobuf following the schema defined in [OTLP +##### OtlpProtobufEncoding + +On Windows set `PrivatePreviewEnableOtlpProtobufEncoding=true` on the +`ConnectionString` to opt-in to the experimental feature for changing the +underlying serialization format to binary protobuf following the schema defined +in [OTLP specification](https://github.com/open-telemetry/opentelemetry-proto/blob/v1.1.0/opentelemetry/proto/metrics/v1/metrics.proto). -> [!NOTE] - > `PrivatePreviewEnableOtlpProtobufEncoding` is currently - > only supported in Windows environment. +As of `1.10.0` `PrivatePreviewEnableOtlpProtobufEncoding=true` is also supported +on Linux. On Linux when using `PrivatePreviewEnableOtlpProtobufEncoding=true` an +`Endpoint` is **NOT** required to be provided on `ConnectionString`. For +example: `Account={MetricAccount};Namespace={MetricNamespace};PrivatePreviewEnableOtlpProtobufEncoding=true`. + +> [!IMPORTANT] +> When `PrivatePreviewEnableOtlpProtobufEncoding` is enabled on Linux metrics +> are written using +> [user_events](https://docs.kernel.org/trace/user_events.html). `user_events` +> are a newer feature of the Linux kernel and require a distro with the feature +> enabled. 
#### `MetricExportIntervalMilliseconds` (optional) diff --git a/test/OpenTelemetry.Exporter.Geneva.Tests/UnixUserEventsDataTransportTests.cs b/test/OpenTelemetry.Exporter.Geneva.Tests/UnixUserEventsDataTransportTests.cs new file mode 100644 index 0000000000..d3b7fd1ee1 --- /dev/null +++ b/test/OpenTelemetry.Exporter.Geneva.Tests/UnixUserEventsDataTransportTests.cs @@ -0,0 +1,334 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +#if NET6_0_OR_GREATER + +#nullable enable + +using System.Diagnostics; +using System.Globalization; +using System.Text.RegularExpressions; +using Microsoft.LinuxTracepoints.Provider; +using OpenTelemetry.Tests; +using Xunit; +using Xunit.Abstractions; + +namespace OpenTelemetry.Exporter.Geneva.Tests; + +[Trait("CategoryName", "Geneva:user_events:metrics")] +public class UnixUserEventsDataTransportTests +{ + /* + * Instructions for running these tests: + * + * 1) You need a version of Linux with user_events available in the kernel. + * This can be done on WSL2 using the 6.6+ kernel. + * + * 2) You have to run the tests with elevation. You don't need elevation to + * write/emit user_events but you need elevation to read them (which + * these tests do). + * + * Command: + * sudo dotnet test --configuration Debug --framework net8.0 --filter CategoryName=Geneva:user_events:metrics + * + * How these tests work: + * + * 1) The tests validate user_events are enabled and make sure the otlp_metrics tracepoint is registered. + * + * 2) A process is spawned to run cat /sys/kernel/debug/tracing/trace_pipe. This is what is listening for events. + * + * 3) Depending on the test, a process is spawned to run sh -c "echo '1' > /sys/kernel/tracing/events/user_events/{this.name}/enable" to enable events. + * + * 4) The thread running the tests writes to user_events using the GenevaExporter code. Then it waits for a bit. Then it checks to see what events (if any) were emitted. 
+ * + * 5) Depending on the test, a process is spawned to run sh -c "echo '0' > /sys/kernel/tracing/events/user_events/{this.name}/enable" to disable events. + */ + + private static readonly byte[] testRequest = [0x0a, 0x0f, 0x12, 0x0d, 0x0a, 0x0b, 0x0a, 0x09, 0x54, 0x65, 0x73, 0x74, 0x4d, 0x65, 0x74, 0x65, 0x72]; + private readonly ITestOutputHelper testOutputHelper; + + public UnixUserEventsDataTransportTests(ITestOutputHelper testOutputHelper) + { + this.testOutputHelper = testOutputHelper; + } + + [SkipUnlessPlatformMatchesFact(TestPlatform.Linux, requireElevatedProcess: true)] + public void UserEvents_Enabled_Succes_Linux() + { + EnsureUserEventsEnabled(); + + var listener = new PerfTracepointListener( + MetricUnixUserEventsDataTransport.MetricsTracepointName, + MetricUnixUserEventsDataTransport.MetricsTracepointNameArgs); + + try + { + listener.Enable(); + + MetricUnixUserEventsDataTransport.Instance.SendOtlpProtobufEvent( + testRequest, + testRequest.Length); + + Thread.Sleep(5000); + + foreach (var e in listener.Events) + { + this.testOutputHelper.WriteLine(string.Join(", ", e.Select(kvp => $"{kvp.Key}={kvp.Value}"))); + } + + Assert.Single(listener.Events); + + var @event = listener.Events[0]; + + Assert.EndsWith($" ({MetricUnixUserEventsDataTransport.MetricsProtocol})", @event["protocol"]); + Assert.Equal(MetricUnixUserEventsDataTransport.MetricsVersion, @event["version"]); + + var eventBufferStringData = @event["buffer"].AsSpan(); + + byte[] eventBuffer = new byte[(eventBufferStringData.Length + 1) / 3]; + + var index = 0; + var position = 0; + while (position < eventBufferStringData.Length) + { + eventBuffer[index++] = byte.Parse(eventBufferStringData.Slice(position, 2), NumberStyles.HexNumber); + position += 3; + } + + Assert.Equal(testRequest, eventBuffer); + } + finally + { + try + { + listener.Disable(); + } + catch + { + } + + listener.Dispose(); + } + } + + [SkipUnlessPlatformMatchesFact(TestPlatform.Linux, requireElevatedProcess: true)] + public 
void UserEvents_Disabled_Succes_Linux() + { + EnsureUserEventsEnabled(); + + var listener = new PerfTracepointListener( + MetricUnixUserEventsDataTransport.MetricsTracepointName, + MetricUnixUserEventsDataTransport.MetricsTracepointNameArgs); + + try + { + MetricUnixUserEventsDataTransport.Instance.SendOtlpProtobufEvent( + testRequest, + testRequest.Length); + + Thread.Sleep(5000); + + Assert.Empty(listener.Events); + } + finally + { + listener.Dispose(); + } + } + + private static void EnsureUserEventsEnabled() + { + using var userEventsEnableTest = ConsoleCommand.Run("cat", "/sys/kernel/tracing/user_events_status"); + if (userEventsEnableTest.Errors.Any()) + { + throw new NotSupportedException("Kernel does not support user_events. Verify your distribution/kernel supports user_events: https://docs.kernel.org/trace/user_events.html."); + } + } + + private sealed class ConsoleCommand : IDisposable + { + private readonly Process process; + private readonly List output = new(); + private readonly List errors = new(); + + private ConsoleCommand( + string command, + string arguments, + Action? onOutputReceived, + Action? 
onErrorReceived) + { + Console.WriteLine($"{command} {arguments}"); + + var process = new Process + { + StartInfo = new() + { + FileName = command, + Arguments = arguments, + RedirectStandardOutput = true, + RedirectStandardError = true, + RedirectStandardInput = false, + }, + }; + + process.OutputDataReceived += (sender, args) => + { + if (!string.IsNullOrEmpty(args.Data)) + { + this.output.Add(args.Data); + Console.WriteLine($"[OUT] {args.Data}"); + + onOutputReceived?.Invoke(args.Data); + } + }; + + process.ErrorDataReceived += (sender, args) => + { + if (!string.IsNullOrEmpty(args.Data)) + { + this.errors.Add(args.Data); + Console.WriteLine($"[ERR] {args.Data}"); + + onErrorReceived?.Invoke(args.Data); + } + }; + + process.Start(); + + process.BeginOutputReadLine(); + process.BeginErrorReadLine(); + + this.process = process; + } + + public IEnumerable Output => this.output; + + public IEnumerable Errors => this.errors; + + public static ConsoleCommand Run( + string command, + string arguments, + Action? onOutputReceived = null, + Action? onErrorReceived = null) + => new(command, arguments, onOutputReceived, onErrorReceived); + + public void Kill() + { + this.process.Kill(); + } + + public void Dispose() + { + this.process.WaitForExit(); + + this.process.CancelOutputRead(); + this.process.CancelErrorRead(); + + this.process.Dispose(); + } + } + + // Warning: Do NOT use this class/design to listen/read user_events in prod. + // It is a hack to workaround lack of decent bits for listening. Hopefully + // this can be removed if/when + // https://github.com/microsoft/LinuxTracepoints-Net/ has listening bits or + // dotnet/runtime supports user_events (both reading & writing) directly. 
+ private sealed class PerfTracepointListener : IDisposable + { + private readonly string name; + private readonly PerfTracepoint tracepoint; + private readonly ConsoleCommand catCommand; + private readonly Regex eventRegex = new("(\\w+?)=([\\w\\(\\) .,-]*)( |$)", RegexOptions.Compiled); + + public PerfTracepointListener(string name, string nameArgs) + { + this.name = name; + + this.tracepoint = new PerfTracepoint(nameArgs); + if (this.tracepoint.RegisterResult != 0) + { + throw new NotSupportedException($"Tracepoint could not be registered: '{this.tracepoint.RegisterResult}'"); + } + + this.catCommand = ConsoleCommand.Run("cat", "/sys/kernel/debug/tracing/trace_pipe", onOutputReceived: this.OnCatOutputReceived); + if (this.catCommand.Errors.Any()) + { + throw new InvalidOperationException($"Could not read '{name}' tracepoints"); + } + } + + public List> Events { get; } = new(); + + public bool IsEnabled() + { + using var command = ConsoleCommand.Run("cat", $"/sys/kernel/tracing/events/user_events/{this.name}/enable"); + + return command.Errors.Any() || command.Output.Count() != 1 + ? 
throw new InvalidOperationException($"Could not determine if '{this.name}' tracepoint is enabled") + : command.Output.First() != "0"; + } + + public void Enable() + { + using var command = ConsoleCommand.Run("sh", @$"-c ""echo '1' > /sys/kernel/tracing/events/user_events/{this.name}/enable"""); + if (command.Errors.Any()) + { + throw new InvalidOperationException($"Could not enable '{this.name}' tracepoint"); + } + } + + public void Disable() + { + using var command = ConsoleCommand.Run("sh", @$"-c ""echo '0' > /sys/kernel/tracing/events/user_events/{this.name}/enable"""); + if (command.Errors.Any()) + { + throw new InvalidOperationException($"Could not disable '{this.name}' tracepoint"); + } + } + + public void Dispose() + { + try + { + if (this.catCommand != null) + { + this.catCommand.Kill(); + this.catCommand.Dispose(); + } + } + finally + { + this.tracepoint.Dispose(); + } + } + + private void OnCatOutputReceived(string output) + { + var name = $": {this.name}:"; + + int startingPosition = output.IndexOf(name, StringComparison.Ordinal); + if (startingPosition < 0) + { + return; + } + + startingPosition += name.Length; + + var matches = this.eventRegex.Matches(output, startingPosition); + + if (matches.Count > 0) + { + Dictionary eventData = new(matches.Count); + + foreach (Match match in matches) + { + eventData[match.Groups[1].Value] = match.Groups[2].Value; + } + + this.Events.Add(eventData); + } + } + } +} + +#endif diff --git a/test/Shared/PlatformHelpers.cs b/test/Shared/PlatformHelpers.cs index 60d876c609..a737fa49f3 100644 --- a/test/Shared/PlatformHelpers.cs +++ b/test/Shared/PlatformHelpers.cs @@ -37,7 +37,7 @@ public static bool IsProcessElevated(TestPlatform platform) private static class SystemNativeUnix { #pragma warning disable CA5392 // Use DefaultDllImportSearchPaths attribute for P/Invokes - [DllImport("libc", SetLastError = true)] + [DllImport("libc", EntryPoint = "geteuid", SetLastError = true)] internal static extern uint GetEUid(); #pragma 
warning restore CA5392 // Use DefaultDllImportSearchPaths attribute for P/Invokes }