Skip to content

Commit

Permalink
PrometheusExporter: New concurrency handling for scrape middleware + …
Browse files Browse the repository at this point in the history
…http server (#2610)

* PoC for coordinating prometheus pull threads.

* Allocation free on .NET Core 3.1+ & locks are now tied to a specific exporter.

* Use CollectionManager in HttpServer. Remove delegate allocation.

* Bug fixes and benchmark updates.

* Configurable cache duration & bug fixes.

* Unit tests and bug fixes.

* More interesting unit test.

* Sealed PrometheusCollectionManager.

* Nits.

* CHANGELOG update.

* Doc tweaks.

* README update.

Co-authored-by: Cijo Thomas <[email protected]>
  • Loading branch information
CodeBlanch and cijothomas authored Nov 15, 2021
1 parent 770a367 commit e871c27
Show file tree
Hide file tree
Showing 11 changed files with 464 additions and 131 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ OpenTelemetry.Exporter.PrometheusExporterOptions.HttpListenerPrefixes.set -> voi
OpenTelemetry.Exporter.PrometheusExporterOptions.PrometheusExporterOptions() -> void
OpenTelemetry.Exporter.PrometheusExporterOptions.ScrapeEndpointPath.get -> string
OpenTelemetry.Exporter.PrometheusExporterOptions.ScrapeEndpointPath.set -> void
OpenTelemetry.Exporter.PrometheusExporterOptions.ScrapeResponseCacheDurationMilliseconds.get -> int
OpenTelemetry.Exporter.PrometheusExporterOptions.ScrapeResponseCacheDurationMilliseconds.set -> void
OpenTelemetry.Exporter.PrometheusExporterOptions.StartHttpListener.get -> bool
OpenTelemetry.Exporter.PrometheusExporterOptions.StartHttpListener.set -> void
OpenTelemetry.Metrics.PrometheusExporterMeterProviderBuilderExtensions
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ OpenTelemetry.Exporter.PrometheusExporterOptions.HttpListenerPrefixes.set -> voi
OpenTelemetry.Exporter.PrometheusExporterOptions.PrometheusExporterOptions() -> void
OpenTelemetry.Exporter.PrometheusExporterOptions.ScrapeEndpointPath.get -> string
OpenTelemetry.Exporter.PrometheusExporterOptions.ScrapeEndpointPath.set -> void
OpenTelemetry.Exporter.PrometheusExporterOptions.ScrapeResponseCacheDurationMilliseconds.get -> int
OpenTelemetry.Exporter.PrometheusExporterOptions.ScrapeResponseCacheDurationMilliseconds.set -> void
OpenTelemetry.Exporter.PrometheusExporterOptions.StartHttpListener.get -> bool
OpenTelemetry.Exporter.PrometheusExporterOptions.StartHttpListener.set -> void
OpenTelemetry.Metrics.PrometheusExporterMeterProviderBuilderExtensions
Expand Down
4 changes: 4 additions & 0 deletions src/OpenTelemetry.Exporter.Prometheus/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@

## Unreleased

* Added scrape endpoint response caching feature &
`ScrapeResponseCacheDurationMilliseconds` option
([#2610](https://github.com/open-telemetry/opentelemetry-dotnet/pull/2610))

## 1.2.0-beta1

Released 2021-Oct-08
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,216 @@
// <copyright file="PrometheusCollectionManager.cs" company="OpenTelemetry Authors">
// Copyright The OpenTelemetry Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// </copyright>

using System;
using System.Runtime.CompilerServices;
using System.Threading;
using System.Threading.Tasks;
using OpenTelemetry.Metrics;

namespace OpenTelemetry.Exporter.Prometheus
{
/// <summary>
/// Coordinates concurrent scrape requests against a single
/// <see cref="PrometheusExporter"/>: at most one collection runs at a time,
/// requests arriving while a collection is running wait for its result, and
/// a successful result may be served again until the configured cache
/// duration elapses.
/// </summary>
/// <remarks>
/// Every call to <see cref="EnterCollect"/> that yields a data view must be
/// paired with a call to <see cref="ExitCollect"/> once the caller has
/// finished reading the view. A new collection overwrites the shared buffer
/// and therefore does not start until all outstanding readers have exited.
/// </remarks>
internal sealed class PrometheusCollectionManager
{
    private readonly PrometheusExporter exporter;
    private readonly int scrapeResponseCacheDurationInMilliseconds;
    private readonly Func<Batch<Metric>, ExportResult> onCollectRef;
    private byte[] buffer = new byte[85000]; // encourage the object to live in LOH (large object heap)
    private int globalLockState; // 0 = free, 1 = held
    private ArraySegment<byte> previousDataView;
    private DateTime? previousDataViewExpirationAtUtc;
    private int readerCount;
    private bool collectionRunning;
    private TaskCompletionSource<ArraySegment<byte>> collectionTcs;

    /// <summary>
    /// Initializes a new instance of the <see cref="PrometheusCollectionManager"/> class.
    /// </summary>
    /// <param name="exporter">Exporter whose options supply the cache duration and on which collections are triggered.</param>
    public PrometheusCollectionManager(PrometheusExporter exporter)
    {
        this.exporter = exporter;
        this.scrapeResponseCacheDurationInMilliseconds = this.exporter.Options.ScrapeResponseCacheDurationMilliseconds;

        // Cache the delegate once so that a collection does not allocate a new one.
        this.onCollectRef = this.OnCollect;
    }

    /// <summary>
    /// Obtains the serialized metrics, either from the cache, from a
    /// collection that is already running, or by running a collection on
    /// the calling thread.
    /// </summary>
    /// <returns>
    /// The serialized data view. An empty view (<c>Count == 0</c>) is
    /// produced when serialization failed. The caller must call
    /// <see cref="ExitCollect"/> after it has finished reading the view.
    /// </returns>
    /// <remarks>
    /// If the exporter's collect call throws, the exception propagates to
    /// the collecting caller after the internal state has been reset; in
    /// that case the caller's reader slot has already been released and
    /// <see cref="ExitCollect"/> must not be called for it.
    /// </remarks>
#if NETCOREAPP3_1_OR_GREATER
    public ValueTask<ArraySegment<byte>> EnterCollect()
#else
    public Task<ArraySegment<byte>> EnterCollect()
#endif
    {
        this.EnterGlobalLock();

        // If we are within {ScrapeResponseCacheDurationMilliseconds} of the
        // last successful collect, return the previous view.
        if (this.previousDataViewExpirationAtUtc.HasValue && this.previousDataViewExpirationAtUtc >= DateTime.UtcNow)
        {
            Interlocked.Increment(ref this.readerCount);

            // Capture the view while the lock is still held.
            ArraySegment<byte> cachedView = this.previousDataView;
            this.ExitGlobalLock();
#if NETCOREAPP3_1_OR_GREATER
            return new ValueTask<ArraySegment<byte>>(cachedView);
#else
            return Task.FromResult(cachedView);
#endif
        }

        // If a collection is already running, return a task to wait on the result.
        if (this.collectionRunning)
        {
            if (this.collectionTcs == null)
            {
                this.collectionTcs = new TaskCompletionSource<ArraySegment<byte>>(TaskCreationOptions.RunContinuationsAsynchronously);
            }

            Interlocked.Increment(ref this.readerCount);

            // Capture the task while the lock is still held: the collecting
            // thread sets collectionTcs back to null once it completes, so
            // reading the field after releasing the lock could observe null.
            Task<ArraySegment<byte>> pendingCollection = this.collectionTcs.Task;
            this.ExitGlobalLock();
#if NETCOREAPP3_1_OR_GREATER
            return new ValueTask<ArraySegment<byte>>(pendingCollection);
#else
            return pendingCollection;
#endif
        }

        // The collection overwrites the shared buffer, so wait until nobody
        // is reading the previous view any more.
        this.WaitForReadersToComplete();

        // Start a collection on the current thread.
        this.collectionRunning = true;
        this.previousDataViewExpirationAtUtc = null;
        Interlocked.Increment(ref this.readerCount);
        this.ExitGlobalLock();

        bool result;
        try
        {
            result = this.ExecuteCollect();
        }
        catch
        {
            // Do not leave the manager stuck in the "collection running"
            // state: publish an empty view so waiting requests observe a
            // failure, release this caller's reader slot (it receives no
            // view to read) and let the exception propagate.
            this.previousDataView = new ArraySegment<byte>(Array.Empty<byte>(), 0, 0);
            this.CompleteCollection(false);
            Interlocked.Decrement(ref this.readerCount);
            throw;
        }

        ArraySegment<byte> collectedView = this.CompleteCollection(result);

#if NETCOREAPP3_1_OR_GREATER
        return new ValueTask<ArraySegment<byte>>(collectedView);
#else
        return Task.FromResult(collectedView);
#endif
    }

    /// <summary>
    /// Signals that the caller has finished reading the view returned by
    /// <see cref="EnterCollect"/>.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public void ExitCollect()
    {
        Interlocked.Decrement(ref this.readerCount);
    }

    /// <summary>
    /// Spins until the global lock has been acquired by the calling thread.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private void EnterGlobalLock()
    {
        SpinWait lockWait = default;

        // The comparand must be the constant 0 ("free"). Using the current
        // field value as comparand lets the call return 0 without having
        // stored 1 when the holder releases the lock between the field read
        // and the compare-exchange.
        while (Interlocked.CompareExchange(ref this.globalLockState, 1, 0) != 0)
        {
            lockWait.SpinOnce();
        }
    }

    /// <summary>
    /// Releases the global lock.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private void ExitGlobalLock()
    {
        // Release semantics: writes made while holding the lock must not be
        // reordered after the store that frees it.
        Volatile.Write(ref this.globalLockState, 0);
    }

    /// <summary>
    /// Spins until the reader count has dropped to zero. Called while the
    /// global lock is held, so no new reader can register in the meantime.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private void WaitForReadersToComplete()
    {
        SpinWait readWait = default;

        // Only observe the counter; it is owned by the readers and must
        // never be reset from here.
        while (Volatile.Read(ref this.readerCount) != 0)
        {
            readWait.SpinOnce();
        }
    }

    /// <summary>
    /// Runs a collection on the exporter with <see cref="OnCollect"/>
    /// installed as the export callback.
    /// </summary>
    /// <returns>The value returned by the exporter's collect call.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private bool ExecuteCollect()
    {
        this.exporter.OnExport = this.onCollectRef;
        try
        {
            return this.exporter.Collect(Timeout.Infinite);
        }
        finally
        {
            // Always detach the callback, even when the collect call throws.
            this.exporter.OnExport = null;
        }
    }

    /// <summary>
    /// Marks the running collection as finished, refreshes the cache
    /// expiration on success and hands the current view to any requests
    /// that waited for this collection.
    /// </summary>
    /// <param name="succeeded">Whether the collection succeeded.</param>
    /// <returns>The view published by this collection.</returns>
    private ArraySegment<byte> CompleteCollection(bool succeeded)
    {
        this.EnterGlobalLock();

        ArraySegment<byte> view = this.previousDataView;
        this.collectionRunning = false;

        // Written under the lock because other threads read this field
        // under the lock and a nullable DateTime is not written atomically.
        if (succeeded && this.scrapeResponseCacheDurationInMilliseconds > 0)
        {
            this.previousDataViewExpirationAtUtc = DateTime.UtcNow.AddMilliseconds(this.scrapeResponseCacheDurationInMilliseconds);
        }

        if (this.collectionTcs != null)
        {
            this.collectionTcs.SetResult(view);
            this.collectionTcs = null;
        }

        this.ExitGlobalLock();

        return view;
    }

    /// <summary>
    /// Export callback: serializes the batch into the shared buffer and
    /// publishes the written range as the current data view.
    /// </summary>
    /// <param name="metrics">Batch of metrics to serialize.</param>
    /// <returns>
    /// <see cref="ExportResult.Success"/> when every metric was written;
    /// otherwise <see cref="ExportResult.Failure"/>, in which case an empty
    /// view is published.
    /// </returns>
    private ExportResult OnCollect(Batch<Metric> metrics)
    {
        int cursor = 0;

        try
        {
            foreach (var metric in metrics)
            {
                // Retry the same metric from the same cursor after each
                // buffer growth until it fits.
                while (true)
                {
                    try
                    {
                        cursor = PrometheusSerializer.WriteMetric(this.buffer, cursor, metric);
                        break;
                    }
                    catch (IndexOutOfRangeException)
                    {
                        int bufferSize = this.buffer.Length * 2;

                        // There are two ways to end up here:
                        // 1. There are many metrics to export. In this case we
                        //    probably want an upper limit that the user can
                        //    configure.
                        // 2. The IndexOutOfRangeException was triggered by some
                        //    code other than buffer[cursor++]. In this case we
                        //    should give up at some point rather than keep
                        //    allocating ever larger buffers.
                        if (bufferSize > 100 * 1024 * 1024)
                        {
                            throw;
                        }

                        var newBuffer = new byte[bufferSize];
                        this.buffer.CopyTo(newBuffer, 0);
                        this.buffer = newBuffer;
                    }
                }
            }

            this.previousDataView = new ArraySegment<byte>(this.buffer, 0, cursor);
            return ExportResult.Success;
        }
        catch (Exception)
        {
            // Any failure is reported to callers as an empty view.
            this.previousDataView = new ArraySegment<byte>(Array.Empty<byte>(), 0, 0);
            return ExportResult.Failure;
        }
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -117,9 +117,6 @@ public void Dispose()

private void WorkerProc()
{
var bufferSize = 85000; // encourage the object to live in LOH (large object heap)
var buffer = new byte[bufferSize];

this.httpListener.Start();

try
Expand All @@ -131,74 +128,7 @@ private void WorkerProc()
ctxTask.Wait(this.tokenSource.Token);
var ctx = ctxTask.Result;

try
{
ctx.Response.StatusCode = 200;
ctx.Response.Headers.Add("Server", string.Empty);
ctx.Response.ContentType = "text/plain; charset=utf-8; version=0.0.4";

this.exporter.OnExport = (metrics) =>
{
try
{
var cursor = 0;
foreach (var metric in metrics)
{
while (true)
{
try
{
cursor = PrometheusSerializer.WriteMetric(buffer, cursor, metric);
break;
}
catch (IndexOutOfRangeException)
{
bufferSize = bufferSize * 2;

// there are two cases we might run into the following condition:
// 1. we have many metrics to be exported - in this case we probably want
// to put some upper limit and allow the user to configure it.
// 2. we got an IndexOutOfRangeException which was triggered by some other
// code instead of the buffer[cursor++] - in this case we should give up
// at certain point rather than allocating like crazy.
if (bufferSize > 100 * 1024 * 1024)
{
throw;
}

var newBuffer = new byte[bufferSize];
buffer.CopyTo(newBuffer, 0);
buffer = newBuffer;
}
}
}

ctx.Response.OutputStream.Write(buffer, 0, cursor - 0);
return ExportResult.Success;
}
catch (Exception)
{
return ExportResult.Failure;
}
};

this.exporter.Collect(Timeout.Infinite);
this.exporter.OnExport = null;
}
catch (Exception ex)
{
PrometheusExporterEventSource.Log.FailedExport(ex);

ctx.Response.StatusCode = 500;
}

try
{
ctx.Response.Close();
}
catch
{
}
Task.Run(() => this.ProcessRequestAsync(ctx));
}
}
catch (OperationCanceledException ex)
Expand All @@ -218,5 +148,46 @@ private void WorkerProc()
}
}
}

/// <summary>
/// Serves a single scrape request: obtains the serialized metrics from the
/// exporter's collection manager, writes them to the response and closes
/// the response.
/// </summary>
/// <param name="context">Listener context of the request to answer.</param>
/// <returns>A task that completes once the response has been closed.</returns>
private async Task ProcessRequestAsync(HttpListenerContext context)
{
    try
    {
        var data = await this.exporter.CollectionManager.EnterCollect().ConfigureAwait(false);
        try
        {
            if (data.Count > 0)
            {
                context.Response.StatusCode = 200;
                context.Response.Headers.Add("Server", string.Empty);
                context.Response.ContentType = "text/plain; charset=utf-8; version=0.0.4";

                await context.Response.OutputStream.WriteAsync(data.Array, 0, data.Count).ConfigureAwait(false);
            }
            else
            {
                // An empty view is treated as a failed collection; throwing
                // routes it through the logging / 500 path below.
                throw new InvalidOperationException("Collection failure.");
            }
        }
        finally
        {
            // Always release the view, even when writing the response fails.
            this.exporter.CollectionManager.ExitCollect();
        }
    }
    catch (Exception ex)
    {
        PrometheusExporterEventSource.Log.FailedExport(ex);

        try
        {
            context.Response.StatusCode = 500;
        }
        catch
        {
            // The status can no longer be changed (for example because the
            // response was already disposed). The failure has been logged
            // above; an exception escaping here would skip the Close call
            // below and go unobserved by the caller that started this task.
        }
    }

    try
    {
        context.Response.Close();
    }
    catch
    {
        // Best effort: there is nothing left to do if closing fails.
    }
}
}
}
Loading

0 comments on commit e871c27

Please sign in to comment.