From 4ee90dc15b6a6ef9fc83c265f96f4c7b0fd1db4f Mon Sep 17 00:00:00 2001 From: Joel Verhagen Date: Wed, 17 Jun 2020 14:56:53 -0700 Subject: [PATCH] Retry once on certain WebException codes (#785) Address https://github.com/NuGet/Engineering/issues/3212 Only added retry for WebException codes that were clearly transient in logs --- .../DependencyInjectionExtensions.cs | 44 ++++- .../NuGet.Services.AzureSearch.csproj | 1 + .../WebExceptionRetryDelegatingHandler.cs | 54 ++++++ .../Wrappers/IndexesOperationsWrapper.cs | 31 +++- .../Wrappers/SearchServiceClientWrapper.cs | 6 +- .../DependencyInjectionExtensionsFacts.cs | 161 ++++++++++++++++++ .../NuGet.Services.AzureSearch.Tests.csproj | 1 + .../NuGet.Services.V3.Tests.csproj | 1 + .../Support/TestHttpClientHandler.cs | 23 +++ 9 files changed, 317 insertions(+), 5 deletions(-) create mode 100644 src/NuGet.Services.AzureSearch/WebExceptionRetryDelegatingHandler.cs create mode 100644 tests/NuGet.Services.AzureSearch.Tests/DependencyInjectionExtensionsFacts.cs create mode 100644 tests/NuGet.Services.V3.Tests/Support/TestHttpClientHandler.cs diff --git a/src/NuGet.Services.AzureSearch/DependencyInjectionExtensions.cs b/src/NuGet.Services.AzureSearch/DependencyInjectionExtensions.cs index ff19bd972..0338d236d 100644 --- a/src/NuGet.Services.AzureSearch/DependencyInjectionExtensions.cs +++ b/src/NuGet.Services.AzureSearch/DependencyInjectionExtensions.cs @@ -10,6 +10,7 @@ using Microsoft.Extensions.Logging; using Microsoft.Extensions.Options; using Microsoft.Rest; +using Microsoft.Rest.TransientFaultHandling; using Microsoft.WindowsAzure.Storage; using NuGet.Protocol; using NuGet.Protocol.Catalog; @@ -231,13 +232,26 @@ public static IServiceCollection AddAzureSearch( services.AddFeatureFlags(); services.AddTransient(); + services.AddTransient(p => new SearchServiceClientWrapper( + p.GetRequiredService(), + GetSearchDelegatingHandlers(p.GetRequiredService()), + GetSearchRetryPolicy(), + p.GetRequiredService>())); + services .AddTransient(p => { var options = p.GetRequiredService>(); - return new SearchServiceClient( + + var client = new SearchServiceClient( options.Value.SearchServiceName, - new SearchCredentials(options.Value.SearchServiceApiKey)); + new SearchCredentials(options.Value.SearchServiceApiKey), + new WebRequestHandler(), + GetSearchDelegatingHandlers(p.GetRequiredService())); + + client.SetRetryPolicy(GetSearchRetryPolicy()); + + return client; }); services.AddSingleton(); @@ -276,12 +290,36 @@ public static IServiceCollection AddAzureSearch( services.AddTransient(); services.AddTransient(); services.AddTransient(); - services.AddTransient(); services.AddTransient(); services.AddTransient(); services.AddTransient(); return services; } + + /// + /// Defaults originally taken from: + /// https://github.com/Azure/azure-sdk-for-net/blob/96421089bc26198098f320ea50e0208e98376956/sdk/mgmtcommon/ClientRuntime/ClientRuntime/RetryDelegatingHandler.cs#L19-L22 + /// + /// Note that this policy only applied to the automatically initialized by + /// the Azure Search SDK. This policy does not apply to . + /// + private static RetryPolicy GetSearchRetryPolicy() + { + return new RetryPolicy( + new HttpStatusCodeErrorDetectionStrategy(), + retryCount: 3, + minBackoff: TimeSpan.FromSeconds(1), + maxBackoff: TimeSpan.FromSeconds(10), + deltaBackoff: TimeSpan.FromSeconds(10)); + } + + public static DelegatingHandler[] GetSearchDelegatingHandlers(ILoggerFactory loggerFactory) + { + return new[] + { + new WebExceptionRetryDelegatingHandler(loggerFactory.CreateLogger()), + }; + } } } diff --git a/src/NuGet.Services.AzureSearch/NuGet.Services.AzureSearch.csproj b/src/NuGet.Services.AzureSearch/NuGet.Services.AzureSearch.csproj index 2747e1b9a..efdd2d396 100644 --- a/src/NuGet.Services.AzureSearch/NuGet.Services.AzureSearch.csproj +++ b/src/NuGet.Services.AzureSearch/NuGet.Services.AzureSearch.csproj @@ -228,6 +228,7 @@ + diff --git a/src/NuGet.Services.AzureSearch/WebExceptionRetryDelegatingHandler.cs b/src/NuGet.Services.AzureSearch/WebExceptionRetryDelegatingHandler.cs new file mode 100644 index 000000000..2fd237890 --- /dev/null +++ b/src/NuGet.Services.AzureSearch/WebExceptionRetryDelegatingHandler.cs @@ -0,0 +1,54 @@ +// Copyright (c) .NET Foundation. All rights reserved. +// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. + +using System; +using System.Collections.Generic; +using System.Net; +using System.Net.Http; +using System.Threading; +using System.Threading.Tasks; +using Microsoft.Extensions.Logging; + +namespace NuGet.Services.AzureSearch +{ + public class WebExceptionRetryDelegatingHandler : DelegatingHandler + { + private static readonly HashSet _transientWebExceptionStatuses = new HashSet(new[] + { + WebExceptionStatus.ConnectFailure, // Unable to connect to the remote server + WebExceptionStatus.ConnectionClosed, // The underlying connection was closed + WebExceptionStatus.KeepAliveFailure, // A connection that was expected to be kept alive was closed by the server + WebExceptionStatus.ReceiveFailure, // An unexpected error occurred on a receive + }); + + private readonly ILogger _logger; + + public WebExceptionRetryDelegatingHandler(ILogger logger) + { + _logger = logger ?? throw new ArgumentNullException(nameof(logger)); + } + + protected override async Task SendAsync(HttpRequestMessage request, CancellationToken cancellationToken) + { + try + { + return await base.SendAsync(request, cancellationToken); + } + catch (Exception ex) when (ex is HttpRequestException hre && hre.InnerException is WebException we) + { + if (_transientWebExceptionStatuses.Contains(we.Status)) + { + // Retry only a single time since some of these transient exceptions take a while (~20 seconds) to be + // thrown and we don't want to make the user wait too long even to see a failure. + _logger.LogWarning(ex, "Transient web exception encountered, status {Status}. Attempting a single retry.", we.Status); + return await base.SendAsync(request, cancellationToken); + } + else + { + _logger.LogError(ex, "Non-transient web exception encountered, status {Status}.", we.Status); + throw; + } + } + } + } +} diff --git a/src/NuGet.Services.AzureSearch/Wrappers/IndexesOperationsWrapper.cs b/src/NuGet.Services.AzureSearch/Wrappers/IndexesOperationsWrapper.cs index 99576df67..af1a56bb8 100644 --- a/src/NuGet.Services.AzureSearch/Wrappers/IndexesOperationsWrapper.cs +++ b/src/NuGet.Services.AzureSearch/Wrappers/IndexesOperationsWrapper.cs @@ -2,29 +2,58 @@ // Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. using System; +using System.Linq; +using System.Net.Http; using System.Threading.Tasks; using Microsoft.Azure.Search; using Microsoft.Azure.Search.Models; using Microsoft.Extensions.Logging; +using Microsoft.Rest.TransientFaultHandling; namespace NuGet.Services.AzureSearch.Wrappers { public class IndexesOperationsWrapper : IIndexesOperationsWrapper { private readonly IIndexesOperations _inner; + private readonly DelegatingHandler[] _handlers; + private readonly RetryPolicy _retryPolicy; private readonly ILogger _documentsOperationsLogger; public IndexesOperationsWrapper( IIndexesOperations inner, + DelegatingHandler[] handlers, + RetryPolicy retryPolicy, ILogger documentsOperationsLogger) { _inner = inner ?? throw new ArgumentNullException(nameof(inner)); + _handlers = handlers ?? throw new ArgumentNullException(nameof(handlers)); + _retryPolicy = retryPolicy; _documentsOperationsLogger = documentsOperationsLogger ?? throw new ArgumentNullException(nameof(documentsOperationsLogger)); } + /// + /// This is implemented in lieu of + /// because it allows the delegating handlers and retry policy to be specified. See: + /// https://github.com/Azure/azure-sdk-for-net/blob/96421089bc26198098f320ea50e0208e98376956/sdk/search/Microsoft.Azure.Search/src/IndexesGetClientExtensions.cs#L27-L41 + /// public ISearchIndexClientWrapper GetClient(string indexName) { - return new SearchIndexClientWrapper(_inner.GetClient(indexName), _documentsOperationsLogger); + var searchIndexClient = new SearchIndexClient( + _inner.Client.SearchServiceName, + indexName, + _inner.Client.SearchCredentials, + _inner.Client.HttpMessageHandlers.OfType().SingleOrDefault(), + _handlers); + + searchIndexClient.SearchDnsSuffix = _inner.Client.SearchDnsSuffix; + searchIndexClient.HttpClient.Timeout = _inner.Client.HttpClient.Timeout; + + if (_retryPolicy != null) + { + searchIndexClient.SetRetryPolicy(_retryPolicy); + } + + return new SearchIndexClientWrapper(searchIndexClient, _documentsOperationsLogger); } public async Task ExistsAsync(string indexName) diff --git a/src/NuGet.Services.AzureSearch/Wrappers/SearchServiceClientWrapper.cs b/src/NuGet.Services.AzureSearch/Wrappers/SearchServiceClientWrapper.cs index d760e2f05..e926a4b53 100644 --- a/src/NuGet.Services.AzureSearch/Wrappers/SearchServiceClientWrapper.cs +++ b/src/NuGet.Services.AzureSearch/Wrappers/SearchServiceClientWrapper.cs @@ -2,8 +2,10 @@ // Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. using System; +using System.Net.Http; using Microsoft.Azure.Search; using Microsoft.Extensions.Logging; +using Microsoft.Rest.TransientFaultHandling; namespace NuGet.Services.AzureSearch.Wrappers { @@ -13,10 +15,12 @@ public class SearchServiceClientWrapper : ISearchServiceClientWrapper public SearchServiceClientWrapper( ISearchServiceClient inner, + DelegatingHandler[] handlers, + RetryPolicy retryPolicy, ILogger documentsOperationsLogger) { _inner = inner ?? throw new ArgumentNullException(nameof(inner)); - Indexes = new IndexesOperationsWrapper(_inner.Indexes, documentsOperationsLogger); + Indexes = new IndexesOperationsWrapper(_inner.Indexes, handlers, retryPolicy, documentsOperationsLogger); } public IIndexesOperationsWrapper Indexes { get; } diff --git a/tests/NuGet.Services.AzureSearch.Tests/DependencyInjectionExtensionsFacts.cs b/tests/NuGet.Services.AzureSearch.Tests/DependencyInjectionExtensionsFacts.cs new file mode 100644 index 000000000..df79e5360 --- /dev/null +++ b/tests/NuGet.Services.AzureSearch.Tests/DependencyInjectionExtensionsFacts.cs @@ -0,0 +1,161 @@ +// Copyright (c) .NET Foundation. All rights reserved. +// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. + +using System; +using System.Collections.Generic; +using System.Linq; +using System.Net; +using System.Net.Http; +using System.Threading; +using System.Threading.Tasks; +using Microsoft.Azure.Search; +using Microsoft.Extensions.Logging; +using Microsoft.Rest.TransientFaultHandling; +using Moq; +using Moq.Language.Flow; +using NuGet.Services.AzureSearch.Wrappers; +using Xunit; +using Xunit.Abstractions; + +namespace NuGet.Services.AzureSearch +{ + public class DependencyInjectionExtensionsFacts + { + public class TheGetRetryPolicyMethod + { + public TheGetRetryPolicyMethod(ITestOutputHelper output) + { + LoggerFactory = new LoggerFactory().AddXunit(output); + HttpClientHandler = new Mock { CallBase = true }; + + SearchServiceClient = new SearchServiceClient( + "test-search-service", + new SearchCredentials("api-key"), + HttpClientHandler.Object, + DependencyInjectionExtensions.GetSearchDelegatingHandlers(LoggerFactory)); + SearchServiceClient.SetRetryPolicy(SingleRetry); + + IndexesOperationsWrapper = new IndexesOperationsWrapper( + SearchServiceClient.Indexes, + DependencyInjectionExtensions.GetSearchDelegatingHandlers(LoggerFactory), + SingleRetry, + LoggerFactory.CreateLogger()); + } + + [Theory] + [MemberData(nameof(NonTransientTestData))] + public async Task DoesNotRetryNonTransientErrorsForIndexOperations(Action>> setup) + { + setup(HttpClientHandler.Setup(x => x.OnSendAsync(It.IsAny(), It.IsAny()))); + + await Assert.ThrowsAnyAsync(() => SearchServiceClient.Indexes.ListAsync()); + + VerifyAttemptCount(1); + } + + [Theory] + [MemberData(nameof(NonTransientTestData))] + public async Task DoesNotRetryNonTransientErrorsForDocumentOperations(Action>> setup) + { + setup(HttpClientHandler.Setup(x => x.OnSendAsync(It.IsAny(), It.IsAny()))); + + await Assert.ThrowsAnyAsync(() => IndexesOperationsWrapper.GetClient("test-index").Documents.CountAsync()); + + VerifyAttemptCount(1); + } + + [Theory] + [MemberData(nameof(TransientTestData))] + public async Task RetriesTransientErrorsForIndexOperations(Action>> setup) + { + setup(HttpClientHandler.Setup(x => x.OnSendAsync(It.IsAny(), It.IsAny()))); + + await Assert.ThrowsAnyAsync(() => SearchServiceClient.Indexes.ListAsync()); + + VerifyAttemptCount(2); + } + + [Theory] + [MemberData(nameof(TransientTestData))] + public async Task RetriesTransientErrorsForDocumentOperations(Action>> setup) + { + setup(HttpClientHandler.Setup(x => x.OnSendAsync(It.IsAny(), It.IsAny()))); + + await Assert.ThrowsAnyAsync(() => IndexesOperationsWrapper.GetClient("test-index").Documents.CountAsync()); + + VerifyAttemptCount(2); + } + + private void VerifyAttemptCount(int count) + { + HttpClientHandler.Verify( + x => x.OnSendAsync(It.IsAny(), It.IsAny()), + Times.Exactly(count)); + } + + public static IEnumerable TransientHttpStatusCodes => new[] + { + HttpStatusCode.RequestTimeout, + (HttpStatusCode)429, + HttpStatusCode.InternalServerError, + HttpStatusCode.BadGateway, + HttpStatusCode.InternalServerError, + HttpStatusCode.GatewayTimeout, + }; + + public static IEnumerable TransientWebExceptionStatuses => new[] + { + WebExceptionStatus.ConnectFailure, + WebExceptionStatus.ConnectionClosed, + WebExceptionStatus.KeepAliveFailure, + WebExceptionStatus.ReceiveFailure, + }; + + public static IEnumerable TransientTestData => GetTestData(TransientHttpStatusCodes, TransientWebExceptionStatuses); + + public static IEnumerable NonTransientHttpStatusCodes => new[] + { + HttpStatusCode.BadRequest, + HttpStatusCode.Unauthorized, + HttpStatusCode.Forbidden, + HttpStatusCode.NotFound, + HttpStatusCode.Conflict, + HttpStatusCode.NotImplemented, + }; + + public static IEnumerable NonTransientWebExceptionStatuses => new[] + { + WebExceptionStatus.TrustFailure, + WebExceptionStatus.NameResolutionFailure, + }; + + public static IEnumerable NonTransientTestData => GetTestData(NonTransientHttpStatusCodes, NonTransientWebExceptionStatuses); + + private static IEnumerable GetTestData(IEnumerable statusCodes, IEnumerable webExceptionStatuses) + { + var setups = new List>>>(); + + foreach (var statusCode in statusCodes) + { + setups.Add(s => s.ReturnsAsync(() => new HttpResponseMessage(statusCode) { Content = new StringContent(string.Empty) })); + } + + foreach (var webExceptionStatus in webExceptionStatuses) + { + setups.Add(s => s.ThrowsAsync(new HttpRequestException("Fail.", new WebException("Inner fail.", webExceptionStatus)))); + } + + return setups.Select(x => new object[] { x }); + } + + public RetryPolicy SingleRetry => new RetryPolicy( + new HttpStatusCodeErrorDetectionStrategy(), + new FixedIntervalRetryStrategy(retryCount: 1, retryInterval: TimeSpan.Zero)); + + public ILoggerFactory LoggerFactory { get; } + public Mock HttpClientHandler { get; } + public SearchServiceClient SearchServiceClient { get; } + public IndexesOperationsWrapper IndexesOperationsWrapper { get; } + } + } +} diff --git a/tests/NuGet.Services.AzureSearch.Tests/NuGet.Services.AzureSearch.Tests.csproj b/tests/NuGet.Services.AzureSearch.Tests/NuGet.Services.AzureSearch.Tests.csproj index b767ddc03..b9e98594a 100644 --- a/tests/NuGet.Services.AzureSearch.Tests/NuGet.Services.AzureSearch.Tests.csproj +++ b/tests/NuGet.Services.AzureSearch.Tests/NuGet.Services.AzureSearch.Tests.csproj @@ -68,6 +68,7 @@ + diff --git a/tests/NuGet.Services.V3.Tests/NuGet.Services.V3.Tests.csproj b/tests/NuGet.Services.V3.Tests/NuGet.Services.V3.Tests.csproj index e11fb0e26..0d1ab6f60 100644 --- a/tests/NuGet.Services.V3.Tests/NuGet.Services.V3.Tests.csproj +++ b/tests/NuGet.Services.V3.Tests/NuGet.Services.V3.Tests.csproj @@ -46,6 +46,7 @@ + diff --git a/tests/NuGet.Services.V3.Tests/Support/TestHttpClientHandler.cs b/tests/NuGet.Services.V3.Tests/Support/TestHttpClientHandler.cs new file mode 100644 index 000000000..af995dd17 --- /dev/null +++ b/tests/NuGet.Services.V3.Tests/Support/TestHttpClientHandler.cs @@ -0,0 +1,23 @@ +// Copyright (c) .NET Foundation. All rights reserved. +// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. + +using System; +using System.Net.Http; +using System.Threading; +using System.Threading.Tasks; + +namespace NuGet.Services +{ + public class TestHttpClientHandler : HttpClientHandler + { + public virtual Task OnSendAsync(HttpRequestMessage request, CancellationToken cancellationToken) + { + throw new NotImplementedException(); + } + + protected override async Task SendAsync(HttpRequestMessage request, CancellationToken cancellationToken) + { + return await OnSendAsync(request, cancellationToken); + } + } +}