Skip to content
This repository has been archived by the owner on Jul 30, 2024. It is now read-only.
/ NuGet.Jobs Public archive

Commit

Permalink
Reboot search instances returning 500 and 503
Browse files Browse the repository at this point in the history
  • Loading branch information
joelverhagen committed Nov 8, 2018
1 parent 53187a3 commit de8c1a0
Show file tree
Hide file tree
Showing 5 changed files with 116 additions and 1 deletion.
15 changes: 15 additions & 0 deletions src/Monitoring.RebootSearchInstance/SearchInstanceRebooter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
using System;
using System.Diagnostics;
using System.Linq;
using System.Net;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
Expand Down Expand Up @@ -220,6 +221,20 @@ private async Task<InstanceHealth> DetermineInstanceHealthAsync(
{
commitDateTime = await _searchServiceClient.GetCommitDateTimeAsync(instance, token);
}
catch (HttpResponseException ex) when (ex.StatusCode == HttpStatusCode.ServiceUnavailable
|| ex.StatusCode == HttpStatusCode.InternalServerError)
{
_logger.LogInformation(
(EventId)0,
ex,
"The HTTP response when hitting {DiagUrl} was {StatusCode} {ReasonPhrase}. Considering this " +
"instance as an unhealthy state.",
instance.DiagUrl,
(int)ex.StatusCode,
ex.ReasonPhrase);

return InstanceHealth.Unhealthy;
}
catch (Exception ex)
{
_logger.LogInformation(
Expand Down
21 changes: 21 additions & 0 deletions src/PackageLagMonitor/HttpResponseException.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
// Copyright (c) .NET Foundation. All rights reserved.
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.

using System;
using System.Net;

namespace NuGet.Jobs.Montoring.PackageLag
{
/// <summary>
/// An exception that records the status code and reason phrase of an HTTP response
/// alongside the usual exception message.
/// </summary>
public class HttpResponseException : Exception
{
    private readonly HttpStatusCode _statusCode;
    private readonly string _reasonPhrase;

    /// <summary>
    /// Creates the exception from the pieces of an HTTP response.
    /// </summary>
    /// <param name="statusCode">The status code of the HTTP response.</param>
    /// <param name="reasonPhrase">The reason phrase of the HTTP response; stored as given.</param>
    /// <param name="message">The exception message, forwarded to <see cref="Exception"/>.</param>
    public HttpResponseException(HttpStatusCode statusCode, string reasonPhrase, string message)
        : base(message)
    {
        _statusCode = statusCode;
        _reasonPhrase = reasonPhrase;
    }

    /// <summary>
    /// The status code supplied at construction time.
    /// </summary>
    public HttpStatusCode StatusCode => _statusCode;

    /// <summary>
    /// The reason phrase supplied at construction time.
    /// </summary>
    public string ReasonPhrase => _reasonPhrase;
}
}
1 change: 1 addition & 0 deletions src/PackageLagMonitor/Monitoring.PackageLag.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
</PropertyGroup>
<ItemGroup>
<Compile Include="AzureManagementAPIWrapperConfiguration.cs" />
<Compile Include="HttpResponseException.cs" />
<Compile Include="Instance.cs" />
<Compile Include="ISearchServiceClient.cs" />
<Compile Include="Job.cs" />
Expand Down
9 changes: 9 additions & 0 deletions src/PackageLagMonitor/SearchServiceClient.cs
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,15 @@ public async Task<DateTimeOffset> GetCommitDateTimeAsync(Instance instance, Canc
HttpCompletionOption.ResponseContentRead,
token))
{
if (!diagResponse.IsSuccessStatusCode)
{
throw new HttpResponseException(
diagResponse.StatusCode,
diagResponse.ReasonPhrase,
$"The HTTP response when hitting {instance.DiagUrl} was {(int)diagResponse.StatusCode} " +
$"{diagResponse.ReasonPhrase}, which is not successful.");
}

var diagContent = diagResponse.Content;
var searchDiagResultRaw = await diagContent.ReadAsStringAsync();
var searchDiagResultObject = JsonConvert.DeserializeObject<SearchDiagnosticResponse>(searchDiagResultRaw);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Net;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
Expand Down Expand Up @@ -169,7 +170,7 @@ public async Task RestartsFirstUnhealthyInstance()
}

[Fact]
public async Task TreatsExceptionWhenGettingCommitTimestampAsUnknown()
public async Task TreatsUnknownExceptionWhenGettingCommitTimestampAsUnknown()
{
_searchServiceClient
.SetupSequence(x => x.GetCommitDateTimeAsync(It.IsAny<Instance>(), It.IsAny<CancellationToken>()))
Expand All @@ -195,6 +196,74 @@ public async Task TreatsExceptionWhenGettingCommitTimestampAsUnknown()
_telemetryService.Verify(x => x.TrackInstanceCount(_region, 3), Times.Once);
}

// When the first commit timestamp lookup throws an HttpResponseException with one of
// these status codes, the run is expected to report one unknown instance and to never
// request a reboot.
[Theory]
[InlineData(HttpStatusCode.BadGateway)]
[InlineData(HttpStatusCode.NotFound)]
public async Task TreatsUnknownHttpStatusCodeExceptionWhenGettingCommitTimestampAsUnknown(HttpStatusCode statusCode)
{
    // Arrange: the first lookup fails with the status code under test; the next two succeed.
    var lookupFailure = new HttpResponseException(statusCode, "Service Unavailable", "Some problem.");
    _searchServiceClient
        .SetupSequence(x => x.GetCommitDateTimeAsync(It.IsAny<Instance>(), It.IsAny<CancellationToken>()))
        .ThrowsAsync(lookupFailure)
        .ReturnsAsync(DateTimeOffset.MaxValue)
        .ReturnsAsync(DateTimeOffset.MaxValue);

    // Act
    await _target.RunAsync(_token);

    // Assert: no reboot was requested with any combination of arguments.
    _azureManagementAPIWrapper.Verify(
        x => x.RebootCloudServiceRoleInstanceAsync(
            It.IsAny<string>(),
            It.IsAny<string>(),
            It.IsAny<string>(),
            It.IsAny<string>(),
            It.IsAny<string>(),
            It.IsAny<string>(),
            It.IsAny<CancellationToken>()),
        Times.Never);

    // Assert: telemetry reports two healthy, zero unhealthy, one unknown, three in total.
    _telemetryService.Verify(x => x.TrackHealthyInstanceCount(_region, 2), Times.Once);
    _telemetryService.Verify(x => x.TrackUnhealthyInstanceCount(_region, 0), Times.Once);
    _telemetryService.Verify(x => x.TrackUnknownInstanceCount(_region, 1), Times.Once);
    _telemetryService.Verify(x => x.TrackInstanceCount(_region, 3), Times.Once);
}

// When the first commit timestamp lookup throws an HttpResponseException with one of
// these status codes, the run is expected to report one unhealthy instance and to
// request exactly one reboot.
[Theory]
[InlineData(HttpStatusCode.InternalServerError)]
[InlineData(HttpStatusCode.ServiceUnavailable)]
public async Task TreatsSome500sHttpResponseExceptionAsUnhealthy(HttpStatusCode statusCode)
{
    // Arrange: the first lookup fails with the status code under test; the next two succeed.
    var lookupFailure = new HttpResponseException(statusCode, "Service Unavailable", "Some problem.");
    _searchServiceClient
        .SetupSequence(x => x.GetCommitDateTimeAsync(It.IsAny<Instance>(), It.IsAny<CancellationToken>()))
        .ThrowsAsync(lookupFailure)
        .ReturnsAsync(DateTimeOffset.MaxValue)
        .ReturnsAsync(DateTimeOffset.MaxValue);

    // Act
    await _target.RunAsync(_token);

    // Assert: one reboot was requested against the configured subscription, resource group,
    // service, "Production" slot and role.
    _azureManagementAPIWrapper.Verify(
        x => x.RebootCloudServiceRoleInstanceAsync(
            _subscription,
            _resourceGroup,
            _serviceName,
            "Production",
            _role,
            It.IsAny<string>(),
            It.IsAny<CancellationToken>()),
        Times.Once);

    // Assert: that was the only reboot request of any kind.
    _azureManagementAPIWrapper.Verify(
        x => x.RebootCloudServiceRoleInstanceAsync(
            It.IsAny<string>(),
            It.IsAny<string>(),
            It.IsAny<string>(),
            It.IsAny<string>(),
            It.IsAny<string>(),
            It.IsAny<string>(),
            It.IsAny<CancellationToken>()),
        Times.Once);

    // Assert: telemetry reports two healthy, one unhealthy, zero unknown, three in total.
    _telemetryService.Verify(x => x.TrackHealthyInstanceCount(_region, 2), Times.Once);
    _telemetryService.Verify(x => x.TrackUnhealthyInstanceCount(_region, 1), Times.Once);
    _telemetryService.Verify(x => x.TrackUnknownInstanceCount(_region, 0), Times.Once);
    _telemetryService.Verify(x => x.TrackInstanceCount(_region, 3), Times.Once);
}

[Fact]
public async Task TreatsLagBetweenThresholdsAsUnknown()
{
Expand Down

0 comments on commit de8c1a0

Please sign in to comment.