Skip to content
This repository has been archived by the owner on Jul 30, 2024. It is now read-only.
/ NuGet.Jobs Public archive

Commit

Permalink
Add registration comparer to compare two registration hives (#729)
Browse files Browse the repository at this point in the history
  • Loading branch information
joelverhagen committed Jan 7, 2020
1 parent 68f7b9a commit 69bdcc7
Show file tree
Hide file tree
Showing 14 changed files with 1,142 additions and 5 deletions.
25 changes: 21 additions & 4 deletions src/Catalog/HttpReadCursor.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

using System;
using System.Diagnostics;
using System.Net;
using System.Net.Http;
using System.Threading;
using System.Threading.Tasks;
Expand All @@ -13,11 +14,20 @@ namespace NuGet.Services.Metadata.Catalog
public class HttpReadCursor : ReadCursor
{
private readonly Uri _address;
private readonly DateTime? _defaultValue;
private readonly Func<HttpMessageHandler> _handlerFunc;

/// <summary>
/// Creates a cursor that is read from <paramref name="address"/> and that has a fallback value.
/// </summary>
/// <param name="address">The URL the cursor value is loaded from.</param>
/// <param name="defaultValue">The fallback cursor value kept alongside the address.</param>
/// <param name="handlerFunc">Optional factory for the HTTP message handler; may be null.</param>
public HttpReadCursor(Uri address, DateTime defaultValue, Func<HttpMessageHandler> handlerFunc = null)
{
    _handlerFunc = handlerFunc;
    _defaultValue = defaultValue;
    _address = address;
}

/// <summary>
/// Creates a cursor that is read from <paramref name="address"/> and that has no fallback value.
/// </summary>
/// <param name="address">The URL the cursor value is loaded from.</param>
/// <param name="handlerFunc">Optional factory for the HTTP message handler; may be null.</param>
public HttpReadCursor(Uri address, Func<HttpMessageHandler> handlerFunc = null)
{
    _handlerFunc = handlerFunc;
    _defaultValue = default(DateTime?);
    _address = address;
}

Expand All @@ -30,12 +40,19 @@ public override async Task LoadAsync(CancellationToken cancellationToken)
{
Trace.TraceInformation("HttpReadCursor.Load {0}", response.StatusCode);

response.EnsureSuccessStatusCode();
if (_defaultValue != null && response.StatusCode == HttpStatusCode.NotFound)
{
Value = _defaultValue.Value;
}
else
{
response.EnsureSuccessStatusCode();

string json = await response.Content.ReadAsStringAsync();
string json = await response.Content.ReadAsStringAsync();

JObject obj = JObject.Parse(json);
Value = obj["value"].ToObject<DateTime>();
JObject obj = JObject.Parse(json);
Value = obj["value"].ToObject<DateTime>();
}
}

Trace.TraceInformation("HttpReadCursor.Load: {0}", this);
Expand Down
31 changes: 31 additions & 0 deletions src/NuGet.Jobs.RegistrationComparer/ComparisonContext.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
// Copyright (c) .NET Foundation. All rights reserved.
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.

namespace NuGet.Jobs.RegistrationComparer
{
/// <summary>
/// Read-only bag of values describing one left/right comparison: the package ID, the base URLs
/// of the two sides, the URLs of the two documents and the normalizers supplied for the comparison.
/// </summary>
public class ComparisonContext
{
    /// <summary>The package ID supplied to the constructor.</summary>
    public string PackageId { get; }

    /// <summary>The base URL of the left side.</summary>
    public string LeftBaseUrl { get; }

    /// <summary>The base URL of the right side.</summary>
    public string RightBaseUrl { get; }

    /// <summary>The URL of the left document.</summary>
    public string LeftUrl { get; }

    /// <summary>The URL of the right document.</summary>
    public string RightUrl { get; }

    /// <summary>The normalizers supplied to the constructor.</summary>
    public Normalizers Normalizers { get; }

    /// <summary>
    /// Captures every argument as-is in the property of the same name. No validation is performed.
    /// </summary>
    public ComparisonContext(
        string packageId,
        string leftBaseUrl,
        string rightBaseUrl,
        string leftUrl,
        string rightUrl,
        Normalizers normalizers)
    {
        PackageId = packageId;
        Normalizers = normalizers;

        // Left side.
        LeftBaseUrl = leftBaseUrl;
        LeftUrl = leftUrl;

        // Right side.
        RightBaseUrl = rightBaseUrl;
        RightUrl = rightUrl;
    }
}
}
43 changes: 43 additions & 0 deletions src/NuGet.Jobs.RegistrationComparer/CursorUtility.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
// Copyright (c) .NET Foundation. All rights reserved.
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.

using System;
using System.Collections.Generic;
using System.Net.Http;
using Microsoft.Extensions.Options;
using NuGet.Services.Metadata.Catalog;
using NuGet.Services.Metadata.Catalog.Persistence;

namespace NuGet.Jobs.RegistrationComparer
{
/// <summary>
/// Factory helpers for the cursors used by the registration comparer.
/// </summary>
public static class CursorUtility
{
    /// <summary>
    /// Builds one HTTP read cursor per configured registration entry, pointing at the
    /// <c>cursor.json</c> document under that entry's legacy base URL.
    /// </summary>
    /// <param name="handlerFunc">Handler factory passed through to each <see cref="HttpReadCursor"/>.</param>
    /// <param name="options">Configuration whose <c>Registrations</c> entries are enumerated.</param>
    /// <returns>The cursors keyed by the absolute URI of their <c>cursor.json</c>.</returns>
    public static Dictionary<string, ReadCursor> GetRegistrationCursors(
        Func<HttpMessageHandler> handlerFunc,
        IOptionsSnapshot<RegistrationComparerConfiguration> options)
    {
        var cursorsByUrl = new Dictionary<string, ReadCursor>();

        foreach (var registration in options.Value.Registrations)
        {
            // Trim trailing slashes so the resulting URL has exactly one separator.
            var legacyBaseUrl = registration.LegacyBaseUrl.TrimEnd('/');
            var address = new Uri(legacyBaseUrl + "/cursor.json");
            var key = address.AbsoluteUri;

            ReadCursor cursor = new HttpReadCursor(address, DateTime.MinValue, handlerFunc);

            // Add (not the indexer) is deliberate: a duplicate URL still throws.
            cursorsByUrl.Add(key, cursor);
        }

        return cursorsByUrl;
    }

    /// <summary>
    /// Creates the durable cursor stored as <c>comparer-cursor.json</c>.
    /// </summary>
    /// <param name="storageFactory">Factory for the storage that holds the cursor.</param>
    /// <returns>The cursor paired with its absolute URI.</returns>
    public static KeyValuePair<string, DurableCursor> GetComparerCursor(IStorageFactory storageFactory)
        => GetDurableCursor(storageFactory, "comparer-cursor.json");

    // Resolves `name` against freshly created storage and wraps it in a DurableCursor that
    // is constructed with DateTime.MinValue as its third argument.
    private static KeyValuePair<string, DurableCursor> GetDurableCursor(IStorageFactory storageFactory, string name)
    {
        var storage = storageFactory.Create();
        var address = storage.ResolveUri(name);
        var key = address.AbsoluteUri;
        var cursor = new DurableCursor(address, storage, DateTime.MinValue);

        return new KeyValuePair<string, DurableCursor>(key, cursor);
    }
}
}
244 changes: 244 additions & 0 deletions src/NuGet.Jobs.RegistrationComparer/HiveComparer.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,244 @@
// Copyright (c) .NET Foundation. All rights reserved.
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.

using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
using System.Net.Http;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
using Newtonsoft.Json;
using Newtonsoft.Json.Linq;
using NuGet.Protocol.Catalog;
using NuGet.Protocol.Registration;
using NuGet.Services.Metadata.Catalog.Helpers;

namespace NuGet.Jobs.RegistrationComparer
{
public class HiveComparer
{
private readonly HttpClient _httpClient;
private readonly JsonComparer _comparer;
private readonly ILogger<HiveComparer> _logger;

/// <summary>
/// Initializes the comparer with its collaborators.
/// </summary>
/// <param name="httpClient">Client used to download registration documents.</param>
/// <param name="comparer">Comparer applied to each pair of downloaded JSON documents.</param>
/// <param name="logger">Logger used to record each fetch.</param>
/// <exception cref="ArgumentNullException">Any argument is null.</exception>
public HiveComparer(
    HttpClient httpClient,
    JsonComparer comparer,
    ILogger<HiveComparer> logger)
{
    // Guards run in parameter order so the first null argument is the one reported.
    if (httpClient == null)
    {
        throw new ArgumentNullException(nameof(httpClient));
    }

    if (comparer == null)
    {
        throw new ArgumentNullException(nameof(comparer));
    }

    if (logger == null)
    {
        throw new ArgumentNullException(nameof(logger));
    }

    _httpClient = httpClient;
    _comparer = comparer;
    _logger = logger;
}

/// <summary>
/// Compares the registration index, the non-inlined pages and the specified leaves of one
/// package across adjacent pairs of the provided base URLs (0 vs 1, 1 vs 2, ...).
/// </summary>
/// <param name="baseUrls">
/// The base URLs to compare; at least two are required. URLs are built as
/// <c>{baseUrl}{id}/...</c>, so each base URL is expected to end with a slash.
/// </param>
/// <param name="id">The package ID segment used when building the index and leaf URLs.</param>
/// <param name="versions">The versions whose leaf documents are compared.</param>
/// <exception cref="ArgumentNullException"><paramref name="baseUrls"/> is null.</exception>
/// <exception cref="ArgumentException">Fewer than two base URLs were provided.</exception>
/// <exception cref="InvalidOperationException">
/// An index exists at one base URL of a pair but not at the other.
/// </exception>
public async Task CompareAsync(
    IReadOnlyList<string> baseUrls,
    string id,
    IReadOnlyList<string> versions)
{
    if (baseUrls == null)
    {
        throw new ArgumentNullException(nameof(baseUrls));
    }

    if (baseUrls.Count <= 1)
    {
        throw new ArgumentException("At least two base URLs must be provided.", nameof(baseUrls));
    }

    // Compare the indexes.
    var rawIndexes = await Task.WhenAll(baseUrls.Select(x => GetIndexAsync(x, id)));
    var areBothMissing = false;
    for (var i = 1; i < baseUrls.Count; i++)
    {
        // Throws when exactly one side of the pair is missing; that exception is intentionally
        // not caught here, unlike the leaf comparison below.
        if (AreBothMissing(
            rawIndexes[i - 1].Url,
            rawIndexes[i].Url,
            rawIndexes[i - 1].Data,
            rawIndexes[i].Data))
        {
            areBothMissing = true;
            continue;
        }

        var comparisonContext = new ComparisonContext(
            id,
            baseUrls[i - 1],
            baseUrls[i],
            rawIndexes[i - 1].Url,
            rawIndexes[i].Url,
            Normalizers.Index);

        _comparer.Compare(
            rawIndexes[i - 1].Data,
            rawIndexes[i].Data,
            comparisonContext);
    }

    // A missing index means there are no pages to discover, so there is nothing more to compare.
    if (areBothMissing)
    {
        return;
    }

    // Deserialize the indexes so we can get the page URLs.
    var indexes = new List<DownloadedData<RegistrationIndex>>();
    foreach (var rawIndex in rawIndexes)
    {
        indexes.Add(new DownloadedData<RegistrationIndex>(
            rawIndex.Url,
            rawIndex.Data.ToObject<RegistrationIndex>(NuGetJsonSerialization.Serializer)));
    }

    // Download the pages (if any) and leaves. Only pages without inlined items have their own URL.
    var pageUrlGroups = indexes
        .Select(x => x
            .Data
            .Items
            .Where(p => p.Items == null)
            .Select(p => p.Url)
            .ToList())
        .ToList();
    var leafUrlGroups = baseUrls
        .Select(x => versions.Select(v => $"{x}{id}/{v}.json").ToList())
        .ToList();

    var urls = new ConcurrentBag<string>(pageUrlGroups
        .SelectMany(x => x)
        .Concat(leafUrlGroups.SelectMany(x => x)));
    var urlToJson = new ConcurrentDictionary<string, JObject>();
    await ParallelAsync.Repeat(
        async () =>
        {
            await Task.Yield();
            while (urls.TryTake(out var url))
            {
                // A 404 is recorded as a null value so that the comparisons below can detect it.
                var json = await GetJObjectOrNullAsync(url);
                urlToJson.TryAdd(url, json.Data);
            }
        });

    // Compare the pages.
    for (var i = 1; i < baseUrls.Count; i++)
    {
        var leftPageUrls = pageUrlGroups[i - 1];
        var rightPageUrls = pageUrlGroups[i];

        // The two sides are paired by position. Report a count mismatch instead of indexing past
        // the end of the shorter list or silently ignoring the extra pages of the longer one.
        if (leftPageUrls.Count != rightPageUrls.Count)
        {
            ResultWriter.WriteWarning(Environment.NewLine +
                "The indexes have a different number of non-inlined pages. Only the common pages are compared." + Environment.NewLine +
                $"| Left URL: {indexes[i - 1].Url}" + Environment.NewLine +
                $"| Right URL: {indexes[i].Url}" + Environment.NewLine +
                $"| Left page count: {leftPageUrls.Count}" + Environment.NewLine +
                $"| Right page count: {rightPageUrls.Count}" + Environment.NewLine);
        }

        var pageCount = Math.Min(leftPageUrls.Count, rightPageUrls.Count);
        for (var pageIndex = 0; pageIndex < pageCount; pageIndex++)
        {
            var leftUrl = leftPageUrls[pageIndex];
            var rightUrl = rightPageUrls[pageIndex];

            var comparisonContext = new ComparisonContext(
                id,
                baseUrls[i - 1],
                baseUrls[i],
                leftUrl,
                rightUrl,
                Normalizers.Page);

            _comparer.Compare(
                urlToJson[leftUrl],
                urlToJson[rightUrl],
                comparisonContext);
        }
    }

    // Compare the affected leaves.
    for (var i = 1; i < baseUrls.Count; i++)
    {
        for (var leafIndex = 0; leafIndex < leafUrlGroups[i].Count; leafIndex++)
        {
            var leftUrl = leafUrlGroups[i - 1][leafIndex];
            var rightUrl = leafUrlGroups[i][leafIndex];

            try
            {
                if (AreBothMissing(
                    leftUrl,
                    rightUrl,
                    urlToJson[leftUrl],
                    urlToJson[rightUrl]))
                {
                    continue;
                }
            }
            catch (InvalidOperationException ex)
            {
                // A leaf that exists on only one side is reported as a warning and skipped.
                ResultWriter.WriteWarning(ex.Message);
                continue;
            }

            var comparisonContext = new ComparisonContext(
                id,
                baseUrls[i - 1],
                baseUrls[i],
                leftUrl,
                rightUrl,
                Normalizers.Leaf);

            _comparer.Compare(
                urlToJson[leftUrl],
                urlToJson[rightUrl],
                comparisonContext);
        }
    }
}

/// <summary>
/// Determines whether both documents of a pair are missing (null).
/// </summary>
/// <param name="leftUrl">URL of the left document; only used in the error message.</param>
/// <param name="rightUrl">URL of the right document; only used in the error message.</param>
/// <param name="left">The left document, or null when it was not found.</param>
/// <param name="right">The right document, or null when it was not found.</param>
/// <returns>True when both documents are null; false when both are present.</returns>
/// <exception cref="InvalidOperationException">Exactly one of the documents is null.</exception>
private bool AreBothMissing(string leftUrl, string rightUrl, JObject left, JObject right)
{
    var isLeftMissing = left == null;
    var isRightMissing = right == null;

    // Both sides agree: either both are missing or both exist.
    if (isLeftMissing == isRightMissing)
    {
        return isLeftMissing;
    }

    // The empty first and last entries produce the leading and trailing line breaks.
    var message = string.Join(
        Environment.NewLine,
        new[]
        {
            string.Empty,
            "One of the URLs exists, the other does not.",
            $"| Left URL: {leftUrl}",
            $"| Right URL: {rightUrl}",
            $"| Left is 404: {isLeftMissing}",
            $"| Right is 404: {isRightMissing}",
            string.Empty,
        });

    throw new InvalidOperationException(message);
}

/// <summary>
/// Downloads the registration index found at <c>{baseUrl}{id}/index.json</c>.
/// </summary>
/// <param name="baseUrl">The base URL; concatenated directly with the ID, with no separator added.</param>
/// <param name="id">The package ID segment of the URL.</param>
/// <returns>The URL that was requested and the parsed JSON, or null data when the index is a 404.</returns>
private async Task<DownloadedData<JObject>> GetIndexAsync(string baseUrl, string id)
{
    var indexUrl = baseUrl + id + "/index.json";
    var downloaded = await GetJObjectOrNullAsync(indexUrl);
    return downloaded;
}

/// <summary>
/// Downloads the JSON document at <paramref name="url"/> with a single HTTP request.
/// </summary>
/// <param name="url">The URL to fetch.</param>
/// <returns>
/// The requested URL paired with the parsed JSON object, or paired with null when the server
/// responds with 404 Not Found.
/// </returns>
/// <exception cref="HttpRequestException">The response status is neither success nor 404.</exception>
private async Task<DownloadedData<JObject>> GetJObjectOrNullAsync(string url)
{
    using (var response = await _httpClient.GetAsync(url))
    {
        _logger.LogInformation(
            "Fetched {Url}: {StatusCode} {ReasonPhrase}",
            url,
            (int)response.StatusCode,
            response.ReasonPhrase);

        if (response.StatusCode == HttpStatusCode.NotFound)
        {
            return new DownloadedData<JObject>(url, null);
        }

        response.EnsureSuccessStatusCode();

        // Read the body of the response that was already received. Issuing a second GET for the
        // same URL would double the traffic and could parse a body that does not belong to the
        // status code checked above.
        using (var stream = await response.Content.ReadAsStreamAsync())
        using (var streamReader = new StreamReader(stream))
        using (var jsonTextReader = new JsonTextReader(streamReader))
        {
            // Keep date-like strings as raw strings so the comparison sees the original text.
            jsonTextReader.DateParseHandling = DateParseHandling.None;

            var data = JObject.Load(jsonTextReader);
            return new DownloadedData<JObject>(url, data);
        }
    }
}

/// <summary>
/// Pairs a URL with the data obtained from it.
/// </summary>
/// <typeparam name="T">The type of the downloaded data.</typeparam>
private class DownloadedData<T>
{
    /// <summary>The URL supplied to the constructor.</summary>
    public string Url { get; }

    /// <summary>The data supplied to the constructor; may be null.</summary>
    public T Data { get; }

    /// <summary>Stores both values as-is.</summary>
    public DownloadedData(string url, T data)
    {
        Data = data;
        Url = url;
    }
}
}
}
12 changes: 12 additions & 0 deletions src/NuGet.Jobs.RegistrationComparer/HivesConfiguration.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
// Copyright (c) .NET Foundation. All rights reserved.
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.

namespace NuGet.Jobs.RegistrationComparer
{
/// <summary>
/// Configuration for one set of registration hives, expressed as three base URLs.
/// </summary>
public class HivesConfiguration
{
    /// <summary>
    /// Base URL of the legacy hive.
    /// </summary>
    public string LegacyBaseUrl { get; set; }

    /// <summary>
    /// Base URL of the gzipped hive (presumably the compressed variant; confirm against the consumer).
    /// </summary>
    public string GzippedBaseUrl { get; set; }

    /// <summary>
    /// Base URL of the SemVer 2.0.0 hive (presumably includes SemVer 2 packages; confirm against the consumer).
    /// </summary>
    public string SemVer2BaseUrl { get; set; }
}
}
Loading

0 comments on commit 69bdcc7

Please sign in to comment.