From 010687cde0210d5b4f9fb000f6e4144dd3cef6a0 Mon Sep 17 00:00:00 2001 From: Loic Sharma Date: Mon, 13 Apr 2020 12:47:37 -0700 Subject: [PATCH] Refactor download overrides --- .../Auxiliary2AzureSearch/DataSetComparer.cs | 145 +++++++ ...wnerSetComparer.cs => IDataSetComparer.cs} | 18 +- .../Auxiliary2AzureSearch/OwnerSetComparer.cs | 111 ----- .../UpdateDownloadsCommand.cs | 56 ++- .../UpdateOwnersCommand.cs | 6 +- .../AuxiliaryFiles/DownloadDataExtensions.cs | 90 ---- .../IPopularityTransferDataClient.cs | 36 ++ .../PopularityTransferDataClient.cs | 139 ++++++ .../AzureSearchTelemetryService.cs | 45 ++ .../DatabaseAuxiliaryDataFetcher.cs | 61 +++ .../Db2AzureSearch/Db2AzureSearchCommand.cs | 15 + .../Db2AzureSearch/InitialAuxiliaryData.cs | 5 +- .../NewPackageRegistrationProducer.cs | 46 +- .../DependencyInjectionExtensions.cs | 11 +- .../DownloadTransferResult.cs | 40 ++ .../DownloadTransferrer.cs | 119 ++++++ .../IAzureSearchTelemetryService.cs | 4 + .../IDatabaseAuxiliaryDataFetcher.cs | 6 + .../IDownloadTransferrer.cs | 34 ++ .../NuGet.Services.AzureSearch.csproj | 11 +- .../PackageIdToPopularityTransfersBuilder.cs | 89 ++++ .../DataSetComparerFacts.cs | 404 ++++++++++++++++++ .../OwnerSetComparerFacts.cs | 161 ------- .../UpdateDownloadsCommandFacts.cs | 156 ++----- .../UpdateOwnersCommandFacts.cs | 10 +- .../AuxiliaryFiles/OwnerDataClientFacts.cs | 104 +++-- .../PopularityTransferDataClientFacts.cs | 370 ++++++++++++++++ .../Db2AzureSearchCommandFacts.cs | 6 +- .../NewPackageRegistrationProducerFacts.cs | 104 +---- .../NuGet.Services.AzureSearch.Tests.csproj | 3 +- .../NuGet.Services.V3.Tests.csproj | 1 + .../Support/RecordingStream.cs | 33 ++ 32 files changed, 1807 insertions(+), 632 deletions(-) create mode 100644 src/NuGet.Services.AzureSearch/Auxiliary2AzureSearch/DataSetComparer.cs rename src/NuGet.Services.AzureSearch/Auxiliary2AzureSearch/{IOwnerSetComparer.cs => IDataSetComparer.cs} (53%) delete mode 100644 src/NuGet.Services.AzureSearch/Auxiliary2AzureSearch/OwnerSetComparer.cs delete mode 100644 src/NuGet.Services.AzureSearch/AuxiliaryFiles/DownloadDataExtensions.cs create mode 100644 src/NuGet.Services.AzureSearch/AuxiliaryFiles/IPopularityTransferDataClient.cs create mode 100644 src/NuGet.Services.AzureSearch/AuxiliaryFiles/PopularityTransferDataClient.cs create mode 100644 src/NuGet.Services.AzureSearch/DownloadTransferResult.cs create mode 100644 src/NuGet.Services.AzureSearch/DownloadTransferrer.cs create mode 100644 src/NuGet.Services.AzureSearch/IDownloadTransferrer.cs create mode 100644 src/NuGet.Services.AzureSearch/PackageIdToPopularityTransfersBuilder.cs create mode 100644 tests/NuGet.Services.AzureSearch.Tests/Auxiliary2AzureSearch/DataSetComparerFacts.cs delete mode 100644 tests/NuGet.Services.AzureSearch.Tests/Auxiliary2AzureSearch/OwnerSetComparerFacts.cs create mode 100644 tests/NuGet.Services.AzureSearch.Tests/AuxiliaryFiles/PopularityTransferDataClientFacts.cs create mode 100644 tests/NuGet.Services.V3.Tests/Support/RecordingStream.cs diff --git a/src/NuGet.Services.AzureSearch/Auxiliary2AzureSearch/DataSetComparer.cs b/src/NuGet.Services.AzureSearch/Auxiliary2AzureSearch/DataSetComparer.cs new file mode 100644 index 000000000..c78519261 --- /dev/null +++ b/src/NuGet.Services.AzureSearch/Auxiliary2AzureSearch/DataSetComparer.cs @@ -0,0 +1,145 @@ +// Copyright (c) .NET Foundation. All rights reserved. +// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. + +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.Linq; +using Microsoft.Extensions.Logging; + +namespace NuGet.Services.AzureSearch.Auxiliary2AzureSearch +{ + public class DataSetComparer : IDataSetComparer + { + private static readonly string[] EmptyStringArray = new string[0]; + + private readonly IAzureSearchTelemetryService _telemetryService; + private readonly ILogger _logger; + + public DataSetComparer( + IAzureSearchTelemetryService telemetryService, + ILogger logger) + { + _telemetryService = telemetryService ?? throw new ArgumentNullException(nameof(telemetryService)); + _logger = logger ?? throw new ArgumentNullException(nameof(logger)); + } + + public SortedDictionary CompareOwners( + SortedDictionary> oldData, + SortedDictionary> newData) + { + // Use ordinal comparison to allow username case changes to flow through. + var stopwatch = Stopwatch.StartNew(); + var result = CompareData( + oldData, + newData, + "package ID", + "owners", + StringComparer.Ordinal); + + stopwatch.Stop(); + _telemetryService.TrackOwnerSetComparison(oldData.Count, newData.Count, result.Count, stopwatch.Elapsed); + + return result; + } + + public SortedDictionary ComparePopularityTransfers( + SortedDictionary> oldData, + SortedDictionary> newData) + { + // Ignore case changes in popularity transfers. + var stopwatch = Stopwatch.StartNew(); + var result = CompareData( + oldData, + newData, + "package ID", + "popularity transfers", + StringComparer.OrdinalIgnoreCase); + + stopwatch.Stop(); + _telemetryService.TrackPopularityTransfersSetComparison(oldData.Count, newData.Count, result.Count, stopwatch.Elapsed); + + return result; + } + + private SortedDictionary CompareData( + SortedDictionary> oldData, + SortedDictionary> newData, + string keyName, + string valuesName, + StringComparer valuesComparer) + { + if (oldData.Comparer != StringComparer.OrdinalIgnoreCase) + { + throw new ArgumentException("The old data should have a case-insensitive comparer.", nameof(oldData)); + } + + if (newData.Comparer != StringComparer.OrdinalIgnoreCase) + { + throw new ArgumentException("The new data should have a case-insensitive comparer.", nameof(newData)); + } + + // We use a very simplistic algorithm here. Perform one pass on the new data to find the added or changed + // values. Then perform a second pass on the old data to find removed keys. We can optimize + // this later if necessary. + // + // On the "changed" case, we emit all of the values instead of the delta. This is because Azure Search + // does not have a way to append or remove a specific item from a field that is an array. + // The entire new array needs to be provided. + var result = new SortedDictionary(StringComparer.OrdinalIgnoreCase); + + // First pass: find added or changed sets. + foreach (var pair in newData) + { + var key = pair.Key; + var newValues = pair.Value; + if (!oldData.TryGetValue(key, out var oldValues)) + { + // ADDED: The key does not exist in the old data, which means the key was added. + result.Add(key, newValues.ToArray()); + _logger.LogInformation( + $"The {keyName} {{Key}} has been added, with {{AddedCount}} {valuesName}.", + key, + newValues.Count); + } + else + { + // The key exists in the old data. We need to check if the values set has changed. + var removedValues = oldValues.Except(newValues, valuesComparer).ToList(); + var addedValues = newValues.Except(oldValues, valuesComparer).ToList(); + + if (removedValues.Any() || addedValues.Any()) + { + // CHANGED: The values set has changed. + result.Add(key, newValues.ToArray()); + _logger.LogInformation( + $"The {keyName} {{Key}} {valuesName} have changed, with {{RemovedCount}} {valuesName} removed and " + + $"{{AddedCount}} {valuesName} added.", + key, + removedValues.Count, + addedValues.Count); + } + } + } + + // Second pass: find removed sets. + foreach (var pair in oldData) + { + var key = pair.Key; + var oldValues = pair.Value; + + if (!newData.TryGetValue(key, out var newValues)) + { + // REMOVED: The key does not exist in the new data, which means the key was removed. + result.Add(key, EmptyStringArray); + _logger.LogInformation( + $"The {keyName} {{Key}} has been removed, with {{RemovedCount}} {valuesName}", + key, + oldValues.Count); + } + } + + return result; + } + } +} diff --git a/src/NuGet.Services.AzureSearch/Auxiliary2AzureSearch/IOwnerSetComparer.cs b/src/NuGet.Services.AzureSearch/Auxiliary2AzureSearch/IDataSetComparer.cs similarity index 53% rename from src/NuGet.Services.AzureSearch/Auxiliary2AzureSearch/IOwnerSetComparer.cs rename to src/NuGet.Services.AzureSearch/Auxiliary2AzureSearch/IDataSetComparer.cs index 038da5948..72c657e3f 100644 --- a/src/NuGet.Services.AzureSearch/Auxiliary2AzureSearch/IOwnerSetComparer.cs +++ b/src/NuGet.Services.AzureSearch/Auxiliary2AzureSearch/IDataSetComparer.cs @@ -6,9 +6,9 @@ namespace NuGet.Services.AzureSearch.Auxiliary2AzureSearch { /// - /// Used to compare two sets of owners to determine the changes. + /// Used to compare two sets of data to determine the changes. /// - public interface IOwnerSetComparer + public interface IDataSetComparer { /// /// Compares two sets of owners to determine the package IDs that have changed. The returned dictionary @@ -19,7 +19,19 @@ public interface IOwnerSetComparer /// /// The old owner information, typically from storage. /// The new owner information, typically from gallery DB. - SortedDictionary Compare( + SortedDictionary CompareOwners( + SortedDictionary> oldData, + SortedDictionary> newData); + + /// + /// Compares two sets of popularity transfers to determine changes. The two inputs are maps of package IDs that transfer + /// popularity away to package IDs that receive the popularity. The returned dictionary is subset of these inputs that + /// were added, removed, or changed. For the "added" and "changed" cases, the popularity transfer set is the new data. + /// For the "removed" case, the set is empty. + /// + /// The old popularity transfers, typically from storage. + /// The new popularity transfers, typically from gallery DB. + SortedDictionary ComparePopularityTransfers( SortedDictionary> oldData, SortedDictionary> newData); } diff --git a/src/NuGet.Services.AzureSearch/Auxiliary2AzureSearch/OwnerSetComparer.cs b/src/NuGet.Services.AzureSearch/Auxiliary2AzureSearch/OwnerSetComparer.cs deleted file mode 100644 index 8cef67759..000000000 --- a/src/NuGet.Services.AzureSearch/Auxiliary2AzureSearch/OwnerSetComparer.cs +++ /dev/null @@ -1,111 +0,0 @@ -// Copyright (c) .NET Foundation. All rights reserved. -// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. - -using System; -using System.Collections.Generic; -using System.Diagnostics; -using System.Linq; -using Microsoft.Extensions.Logging; - -namespace NuGet.Services.AzureSearch.Auxiliary2AzureSearch -{ - public class OwnerSetComparer : IOwnerSetComparer - { - private static readonly string[] EmptyStringArray = new string[0]; - - private readonly IAzureSearchTelemetryService _telemetryService; - private readonly ILogger _logger; - - public OwnerSetComparer( - IAzureSearchTelemetryService telemetryService, - ILogger logger) - { - _telemetryService = telemetryService ?? throw new ArgumentNullException(nameof(telemetryService)); - _logger = logger ?? throw new ArgumentNullException(nameof(logger)); - } - - public SortedDictionary Compare( - SortedDictionary> oldData, - SortedDictionary> newData) - { - if (oldData.Comparer != StringComparer.OrdinalIgnoreCase) - { - throw new ArgumentException("The old data should have a case-insensitive comparer.", nameof(oldData)); - } - - if (newData.Comparer != StringComparer.OrdinalIgnoreCase) - { - throw new ArgumentException("The new data should have a case-insensitive comparer.", nameof(newData)); - } - - var stopwatch = Stopwatch.StartNew(); - - // We use a very simplistic algorithm here. Perform one pass on the new data to find the added or changed - // package IDs. Then perform a second pass on the old data to find removed package IDs. We can optimize - // this later if necessary. - // - // We emit all of the usernames when a package ID's owners have changed instead of the delta. This is - // because Azure Search does not have a way to append or remove a specific item from a field that is an - // array. The entire new array needs to be provided. - var result = new SortedDictionary(StringComparer.OrdinalIgnoreCase); - - // First pass: find added or changed sets. - foreach (var pair in newData) - { - var id = pair.Key; - var newOwners = pair.Value; - if (!oldData.TryGetValue(id, out var oldOwners)) - { - // ADDED: The package ID does not exist in the old data, which means the package ID was added. - result.Add(id, newOwners.ToArray()); - _logger.LogInformation( - "The package ID {ID} has been added, with {AddedCount} owners.", - id, - newOwners.Count); - } - else - { - // The package ID exists in the old data. We need to check if the owner set has changed. Perform - // an ordinal comparison to allow username case changes to flow through. - var removedUsernames = oldOwners.Except(newOwners, StringComparer.Ordinal).ToList(); - var addedUsernames = newOwners.Except(oldOwners, StringComparer.Ordinal).ToList(); - - if (removedUsernames.Any() || addedUsernames.Any()) - { - // CHANGED: The username set has changed. - result.Add(id, newOwners.ToArray()); - _logger.LogInformation( - "The package ID {ID} has an ownership change, with {RemovedCount} owners removed and " + - "{AddedCount} owners added.", - id, - removedUsernames.Count, - addedUsernames.Count); - } - } - } - - // Second pass: find removed sets. - foreach (var pair in oldData) - { - var id = pair.Key; - var oldOwners = pair.Value; - - if (!newData.TryGetValue(id, out var newOwners)) - { - // REMOVED: The package ID does not exist in the new data, which means the package ID was removed. - result.Add(id, EmptyStringArray); - _logger.LogInformation( - "The package ID {ID} has been removed, with {RemovedCount} owners", - id, - oldOwners.Count); - } - } - - stopwatch.Stop(); - _telemetryService.TrackOwnerSetComparison(oldData.Count, newData.Count, result.Count, stopwatch.Elapsed); - - return result; - } - } -} - diff --git a/src/NuGet.Services.AzureSearch/Auxiliary2AzureSearch/UpdateDownloadsCommand.cs b/src/NuGet.Services.AzureSearch/Auxiliary2AzureSearch/UpdateDownloadsCommand.cs index 52d69c6fb..dd29eb41c 100644 --- a/src/NuGet.Services.AzureSearch/Auxiliary2AzureSearch/UpdateDownloadsCommand.cs +++ b/src/NuGet.Services.AzureSearch/Auxiliary2AzureSearch/UpdateDownloadsCommand.cs @@ -30,6 +30,8 @@ public class UpdateDownloadsCommand : IAzureSearchCommand private readonly IAuxiliaryFileClient _auxiliaryFileClient; private readonly IDownloadDataClient _downloadDataClient; private readonly IDownloadSetComparer _downloadSetComparer; + private readonly IDownloadTransferrer _downloadTransferrer; + private readonly IPopularityTransferDataClient _popularityTransferDataClient; private readonly ISearchDocumentBuilder _searchDocumentBuilder; private readonly ISearchIndexActionBuilder _indexActionBuilder; private readonly Func _batchPusherFactory; @@ -43,6 +45,8 @@ public UpdateDownloadsCommand( IAuxiliaryFileClient auxiliaryFileClient, IDownloadDataClient downloadDataClient, IDownloadSetComparer downloadSetComparer, + IDownloadTransferrer downloadTransferrer, + IPopularityTransferDataClient popularityTransferDataClient, ISearchDocumentBuilder searchDocumentBuilder, ISearchIndexActionBuilder indexActionBuilder, Func batchPusherFactory, @@ -54,6 +58,8 @@ public UpdateDownloadsCommand( _auxiliaryFileClient = auxiliaryFileClient ?? throw new ArgumentException(nameof(auxiliaryFileClient)); _downloadDataClient = downloadDataClient ?? throw new ArgumentNullException(nameof(downloadDataClient)); _downloadSetComparer = downloadSetComparer ?? throw new ArgumentNullException(nameof(downloadSetComparer)); + _downloadTransferrer = downloadTransferrer ?? throw new ArgumentNullException(nameof(downloadTransferrer)); + _popularityTransferDataClient = popularityTransferDataClient ?? throw new ArgumentNullException(nameof(popularityTransferDataClient)); _searchDocumentBuilder = searchDocumentBuilder ?? throw new ArgumentNullException(nameof(searchDocumentBuilder)); _indexActionBuilder = indexActionBuilder ?? throw new ArgumentNullException(nameof(indexActionBuilder)); _batchPusherFactory = batchPusherFactory ?? throw new ArgumentNullException(nameof(batchPusherFactory)); @@ -106,23 +112,26 @@ private async Task PushIndexChangesAsync() _logger.LogInformation("Fetching new download count data from blob storage."); var newData = await _auxiliaryFileClient.LoadDownloadDataAsync(); - _logger.LogInformation("Removing invalid IDs and versions from the old data."); + _logger.LogInformation("Removing invalid IDs and versions from the old downloads data."); CleanDownloadData(oldResult.Data); - _logger.LogInformation("Removing invalid IDs and versions from the new data."); + _logger.LogInformation("Removing invalid IDs and versions from the new downloads data."); CleanDownloadData(newData); - // Fetch the download overrides from the auxiliary file. Note that the overriden downloads are kept - // separate from downloads data as the original data will be persisted to auxiliary data, whereas the - // overriden data will be persisted to Azure Search. - _logger.LogInformation("Overriding download count data."); - var downloadOverrides = await _auxiliaryFileClient.LoadDownloadOverridesAsync(); - var overridenDownloads = newData.ApplyDownloadOverrides(downloadOverrides, _logger); - _logger.LogInformation("Detecting download count changes."); - var changes = _downloadSetComparer.Compare(oldResult.Data, overridenDownloads); + var changes = _downloadSetComparer.Compare(oldResult.Data, newData); + _logger.LogInformation("{Count} package IDs have download count changes.", changes.Count); + + // The "old" data in this case is the popularity transfers data that was last indexed by this job (or + // initialized by Db2AzureSearch). + _logger.LogInformation("Fetching old popularity transfer data from blob storage."); + var oldTransfers = await _popularityTransferDataClient.ReadLatestIndexedAsync(); + + _logger.LogInformation("Applying popularity transfers to download changes."); + var transferResult = await ApplyPopularityTransferChangesAsync(newData, changes, oldTransfers.Result); + var idBag = new ConcurrentBag(changes.Keys); - _logger.LogInformation("{Count} package IDs have download count changes.", idBag.Count); + _logger.LogInformation("{Count} package IDs need to be updated.", idBag.Count); if (!changes.Any()) { @@ -139,9 +148,34 @@ await ParallelAsync.Repeat( _logger.LogInformation("Uploading the new download count data to blob storage."); await _downloadDataClient.ReplaceLatestIndexedAsync(newData, oldResult.Metadata.GetIfMatchCondition()); + + _logger.LogInformation("Uploading the new popularity transfer data to blob storage."); + await _popularityTransferDataClient.ReplaceLatestIndexedAsync( + transferResult.LatestPopularityTransfers, + oldTransfers.AccessCondition); return true; } + private async Task ApplyPopularityTransferChangesAsync( + DownloadData newData, + SortedDictionary downloadChanges, + SortedDictionary> oldTransfers) + { + _logger.LogInformation("Finding download changes from popularity transfers and download overrides."); + var transferResult = await _downloadTransferrer.GetTransferChangesAsync(newData, downloadChanges, oldTransfers); + _logger.LogInformation( + "{Count} package IDs have download count changes from popularity transfers and download overrides.", + transferResult.DownloadChanges.Count); + + // Apply the transfer changes to the overall download changes. + foreach (var transferChange in transferResult.DownloadChanges) + { + downloadChanges[transferChange.Key] = transferChange.Value; + } + + return transferResult; + } + private async Task WorkAsync(ConcurrentBag idBag, SortedDictionary changes) { // Perform two batching mechanisms: diff --git a/src/NuGet.Services.AzureSearch/Auxiliary2AzureSearch/UpdateOwnersCommand.cs b/src/NuGet.Services.AzureSearch/Auxiliary2AzureSearch/UpdateOwnersCommand.cs index 940fae196..3800125f1 100644 --- a/src/NuGet.Services.AzureSearch/Auxiliary2AzureSearch/UpdateOwnersCommand.cs +++ b/src/NuGet.Services.AzureSearch/Auxiliary2AzureSearch/UpdateOwnersCommand.cs @@ -17,7 +17,7 @@ public class UpdateOwnersCommand : IAzureSearchCommand { private readonly IDatabaseAuxiliaryDataFetcher _databaseFetcher; private readonly IOwnerDataClient _ownerDataClient; - private readonly IOwnerSetComparer _ownerSetComparer; + private readonly IDataSetComparer _ownerSetComparer; private readonly ISearchDocumentBuilder _searchDocumentBuilder; private readonly ISearchIndexActionBuilder _searchIndexActionBuilder; private readonly Func _batchPusherFactory; @@ -28,7 +28,7 @@ public class UpdateOwnersCommand : IAzureSearchCommand public UpdateOwnersCommand( IDatabaseAuxiliaryDataFetcher databaseFetcher, IOwnerDataClient ownerDataClient, - IOwnerSetComparer ownerSetComparer, + IDataSetComparer ownerSetComparer, ISearchDocumentBuilder searchDocumentBuilder, ISearchIndexActionBuilder indexActionBuilder, Func batchPusherFactory, @@ -67,7 +67,7 @@ public async Task ExecuteAsync() var databaseResult = await _databaseFetcher.GetPackageIdToOwnersAsync(); _logger.LogInformation("Detecting owner changes."); - var changes = _ownerSetComparer.Compare(storageResult.Result, databaseResult); + var changes = _ownerSetComparer.CompareOwners(storageResult.Result, databaseResult); var changesBag = new ConcurrentBag>(changes.Select(x => new IdAndValue(x.Key, x.Value))); _logger.LogInformation("{Count} package IDs have owner changes.", changesBag.Count); diff --git a/src/NuGet.Services.AzureSearch/AuxiliaryFiles/DownloadDataExtensions.cs b/src/NuGet.Services.AzureSearch/AuxiliaryFiles/DownloadDataExtensions.cs deleted file mode 100644 index 144c6733a..000000000 --- a/src/NuGet.Services.AzureSearch/AuxiliaryFiles/DownloadDataExtensions.cs +++ /dev/null @@ -1,90 +0,0 @@ -// Copyright (c) .NET Foundation. All rights reserved. -// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. - -using System; -using System.Collections.Generic; -using System.Linq; -using Microsoft.Extensions.Logging; - -namespace NuGet.Services.AzureSearch.AuxiliaryFiles -{ - public static class DownloadDataExtensions - { - public static DownloadData ApplyDownloadOverrides( - this DownloadData originalData, - IReadOnlyDictionary downloadOverrides, - ILogger logger) - { - if (originalData == null) - { - throw new ArgumentNullException(nameof(originalData)); - } - - if (downloadOverrides == null) - { - throw new ArgumentNullException(nameof(downloadOverrides)); - } - - if (logger == null) - { - throw new ArgumentNullException(nameof(logger)); - } - - // Create a copy of the original data and apply overrides as we copy. - var result = new DownloadData(); - - foreach (var downloadData in originalData) - { - var packageId = downloadData.Key; - - if (ShouldOverrideDownloads(packageId)) - { - logger.LogInformation( - "Overriding downloads of package {PackageId} from {Downloads} to {DownloadsOverride}", - packageId, - originalData.GetDownloadCount(packageId), - downloadOverrides[packageId]); - - var versions = downloadData.Value.Keys; - - result.SetDownloadCount( - packageId, - versions.First(), - downloadOverrides[packageId]); - } - else - { - foreach (var versionData in downloadData.Value) - { - result.SetDownloadCount(downloadData.Key, versionData.Key, versionData.Value); - } - } - } - - bool ShouldOverrideDownloads(string packageId) - { - if (!downloadOverrides.TryGetValue(packageId, out var downloadOverride)) - { - return false; - } - - // Apply the downloads override only if the package has fewer total downloads. - // In effect, this removes a package's manual boost once its total downloads exceed the override. - if (originalData[packageId].Total >= downloadOverride) - { - logger.LogInformation( - "Skipping download override for package {PackageId} as its downloads of {Downloads} are " + - "greater than its override of {DownloadsOverride}", - packageId, - originalData[packageId].Total, - downloadOverride); - return false; - } - - return true; - } - - return result; - } - } -} diff --git a/src/NuGet.Services.AzureSearch/AuxiliaryFiles/IPopularityTransferDataClient.cs b/src/NuGet.Services.AzureSearch/AuxiliaryFiles/IPopularityTransferDataClient.cs new file mode 100644 index 000000000..e31751f2a --- /dev/null +++ b/src/NuGet.Services.AzureSearch/AuxiliaryFiles/IPopularityTransferDataClient.cs @@ -0,0 +1,36 @@ +// Copyright (c) .NET Foundation. All rights reserved. +// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. + +using System.Collections.Generic; +using System.Threading.Tasks; +using NuGetGallery; + +namespace NuGet.Services.AzureSearch.AuxiliaryFiles +{ + /// + /// The purpose of this interface is allow reading and writing populairty transfer information from storage. + /// The Auxiliary2AzureSearch job does a comparison of latest popularity transfer data from the database with + /// a snapshot of information stored in Azure Blob Storage. This interface handles the reading and writing of + /// that snapshot from storage. + /// + public interface IPopularityTransferDataClient + { + /// + /// Read all of the latest indexed popularity transfers from storage. Also, return the current etag to allow + /// optimistic concurrency checks on the writing of the file. The returned dictionary's key is the + /// package ID that is transferring away its popularity, and the values are the package IDs receiving popularity. + /// The dictionary and the sets are case-insensitive. + /// + Task>>> ReadLatestIndexedAsync(); + + /// + /// Replace the existing latest indexed popularity transfers file (i.e. "popularityTransfers.v1.json" file). + /// + /// The new data to be serialized into storage. + /// The access condition (i.e. etag) to use during the upload. + Task ReplaceLatestIndexedAsync( + SortedDictionary> newData, + IAccessCondition accessCondition); + } +} + diff --git a/src/NuGet.Services.AzureSearch/AuxiliaryFiles/PopularityTransferDataClient.cs b/src/NuGet.Services.AzureSearch/AuxiliaryFiles/PopularityTransferDataClient.cs new file mode 100644 index 000000000..98c04c4dd --- /dev/null +++ b/src/NuGet.Services.AzureSearch/AuxiliaryFiles/PopularityTransferDataClient.cs @@ -0,0 +1,139 @@ +// Copyright (c) .NET Foundation. All rights reserved. +// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. + +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.IO; +using System.Net; +using System.Threading.Tasks; +using Microsoft.Extensions.Logging; +using Microsoft.Extensions.Options; +using Microsoft.WindowsAzure.Storage; +using Newtonsoft.Json; +using NuGetGallery; + +namespace NuGet.Services.AzureSearch.AuxiliaryFiles +{ + public class PopularityTransferDataClient : IPopularityTransferDataClient + { + private static readonly JsonSerializer Serializer = new JsonSerializer(); + + private readonly ICloudBlobClient _cloudBlobClient; + private readonly IOptionsSnapshot _options; + private readonly IAzureSearchTelemetryService _telemetryService; + private readonly ILogger _logger; + private readonly Lazy _lazyContainer; + + public PopularityTransferDataClient( + ICloudBlobClient cloudBlobClient, + IOptionsSnapshot options, + IAzureSearchTelemetryService telemetryService, + ILogger logger) + { + _cloudBlobClient = cloudBlobClient ?? throw new ArgumentNullException(nameof(cloudBlobClient)); + _options = options ?? throw new ArgumentNullException(nameof(options)); + _telemetryService = telemetryService ?? throw new ArgumentNullException(nameof(telemetryService)); + _logger = logger ?? throw new ArgumentNullException(nameof(logger)); + + _lazyContainer = new Lazy( + () => _cloudBlobClient.GetContainerReference(_options.Value.StorageContainer)); + } + + private ICloudBlobContainer Container => _lazyContainer.Value; + + public async Task>>> ReadLatestIndexedAsync() + { + var stopwatch = Stopwatch.StartNew(); + var blobName = GetLatestIndexedBlobName(); + var blobReference = Container.GetBlobReference(blobName); + + _logger.LogInformation("Reading the latest indexed popularity transfers from {BlobName}.", blobName); + + var builder = new PackageIdToPopularityTransfersBuilder(_logger); + IAccessCondition accessCondition; + try + { + using (var stream = await blobReference.OpenReadAsync(AccessCondition.GenerateEmptyCondition())) + { + accessCondition = AccessConditionWrapper.GenerateIfMatchCondition(blobReference.ETag); + ReadStream(stream, builder.Add); + } + } + catch (StorageException ex) when (ex.RequestInformation.HttpStatusCode == (int)HttpStatusCode.NotFound) + { + accessCondition = AccessConditionWrapper.GenerateIfNotExistsCondition(); + _logger.LogInformation("The blob {BlobName} does not exist.", blobName); + } + + var output = new ResultAndAccessCondition>>( + builder.GetResult(), + accessCondition); + + stopwatch.Stop(); + _telemetryService.TrackReadLatestIndexedPopularityTransfers(output.Result.Count, stopwatch.Elapsed); + + return output; + } + + public async Task ReplaceLatestIndexedAsync( + SortedDictionary> newData, + IAccessCondition accessCondition) + { + using (_telemetryService.TrackReplaceLatestIndexedPopularityTransfers(newData.Count)) + { + var blobName = GetLatestIndexedBlobName(); + _logger.LogInformation("Replacing the latest indexed popularity transfers from {BlobName}.", blobName); + + var mappedAccessCondition = new AccessCondition + { + IfNoneMatchETag = accessCondition.IfNoneMatchETag, + IfMatchETag = accessCondition.IfMatchETag, + }; + + var blobReference = Container.GetBlobReference(blobName); + + using (var stream = await blobReference.OpenWriteAsync(mappedAccessCondition)) + using (var streamWriter = new StreamWriter(stream)) + using (var jsonTextWriter = new JsonTextWriter(streamWriter)) + { + blobReference.Properties.ContentType = "application/json"; + Serializer.Serialize(jsonTextWriter, newData); + } + } + } + + private static void ReadStream(Stream stream, Action> add) + { + using (var textReader = new StreamReader(stream)) + using (var jsonReader = new JsonTextReader(textReader)) + { + Guard.Assert(jsonReader.Read(), "The blob should be readable."); + Guard.Assert(jsonReader.TokenType == JsonToken.StartObject, "The first token should be the start of an object."); + Guard.Assert(jsonReader.Read(), "There should be a second token."); + while (jsonReader.TokenType == JsonToken.PropertyName) + { + var id = (string)jsonReader.Value; + + Guard.Assert(jsonReader.Read(), "There should be a token after the property name."); + Guard.Assert(jsonReader.TokenType == JsonToken.StartArray, "The token after the property name should be the start of an object."); + + var transfers = Serializer.Deserialize>(jsonReader); + add(id, transfers); + + Guard.Assert(jsonReader.TokenType == JsonToken.EndArray, "The token after reading the array should be the end of an array."); + Guard.Assert(jsonReader.Read(), "There should be a token after the end of the array."); + } + + Guard.Assert(jsonReader.TokenType == JsonToken.EndObject, "The last token should be the end of an object."); + Guard.Assert(!jsonReader.Read(), "There should be no token after the end of the object."); + } + } + + private string GetLatestIndexedBlobName() + { + return $"{_options.Value.NormalizeStoragePath()}popularity-transfers/popularity-transfers.v1.json"; + } + } +} + diff --git a/src/NuGet.Services.AzureSearch/AzureSearchTelemetryService.cs b/src/NuGet.Services.AzureSearch/AzureSearchTelemetryService.cs index d8eeb4a0f..d222d6e55 100644 --- a/src/NuGet.Services.AzureSearch/AzureSearchTelemetryService.cs +++ b/src/NuGet.Services.AzureSearch/AzureSearchTelemetryService.cs @@ -178,6 +178,51 @@ public void TrackOwnerSetComparison(int oldCount, int newCount, int changeCount, }); } + public void TrackReadLatestIndexedPopularityTransfers(int outgoingTransfers, TimeSpan elapsed) + { + _telemetryClient.TrackMetric( + Prefix + "ReadLatestIndexedPopularityTransfersSeconds", + elapsed.TotalSeconds, + new Dictionary + { + { "OutgoingTransfers", outgoingTransfers.ToString() } + }); + } + + public void TrackReadLatestPopularityTransfersFromDatabase(int outgoingTransfers, TimeSpan elapsed) + { + _telemetryClient.TrackMetric( + Prefix + "ReadLatestPopularityTransfersFromDatabase", + elapsed.TotalSeconds, + new Dictionary + { + { "OutgoingTransfers", outgoingTransfers.ToString() } + }); + } + + public void TrackPopularityTransfersSetComparison(int oldCount, int newCount, int changeCount, TimeSpan elapsed) + { + _telemetryClient.TrackMetric( + Prefix + "PopularityTransfersSetComparisonSeconds", + elapsed.TotalSeconds, + new Dictionary + { + { "OldCount", oldCount.ToString() }, + { "NewCount", oldCount.ToString() }, + { "ChangeCount", oldCount.ToString() }, + }); + } + + public IDisposable TrackReplaceLatestIndexedPopularityTransfers(int outogingTransfers) + { + return _telemetryClient.TrackDuration( + Prefix + "ReplaceLatestIndexedPopularityTransfers", + new Dictionary + { + { "OutgoingTransfers", outogingTransfers.ToString() } + }); + } + public IDisposable TrackCatalog2AzureSearchProcessBatch(int catalogLeafCount, int latestCatalogLeafCount, int packageIdCount) { return _telemetryClient.TrackDuration( diff --git a/src/NuGet.Services.AzureSearch/DatabaseAuxiliaryDataFetcher.cs b/src/NuGet.Services.AzureSearch/DatabaseAuxiliaryDataFetcher.cs index cb590645e..560e83e3e 100644 --- a/src/NuGet.Services.AzureSearch/DatabaseAuxiliaryDataFetcher.cs +++ b/src/NuGet.Services.AzureSearch/DatabaseAuxiliaryDataFetcher.cs @@ -3,6 +3,7 @@ using System; using System.Collections.Generic; +using System.Data; using System.Data.Entity; using System.Diagnostics; using System.Linq; @@ -35,6 +36,20 @@ FROM PackageRegistrations pr (NOLOCK) FROM PackageRegistrations pr (NOLOCK) INNER JOIN PackageRegistrationOwners pro (NOLOCK) ON pro.PackageRegistrationKey = pr.[Key] INNER JOIN Users u (NOLOCK) ON pro.UserKey = u.[Key] +"; + + private const int GetPopularityTransfersPageSize = 1000; + private const string GetPopularityTransfersSkipParameter = "@skip"; + private const string GetPopularityTransfersTakeParameter = "@take"; + private const string GetPopularityTransfersSql = @" +SELECT TOP (@take) + fpr.Id AS FromPackageId, + tpr.Id AS ToPackageId +FROM PackageRenames r (NOLOCK) +INNER JOIN PackageRegistrations fpr (NOLOCK) ON fpr.[Key] = r.[FromPackageRegistrationKey] +INNER JOIN PackageRegistrations tpr (NOLOCK) ON tpr.[Key] = r.[ToPackageRegistrationKey] +WHERE r.TransferPopularity != 0 AND r.[Key] >= @skip +ORDER BY r.[Key] ASC "; public DatabaseAuxiliaryDataFetcher( @@ -137,6 +152,52 @@ public async Task>> GetPackageIdToOwn } } } + + public async Task>> GetPackageIdToPopularityTransfersAsync() + { + var stopwatch = Stopwatch.StartNew(); + var builder = new PackageIdToPopularityTransfersBuilder(_logger); + using (var connection = await _connectionFactory.OpenAsync()) + using (var command = connection.CreateCommand()) + { + command.CommandText = GetPopularityTransfersSql; + command.Parameters.Add(GetPopularityTransfersSkipParameter, SqlDbType.Int); + command.Parameters.AddWithValue(GetPopularityTransfersTakeParameter, GetPopularityTransfersPageSize); + + // Load popularity transfers by paging through the database. + // We continue paging until we receive fewer results than the page size. + int currentPageResults; + int totalResults = 0; + do + { + command.Parameters[GetPopularityTransfersSkipParameter].Value = totalResults; + + using (var reader = await command.ExecuteReaderAsync()) + { + currentPageResults = 0; + + while (await reader.ReadAsync()) + { + currentPageResults++; + + var fromId = reader.GetString(0); + var toId = reader.GetString(1); + + builder.Add(fromId, toId); + } + } + + totalResults += currentPageResults; + } + while (currentPageResults == GetPopularityTransfersPageSize); + + var output = builder.GetResult(); + stopwatch.Stop(); + _telemetryService.TrackReadLatestPopularityTransfersFromDatabase(output.Count, stopwatch.Elapsed); + + return output; + } + } } } diff --git a/src/NuGet.Services.AzureSearch/Db2AzureSearch/Db2AzureSearchCommand.cs b/src/NuGet.Services.AzureSearch/Db2AzureSearch/Db2AzureSearchCommand.cs index c7db8d555..294bfa226 100644 --- a/src/NuGet.Services.AzureSearch/Db2AzureSearch/Db2AzureSearchCommand.cs +++ b/src/NuGet.Services.AzureSearch/Db2AzureSearch/Db2AzureSearchCommand.cs @@ -30,6 +30,7 @@ public class Db2AzureSearchCommand : IAzureSearchCommand private readonly IOwnerDataClient _ownerDataClient; private readonly IDownloadDataClient _downloadDataClient; private readonly IVerifiedPackagesDataClient _verifiedPackagesDataClient; + private readonly IPopularityTransferDataClient _popularityTransferDataClient; private readonly IOptionsSnapshot _options; private readonly IOptionsSnapshot _developmentOptions; private readonly ILogger _logger; @@ -45,6 +46,7 @@ public Db2AzureSearchCommand( IOwnerDataClient ownerDataClient, IDownloadDataClient downloadDataClient, IVerifiedPackagesDataClient verifiedPackagesDataClient, + IPopularityTransferDataClient popularityTransferDataClient, IOptionsSnapshot options, IOptionsSnapshot developmentOptions, ILogger logger) @@ -59,6 +61,7 @@ public Db2AzureSearchCommand( _ownerDataClient = ownerDataClient ?? throw new ArgumentNullException(nameof(ownerDataClient)); _downloadDataClient = downloadDataClient ?? throw new ArgumentNullException(nameof(downloadDataClient)); _verifiedPackagesDataClient = verifiedPackagesDataClient ?? throw new ArgumentNullException(nameof(verifiedPackagesDataClient)); + _popularityTransferDataClient = popularityTransferDataClient ?? throw new ArgumentNullException(nameof(popularityTransferDataClient)); _options = options ?? throw new ArgumentNullException(nameof(options)); _developmentOptions = developmentOptions ?? throw new ArgumentNullException(nameof(developmentOptions)); _logger = logger ?? throw new ArgumentNullException(nameof(logger)); @@ -114,6 +117,9 @@ private async Task ExecuteAsync(CancellationToken token) // Write the verified packages data file. await WriteVerifiedPackagesDataAsync(initialAuxiliaryData.VerifiedPackages); + // Write popularity transfers data file. + await WritePopularityTransfersDataAsync(initialAuxiliaryData.PopularityTransfers); + // Write the cursor. _logger.LogInformation("Writing the initial cursor value to be {CursorValue:O}.", initialCursorValue); var frontCursorStorage = _storageFactory.Create(); @@ -195,6 +201,15 @@ await _verifiedPackagesDataClient.ReplaceLatestAsync( _logger.LogInformation("Done uploading the initial verified packages data file."); } + private async Task WritePopularityTransfersDataAsync(SortedDictionary> popularityTransfers) + { + _logger.LogInformation("Writing the initial popularity transfers data file."); + await _popularityTransferDataClient.ReplaceLatestIndexedAsync( + popularityTransfers, + AccessConditionWrapper.GenerateIfNotExistsCondition()); + _logger.LogInformation("Done uploading the initial popularity transfers data file."); + } + private async Task ProduceWorkAsync( ConcurrentBag allWork, CancellationTokenSource produceWorkCts, diff --git a/src/NuGet.Services.AzureSearch/Db2AzureSearch/InitialAuxiliaryData.cs b/src/NuGet.Services.AzureSearch/Db2AzureSearch/InitialAuxiliaryData.cs index 30dcae7b6..dc5475449 100644 --- a/src/NuGet.Services.AzureSearch/Db2AzureSearch/InitialAuxiliaryData.cs +++ b/src/NuGet.Services.AzureSearch/Db2AzureSearch/InitialAuxiliaryData.cs @@ -13,17 +13,20 @@ public InitialAuxiliaryData( SortedDictionary> owners, DownloadData downloads, HashSet excludedPackages, - HashSet verifiedPackages) + HashSet verifiedPackages, + SortedDictionary> popularityTransfers) { Owners = owners ?? throw new ArgumentNullException(nameof(owners)); Downloads = downloads ?? throw new ArgumentNullException(nameof(downloads)); ExcludedPackages = excludedPackages ?? throw new ArgumentNullException(nameof(excludedPackages)); VerifiedPackages = verifiedPackages ?? throw new ArgumentNullException(nameof(verifiedPackages)); + PopularityTransfers = popularityTransfers ?? throw new ArgumentNullException(nameof(popularityTransfers)); } public SortedDictionary> Owners { get; } public DownloadData Downloads { get; } public HashSet ExcludedPackages { get; } public HashSet VerifiedPackages { get; } + public SortedDictionary> PopularityTransfers { get; } } } diff --git a/src/NuGet.Services.AzureSearch/Db2AzureSearch/NewPackageRegistrationProducer.cs b/src/NuGet.Services.AzureSearch/Db2AzureSearch/NewPackageRegistrationProducer.cs index d758f1260..fff665411 100644 --- a/src/NuGet.Services.AzureSearch/Db2AzureSearch/NewPackageRegistrationProducer.cs +++ b/src/NuGet.Services.AzureSearch/Db2AzureSearch/NewPackageRegistrationProducer.cs @@ -21,6 +21,7 @@ public class NewPackageRegistrationProducer : INewPackageRegistrationProducer { private readonly IEntitiesContextFactory _contextFactory; private readonly IAuxiliaryFileClient _auxiliaryFileClient; + private readonly IDownloadTransferrer _downloadTransferrer; private readonly IOptionsSnapshot _options; private readonly IOptionsSnapshot _developmentOptions; private readonly ILogger _logger; @@ -28,14 +29,16 @@ public class NewPackageRegistrationProducer : INewPackageRegistrationProducer public NewPackageRegistrationProducer( IEntitiesContextFactory contextFactory, IAuxiliaryFileClient auxiliaryFileClient, + IDownloadTransferrer downloadTransferrer, IOptionsSnapshot options, IOptionsSnapshot developmentOptions, ILogger logger) { _contextFactory = contextFactory ?? throw new ArgumentNullException(nameof(contextFactory)); + _auxiliaryFileClient = auxiliaryFileClient ?? throw new ArgumentNullException(nameof(auxiliaryFileClient)); + _downloadTransferrer = downloadTransferrer ?? throw new ArgumentNullException(nameof(downloadTransferrer)); _options = options ?? throw new ArgumentNullException(nameof(options)); _developmentOptions = developmentOptions ?? throw new ArgumentNullException(nameof(developmentOptions)); - _auxiliaryFileClient = auxiliaryFileClient ?? throw new ArgumentNullException(nameof(auxiliaryFileClient)); _logger = logger ?? throw new ArgumentNullException(nameof(logger)); } @@ -54,15 +57,13 @@ public async Task ProduceWorkAsync( $"Excluded packages HashSet should be using {nameof(StringComparer.OrdinalIgnoreCase)}"); // Fetch the download data from the auxiliary file, since this is what is used for displaying download - // counts in the search service. The gallery DB and the downloads data file have different download count - // numbers we don't use the gallery DB values. + // counts in the search service. We don't use the gallery DB values as they are different from the + // auxiliary file. var downloads = await _auxiliaryFileClient.LoadDownloadDataAsync(); - // Fetch the download overrides from the auxiliary file. Note that the overriden downloads are kept - // separate from downloads data as the original data will be persisted to auxiliary data, whereas the - // overriden data will be persisted to Azure Search. - var downloadOverrides = await _auxiliaryFileClient.LoadDownloadOverridesAsync(); - var overridenDownloads = downloads.ApplyDownloadOverrides(downloadOverrides, _logger); + // Apply changes from popularity transfers. + var transferResult = await _downloadTransferrer.GetTransferChangesAsync(downloads); + var packageToDownloads = GetPackageToDownloads(downloads, transferResult.DownloadChanges); // Build a list of the owners data and verified IDs as we collect package registrations from the database. var ownersBuilder = new PackageIdToOwnersBuilder(_logger); @@ -91,6 +92,11 @@ public async Task ProduceWorkAsync( foreach (var pr in packageRegistrationInfo) { + if (!packageToDownloads.TryGetValue(pr.Id, out var packageDownloads)) + { + packageDownloads = 0; + } + if (!keyToPackages.TryGetValue(pr.Key, out var packages)) { packages = new List(); @@ -100,7 +106,7 @@ public async Task ProduceWorkAsync( allWork.Add(new NewPackageRegistration( pr.Id, - overridenDownloads.GetDownloadCount(pr.Id), + packageDownloads, pr.Owners, packages, isExcludedByDefault)); @@ -120,7 +126,8 @@ public async Task ProduceWorkAsync( ownersBuilder.GetResult(), downloads, excludedPackages, - verifiedPackages); + verifiedPackages, + transferResult.LatestPopularityTransfers); } private bool ShouldWait(ConcurrentBag allWork, bool log) @@ -145,6 +152,25 @@ private bool ShouldWait(ConcurrentBag allWork, bool log) return false; } + private Dictionary GetPackageToDownloads( + DownloadData downloads, + Dictionary downloadChanges) + { + var result = new Dictionary(StringComparer.OrdinalIgnoreCase); + + foreach (var packageDownload in downloads) + { + result[packageDownload.Key] = packageDownload.Value.Total; + } + + foreach (var downloadChange in downloadChanges) + { + result[downloadChange.Key] = downloadChange.Value; + } + + return result; + } + private async Task> GetPackagesAsync(PackageRegistrationRange range) { using (var context = await CreateContextAsync()) diff --git a/src/NuGet.Services.AzureSearch/DependencyInjectionExtensions.cs b/src/NuGet.Services.AzureSearch/DependencyInjectionExtensions.cs index ac3a5d37c..839aa9fcb 100644 --- a/src/NuGet.Services.AzureSearch/DependencyInjectionExtensions.cs +++ b/src/NuGet.Services.AzureSearch/DependencyInjectionExtensions.cs @@ -165,6 +165,13 @@ private static void RegisterAzureSearchStorageServices(ContainerBuilder containe c.Resolve(), c.Resolve>())); + containerBuilder + .Register(c => new PopularityTransferDataClient( + c.ResolveKeyed(key), + c.Resolve>(), + c.Resolve(), + c.Resolve>())); + containerBuilder .Register(c => new Catalog2AzureSearchCommand( c.Resolve(), @@ -187,6 +194,7 @@ private static void RegisterAzureSearchStorageServices(ContainerBuilder containe c.Resolve(), c.Resolve(), c.Resolve(), + c.Resolve(), c.Resolve>(), c.Resolve>(), c.Resolve>())); @@ -249,14 +257,15 @@ public static IServiceCollection AddAzureSearch( services.AddTransient(); services.AddTransient(); services.AddTransient(); + services.AddTransient(); services.AddTransient(); services.AddTransient(); + services.AddTransient(); services.AddTransient(); services.AddTransient(); services.AddTransient(); services.AddTransient(); services.AddTransient(); - services.AddTransient(); services.AddTransient(); services.AddTransient(); services.AddTransient(); diff --git a/src/NuGet.Services.AzureSearch/DownloadTransferResult.cs b/src/NuGet.Services.AzureSearch/DownloadTransferResult.cs new file mode 100644 index 000000000..358fed0d7 --- /dev/null +++ b/src/NuGet.Services.AzureSearch/DownloadTransferResult.cs @@ -0,0 +1,40 @@ +// Copyright (c) .NET Foundation. All rights reserved. +// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. + +using System; +using System.Collections.Generic; + +namespace NuGet.Services.AzureSearch +{ + /// + /// The result of applying popularity transfers to download data. + /// + public class DownloadTransferResult + { + public DownloadTransferResult( + Dictionary downloadChanges, + SortedDictionary> popularityTransfers) + { + Guard.Assert( + downloadChanges.Comparer == StringComparer.OrdinalIgnoreCase, + $"Download changes should have comparer {nameof(StringComparer.OrdinalIgnoreCase)}"); + + Guard.Assert( + downloadChanges.Comparer == StringComparer.OrdinalIgnoreCase, + $"Latest popularity transfers should have comparer {nameof(StringComparer.OrdinalIgnoreCase)}"); + + DownloadChanges = downloadChanges ?? throw new ArgumentNullException(nameof(downloadChanges)); + LatestPopularityTransfers = popularityTransfers ?? throw new ArgumentNullException(nameof(popularityTransfers)); + } + + /// + /// The downloads that were changed by transferring downloads. + /// + public Dictionary DownloadChanges { get; } + + /// + /// The latest popularity transfers data from the gallery database. + /// + public SortedDictionary> LatestPopularityTransfers { get; } + } +} \ No newline at end of file diff --git a/src/NuGet.Services.AzureSearch/DownloadTransferrer.cs b/src/NuGet.Services.AzureSearch/DownloadTransferrer.cs new file mode 100644 index 000000000..eba22739f --- /dev/null +++ b/src/NuGet.Services.AzureSearch/DownloadTransferrer.cs @@ -0,0 +1,119 @@ +// Copyright (c) .NET Foundation. All rights reserved. +// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. + +using System; +using System.Collections.Generic; +using System.Threading.Tasks; +using Microsoft.Extensions.Logging; +using NuGet.Services.AzureSearch.AuxiliaryFiles; + +namespace NuGet.Services.AzureSearch +{ + public class DownloadTransferrer : IDownloadTransferrer + { + private readonly IAuxiliaryFileClient _auxiliaryFileClient; + private readonly IDatabaseAuxiliaryDataFetcher _databaseFetcher; + private readonly ILogger _logger; + + public DownloadTransferrer( + IAuxiliaryFileClient auxiliaryFileClient, + IDatabaseAuxiliaryDataFetcher databaseFetcher, + ILogger logger) + { + _auxiliaryFileClient = auxiliaryFileClient ?? throw new ArgumentException(nameof(auxiliaryFileClient)); + _databaseFetcher = databaseFetcher ?? throw new ArgumentNullException(nameof(databaseFetcher)); + _logger = logger ?? throw new ArgumentNullException(nameof(logger)); + } + + public async Task GetTransferChangesAsync(DownloadData downloads) + { + // Downloads are transferred from a "from" package to one or more "to" packages. + // The "outgoingTransfers" maps "from" packages to their corresponding "to" packages. + // The "incomingTransfers" maps "to" packages to their corresponding "from" packages. + _logger.LogInformation("Fetching new popularity transfer data from gallery database."); + var outgoingTransfers = await _databaseFetcher.GetPackageIdToPopularityTransfersAsync(); + + return await GetTransferChangesAsync( + downloads, + outgoingTransfers); + } + + public async Task GetTransferChangesAsync( + DownloadData downloads, + SortedDictionary downloadChanges, + SortedDictionary> oldTransfers) + { + Guard.Assert( + downloadChanges.Comparer == StringComparer.OrdinalIgnoreCase, + $"Download changes should have comparer {nameof(StringComparer.OrdinalIgnoreCase)}"); + + Guard.Assert( + oldTransfers.Comparer == StringComparer.OrdinalIgnoreCase, + $"Old popularity transfer should have comparer {nameof(StringComparer.OrdinalIgnoreCase)}"); + + // Downloads are transferred from a "from" package to one or more "to" packages. + // The "outgoingTransfers" maps "from" packages to their corresponding "to" packages. + _logger.LogInformation("Fetching new popularity transfer data from gallery database."); + var outgoingTransfers = await _databaseFetcher.GetPackageIdToPopularityTransfersAsync(); + + return await GetTransferChangesAsync( + downloads, + outgoingTransfers); + } + + private async Task GetTransferChangesAsync( + DownloadData downloads, + SortedDictionary> outgoingTransfers) + { + var result = new Dictionary(StringComparer.OrdinalIgnoreCase); + + // TODO: Add download changes due to popularity transfers. + // See: https://github.com/NuGet/NuGetGallery/issues/7898 + await AddDownloadOverridesAsync(downloads, result); + + return new DownloadTransferResult( + result, + outgoingTransfers); + } + + private async Task AddDownloadOverridesAsync( + DownloadData downloads, + Dictionary downloadChanges) + { + // TODO: Remove download overrides. + // See: https://github.com/nuget/engineering/issues/3089 + _logger.LogInformation("Fetching download override data."); + var downloadOverrides = await _auxiliaryFileClient.LoadDownloadOverridesAsync(); + + foreach (var downloadOverride in downloadOverrides) + { + var packageId = downloadOverride.Key; + var packageDownloads = downloads.GetDownloadCount(packageId); + + if (downloadChanges.TryGetValue(packageId, out var updatedDownloads)) + { + packageDownloads = updatedDownloads; + } + + if (packageDownloads >= downloadOverride.Value) + { + _logger.LogInformation( + "Skipping download override for package {PackageId} as its downloads of {Downloads} are " + + "greater than its override of {DownloadsOverride}", + packageId, + packageDownloads, + downloadOverride.Value); + continue; + } + + _logger.LogInformation( + "Overriding downloads of package {PackageId} from {Downloads} to {DownloadsOverride}", + packageId, + packageDownloads, + downloadOverride.Value); + + downloadChanges[packageId] = downloadOverride.Value; + } + } + } +} diff --git a/src/NuGet.Services.AzureSearch/IAzureSearchTelemetryService.cs b/src/NuGet.Services.AzureSearch/IAzureSearchTelemetryService.cs index 832f9a001..a63e577b7 100644 --- a/src/NuGet.Services.AzureSearch/IAzureSearchTelemetryService.cs +++ b/src/NuGet.Services.AzureSearch/IAzureSearchTelemetryService.cs @@ -20,9 +20,13 @@ public interface IAzureSearchTelemetryService void TrackOwnerSetComparison(int oldCount, int newCount, int changeCount, TimeSpan elapsed); void TrackReadLatestIndexedOwners(int packageIdCount, TimeSpan elapsed); void TrackReadLatestOwnersFromDatabase(int packageIdCount, TimeSpan elapsed); + void TrackPopularityTransfersSetComparison(int oldCount, int newCount, int changeCount, TimeSpan elapsed); + void TrackReadLatestIndexedPopularityTransfers(int outgoingTransfers, TimeSpan elapsed); + void TrackReadLatestPopularityTransfersFromDatabase(int outgoingTransfers, TimeSpan elapsed); void TrackReadLatestVerifiedPackagesFromDatabase(int packageIdCount, TimeSpan elapsed); IDisposable TrackReplaceLatestIndexedOwners(int packageIdCount); IDisposable TrackUploadOwnerChangeHistory(int packageIdCount); + IDisposable TrackReplaceLatestIndexedPopularityTransfers(int outgoingTransfers); IDisposable TrackVersionListsUpdated(int versionListCount, int workerCount); IDisposable TrackCatalog2AzureSearchProcessBatch(int catalogLeafCount, int latestCatalogLeafCount, int packageIdCount); void TrackV2SearchQueryWithSearchIndex(TimeSpan elapsed); diff --git a/src/NuGet.Services.AzureSearch/IDatabaseAuxiliaryDataFetcher.cs b/src/NuGet.Services.AzureSearch/IDatabaseAuxiliaryDataFetcher.cs index c08b538a3..0d85f0276 100644 --- a/src/NuGet.Services.AzureSearch/IDatabaseAuxiliaryDataFetcher.cs +++ b/src/NuGet.Services.AzureSearch/IDatabaseAuxiliaryDataFetcher.cs @@ -24,6 +24,12 @@ public interface IDatabaseAuxiliaryDataFetcher /// Task>> GetPackageIdToOwnersAsync(); + /// + /// Fetch a mapping of package IDs to set of replacement package IDs for each renamed packages that transfer + /// popularity in the gallery database. + /// + Task>> GetPackageIdToPopularityTransfersAsync(); + /// /// Fetch the set of all verified package IDs. /// diff --git a/src/NuGet.Services.AzureSearch/IDownloadTransferrer.cs b/src/NuGet.Services.AzureSearch/IDownloadTransferrer.cs new file mode 100644 index 000000000..c0320e054 --- /dev/null +++ b/src/NuGet.Services.AzureSearch/IDownloadTransferrer.cs @@ -0,0 +1,34 @@ +// Copyright (c) .NET Foundation. All rights reserved. +// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. + +using System.Collections.Generic; +using System.Threading.Tasks; +using NuGet.Services.AzureSearch.AuxiliaryFiles; + +namespace NuGet.Services.AzureSearch +{ + /// + /// Applies popularity transfer changes to download data. + /// + public interface IDownloadTransferrer + { + /// + /// Apply popularity transfer changes to the initial downloads data. + /// + /// The initial downloads data. + /// The result of applying popularity transfers. + Task GetTransferChangesAsync(DownloadData downloads); + + /// + /// Apply popularity transfer changes to the updated downloads data. + /// + /// The updated downloads data. + /// The downloads that have changed since the last index. + /// The popularity transfers that were previously indexed. + /// The result of applying popularity transfers. + Task GetTransferChangesAsync( + DownloadData downloads, + SortedDictionary downloadChanges, + SortedDictionary> oldTransfers); + } +} \ No newline at end of file diff --git a/src/NuGet.Services.AzureSearch/NuGet.Services.AzureSearch.csproj b/src/NuGet.Services.AzureSearch/NuGet.Services.AzureSearch.csproj index 8ae96e240..640f3ce3c 100644 --- a/src/NuGet.Services.AzureSearch/NuGet.Services.AzureSearch.csproj +++ b/src/NuGet.Services.AzureSearch/NuGet.Services.AzureSearch.csproj @@ -56,10 +56,11 @@ - + + @@ -79,17 +80,21 @@ + + + - + + - + diff --git a/src/NuGet.Services.AzureSearch/PackageIdToPopularityTransfersBuilder.cs b/src/NuGet.Services.AzureSearch/PackageIdToPopularityTransfersBuilder.cs new file mode 100644 index 000000000..c860d4af8 --- /dev/null +++ b/src/NuGet.Services.AzureSearch/PackageIdToPopularityTransfersBuilder.cs @@ -0,0 +1,89 @@ +// Copyright (c) .NET Foundation. All rights reserved. +// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. + +using System; +using System.Collections.Generic; +using Microsoft.Extensions.Logging; + +namespace NuGet.Services.AzureSearch +{ + public class PackageIdToPopularityTransfersBuilder + { + private readonly ILogger _logger; + private int _addCount; + private readonly Dictionary _idInternPool; + private readonly SortedDictionary> _result; + + public PackageIdToPopularityTransfersBuilder(ILogger logger) + { + _logger = logger ?? throw new ArgumentNullException(nameof(logger)); + _addCount = 0; + _idInternPool = new Dictionary(StringComparer.OrdinalIgnoreCase); + _result = new SortedDictionary>(StringComparer.OrdinalIgnoreCase); + } + + /// + /// Add multiple popularity transfers. + /// + /// The package that is transferring its popularity away. + /// The packages that are receiving the transferred popularity. + public void Add(string fromId, IReadOnlyList toIds) + { + foreach (var toId in toIds) + { + Add(fromId, toId); + } + } + + /// + /// Add a popularity transfers + /// + /// The package that is transferring its popularity away. + /// The package that is receiving the transferred popularity. + public void Add(string fromId, string toId) + { + _addCount++; + if (_addCount % 10000 == 0) + { + _logger.LogInformation("{AddCount} popularity transfers have been added so far.", _addCount); + } + + // Use a single instance of each "toId" string. + toId = InternId(toId); + + if (!_result.TryGetValue(fromId, out var toIds)) + { + toIds = new SortedSet(StringComparer.OrdinalIgnoreCase); + fromId = InternId(fromId); + + _result.Add(fromId, toIds); + } + + toIds.Add(toId); + } + + /// + /// Get the popularity transfers. + /// + /// A map of packages transferring popularity away to the packages receiving the popularity. + public SortedDictionary> GetResult() + { + _logger.LogInformation("{RecordCount} popularity transfers were found.", _addCount); + _logger.LogInformation("{FromTransfers} packages transfer popularity away.", _result.Count); + _logger.LogInformation("{UniqueIds} unique package IDs.", _idInternPool.Count); + + return _result; + } + + private string InternId(string id) + { + if (_idInternPool.TryGetValue(id, out var existingId)) + { + return existingId; + } + + _idInternPool.Add(id, id); + return id; + } + } +} diff --git a/tests/NuGet.Services.AzureSearch.Tests/Auxiliary2AzureSearch/DataSetComparerFacts.cs b/tests/NuGet.Services.AzureSearch.Tests/Auxiliary2AzureSearch/DataSetComparerFacts.cs new file mode 100644 index 000000000..e5f1f869d --- /dev/null +++ b/tests/NuGet.Services.AzureSearch.Tests/Auxiliary2AzureSearch/DataSetComparerFacts.cs @@ -0,0 +1,404 @@ +// Copyright (c) .NET Foundation. All rights reserved. +// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. + +using System; +using System.Collections.Generic; +using System.Linq; +using Moq; +using Xunit; +using Xunit.Abstractions; + +namespace NuGet.Services.AzureSearch.Auxiliary2AzureSearch +{ + public class DataSetComparerFacts + { + public class CompareOwners : Facts + { + public CompareOwners(ITestOutputHelper output) : base(output) + { + } + + [Fact] + public void FindsAddedPackageIds() + { + var oldData = OwnersData("NuGet.Core: NuGet, Microsoft"); + var newData = OwnersData( + "NuGet.Core: NuGet, Microsoft", + "NuGet.Versioning: NuGet, Microsoft"); + + var changes = Target.CompareOwners(oldData, newData); + + var pair = Assert.Single(changes); + Assert.Equal("NuGet.Versioning", pair.Key); + Assert.Equal(new[] { "Microsoft", "NuGet" }, pair.Value); + + TelemetryService.Verify( + x => x.TrackOwnerSetComparison( + /*oldCount: */ 1, + /*newCount: */ 2, + /*changeCount: */ 1, + It.IsAny()), + Times.Once); + } + + [Fact] + public void FindsRemovedPackageIds() + { + var oldData = OwnersData( + "NuGet.Core: NuGet, Microsoft", + "NuGet.Versioning: NuGet, Microsoft"); + var newData = OwnersData("NuGet.Core: NuGet, Microsoft"); + + var changes = Target.CompareOwners(oldData, newData); + + var pair = Assert.Single(changes); + Assert.Equal("NuGet.Versioning", pair.Key); + Assert.Empty(pair.Value); + + TelemetryService.Verify( + x => x.TrackOwnerSetComparison( + /*oldCount: */ 2, + /*newCount: */ 1, + /*changeCount: */ 1, + It.IsAny()), + Times.Once); + } + + [Fact] + public void FindsAddedOwner() + { + var oldData = OwnersData("NuGet.Core: NuGet"); + var newData = OwnersData("NuGet.Core: NuGet, Microsoft"); + + var changes = Target.CompareOwners(oldData, newData); + + var pair = Assert.Single(changes); + Assert.Equal("NuGet.Core", pair.Key); + Assert.Equal(new[] { "Microsoft", "NuGet" }, pair.Value); + + TelemetryService.Verify( + x => x.TrackOwnerSetComparison( + /*oldCount: */ 1, + /*newCount: */ 1, + /*changeCount: */ 1, + It.IsAny()), + Times.Once); + } + + [Fact] + public void FindsRemovedOwner() + { + var oldData = OwnersData("NuGet.Core: NuGet, Microsoft"); + var newData = OwnersData("NuGet.Core: NuGet"); + + var changes = Target.CompareOwners(oldData, newData); + + var pair = Assert.Single(changes); + Assert.Equal("NuGet.Core", pair.Key); + Assert.Equal(new[] { "NuGet" }, pair.Value); + + TelemetryService.Verify( + x => x.TrackOwnerSetComparison( + /*oldCount: */ 1, + /*newCount: */ 1, + /*changeCount: */ 1, + It.IsAny()), + Times.Once); + } + + [Fact] + public void FindsOwnerWithChangedCase() + { + var oldData = OwnersData("NuGet.Core: NuGet, Microsoft"); + var newData = OwnersData("NuGet.Core: NuGet, microsoft"); + + var changes = Target.CompareOwners(oldData, newData); + + var pair = Assert.Single(changes); + Assert.Equal("NuGet.Core", pair.Key); + Assert.Equal(new[] { "microsoft", "NuGet" }, pair.Value); + + TelemetryService.Verify( + x => x.TrackOwnerSetComparison( + /*oldCount: */ 1, + /*newCount: */ 1, + /*changeCount: */ 1, + It.IsAny()), + Times.Once); + } + + [Fact] + public void FindsManyChangesAtOnce() + { + var oldData = OwnersData( + "NuGet.Core: NuGet, Microsoft", + "NuGet.Frameworks: NuGet", + "NuGet.Protocol: NuGet"); + var newData = OwnersData( + "NuGet.Core: NuGet, microsoft", + "NuGet.Versioning: NuGet", + "NuGet.Protocol: NuGet"); + + var changes = Target.CompareOwners(oldData, newData); + + Assert.Equal(3, changes.Count); + Assert.Equal(new[] { "NuGet.Core", "NuGet.Frameworks", "NuGet.Versioning" }, changes.Keys.ToArray()); + Assert.Equal(new[] { "microsoft", "NuGet" }, changes["NuGet.Core"]); + Assert.Empty(changes["NuGet.Frameworks"]); + Assert.Equal(new[] { "NuGet" }, changes["NuGet.Versioning"]); + + TelemetryService.Verify( + x => x.TrackOwnerSetComparison( + /*oldCount: */ 3, + /*newCount: */ 3, + /*changeCount: */ 3, + It.IsAny()), + Times.Once); + } + + [Fact] + public void FindsNoChanges() + { + var oldData = OwnersData( + "NuGet.Core: NuGet, Microsoft", + "NuGet.Versioning: NuGet, Microsoft"); + var newData = OwnersData( + "NuGet.Core: NuGet, Microsoft", + "NuGet.Versioning: NuGet, Microsoft"); + + var changes = Target.CompareOwners(oldData, newData); + + Assert.Empty(changes); + + TelemetryService.Verify( + x => x.TrackOwnerSetComparison( + /*oldCount: */ 2, + /*newCount: */ 2, + /*changeCount: */ 0, + It.IsAny()), + Times.Once); + } + } + + public class ComparePopularityTransfers : Facts + { + public ComparePopularityTransfers(ITestOutputHelper output) : base(output) + { + } + + [Fact] + public void FindsNoChanges() + { + var oldData = TransfersData( + "PackageA: PackageB, PackageC", + "Package1: Package3, Package2"); + var newData = TransfersData( + "PackageA: PackageC, PackageB", + "Package1: Package2, Package3"); + + var changes = Target.ComparePopularityTransfers(oldData, newData); + + Assert.Empty(changes); + + TelemetryService.Verify( + x => x.TrackPopularityTransfersSetComparison( + /*oldCount: */ 2, + /*newCount: */ 2, + /*changeCount: */ 0, + It.IsAny()), + Times.Once); + } + + [Fact] + public void FindsAddedTransfers() + { + var oldData = TransfersData("PackageA: PackageB, PackageC"); + var newData = TransfersData( + "PackageA: PackageB, PackageC", + "Package1: Package2, Package3"); + + var changes = Target.ComparePopularityTransfers(oldData, newData); + + var pair = Assert.Single(changes); + Assert.Equal("Package1", pair.Key); + Assert.Equal(new[] { "Package2", "Package3" }, pair.Value); + + TelemetryService.Verify( + x => x.TrackPopularityTransfersSetComparison( + /*oldCount: */ 1, + /*newCount: */ 2, + /*changeCount: */ 1, + It.IsAny()), + Times.Once); + } + + [Fact] + public void FindsRemovedTransfers() + { + var oldData = TransfersData( + "PackageA: PackageB, PackageC", + "Package1: Package2, Package3"); + var newData = TransfersData("PackageA: PackageB, PackageC"); + + var changes = Target.ComparePopularityTransfers(oldData, newData); + + var pair = Assert.Single(changes); + Assert.Equal("Package1", pair.Key); + Assert.Empty(pair.Value); + + TelemetryService.Verify( + x => x.TrackPopularityTransfersSetComparison( + /*oldCount: */ 2, + /*newCount: */ 1, + /*changeCount: */ 1, + It.IsAny()), + Times.Once); + } + + [Fact] + public void FindsAddedToPackage() + { + var oldData = TransfersData("PackageA: PackageB"); + var newData = TransfersData("PackageA: PackageB, PackageC"); + + var changes = Target.ComparePopularityTransfers(oldData, newData); + + var pair = Assert.Single(changes); + Assert.Equal("PackageA", pair.Key); + Assert.Equal(new[] { "PackageB", "PackageC" }, pair.Value); + + TelemetryService.Verify( + x => x.TrackPopularityTransfersSetComparison( + /*oldCount: */ 1, + /*newCount: */ 1, + /*changeCount: */ 1, + It.IsAny()), + Times.Once); + } + + [Fact] + public void FindsRemovedToPackage() + { + var oldData = TransfersData("PackageA: PackageB, PackageC"); + var newData = TransfersData("PackageA: PackageB"); + + var changes = Target.ComparePopularityTransfers(oldData, newData); + + var pair = Assert.Single(changes); + Assert.Equal("PackageA", pair.Key); + Assert.Equal(new[] { "PackageB" }, pair.Value); + + TelemetryService.Verify( + x => x.TrackPopularityTransfersSetComparison( + /*oldCount: */ 1, + /*newCount: */ 1, + /*changeCount: */ 1, + It.IsAny()), + Times.Once); + } + + [Fact] + public void IgnoresCaseChanges() + { + var oldData = TransfersData("PackageA: packageb, PackageC"); + var newData = TransfersData("packagea: PACKAGEB, packageC"); + + var changes = Target.ComparePopularityTransfers(oldData, newData); + + Assert.Empty(changes); + + TelemetryService.Verify( + x => x.TrackPopularityTransfersSetComparison( + /*oldCount: */ 1, + /*newCount: */ 1, + /*changeCount: */ 0, + It.IsAny()), + Times.Once); + } + + [Fact] + public void FindsManyChangesAtOnce() + { + var oldData = TransfersData( + "Package1: PackageA, PackageB", + "Package2: PackageC", + "Package3: PackageD"); + var newData = TransfersData( + "Package1: PackageA, PackageE", + "Package4: PackageC", + "Package3: Packaged"); + + var changes = Target.ComparePopularityTransfers(oldData, newData); + + Assert.Equal(3, changes.Count); + Assert.Equal(new[] { "Package1", "Package2", "Package4" }, changes.Keys.ToArray()); + Assert.Equal(new[] { "PackageA", "PackageE" }, changes["Package1"]); + Assert.Empty(changes["Package2"]); + Assert.Equal(new[] { "PackageC" }, changes["Package4"]); + + TelemetryService.Verify( + x => x.TrackPopularityTransfersSetComparison( + /*oldCount: */ 3, + /*newCount: */ 3, + /*changeCount: */ 3, + It.IsAny()), + Times.Once); + } + } + + public abstract class Facts + { + public Facts(ITestOutputHelper output) + { + TelemetryService = new Mock(); + Logger = output.GetLogger(); + + Target = new DataSetComparer( + TelemetryService.Object, + Logger); + } + + public Mock TelemetryService { get; } + public RecordingLogger Logger { get; } + public DataSetComparer Target { get; } + + /// + /// A helper to turn lines formatted like this "PackageId: OwnerA, OwnerB" into package ID to owners + /// dictionary. + /// + public SortedDictionary> OwnersData(params string[] lines) + { + var builder = new PackageIdToOwnersBuilder(Logger); + ParseData(lines, builder.Add); + return builder.GetResult(); + } + + /// + /// A helper to turn lines formatted like this "FromPackage1: ToPackage1, ToPackage2" into package ID to popularity + /// transfers dictionary. + /// + public SortedDictionary> TransfersData(params string[] lines) + { + var builder = new PackageIdToPopularityTransfersBuilder(Logger); + ParseData(lines, builder.Add); + return builder.GetResult(); + } + + private void ParseData(string[] lines, Action> add) + { + foreach (var line in lines) + { + var pieces = line.Split(new[] { ':' }, 2); + var key = pieces[0].Trim(); + var values = pieces[1] + .Split(',') + .Select(x => x.Trim()) + .Where(x => x.Length > 0) + .ToList(); + + add(key, values); + } + } + } + } +} diff --git a/tests/NuGet.Services.AzureSearch.Tests/Auxiliary2AzureSearch/OwnerSetComparerFacts.cs b/tests/NuGet.Services.AzureSearch.Tests/Auxiliary2AzureSearch/OwnerSetComparerFacts.cs deleted file mode 100644 index 17d127952..000000000 --- a/tests/NuGet.Services.AzureSearch.Tests/Auxiliary2AzureSearch/OwnerSetComparerFacts.cs +++ /dev/null @@ -1,161 +0,0 @@ -// Copyright (c) .NET Foundation. All rights reserved. -// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. - -using System.Collections.Generic; -using System.Linq; -using Moq; -using Xunit; -using Xunit.Abstractions; - -namespace NuGet.Services.AzureSearch.Auxiliary2AzureSearch -{ - public class OwnerSetComparerFacts - { - public class Compare : Facts - { - public Compare(ITestOutputHelper output) : base(output) - { - } - - [Fact] - public void FindsAddedPackageIds() - { - var oldData = Data("NuGet.Core: NuGet, Microsoft"); - var newData = Data("NuGet.Core: NuGet, Microsoft", - "NuGet.Versioning: NuGet, Microsoft"); - - var changes = Target.Compare(oldData, newData); - - var pair = Assert.Single(changes); - Assert.Equal("NuGet.Versioning", pair.Key); - Assert.Equal(new[] { "Microsoft", "NuGet" }, pair.Value); - } - - - [Fact] - public void FindsRemovedPackageIds() - { - var oldData = Data("NuGet.Core: NuGet, Microsoft", - "NuGet.Versioning: NuGet, Microsoft"); - var newData = Data("NuGet.Core: NuGet, Microsoft"); - - var changes = Target.Compare(oldData, newData); - - var pair = Assert.Single(changes); - Assert.Equal("NuGet.Versioning", pair.Key); - Assert.Empty(pair.Value); - } - - [Fact] - public void FindsAddedOwner() - { - var oldData = Data("NuGet.Core: NuGet"); - var newData = Data("NuGet.Core: NuGet, Microsoft"); - - var changes = Target.Compare(oldData, newData); - - var pair = Assert.Single(changes); - Assert.Equal("NuGet.Core", pair.Key); - Assert.Equal(new[] { "Microsoft", "NuGet" }, pair.Value); - } - - [Fact] - public void FindsRemovedOwner() - { - var oldData = Data("NuGet.Core: NuGet, Microsoft"); - var newData = Data("NuGet.Core: NuGet"); - - var changes = Target.Compare(oldData, newData); - - var pair = Assert.Single(changes); - Assert.Equal("NuGet.Core", pair.Key); - Assert.Equal(new[] { "NuGet" }, pair.Value); - } - - [Fact] - public void FindsOwnerWithChangedCase() - { - var oldData = Data("NuGet.Core: NuGet, Microsoft"); - var newData = Data("NuGet.Core: NuGet, microsoft"); - - var changes = Target.Compare(oldData, newData); - - var pair = Assert.Single(changes); - Assert.Equal("NuGet.Core", pair.Key); - Assert.Equal(new[] { "microsoft", "NuGet" }, pair.Value); - } - - [Fact] - public void FindsManyChangesAtOnce() - { - var oldData = Data("NuGet.Core: NuGet, Microsoft", - "NuGet.Frameworks: NuGet", - "NuGet.Protocol: NuGet"); - var newData = Data("NuGet.Core: NuGet, microsoft", - "NuGet.Versioning: NuGet", - "NuGet.Protocol: NuGet"); - - var changes = Target.Compare(oldData, newData); - - Assert.Equal(3, changes.Count); - Assert.Equal(new[] { "NuGet.Core", "NuGet.Frameworks", "NuGet.Versioning" }, changes.Keys.ToArray()); - Assert.Equal(new[] { "microsoft", "NuGet" }, changes["NuGet.Core"]); - Assert.Empty(changes["NuGet.Frameworks"]); - Assert.Equal(new[] { "NuGet" }, changes["NuGet.Versioning"]); - } - - [Fact] - public void FindsNoChanges() - { - var oldData = Data("NuGet.Core: NuGet, Microsoft", - "NuGet.Versioning: NuGet, Microsoft"); - var newData = Data("NuGet.Core: NuGet, Microsoft", - "NuGet.Versioning: NuGet, Microsoft"); - - var changes = Target.Compare(oldData, newData); - - Assert.Empty(changes); - } - } - - public abstract class Facts - { - public Facts(ITestOutputHelper output) - { - TelemetryService = new Mock(); - Logger = output.GetLogger(); - - Target = new OwnerSetComparer( - TelemetryService.Object, - Logger); - } - - public Mock TelemetryService { get; } - public RecordingLogger Logger { get; } - public OwnerSetComparer Target { get; } - - /// - /// A helper to turn lines formatted like this "PackageId: OwnerA, OwnerB" into package ID to owners - /// dictionary. - /// - public SortedDictionary> Data(params string[] lines) - { - var builder = new PackageIdToOwnersBuilder(Logger); - foreach (var line in lines) - { - var pieces = line.Split(new[] { ':' }, 2); - var id = pieces[0].Trim(); - var usernames = pieces[1] - .Split(',') - .Select(x => x.Trim()) - .Where(x => x.Length > 0) - .ToList(); - - builder.Add(id, usernames); - } - - return builder.GetResult(); - } - } - } -} diff --git a/tests/NuGet.Services.AzureSearch.Tests/Auxiliary2AzureSearch/UpdateDownloadsCommandFacts.cs b/tests/NuGet.Services.AzureSearch.Tests/Auxiliary2AzureSearch/UpdateDownloadsCommandFacts.cs index 6c0f988da..67f928642 100644 --- a/tests/NuGet.Services.AzureSearch.Tests/Auxiliary2AzureSearch/UpdateDownloadsCommandFacts.cs +++ b/tests/NuGet.Services.AzureSearch.Tests/Auxiliary2AzureSearch/UpdateDownloadsCommandFacts.cs @@ -159,7 +159,7 @@ public async Task RejectsInvalidDataAndNormalizesVersions(string propertyName) } [Fact] - public async Task OverridesDownloadCounts() + public async Task AppliesDownloadTransfers() { DownloadSetComparer .Setup(c => c.Compare(It.IsAny(), It.IsAny())) @@ -179,8 +179,13 @@ public async Task OverridesDownloadCounts() NewDownloadData.SetDownloadCount("C", "5.0.0", 2); NewDownloadData.SetDownloadCount("C", "6.0.0", 3); - DownloadOverrides["A"] = 55; - DownloadOverrides["b"] = 66; + TransferChanges["A"] = 55; + TransferChanges["b"] = 66; + + LatestPopularityTransfers["FromPackage"] = new SortedSet(StringComparer.OrdinalIgnoreCase) + { + "ToPackage" + }; await Target.ExecuteAsync(); @@ -215,110 +220,13 @@ public async Task OverridesDownloadCounts() d["C"]["6.0.0"] == 3), It.IsAny()), Times.Once); - } - - [Fact] - public async Task AlwaysAppliesDownloadOverrides() - { - DownloadSetComparer - .Setup(c => c.Compare(It.IsAny(), It.IsAny())) - .Returns((oldData, newData) => - { - var config = new Auxiliary2AzureSearchConfiguration(); - var telemetry = Mock.Of(); - var logger = Mock.Of>(); - var options = new Mock>(); - - options.Setup(o => o.Value).Returns(config); - - return new DownloadSetComparer(telemetry, options.Object, logger) - .Compare(oldData, newData); - }); - - // Download override should be applied even if the package's downloads haven't changed. - OldDownloadData.SetDownloadCount("A", "1.0.0", 1); - NewDownloadData.SetDownloadCount("A", "1.0.0", 1); - DownloadOverrides["A"] = 2; - - await Target.ExecuteAsync(); - - // Documents should have new data with overriden downloads. - SearchDocumentBuilder - .Verify( - b => b.UpdateDownloadCount("A", SearchFilters.IncludePrereleaseAndSemVer2, 2), - Times.Once); - - // Downloads auxiliary file should have new data without overriden downloads. - DownloadDataClient.Verify( - c => c.ReplaceLatestIndexedAsync( - It.Is(d => - d["A"].Total == 1 && - d["A"]["1.0.0"] == 1), - It.IsAny()), - Times.Once); - } - - [Fact] - public async Task DoesNotOverrideIfDownloadsGreaterOrPackageHasNoDownloads() - { - DownloadSetComparer - .Setup(c => c.Compare(It.IsAny(), It.IsAny())) - .Returns((oldData, newData) => - { - return new SortedDictionary( - newData.ToDictionary(d => d.Key, d => d.Value.Total), - StringComparer.OrdinalIgnoreCase); - }); - - NewDownloadData.SetDownloadCount("A", "1.0.0", 100); - NewDownloadData.SetDownloadCount("A", "2.0.0", 200); - - NewDownloadData.SetDownloadCount("B", "3.0.0", 5); - NewDownloadData.SetDownloadCount("B", "4.0.0", 4); - - NewDownloadData.SetDownloadCount("C", "5.0.0", 0); - - DownloadOverrides["A"] = 55; - DownloadOverrides["C"] = 66; - DownloadOverrides["D"] = 77; - - await Target.ExecuteAsync(); - - // Documents should have new data with overriden downloads. - SearchDocumentBuilder - .Verify( - b => b.UpdateDownloadCount("A", SearchFilters.IncludePrereleaseAndSemVer2, 300), - Times.Once); - SearchDocumentBuilder - .Verify( - b => b.UpdateDownloadCount("B", SearchFilters.IncludePrereleaseAndSemVer2, 9), - Times.Once); - SearchDocumentBuilder - .Verify( - b => b.UpdateDownloadCount("B", SearchFilters.IncludePrereleaseAndSemVer2, 9), - Times.Once); - SearchDocumentBuilder - .Verify( - b => b.UpdateDownloadCount("C", It.IsAny(), It.IsAny()), - Times.Never); - SearchDocumentBuilder - .Verify( - b => b.UpdateDownloadCount("D", It.IsAny(), It.IsAny()), - Times.Never); - // Downloads auxiliary file should have new data without overriden downloads. - DownloadDataClient.Verify( + PopularityTransferDataClient.Verify( c => c.ReplaceLatestIndexedAsync( - It.Is(d => - d.Keys.Count() == 2 && - - d["A"].Total == 300 && - d["A"]["1.0.0"] == 100 && - d["A"]["2.0.0"] == 200 && - - d["B"].Total == 9 && - d["B"]["3.0.0"] == 5 && - d["B"]["4.0.0"] == 4), + It.Is>>(d => + d.Count == 1 && + d["FromPackage"].Count() == 1 && + d["FromPackage"].Contains("ToPackage")), It.IsAny()), Times.Once); } @@ -331,6 +239,8 @@ public Facts(ITestOutputHelper output) AuxiliaryFileClient = new Mock(); DownloadDataClient = new Mock(); DownloadSetComparer = new Mock(); + DownloadTransferrer = new Mock(); + PopularityTransferDataClient = new Mock(); SearchDocumentBuilder = new Mock(); IndexActionBuilder = new Mock(); BatchPusher = new Mock(); @@ -355,22 +265,38 @@ public Facts(ITestOutputHelper output) .ReturnsAsync(() => OldDownloadResult); NewDownloadData = new DownloadData(); AuxiliaryFileClient.Setup(x => x.LoadDownloadDataAsync()).ReturnsAsync(() => NewDownloadData); - DownloadOverrides = new Dictionary(StringComparer.OrdinalIgnoreCase); - AuxiliaryFileClient - .Setup(x => x.LoadDownloadOverridesAsync()) - .ReturnsAsync(() => DownloadOverrides); Changes = new SortedDictionary(); DownloadSetComparer .Setup(x => x.Compare(It.IsAny(), It.IsAny())) .Returns(() => Changes); + TransferChanges = new Dictionary(StringComparer.OrdinalIgnoreCase); + LatestPopularityTransfers = new SortedDictionary>(StringComparer.OrdinalIgnoreCase); + TransferResult = new DownloadTransferResult( + TransferChanges, + LatestPopularityTransfers); + DownloadTransferrer + .Setup(x => x.GetTransferChangesAsync( + It.IsAny(), + It.IsAny>(), + It.IsAny>>())) + .ReturnsAsync(TransferResult); + + OldTransferData = new SortedDictionary>(StringComparer.OrdinalIgnoreCase); + OldTransferResult = new ResultAndAccessCondition>>( + OldTransferData, + Mock.Of()); + PopularityTransferDataClient + .Setup(x => x.ReadLatestIndexedAsync()) + .ReturnsAsync(OldTransferResult); + IndexActions = new IndexActions( new List> { IndexAction.Merge(new KeyedDocument()) }, new List>(), new ResultAndAccessCondition( new VersionListData(new Dictionary()), - new Mock().Object)); + Mock.Of())); ProcessedIds = new ConcurrentBag(); IndexActionBuilder .Setup(x => x.UpdateAsync(It.IsAny(), It.IsAny>())) @@ -405,6 +331,8 @@ public Facts(ITestOutputHelper output) AuxiliaryFileClient.Object, DownloadDataClient.Object, DownloadSetComparer.Object, + DownloadTransferrer.Object, + PopularityTransferDataClient.Object, SearchDocumentBuilder.Object, IndexActionBuilder.Object, () => BatchPusher.Object, @@ -417,6 +345,8 @@ public Facts(ITestOutputHelper output) public Mock AuxiliaryFileClient { get; } public Mock DownloadDataClient { get; } public Mock DownloadSetComparer { get; } + public Mock DownloadTransferrer { get; } + public Mock PopularityTransferDataClient { get; } public Mock SearchDocumentBuilder { get; } public Mock IndexActionBuilder { get; } public Mock BatchPusher { get; } @@ -428,8 +358,12 @@ public Facts(ITestOutputHelper output) public DownloadData OldDownloadData { get; } public AuxiliaryFileResult OldDownloadResult { get; } public DownloadData NewDownloadData { get; } - public Dictionary DownloadOverrides { get; } + public SortedDictionary> OldTransferData { get; } + public ResultAndAccessCondition>> OldTransferResult { get; } public SortedDictionary Changes { get; } + public DownloadTransferResult TransferResult { get; } + public Dictionary TransferChanges { get; } + public SortedDictionary> LatestPopularityTransfers { get; } public UpdateDownloadsCommand Target { get; } public IndexActions IndexActions { get; set; } public ConcurrentBag ProcessedIds { get; } diff --git a/tests/NuGet.Services.AzureSearch.Tests/Auxiliary2AzureSearch/UpdateOwnersCommandFacts.cs b/tests/NuGet.Services.AzureSearch.Tests/Auxiliary2AzureSearch/UpdateOwnersCommandFacts.cs index 0a50f66d9..6b9fe2675 100644 --- a/tests/NuGet.Services.AzureSearch.Tests/Auxiliary2AzureSearch/UpdateOwnersCommandFacts.cs +++ b/tests/NuGet.Services.AzureSearch.Tests/Auxiliary2AzureSearch/UpdateOwnersCommandFacts.cs @@ -47,12 +47,12 @@ public async Task ComparesInTheRightOrder() await Target.ExecuteAsync(); OwnerSetComparer.Verify( - x => x.Compare( + x => x.CompareOwners( It.IsAny>>(), It.IsAny>>()), Times.Once); OwnerSetComparer.Verify( - x => x.Compare(StorageResult.Result, DatabaseResult), + x => x.CompareOwners(StorageResult.Result, DatabaseResult), Times.Once); } @@ -169,7 +169,7 @@ public Facts(ITestOutputHelper output) { DatabaseOwnerFetcher = new Mock(); OwnerDataClient = new Mock(); - OwnerSetComparer = new Mock(); + OwnerSetComparer = new Mock(); SearchDocumentBuilder = new Mock(); SearchIndexActionBuilder = new Mock(); Pusher = new Mock(); @@ -203,7 +203,7 @@ public Facts(ITestOutputHelper output) .Setup(x => x.ReadLatestIndexedAsync()) .ReturnsAsync(() => StorageResult); OwnerSetComparer - .Setup(x => x.Compare( + .Setup(x => x.CompareOwners( It.IsAny>>(), It.IsAny>>())) .Returns(() => Changes); @@ -225,7 +225,7 @@ public Facts(ITestOutputHelper output) public Mock DatabaseOwnerFetcher { get; } public Mock OwnerDataClient { get; } - public Mock OwnerSetComparer { get; } + public Mock OwnerSetComparer { get; } public Mock SearchDocumentBuilder { get; } public Mock SearchIndexActionBuilder { get; } public Mock Pusher { get; } diff --git a/tests/NuGet.Services.AzureSearch.Tests/AuxiliaryFiles/OwnerDataClientFacts.cs b/tests/NuGet.Services.AzureSearch.Tests/AuxiliaryFiles/OwnerDataClientFacts.cs index 67c53e88a..b091a4f88 100644 --- a/tests/NuGet.Services.AzureSearch.Tests/AuxiliaryFiles/OwnerDataClientFacts.cs +++ b/tests/NuGet.Services.AzureSearch.Tests/AuxiliaryFiles/OwnerDataClientFacts.cs @@ -41,6 +41,12 @@ public async Task AllowsEmptyObject() Assert.Empty(output.Result); Assert.Equal(ETag, output.AccessCondition.IfMatchETag); + + TelemetryService.Verify( + x => x.TrackReadLatestIndexedOwners( + /* packageIdCount: */ 0, + It.IsAny()), + Times.Once); } [Fact] @@ -60,6 +66,12 @@ public async Task AllowsMissingBlob() Assert.Empty(output.Result); Assert.Equal("*", output.AccessCondition.IfNoneMatchETag); + + TelemetryService.Verify( + x => x.TrackReadLatestIndexedOwners( + /* packageIdCount: */ 0, + It.IsAny()), + Times.Once); } [Fact] @@ -82,10 +94,15 @@ public async Task RejectsInvalidJson() .Setup(x => x.OpenReadAsync(It.IsAny())) .ReturnsAsync(() => new MemoryStream(Encoding.UTF8.GetBytes(json))); - var ex = await Assert.ThrowsAsync( () => Target.ReadLatestIndexedAsync()); Assert.Equal("The first token should be the start of an object.", ex.Message); + + TelemetryService.Verify( + x => x.TrackReadLatestIndexedOwners( + It.IsAny(), + It.IsAny()), + Times.Never); } [Fact] @@ -122,9 +139,20 @@ public async Task ReadsOwners() Assert.Equal(new[] { "nuget" }, output.Result["NuGet.Core"].ToArray()); Assert.Equal(new[] { "Microsoft", "nuget" }, output.Result["nuget.versioning"].ToArray()); Assert.Equal(new[] { "ownerA", "ownerB" }, output.Result["ZDuplicate"].ToArray()); + Assert.Equal(StringComparer.OrdinalIgnoreCase, output.Result.Comparer); + Assert.Equal(StringComparer.OrdinalIgnoreCase, output.Result["EntityFramework"].Comparer); + Assert.Equal(StringComparer.OrdinalIgnoreCase, output.Result["NuGet.Core"].Comparer); + Assert.Equal(StringComparer.OrdinalIgnoreCase, output.Result["nuget.versioning"].Comparer); + Assert.Equal(StringComparer.OrdinalIgnoreCase, output.Result["ZDuplicate"].Comparer); Assert.Equal(ETag, output.AccessCondition.IfMatchETag); CloudBlobContainer.Verify(x => x.GetBlobReference("owners/owners.v2.json"), Times.Once); + + TelemetryService.Verify( + x => x.TrackReadLatestIndexedOwners( + /* packageIdCount: */ 4, + It.IsAny()), + Times.Once); } [Fact] @@ -150,23 +178,26 @@ public async Task IgnoresEmptyOwnerLists() Assert.Single(output.Result); Assert.Equal(new[] { "NuGet.Core" }, output.Result.Keys.ToArray()); Assert.Equal(new[] { "nuget" }, output.Result["NuGet.Core"].ToArray()); + Assert.Equal(StringComparer.OrdinalIgnoreCase, output.Result.Comparer); + Assert.Equal(StringComparer.OrdinalIgnoreCase, output.Result["NuGet.Core"].Comparer); Assert.Equal(ETag, output.AccessCondition.IfMatchETag); + + TelemetryService.Verify( + x => x.TrackReadLatestIndexedOwners( + /* packageIdCount: */ 1, + It.IsAny()), + Times.Once); } [Fact] - public async Task AllowsDuplicateIdsWithDifferentCase() + public async Task AllowsDuplicateIds() { - var json = JsonConvert.SerializeObject(new SortedDictionary(StringComparer.Ordinal) - { - { - "NuGet.Core", - new[] { "nuget" } - }, - { - "nuget.core", - new[] { "microsoft" } - }, - }); + var json = @" +{ + ""NuGet.Core"": [ ""nuget"" ], + ""NuGet.Core"": [ ""aspnet"" ], + ""nuget.core"": [ ""microsoft"" ] +}"; CloudBlob .Setup(x => x.OpenReadAsync(It.IsAny())) .ReturnsAsync(() => new MemoryStream(Encoding.UTF8.GetBytes(json))); @@ -175,8 +206,16 @@ public async Task AllowsDuplicateIdsWithDifferentCase() Assert.Single(output.Result); Assert.Equal(new[] { "NuGet.Core" }, output.Result.Keys.ToArray()); - Assert.Equal(new[] { "microsoft", "nuget" }, output.Result["NuGet.Core"].ToArray()); + Assert.Equal(new[] { "aspnet", "microsoft", "nuget" }, output.Result["NuGet.Core"].ToArray()); + Assert.Equal(StringComparer.OrdinalIgnoreCase, output.Result.Comparer); + Assert.Equal(StringComparer.OrdinalIgnoreCase, output.Result["NuGet.Core"].Comparer); Assert.Equal(ETag, output.AccessCondition.IfMatchETag); + + TelemetryService.Verify( + x => x.TrackReadLatestIndexedOwners( + /* packageIdCount: */ 1, + It.IsAny()), + Times.Once); } } @@ -271,6 +310,12 @@ public async Task SerializesVersionsSortedOrder() ""ownerB"" ] }", json); + + TelemetryService.Verify( + x => x.TrackReplaceLatestIndexedOwners( + /*packageIdCount: */ 4), + Times.Once); + ReplaceLatestIndexedOwnersDurationMetric.Verify(x => x.Dispose(), Times.Once); } } @@ -386,6 +431,7 @@ public Facts(ITestOutputHelper output) ETag = "\"some-etag\""; AccessCondition = new Mock(); + ReplaceLatestIndexedOwnersDurationMetric = new Mock(); Options .Setup(x => x.Value) @@ -411,6 +457,10 @@ public Facts(ITestOutputHelper output) .Setup(x => x.Properties) .Returns(new CloudBlockBlob(new Uri("https://example/blob")).Properties); + TelemetryService + .Setup(x => x.TrackReplaceLatestIndexedOwners(It.IsAny())) + .Returns(ReplaceLatestIndexedOwnersDurationMetric.Object); + Target = new OwnerDataClient( CloudBlobClient.Object, Options.Object, @@ -427,36 +477,12 @@ public Facts(ITestOutputHelper output) public AzureSearchJobConfiguration Config { get; } public string ETag { get; } public Mock AccessCondition { get; } + public Mock ReplaceLatestIndexedOwnersDurationMetric { get; } public OwnerDataClient Target { get; } public List BlobNames { get; } = new List(); public List SavedBytes { get; } = new List(); public List SavedStrings { get; } = new List(); } - - private class RecordingStream : MemoryStream - { - private readonly object _lock = new object(); - private Action _onDispose; - - public RecordingStream(Action onDispose) - { - _onDispose = onDispose; - } - - protected override void Dispose(bool disposing) - { - lock (_lock) - { - if (_onDispose != null) - { - _onDispose(ToArray()); - _onDispose = null; - } - } - - base.Dispose(disposing); - } - } } } diff --git a/tests/NuGet.Services.AzureSearch.Tests/AuxiliaryFiles/PopularityTransferDataClientFacts.cs b/tests/NuGet.Services.AzureSearch.Tests/AuxiliaryFiles/PopularityTransferDataClientFacts.cs new file mode 100644 index 000000000..c9b6fe34d --- /dev/null +++ b/tests/NuGet.Services.AzureSearch.Tests/AuxiliaryFiles/PopularityTransferDataClientFacts.cs @@ -0,0 +1,370 @@ +// Copyright (c) .NET Foundation. All rights reserved. +// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. + +using System; +using System.Collections.Generic; +using System.IO; +using System.Linq; +using System.Net; +using System.Text; +using System.Threading.Tasks; +using Microsoft.Extensions.Options; +using Microsoft.WindowsAzure.Storage; +using Microsoft.WindowsAzure.Storage.Blob; +using Moq; +using Newtonsoft.Json; +using Newtonsoft.Json.Linq; +using NuGetGallery; +using Xunit; +using Xunit.Abstractions; + +namespace NuGet.Services.AzureSearch.AuxiliaryFiles +{ + public class PopularityTransferDataClientFacts + { + public class ReadLatestIndexed : Facts + { + public ReadLatestIndexed(ITestOutputHelper output) : base(output) + { + } + + [Fact] + public async Task AllowsEmptyObject() + { + var json = JsonConvert.SerializeObject(new Dictionary()); + CloudBlob + .Setup(x => x.OpenReadAsync(It.IsAny())) + .ReturnsAsync(() => new MemoryStream(Encoding.UTF8.GetBytes(json))); + + var output = await Target.ReadLatestIndexedAsync(); + + Assert.Empty(output.Result); + Assert.Equal(ETag, output.AccessCondition.IfMatchETag); + + TelemetryService.Verify( + x => x.TrackReadLatestIndexedPopularityTransfers( + /*outgoingTransfers: */ 0, + It.IsAny()), + Times.Once); + } + + [Fact] + public async Task AllowsMissingBlob() + { + CloudBlob + .Setup(x => x.OpenReadAsync(It.IsAny())) + .ThrowsAsync(new StorageException( + new RequestResult + { + HttpStatusCode = (int)HttpStatusCode.NotFound, + }, + message: "Not found.", + inner: null)); + + var output = await Target.ReadLatestIndexedAsync(); + + Assert.Empty(output.Result); + Assert.Equal("*", output.AccessCondition.IfNoneMatchETag); + + TelemetryService.Verify( + x => x.TrackReadLatestIndexedPopularityTransfers( + /*outgoingTransfers: */ 0, + It.IsAny()), + Times.Once); + } + + [Fact] + public async Task RejectsInvalidJson() + { + var json = JsonConvert.SerializeObject(new object[] + { + new object[] + { + "WindowsAzure.ServiceBus", + new[] { "Azure.Messaging.ServiceBus" } + }, + new object[] + { + "WindowsAzure.Storage", + new[] { "Azure.Storage.Blobs", "Azure.Storage.Queues" } + } + }); + CloudBlob + .Setup(x => x.OpenReadAsync(It.IsAny())) + .ReturnsAsync(() => new MemoryStream(Encoding.UTF8.GetBytes(json))); + + var ex = await Assert.ThrowsAsync( + () => Target.ReadLatestIndexedAsync()); + Assert.Equal("The first token should be the start of an object.", ex.Message); + } + + [Fact] + public async Task ReadsPopularityTransfers() + { + var json = JsonConvert.SerializeObject(new Dictionary + { + { + "windowsazure.servicebus", + new[] { "Azure.Messaging.ServiceBus" } + }, + { + "WindowsAzure.Storage", + new[] { "Azure.Storage.Blobs", "Azure.Storage.Queues" } + }, + { + "ZDuplicate", + new[] { "packageA", "packagea", "PACKAGEA", "packageB" } + }, + }); + CloudBlob + .Setup(x => x.OpenReadAsync(It.IsAny())) + .ReturnsAsync(() => new MemoryStream(Encoding.UTF8.GetBytes(json))); + + var output = await Target.ReadLatestIndexedAsync(); + + Assert.Equal(3, output.Result.Count); + Assert.Equal(new[] { "windowsazure.servicebus", "WindowsAzure.Storage", "ZDuplicate" }, output.Result.Keys.ToArray()); + Assert.Equal(new[] { "Azure.Messaging.ServiceBus" }, output.Result["windowsazure.servicebus"].ToArray()); + Assert.Equal(new[] { "Azure.Storage.Blobs", "Azure.Storage.Queues" }, output.Result["WindowsAzure.Storage"].ToArray()); + Assert.Equal(new[] { "packageA", "packageB" }, output.Result["ZDuplicate"].ToArray()); + Assert.Equal(StringComparer.OrdinalIgnoreCase, output.Result.Comparer); + Assert.Equal(StringComparer.OrdinalIgnoreCase, output.Result["windowsazure.servicebus"].Comparer); + Assert.Equal(StringComparer.OrdinalIgnoreCase, output.Result["WindowsAzure.Storage"].Comparer); + Assert.Equal(StringComparer.OrdinalIgnoreCase, output.Result["ZDuplicate"].Comparer); + Assert.Equal(ETag, output.AccessCondition.IfMatchETag); + + CloudBlobContainer.Verify(x => x.GetBlobReference("popularity-transfers/popularity-transfers.v1.json"), Times.Once); + TelemetryService.Verify( + x => x.TrackReadLatestIndexedPopularityTransfers( + /*outgoingTransfers: */ 3, + It.IsAny()), + Times.Once); + } + + [Fact] + public async Task IgnoresEmptyOwnerLists() + { + var json = JsonConvert.SerializeObject(new Dictionary + { + { + "NoTransfers", + new string[0] + }, + { + "PackageA", + new[] { "PackageB" } + }, + }); + CloudBlob + .Setup(x => x.OpenReadAsync(It.IsAny())) + .ReturnsAsync(() => new MemoryStream(Encoding.UTF8.GetBytes(json))); + + var output = await Target.ReadLatestIndexedAsync(); + + Assert.Single(output.Result); + Assert.Equal(new[] { "PackageA" }, output.Result.Keys.ToArray()); + Assert.Equal(new[] { "PackageB" }, output.Result["PackageA"].ToArray()); + Assert.Equal(StringComparer.OrdinalIgnoreCase, output.Result.Comparer); + Assert.Equal(StringComparer.OrdinalIgnoreCase, output.Result["PackageA"].Comparer); + Assert.Equal(ETag, output.AccessCondition.IfMatchETag); + + TelemetryService.Verify( + x => x.TrackReadLatestIndexedPopularityTransfers( + /*outgoingTransfers: */ 1, + It.IsAny()), + Times.Once); + } + + [Fact] + public async Task AllowsDuplicateIds() + { + var json = @" +{ + ""PackageA"": [ ""packageB"" ], + ""PackageA"": [ ""packageC"" ], + ""packagea"": [ ""packageD"" ] +}"; + + CloudBlob + .Setup(x => x.OpenReadAsync(It.IsAny())) + .ReturnsAsync(() => new MemoryStream(Encoding.UTF8.GetBytes(json))); + + var output = await Target.ReadLatestIndexedAsync(); + + Assert.Single(output.Result); + Assert.Equal(new[] { "PackageA" }, output.Result.Keys.ToArray()); + Assert.Equal(new[] { "packageB", "packageC", "packageD" }, output.Result["packageA"].ToArray()); + Assert.Equal(StringComparer.OrdinalIgnoreCase, output.Result.Comparer); + Assert.Equal(StringComparer.OrdinalIgnoreCase, output.Result["packageA"].Comparer); + Assert.Equal(ETag, output.AccessCondition.IfMatchETag); + + TelemetryService.Verify( + x => x.TrackReadLatestIndexedPopularityTransfers( + /*outgoingTransfers: */ 1, + It.IsAny()), + Times.Once); + } + } + + public class ReplaceLatestIndexed : Facts + { + public ReplaceLatestIndexed(ITestOutputHelper output) : base(output) + { + } + + [Fact] + public async Task SerializesWithoutBOM() + { + var newData = new SortedDictionary>(); + + await Target.ReplaceLatestIndexedAsync(newData, AccessCondition.Object); + + var bytes = Assert.Single(SavedBytes); + Assert.Equal((byte)'{', bytes[0]); + } + + [Fact] + public async Task SetsContentType() + { + var newData = new SortedDictionary>(); + + await Target.ReplaceLatestIndexedAsync(newData, AccessCondition.Object); + + Assert.Equal("application/json", CloudBlob.Object.Properties.ContentType); + } + + [Fact] + public async Task SerializedWithoutIndentation() + { + var newData = new SortedDictionary>(StringComparer.OrdinalIgnoreCase) + { + { + "PackageA", + new SortedSet(StringComparer.OrdinalIgnoreCase) { "packageB", "packageC" } + } + }; + + await Target.ReplaceLatestIndexedAsync(newData, AccessCondition.Object); + + var json = Assert.Single(SavedStrings); + Assert.DoesNotContain("\n", json); + } + + [Fact] + public async Task SerializesVersionsSortedOrder() + { + var newData = new SortedDictionary>(StringComparer.OrdinalIgnoreCase) + { + { + "PackageB", + new SortedSet(StringComparer.OrdinalIgnoreCase) { "PackageA", "PackageB" } + }, + { + "PackageA", + new SortedSet(StringComparer.OrdinalIgnoreCase) { "PackageC", "packagec", "packageC", "PackageB" } + }, + { + "PackageC", + new SortedSet(StringComparer.OrdinalIgnoreCase) { "PackageZ" } + } + }; + + await Target.ReplaceLatestIndexedAsync(newData, AccessCondition.Object); + + // Pretty-ify the JSON to make the assertion clearer. + var json = Assert.Single(SavedStrings); + json = JsonConvert.DeserializeObject(json).ToString(); + + Assert.Equal(@"{ + ""PackageA"": [ + ""PackageB"", + ""PackageC"" + ], + ""PackageB"": [ + ""PackageA"", + ""PackageB"" + ], + ""PackageC"": [ + ""PackageZ"" + ] +}", json); + TelemetryService.Verify( + x => x.TrackReplaceLatestIndexedPopularityTransfers( + /*outgoingTransfers: */ 3), + Times.Once); + ReplaceLatestIndexedPopularityTransfersDurationMetric.Verify(x => x.Dispose(), Times.Once); + } + } + + public abstract class Facts + { + public Facts(ITestOutputHelper output) + { + CloudBlobClient = new Mock(); + CloudBlobContainer = new Mock(); + CloudBlob = new Mock(); + Options = new Mock>(); + TelemetryService = new Mock(); + Logger = output.GetLogger(); + Config = new AzureSearchJobConfiguration + { + StorageContainer = "unit-test-container", + }; + + ETag = "\"some-etag\""; + AccessCondition = new Mock(); + ReplaceLatestIndexedPopularityTransfersDurationMetric = new Mock(); + + Options + .Setup(x => x.Value) + .Returns(() => Config); + CloudBlobClient + .Setup(x => x.GetContainerReference(It.IsAny())) + .Returns(() => CloudBlobContainer.Object); + CloudBlobContainer + .Setup(x => x.GetBlobReference(It.IsAny())) + .Returns(() => CloudBlob.Object) + .Callback(x => BlobNames.Add(x)); + CloudBlob + .Setup(x => x.ETag) + .Returns(ETag); + CloudBlob + .Setup(x => x.OpenWriteAsync(It.IsAny())) + .ReturnsAsync(() => new RecordingStream(bytes => + { + SavedBytes.Add(bytes); + SavedStrings.Add(Encoding.UTF8.GetString(bytes)); + })); + CloudBlob + .Setup(x => x.Properties) + .Returns(new CloudBlockBlob(new Uri("https://example/blob")).Properties); + + TelemetryService + .Setup(x => x.TrackReplaceLatestIndexedPopularityTransfers(It.IsAny())) + .Returns(ReplaceLatestIndexedPopularityTransfersDurationMetric.Object); + + Target = new PopularityTransferDataClient( + CloudBlobClient.Object, + Options.Object, + TelemetryService.Object, + Logger); + } + + public Mock CloudBlobClient { get; } + public Mock CloudBlobContainer { get; } + public Mock CloudBlob { get; } + public Mock> Options { get; } + public Mock TelemetryService { get; } + public RecordingLogger Logger { get; } + public AzureSearchJobConfiguration Config { get; } + public string ETag { get; } + public Mock AccessCondition { get; } + public Mock ReplaceLatestIndexedPopularityTransfersDurationMetric { get; } + public PopularityTransferDataClient Target { get; } + + public List BlobNames { get; } = new List(); + public List SavedBytes { get; } = new List(); + public List SavedStrings { get; } = new List(); + } + } +} diff --git a/tests/NuGet.Services.AzureSearch.Tests/Db2AzureSearch/Db2AzureSearchCommandFacts.cs b/tests/NuGet.Services.AzureSearch.Tests/Db2AzureSearch/Db2AzureSearchCommandFacts.cs index 5628eb38d..4aeba00fa 100644 --- a/tests/NuGet.Services.AzureSearch.Tests/Db2AzureSearch/Db2AzureSearchCommandFacts.cs +++ b/tests/NuGet.Services.AzureSearch.Tests/Db2AzureSearch/Db2AzureSearchCommandFacts.cs @@ -32,6 +32,7 @@ public class Db2AzureSearchCommandFacts private readonly Mock _ownerDataClient; private readonly Mock _downloadDataClient; private readonly Mock _verifiedPackagesDataClient; + private readonly Mock _popularityTransferDataClient; private readonly Mock> _options; private readonly Mock> _developmentOptions; private readonly Db2AzureSearchConfiguration _config; @@ -53,6 +54,7 @@ public Db2AzureSearchCommandFacts(ITestOutputHelper output) _ownerDataClient = new Mock(); _downloadDataClient = new Mock(); _verifiedPackagesDataClient = new Mock(); + _popularityTransferDataClient = new Mock(); _options = new Mock>(); _developmentOptions = new Mock>(); _logger = output.GetLogger(); @@ -68,7 +70,8 @@ public Db2AzureSearchCommandFacts(ITestOutputHelper output) owners: new SortedDictionary>(), downloads: new DownloadData(), excludedPackages: new HashSet(), - verifiedPackages: new HashSet()); + verifiedPackages: new HashSet(), + popularityTransfers: new SortedDictionary>()); _options .Setup(x => x.Value) @@ -108,6 +111,7 @@ public Db2AzureSearchCommandFacts(ITestOutputHelper output) _ownerDataClient.Object, _downloadDataClient.Object, _verifiedPackagesDataClient.Object, + _popularityTransferDataClient.Object, _options.Object, _developmentOptions.Object, _logger); diff --git a/tests/NuGet.Services.AzureSearch.Tests/Db2AzureSearch/NewPackageRegistrationProducerFacts.cs b/tests/NuGet.Services.AzureSearch.Tests/Db2AzureSearch/NewPackageRegistrationProducerFacts.cs index 603126477..e12fda462 100644 --- a/tests/NuGet.Services.AzureSearch.Tests/Db2AzureSearch/NewPackageRegistrationProducerFacts.cs +++ b/tests/NuGet.Services.AzureSearch.Tests/Db2AzureSearch/NewPackageRegistrationProducerFacts.cs @@ -37,8 +37,11 @@ public class ProduceWorkAsync private readonly CancellationToken _token; private readonly NewPackageRegistrationProducer _target; private readonly Mock _auxiliaryFileClient; + private readonly Mock _downloadTransferrer; private readonly DownloadData _downloads; - private readonly Dictionary _downloadOverrides; + private readonly DownloadTransferResult _transferResult; + private readonly Dictionary _transferChanges; + private readonly SortedDictionary> _latestPopularityTransfers; private HashSet _excludedPackages; public ProduceWorkAsync(ITestOutputHelper output) @@ -67,10 +70,16 @@ public ProduceWorkAsync(ITestOutputHelper output) _auxiliaryFileClient .Setup(x => x.LoadDownloadDataAsync()) .ReturnsAsync(() => _downloads); - _downloadOverrides = new Dictionary(StringComparer.OrdinalIgnoreCase); - _auxiliaryFileClient - .Setup(x => x.LoadDownloadOverridesAsync()) - .ReturnsAsync(() => _downloadOverrides); + + _downloadTransferrer = new Mock(); + _transferChanges = new Dictionary(StringComparer.OrdinalIgnoreCase); + _latestPopularityTransfers = new SortedDictionary>(StringComparer.OrdinalIgnoreCase); + _transferResult = new DownloadTransferResult( + _transferChanges, + _latestPopularityTransfers); + _downloadTransferrer + .Setup(x => x.GetTransferChangesAsync(It.IsAny())) + .ReturnsAsync(_transferResult); _entitiesContextFactory .Setup(x => x.CreateAsync(It.IsAny())) @@ -91,6 +100,7 @@ public ProduceWorkAsync(ITestOutputHelper output) _target = new NewPackageRegistrationProducer( _entitiesContextFactory.Object, _auxiliaryFileClient.Object, + _downloadTransferrer.Object, _options.Object, _developmentOptions.Object, _logger); @@ -386,11 +396,16 @@ public async Task ReturnsInitialAuxiliaryData() }, IsVerified = true, }); + _latestPopularityTransfers["FromPackage"] = new SortedSet(StringComparer.OrdinalIgnoreCase) + { + "ToPackage" + }; var output = await _target.ProduceWorkAsync(_work, _token); Assert.Same(_downloads, output.Downloads); Assert.Same(_excludedPackages, output.ExcludedPackages); + Assert.Same(_latestPopularityTransfers, output.PopularityTransfers); Assert.NotNull(output.VerifiedPackages); Assert.Contains("A", output.VerifiedPackages); Assert.NotNull(output.Owners); @@ -417,7 +432,7 @@ public async Task ThrowsWhenExcludedPackagesIsMissing() } [Fact] - public async Task OverridesDownloadCounts() + public async Task AppliesDownloadTransfers() { _packageRegistrations.Add(new PackageRegistration { @@ -458,8 +473,8 @@ public async Task OverridesDownloadCounts() InitializePackagesFromPackageRegistrations(); - _downloadOverrides["A"] = 55; - _downloadOverrides["b"] = 66; + _transferChanges["A"] = 55; + _transferChanges["b"] = 66; var result = await _target.ProduceWorkAsync(_work, _token); @@ -491,79 +506,6 @@ public async Task OverridesDownloadCounts() Assert.Equal(3, result.Downloads["C"]["6.0.0"]); } - [Fact] - public async Task DoesNotOverrideIfDownloadsGreaterOrPackageHasNoDownloads() - { - _packageRegistrations.Add(new PackageRegistration - { - Key = 1, - Id = "A", - Packages = new[] - { - new Package { Version = "1.0.0" }, - new Package { Version = "2.0.0" }, - }, - }); - _downloads.SetDownloadCount("A", "1.0.0", 100); - _downloads.SetDownloadCount("A", "2.0.0", 200); - _packageRegistrations.Add(new PackageRegistration - { - Key = 2, - Id = "B", - Packages = new[] - { - new Package { Version = "3.0.0" }, - new Package { Version = "4.0.0" }, - }, - }); - _downloads.SetDownloadCount("B", "3.0.0", 5); - _downloads.SetDownloadCount("B", "4.0.0", 4); - _packageRegistrations.Add(new PackageRegistration - { - Key = 3, - Id = "C", - Packages = new[] - { - new Package { Version = "5.0.0" }, - }, - }); - _downloads.SetDownloadCount("C", "5.0.0", 0); - - InitializePackagesFromPackageRegistrations(); - - _downloadOverrides["A"] = 55; - _downloadOverrides["C"] = 66; - _downloadOverrides["D"] = 77; - - var result = await _target.ProduceWorkAsync(_work, _token); - - // Documents should have overriden downloads. - var work = _work.Reverse().ToList(); - Assert.Equal(3, work.Count); - - Assert.Equal("A", work[0].PackageId); - Assert.Equal("1.0.0", work[0].Packages[0].Version); - Assert.Equal("2.0.0", work[0].Packages[1].Version); - Assert.Equal(300, work[0].TotalDownloadCount); - - Assert.Equal("B", work[1].PackageId); - Assert.Equal("3.0.0", work[1].Packages[0].Version); - Assert.Equal("4.0.0", work[1].Packages[1].Version); - Assert.Equal(9, work[1].TotalDownloadCount); - - Assert.Equal("C", work[2].PackageId); - Assert.Equal("5.0.0", work[2].Packages[0].Version); - Assert.Equal(0, work[2].TotalDownloadCount); - - // Downloads auxiliary file should have original downloads. - Assert.Equal(100, result.Downloads["A"]["1.0.0"]); - Assert.Equal(200, result.Downloads["A"]["2.0.0"]); - Assert.Equal(5, result.Downloads["B"]["3.0.0"]); - Assert.Equal(4, result.Downloads["B"]["4.0.0"]); - Assert.DoesNotContain("C", result.Downloads.Keys); - Assert.DoesNotContain("D", result.Downloads.Keys); - } - private void InitializePackagesFromPackageRegistrations() { foreach (var pr in _packageRegistrations) diff --git a/tests/NuGet.Services.AzureSearch.Tests/NuGet.Services.AzureSearch.Tests.csproj b/tests/NuGet.Services.AzureSearch.Tests/NuGet.Services.AzureSearch.Tests.csproj index d1d85e48d..c3f5d1a9e 100644 --- a/tests/NuGet.Services.AzureSearch.Tests/NuGet.Services.AzureSearch.Tests.csproj +++ b/tests/NuGet.Services.AzureSearch.Tests/NuGet.Services.AzureSearch.Tests.csproj @@ -44,6 +44,7 @@ + @@ -67,7 +68,7 @@ - + diff --git a/tests/NuGet.Services.V3.Tests/NuGet.Services.V3.Tests.csproj b/tests/NuGet.Services.V3.Tests/NuGet.Services.V3.Tests.csproj index acddda2be..e11fb0e26 100644 --- a/tests/NuGet.Services.V3.Tests/NuGet.Services.V3.Tests.csproj +++ b/tests/NuGet.Services.V3.Tests/NuGet.Services.V3.Tests.csproj @@ -45,6 +45,7 @@ + diff --git a/tests/NuGet.Services.V3.Tests/Support/RecordingStream.cs b/tests/NuGet.Services.V3.Tests/Support/RecordingStream.cs new file mode 100644 index 000000000..6cce298f3 --- /dev/null +++ b/tests/NuGet.Services.V3.Tests/Support/RecordingStream.cs @@ -0,0 +1,33 @@ +// Copyright (c) .NET Foundation. All rights reserved. +// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. + +using System; +using System.IO; + +namespace NuGet.Services +{ + public class RecordingStream : MemoryStream + { + private readonly object _lock = new object(); + private Action _onDispose; + + public RecordingStream(Action onDispose) + { + _onDispose = onDispose; + } + + protected override void Dispose(bool disposing) + { + lock (_lock) + { + if (_onDispose != null) + { + _onDispose(ToArray()); + _onDispose = null; + } + } + + base.Dispose(disposing); + } + } +}