Skip to content
This repository has been archived by the owner on Mar 16, 2021. It is now read-only.

Commit

Permalink
Refactor download overrides (#768)
Browse files Browse the repository at this point in the history
This change introduces a new type, `IDownloadTransferrer`, which determines what downloads should be changed to reflect the latest download overrides. In the future, this type will also apply popularity transfers.

Part of NuGet/NuGetGallery#7898
  • Loading branch information
loic-sharma authored Apr 20, 2020
1 parent 06a7c88 commit 5fe1ba7
Show file tree
Hide file tree
Showing 15 changed files with 702 additions and 272 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,11 @@ public class UpdateDownloadsCommand : IAzureSearchCommand
private static readonly int MaxDocumentsPerId = Enum.GetValues(typeof(SearchFilters)).Length;

private readonly IAuxiliaryFileClient _auxiliaryFileClient;
private readonly IDatabaseAuxiliaryDataFetcher _databaseFetcher;
private readonly IDownloadDataClient _downloadDataClient;
private readonly IDownloadSetComparer _downloadSetComparer;
private readonly IDownloadTransferrer _downloadTransferrer;
private readonly IPopularityTransferDataClient _popularityTransferDataClient;
private readonly ISearchDocumentBuilder _searchDocumentBuilder;
private readonly ISearchIndexActionBuilder _indexActionBuilder;
private readonly Func<IBatchPusher> _batchPusherFactory;
Expand All @@ -41,8 +44,11 @@ public class UpdateDownloadsCommand : IAzureSearchCommand

public UpdateDownloadsCommand(
IAuxiliaryFileClient auxiliaryFileClient,
IDatabaseAuxiliaryDataFetcher databaseFetcher,
IDownloadDataClient downloadDataClient,
IDownloadSetComparer downloadSetComparer,
IDownloadTransferrer downloadTransferrer,
IPopularityTransferDataClient popularityTransferDataClient,
ISearchDocumentBuilder searchDocumentBuilder,
ISearchIndexActionBuilder indexActionBuilder,
Func<IBatchPusher> batchPusherFactory,
Expand All @@ -52,8 +58,11 @@ public UpdateDownloadsCommand(
ILogger<Auxiliary2AzureSearchCommand> logger)
{
// A missing dependency is reported as ArgumentNullException carrying the parameter name, like the checks below.
_auxiliaryFileClient = auxiliaryFileClient ?? throw new ArgumentNullException(nameof(auxiliaryFileClient));
_databaseFetcher = databaseFetcher ?? throw new ArgumentNullException(nameof(databaseFetcher));
_downloadDataClient = downloadDataClient ?? throw new ArgumentNullException(nameof(downloadDataClient));
_downloadSetComparer = downloadSetComparer ?? throw new ArgumentNullException(nameof(downloadSetComparer));
_downloadTransferrer = downloadTransferrer ?? throw new ArgumentNullException(nameof(downloadTransferrer));
_popularityTransferDataClient = popularityTransferDataClient ?? throw new ArgumentNullException(nameof(popularityTransferDataClient));
_searchDocumentBuilder = searchDocumentBuilder ?? throw new ArgumentNullException(nameof(searchDocumentBuilder));
_indexActionBuilder = indexActionBuilder ?? throw new ArgumentNullException(nameof(indexActionBuilder));
_batchPusherFactory = batchPusherFactory ?? throw new ArgumentNullException(nameof(batchPusherFactory));
Expand Down Expand Up @@ -106,23 +115,38 @@ private async Task<bool> PushIndexChangesAsync()
_logger.LogInformation("Fetching new download count data from blob storage.");
var newData = await _auxiliaryFileClient.LoadDownloadDataAsync();

_logger.LogInformation("Removing invalid IDs and versions from the old data.");
_logger.LogInformation("Removing invalid IDs and versions from the old downloads data.");
CleanDownloadData(oldResult.Data);

_logger.LogInformation("Removing invalid IDs and versions from the new data.");
_logger.LogInformation("Removing invalid IDs and versions from the new downloads data.");
CleanDownloadData(newData);

// Fetch the download overrides from the auxiliary file. Note that the overridden downloads are kept
// separate from downloads data as the original data will be persisted to auxiliary data, whereas the
// overridden data will be persisted to Azure Search.
_logger.LogInformation("Overriding download count data.");
_logger.LogInformation("Detecting download count changes.");
var changes = _downloadSetComparer.Compare(oldResult.Data, newData);
_logger.LogInformation("{Count} package IDs have download count changes.", changes.Count);

// The "old" data is the popularity transfers data that was last indexed by this job (or
// initialized by Db2AzureSearch).
_logger.LogInformation("Fetching old popularity transfer data from blob storage.");
var oldTransfers = await _popularityTransferDataClient.ReadLatestIndexedAsync();

// The "new" data is the latest popularity transfers data from the database.
_logger.LogInformation("Fetching new popularity transfer data from database.");
var newTransfers = await _databaseFetcher.GetPackageIdToPopularityTransfersAsync();

_logger.LogInformation("Fetching new download overrides from blob storage.");
var downloadOverrides = await _auxiliaryFileClient.LoadDownloadOverridesAsync();
var overridenDownloads = newData.ApplyDownloadOverrides(downloadOverrides, _logger);

_logger.LogInformation("Detecting download count changes.");
var changes = _downloadSetComparer.Compare(oldResult.Data, overridenDownloads);
_logger.LogInformation("Applying download transfers to download changes.");
ApplyDownloadTransfers(
newData,
oldTransfers.Result,
newTransfers,
downloadOverrides,
changes);

var idBag = new ConcurrentBag<string>(changes.Keys);
_logger.LogInformation("{Count} package IDs have download count changes.", idBag.Count);
_logger.LogInformation("{Count} package IDs need to be updated.", idBag.Count);

if (!changes.Any())
{
Expand All @@ -139,9 +163,38 @@ await ParallelAsync.Repeat(

_logger.LogInformation("Uploading the new download count data to blob storage.");
await _downloadDataClient.ReplaceLatestIndexedAsync(newData, oldResult.Metadata.GetIfMatchCondition());

// TODO: Upload the new popularity transfer data to blob storage.
// See: https://github.com/NuGet/NuGetGallery/issues/7898
return true;
}

/// <summary>
/// Asks the download transferrer for the download counts affected by popularity transfers and download
/// overrides, then writes those counts into <paramref name="downloadChanges"/>.
/// </summary>
/// <param name="newData">The new download data, passed through to the download transferrer.</param>
/// <param name="oldTransfers">The old popularity transfers, passed through to the download transferrer.</param>
/// <param name="newTransfers">The new popularity transfers, passed through to the download transferrer.</param>
/// <param name="downloadOverrides">The download overrides, passed through to the download transferrer.</param>
/// <param name="downloadChanges">
/// The download changes keyed by package ID. This dictionary is modified in place: each entry returned by the
/// download transferrer is added, or replaces the existing entry that has the same key.
/// </param>
private void ApplyDownloadTransfers(
    DownloadData newData,
    SortedDictionary<string, SortedSet<string>> oldTransfers,
    SortedDictionary<string, SortedSet<string>> newTransfers,
    IReadOnlyDictionary<string, long> downloadOverrides,
    SortedDictionary<string, long> downloadChanges)
{
    _logger.LogInformation("Finding download changes from popularity transfers and download overrides.");

    var updatedDownloads = _downloadTransferrer.UpdateDownloadTransfers(
        newData, downloadChanges, oldTransfers, newTransfers, downloadOverrides);

    var updatedCount = updatedDownloads.Count;
    _logger.LogInformation(
        "{Count} package IDs have download count changes from popularity transfers and download overrides.",
        updatedCount);

    // The transferrer's result takes precedence over any entry already recorded for the same package ID.
    foreach (var updated in updatedDownloads)
    {
        var packageId = updated.Key;
        downloadChanges[packageId] = updated.Value;
    }
}

private async Task WorkAsync(ConcurrentBag<string> idBag, SortedDictionary<string, long> changes)
{
// Perform two batching mechanisms:
Expand Down Expand Up @@ -334,4 +387,4 @@ private void CleanDownloadData(DownloadData data)
nonNormalizedVersionCount);
}
}
}
}

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ public class Db2AzureSearchCommand : IAzureSearchCommand
private readonly IOwnerDataClient _ownerDataClient;
private readonly IDownloadDataClient _downloadDataClient;
private readonly IVerifiedPackagesDataClient _verifiedPackagesDataClient;
private readonly IPopularityTransferDataClient _popularityTransferDataClient;
private readonly IOptionsSnapshot<Db2AzureSearchConfiguration> _options;
private readonly IOptionsSnapshot<Db2AzureSearchDevelopmentConfiguration> _developmentOptions;
private readonly ILogger<Db2AzureSearchCommand> _logger;
Expand All @@ -45,6 +46,7 @@ public Db2AzureSearchCommand(
IOwnerDataClient ownerDataClient,
IDownloadDataClient downloadDataClient,
IVerifiedPackagesDataClient verifiedPackagesDataClient,
IPopularityTransferDataClient popularityTransferDataClient,
IOptionsSnapshot<Db2AzureSearchConfiguration> options,
IOptionsSnapshot<Db2AzureSearchDevelopmentConfiguration> developmentOptions,
ILogger<Db2AzureSearchCommand> logger)
Expand All @@ -59,6 +61,7 @@ public Db2AzureSearchCommand(
_ownerDataClient = ownerDataClient ?? throw new ArgumentNullException(nameof(ownerDataClient));
_downloadDataClient = downloadDataClient ?? throw new ArgumentNullException(nameof(downloadDataClient));
_verifiedPackagesDataClient = verifiedPackagesDataClient ?? throw new ArgumentNullException(nameof(verifiedPackagesDataClient));
_popularityTransferDataClient = popularityTransferDataClient ?? throw new ArgumentNullException(nameof(popularityTransferDataClient));
_options = options ?? throw new ArgumentNullException(nameof(options));
_developmentOptions = developmentOptions ?? throw new ArgumentNullException(nameof(developmentOptions));
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
Expand Down Expand Up @@ -114,6 +117,9 @@ private async Task ExecuteAsync(CancellationToken token)
// Write the verified packages data file.
await WriteVerifiedPackagesDataAsync(initialAuxiliaryData.VerifiedPackages);

// TODO: Write popularity transfers data file.
// See: https://github.com/NuGet/NuGetGallery/issues/7898

// Write the cursor.
_logger.LogInformation("Writing the initial cursor value to be {CursorValue:O}.", initialCursorValue);
var frontCursorStorage = _storageFactory.Create();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,20 @@ public InitialAuxiliaryData(
SortedDictionary<string, SortedSet<string>> owners,
DownloadData downloads,
HashSet<string> excludedPackages,
HashSet<string> verifiedPackages)
HashSet<string> verifiedPackages,
SortedDictionary<string, SortedSet<string>> popularityTransfers)
{
Owners = owners ?? throw new ArgumentNullException(nameof(owners));
Downloads = downloads ?? throw new ArgumentNullException(nameof(downloads));
ExcludedPackages = excludedPackages ?? throw new ArgumentNullException(nameof(excludedPackages));
VerifiedPackages = verifiedPackages ?? throw new ArgumentNullException(nameof(verifiedPackages));
PopularityTransfers = popularityTransfers ?? throw new ArgumentNullException(nameof(popularityTransfers));
}

public SortedDictionary<string, SortedSet<string>> Owners { get; }
public DownloadData Downloads { get; }
public HashSet<string> ExcludedPackages { get; }
public HashSet<string> VerifiedPackages { get; }
public SortedDictionary<string, SortedSet<string>> PopularityTransfers { get; }
}
}
Loading

0 comments on commit 5fe1ba7

Please sign in to comment.