Skip to content
This repository has been archived by the owner on Mar 16, 2021. It is now read-only.

[Package Renames 5] Transfer package popularity #769

Merged
merged 1 commit into from
Apr 23, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -164,8 +164,10 @@ await ParallelAsync.Repeat(
_logger.LogInformation("Uploading the new download count data to blob storage.");
await _downloadDataClient.ReplaceLatestIndexedAsync(newData, oldResult.Metadata.GetIfMatchCondition());

// TODO: Upload the new popularity transfer data to blob storage.
// See: https://github.com/NuGet/NuGetGallery/issues/7898
_logger.LogInformation("Uploading the new popularity transfer data to blob storage.");
await _popularityTransferDataClient.ReplaceLatestIndexedAsync(
newTransfers,
oldTransfers.AccessCondition);
return true;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
namespace NuGet.Services.AzureSearch.AuxiliaryFiles
{
/// <summary>
/// The purpose of this interface is allow reading and writing populairty transfer information from storage.
/// The purpose of this interface is allow reading and writing popularity transfer information from storage.
/// The Auxiliary2AzureSearch job does a comparison of latest popularity transfer data from the database with
/// a snapshot of information stored in Azure Blob Storage. This interface handles the reading and writing of
/// that snapshot from storage.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,12 @@ public class AzureSearchScoringConfiguration
/// </summary>
public Dictionary<string, double> FieldWeights { get; set; }

/// <summary>
/// The percentage of downloads that should be transferred by the popularity transfer feature.
/// Values range from 0 to 1.
/// </summary>
public double PopularityTransfer { get; set; }

/// <summary>
/// The <see cref="SearchDocument.Full.DownloadScore"/> magnitude boost.
/// This boosts packages with many downloads.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -117,8 +117,8 @@ private async Task ExecuteAsync(CancellationToken token)
// Write the verified packages data file.
await WriteVerifiedPackagesDataAsync(initialAuxiliaryData.VerifiedPackages);

// TODO: Write popularity transfers data file.
// See: https://github.com/NuGet/NuGetGallery/issues/7898
// Write popularity transfers data file.
await WritePopularityTransfersDataAsync(initialAuxiliaryData.PopularityTransfers);

// Write the cursor.
_logger.LogInformation("Writing the initial cursor value to be {CursorValue:O}.", initialCursorValue);
Expand Down Expand Up @@ -201,6 +201,15 @@ await _verifiedPackagesDataClient.ReplaceLatestAsync(
_logger.LogInformation("Done uploading the initial verified packages data file.");
}

private async Task WritePopularityTransfersDataAsync(SortedDictionary<string, SortedSet<string>> popularityTransfers)
{
_logger.LogInformation("Writing the initial popularity transfers data file.");
await _popularityTransferDataClient.ReplaceLatestIndexedAsync(
popularityTransfers,
AccessConditionWrapper.GenerateIfNotExistsCondition());
_logger.LogInformation("Done uploading the initial popularity transfers data file.");
}

private async Task<InitialAuxiliaryData> ProduceWorkAsync(
ConcurrentBag<NewPackageRegistration> allWork,
CancellationTokenSource produceWorkCts,
Expand Down
202 changes: 194 additions & 8 deletions src/NuGet.Services.AzureSearch/DownloadTransferrer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,27 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using NuGet.Services.AzureSearch.Auxiliary2AzureSearch;
using NuGet.Services.AzureSearch.AuxiliaryFiles;

namespace NuGet.Services.AzureSearch
{
public class DownloadTransferrer : IDownloadTransferrer
{
private readonly IDataSetComparer _dataComparer;
private readonly IOptionsSnapshot<AzureSearchJobConfiguration> _options;
private readonly ILogger<DownloadTransferrer> _logger;

public DownloadTransferrer(ILogger<DownloadTransferrer> logger)
public DownloadTransferrer(
IDataSetComparer dataComparer,
IOptionsSnapshot<AzureSearchJobConfiguration> options,
ILogger<DownloadTransferrer> logger)
{
_dataComparer = dataComparer ?? throw new ArgumentNullException(nameof(dataComparer));
_options = options ?? throw new ArgumentNullException(nameof(options));
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
}

Expand All @@ -23,9 +33,25 @@ public SortedDictionary<string, long> InitializeDownloadTransfers(
SortedDictionary<string, SortedSet<string>> outgoingTransfers,
IReadOnlyDictionary<string, long> downloadOverrides)
{
// TODO: Add download changes due to popularity transfers.
// See: https://github.com/NuGet/NuGetGallery/issues/7898
var downloadTransfers = new SortedDictionary<string, long>(StringComparer.OrdinalIgnoreCase);
Guard.Assert(
loic-sharma marked this conversation as resolved.
Show resolved Hide resolved
outgoingTransfers.Comparer == StringComparer.OrdinalIgnoreCase,
$"Popularity transfer should have comparer {nameof(StringComparer.OrdinalIgnoreCase)}");

// Downloads are transferred from a "from" package to one or more "to" packages.
// The "outgoingTransfers" maps "from" packages to their corresponding "to" packages.
// The "incomingTransfers" maps "to" packages to their corresponding "from" packages.
var incomingTransfers = GetIncomingTransfers(outgoingTransfers);

// Get the transfer changes for all packages that have popularity transfers.
var packageIds = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
packageIds.UnionWith(outgoingTransfers.Keys);
packageIds.UnionWith(incomingTransfers.Keys);

var downloadTransfers = ApplyDownloadTransfers(
downloads,
outgoingTransfers,
incomingTransfers,
packageIds);

// TODO: Remove download overrides.
// See: https://github.com/NuGet/Engineering/issues/3089
Expand All @@ -49,13 +75,36 @@ public SortedDictionary<string, long> UpdateDownloadTransfers(
oldTransfers.Comparer == StringComparer.OrdinalIgnoreCase,
$"Old popularity transfer should have comparer {nameof(StringComparer.OrdinalIgnoreCase)}");

Guard.Assert(
newTransfers.Comparer == StringComparer.OrdinalIgnoreCase,
$"New popularity transfer should have comparer {nameof(StringComparer.OrdinalIgnoreCase)}");

Guard.Assert(
downloadChanges.All(x => downloads.GetDownloadCount(x.Key) == x.Value),
"The download changes should match the latest downloads");

// TODO: Add download changes due to popularity transfers.
// See: https://github.com/NuGet/NuGetGallery/issues/7898
var downloadTransfers = new SortedDictionary<string, long>(StringComparer.OrdinalIgnoreCase);
// Downloads are transferred from a "from" package to one or more "to" packages.
// The "oldTransfers" and "newTransfers" maps "from" packages to their corresponding "to" packages.
// The "incomingTransfers" maps "to" packages to their corresponding "from" packages.
var incomingTransfers = GetIncomingTransfers(newTransfers);

_logger.LogInformation("Detecting changes in popularity transfers.");
var transferChanges = _dataComparer.ComparePopularityTransfers(oldTransfers, newTransfers);
_logger.LogInformation("{Count} popularity transfers have changed.", transferChanges.Count);

// Get the transfer changes for packages affected by the download and transfer changes.
var affectedPackages = GetPackagesAffectedByChanges(
oldTransfers,
newTransfers,
incomingTransfers,
transferChanges,
downloadChanges);

var downloadTransfers = ApplyDownloadTransfers(
downloads,
newTransfers,
incomingTransfers,
affectedPackages);

// TODO: Remove download overrides.
// See: https://github.com/NuGet/Engineering/issues/3089
Expand All @@ -64,6 +113,143 @@ public SortedDictionary<string, long> UpdateDownloadTransfers(
return downloadTransfers;
}

private SortedDictionary<string, long> ApplyDownloadTransfers(
DownloadData downloads,
SortedDictionary<string, SortedSet<string>> outgoingTransfers,
SortedDictionary<string, SortedSet<string>> incomingTransfers,
HashSet<string> packageIds)
{
_logger.LogInformation(
"{Count} package IDs have download changes due to popularity transfers.",
packageIds.Count);

var result = new SortedDictionary<string, long>(StringComparer.OrdinalIgnoreCase);
foreach (var packageId in packageIds)
{
result[packageId] = TransferPackageDownloads(
packageId,
outgoingTransfers,
incomingTransfers,
downloads);
}

return result;
}

private SortedDictionary<string, SortedSet<string>> GetIncomingTransfers(
SortedDictionary<string, SortedSet<string>> outgoingTransfers)
{
var result = new SortedDictionary<string, SortedSet<string>>(StringComparer.OrdinalIgnoreCase);

foreach (var outgoingTransfer in outgoingTransfers)
{
var fromPackage = outgoingTransfer.Key;

foreach (var toPackage in outgoingTransfer.Value)
{
if (!result.TryGetValue(toPackage, out var incomingTransfer))
{
incomingTransfer = new SortedSet<string>(StringComparer.OrdinalIgnoreCase);
result.Add(toPackage, incomingTransfer);
}

incomingTransfer.Add(fromPackage);
}
}

return result;
}

private HashSet<string> GetPackagesAffectedByChanges(
SortedDictionary<string, SortedSet<string>> oldOutgoingTransfers,
SortedDictionary<string, SortedSet<string>> outgoingTransfers,
SortedDictionary<string, SortedSet<string>> incomingTransfers,
SortedDictionary<string, string[]> transferChanges,
SortedDictionary<string, long> downloadChanges)
{
var affectedPackages = new HashSet<string>(StringComparer.OrdinalIgnoreCase);

// If a package adds, changes, or removes outgoing transfers:
// Update "from" package
// Update all new "to" packages
// Update all old "to" packages (in case "to" packages were removed)
foreach (var transferChange in transferChanges)
{
var fromPackage = transferChange.Key;
var toPackages = transferChange.Value;

affectedPackages.Add(fromPackage);
affectedPackages.UnionWith(toPackages);

if (oldOutgoingTransfers.TryGetValue(fromPackage, out var oldToPackages))
{
affectedPackages.UnionWith(oldToPackages);
}
}

// If a package has download changes and outgoing transfers
// Update "from" package
// Update all "to" packages
//
// If a package has download changes and incoming transfers
// Update "to" package
foreach (var packageId in downloadChanges.Keys)
{
if (outgoingTransfers.TryGetValue(packageId, out var toPackages))
{
affectedPackages.Add(packageId);
affectedPackages.UnionWith(toPackages);
}

if (incomingTransfers.ContainsKey(packageId))
{
affectedPackages.Add(packageId);
}
}

return affectedPackages;
}

private long TransferPackageDownloads(
string packageId,
SortedDictionary<string, SortedSet<string>> outgoingTransfers,
SortedDictionary<string, SortedSet<string>> incomingTransfers,
DownloadData downloads)
{
var originalDownloads = downloads.GetDownloadCount(packageId);
var transferPercentage = _options.Value.Scoring.PopularityTransfer;

// Calculate packages with outgoing transfers first. These packages transfer a percentage
// or their downloads equally to a set of "incoming" packages. Packages with both outgoing
// and incoming transfers "reject" the incoming transfers.
if (outgoingTransfers.ContainsKey(packageId))
{
var keepPercentage = 1 - transferPercentage;

return (long)(originalDownloads * keepPercentage);
}

// Next, calculate packages with incoming transfers. These packages receive downloads
// from one or more "outgoing" packages.
if (incomingTransfers.TryGetValue(packageId, out var incomingTransferIds))
{
var result = originalDownloads;

foreach (var incomingTransferId in incomingTransferIds)
{
var incomingDownloads = downloads.GetDownloadCount(incomingTransferId);
var incomingSplit = outgoingTransfers[incomingTransferId].Count;
loic-sharma marked this conversation as resolved.
Show resolved Hide resolved

result += (long)(incomingDownloads * transferPercentage / incomingSplit);
}

return result;
}

// The package has no outgoing or incoming transfers. Return its downloads unchanged.
return originalDownloads;
}

private void ApplyDownloadOverrides(
DownloadData downloads,
IReadOnlyDictionary<string, long> downloadOverrides,
Expand Down Expand Up @@ -102,4 +288,4 @@ private void ApplyDownloadOverrides(
}
}
}
}
}
Loading