Skip to content
This repository has been archived by the owner on Mar 16, 2021. It is now read-only.

Commit

Permalink
[Package Renames 5] Transfer package popularity (#769)
Browse files Browse the repository at this point in the history
This updates `auxiliary2azuresearch` to push download changes whenever a popularity transfer changes or a download change affects a popularity transfer.

Part of NuGet/NuGetGallery#7898
  • Loading branch information
loic-sharma authored Apr 23, 2020
1 parent 5fe1ba7 commit 42d2c4f
Show file tree
Hide file tree
Showing 11 changed files with 1,409 additions and 26 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -164,8 +164,10 @@ await ParallelAsync.Repeat(
_logger.LogInformation("Uploading the new download count data to blob storage.");
await _downloadDataClient.ReplaceLatestIndexedAsync(newData, oldResult.Metadata.GetIfMatchCondition());

// TODO: Upload the new popularity transfer data to blob storage.
// See: https://github.com/NuGet/NuGetGallery/issues/7898
_logger.LogInformation("Uploading the new popularity transfer data to blob storage.");
await _popularityTransferDataClient.ReplaceLatestIndexedAsync(
newTransfers,
oldTransfers.AccessCondition);
return true;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
namespace NuGet.Services.AzureSearch.AuxiliaryFiles
{
/// <summary>
/// The purpose of this interface is allow reading and writing populairty transfer information from storage.
/// The purpose of this interface is allow reading and writing popularity transfer information from storage.
/// The Auxiliary2AzureSearch job does a comparison of latest popularity transfer data from the database with
/// a snapshot of information stored in Azure Blob Storage. This interface handles the reading and writing of
/// that snapshot from storage.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,12 @@ public class AzureSearchScoringConfiguration
/// </summary>
public Dictionary<string, double> FieldWeights { get; set; }

/// <summary>
/// The fraction of a package's downloads that should be transferred by the popularity transfer feature.
/// Values range from 0 (transfer nothing) to 1 (transfer everything).
/// </summary>
public double PopularityTransfer { get; set; }

/// <summary>
/// The <see cref="SearchDocument.Full.DownloadScore"/> magnitude boost.
/// This boosts packages with many downloads.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -117,8 +117,8 @@ private async Task ExecuteAsync(CancellationToken token)
// Write the verified packages data file.
await WriteVerifiedPackagesDataAsync(initialAuxiliaryData.VerifiedPackages);

// TODO: Write popularity transfers data file.
// See: https://github.com/NuGet/NuGetGallery/issues/7898
// Write popularity transfers data file.
await WritePopularityTransfersDataAsync(initialAuxiliaryData.PopularityTransfers);

// Write the cursor.
_logger.LogInformation("Writing the initial cursor value to be {CursorValue:O}.", initialCursorValue);
Expand Down Expand Up @@ -201,6 +201,15 @@ await _verifiedPackagesDataClient.ReplaceLatestAsync(
_logger.LogInformation("Done uploading the initial verified packages data file.");
}

/// <summary>
/// Uploads the initial popularity transfers data file through the popularity transfer data client,
/// logging before and after the upload.
/// </summary>
/// <param name="popularityTransfers">The popularity transfers to write.</param>
private async Task WritePopularityTransfersDataAsync(SortedDictionary<string, SortedSet<string>> popularityTransfers)
{
    _logger.LogInformation("Writing the initial popularity transfers data file.");

    // The upload is made with an "if not exists" access condition.
    var ifNotExists = AccessConditionWrapper.GenerateIfNotExistsCondition();
    await _popularityTransferDataClient.ReplaceLatestIndexedAsync(popularityTransfers, ifNotExists);

    _logger.LogInformation("Done uploading the initial popularity transfers data file.");
}

private async Task<InitialAuxiliaryData> ProduceWorkAsync(
ConcurrentBag<NewPackageRegistration> allWork,
CancellationTokenSource produceWorkCts,
Expand Down
202 changes: 194 additions & 8 deletions src/NuGet.Services.AzureSearch/DownloadTransferrer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,27 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using NuGet.Services.AzureSearch.Auxiliary2AzureSearch;
using NuGet.Services.AzureSearch.AuxiliaryFiles;

namespace NuGet.Services.AzureSearch
{
public class DownloadTransferrer : IDownloadTransferrer
{
private readonly IDataSetComparer _dataComparer;
private readonly IOptionsSnapshot<AzureSearchJobConfiguration> _options;
private readonly ILogger<DownloadTransferrer> _logger;

public DownloadTransferrer(ILogger<DownloadTransferrer> logger)
/// <summary>
/// Creates a new <see cref="DownloadTransferrer"/>.
/// </summary>
/// <param name="dataComparer">Used to compare old and new popularity transfers.</param>
/// <param name="options">The job configuration; the popularity transfer scoring setting is read from it.</param>
/// <param name="logger">The logger.</param>
/// <exception cref="ArgumentNullException">Thrown when any argument is null.</exception>
public DownloadTransferrer(
    IDataSetComparer dataComparer,
    IOptionsSnapshot<AzureSearchJobConfiguration> options,
    ILogger<DownloadTransferrer> logger)
{
    // Validate in declaration order so the first null argument is the one reported.
    if (dataComparer == null)
    {
        throw new ArgumentNullException(nameof(dataComparer));
    }

    if (options == null)
    {
        throw new ArgumentNullException(nameof(options));
    }

    if (logger == null)
    {
        throw new ArgumentNullException(nameof(logger));
    }

    _dataComparer = dataComparer;
    _options = options;
    _logger = logger;
}

Expand All @@ -23,9 +33,25 @@ public SortedDictionary<string, long> InitializeDownloadTransfers(
SortedDictionary<string, SortedSet<string>> outgoingTransfers,
IReadOnlyDictionary<string, long> downloadOverrides)
{
// TODO: Add download changes due to popularity transfers.
// See: https://github.com/NuGet/NuGetGallery/issues/7898
var downloadTransfers = new SortedDictionary<string, long>(StringComparer.OrdinalIgnoreCase);
Guard.Assert(
outgoingTransfers.Comparer == StringComparer.OrdinalIgnoreCase,
$"Popularity transfer should have comparer {nameof(StringComparer.OrdinalIgnoreCase)}");

// Downloads are transferred from a "from" package to one or more "to" packages.
// The "outgoingTransfers" maps "from" packages to their corresponding "to" packages.
// The "incomingTransfers" maps "to" packages to their corresponding "from" packages.
var incomingTransfers = GetIncomingTransfers(outgoingTransfers);

// Get the transfer changes for all packages that have popularity transfers.
var packageIds = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
packageIds.UnionWith(outgoingTransfers.Keys);
packageIds.UnionWith(incomingTransfers.Keys);

var downloadTransfers = ApplyDownloadTransfers(
downloads,
outgoingTransfers,
incomingTransfers,
packageIds);

// TODO: Remove download overrides.
// See: https://github.com/NuGet/Engineering/issues/3089
Expand All @@ -49,13 +75,36 @@ public SortedDictionary<string, long> UpdateDownloadTransfers(
oldTransfers.Comparer == StringComparer.OrdinalIgnoreCase,
$"Old popularity transfer should have comparer {nameof(StringComparer.OrdinalIgnoreCase)}");

Guard.Assert(
newTransfers.Comparer == StringComparer.OrdinalIgnoreCase,
$"New popularity transfer should have comparer {nameof(StringComparer.OrdinalIgnoreCase)}");

Guard.Assert(
downloadChanges.All(x => downloads.GetDownloadCount(x.Key) == x.Value),
"The download changes should match the latest downloads");

// TODO: Add download changes due to popularity transfers.
// See: https://github.com/NuGet/NuGetGallery/issues/7898
var downloadTransfers = new SortedDictionary<string, long>(StringComparer.OrdinalIgnoreCase);
// Downloads are transferred from a "from" package to one or more "to" packages.
// The "oldTransfers" and "newTransfers" map "from" packages to their corresponding "to" packages.
// The "incomingTransfers" maps "to" packages to their corresponding "from" packages.
var incomingTransfers = GetIncomingTransfers(newTransfers);

_logger.LogInformation("Detecting changes in popularity transfers.");
var transferChanges = _dataComparer.ComparePopularityTransfers(oldTransfers, newTransfers);
_logger.LogInformation("{Count} popularity transfers have changed.", transferChanges.Count);

// Get the transfer changes for packages affected by the download and transfer changes.
var affectedPackages = GetPackagesAffectedByChanges(
oldTransfers,
newTransfers,
incomingTransfers,
transferChanges,
downloadChanges);

var downloadTransfers = ApplyDownloadTransfers(
downloads,
newTransfers,
incomingTransfers,
affectedPackages);

// TODO: Remove download overrides.
// See: https://github.com/NuGet/Engineering/issues/3089
Expand All @@ -64,6 +113,143 @@ public SortedDictionary<string, long> UpdateDownloadTransfers(
return downloadTransfers;
}

/// <summary>
/// Computes the post-transfer download count for each of the given package IDs.
/// </summary>
/// <param name="downloads">The download data that counts are read from.</param>
/// <param name="outgoingTransfers">Maps "from" packages to their "to" packages.</param>
/// <param name="incomingTransfers">Maps "to" packages to their "from" packages.</param>
/// <param name="packageIds">The package IDs whose download counts should be computed.</param>
/// <returns>A case-insensitive map of package ID to its download count after transfers.</returns>
private SortedDictionary<string, long> ApplyDownloadTransfers(
    DownloadData downloads,
    SortedDictionary<string, SortedSet<string>> outgoingTransfers,
    SortedDictionary<string, SortedSet<string>> incomingTransfers,
    HashSet<string> packageIds)
{
    var transferredDownloads = new SortedDictionary<string, long>(StringComparer.OrdinalIgnoreCase);

    _logger.LogInformation(
        "{Count} package IDs have download changes due to popularity transfers.",
        packageIds.Count);

    foreach (var id in packageIds)
    {
        var adjustedCount = TransferPackageDownloads(
            id,
            outgoingTransfers,
            incomingTransfers,
            downloads);

        // The indexer is used so an ID that differs only by casing overwrites instead of throwing.
        transferredDownloads[id] = adjustedCount;
    }

    return transferredDownloads;
}

/// <summary>
/// Inverts the outgoing transfers map: the result maps each "to" package to the set of
/// "from" packages that transfer to it.
/// </summary>
/// <param name="outgoingTransfers">Maps "from" packages to their "to" packages.</param>
/// <returns>A case-insensitive map of "to" packages to their "from" packages.</returns>
private SortedDictionary<string, SortedSet<string>> GetIncomingTransfers(
    SortedDictionary<string, SortedSet<string>> outgoingTransfers)
{
    var incoming = new SortedDictionary<string, SortedSet<string>>(StringComparer.OrdinalIgnoreCase);

    foreach (var entry in outgoingTransfers)
    {
        var senderId = entry.Key;

        foreach (var receiverId in entry.Value)
        {
            if (incoming.TryGetValue(receiverId, out var knownSenders))
            {
                // This receiver was already seen; just record one more sender.
                knownSenders.Add(senderId);
                continue;
            }

            // First time this receiver is seen: start its sender set with the current sender.
            incoming.Add(
                receiverId,
                new SortedSet<string>(StringComparer.OrdinalIgnoreCase) { senderId });
        }
    }

    return incoming;
}

/// <summary>
/// Collects the IDs of packages whose transferred download count must be recomputed because
/// of a popularity transfer change or a download change.
/// </summary>
/// <param name="oldOutgoingTransfers">The previous map of "from" packages to their "to" packages.</param>
/// <param name="outgoingTransfers">The latest map of "from" packages to their "to" packages.</param>
/// <param name="incomingTransfers">The latest map of "to" packages to their "from" packages.</param>
/// <param name="transferChanges">Maps each "from" package whose transfers changed to its new "to" packages.</param>
/// <param name="downloadChanges">Maps each package whose downloads changed to its new download count.</param>
/// <returns>A case-insensitive set of affected package IDs.</returns>
private HashSet<string> GetPackagesAffectedByChanges(
    SortedDictionary<string, SortedSet<string>> oldOutgoingTransfers,
    SortedDictionary<string, SortedSet<string>> outgoingTransfers,
    SortedDictionary<string, SortedSet<string>> incomingTransfers,
    SortedDictionary<string, string[]> transferChanges,
    SortedDictionary<string, long> downloadChanges)
{
    var affected = new HashSet<string>(StringComparer.OrdinalIgnoreCase);

    // Transfer changes: the "from" package, its new "to" packages, and its old "to" packages
    // (which may no longer receive downloads) all need to be updated.
    foreach (var change in transferChanges)
    {
        var senderId = change.Key;

        affected.Add(senderId);
        affected.UnionWith(change.Value);

        if (oldOutgoingTransfers.TryGetValue(senderId, out var previousReceivers))
        {
            affected.UnionWith(previousReceivers);
        }
    }

    // Download changes only matter for packages that take part in a transfer:
    //   - a "from" package affects itself and every package it transfers to;
    //   - a "to" package affects only itself.
    foreach (var changedId in downloadChanges.Keys)
    {
        var isSender = outgoingTransfers.TryGetValue(changedId, out var receivers);

        if (isSender || incomingTransfers.ContainsKey(changedId))
        {
            affected.Add(changedId);
        }

        if (isSender)
        {
            affected.UnionWith(receivers);
        }
    }

    return affected;
}

/// <summary>
/// Computes a single package's download count after popularity transfers are applied.
/// </summary>
/// <param name="packageId">The package whose download count is computed.</param>
/// <param name="outgoingTransfers">Maps "from" packages to their "to" packages.</param>
/// <param name="incomingTransfers">Maps "to" packages to their "from" packages.</param>
/// <param name="downloads">The download data that counts are read from.</param>
/// <returns>
/// The package's downloads reduced by the configured transfer amount if it has outgoing transfers,
/// increased by its share of each "from" package's downloads if it only has incoming transfers,
/// or unchanged if it has neither.
/// </returns>
private long TransferPackageDownloads(
    string packageId,
    SortedDictionary<string, SortedSet<string>> outgoingTransfers,
    SortedDictionary<string, SortedSet<string>> incomingTransfers,
    DownloadData downloads)
{
    var ownDownloads = downloads.GetDownloadCount(packageId);
    var transferFraction = _options.Value.Scoring.PopularityTransfer;

    // Outgoing transfers are handled first: the package keeps only the part of its downloads
    // that is not transferred. Because this branch returns early, a package with both outgoing
    // and incoming transfers "rejects" the incoming ones.
    if (outgoingTransfers.ContainsKey(packageId))
    {
        var keptFraction = 1 - transferFraction;

        // The cast truncates any fractional download.
        return (long)(ownDownloads * keptFraction);
    }

    // No outgoing or incoming transfers: the downloads are returned unchanged.
    if (!incomingTransfers.TryGetValue(packageId, out var senderIds))
    {
        return ownDownloads;
    }

    // Incoming transfers: each "from" package splits the transferred part of its downloads
    // equally between all of its "to" packages, and this package receives one such share.
    var total = ownDownloads;

    foreach (var senderId in senderIds)
    {
        var senderDownloads = downloads.GetDownloadCount(senderId);
        var receiverCount = outgoingTransfers[senderId].Count;

        // Each share is truncated on its own before being added to the total.
        total += (long)(senderDownloads * transferFraction / receiverCount);
    }

    return total;
}

private void ApplyDownloadOverrides(
DownloadData downloads,
IReadOnlyDictionary<string, long> downloadOverrides,
Expand Down Expand Up @@ -102,4 +288,4 @@ private void ApplyDownloadOverrides(
}
}
}
}
}
Loading

0 comments on commit 42d2c4f

Please sign in to comment.