Skip to content
This repository has been archived by the owner on Mar 16, 2021. It is now read-only.

Commit

Permalink
Refactor download overrides
Browse files Browse the repository at this point in the history
Update tests

Update comments

Update comments

Update Db2AzureSearchCommandFacts

Update new package registration producer facts

Add test

Add tests for download transferrer

Rename method

Fix test

Don't save popularity transfers for now

Save popularity transfers data file

Initial

More tests

Add tests

Updates
  • Loading branch information
loic-sharma committed Apr 20, 2020
1 parent 49cac2c commit e2864f1
Show file tree
Hide file tree
Showing 8 changed files with 923 additions and 28 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -164,8 +164,10 @@ await ParallelAsync.Repeat(
_logger.LogInformation("Uploading the new download count data to blob storage.");
await _downloadDataClient.ReplaceLatestIndexedAsync(newData, oldResult.Metadata.GetIfMatchCondition());

// TODO: Upload the new popularity transfer data to blob storage.
// See: https://github.com/NuGet/NuGetGallery/issues/7898
_logger.LogInformation("Uploading the new popularity transfer data to blob storage.");
await _popularityTransferDataClient.ReplaceLatestIndexedAsync(
newTransfers,
oldTransfers.AccessCondition);
return true;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
namespace NuGet.Services.AzureSearch.AuxiliaryFiles
{
/// <summary>
/// The purpose of this interface is allow reading and writing populairty transfer information from storage.
/// The purpose of this interface is allow reading and writing popularity transfer information from storage.
/// The Auxiliary2AzureSearch job does a comparison of latest popularity transfer data from the database with
/// a snapshot of information stored in Azure Blob Storage. This interface handles the reading and writing of
/// that snapshot from storage.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,12 @@ public class AzureSearchScoringConfiguration
/// </summary>
public Dictionary<string, double> FieldWeights { get; set; }

/// <summary>
/// The percentage of downloads that should be transferred by the popularity transfer feature.
/// Values range from 0 to 1.
/// </summary>
public double PopularityTransfer { get; set; }

/// <summary>
/// The <see cref="SearchDocument.Full.DownloadScore"/> magnitude boost.
/// This boosts packages with many downloads.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -117,8 +117,8 @@ private async Task ExecuteAsync(CancellationToken token)
// Write the verified packages data file.
await WriteVerifiedPackagesDataAsync(initialAuxiliaryData.VerifiedPackages);

// TODO: Write popularity transfers data file.
// See: https://github.com/NuGet/NuGetGallery/issues/7898
// Write popularity transfers data file.
await WritePopularityTransfersDataAsync(initialAuxiliaryData.PopularityTransfers);

// Write the cursor.
_logger.LogInformation("Writing the initial cursor value to be {CursorValue:O}.", initialCursorValue);
Expand Down Expand Up @@ -201,6 +201,15 @@ await _verifiedPackagesDataClient.ReplaceLatestAsync(
_logger.LogInformation("Done uploading the initial verified packages data file.");
}

private async Task WritePopularityTransfersDataAsync(SortedDictionary<string, SortedSet<string>> popularityTransfers)
{
_logger.LogInformation("Writing the initial popularity transfers data file.");
await _popularityTransferDataClient.ReplaceLatestIndexedAsync(
popularityTransfers,
AccessConditionWrapper.GenerateIfNotExistsCondition());
_logger.LogInformation("Done uploading the initial popularity transfers data file.");
}

private async Task<InitialAuxiliaryData> ProduceWorkAsync(
ConcurrentBag<NewPackageRegistration> allWork,
CancellationTokenSource produceWorkCts,
Expand Down
194 changes: 186 additions & 8 deletions src/NuGet.Services.AzureSearch/DownloadTransferrer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,27 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using NuGet.Services.AzureSearch.Auxiliary2AzureSearch;
using NuGet.Services.AzureSearch.AuxiliaryFiles;

namespace NuGet.Services.AzureSearch
{
public class DownloadTransferrer : IDownloadTransferrer
{
private readonly IDataSetComparer _dataComparer;
private readonly IOptionsSnapshot<AzureSearchJobConfiguration> _options;
private readonly ILogger<DownloadTransferrer> _logger;

public DownloadTransferrer(ILogger<DownloadTransferrer> logger)
public DownloadTransferrer(
IDataSetComparer dataComparer,
IOptionsSnapshot<AzureSearchJobConfiguration> options,
ILogger<DownloadTransferrer> logger)
{
_dataComparer = dataComparer ?? throw new ArgumentNullException(nameof(dataComparer));
_options = options ?? throw new ArgumentNullException(nameof(options));
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
}

Expand All @@ -23,9 +33,21 @@ public SortedDictionary<string, long> InitializeDownloadTransfers(
SortedDictionary<string, SortedSet<string>> outgoingTransfers,
IReadOnlyDictionary<string, long> downloadOverrides)
{
// TODO: Add download changes due to popularity transfers.
// See: https://github.com/NuGet/NuGetGallery/issues/7898
var downloadTransfers = new SortedDictionary<string, long>(StringComparer.OrdinalIgnoreCase);
// Downloads are transferred from a "from" package to one or more "to" packages.
// The "outgoingTransfers" maps "from" packages to their corresponding "to" packages.
// The "incomingTransfers" maps "to" packages to their corresponding "from" packages.
var incomingTransfers = GetIncomingTransfers(outgoingTransfers);

// Get the transfer changes for all packages that have popularity transfers.
var packageIds = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
packageIds.UnionWith(outgoingTransfers.Keys);
packageIds.UnionWith(incomingTransfers.Keys);

var downloadTransfers = ApplyDownloadTransfers(
downloads,
outgoingTransfers,
incomingTransfers,
packageIds);

// TODO: Remove download overrides.
// See: https://github.com/NuGet/Engineering/issues/3089
Expand Down Expand Up @@ -53,9 +75,28 @@ public SortedDictionary<string, long> UpdateDownloadTransfers(
downloadChanges.All(x => downloads.GetDownloadCount(x.Key) == x.Value),
"The download changes should match the latest downloads");

// TODO: Add download changes due to popularity transfers.
// See: https://github.com/NuGet/NuGetGallery/issues/7898
var downloadTransfers = new SortedDictionary<string, long>(StringComparer.OrdinalIgnoreCase);
// Downloads are transferred from a "from" package to one or more "to" packages.
// The "oldTransfers" and "newTransfers" maps "from" packages to their corresponding "to" packages.
// The "incomingTransfers" maps "to" packages to their corresponding "from" packages.
var incomingTransfers = GetIncomingTransfers(newTransfers);

_logger.LogInformation("Detecting changes in popularity transfers.");
var transferChanges = _dataComparer.ComparePopularityTransfers(oldTransfers, newTransfers);
_logger.LogInformation("{Count} popularity transfers have changed.", transferChanges.Count);

// Get the transfer changes for packages affected by the download and transfer changes.
var affectedPackages = GetPackagesAffectedByChanges(
oldTransfers,
newTransfers,
incomingTransfers,
transferChanges,
downloadChanges);

var downloadTransfers = ApplyDownloadTransfers(
downloads,
newTransfers,
incomingTransfers,
affectedPackages);

// TODO: Remove download overrides.
// See: https://github.com/NuGet/Engineering/issues/3089
Expand All @@ -64,6 +105,143 @@ public SortedDictionary<string, long> UpdateDownloadTransfers(
return downloadTransfers;
}

private SortedDictionary<string, long> ApplyDownloadTransfers(
DownloadData downloads,
SortedDictionary<string, SortedSet<string>> outgoingTransfers,
SortedDictionary<string, SortedSet<string>> incomingTransfers,
HashSet<string> packageIds)
{
_logger.LogInformation(
"{Count} package IDs have download changes due to popularity transfers.",
packageIds.Count);

var result = new SortedDictionary<string, long>(StringComparer.OrdinalIgnoreCase);
foreach (var packageId in packageIds)
{
result[packageId] = TransferPackageDownloads(
packageId,
outgoingTransfers,
incomingTransfers,
downloads);
}

return result;
}

private SortedDictionary<string, SortedSet<string>> GetIncomingTransfers(
SortedDictionary<string, SortedSet<string>> outgoingTransfers)
{
var result = new SortedDictionary<string, SortedSet<string>>(StringComparer.OrdinalIgnoreCase);

foreach (var outgoingTransfer in outgoingTransfers)
{
var fromPackage = outgoingTransfer.Key;

foreach (var toPackage in outgoingTransfer.Value)
{
if (!result.TryGetValue(toPackage, out var incomingTransfer))
{
incomingTransfer = new SortedSet<string>(StringComparer.OrdinalIgnoreCase);
result.Add(toPackage, incomingTransfer);
}

incomingTransfer.Add(fromPackage);
}
}

return result;
}

private HashSet<string> GetPackagesAffectedByChanges(
SortedDictionary<string, SortedSet<string>> oldOutgoingTransfers,
SortedDictionary<string, SortedSet<string>> outgoingTransfers,
SortedDictionary<string, SortedSet<string>> incomingTransfers,
SortedDictionary<string, string[]> transferChanges,
SortedDictionary<string, long> downloadChanges)
{
var affectedPackages = new HashSet<string>(StringComparer.OrdinalIgnoreCase);

// If a package adds, changes, or removes outgoing transfers:
// Update "from" package
// Update all new "to" packages
// Update all old "to" packages (in case "to" packages were removed)
foreach (var transferChange in transferChanges)
{
var fromPackage = transferChange.Key;
var toPackages = transferChange.Value;

affectedPackages.Add(fromPackage);
affectedPackages.UnionWith(toPackages);

if (oldOutgoingTransfers.TryGetValue(fromPackage, out var oldToPackages))
{
affectedPackages.UnionWith(oldToPackages);
}
}

// If a package has download changes and outgoing transfers
// Update "from" package
// Update all "to" packages
//
// If a package has download changes and incoming transfers
// Update "to" package
foreach (var packageId in downloadChanges.Keys)
{
if (outgoingTransfers.TryGetValue(packageId, out var toPackages))
{
affectedPackages.Add(packageId);
affectedPackages.UnionWith(toPackages);
}

if (incomingTransfers.ContainsKey(packageId))
{
affectedPackages.Add(packageId);
}
}

return affectedPackages;
}

private long TransferPackageDownloads(
string packageId,
SortedDictionary<string, SortedSet<string>> outgoingTransfers,
SortedDictionary<string, SortedSet<string>> incomingTransfers,
DownloadData downloads)
{
var originalDownloads = downloads.GetDownloadCount(packageId);
var transferPercentage = _options.Value.Scoring.PopularityTransfer;

// Calculate packages with outgoing transfers first. These packages transfer a percentage
// or their downloads equally to a set of "incoming" packages. Packages with both outgoing
// and incoming transfers "reject" the incoming transfers.
if (outgoingTransfers.ContainsKey(packageId))
{
var keepPercentage = 1 - transferPercentage;

return (long)(originalDownloads * keepPercentage);
}

// Next, calculate packages with incoming transfers. These packages receive downloads
// from one or more "outgoing" packages.
if (incomingTransfers.TryGetValue(packageId, out var incomingTransferIds))
{
var result = originalDownloads;

foreach (var incomingTransferId in incomingTransferIds)
{
var incomingDownloads = downloads.GetDownloadCount(incomingTransferId);
var incomingSplit = outgoingTransfers[incomingTransferId].Count;

result += (long)(incomingDownloads * transferPercentage / incomingSplit);
}

return result;
}

// The package has no outgoing or incoming transfers. Return its downloads unchanged.
return originalDownloads;
}

private void ApplyDownloadOverrides(
DownloadData downloads,
IReadOnlyDictionary<string, long> downloadOverrides,
Expand Down Expand Up @@ -102,4 +280,4 @@ private void ApplyDownloadOverrides(
}
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -171,14 +171,14 @@ public async Task AppliesTransferChanges()
return downloadChanges;
});

TransferChanges["Package1"] = 100;
TransferChanges["Package2"] = 200;

NewTransfers["Package1"] = new SortedSet<string>(StringComparer.OrdinalIgnoreCase)
{
"Package2"
};

TransferChanges["Package1"] = 100;
TransferChanges["Package2"] = 200;

await Target.ExecuteAsync();

PopularityTransferDataClient
Expand Down Expand Up @@ -221,8 +221,15 @@ public async Task AppliesTransferChanges()
It.IsAny<IAccessCondition>()),
Times.Once);

// TODO: Popularity transfers auxiliary file should have new data.
// See: https://github.com/NuGet/NuGetGallery/issues/7898
// Popularity transfers auxiliary file should have new data.
PopularityTransferDataClient.Verify(
c => c.ReplaceLatestIndexedAsync(
It.Is<SortedDictionary<string, SortedSet<string>>>(d =>
d.Count == 1 &&
d["Package1"].Count == 1 &&
d["Package1"].Contains("Package2")),
It.IsAny<IAccessCondition>()),
Times.Once);
}

[Fact]
Expand All @@ -246,14 +253,14 @@ public async Task TransferChangesOverideDownloadChanges()
NewDownloadData.SetDownloadCount("C", "5.0.0", 2);
NewDownloadData.SetDownloadCount("C", "6.0.0", 3);

TransferChanges["A"] = 55;
TransferChanges["b"] = 66;

NewTransfers["FromPackage"] = new SortedSet<string>(StringComparer.OrdinalIgnoreCase)
NewTransfers["A"] = new SortedSet<string>(StringComparer.OrdinalIgnoreCase)
{
"ToPackage"
"b"
};

TransferChanges["A"] = 55;
TransferChanges["b"] = 66;

await Target.ExecuteAsync();

// Documents should have new data with transfer changes.
Expand Down Expand Up @@ -288,8 +295,15 @@ public async Task TransferChangesOverideDownloadChanges()
It.IsAny<IAccessCondition>()),
Times.Once);

// TODO: Popularity transfers auxiliary file should have new data.
// See: https://github.com/NuGet/NuGetGallery/issues/7898
// Popularity transfers auxiliary file should have new data.
PopularityTransferDataClient.Verify(
c => c.ReplaceLatestIndexedAsync(
It.Is<SortedDictionary<string, SortedSet<string>>>(d =>
d.Count == 1 &&
d["A"].Count == 1 &&
d["A"].Contains("b")),
It.IsAny<IAccessCondition>()),
Times.Once);
}
}

Expand Down Expand Up @@ -334,7 +348,6 @@ public Facts(ITestOutputHelper output)
.Returns(() => Changes);

OldTransfers = new SortedDictionary<string, SortedSet<string>>(StringComparer.OrdinalIgnoreCase);

OldTransferResult = new ResultAndAccessCondition<SortedDictionary<string, SortedSet<string>>>(
OldTransfers,
Mock.Of<IAccessCondition>());
Expand All @@ -349,7 +362,7 @@ public Facts(ITestOutputHelper output)

DownloadOverrides = new Dictionary<string, long>();
AuxiliaryFileClient.Setup(x => x.LoadDownloadOverridesAsync()).ReturnsAsync(() => DownloadOverrides);

TransferChanges = new SortedDictionary<string, long>(StringComparer.OrdinalIgnoreCase);
DownloadTransferrer
.Setup(x => x.UpdateDownloadTransfers(
Expand Down
Loading

0 comments on commit e2864f1

Please sign in to comment.