Skip to content
This repository has been archived by the owner on Jul 30, 2024. It is now read-only.
/ NuGet.Jobs Public archive

Commit

Permalink
[Azure Search] Support manually boosted search results (#676)
Browse files Browse the repository at this point in the history
This change lets us override a package's download count in the Azure Search index, thereby allowing us to manually boost search results. This will be used to temporarily boost Azure SDK packages until we improve our new search rankings algorithm.

See [NuGetDeployment#1177](https://nuget.visualstudio.com/NuGetMicrosoft/_git/NuGetDeployment/pullrequest/1177?_a=overview)
Part of https://github.com/nuget/engineering/issues/2779
  • Loading branch information
loic-sharma authored Oct 17, 2019
1 parent 5dc70b5 commit 01857b9
Show file tree
Hide file tree
Showing 13 changed files with 514 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -144,8 +144,15 @@ private async Task<bool> PushIndexChangesAsync()
_logger.LogInformation("Removing invalid IDs and versions from the new data.");
CleanDownloadData(newData);

// Fetch the download overrides from the auxiliary file. Note that the overridden downloads are kept
// separate from the downloads data as the original data will be persisted to auxiliary data, whereas the
// overridden data will be persisted to Azure Search.
_logger.LogInformation("Overriding download count data.");
var downloadOverrides = await _auxiliaryFileClient.LoadDownloadOverridesAsync();
var overridenDownloads = newData.ApplyDownloadOverrides(downloadOverrides, _logger);

_logger.LogInformation("Detecting download count changes.");
var changes = _downloadSetComparer.Compare(oldResult.Data, newData);
var changes = _downloadSetComparer.Compare(oldResult.Data, overridenDownloads);
var idBag = new ConcurrentBag<string>(changes.Keys);
_logger.LogInformation("{Count} package IDs have download count changes.", idBag.Count);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ public class Auxiliary2AzureSearchConfiguration : AzureSearchJobConfiguration, I
public string AuxiliaryDataStorageConnectionString { get; set; }
public string AuxiliaryDataStorageContainer { get; set; }
public string AuxiliaryDataStorageDownloadsPath { get; set; }
public string AuxiliaryDataStorageDownloadOverridesPath { get; set; }
public string AuxiliaryDataStorageExcludedPackagesPath { get; }
public string AuxiliaryDataStorageVerifiedPackagesPath { get; set; }
public TimeSpan MinPushPeriod { get; set; }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,16 @@ public async Task<HashSet<string>> LoadExcludedPackagesAsync()
logger: _logger));
}

/// <summary>
/// Loads the download overrides auxiliary file, using the configured
/// <c>AuxiliaryDataStorageDownloadOverridesPath</c> as the blob name.
/// </summary>
/// <returns>The overrides produced by <c>DownloadOverrides.Load</c> for that blob.</returns>
public async Task<IReadOnlyDictionary<string, long>> LoadDownloadOverridesAsync()
{
    // The parser is given no file name here; only the loader supplied by LoadAuxiliaryFileAsync is used.
    var overrides = await LoadAuxiliaryFileAsync<IReadOnlyDictionary<string, long>>(
        _options.Value.AuxiliaryDataStorageDownloadOverridesPath,
        loader => DownloadOverrides.Load(fileName: null, loader: loader, logger: _logger));

    return overrides;
}

private async Task<T> LoadAuxiliaryFileAsync<T>(
string blobName,
Func<ILoader, T> loadData) where T : class
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
// Copyright (c) .NET Foundation. All rights reserved.
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.

using System;
using System.Collections.Generic;
using System.Linq;
using Microsoft.Extensions.Logging;

namespace NuGet.Services.AzureSearch.AuxiliaryFiles
{
/// <summary>
/// Extension methods for <see cref="DownloadData"/>.
/// </summary>
public static class DownloadDataExtensions
{
    /// <summary>
    /// Builds a new <see cref="DownloadData"/> from <paramref name="originalData"/>, replacing the downloads
    /// of every package that has an applicable entry in <paramref name="downloadOverrides"/>.
    /// The original data is not modified.
    /// </summary>
    /// <remarks>
    /// An override is applied only when the package's current total is strictly less than the override.
    /// When an override is applied, the result holds a single version for that package (the first version
    /// enumerated from the original data) carrying the whole override value; the package's other versions
    /// are not copied.
    /// </remarks>
    /// <param name="originalData">The download data to copy.</param>
    /// <param name="downloadOverrides">The override download count for each overridden package ID.</param>
    /// <param name="logger">Receives an informational message for each applied or skipped override.</param>
    /// <returns>A copy of the download data with the overrides applied.</returns>
    /// <exception cref="ArgumentNullException">Thrown if any argument is null.</exception>
    public static DownloadData ApplyDownloadOverrides(
        this DownloadData originalData,
        IReadOnlyDictionary<string, long> downloadOverrides,
        ILogger logger)
    {
        if (originalData == null)
        {
            throw new ArgumentNullException(nameof(originalData));
        }

        if (downloadOverrides == null)
        {
            throw new ArgumentNullException(nameof(downloadOverrides));
        }

        if (logger == null)
        {
            throw new ArgumentNullException(nameof(logger));
        }

        // Create a copy of the original data and apply overrides as we copy.
        var result = new DownloadData();

        foreach (var downloadData in originalData)
        {
            var packageId = downloadData.Key;

            if (TryGetApplicableOverride(packageId, out var downloadOverride))
            {
                logger.LogInformation(
                    "Overriding downloads of package {PackageId} from {Downloads} to {DownloadsOverride}",
                    packageId,
                    originalData.GetDownloadCount(packageId),
                    downloadOverride);

                // The whole override is attributed to a single version.
                // NOTE(review): First() throws if a package has no versions; presumably every package in
                // the data has at least one - confirm against DownloadData.
                var versions = downloadData.Value.Keys;

                result.SetDownloadCount(
                    packageId,
                    versions.First(),
                    downloadOverride);
            }
            else
            {
                foreach (var versionData in downloadData.Value)
                {
                    result.SetDownloadCount(packageId, versionData.Key, versionData.Value);
                }
            }
        }

        return result;

        // Looks up the override for a package once and reports whether it should be applied.
        bool TryGetApplicableOverride(string id, out long overrideCount)
        {
            if (!downloadOverrides.TryGetValue(id, out overrideCount))
            {
                return false;
            }

            // Apply the downloads override only if the package has fewer total downloads.
            // In effect, this removes a package's manual boost once its total downloads reach the override.
            var totalDownloads = originalData[id].Total;
            if (totalDownloads >= overrideCount)
            {
                logger.LogInformation(
                    "Skipping download override for package {PackageId} as its downloads of {Downloads} are " +
                    "greater than its override of {DownloadsOverride}",
                    id,
                    totalDownloads,
                    overrideCount);
                return false;
            }

            return true;
        }
    }
}
}
33 changes: 33 additions & 0 deletions src/NuGet.Services.AzureSearch/AuxiliaryFiles/DownloadOverrides.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
using System;
using System.Collections.Generic;
using Microsoft.Extensions.Logging;
using Newtonsoft.Json;
using NuGet.Indexing;

namespace NuGet.Services.AzureSearch.AuxiliaryFiles
{
/// <summary>
/// Reads the download overrides auxiliary file.
/// </summary>
public static class DownloadOverrides
{
    private static readonly JsonSerializer Serializer = new JsonSerializer();

    /// <summary>
    /// Deserializes a JSON object of package ID to download count and returns it as a dictionary whose
    /// keys are compared with <see cref="StringComparer.OrdinalIgnoreCase"/>.
    /// </summary>
    /// <param name="fileName">
    /// The name passed to <paramref name="loader"/> to obtain the reader; also included in the error log.
    /// It is not validated here because the auxiliary file client passes null.
    /// </param>
    /// <param name="loader">Provides the reader for the overrides file.</param>
    /// <param name="logger">Receives an error entry if loading fails.</param>
    /// <returns>The download overrides keyed by package ID, case-insensitively.</returns>
    /// <exception cref="ArgumentNullException">
    /// Thrown if <paramref name="loader"/> or <paramref name="logger"/> is null.
    /// </exception>
    public static IReadOnlyDictionary<string, long> Load(string fileName, ILoader loader, ILogger logger)
    {
        // Validate up front: a null logger would otherwise fail inside the catch block below and hide
        // the exception that actually caused the load to fail.
        if (loader == null)
        {
            throw new ArgumentNullException(nameof(loader));
        }

        if (logger == null)
        {
            throw new ArgumentNullException(nameof(logger));
        }

        try
        {
            using (var reader = loader.GetReader(fileName))
            {
                var downloadOverrides = Serializer.Deserialize<Dictionary<string, long>>(reader);

                // Re-key the data so package ID lookups ignore casing. The copy constructor rejects a
                // null source, so a file that deserializes to null is logged and rethrown below.
                return new Dictionary<string, long>(
                    downloadOverrides,
                    StringComparer.OrdinalIgnoreCase);
            }
        }
        catch (Exception ex)
        {
            // Log for diagnosis, then rethrow without resetting the stack trace.
            logger.LogError(0, ex, "Unable to load download overrides {FileName} due to exception", fileName);
            throw;
        }
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ public interface IAuxiliaryDataStorageConfiguration
string AuxiliaryDataStorageConnectionString { get; }
string AuxiliaryDataStorageContainer { get; }
string AuxiliaryDataStorageDownloadsPath { get; }
string AuxiliaryDataStorageDownloadOverridesPath { get; }
string AuxiliaryDataStorageExcludedPackagesPath { get; }
string AuxiliaryDataStorageVerifiedPackagesPath { get; }
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ namespace NuGet.Services.AzureSearch.AuxiliaryFiles
/// <summary>
/// Loads the auxiliary files used by the Azure Search jobs.
/// </summary>
public interface IAuxiliaryFileClient
{
/// <summary>Loads the download data auxiliary file.</summary>
Task<DownloadData> LoadDownloadDataAsync();
/// <summary>
/// Loads the download overrides auxiliary file: a map from package ID to the download count that
/// should be used in place of the package's real total.
/// </summary>
Task<IReadOnlyDictionary<string, long>> LoadDownloadOverridesAsync();
/// <summary>
/// Loads the verified packages auxiliary file as a set of strings (presumably package IDs - confirm).
/// </summary>
Task<HashSet<string>> LoadVerifiedPackagesAsync();
/// <summary>
/// Loads the excluded packages auxiliary file as a set of strings (presumably package IDs - confirm).
/// </summary>
Task<HashSet<string>> LoadExcludedPackagesAsync();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ public class Db2AzureSearchConfiguration : AzureSearchJobConfiguration, IAuxilia
public string AuxiliaryDataStorageConnectionString { get; set; }
public string AuxiliaryDataStorageContainer { get; set; }
public string AuxiliaryDataStorageDownloadsPath { get; set; }
public string AuxiliaryDataStorageDownloadOverridesPath { get; set; }
public string AuxiliaryDataStorageExcludedPackagesPath { get; set; }
public string AuxiliaryDataStorageVerifiedPackagesPath { get; set; }
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,12 @@ public async Task<InitialAuxiliaryData> ProduceWorkAsync(
// numbers we don't use the gallery DB values.
var downloads = await _auxiliaryFileClient.LoadDownloadDataAsync();

// Fetch the download overrides from the auxiliary file. Note that the overridden downloads are kept
// separate from the downloads data as the original data will be persisted to auxiliary data, whereas the
// overridden data will be persisted to Azure Search.
var downloadOverrides = await _auxiliaryFileClient.LoadDownloadOverridesAsync();
var overridenDownloads = downloads.ApplyDownloadOverrides(downloadOverrides, _logger);

// Fetch the verified packages file. This is not used inside the index but is used at query-time in the
// Azure Search service. We want to copy this file to the local region's storage container to improve
// availability and start-up of the service.
Expand Down Expand Up @@ -94,7 +100,7 @@ public async Task<InitialAuxiliaryData> ProduceWorkAsync(

allWork.Add(new NewPackageRegistration(
pr.Id,
downloads.GetDownloadCount(pr.Id),
overridenDownloads.GetDownloadCount(pr.Id),
pr.Owners,
packages,
isExcludedByDefault));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@
<Compile Include="Auxiliary2AzureSearch\Auxiliary2AzureSearchConfiguration.cs" />
<Compile Include="Auxiliary2AzureSearch\DownloadSetComparer.cs" />
<Compile Include="Auxiliary2AzureSearch\IDownloadSetComparer.cs" />
<Compile Include="AuxiliaryFiles\DownloadDataExtensions.cs" />
<Compile Include="AuxiliaryFiles\DownloadOverrides.cs" />
<Compile Include="AuxiliaryFiles\SimpleCloudBlobExtensions.cs" />
<Compile Include="AuxiliaryFiles\DownloadByVersionData.cs" />
<Compile Include="AuxiliaryFiles\DownloadData.cs" />
Expand Down
Loading

0 comments on commit 01857b9

Please sign in to comment.