Skip to content
This repository has been archived by the owner on Jul 30, 2024. It is now read-only.
/ NuGet.Jobs Public archive

Commit

Permalink
[Azure search] Add filter for excluding certain packages from default…
Browse files Browse the repository at this point in the history
… empty search results (#609)
  • Loading branch information
shishirx34 authored Jul 25, 2019
1 parent 314ed7b commit a9d0db9
Show file tree
Hide file tree
Showing 33 changed files with 454 additions and 111 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,15 @@

namespace NuGet.Indexing
{
public static class VerifiedPackages
public static class JsonStringArrayFileParser
{
/// <summary>
/// Load the verified packages auxiliary data.
/// Load the auxiliary data in simple json string array format.
/// </summary>
/// <param name="fileName">The name of the file that contains the auxiliary data</param>
/// <param name="loader">The loader that should be used to fetch the file's content</param>
/// <param name="logger">The logger</param>
/// <returns>A case-insensitive set of all the verified packages</returns>
/// <returns>A case-insensitive set of all the strings in the json array</returns>
public static HashSet<string> Load(string fileName, ILoader loader, FrameworkLogger logger)
{
try
Expand All @@ -37,7 +37,7 @@ public static HashSet<string> Load(string fileName, ILoader loader, FrameworkLog
}

/// <summary>
/// Parse the verified packages from the input.
/// Parse the strings from the input JSON array.
/// </summary>
/// <param name="reader">The reader whose content should be parsed</param>
/// <returns>A case-insensitive set of all the strings in the json array</returns>
Expand All @@ -47,17 +47,17 @@ public static HashSet<string> Parse(JsonReader reader)
reader.Read();
ThrowIfNotExpectedToken(reader, JsonToken.StartArray);

// Read all of the package ID strings from the JSON array.
// Read all of the strings from the JSON array.
var result = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
var packageId = reader.ReadAsString();
var stringValue = reader.ReadAsString();

while (packageId != null)
while (stringValue != null)
{
// The strings (e.g. package IDs) are likely to be duplicates from previous reloads. We'll reuse the
// interned strings so that duplicated strings can be garbage collected right away.
result.Add(String.Intern(packageId));
result.Add(String.Intern(stringValue));

packageId = reader.ReadAsString();
stringValue = reader.ReadAsString();
}

ThrowIfNotExpectedToken(reader, JsonToken.EndArray);
Expand All @@ -69,7 +69,7 @@ private static void ThrowIfNotExpectedToken(JsonReader reader, JsonToken expecte
{
// Report the token that was actually expected by the caller; this helper is used for both the
// StartArray and the EndArray checks, so the expected token must not be hard-coded.
if (reader.TokenType != expected)
{
throw new InvalidDataException($"Malformed Verified Packages Auxiliary file - expected '{expected}', actual: '{reader.TokenType}'");
throw new InvalidDataException($"Malformed simple json string array auxiliary file - expected '{expected}', actual: '{reader.TokenType}'");
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/NuGet.Indexing/NuGet.Indexing.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@
<Compile Include="ShingledIdentifierAnalyzer.cs" />
<Compile Include="TagsAnalyzer.cs" />
<Compile Include="TokenizingHelper.cs" />
<Compile Include="VerifiedPackages.cs" />
<Compile Include="JsonStringArrayFileParser.cs" />
<Compile Include="VersionAnalyzer.cs" />
<Compile Include="VersionDetail.cs" />
<Compile Include="VersionDownloads.cs" />
Expand Down
2 changes: 1 addition & 1 deletion src/NuGet.Indexing/NuGetSearcherManager.cs
Original file line number Diff line number Diff line change
Expand Up @@ -331,7 +331,7 @@ private void ReloadAuxiliaryDataIfExpired()
_downloads.Load(AuxiliaryFiles.DownloadsV1, _loader, _logger);
_rankings = DownloadRankings.Load(AuxiliaryFiles.RankingsV1, _loader, _logger);
_queryBoostingContext = QueryBoostingContext.Load(AuxiliaryFiles.SearchSettingsV1, _loader, _logger);
_verifiedPackages = VerifiedPackages.Load(AuxiliaryFiles.VerifiedPackages, _loader, _logger);
_verifiedPackages = JsonStringArrayFileParser.Load(AuxiliaryFiles.VerifiedPackages, _loader, _logger);

LastAuxiliaryDataLoadTime = DateTime.UtcNow;
AuxiliaryFiles.UpdateLastModifiedTime();
Expand Down
4 changes: 4 additions & 0 deletions src/NuGet.Jobs.Db2AzureSearch/Job.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@
using Autofac;
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Options;
using NuGet.Services.AzureSearch;
using NuGet.Services.AzureSearch.AuxiliaryFiles;
using NuGet.Services.AzureSearch.Db2AzureSearch;

namespace NuGet.Jobs
Expand Down Expand Up @@ -35,6 +37,8 @@ protected override void ConfigureJobServices(IServiceCollection services, IConfi
services.AddAzureSearch();

services.Configure<Db2AzureSearchConfiguration>(configurationRoot.GetSection(ConfigurationSectionName));
services.AddTransient<IOptionsSnapshot<IAuxiliaryDataStorageConfiguration>>(
p => p.GetRequiredService<IOptionsSnapshot<Db2AzureSearchConfiguration>>());
services.Configure<AzureSearchJobConfiguration>(configurationRoot.GetSection(ConfigurationSectionName));
services.Configure<AzureSearchConfiguration>(configurationRoot.GetSection(ConfigurationSectionName));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ public class Auxiliary2AzureSearchConfiguration : AzureSearchJobConfiguration, I
public string AuxiliaryDataStorageContainer { get; set; }
public string AuxiliaryDataStorageDownloadsPath { get; set; }
public string AuxiliaryDataStorageVerifiedPackagesPath { get; set; }
// Settable like the sibling auxiliary storage paths; a get-only auto-property could never be assigned
// from outside the class (e.g. when the configuration object is populated) and would always stay null.
public string AuxiliaryDataStorageExcludedPackagesPath { get; set; }
public TimeSpan MinPushPeriod { get; set; }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,18 @@ public async Task<AuxiliaryFileResult<HashSet<string>>> LoadVerifiedPackagesAsyn
return await LoadAuxiliaryFileAsync(
_options.Value.AuxiliaryDataStorageVerifiedPackagesPath,
etag,
loader => VerifiedPackages.Load(
loader => JsonStringArrayFileParser.Load(
fileName: null,
loader: loader,
logger: _logger));
}

public async Task<AuxiliaryFileResult<HashSet<string>>> LoadExcludedPackagesAsync(string etag)
{
return await LoadAuxiliaryFileAsync(
_options.Value.AuxiliaryDataStorageExcludedPackagesPath,
etag,
loader => JsonStringArrayFileParser.Load(
fileName: null,
loader: loader,
logger: _logger));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,6 @@ public interface IAuxiliaryDataStorageConfiguration
string AuxiliaryDataStorageContainer { get; }
string AuxiliaryDataStorageDownloadsPath { get; }
string AuxiliaryDataStorageVerifiedPackagesPath { get; }
string AuxiliaryDataStorageExcludedPackagesPath { get; }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,6 @@ public interface IAuxiliaryFileClient
Task<DownloadData> LoadDownloadDataAsync();
Task<AuxiliaryFileResult<Downloads>> LoadDownloadsAsync(string etag);
Task<AuxiliaryFileResult<HashSet<string>>> LoadVerifiedPackagesAsync(string etag);
Task<AuxiliaryFileResult<HashSet<string>>> LoadExcludedPackagesAsync(string etag);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ private async Task ExecuteAsync(CancellationToken token)
using (var cancelledCts = new CancellationTokenSource())
using (var produceWorkCts = new CancellationTokenSource())
{
// Initialize the indexes and container.
// Initialize the indexes, container and excluded packages data.
await InitializeAsync();

// Here, we fetch the current catalog timestamp to use as the initial cursor value for
Expand All @@ -100,6 +100,7 @@ private async Task ExecuteAsync(CancellationToken token)
// Push all package data to the Azure Search indexes and write the version list blobs.
var allOwners = new ConcurrentBag<IdAndValue<IReadOnlyList<string>>>();
var allDownloads = new ConcurrentBag<DownloadRecord>();

await PushAllPackageRegistrationsAsync(cancelledCts, produceWorkCts, allOwners, allDownloads);

// Write the owner data file.
Expand Down Expand Up @@ -197,6 +198,7 @@ private async Task ProduceWorkAsync(
CancellationTokenSource produceWorkCts,
CancellationToken cancellationToken)
{

await Task.Yield();
await _producer.ProduceWorkAsync(allWork, cancellationToken);
produceWorkCts.Cancel();
Expand Down
Original file line number Diff line number Diff line change
@@ -1,12 +1,19 @@
// Copyright (c) .NET Foundation. All rights reserved.
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.

using NuGet.Services.AzureSearch.AuxiliaryFiles;

namespace NuGet.Services.AzureSearch.Db2AzureSearch
{
public class Db2AzureSearchConfiguration : AzureSearchJobConfiguration
public class Db2AzureSearchConfiguration : AzureSearchJobConfiguration, IAuxiliaryDataStorageConfiguration
{
public int DatabaseBatchSize { get; set; } = 10000;
public bool ReplaceContainersAndIndexes { get; set; }
public string CatalogIndexUrl { get; set; }
public string AuxiliaryDataStorageConnectionString { get; set; }
public string AuxiliaryDataStorageContainer { get; set; }
public string AuxiliaryDataStorageExcludedPackagesPath { get; set; }
public string AuxiliaryDataStorageDownloadsPath { get; set; }
public string AuxiliaryDataStorageVerifiedPackagesPath { get; set; }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,25 +9,28 @@ namespace NuGet.Services.AzureSearch.Db2AzureSearch
{
/// <summary>
/// The information required to bring an entire package registration up to date in the Azure Search indexes. This
/// data is populated from the database by db2azuresearch.
/// data is populated from the database and storage by db2azuresearch.
/// </summary>
public class NewPackageRegistration
{
/// <summary>
/// Initializes the registration with the values it exposes through its read-only properties.
/// </summary>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="packageId"/>, <paramref name="owners"/>, or <paramref name="packages"/> is null.
/// </exception>
public NewPackageRegistration(
string packageId,
long totalDownloadCount,
string[] owners,
IReadOnlyList<Package> packages)
IReadOnlyList<Package> packages,
bool isExcludedByDefault)
{
// Use nameof(...) so the exception carries the parameter name; passing packageId itself would
// hand the (null) value to the paramName argument.
PackageId = packageId ?? throw new ArgumentNullException(nameof(packageId));
TotalDownloadCount = totalDownloadCount;
Owners = owners ?? throw new ArgumentNullException(nameof(owners));
Packages = packages ?? throw new ArgumentNullException(nameof(packages));
IsExcludedByDefault = isExcludedByDefault;
}

public string PackageId { get; }
public long TotalDownloadCount { get; }
public string[] Owners { get; }
public IReadOnlyList<Package> Packages { get; }
public bool IsExcludedByDefault { get; }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using NuGet.Services.AzureSearch.AuxiliaryFiles;
using NuGet.Services.Entities;
using NuGetGallery;

Expand All @@ -21,14 +22,17 @@ public class NewPackageRegistrationProducer : INewPackageRegistrationProducer
private readonly IEntitiesContextFactory _contextFactory;
private readonly IOptionsSnapshot<Db2AzureSearchConfiguration> _options;
private readonly ILogger<NewPackageRegistrationProducer> _logger;
private readonly IAuxiliaryFileClient _auxiliaryFileClient;

public NewPackageRegistrationProducer(
IEntitiesContextFactory contextFactory,
IOptionsSnapshot<Db2AzureSearchConfiguration> options,
IAuxiliaryFileClient auxiliaryFileClient,
ILogger<NewPackageRegistrationProducer> logger)
{
_contextFactory = contextFactory ?? throw new ArgumentNullException(nameof(contextFactory));
_options = options ?? throw new ArgumentNullException(nameof(options));
_auxiliaryFileClient = auxiliaryFileClient ?? throw new ArgumentNullException(nameof(auxiliaryFileClient));
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
}

Expand All @@ -38,6 +42,12 @@ public async Task ProduceWorkAsync(
{
var ranges = await GetPackageRegistrationRangesAsync();

// Fetch the excluded packages list from the auxiliary files.
var storageResult = await _auxiliaryFileClient.LoadExcludedPackagesAsync(etag: null);
HashSet<string> excludedPackages = storageResult.Data;

Guard.Assert(excludedPackages.Comparer == StringComparer.OrdinalIgnoreCase, $"Excluded packages HashSet should be using {nameof(StringComparer.OrdinalIgnoreCase)}");

for (var i = 0; i < ranges.Count && !cancellationToken.IsCancellationRequested; i++)
{
if (ShouldWait(allWork, log: true))
Expand Down Expand Up @@ -66,11 +76,14 @@ public async Task ProduceWorkAsync(
packages = new List<Package>();
}

var isExcludedByDefault = excludedPackages.Contains(pr.Id);

allWork.Add(new NewPackageRegistration(
pr.Id,
pr.DownloadCount,
pr.Owners,
packages));
packages,
isExcludedByDefault));
}

_logger.LogInformation("Done initializing batch {Number}/{Count}.", i + 1, ranges.Count);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,8 @@ private IndexAction<KeyedDocument> GetSearchIndexAction(
latestFlags.LatestVersionInfo.FullVersion,
package,
owners,
packageRegistration.TotalDownloadCount));
packageRegistration.TotalDownloadCount,
packageRegistration.IsExcludedByDefault));
}

private IndexAction<KeyedDocument> GetHijackIndexAction(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ public static ContainerBuilder AddAzureSearch(this ContainerBuilder containerBui
/// There are multiple implementations of <see cref="ISearchServiceClientWrapper"/>.
RegisterIndexServices(containerBuilder, "SearchIndex", "HijackIndex");

/// There are multiple implementations of storage, in particulare <see cref="ICloudBlobClient"/>.
/// There are multiple implementations of storage, in particular <see cref="ICloudBlobClient"/>.
RegisterAzureSearchJobStorageServices(containerBuilder, "AzureSearchJobStorage");
RegisterAuxiliaryDataStorageServices(containerBuilder, "AuxiliaryDataStorage");

Expand Down
3 changes: 2 additions & 1 deletion src/NuGet.Services.AzureSearch/ISearchDocumentBuilder.cs
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,8 @@ SearchDocument.Full FullFromDb(
string fullVersion,
Package package,
string[] owners,
long totalDownloadCount);
long totalDownloadCount,
bool isExcludedByDefault);

SearchDocument.UpdateLatest UpdateLatestFromCatalog(
SearchFilters searchFilters,
Expand Down
13 changes: 12 additions & 1 deletion src/NuGet.Services.AzureSearch/Models/SearchDocument.cs
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,16 @@ public static class SearchDocument
/// download count).
/// </summary>
[SerializePropertyNamesAsCamelCase]
public class Full : UpdateLatest, IDownloadCount
public class Full : UpdateLatest, IDownloadCount, IIsExcludedByDefault
{
[IsFilterable]
public long? TotalDownloadCount { get; set; }

[IsFilterable]
public double? DownloadScore { get; set; }

[IsFilterable]
public bool? IsExcludedByDefault { get; set; }
}

/// <summary>
Expand Down Expand Up @@ -119,6 +122,14 @@ public interface IDownloadCount : IUpdatedDocument
double? DownloadScore { get; set; }
}

/// <summary>
/// Implemented by documents that carry the "excluded by default" flag, so that index updating code
/// can apply default search exclusion information to them.
/// </summary>
public interface IIsExcludedByDefault : IUpdatedDocument
{
    /// <summary>
    /// The nullable exclusion flag stored on the document.
    /// </summary>
    bool? IsExcludedByDefault { get; set; }
}

/// <summary>
/// The data required to populate <see cref="IVersions"/> and other <see cref="SearchDocument"/> classes.
/// This information, as with all other types under <see cref="SearchDocument"/>, are specific to a single
Expand Down
11 changes: 10 additions & 1 deletion src/NuGet.Services.AzureSearch/SearchDocumentBuilder.cs
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,8 @@ public SearchDocument.Full FullFromDb(
string fullVersion,
Package package,
string[] owners,
long totalDownloadCount)
long totalDownloadCount,
bool isExcludedByDefault)
{
var document = new SearchDocument.Full();

Expand All @@ -213,6 +214,7 @@ public SearchDocument.Full FullFromDb(
owners: owners);
_baseDocumentBuilder.PopulateMetadata(document, packageId, package);
PopulateDownloadCount(document, totalDownloadCount);
PopulateIsExcludedByDefault(document, isExcludedByDefault);

return document;
}
Expand Down Expand Up @@ -320,5 +322,12 @@ private static void PopulateDownloadCount<T>(
document.TotalDownloadCount = totalDownloadCount;
document.DownloadScore = DocumentUtilities.GetDownloadScore(totalDownloadCount);
}

/// <summary>
/// Copies the provided exclusion flag onto the document's <c>IsExcludedByDefault</c> property.
/// </summary>
/// <typeparam name="T">A keyed document type that exposes the exclusion flag.</typeparam>
/// <param name="document">The document to update.</param>
/// <param name="isExcludedByDefault">The value to assign to the document.</param>
private static void PopulateIsExcludedByDefault<T>(T document, bool isExcludedByDefault)
    where T : KeyedDocument, SearchDocument.IIsExcludedByDefault
    => document.IsExcludedByDefault = isExcludedByDefault;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@ namespace NuGet.Services.AzureSearch.SearchService
public interface ISearchParametersBuilder
{
SearchParameters LastCommitTimestamp();
SearchParameters V2Search(V2SearchRequest request);
SearchParameters V3Search(V3SearchRequest request);
SearchParameters Autocomplete(AutocompleteRequest request);
SearchParameters V2Search(V2SearchRequest request, bool isDefaultSearch);
SearchParameters V3Search(V3SearchRequest request, bool isDefaultSearch);
SearchParameters Autocomplete(AutocompleteRequest request, bool isDefaultSearch);
SearchFilters GetSearchFilters(SearchRequest request);
}
}
3 changes: 2 additions & 1 deletion src/NuGet.Services.AzureSearch/SearchService/IndexFields.cs
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,11 @@ private static string Name(string input)

public static class Search
{
public static readonly string DownloadScore = Name(nameof(SearchDocument.Full.DownloadScore));
public static readonly string IsExcludedByDefault = Name(nameof(SearchDocument.Full.IsExcludedByDefault));
public static readonly string Owners = Name(nameof(SearchDocument.Full.Owners));
public static readonly string SearchFilters = Name(nameof(SearchDocument.UpdateLatest.SearchFilters));
public static readonly string TotalDownloadCount = Name(nameof(SearchDocument.Full.TotalDownloadCount));
public static readonly string DownloadScore = Name(nameof(SearchDocument.Full.DownloadScore));
public static readonly string Versions = Name(nameof(SearchDocument.UpdateLatest.Versions));
}
}
Expand Down
Loading

0 comments on commit a9d0db9

Please sign in to comment.