From e63d35b040b65dda8524cd4a8a19cb2a49fdfcec Mon Sep 17 00:00:00 2001 From: Joel Verhagen Date: Fri, 12 Jul 2019 14:19:47 -0700 Subject: [PATCH] Add DownloadDataClient to read and write downloads.v2.json (#591) This client will be used to read and write the download data that was most recently indexed by Auxiliary2AzureSearch. This data will be compared to the latest downloads.v1.json and only the changes will be sent to Azure Search. This strategy is very similar to Owners2AzureSearch. Progress on https://github.com/NuGet/NuGetGallery/issues/6458 --- .../AuxiliaryFiles/DownloadByVersionData.cs | 64 ++++ .../AuxiliaryFiles/DownloadData.cs | 87 +++++ .../AuxiliaryFiles/DownloadDataClient.cs | 153 +++++++++ .../AuxiliaryFiles/IDownloadDataClient.cs | 14 + .../{ => AuxiliaryFiles}/IOwnerDataClient.cs | 2 +- .../{ => AuxiliaryFiles}/OwnerDataClient.cs | 2 +- .../AzureSearchTelemetryService.cs | 25 +- .../Db2AzureSearch/Db2AzureSearchCommand.cs | 1 + .../IAzureSearchTelemetryService.cs | 4 +- .../NuGet.Services.AzureSearch.csproj | 8 +- .../Owners2AzureSearchCommand.cs | 1 + .../DownloadByVersionDataFacts.cs | 146 +++++++++ .../AuxiliaryFiles/DownloadDataClientFacts.cs | 301 ++++++++++++++++++ .../AuxiliaryFiles/DownloadDataFacts.cs | 146 +++++++++ .../OwnerDataClientFacts.cs | 2 +- .../Db2AzureSearchCommandFacts.cs | 1 + .../NuGet.Services.AzureSearch.Tests.csproj | 5 +- .../Owners2AzureSearchCommandFacts.cs | 1 + 18 files changed, 954 insertions(+), 9 deletions(-) create mode 100644 src/NuGet.Services.AzureSearch/AuxiliaryFiles/DownloadByVersionData.cs create mode 100644 src/NuGet.Services.AzureSearch/AuxiliaryFiles/DownloadData.cs create mode 100644 src/NuGet.Services.AzureSearch/AuxiliaryFiles/DownloadDataClient.cs create mode 100644 src/NuGet.Services.AzureSearch/AuxiliaryFiles/IDownloadDataClient.cs rename src/NuGet.Services.AzureSearch/{ => AuxiliaryFiles}/IOwnerDataClient.cs (97%) rename src/NuGet.Services.AzureSearch/{ => AuxiliaryFiles}/OwnerDataClient.cs 
(99%) create mode 100644 tests/NuGet.Services.AzureSearch.Tests/AuxiliaryFiles/DownloadByVersionDataFacts.cs create mode 100644 tests/NuGet.Services.AzureSearch.Tests/AuxiliaryFiles/DownloadDataClientFacts.cs create mode 100644 tests/NuGet.Services.AzureSearch.Tests/AuxiliaryFiles/DownloadDataFacts.cs rename tests/NuGet.Services.AzureSearch.Tests/{Owners2AzureSearch => AuxiliaryFiles}/OwnerDataClientFacts.cs (99%) diff --git a/src/NuGet.Services.AzureSearch/AuxiliaryFiles/DownloadByVersionData.cs b/src/NuGet.Services.AzureSearch/AuxiliaryFiles/DownloadByVersionData.cs new file mode 100644 index 000000000..915c588d4 --- /dev/null +++ b/src/NuGet.Services.AzureSearch/AuxiliaryFiles/DownloadByVersionData.cs @@ -0,0 +1,64 @@ +// Copyright (c) .NET Foundation. All rights reserved. +// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. + +using System; +using System.Collections; +using System.Collections.Generic; + +namespace NuGet.Services.AzureSearch.AuxiliaryFiles +{ + public class DownloadByVersionData : IReadOnlyDictionary + { + private readonly SortedDictionary _versions + = new SortedDictionary(StringComparer.OrdinalIgnoreCase); + + public long Total { get; private set; } + + public long GetDownloadCount(string version) + { + if (!_versions.TryGetValue(version, out var downloads)) + { + return 0; + } + + return downloads; + } + + public void SetDownloadCount(string version, long downloads) + { + if (downloads < 0) + { + throw new ArgumentOutOfRangeException(nameof(downloads), "The download count must not be negative."); + } + + if (_versions.TryGetValue(version, out var existingDownloads)) + { + // Remove the previous version so that the latest case is retained. Versions are case insensitive but + // we should try to respect the latest intent. 
+ _versions.Remove(version); + } + else + { + existingDownloads = 0; + } + + Total += downloads - existingDownloads; + + // Only store the download count if the value is not zero. + if (downloads != 0) + { + _versions.Add(version, downloads); + } + } + + public IEnumerable Keys => _versions.Keys; + public IEnumerable Values => _versions.Values; + public int Count => _versions.Count; + public long this[string key] => _versions[key]; + public IEnumerator> GetEnumerator() => _versions.GetEnumerator(); + public bool TryGetValue(string key, out long value) => _versions.TryGetValue(key, out value); + IEnumerator IEnumerable.GetEnumerator() => GetEnumerator(); + public bool ContainsKey(string key) => _versions.ContainsKey(key); + } +} + diff --git a/src/NuGet.Services.AzureSearch/AuxiliaryFiles/DownloadData.cs b/src/NuGet.Services.AzureSearch/AuxiliaryFiles/DownloadData.cs new file mode 100644 index 000000000..531a4814d --- /dev/null +++ b/src/NuGet.Services.AzureSearch/AuxiliaryFiles/DownloadData.cs @@ -0,0 +1,87 @@ +// Copyright (c) .NET Foundation. All rights reserved. +// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. + +using System; +using System.Collections; +using System.Collections.Generic; + +namespace NuGet.Services.AzureSearch.AuxiliaryFiles +{ + public class DownloadData : IReadOnlyDictionary + { + /// + /// Maintain a lookup of version strings for de-duping. We maintain the original case for de-duping purposes + /// by using the default string comparer. As of July of 2019 in PROD, maintaining original case adds less than + /// 0.3% extra strings. De-duping version strings in general however removes 87.0% of the string allocations. + /// Intuitively this means most people use the same case of a given version string and a lot of people use + /// the same versions strings (common ones are 1.0.0, 1.0.1, 1.0.2, 1.1.0, etc). 
+ /// + private readonly Dictionary _uniqueVersions = new Dictionary(); + + private readonly SortedDictionary _ids + = new SortedDictionary(StringComparer.OrdinalIgnoreCase); + + public long GetDownloadCount(string id) + { + if (!_ids.TryGetValue(id, out var versionData)) + { + return 0; + } + + return versionData.Total; + } + + public long GetDownloadCount(string id, string version) + { + if (!_ids.TryGetValue(id, out var versionData)) + { + return 0; + } + + return versionData.GetDownloadCount(version); + } + + public void SetDownloadCount(string id, string version, long downloads) + { + if (downloads < 0) + { + throw new ArgumentOutOfRangeException(nameof(downloads), "The download count must not be negative."); + } + + if (_ids.TryGetValue(id, out var versions)) + { + // Remove the previous version so that the latest case is retained. IDs are case insensitive but we + // should try to respect the latest intent. + _ids.Remove(id); + } + else + { + versions = new DownloadByVersionData(); + } + + if (!_uniqueVersions.TryGetValue(version, out var dedupedVersion)) + { + _uniqueVersions.Add(version, version); + dedupedVersion = version; + } + + versions.SetDownloadCount(dedupedVersion, downloads); + + // Only store the download count if the value is not zero. 
+ if (versions.Total != 0) + { + _ids.Add(id, versions); + } + } + + public IEnumerable Keys => _ids.Keys; + public IEnumerable Values => _ids.Values; + public int Count => _ids.Count; + public DownloadByVersionData this[string key] => _ids[key]; + public IEnumerator> GetEnumerator() => _ids.GetEnumerator(); + public bool TryGetValue(string key, out DownloadByVersionData value) => _ids.TryGetValue(key, out value); + IEnumerator IEnumerable.GetEnumerator() => GetEnumerator(); + public bool ContainsKey(string key) => _ids.ContainsKey(key); + } +} + diff --git a/src/NuGet.Services.AzureSearch/AuxiliaryFiles/DownloadDataClient.cs b/src/NuGet.Services.AzureSearch/AuxiliaryFiles/DownloadDataClient.cs new file mode 100644 index 000000000..bf0277346 --- /dev/null +++ b/src/NuGet.Services.AzureSearch/AuxiliaryFiles/DownloadDataClient.cs @@ -0,0 +1,153 @@ +// Copyright (c) .NET Foundation. All rights reserved. +// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. + +using System; +using System.Diagnostics; +using System.IO; +using System.Net; +using System.Threading.Tasks; +using Microsoft.Extensions.Logging; +using Microsoft.Extensions.Options; +using Microsoft.WindowsAzure.Storage; +using Newtonsoft.Json; +using NuGetGallery; + +namespace NuGet.Services.AzureSearch.AuxiliaryFiles +{ + public class DownloadDataClient : IDownloadDataClient + { + private static readonly JsonSerializer Serializer = new JsonSerializer(); + + private readonly ICloudBlobClient _cloudBlobClient; + private readonly IOptionsSnapshot _options; + private readonly IAzureSearchTelemetryService _telemetryService; + private readonly ILogger _logger; + private readonly Lazy _lazyContainer; + + public DownloadDataClient( + ICloudBlobClient cloudBlobClient, + IOptionsSnapshot options, + IAzureSearchTelemetryService telemetryService, + ILogger logger) + { + _cloudBlobClient = cloudBlobClient ?? 
throw new ArgumentNullException(nameof(cloudBlobClient)); + _options = options ?? throw new ArgumentNullException(nameof(options)); + _telemetryService = telemetryService ?? throw new ArgumentNullException(nameof(telemetryService)); + _logger = logger ?? throw new ArgumentNullException(nameof(logger)); + + _lazyContainer = new Lazy( + () => _cloudBlobClient.GetContainerReference(_options.Value.StorageContainer)); + } + + private ICloudBlobContainer Container => _lazyContainer.Value; + + public async Task> ReadLatestIndexedAsync() + { + var stopwatch = Stopwatch.StartNew(); + var blobName = GetLatestIndexedBlobName(); + var blobReference = Container.GetBlobReference(blobName); + + _logger.LogInformation("Reading the latest indexed downloads from {BlobName}.", blobName); + + var downloads = new DownloadData(); + IAccessCondition accessCondition; + try + { + using (var stream = await blobReference.OpenReadAsync(AccessCondition.GenerateEmptyCondition())) + { + accessCondition = AccessConditionWrapper.GenerateIfMatchCondition(blobReference.ETag); + ReadStream(stream, downloads.SetDownloadCount); + } + } + catch (StorageException ex) when (ex.RequestInformation.HttpStatusCode == (int)HttpStatusCode.NotFound) + { + accessCondition = AccessConditionWrapper.GenerateIfNotExistsCondition(); + _logger.LogInformation("The blob {BlobName} does not exist.", blobName); + } + + var output = new ResultAndAccessCondition(downloads, accessCondition); + + stopwatch.Stop(); + _telemetryService.TrackReadLatestIndexedDownloads(output.Result.Count, stopwatch.Elapsed); + + return output; + } + + public async Task ReplaceLatestIndexedAsync( + DownloadData newData, + IAccessCondition accessCondition) + { + using (_telemetryService.TrackReplaceLatestIndexedDownloads(newData.Count)) + { + var blobName = GetLatestIndexedBlobName(); + _logger.LogInformation("Replacing the latest indexed downloads from {BlobName}.", blobName); + + var mappedAccessCondition = new AccessCondition + { + 
IfNoneMatchETag = accessCondition.IfNoneMatchETag, + IfMatchETag = accessCondition.IfMatchETag, + }; + + var blobReference = Container.GetBlobReference(blobName); + + using (var stream = await blobReference.OpenWriteAsync(mappedAccessCondition)) + using (var streamWriter = new StreamWriter(stream)) + using (var jsonTextWriter = new JsonTextWriter(streamWriter)) + { + blobReference.Properties.ContentType = "application/json"; + Serializer.Serialize(jsonTextWriter, newData); + } + } + } + + private static void ReadStream( + Stream stream, + Action addVersion) + { + using (var textReader = new StreamReader(stream)) + using (var jsonReader = new JsonTextReader(textReader)) + { + Guard.Assert(jsonReader.Read(), "The blob should be readable."); + Guard.Assert(jsonReader.TokenType == JsonToken.StartObject, "The first token should be the start of an object."); + Guard.Assert(jsonReader.Read(), "There should be a second token."); + + while (jsonReader.TokenType == JsonToken.PropertyName) + { + // We assume the package ID has valid characters. + var id = (string)jsonReader.Value; + + Guard.Assert(jsonReader.Read(), "There should be a token after the package ID."); + Guard.Assert(jsonReader.TokenType == JsonToken.StartObject, "The token after the package ID should be the start of an object."); + Guard.Assert(jsonReader.Read(), "There should be a token after the start of the ID object."); + + while (jsonReader.TokenType == JsonToken.PropertyName) + { + // We assume the package version is already normalized. 
+ var version = (string)jsonReader.Value; + + Guard.Assert(jsonReader.Read(), "There should be a token after the package version."); + Guard.Assert(jsonReader.TokenType == JsonToken.Integer, "The token after the package version should be an integer."); + + var downloads = (long)jsonReader.Value; + + Guard.Assert(jsonReader.Read(), "There should be a token after the download count."); + + addVersion(id, version, downloads); + } + + Guard.Assert(jsonReader.TokenType == JsonToken.EndObject, "The token after the package versions should be the end of an object."); + Guard.Assert(jsonReader.Read(), "There should be a token after the package ID object."); + } + + Guard.Assert(jsonReader.TokenType == JsonToken.EndObject, "The last token should be the end of an object."); + Guard.Assert(!jsonReader.Read(), "There should be no token after the end of the object."); + } + } + + private string GetLatestIndexedBlobName() + { + return $"{_options.Value.NormalizeStoragePath()}downloads.v2.json"; + } + } +} + diff --git a/src/NuGet.Services.AzureSearch/AuxiliaryFiles/IDownloadDataClient.cs b/src/NuGet.Services.AzureSearch/AuxiliaryFiles/IDownloadDataClient.cs new file mode 100644 index 000000000..b98f54ba2 --- /dev/null +++ b/src/NuGet.Services.AzureSearch/AuxiliaryFiles/IDownloadDataClient.cs @@ -0,0 +1,14 @@ +// Copyright (c) .NET Foundation. All rights reserved. +// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. 
+ +using System.Threading.Tasks; +using NuGetGallery; + +namespace NuGet.Services.AzureSearch.AuxiliaryFiles +{ + public interface IDownloadDataClient + { + Task> ReadLatestIndexedAsync(); + Task ReplaceLatestIndexedAsync(DownloadData newData, IAccessCondition accessCondition); + } +} \ No newline at end of file diff --git a/src/NuGet.Services.AzureSearch/IOwnerDataClient.cs b/src/NuGet.Services.AzureSearch/AuxiliaryFiles/IOwnerDataClient.cs similarity index 97% rename from src/NuGet.Services.AzureSearch/IOwnerDataClient.cs rename to src/NuGet.Services.AzureSearch/AuxiliaryFiles/IOwnerDataClient.cs index 585e65d08..a0d8d635b 100644 --- a/src/NuGet.Services.AzureSearch/IOwnerDataClient.cs +++ b/src/NuGet.Services.AzureSearch/AuxiliaryFiles/IOwnerDataClient.cs @@ -5,7 +5,7 @@ using System.Threading.Tasks; using NuGetGallery; -namespace NuGet.Services.AzureSearch +namespace NuGet.Services.AzureSearch.AuxiliaryFiles { /// /// The purpose of this interface is allow reading and writing owner information from storage. 
The Catalog2Owners diff --git a/src/NuGet.Services.AzureSearch/OwnerDataClient.cs b/src/NuGet.Services.AzureSearch/AuxiliaryFiles/OwnerDataClient.cs similarity index 99% rename from src/NuGet.Services.AzureSearch/OwnerDataClient.cs rename to src/NuGet.Services.AzureSearch/AuxiliaryFiles/OwnerDataClient.cs index dcb51b5be..49edb0a29 100644 --- a/src/NuGet.Services.AzureSearch/OwnerDataClient.cs +++ b/src/NuGet.Services.AzureSearch/AuxiliaryFiles/OwnerDataClient.cs @@ -13,7 +13,7 @@ using Newtonsoft.Json; using NuGetGallery; -namespace NuGet.Services.AzureSearch +namespace NuGet.Services.AzureSearch.AuxiliaryFiles { public class OwnerDataClient : IOwnerDataClient { diff --git a/src/NuGet.Services.AzureSearch/AzureSearchTelemetryService.cs b/src/NuGet.Services.AzureSearch/AzureSearchTelemetryService.cs index f247de00b..8eed37f6e 100644 --- a/src/NuGet.Services.AzureSearch/AzureSearchTelemetryService.cs +++ b/src/NuGet.Services.AzureSearch/AzureSearchTelemetryService.cs @@ -123,14 +123,14 @@ public void TrackGetOwnersForPackageId(int ownerCount, TimeSpan elapsed) }); } - public void TrackReadLatestIndexedOwners(int ownerCount, TimeSpan elapsed) + public void TrackReadLatestIndexedOwners(int packageIdCount, TimeSpan elapsed) { _telemetryClient.TrackMetric( Prefix + "ReadLatestIndexedOwnersSeconds", elapsed.TotalSeconds, new Dictionary { - { "OwnerCount", ownerCount.ToString() }, + { "PackageIdCount", packageIdCount.ToString() }, }); } @@ -285,5 +285,26 @@ public void TrackLastCommitTimestampQuery(string indexName, DateTimeOffset? 
last { "LastCommitTimestamp", lastCommitTimestamp?.ToString("O") }, }); } + + public void TrackReadLatestIndexedDownloads(int packageIdCount, TimeSpan elapsed) + { + _telemetryClient.TrackMetric( + Prefix + "ReadLatestIndexedDownloadsSeconds", + elapsed.TotalSeconds, + new Dictionary + { + { "PackageIdCount", packageIdCount.ToString() }, + }); + } + + public IDisposable TrackReplaceLatestIndexedDownloads(int packageIdCount) + { + return _telemetryClient.TrackDuration( + Prefix + "ReplaceLatestIndexedDownloadsSeconds", + new Dictionary + { + { "PackageIdCount", packageIdCount.ToString() }, + }); + } } } diff --git a/src/NuGet.Services.AzureSearch/Db2AzureSearch/Db2AzureSearchCommand.cs b/src/NuGet.Services.AzureSearch/Db2AzureSearch/Db2AzureSearchCommand.cs index 13d6d63ad..602e700f0 100644 --- a/src/NuGet.Services.AzureSearch/Db2AzureSearch/Db2AzureSearchCommand.cs +++ b/src/NuGet.Services.AzureSearch/Db2AzureSearch/Db2AzureSearchCommand.cs @@ -10,6 +10,7 @@ using Microsoft.Extensions.Logging; using Microsoft.Extensions.Options; using NuGet.Protocol.Catalog; +using NuGet.Services.AzureSearch.AuxiliaryFiles; using NuGet.Services.AzureSearch.Catalog2AzureSearch; using NuGet.Services.Metadata.Catalog; using NuGet.Services.Metadata.Catalog.Persistence; diff --git a/src/NuGet.Services.AzureSearch/IAzureSearchTelemetryService.cs b/src/NuGet.Services.AzureSearch/IAzureSearchTelemetryService.cs index 730f65ded..5bc63aa2e 100644 --- a/src/NuGet.Services.AzureSearch/IAzureSearchTelemetryService.cs +++ b/src/NuGet.Services.AzureSearch/IAzureSearchTelemetryService.cs @@ -19,7 +19,7 @@ public interface IAzureSearchTelemetryService void TrackIndexPushSuccess(string indexName, int documentCount, TimeSpan elapsed); void TrackOwners2AzureSearchCompleted(bool success, TimeSpan elapsed); void TrackOwnerSetComparison(int oldCount, int newCount, int changeCount, TimeSpan elapsed); - void TrackReadLatestIndexedOwners(int ownerCount, TimeSpan elapsed); + void 
TrackReadLatestIndexedOwners(int packageIdCount, TimeSpan elapsed); void TrackReadLatestOwnersFromDatabase(int packageIdCount, TimeSpan elapsed); IDisposable TrackReplaceLatestIndexedOwners(int packageIdCount); IDisposable TrackUploadOwnerChangeHistory(int packageIdCount); @@ -34,5 +34,7 @@ public interface IAzureSearchTelemetryService void TrackWarmQuery(string indexName, TimeSpan elapsed); void TrackLastCommitTimestampQuery(string indexName, DateTimeOffset? lastCommitTimestamp, TimeSpan elapsed); IDisposable TrackCatalogLeafDownloadBatch(int count); + void TrackReadLatestIndexedDownloads(int packageIdCount, TimeSpan elapsed); + IDisposable TrackReplaceLatestIndexedDownloads(int packageIdCount); } } \ No newline at end of file diff --git a/src/NuGet.Services.AzureSearch/NuGet.Services.AzureSearch.csproj b/src/NuGet.Services.AzureSearch/NuGet.Services.AzureSearch.csproj index 07e93a77c..5dd0d8783 100644 --- a/src/NuGet.Services.AzureSearch/NuGet.Services.AzureSearch.csproj +++ b/src/NuGet.Services.AzureSearch/NuGet.Services.AzureSearch.csproj @@ -49,7 +49,11 @@ + + + + @@ -63,8 +67,8 @@ - - + + diff --git a/src/NuGet.Services.AzureSearch/Owners2AzureSearch/Owners2AzureSearchCommand.cs b/src/NuGet.Services.AzureSearch/Owners2AzureSearch/Owners2AzureSearchCommand.cs index 58623f396..cb9fda11f 100644 --- a/src/NuGet.Services.AzureSearch/Owners2AzureSearch/Owners2AzureSearchCommand.cs +++ b/src/NuGet.Services.AzureSearch/Owners2AzureSearch/Owners2AzureSearchCommand.cs @@ -8,6 +8,7 @@ using System.Threading.Tasks; using Microsoft.Extensions.Logging; using Microsoft.Extensions.Options; +using NuGet.Services.AzureSearch.AuxiliaryFiles; using NuGet.Services.Metadata.Catalog.Helpers; namespace NuGet.Services.AzureSearch.Owners2AzureSearch diff --git a/tests/NuGet.Services.AzureSearch.Tests/AuxiliaryFiles/DownloadByVersionDataFacts.cs b/tests/NuGet.Services.AzureSearch.Tests/AuxiliaryFiles/DownloadByVersionDataFacts.cs new file mode 100644 index 000000000..fb23ebe5f --- 
/dev/null +++ b/tests/NuGet.Services.AzureSearch.Tests/AuxiliaryFiles/DownloadByVersionDataFacts.cs @@ -0,0 +1,146 @@ +// Copyright (c) .NET Foundation. All rights reserved. +// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. + +using System; +using System.Linq; +using Xunit; + +namespace NuGet.Services.AzureSearch.AuxiliaryFiles +{ + public class DownloadsByVersionDataFacts + { + public class Total : Facts + { + [Fact] + public void StartsWithZero() + { + Assert.Equal(0, Target.Total); + } + + [Fact] + public void HasAllVersionCounts() + { + Target.SetDownloadCount(V1, 10); + Target.SetDownloadCount(V2, 11); + + Assert.Equal(21, Target.Total); + } + } + + public class GetDownloadCount : Facts + { + [Fact] + public void ReturnsDownloadCount() + { + Target.SetDownloadCount(V1, 10); + + Assert.Equal(10, Target.GetDownloadCount(V1)); + } + + [Fact] + public void AllowsDifferentCase() + { + Target.SetDownloadCount(V1, 10); + + Assert.Equal(10, Target.GetDownloadCount(V1Upper)); + } + + [Fact] + public void ReturnsZeroForMissingVersion() + { + Assert.Equal(0, Target.GetDownloadCount(V1)); + } + } + + public class SetDownloadCount : Facts + { + [Fact] + public void AllowsUpdatingDownloadCount() + { + Target.SetDownloadCount(V1, 10); + Target.SetDownloadCount(V1, 1); + + Assert.Equal(1, Target.GetDownloadCount(V1)); + Assert.Equal(1, Target.Total); + } + + [Fact] + public void AllowsUpdatingDownloadCountWithDifferentCase() + { + Target.SetDownloadCount(V1, 10); + Target.SetDownloadCount(V2, 5); + Target.SetDownloadCount(V1Upper, 1); + + Assert.Equal(1, Target.GetDownloadCount(V1)); + Assert.Equal(6, Target.Total); + } + + [Fact] + public void ReplacesCaseOfVersionString() + { + Target.SetDownloadCount(V1, 10); + Target.SetDownloadCount(V1Upper, 10); + + var pair = Assert.Single(Target); + Assert.Equal(V1Upper, pair.Key); + Assert.Equal(10, pair.Value); + } + + [Fact] + public void 
RemovesVersionWithZeroDownloads() + { + Target.SetDownloadCount(V1, 10); + Target.SetDownloadCount(V1Upper, 0); + + Assert.Empty(Target); + } + + [Fact] + public void RejectsNegativeDownloadCount() + { + var ex = Assert.Throws(() => Target.SetDownloadCount(V1, -1)); + Assert.Contains("The download count must not be negative.", ex.Message); + Assert.Equal("downloads", ex.ParamName); + } + } + + public class EnumerableImplementation : Facts + { + [Fact] + public void ReturnsVersionsInOrder() + { + Target.SetDownloadCount(V2, 2); + Target.SetDownloadCount(V3, 3); + Target.SetDownloadCount(V0, 0); + Target.SetDownloadCount(V1Upper, 1); + + var items = Target.ToArray(); + + Assert.Equal( + new[] + { + KeyValuePair.Create(V1Upper, 1L), + KeyValuePair.Create(V2, 2L), + KeyValuePair.Create(V3, 3L), + }, + items); + } + } + + public abstract class Facts + { + public const string V0 = "0.0.0"; + public const string V1 = "1.0.0-alpha"; + public const string V1Upper = "1.0.0-ALPHA"; + public const string V2 = "2.0.0"; + public const string V3 = "3.0.0"; + + public Facts() + { + Target = new DownloadByVersionData(); + } + + public DownloadByVersionData Target { get; } + } + } +} diff --git a/tests/NuGet.Services.AzureSearch.Tests/AuxiliaryFiles/DownloadDataClientFacts.cs b/tests/NuGet.Services.AzureSearch.Tests/AuxiliaryFiles/DownloadDataClientFacts.cs new file mode 100644 index 000000000..239b73091 --- /dev/null +++ b/tests/NuGet.Services.AzureSearch.Tests/AuxiliaryFiles/DownloadDataClientFacts.cs @@ -0,0 +1,301 @@ +// Copyright (c) .NET Foundation. All rights reserved. +// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. 
+ +using System; +using System.Collections.Generic; +using System.IO; +using System.Linq; +using System.Net; +using System.Text; +using System.Threading.Tasks; +using Microsoft.Extensions.Options; +using Microsoft.WindowsAzure.Storage; +using Microsoft.WindowsAzure.Storage.Blob; +using Moq; +using Newtonsoft.Json; +using Newtonsoft.Json.Linq; +using NuGet.Services.AzureSearch.Support; +using NuGetGallery; +using Xunit; +using Xunit.Abstractions; + +namespace NuGet.Services.AzureSearch.AuxiliaryFiles +{ + public class DownloadDataClientFacts + { + public class ReadLatestIndexedAsync : Facts + { + public ReadLatestIndexedAsync(ITestOutputHelper output) : base(output) + { + } + + [Fact] + public async Task AllowsEmptyObject() + { + var json = JsonConvert.SerializeObject(new Dictionary()); + CloudBlob + .Setup(x => x.OpenReadAsync(It.IsAny())) + .ReturnsAsync(() => new MemoryStream(Encoding.UTF8.GetBytes(json))); + + var output = await Target.ReadLatestIndexedAsync(); + + Assert.Empty(output.Result); + Assert.Equal(ETag, output.AccessCondition.IfMatchETag); + } + + [Fact] + public async Task AllowsMissingBlob() + { + CloudBlob + .Setup(x => x.OpenReadAsync(It.IsAny())) + .ThrowsAsync(new StorageException( + new RequestResult + { + HttpStatusCode = (int)HttpStatusCode.NotFound, + }, + message: "Not found.", + inner: null)); + + var output = await Target.ReadLatestIndexedAsync(); + + Assert.Empty(output.Result); + Assert.Equal("*", output.AccessCondition.IfNoneMatchETag); + } + + [Fact] + public async Task RejectsInvalidJson() + { + var json = JsonConvert.SerializeObject(new object[] + { + new object[] + { + "nuget.versioning", + new object[] + { + new object[] { "1.0.0", 5 }, + }, + }, + new object[] + { + "EntityFramework", + new object[] + { + new object[] { "2.0.0", 10 }, + }, + } + }); + CloudBlob + .Setup(x => x.OpenReadAsync(It.IsAny())) + .ReturnsAsync(() => new MemoryStream(Encoding.UTF8.GetBytes(json))); + + var ex = await Assert.ThrowsAsync( + () => 
Target.ReadLatestIndexedAsync()); + Assert.Equal("The first token should be the start of an object.", ex.Message); + } + + [Fact] + public async Task ReadsDownloads() + { + var json = JsonConvert.SerializeObject(new Dictionary> + { + { + "nuget.versioning", + new Dictionary + { + { "1.0.0", 1 }, + { "2.0.0-alpha", 5 }, + } + }, + { + "NuGet.Core", + new Dictionary() + }, + { + "EntityFramework", + new Dictionary + { + { "2.0.0", 10 }, + } + }, + }); + CloudBlob + .Setup(x => x.OpenReadAsync(It.IsAny())) + .ReturnsAsync(() => new MemoryStream(Encoding.UTF8.GetBytes(json))); + + var output = await Target.ReadLatestIndexedAsync(); + + Assert.Equal(new[] { "EntityFramework", "nuget.versioning" }, output.Result.Select(x => x.Key).ToArray()); + Assert.Equal(6, output.Result.GetDownloadCount("NuGet.Versioning")); + Assert.Equal(1, output.Result.GetDownloadCount("NuGet.Versioning", "1.0.0")); + Assert.Equal(5, output.Result.GetDownloadCount("NuGet.Versioning", "2.0.0-ALPHA")); + Assert.Equal(10, output.Result.GetDownloadCount("EntityFramework")); + Assert.Equal(ETag, output.AccessCondition.IfMatchETag); + + CloudBlobContainer.Verify(x => x.GetBlobReference("downloads.v2.json"), Times.Once); + } + } + + public class ReplaceLatestIndexedAsync : Facts + { + public ReplaceLatestIndexedAsync(ITestOutputHelper output) : base(output) + { + } + + [Fact] + public async Task SerializesWithoutBOM() + { + var newData = new DownloadData(); + + await Target.ReplaceLatestIndexedAsync(newData, AccessCondition.Object); + + var bytes = Assert.Single(SavedBytes); + Assert.Equal((byte)'{', bytes[0]); + } + + [Fact] + public async Task SetsContentType() + { + var newData = new DownloadData(); + + await Target.ReplaceLatestIndexedAsync(newData, AccessCondition.Object); + + Assert.Equal("application/json", CloudBlob.Object.Properties.ContentType); + } + + [Fact] + public async Task SerializedWithoutIndentation() + { + var newData = new DownloadData(); + 
newData.SetDownloadCount("nuget.versioning", "1.0.0", 1); + newData.SetDownloadCount("NuGet.Versioning", "2.0.0", 5); + newData.SetDownloadCount("EntityFramework", "3.0.0", 10); + + await Target.ReplaceLatestIndexedAsync(newData, AccessCondition.Object); + + var json = Assert.Single(SavedStrings); + Assert.DoesNotContain("\n", json); + } + + [Fact] + public async Task SerializesVersionsSortedOrder() + { + var newData = new DownloadData(); + newData.SetDownloadCount("ZZZ", "9.0.0", 23); + newData.SetDownloadCount("YYY", "9.0.0", 0); + newData.SetDownloadCount("nuget.versioning", "1.0.0", 1); + newData.SetDownloadCount("NuGet.Versioning", "2.0.0", 5); + newData.SetDownloadCount("EntityFramework", "3.0.0", 10); + newData.SetDownloadCount("EntityFramework", "1.0.0", 0); + + await Target.ReplaceLatestIndexedAsync(newData, AccessCondition.Object); + + // Pretty-ify the JSON to make the assertion clearer. + var json = Assert.Single(SavedStrings); + json = JsonConvert.DeserializeObject(json).ToString(); + + Assert.Equal(@"{ + ""EntityFramework"": { + ""3.0.0"": 10 + }, + ""NuGet.Versioning"": { + ""1.0.0"": 1, + ""2.0.0"": 5 + }, + ""ZZZ"": { + ""9.0.0"": 23 + } +}", json); + } + } + + public abstract class Facts + { + public Facts(ITestOutputHelper output) + { + CloudBlobClient = new Mock(); + CloudBlobContainer = new Mock(); + CloudBlob = new Mock(); + Options = new Mock>(); + TelemetryService = new Mock(); + Logger = output.GetLogger(); + Config = new AzureSearchJobConfiguration + { + StorageContainer = "unit-test-container", + }; + + ETag = "\"some-etag\""; + AccessCondition = new Mock(); + + Options + .Setup(x => x.Value) + .Returns(() => Config); + CloudBlobClient + .Setup(x => x.GetContainerReference(It.IsAny())) + .Returns(() => CloudBlobContainer.Object); + CloudBlobContainer + .Setup(x => x.GetBlobReference(It.IsAny())) + .Returns(() => CloudBlob.Object) + .Callback(x => BlobNames.Add(x)); + CloudBlob + .Setup(x => x.ETag) + .Returns(ETag); + CloudBlob + .Setup(x 
=> x.OpenWriteAsync(It.IsAny())) + .ReturnsAsync(() => new RecordingStream(bytes => + { + SavedBytes.Add(bytes); + SavedStrings.Add(Encoding.UTF8.GetString(bytes)); + })); + CloudBlob + .Setup(x => x.Properties) + .Returns(new CloudBlockBlob(new Uri("https://example/blob")).Properties); + + Target = new DownloadDataClient( + CloudBlobClient.Object, + Options.Object, + TelemetryService.Object, + Logger); + } + + public Mock CloudBlobClient { get; } + public Mock CloudBlobContainer { get; } + public Mock CloudBlob { get; } + public Mock> Options { get; } + public Mock TelemetryService { get; } + public RecordingLogger Logger { get; } + public AzureSearchJobConfiguration Config { get; } + public string ETag { get; } + public Mock AccessCondition { get; } + public DownloadDataClient Target { get; } + + public List BlobNames { get; } = new List(); + public List SavedBytes { get; } = new List(); + public List SavedStrings { get; } = new List(); + } + + private class RecordingStream : MemoryStream + { + private readonly object _lock = new object(); + private Action _onDispose; + + public RecordingStream(Action onDispose) + { + _onDispose = onDispose; + } + + protected override void Dispose(bool disposing) + { + lock (_lock) + { + if (_onDispose != null) + { + _onDispose(ToArray()); + _onDispose = null; + } + } + + base.Dispose(disposing); + } + } + } +} diff --git a/tests/NuGet.Services.AzureSearch.Tests/AuxiliaryFiles/DownloadDataFacts.cs b/tests/NuGet.Services.AzureSearch.Tests/AuxiliaryFiles/DownloadDataFacts.cs new file mode 100644 index 000000000..e9423300e --- /dev/null +++ b/tests/NuGet.Services.AzureSearch.Tests/AuxiliaryFiles/DownloadDataFacts.cs @@ -0,0 +1,146 @@ +// Copyright (c) .NET Foundation. All rights reserved. +// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. 
+
+using System;
+using System.Linq;
+using System.Text;
+using Xunit;
+
+namespace NuGet.Services.AzureSearch.AuxiliaryFiles
+{
+    public class DownloadDataFacts // Unit tests for DownloadData's per-ID and per-version download counts.
+    {
+        public class GetDownloadCountById : Facts // Covers the GetDownloadCount(id) overload.
+        {
+            [Fact]
+            public void ReturnsZeroForUnknownId()
+            {
+                Assert.Equal(0, Target.GetDownloadCount(IdA));
+            }
+
+            [Fact]
+            public void ReturnsTotalForId()
+            {
+                Target.SetDownloadCount(IdA, V1, 1);
+                Target.SetDownloadCount(IdB, V2, 5); // Different ID: must not contribute to IdA's total.
+                Target.SetDownloadCount(IdA, V3, 10);
+
+                Assert.Equal(11, Target.GetDownloadCount(IdA));
+            }
+        }
+
+        public class GetDownloadCountByIdAndVersion : Facts // Covers the GetDownloadCount(id, version) overload.
+        {
+            [Fact]
+            public void ReturnsZeroForUnknownIdAndVersion()
+            {
+                Assert.Equal(0, Target.GetDownloadCount(IdA, V1));
+            }
+
+            [Fact]
+            public void ReturnsZeroForUnknownVersion()
+            {
+                Target.SetDownloadCount(IdA, V1, 1);
+
+                Assert.Equal(0, Target.GetDownloadCount(IdA, V2));
+            }
+
+            [Fact]
+            public void ReturnsDownloadsForVersion()
+            {
+                Target.SetDownloadCount(IdA, V1, 1);
+
+                Assert.Equal(1, Target.GetDownloadCount(IdA, V1)); // Query by ID and version; the ID-only overload is covered by GetDownloadCountById.
+            }
+        }
+
+        public class SetDownloadCount : Facts // Covers SetDownloadCount(id, version, downloads).
+        {
+            [Fact]
+            public void AllowsUpdatingDownloadCount()
+            {
+                Target.SetDownloadCount(IdA, V1, 10);
+                Target.SetDownloadCount(IdA, V1, 1); // Second call replaces the first count rather than adding to it.
+
+                Assert.Equal(1, Target.GetDownloadCount(IdA, V1));
+                Assert.Equal(1, Target.GetDownloadCount(IdA));
+            }
+
+            [Fact]
+            public void AllowsUpdatingDownloadCountWithDifferentCase()
+            {
+                Target.SetDownloadCount(IdA, V1, 10);
+                Target.SetDownloadCount(IdA, V2, 5);
+                Target.SetDownloadCount(IdAUpper, V1, 1); // Same ID in a different case: replaces V1's count.
+
+                Assert.Equal(1, Target.GetDownloadCount(IdA, V1));
+                Assert.Equal(6, Target.GetDownloadCount(IdA)); // 1 (V1) + 5 (V2).
+            }
+
+            [Fact]
+            public void ReplacesCaseOfVersionString() // NOTE: despite the name, this exercises the casing of the package ID (IdA vs. IdAUpper).
+            {
+                Target.SetDownloadCount(IdA, V1, 10);
+                Target.SetDownloadCount(IdAUpper, V1, 10);
+
+                var pair = Assert.Single(Target);
+                Assert.Equal(IdAUpper, pair.Key); // The most recently supplied casing is kept.
+                Assert.Equal(10, pair.Value.Total);
+            }
+
+            [Fact]
+            public void RemovesVersionWithZeroDownloads()
+            {
+                Target.SetDownloadCount(IdA, V1, 10);
+                Target.SetDownloadCount(IdA, V1Upper, 0); // Zero for the only version (matched ignoring case) leaves no entries.
+
+                Assert.Empty(Target);
+            }
+
+            [Fact]
+            public void RejectsNegativeDownloadCount()
+            {
+                var ex = Assert.Throws<ArgumentOutOfRangeException>(() => Target.SetDownloadCount(IdA, V1, -1)); // Typed so that ParamName can be asserted below.
+                Assert.Contains("The download count must not be negative.", ex.Message);
+                Assert.Equal("downloads", ex.ParamName);
+            }
+
+            [Fact]
+            public void DedupesVersionStrings()
+            {
+                var v1A = new StringBuilder(V1).Append(string.Empty).ToString(); // Build two equal but distinct string instances.
+                var v1B = new StringBuilder(V1).Append(string.Empty).ToString();
+                Assert.NotSame(v1A, v1B);
+
+                Target.SetDownloadCount(IdA, v1A, 1);
+                Target.SetDownloadCount(IdB, v1B, 10);
+
+                var records = Target
+                    .SelectMany(i => i.Value.Select(v => new { Id = i.Key, Version = v.Key, Downloads = v.Value })) // Downloads is the count (v.Value), not the version key.
+                    .ToList();
+                Assert.Equal(2, records.Count);
+                Assert.Same(records[0].Version, records[1].Version); // Both IDs must reference one shared version string instance.
+            }
+        }
+
+        public abstract class Facts // Shared constants plus an empty DownloadData created per test class instance.
+        {
+            public const string V0 = "0.0.0"; // NOTE(review): not referenced within this file.
+            public const string V1 = "1.0.0-alpha"; // Has a label so that V1Upper can differ by case only.
+            public const string V1Upper = "1.0.0-ALPHA";
+            public const string V2 = "2.0.0";
+            public const string V3 = "3.0.0";
+
+            public const string IdA = "NuGet.Frameworks";
+            public const string IdAUpper = "NUGET.FRAMEWORKS"; // IdA in upper case.
+            public const string IdB = "NuGet.Versioning";
+
+            public Facts()
+            {
+                Target = new DownloadData();
+            }
+
+            public DownloadData Target { get; }
+        }
+    }
+}
diff --git a/tests/NuGet.Services.AzureSearch.Tests/Owners2AzureSearch/OwnerDataClientFacts.cs b/tests/NuGet.Services.AzureSearch.Tests/AuxiliaryFiles/OwnerDataClientFacts.cs
similarity index 99%
rename from tests/NuGet.Services.AzureSearch.Tests/Owners2AzureSearch/OwnerDataClientFacts.cs
rename to tests/NuGet.Services.AzureSearch.Tests/AuxiliaryFiles/OwnerDataClientFacts.cs
index b8bb6265f..bed782038 100644
--- a/tests/NuGet.Services.AzureSearch.Tests/Owners2AzureSearch/OwnerDataClientFacts.cs
+++ b/tests/NuGet.Services.AzureSearch.Tests/AuxiliaryFiles/OwnerDataClientFacts.cs
@@ -20,7 +20,7 @@
 using Xunit;
 using Xunit.Abstractions;
-namespace NuGet.Services.AzureSearch.Owners2AzureSearch +namespace NuGet.Services.AzureSearch.AuxiliaryFiles { public class OwnerDataClientFacts { diff --git a/tests/NuGet.Services.AzureSearch.Tests/Db2AzureSearch/Db2AzureSearchCommandFacts.cs b/tests/NuGet.Services.AzureSearch.Tests/Db2AzureSearch/Db2AzureSearchCommandFacts.cs index c6fb33b69..19b784ab1 100644 --- a/tests/NuGet.Services.AzureSearch.Tests/Db2AzureSearch/Db2AzureSearchCommandFacts.cs +++ b/tests/NuGet.Services.AzureSearch.Tests/Db2AzureSearch/Db2AzureSearchCommandFacts.cs @@ -11,6 +11,7 @@ using Microsoft.Extensions.Options; using Moq; using NuGet.Protocol.Catalog; +using NuGet.Services.AzureSearch.AuxiliaryFiles; using NuGet.Services.AzureSearch.Support; using NuGet.Services.Entities; using NuGet.Services.Metadata.Catalog.Persistence; diff --git a/tests/NuGet.Services.AzureSearch.Tests/NuGet.Services.AzureSearch.Tests.csproj b/tests/NuGet.Services.AzureSearch.Tests/NuGet.Services.AzureSearch.Tests.csproj index 44f02ef04..45c7303f4 100644 --- a/tests/NuGet.Services.AzureSearch.Tests/NuGet.Services.AzureSearch.Tests.csproj +++ b/tests/NuGet.Services.AzureSearch.Tests/NuGet.Services.AzureSearch.Tests.csproj @@ -37,6 +37,9 @@ + + + @@ -60,7 +63,7 @@ - + diff --git a/tests/NuGet.Services.AzureSearch.Tests/Owners2AzureSearch/Owners2AzureSearchCommandFacts.cs b/tests/NuGet.Services.AzureSearch.Tests/Owners2AzureSearch/Owners2AzureSearchCommandFacts.cs index a3b4cbd8a..9279e5a8d 100644 --- a/tests/NuGet.Services.AzureSearch.Tests/Owners2AzureSearch/Owners2AzureSearchCommandFacts.cs +++ b/tests/NuGet.Services.AzureSearch.Tests/Owners2AzureSearch/Owners2AzureSearchCommandFacts.cs @@ -8,6 +8,7 @@ using Microsoft.Azure.Search.Models; using Microsoft.Extensions.Options; using Moq; +using NuGet.Services.AzureSearch.AuxiliaryFiles; using NuGet.Services.AzureSearch.Support; using NuGetGallery; using Xunit;