This repository has been archived by the owner on Jul 30, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 21
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add DownloadDataClient to read and write downloads.v2.json (#591)
This client will be used to read and write the download data that was most recently indexed by Auxiliary2AzureSearch. This data will be compared to the latest downloads.v1.json and only the changes will be sent to Azure Search. This strategy is very similar to Owners2AzureSearch. Progress on NuGet/NuGetGallery#6458
- Loading branch information
1 parent
2c79274
commit e63d35b
Showing
18 changed files
with
954 additions
and
9 deletions.
There are no files selected for viewing
64 changes: 64 additions & 0 deletions
64
src/NuGet.Services.AzureSearch/AuxiliaryFiles/DownloadByVersionData.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
// Copyright (c) .NET Foundation. All rights reserved. | ||
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. | ||
|
||
using System; | ||
using System.Collections; | ||
using System.Collections.Generic; | ||
|
||
namespace NuGet.Services.AzureSearch.AuxiliaryFiles | ||
{ | ||
public class DownloadByVersionData : IReadOnlyDictionary<string, long> | ||
{ | ||
private readonly SortedDictionary<string, long> _versions | ||
= new SortedDictionary<string, long>(StringComparer.OrdinalIgnoreCase); | ||
|
||
public long Total { get; private set; } | ||
|
||
public long GetDownloadCount(string version) | ||
{ | ||
if (!_versions.TryGetValue(version, out var downloads)) | ||
{ | ||
return 0; | ||
} | ||
|
||
return downloads; | ||
} | ||
|
||
public void SetDownloadCount(string version, long downloads) | ||
{ | ||
if (downloads < 0) | ||
{ | ||
throw new ArgumentOutOfRangeException(nameof(downloads), "The download count must not be negative."); | ||
} | ||
|
||
if (_versions.TryGetValue(version, out var existingDownloads)) | ||
{ | ||
// Remove the previous version so that the latest case is retained. Versions are case insensitive but | ||
// we should try to respect the latest intent. | ||
_versions.Remove(version); | ||
} | ||
else | ||
{ | ||
existingDownloads = 0; | ||
} | ||
|
||
Total += downloads - existingDownloads; | ||
|
||
// Only store the download count if the value is not zero. | ||
if (downloads != 0) | ||
{ | ||
_versions.Add(version, downloads); | ||
} | ||
} | ||
|
||
public IEnumerable<string> Keys => _versions.Keys; | ||
public IEnumerable<long> Values => _versions.Values; | ||
public int Count => _versions.Count; | ||
public long this[string key] => _versions[key]; | ||
public IEnumerator<KeyValuePair<string, long>> GetEnumerator() => _versions.GetEnumerator(); | ||
public bool TryGetValue(string key, out long value) => _versions.TryGetValue(key, out value); | ||
IEnumerator IEnumerable.GetEnumerator() => GetEnumerator(); | ||
public bool ContainsKey(string key) => _versions.ContainsKey(key); | ||
} | ||
} | ||
|
87 changes: 87 additions & 0 deletions
87
src/NuGet.Services.AzureSearch/AuxiliaryFiles/DownloadData.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,87 @@ | ||
// Copyright (c) .NET Foundation. All rights reserved. | ||
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. | ||
|
||
using System; | ||
using System.Collections; | ||
using System.Collections.Generic; | ||
|
||
namespace NuGet.Services.AzureSearch.AuxiliaryFiles | ||
{ | ||
public class DownloadData : IReadOnlyDictionary<string, DownloadByVersionData> | ||
{ | ||
/// <summary> | ||
/// Maintain a lookup of version strings for de-duping. We maintain the original case for de-duping purposes | ||
/// by using the default string comparer. As of July of 2019 in PROD, maintaining original case adds less than | ||
/// 0.3% extra strings. De-duping version strings in general however removes 87.0% of the string allocations. | ||
/// Intuitively this means most people use the same case of a given version string and a lot of people use | ||
/// the same versions strings (common ones are 1.0.0, 1.0.1, 1.0.2, 1.1.0, etc). | ||
/// </summary> | ||
private readonly Dictionary<string, string> _uniqueVersions = new Dictionary<string, string>(); | ||
|
||
private readonly SortedDictionary<string, DownloadByVersionData> _ids | ||
= new SortedDictionary<string, DownloadByVersionData>(StringComparer.OrdinalIgnoreCase); | ||
|
||
public long GetDownloadCount(string id) | ||
{ | ||
if (!_ids.TryGetValue(id, out var versionData)) | ||
{ | ||
return 0; | ||
} | ||
|
||
return versionData.Total; | ||
} | ||
|
||
public long GetDownloadCount(string id, string version) | ||
{ | ||
if (!_ids.TryGetValue(id, out var versionData)) | ||
{ | ||
return 0; | ||
} | ||
|
||
return versionData.GetDownloadCount(version); | ||
} | ||
|
||
public void SetDownloadCount(string id, string version, long downloads) | ||
{ | ||
if (downloads < 0) | ||
{ | ||
throw new ArgumentOutOfRangeException(nameof(downloads), "The download count must not be negative."); | ||
} | ||
|
||
if (_ids.TryGetValue(id, out var versions)) | ||
{ | ||
// Remove the previous version so that the latest case is retained. IDs are case insensitive but we | ||
// should try to respect the latest intent. | ||
_ids.Remove(id); | ||
} | ||
else | ||
{ | ||
versions = new DownloadByVersionData(); | ||
} | ||
|
||
if (!_uniqueVersions.TryGetValue(version, out var dedupedVersion)) | ||
{ | ||
_uniqueVersions.Add(version, version); | ||
dedupedVersion = version; | ||
} | ||
|
||
versions.SetDownloadCount(dedupedVersion, downloads); | ||
|
||
// Only store the download count if the value is not zero. | ||
if (versions.Total != 0) | ||
{ | ||
_ids.Add(id, versions); | ||
} | ||
} | ||
|
||
public IEnumerable<string> Keys => _ids.Keys; | ||
public IEnumerable<DownloadByVersionData> Values => _ids.Values; | ||
public int Count => _ids.Count; | ||
public DownloadByVersionData this[string key] => _ids[key]; | ||
public IEnumerator<KeyValuePair<string, DownloadByVersionData>> GetEnumerator() => _ids.GetEnumerator(); | ||
public bool TryGetValue(string key, out DownloadByVersionData value) => _ids.TryGetValue(key, out value); | ||
IEnumerator IEnumerable.GetEnumerator() => GetEnumerator(); | ||
public bool ContainsKey(string key) => _ids.ContainsKey(key); | ||
} | ||
} | ||
|
153 changes: 153 additions & 0 deletions
153
src/NuGet.Services.AzureSearch/AuxiliaryFiles/DownloadDataClient.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,153 @@ | ||
// Copyright (c) .NET Foundation. All rights reserved. | ||
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. | ||
|
||
using System; | ||
using System.Diagnostics; | ||
using System.IO; | ||
using System.Net; | ||
using System.Threading.Tasks; | ||
using Microsoft.Extensions.Logging; | ||
using Microsoft.Extensions.Options; | ||
using Microsoft.WindowsAzure.Storage; | ||
using Newtonsoft.Json; | ||
using NuGetGallery; | ||
|
||
namespace NuGet.Services.AzureSearch.AuxiliaryFiles | ||
{ | ||
public class DownloadDataClient : IDownloadDataClient | ||
{ | ||
private static readonly JsonSerializer Serializer = new JsonSerializer(); | ||
|
||
private readonly ICloudBlobClient _cloudBlobClient; | ||
private readonly IOptionsSnapshot<AzureSearchJobConfiguration> _options; | ||
private readonly IAzureSearchTelemetryService _telemetryService; | ||
private readonly ILogger<DownloadDataClient> _logger; | ||
private readonly Lazy<ICloudBlobContainer> _lazyContainer; | ||
|
||
public DownloadDataClient( | ||
ICloudBlobClient cloudBlobClient, | ||
IOptionsSnapshot<AzureSearchJobConfiguration> options, | ||
IAzureSearchTelemetryService telemetryService, | ||
ILogger<DownloadDataClient> logger) | ||
{ | ||
_cloudBlobClient = cloudBlobClient ?? throw new ArgumentNullException(nameof(cloudBlobClient)); | ||
_options = options ?? throw new ArgumentNullException(nameof(cloudBlobClient)); | ||
_telemetryService = telemetryService ?? throw new ArgumentNullException(nameof(telemetryService)); | ||
_logger = logger ?? throw new ArgumentNullException(nameof(logger)); | ||
|
||
_lazyContainer = new Lazy<ICloudBlobContainer>( | ||
() => _cloudBlobClient.GetContainerReference(_options.Value.StorageContainer)); | ||
} | ||
|
||
private ICloudBlobContainer Container => _lazyContainer.Value; | ||
|
||
public async Task<ResultAndAccessCondition<DownloadData>> ReadLatestIndexedAsync() | ||
{ | ||
var stopwatch = Stopwatch.StartNew(); | ||
var blobName = GetLatestIndexedBlobName(); | ||
var blobReference = Container.GetBlobReference(blobName); | ||
|
||
_logger.LogInformation("Reading the latest indexed downloads from {BlobName}.", blobName); | ||
|
||
var downloads = new DownloadData(); | ||
IAccessCondition accessCondition; | ||
try | ||
{ | ||
using (var stream = await blobReference.OpenReadAsync(AccessCondition.GenerateEmptyCondition())) | ||
{ | ||
accessCondition = AccessConditionWrapper.GenerateIfMatchCondition(blobReference.ETag); | ||
ReadStream(stream, downloads.SetDownloadCount); | ||
} | ||
} | ||
catch (StorageException ex) when (ex.RequestInformation.HttpStatusCode == (int)HttpStatusCode.NotFound) | ||
{ | ||
accessCondition = AccessConditionWrapper.GenerateIfNotExistsCondition(); | ||
_logger.LogInformation("The blob {BlobName} does not exist.", blobName); | ||
} | ||
|
||
var output = new ResultAndAccessCondition<DownloadData>(downloads, accessCondition); | ||
|
||
stopwatch.Stop(); | ||
_telemetryService.TrackReadLatestIndexedDownloads(output.Result.Count, stopwatch.Elapsed); | ||
|
||
return output; | ||
} | ||
|
||
public async Task ReplaceLatestIndexedAsync( | ||
DownloadData newData, | ||
IAccessCondition accessCondition) | ||
{ | ||
using (_telemetryService.TrackReplaceLatestIndexedDownloads(newData.Count)) | ||
{ | ||
var blobName = GetLatestIndexedBlobName(); | ||
_logger.LogInformation("Replacing the latest indexed downloads from {BlobName}.", blobName); | ||
|
||
var mappedAccessCondition = new AccessCondition | ||
{ | ||
IfNoneMatchETag = accessCondition.IfNoneMatchETag, | ||
IfMatchETag = accessCondition.IfMatchETag, | ||
}; | ||
|
||
var blobReference = Container.GetBlobReference(blobName); | ||
|
||
using (var stream = await blobReference.OpenWriteAsync(mappedAccessCondition)) | ||
using (var streamWriter = new StreamWriter(stream)) | ||
using (var jsonTextWriter = new JsonTextWriter(streamWriter)) | ||
{ | ||
blobReference.Properties.ContentType = "application/json"; | ||
Serializer.Serialize(jsonTextWriter, newData); | ||
} | ||
} | ||
} | ||
|
||
private static void ReadStream( | ||
Stream stream, | ||
Action<string, string, long> addVersion) | ||
{ | ||
using (var textReader = new StreamReader(stream)) | ||
using (var jsonReader = new JsonTextReader(textReader)) | ||
{ | ||
Guard.Assert(jsonReader.Read(), "The blob should be readable."); | ||
Guard.Assert(jsonReader.TokenType == JsonToken.StartObject, "The first token should be the start of an object."); | ||
Guard.Assert(jsonReader.Read(), "There should be a second token."); | ||
|
||
while (jsonReader.TokenType == JsonToken.PropertyName) | ||
{ | ||
// We assume the package ID has valid characters. | ||
var id = (string)jsonReader.Value; | ||
|
||
Guard.Assert(jsonReader.Read(), "There should be a token after the package ID."); | ||
Guard.Assert(jsonReader.TokenType == JsonToken.StartObject, "The token after the package ID should be the start of an object."); | ||
Guard.Assert(jsonReader.Read(), "There should be a token after the start of the ID object."); | ||
|
||
while (jsonReader.TokenType == JsonToken.PropertyName) | ||
{ | ||
// We assume the package version is already normalized. | ||
var version = (string)jsonReader.Value; | ||
|
||
Guard.Assert(jsonReader.Read(), "There should be a token after the package version."); | ||
Guard.Assert(jsonReader.TokenType == JsonToken.Integer, "The token after the package version should be an integer."); | ||
|
||
var downloads = (long)jsonReader.Value; | ||
|
||
Guard.Assert(jsonReader.Read(), "There should be a token after the download count."); | ||
|
||
addVersion(id, version, downloads); | ||
} | ||
|
||
Guard.Assert(jsonReader.TokenType == JsonToken.EndObject, "The token after the package versions should be the end of an object."); | ||
Guard.Assert(jsonReader.Read(), "There should be a token after the package ID object."); | ||
} | ||
|
||
Guard.Assert(jsonReader.TokenType == JsonToken.EndObject, "The last token should be the end of an object."); | ||
Guard.Assert(!jsonReader.Read(), "There should be no token after the end of the object."); | ||
} | ||
} | ||
|
||
private string GetLatestIndexedBlobName() | ||
{ | ||
return $"{_options.Value.NormalizeStoragePath()}downloads.v2.json"; | ||
} | ||
} | ||
} | ||
|
14 changes: 14 additions & 0 deletions
14
src/NuGet.Services.AzureSearch/AuxiliaryFiles/IDownloadDataClient.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
// Copyright (c) .NET Foundation. All rights reserved. | ||
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. | ||
|
||
using System.Threading.Tasks; | ||
using NuGetGallery; | ||
|
||
namespace NuGet.Services.AzureSearch.AuxiliaryFiles | ||
{ | ||
    /// <summary> | ||
    /// Client for reading and replacing the latest indexed download data. | ||
    /// </summary> | ||
    public interface IDownloadDataClient | ||
    { | ||
        /// <summary> | ||
        /// Reads the latest indexed download data. | ||
        /// </summary> | ||
        /// <returns>The download data paired with an access condition.</returns> | ||
        Task<ResultAndAccessCondition<DownloadData>> ReadLatestIndexedAsync(); | ||
        /// <summary> | ||
        /// Replaces the latest indexed download data with the provided data. | ||
        /// </summary> | ||
        /// <param name="newData">The download data to store.</param> | ||
        /// <param name="accessCondition"> | ||
        /// The access condition for the write; presumably the one returned by | ||
        /// <see cref="ReadLatestIndexedAsync"/> so that concurrent changes are detected (confirm with callers). | ||
        /// </param> | ||
        Task ReplaceLatestIndexedAsync(DownloadData newData, IAccessCondition accessCondition); | ||
    } | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.