Skip to content
This repository has been archived by the owner on Jul 30, 2024. It is now read-only.
/ NuGet.Jobs Public archive

Commit

Permalink
Add DownloadDataClient to read and write downloads.v2.json (#591)
Browse files Browse the repository at this point in the history
This client will be used to read and write the download data that was most recently indexed by Auxiliary2AzureSearch.
This data will be compared to the latest downloads.v1.json and only the changes will be sent to Azure Search.
This strategy is very similar to Owners2AzureSearch.

Progress on NuGet/NuGetGallery#6458
  • Loading branch information
joelverhagen committed Jul 12, 2019
1 parent 2c79274 commit e63d35b
Show file tree
Hide file tree
Showing 18 changed files with 954 additions and 9 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
// Copyright (c) .NET Foundation. All rights reserved.
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.

using System;
using System.Collections;
using System.Collections.Generic;

namespace NuGet.Services.AzureSearch.AuxiliaryFiles
{
/// <summary>
/// Tracks the download count of each version of a single package together with a running total across all
/// versions. Version strings are compared case-insensitively and versions with zero downloads are not stored.
/// </summary>
public class DownloadByVersionData : IReadOnlyDictionary<string, long>
{
    private readonly SortedDictionary<string, long> _versions
        = new SortedDictionary<string, long>(StringComparer.OrdinalIgnoreCase);

    /// <summary>
    /// The sum of the download counts of all versions currently stored.
    /// </summary>
    public long Total { get; private set; }

    /// <summary>
    /// Gets the download count of the provided version, or zero if the version is not stored.
    /// </summary>
    /// <param name="version">The version string, matched case-insensitively.</param>
    /// <returns>The stored download count, or zero when there is no entry for the version.</returns>
    public long GetDownloadCount(string version)
    {
        return _versions.TryGetValue(version, out var count) ? count : 0;
    }

    /// <summary>
    /// Sets the download count of the provided version and adjusts <see cref="Total"/> by the difference
    /// between the new count and the previously stored count.
    /// </summary>
    /// <param name="version">The version string. The casing passed here replaces any previously stored casing.</param>
    /// <param name="downloads">The new download count. Zero removes the version's entry.</param>
    /// <exception cref="ArgumentOutOfRangeException">Thrown when <paramref name="downloads"/> is negative.</exception>
    public void SetDownloadCount(string version, long downloads)
    {
        if (downloads < 0)
        {
            throw new ArgumentOutOfRangeException(nameof(downloads), "The download count must not be negative.");
        }

        // A failed lookup leaves the out value at default(long), which is zero, so no else branch is needed.
        var isKnownVersion = _versions.TryGetValue(version, out var previousCount);
        if (isKnownVersion)
        {
            // Versions are case insensitive, so drop the old key to let the most recent casing win.
            _versions.Remove(version);
        }

        Total += downloads - previousCount;

        // Zero counts are never stored; the removal above (if any) is all that is needed.
        if (downloads == 0)
        {
            return;
        }

        _versions.Add(version, downloads);
    }

    public int Count => _versions.Count;

    public long this[string key]
    {
        get { return _versions[key]; }
    }

    public IEnumerable<string> Keys => _versions.Keys;

    public IEnumerable<long> Values => _versions.Values;

    public bool ContainsKey(string key) => _versions.ContainsKey(key);

    public bool TryGetValue(string key, out long value) => _versions.TryGetValue(key, out value);

    public IEnumerator<KeyValuePair<string, long>> GetEnumerator() => _versions.GetEnumerator();

    IEnumerator IEnumerable.GetEnumerator() => GetEnumerator();
}
}

87 changes: 87 additions & 0 deletions src/NuGet.Services.AzureSearch/AuxiliaryFiles/DownloadData.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
// Copyright (c) .NET Foundation. All rights reserved.
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.

using System;
using System.Collections;
using System.Collections.Generic;

namespace NuGet.Services.AzureSearch.AuxiliaryFiles
{
/// <summary>
/// Holds download counts keyed by package ID, with a per-version breakdown for each ID. Package IDs are compared
/// case-insensitively and IDs whose total download count is zero are not stored.
/// </summary>
public class DownloadData : IReadOnlyDictionary<string, DownloadByVersionData>
{
    /// <summary>
    /// Maintain a lookup of version strings for de-duping. We maintain the original case for de-duping purposes
    /// by using the default string comparer. As of July of 2019 in PROD, maintaining original case adds less than
    /// 0.3% extra strings. De-duping version strings in general however removes 87.0% of the string allocations.
    /// Intuitively this means most people use the same case of a given version string and a lot of people use
    /// the same version strings (common ones are 1.0.0, 1.0.1, 1.0.2, 1.1.0, etc).
    /// </summary>
    private readonly Dictionary<string, string> _uniqueVersions = new Dictionary<string, string>();

    private readonly SortedDictionary<string, DownloadByVersionData> _ids
        = new SortedDictionary<string, DownloadByVersionData>(StringComparer.OrdinalIgnoreCase);

    /// <summary>
    /// Gets the total download count across all versions of the provided package ID.
    /// </summary>
    /// <param name="id">The package ID, matched case-insensitively.</param>
    /// <returns>The total for the ID, or zero when the ID is not stored.</returns>
    public long GetDownloadCount(string id)
    {
        if (!_ids.TryGetValue(id, out var versionData))
        {
            return 0;
        }

        return versionData.Total;
    }

    /// <summary>
    /// Gets the download count of a specific version of the provided package ID.
    /// </summary>
    /// <param name="id">The package ID, matched case-insensitively.</param>
    /// <param name="version">The package version.</param>
    /// <returns>The count for the version, or zero when the ID is not stored.</returns>
    public long GetDownloadCount(string id, string version)
    {
        if (!_ids.TryGetValue(id, out var versionData))
        {
            return 0;
        }

        return versionData.GetDownloadCount(version);
    }

    /// <summary>
    /// Sets the download count of a specific version of a package ID. The ID's entry is removed entirely when its
    /// total download count becomes zero.
    /// </summary>
    /// <param name="id">The package ID. The casing passed here replaces any previously stored casing.</param>
    /// <param name="version">The package version.</param>
    /// <param name="downloads">The new download count for the version.</param>
    /// <exception cref="ArgumentOutOfRangeException">Thrown when <paramref name="downloads"/> is negative.</exception>
    /// <exception cref="ArgumentNullException">
    /// Thrown when <paramref name="id"/> or <paramref name="version"/> is null.
    /// </exception>
    public void SetDownloadCount(string id, string version, long downloads)
    {
        if (downloads < 0)
        {
            throw new ArgumentOutOfRangeException(nameof(downloads), "The download count must not be negative.");
        }

        // Validate both keys before touching any state. Previously a null version was only rejected by the
        // de-duping dictionary after the ID had already been removed, which silently dropped the ID's data.
        if (id == null)
        {
            throw new ArgumentNullException(nameof(id));
        }

        if (version == null)
        {
            throw new ArgumentNullException(nameof(version));
        }

        if (!_ids.TryGetValue(id, out var versions))
        {
            versions = new DownloadByVersionData();
        }

        if (!_uniqueVersions.TryGetValue(version, out var dedupedVersion))
        {
            _uniqueVersions.Add(version, version);
            dedupedVersion = version;
        }

        versions.SetDownloadCount(dedupedVersion, downloads);

        // Remove the previous ID key so that the latest case is retained. IDs are case insensitive but we
        // should try to respect the latest intent. This is done last so the map is only modified once all other
        // work has succeeded.
        _ids.Remove(id);

        // Only store the download count if the value is not zero.
        if (versions.Total != 0)
        {
            _ids.Add(id, versions);
        }
    }

    public IEnumerable<string> Keys => _ids.Keys;
    public IEnumerable<DownloadByVersionData> Values => _ids.Values;
    public int Count => _ids.Count;
    public DownloadByVersionData this[string key] => _ids[key];
    public IEnumerator<KeyValuePair<string, DownloadByVersionData>> GetEnumerator() => _ids.GetEnumerator();
    public bool TryGetValue(string key, out DownloadByVersionData value) => _ids.TryGetValue(key, out value);
    IEnumerator IEnumerable.GetEnumerator() => GetEnumerator();
    public bool ContainsKey(string key) => _ids.ContainsKey(key);
}
}

153 changes: 153 additions & 0 deletions src/NuGet.Services.AzureSearch/AuxiliaryFiles/DownloadDataClient.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
// Copyright (c) .NET Foundation. All rights reserved.
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.

using System;
using System.Diagnostics;
using System.IO;
using System.Net;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using Microsoft.WindowsAzure.Storage;
using Newtonsoft.Json;
using NuGetGallery;

namespace NuGet.Services.AzureSearch.AuxiliaryFiles
{
/// <summary>
/// Reads and writes the "downloads.v2.json" blob, which holds download counts per package ID and version, in the
/// storage container named by the job configuration.
/// </summary>
public class DownloadDataClient : IDownloadDataClient
{
    private static readonly JsonSerializer Serializer = new JsonSerializer();

    private readonly ICloudBlobClient _cloudBlobClient;
    private readonly IOptionsSnapshot<AzureSearchJobConfiguration> _options;
    private readonly IAzureSearchTelemetryService _telemetryService;
    private readonly ILogger<DownloadDataClient> _logger;
    private readonly Lazy<ICloudBlobContainer> _lazyContainer;

    /// <summary>
    /// Initializes the client. The container reference is resolved lazily on first use.
    /// </summary>
    /// <exception cref="ArgumentNullException">Thrown when any argument is null.</exception>
    public DownloadDataClient(
        ICloudBlobClient cloudBlobClient,
        IOptionsSnapshot<AzureSearchJobConfiguration> options,
        IAzureSearchTelemetryService telemetryService,
        ILogger<DownloadDataClient> logger)
    {
        _cloudBlobClient = cloudBlobClient ?? throw new ArgumentNullException(nameof(cloudBlobClient));
        // Report the parameter that is actually null (this previously named cloudBlobClient by mistake).
        _options = options ?? throw new ArgumentNullException(nameof(options));
        _telemetryService = telemetryService ?? throw new ArgumentNullException(nameof(telemetryService));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));

        _lazyContainer = new Lazy<ICloudBlobContainer>(
            () => _cloudBlobClient.GetContainerReference(_options.Value.StorageContainer));
    }

    private ICloudBlobContainer Container => _lazyContainer.Value;

    /// <summary>
    /// Reads the latest indexed download data from storage.
    /// </summary>
    /// <returns>
    /// The parsed download data paired with an access condition. When the blob exists, the condition is an
    /// if-match condition on the blob's ETag. When the blob does not exist (HTTP 404), the data is empty and the
    /// condition is an if-not-exists condition.
    /// </returns>
    public async Task<ResultAndAccessCondition<DownloadData>> ReadLatestIndexedAsync()
    {
        var stopwatch = Stopwatch.StartNew();
        var blobName = GetLatestIndexedBlobName();
        var blobReference = Container.GetBlobReference(blobName);

        _logger.LogInformation("Reading the latest indexed downloads from {BlobName}.", blobName);

        var downloads = new DownloadData();
        IAccessCondition accessCondition;
        try
        {
            using (var stream = await blobReference.OpenReadAsync(AccessCondition.GenerateEmptyCondition()))
            {
                // Capture the ETag of the blob that was opened so a later write can be made conditional on it.
                accessCondition = AccessConditionWrapper.GenerateIfMatchCondition(blobReference.ETag);
                ReadStream(stream, downloads.SetDownloadCount);
            }
        }
        catch (StorageException ex) when (ex.RequestInformation.HttpStatusCode == (int)HttpStatusCode.NotFound)
        {
            // A missing blob is not an error: return empty data and require that the blob still not exist on write.
            accessCondition = AccessConditionWrapper.GenerateIfNotExistsCondition();
            _logger.LogInformation("The blob {BlobName} does not exist.", blobName);
        }

        var output = new ResultAndAccessCondition<DownloadData>(downloads, accessCondition);

        stopwatch.Stop();
        _telemetryService.TrackReadLatestIndexedDownloads(output.Result.Count, stopwatch.Elapsed);

        return output;
    }

    /// <summary>
    /// Serializes the provided download data as JSON and writes it to the latest indexed blob.
    /// </summary>
    /// <param name="newData">The download data to write.</param>
    /// <param name="accessCondition">
    /// The condition under which the write is attempted. Only its if-match and if-none-match ETags are used.
    /// </param>
    public async Task ReplaceLatestIndexedAsync(
        DownloadData newData,
        IAccessCondition accessCondition)
    {
        using (_telemetryService.TrackReplaceLatestIndexedDownloads(newData.Count))
        {
            var blobName = GetLatestIndexedBlobName();
            _logger.LogInformation("Replacing the latest indexed downloads from {BlobName}.", blobName);

            var mappedAccessCondition = new AccessCondition
            {
                IfNoneMatchETag = accessCondition.IfNoneMatchETag,
                IfMatchETag = accessCondition.IfMatchETag,
            };

            var blobReference = Container.GetBlobReference(blobName);

            using (var stream = await blobReference.OpenWriteAsync(mappedAccessCondition))
            using (var streamWriter = new StreamWriter(stream))
            using (var jsonTextWriter = new JsonTextWriter(streamWriter))
            {
                // NOTE(review): presumably the content type is applied when the write stream is committed on
                // dispose; confirm against the blob wrapper's behavior.
                blobReference.Properties.ContentType = "application/json";
                Serializer.Serialize(jsonTextWriter, newData);
            }
        }
    }

    /// <summary>
    /// Parses a JSON document of the shape <c>{ "id": { "version": 123, ... }, ... }</c> token by token and
    /// invokes <paramref name="addVersion"/> for every (ID, version, download count) triple that is read.
    /// Any deviation from the expected shape fails a <c>Guard.Assert</c> check.
    /// </summary>
    /// <param name="stream">The stream containing the JSON document. It is disposed by this method.</param>
    /// <param name="addVersion">The callback receiving the package ID, version, and download count.</param>
    private static void ReadStream(
        Stream stream,
        Action<string, string, long> addVersion)
    {
        using (var textReader = new StreamReader(stream))
        using (var jsonReader = new JsonTextReader(textReader))
        {
            Guard.Assert(jsonReader.Read(), "The blob should be readable.");
            Guard.Assert(jsonReader.TokenType == JsonToken.StartObject, "The first token should be the start of an object.");
            Guard.Assert(jsonReader.Read(), "There should be a second token.");

            // Outer loop: one property per package ID.
            while (jsonReader.TokenType == JsonToken.PropertyName)
            {
                // We assume the package ID has valid characters.
                var id = (string)jsonReader.Value;

                Guard.Assert(jsonReader.Read(), "There should be a token after the package ID.");
                Guard.Assert(jsonReader.TokenType == JsonToken.StartObject, "The token after the package ID should be the start of an object.");
                Guard.Assert(jsonReader.Read(), "There should be a token after the start of the ID object.");

                // Inner loop: one property per version of the current package ID.
                while (jsonReader.TokenType == JsonToken.PropertyName)
                {
                    // We assume the package version is already normalized.
                    var version = (string)jsonReader.Value;

                    Guard.Assert(jsonReader.Read(), "There should be a token after the package version.");
                    Guard.Assert(jsonReader.TokenType == JsonToken.Integer, "The token after the package version should be an integer.");

                    var downloads = (long)jsonReader.Value;

                    Guard.Assert(jsonReader.Read(), "There should be a token after the download count.");

                    addVersion(id, version, downloads);
                }

                Guard.Assert(jsonReader.TokenType == JsonToken.EndObject, "The token after the package versions should be the end of an object.");
                Guard.Assert(jsonReader.Read(), "There should be a token after the package ID object.");
            }

            Guard.Assert(jsonReader.TokenType == JsonToken.EndObject, "The last token should be the end of an object.");
            Guard.Assert(!jsonReader.Read(), "There should be no token after the end of the object.");
        }
    }

    /// <summary>
    /// Builds the blob name by appending "downloads.v2.json" to the configured, normalized storage path.
    /// </summary>
    private string GetLatestIndexedBlobName()
    {
        return $"{_options.Value.NormalizeStoragePath()}downloads.v2.json";
    }
}
}

Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
// Copyright (c) .NET Foundation. All rights reserved.
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.

using System.Threading.Tasks;
using NuGetGallery;

namespace NuGet.Services.AzureSearch.AuxiliaryFiles
{
/// <summary>
/// Reads and writes the download data that was most recently indexed, as stored in blob storage.
/// </summary>
public interface IDownloadDataClient
{
/// <summary>
/// Reads the latest indexed download data.
/// </summary>
/// <returns>The download data paired with an access condition that can be passed to a subsequent replace.</returns>
Task<ResultAndAccessCondition<DownloadData>> ReadLatestIndexedAsync();
/// <summary>
/// Replaces the latest indexed download data with the provided data.
/// </summary>
/// <param name="newData">The download data to store.</param>
/// <param name="accessCondition">The access condition under which the replace is attempted.</param>
Task ReplaceLatestIndexedAsync(DownloadData newData, IAccessCondition accessCondition);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
using System.Threading.Tasks;
using NuGetGallery;

namespace NuGet.Services.AzureSearch
namespace NuGet.Services.AzureSearch.AuxiliaryFiles
{
/// <summary>
/// The purpose of this interface is to allow reading and writing owner information from storage. The Catalog2Owners
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
using Newtonsoft.Json;
using NuGetGallery;

namespace NuGet.Services.AzureSearch
namespace NuGet.Services.AzureSearch.AuxiliaryFiles
{
public class OwnerDataClient : IOwnerDataClient
{
Expand Down
25 changes: 23 additions & 2 deletions src/NuGet.Services.AzureSearch/AzureSearchTelemetryService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -123,14 +123,14 @@ public void TrackGetOwnersForPackageId(int ownerCount, TimeSpan elapsed)
});
}

public void TrackReadLatestIndexedOwners(int ownerCount, TimeSpan elapsed)
public void TrackReadLatestIndexedOwners(int packageIdCount, TimeSpan elapsed)
{
_telemetryClient.TrackMetric(
Prefix + "ReadLatestIndexedOwnersSeconds",
elapsed.TotalSeconds,
new Dictionary<string, string>
{
{ "OwnerCount", ownerCount.ToString() },
{ "PackageIdCount", packageIdCount.ToString() },
});
}

Expand Down Expand Up @@ -285,5 +285,26 @@ public void TrackLastCommitTimestampQuery(string indexName, DateTimeOffset? last
{ "LastCommitTimestamp", lastCommitTimestamp?.ToString("O") },
});
}

/// <summary>
/// Emits the "ReadLatestIndexedDownloadsSeconds" metric (with the shared prefix) for a completed read of the
/// latest indexed download data.
/// </summary>
/// <param name="packageIdCount">The number of package IDs that were read; recorded as a metric property.</param>
/// <param name="elapsed">The time the read took; its total seconds are the metric value.</param>
public void TrackReadLatestIndexedDownloads(int packageIdCount, TimeSpan elapsed)
{
    var metricName = Prefix + "ReadLatestIndexedDownloadsSeconds";
    var properties = new Dictionary<string, string>
    {
        ["PackageIdCount"] = packageIdCount.ToString(),
    };

    _telemetryClient.TrackMetric(metricName, elapsed.TotalSeconds, properties);
}

/// <summary>
/// Starts tracking the "ReplaceLatestIndexedDownloadsSeconds" duration metric (with the shared prefix) for a
/// replace of the latest indexed download data.
/// </summary>
/// <param name="packageIdCount">The number of package IDs being written; recorded as a metric property.</param>
/// <returns>
/// The disposable returned by the telemetry client's <c>TrackDuration</c>. Presumably the duration is recorded
/// when it is disposed; confirm against the telemetry client.
/// </returns>
public IDisposable TrackReplaceLatestIndexedDownloads(int packageIdCount)
{
    var metricName = Prefix + "ReplaceLatestIndexedDownloadsSeconds";
    var properties = new Dictionary<string, string>
    {
        ["PackageIdCount"] = packageIdCount.ToString(),
    };

    return _telemetryClient.TrackDuration(metricName, properties);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using NuGet.Protocol.Catalog;
using NuGet.Services.AzureSearch.AuxiliaryFiles;
using NuGet.Services.AzureSearch.Catalog2AzureSearch;
using NuGet.Services.Metadata.Catalog;
using NuGet.Services.Metadata.Catalog.Persistence;
Expand Down
Loading

0 comments on commit e63d35b

Please sign in to comment.