diff --git a/src/NuGet.Jobs.GitHubIndexer/GitHubIndexerConfiguration.cs b/src/NuGet.Jobs.GitHubIndexer/GitHubIndexerConfiguration.cs index ce840e130..0200150df 100644 --- a/src/NuGet.Jobs.GitHubIndexer/GitHubIndexerConfiguration.cs +++ b/src/NuGet.Jobs.GitHubIndexer/GitHubIndexerConfiguration.cs @@ -24,5 +24,15 @@ public class GitHubIndexerConfiguration /// The number of concurrent threads running to index Git repositories /// public int MaxDegreeOfParallelism { get; set; } = 32; + + /// + /// The connection string to be used for a instance. + /// + public string StorageConnectionString { get; set; } + + /// + /// Gets a setting if Read Access Geo Redundant is enabled in azure storage + /// + public bool StorageReadAccessGeoRedundant { get; set; } } } diff --git a/src/NuGet.Jobs.GitHubIndexer/Job.cs b/src/NuGet.Jobs.GitHubIndexer/Job.cs index 73e1b6132..c82a40bb7 100644 --- a/src/NuGet.Jobs.GitHubIndexer/Job.cs +++ b/src/NuGet.Jobs.GitHubIndexer/Job.cs @@ -6,6 +6,8 @@ using Autofac; using Microsoft.Extensions.Configuration; using Microsoft.Extensions.DependencyInjection; +using Microsoft.Extensions.Options; +using NuGetGallery; using Octokit; namespace NuGet.Jobs.GitHubIndexer @@ -33,6 +35,10 @@ protected override void ConfigureJobServices(IServiceCollection services, IConfi services.AddTransient(); services.AddTransient(); services.AddTransient(); + services.AddTransient(provider => { + var config = provider.GetRequiredService>(); + return new CloudBlobClientWrapper(config.Value.StorageConnectionString, config.Value.StorageReadAccessGeoRedundant); + }); services.Configure(configurationRoot.GetSection(GitHubIndexerConfigurationSectionName)); } diff --git a/src/NuGet.Jobs.GitHubIndexer/ReposIndexer.cs b/src/NuGet.Jobs.GitHubIndexer/ReposIndexer.cs index c6b468d89..4c9b118f4 100644 --- a/src/NuGet.Jobs.GitHubIndexer/ReposIndexer.cs +++ b/src/NuGet.Jobs.GitHubIndexer/ReposIndexer.cs @@ -18,10 +18,12 @@ namespace NuGet.Jobs.GitHubIndexer public class ReposIndexer { private const string WorkingDirectory = "work"; + private const string BlobStorageContainerName = "content"; + private const string GitHubUsageFileName = "GitHubUsage.v1.json"; - private static readonly string GitHubUsageFilePath = Path.Combine(WorkingDirectory, "GitHubUsage.v1.json"); public static readonly string RepositoriesDirectory = Path.Combine(WorkingDirectory, "repos"); public static readonly string CacheDirectory = Path.Combine(WorkingDirectory, "cache"); + private static readonly JsonSerializer Serializer = new JsonSerializer(); private readonly IGitRepoSearcher _searcher; private readonly ILogger _logger; @@ -29,6 +31,7 @@ public class ReposIndexer private readonly IRepositoriesCache _repoCache; private readonly IRepoFetcher _repoFetcher; private readonly IConfigFileParser _configFileParser; + private readonly ICloudBlobClient _cloudClient; public ReposIndexer( IGitRepoSearcher searcher, @@ -36,6 +39,7 @@ public ReposIndexer( IRepositoriesCache repoCache, IConfigFileParser configFileParser, IRepoFetcher repoFetcher, + ICloudBlobClient cloudClient, IOptionsSnapshot configuration) { _searcher = searcher ?? throw new ArgumentNullException(nameof(searcher)); @@ -50,6 +54,7 @@ public ReposIndexer( } _maxDegreeOfParallelism = configuration.Value.MaxDegreeOfParallelism; + _cloudClient = cloudClient ?? throw new ArgumentNullException(nameof(cloudClient)); } public async Task RunAsync() @@ -80,14 +85,26 @@ await ProcessInParallel(inputBag, repo => .ThenBy(x => x.Id) .ToList(); - // TODO: Replace with upload to Azure Blob Storage (https://github.com/NuGet/NuGetGallery/issues/7211) - File.WriteAllText(GitHubUsageFilePath, JsonConvert.SerializeObject(finalList)); + await WriteFinalBlobAsync(finalList); // Delete the repos and cache directory Directory.Delete(RepositoriesDirectory, recursive: true); Directory.Delete(CacheDirectory, recursive: true); } + private async Task WriteFinalBlobAsync(List finalList) + { + var blobReference = _cloudClient.GetContainerReference(BlobStorageContainerName).GetBlobReference(GitHubUsageFileName); + + using (var stream = await blobReference.OpenWriteAsync(accessCondition: null)) + using (var streamWriter = new StreamWriter(stream)) + using (var jsonTextWriter = new JsonTextWriter(streamWriter)) + { + blobReference.Properties.ContentType = "application/json"; + Serializer.Serialize(jsonTextWriter, finalList); + } + } + private RepositoryInformation ProcessSingleRepo(WritableRepositoryInformation repo) { if (_repoCache.TryGetCachedVersion(repo, out var cachedVersion)) diff --git a/tests/NuGet.Jobs.GitHubIndexer.Tests/ReposIndexerFacts.cs b/tests/NuGet.Jobs.GitHubIndexer.Tests/ReposIndexerFacts.cs index 23096a724..17d700168 100644 --- a/tests/NuGet.Jobs.GitHubIndexer.Tests/ReposIndexerFacts.cs +++ b/tests/NuGet.Jobs.GitHubIndexer.Tests/ReposIndexerFacts.cs @@ -3,11 +3,16 @@ using System; using System.Collections.Generic; +using System.IO; using System.Linq; +using System.Text; using System.Threading.Tasks; using Microsoft.Extensions.Logging; using Microsoft.Extensions.Options; +using Microsoft.WindowsAzure.Storage; +using Microsoft.WindowsAzure.Storage.Blob; using Moq; +using Newtonsoft.Json; using NuGetGallery; using Xunit; @@ -18,6 +23,7 @@ public class ReposIndexerFacts private static ReposIndexer CreateIndexer( WritableRepositoryInformation searchResult, IReadOnlyList repoFiles, + Action onDisposeHandler, Func> configFileParser = null) { var mockConfig = new Mock>(); @@ -57,12 +63,36 @@ private static ReposIndexer CreateIndexer( .Setup(x => x.FetchRepo(It.IsAny())) .Returns(mockFetchedRepo.Object); + var mockBlobClient = new Mock(); + var mockBlobContainer = new Mock(); + var mockBlob = new Mock(); + + mockBlobClient + .Setup(x => x.GetContainerReference(It.IsAny())) + .Returns(() => mockBlobContainer.Object); + mockBlobContainer + .Setup(x => x.GetBlobReference(It.IsAny())) + .Returns(() => mockBlob.Object); + mockBlob + .Setup(x => x.ETag) + .Returns("\"some-etag\""); + mockBlob + .Setup(x => x.Properties) + .Returns(new CloudBlockBlob(new Uri("https://example/blob")).Properties); + mockBlob + .Setup(x => x.OpenWriteAsync(It.IsAny())) + .ReturnsAsync(() => new RecordingStream(bytes => + { + onDisposeHandler?.Invoke(Encoding.UTF8.GetString(bytes)); + })); + return new ReposIndexer( mockSearcher.Object, new Mock>().Object, mockRepoCache.Object, mockConfigFileParser.Object, mockRepoFetcher.Object, + mockBlobClient.Object, mockConfig.Object); } public class TheRunMethod @@ -71,7 +101,7 @@ public class TheRunMethod public async Task TestNoDependenciesInFiles() { var repo = new WritableRepositoryInformation("owner/test", url: "", stars: 100, description: "", mainBranch: "master"); - var configFileNames = new string[] { "packages.config", "someProjFile.csproj", "someProjFile.props", "someProjFile.targets"}; + var configFileNames = new string[] { "packages.config", "someProjFile.csproj", "someProjFile.props", "someProjFile.targets" }; var repoFiles = new List() { new GitFileInfo("file1.txt", 1), @@ -82,7 +112,7 @@ public async Task TestNoDependenciesInFiles() new GitFileInfo(configFileNames[3], 1) }; - var indexer = CreateIndexer(repo, repoFiles); + var indexer = CreateIndexer(repo, repoFiles, onDisposeHandler: null); await indexer.RunAsync(); var result = repo.ToRepositoryInformation(); @@ -93,8 +123,8 @@ public async Task TestNoDependenciesInFiles() public async Task TestWithDependenciesInFiles() { var repo = new WritableRepositoryInformation("owner/test", url: "", stars: 100, description: "", mainBranch: "master"); - var configFileNames = new string[] { "packages.config", "someProjFile.csproj", "someProjFile.props", "someProjFile.targets"}; - var repoDependencies = new string[] { "dependency1", "dependency2", "dependency3", "dependency4"}; + var configFileNames = new string[] { "packages.config", "someProjFile.csproj", "someProjFile.props", "someProjFile.targets" }; + var repoDependencies = new string[] { "dependency1", "dependency2", "dependency3", "dependency4" }; var repoFiles = new List() { new GitFileInfo("file1.txt", 1), @@ -104,12 +134,17 @@ public async Task TestWithDependenciesInFiles() new GitFileInfo(configFileNames[2], 1), new GitFileInfo(configFileNames[3], 1) }; - - var indexer = CreateIndexer(repo, repoFiles, (ICheckedOutFile file) => + var writeToBlobCalled = false; + var indexer = CreateIndexer(repo, repoFiles, configFileParser: (ICheckedOutFile file) => { // Make sure that the Indexer filters out the non-config files Assert.True(Array.Exists(configFileNames, x => string.Equals(x, file.Path))); return repoDependencies; + }, + onDisposeHandler: (string serializedText) => + { + writeToBlobCalled = true; + Assert.Equal(JsonConvert.SerializeObject(new RepositoryInformation[] { repo.ToRepositoryInformation() }), serializedText); }); await indexer.RunAsync(); @@ -123,6 +158,33 @@ public async Task TestWithDependenciesInFiles() Assert.Equal(repo.Id, result.Id); Assert.Equal(repo.Stars, result.Stars); Assert.Equal(repo.Url, result.Url); + + // Make sure the blob has been written + Assert.True(writeToBlobCalled); + } + } + private class RecordingStream : MemoryStream + { + private readonly object _lock = new object(); + private Action _onDispose; + + public RecordingStream(Action onDispose) + { + _onDispose = onDispose; + } + + protected override void Dispose(bool disposing) + { + lock (_lock) + { + if (_onDispose != null) + { + _onDispose(ToArray()); + _onDispose = null; + } + } + + base.Dispose(disposing); } } }