Skip to content
This repository has been archived by the owner on Jul 30, 2024. It is now read-only.
/ NuGet.Jobs Public archive

[GH Idx] Save blob to azurestorage #785

Merged
merged 148 commits into from
Jul 29, 2019
Merged
Show file tree
Hide file tree
Changes from 145 commits
Commits
Show all changes
148 commits
Select commit Hold shift + click to select a range
ba497cb
[GH Index] Initial commit
mogah Jun 13, 2019
d31c59e
[GH Index] Fixed build
mogah Jun 13, 2019
ae29856
Added License headers
mogah Jun 14, 2019
388b9d7
Changed Nuspec Id
mogah Jun 14, 2019
d1e407f
Changed Nuspec script include
mogah Jun 14, 2019
d0fcfe1
Added empty job
mogah Jun 14, 2019
6de40f3
[GH Idx] Added Octokit and LibGit2Sharp dependencies
mogah Jun 14, 2019
2d904d3
[GH Idx] Add initial GHSearcher
mogah Jun 14, 2019
ca96d25
[GH Idx] Add GitRepoSearcher
mogah Jun 14, 2019
2d346b5
[GH Idx] Add dependency injection
mogah Jun 15, 2019
eab0c9c
[GH Idx] Add null check
mogah Jun 17, 2019
d9a8eb2
[GH Idx] Add tests
mogah Jun 17, 2019
1a5ccba
[GH Idx] Extracted constants
mogah Jun 18, 2019
515bdd9
[GH Idx] Fixed tests
mogah Jun 18, 2019
a6d0256
Merge remote-tracking branch 'origin/dev' into mogah-github-indexer
mogah Jun 18, 2019
e2e093c
[GH Idx] Add Filters class
mogah Jun 18, 2019
8b5e2ca
Update src/NuGet.Jobs.GitHubIndexer/GitRepoSearchers/GitHubSearcher.cs
mogah Jun 18, 2019
e3fc3a4
Update src/NuGet.Jobs.GitHubIndexer/GitRepoSearchers/GitHubSearcher.cs
mogah Jun 18, 2019
6147012
[GH Idx] Removed duplicate class RepositoryInformation
mogah Jun 18, 2019
736df94
[GH Idx] Refactored the code a bit
mogah Jun 18, 2019
228e82e
[GH Idx] Fix possible deadlock
mogah Jun 18, 2019
ae4d8d2
[GH Idx] Add config section in the appsettings.json
mogah Jun 19, 2019
e4ef982
[GH Idx] GitHubSearcher is not recursive anymore!
mogah Jun 19, 2019
a03ddfc
[GH Idx] Removed redundant comparer
mogah Jun 19, 2019
a82c009
[GH Idx] Fix upperStarBound wrongly set on request
mogah Jun 19, 2019
d44803e
[GH Idx] Fixed sleep time
mogah Jun 20, 2019
f23f800
[GH Idx] Fix typo
mogah Jun 21, 2019
d34a3f4
[GH Idx] Made fields private
mogah Jun 21, 2019
c1c35df
[GH Idx] Changed UA
mogah Jun 21, 2019
8a91ad7
[GH Idx] Made the configuration not static
mogah Jun 21, 2019
b5c38ba
[GH Idx] Add ApiInfo doc in the tests
mogah Jun 21, 2019
677a900
[GH Idx] Refactor GH Search API requester
mogah Jun 24, 2019
422f9c6
[GH Idx] Removed redundant import in csproj
mogah Jun 24, 2019
6096aa1
[GH Idx] Add documentation to the configuration
mogah Jun 24, 2019
6d2731d
[GH Idx] Move the IGitHubClient to the GitHubSearchWrapper
mogah Jun 24, 2019
b904dff
[GH Idx] Remove redundant variable
mogah Jun 24, 2019
a411667
[GH Idx] Trim tests Assembly info
mogah Jun 24, 2019
4648816
[GH Idx] Add checks to ensure the required info is in the GitHub resp…
mogah Jun 24, 2019
c9fbb29
[GH Idx] Moved public method before private methods
mogah Jun 24, 2019
c2e771d
[GH Idx] Extract retry time in a static variable
mogah Jun 24, 2019
bbc09fb
[GH Idx] Add typecheck and fix tests
mogah Jun 24, 2019
73c268a
[GH Idx] Remove redundant using
mogah Jun 24, 2019
63aca24
[GH Idx] Nit space formatting
mogah Jun 24, 2019
07a2e45
[GH Idx] Change UserAgent to use assembly name and version
mogah Jun 24, 2019
8782195
[GH Idx] Remove extra line
mogah Jun 25, 2019
a25c3a2
[GH Idx] Fix nit picks
mogah Jun 25, 2019
08d0e9d
Merge branch 'mogah-github-indexer' into mogah-github-indexer-filter
mogah Jun 25, 2019
a57a7b6
[GH Idx] Fix merge
mogah Jun 25, 2019
fd53248
[GH Idx] First iteration of the filtering
mogah Jun 26, 2019
1c06836
[GH Idx] Simplified Job class
mogah Jun 26, 2019
61403c6
[GH Idx] WIP
mogah Jun 27, 2019
9422992
[GH Idx] Process repo is now run in parallel
mogah Jun 28, 2019
e28c836
[GH Idx] Removed debug code
mogah Jun 28, 2019
7f66006
[GH Idx] WIP 2
mogah Jul 2, 2019
3af354d
[GH Idx] Modify Filters doc
mogah Jul 2, 2019
0f4a126
[GH Idx] Refactor WritableRepositoryInformation
mogah Jul 3, 2019
0b35c88
[GH Idx] WIP 3
mogah Jul 3, 2019
8e4a200
[GH Idx] Add WritableRepoInfo doc
mogah Jul 3, 2019
3e8862a
[GH Idx] Made the MaxDegreeOfParallelism configurable
mogah Jul 3, 2019
f022a5c
[GH Idx] WIP before tests
mogah Jul 9, 2019
849c2dd
[GH Idx] Refactor to decouple from LibGit2Sharp
mogah Jul 10, 2019
4170fb9
[GH Idx] Using immutable collections
mogah Jul 11, 2019
18dba70
[GH Idx] Add tests
mogah Jul 11, 2019
ce473f2
[GH Idx] Clean old code
mogah Jul 11, 2019
af8cbe1
[GH Idx] Remove unused imports
mogah Jul 11, 2019
acc478d
[GH Idx] Add logging
mogah Jul 11, 2019
6d04510
[GH Idx] Cleanup
mogah Jul 11, 2019
020f53f
Merge branch 'dev' into mogah-github-indexer-filter
mogah Jul 11, 2019
928dccd
[GH Idx] Bumping up the NuGetGallery.Core dependency version
mogah Jul 11, 2019
9973bac
[GH Idx] Reverting changes to web.config
mogah Jul 11, 2019
a46dc41
[GH Idx] Add docs
mogah Jul 11, 2019
b0b562c
Cleaned up dependencies and bumped up NuGetGallery.Core version
mogah Jul 11, 2019
87e3339
[GH Idx] Fix PascalCase method name
mogah Jul 16, 2019
e9f7121
[GH Idx] Fix space
mogah Jul 16, 2019
0b0e3c6
[GH Idx] Remove redundant comment
mogah Jul 16, 2019
c55d54c
[GH Idx] Now using proper logger creation
mogah Jul 16, 2019
82e4c67
[GH Idx] Add new line for constructor
mogah Jul 16, 2019
aaeafe0
[GH Idx] Remove redundant filter config file type
mogah Jul 16, 2019
8f9c013
[GH Idx] Add RegEx timeout
mogah Jul 16, 2019
050825d
[GH Idx] Remove empty line
mogah Jul 16, 2019
7345bb1
[GH Idx] Add named params and remove redundant code for FetchedRepo
mogah Jul 16, 2019
a72ea62
[GH Idx] Add basePathLength to optimize Select
mogah Jul 16, 2019
19dec26
[GH Idx] Move the static constructor
mogah Jul 16, 2019
acabead
[GH Idx] Cache hit now logged as an information
mogah Jul 16, 2019
d84f1e1
[GH Idx] Use Path.Combine instead of string concatenation
mogah Jul 16, 2019
55cbc07
[GH Idx] Remove redundant comment
mogah Jul 16, 2019
84717e1
[GH Idx] Extract GitFileInfo class
mogah Jul 16, 2019
b7a42f7
[GH Idx] Remove redundant imports
mogah Jul 16, 2019
2c7141c
[GH Idx] Replace "as" cast
mogah Jul 16, 2019
85e1350
[GH Idx] Simplify LINQ statement
mogah Jul 16, 2019
8ec8568
[GH Idx] Simplify config file parsing
mogah Jul 16, 2019
6752220
[GH Idx] Simplify Thread construction
mogah Jul 16, 2019
beba4fa
[GH Idx] Move cache files to their own directory
mogah Jul 16, 2019
a9a45da
[GH Idx] Remove transitive exception throws in documentation
mogah Jul 16, 2019
ce6090d
Merge remote-tracking branch 'origin/dev' into mogah-github-indexer-f…
mogah Jul 16, 2019
a5d90f9
[GH Idx] Wrap long line in Filters
mogah Jul 16, 2019
5c858c6
[GH Idx] Make dependencies case-insensitive
mogah Jul 16, 2019
7f4db6b
[GH Idx] Use Path.Combine and remove extra line
mogah Jul 16, 2019
92fd75f
[GH Idx] Add named param
mogah Jul 16, 2019
823d835
Update src/NuGet.Jobs.GitHubIndexer/CheckedOutFile.cs
mogah Jul 18, 2019
af39863
[GH Idx] Move config in same section
mogah Jul 18, 2019
0bc34ce
[GH Idx] Remove redundant documentation
mogah Jul 18, 2019
f34fb90
[GH Idx] Check for unhandled config file type
mogah Jul 18, 2019
1d0c60e
[GH Idx] Rename function
mogah Jul 18, 2019
702ffd6
[GH Idx] Using Path.Combine in RepoUtils
mogah Jul 18, 2019
62bb46a
[GH Idx] Move isValidPackageId to RepoUtils
mogah Jul 18, 2019
bc04cdc
[GH Idx] Optimal LINQ usage in ReposIndexer
mogah Jul 18, 2019
21db460
[GH Idx] Move TODO
mogah Jul 18, 2019
bc1d02b
[GH Idx] Log Trace and Debug --> Information
mogah Jul 18, 2019
27e9aef
[GH IDx] Expanded msBuild and PkgConfig enums
mogah Jul 18, 2019
ba342ea
[GH Idx] Remove special regex case
mogah Jul 18, 2019
df0aea9
[GH Idx] Using stringComparer instead of ToLower() then comparing
mogah Jul 18, 2019
f982470
[GH Idx] Use repo.FullName instead of manually creating it
mogah Jul 18, 2019
cd94b74
[GH Idx] Filters early return
mogah Jul 18, 2019
1e50b3b
[GH Idx] Log warning for long paths
mogah Jul 18, 2019
8786b98
[GH Idx] Run --> RunAsync
mogah Jul 18, 2019
6e8b250
[GH Idx] Remove ServicePointManager init setup
mogah Jul 18, 2019
71af8f4
[GH Idx] workdir --> work
mogah Jul 18, 2019
4608fc1
[GH Idx] Remove as cast
mogah Jul 18, 2019
175223c
[GH Idx] LogTrace --> LogInformation for disk cache
mogah Jul 18, 2019
a1f5eeb
Merge remote-tracking branch 'origin/mogah-github-indexer-filter' int…
mogah Jul 18, 2019
9124f2f
[GH Idx] Save final blob to Azure Storage
mogah Jul 18, 2019
1bd6915
[GH Idx] Forgot a LogTrace there...
mogah Jul 19, 2019
af8fa1a
Update src/NuGet.Jobs.GitHubIndexer/ConfigFileParser.cs
mogah Jul 25, 2019
60588e5
[GH Idx] Update FetchedRepo comment
mogah Jul 25, 2019
cd67efe
[GH Idx] Update EndsWith --> Equals
mogah Jul 25, 2019
e2175fd
[GH Idx] Fix wrong documentation
mogah Jul 25, 2019
a78037d
[GH Idx] Revert Config properties for Azure BlobStorage
mogah Jul 25, 2019
247be3c
[GH Idx] Simplify LINQ statement
mogah Jul 25, 2019
139603d
[GH Idx] Format RepoUtils line to make it more readable
mogah Jul 25, 2019
b934f40
Merge remote-tracking branch 'origin/mogah-github-indexer-filter' int…
mogah Jul 25, 2019
d1c3142
[GH Idx] Got rid of few Singletons
mogah Jul 25, 2019
90df39c
[GH Idx] Scope logging
mogah Jul 26, 2019
94d2e19
[GH Idx] "No Description." --> ""
mogah Jul 26, 2019
5bbcb83
Merge branch 'mogah-github-indexer-filter' into mogah-save-blob-to-az…
mogah Jul 26, 2019
042dbd2
[GH Idx] Fix config
mogah Jul 26, 2019
769cbb1
Update src/NuGet.Jobs.GitHubIndexer/FetchedRepo.cs
mogah Jul 29, 2019
ea769df
Update src/NuGet.Jobs.GitHubIndexer/ReposIndexer.cs
mogah Jul 29, 2019
59f8206
[GH Idx] Inverted if statement in TryGetCachedVersion
mogah Jul 29, 2019
03af62c
[GH Idx] Fix timing to use UTC
mogah Jul 29, 2019
5a98d97
[GH Idx] Fix timing to use UTC
mogah Jul 29, 2019
595ded3
[GH Idx] Move assignment
mogah Jul 29, 2019
47f7253
Merge branch 'mogah-github-indexer-filter' into mogah-save-blob-to-az…
mogah Jul 29, 2019
f5bf7a2
Merge remote-tracking branch 'origin/dev' into mogah-save-blob-to-azu…
mogah Jul 29, 2019
c18c709
[GH Idx] Extract container name to constant
mogah Jul 29, 2019
155c3a3
[GH Idx] Move serializer
mogah Jul 29, 2019
3bbda1c
[GH Idx] Function rename
mogah Jul 29, 2019
9a07521
[GH Idx] Add tests to make sure blob is serialized correctly
mogah Jul 29, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions src/NuGet.Jobs.GitHubIndexer/GitHubIndexerConfiguration.cs
Original file line number Diff line number Diff line change
Expand Up @@ -24,5 +24,15 @@ public class GitHubIndexerConfiguration
/// The number of concurrent threads running to index Git repositories
/// </summary>
public int MaxDegreeOfParallelism { get; set; } = 32;

/// <summary>
/// The connection string to be used for a <see cref="NuGetGallery.CloudBlobClientWrapper"/> instance.
/// </summary>
public string StorageConnectionString { get; set; }

/// <summary>
/// Gets or sets whether Read Access Geo Redundant is enabled in Azure Storage.
/// </summary>
public bool StorageReadAccessGeoRedundant { get; set; }
}
}
22 changes: 19 additions & 3 deletions src/NuGet.Jobs.GitHubIndexer/ReposIndexer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,9 @@ namespace NuGet.Jobs.GitHubIndexer
public class ReposIndexer
{
private const string WorkingDirectory = "work";
private const string BlobStorageContainerName = "content";
private const string GitHubUsageFileName = "GitHubUsage.v1.json";

private static readonly string GitHubUsageFilePath = Path.Combine(WorkingDirectory, "GitHubUsage.v1.json");
public static readonly string RepositoriesDirectory = Path.Combine(WorkingDirectory, "repos");
public static readonly string CacheDirectory = Path.Combine(WorkingDirectory, "cache");

Expand All @@ -29,6 +30,7 @@ public class ReposIndexer
private readonly IRepositoriesCache _repoCache;
private readonly IRepoFetcher _repoFetcher;
private readonly IConfigFileParser _configFileParser;
private readonly ICloudBlobClient _cloudClient;

public ReposIndexer(
IGitRepoSearcher searcher,
Expand All @@ -50,6 +52,7 @@ public ReposIndexer(
}

_maxDegreeOfParallelism = configuration.Value.MaxDegreeOfParallelism;
_cloudClient = new CloudBlobClientWrapper(configuration.Value.StorageConnectionString, configuration.Value.StorageReadAccessGeoRedundant);
}

public async Task RunAsync()
Expand Down Expand Up @@ -80,14 +83,27 @@ await ProcessInParallel(inputBag, repo =>
.ThenBy(x => x.Id)
.ToList();

// TODO: Replace with upload to Azure Blob Storage (https://github.com/NuGet/NuGetGallery/issues/7211)
File.WriteAllText(GitHubUsageFilePath, JsonConvert.SerializeObject(finalList));
await WriteFinalBlob(finalList);

// Delete the repos and cache directory
Directory.Delete(RepositoriesDirectory, recursive: true);
Directory.Delete(CacheDirectory, recursive: true);
}

/// <summary>
/// Serializes <paramref name="finalList"/> as JSON and writes it to the
/// <see cref="GitHubUsageFileName"/> blob in the <see cref="BlobStorageContainerName"/> container.
/// </summary>
/// <param name="finalList">The repository information to serialize into the blob.</param>
private async Task WriteFinalBlob(List<RepositoryInformation> finalList)
loic-sharma marked this conversation as resolved.
Show resolved Hide resolved
{
var serializer = new JsonSerializer();
loic-sharma marked this conversation as resolved.
Show resolved Hide resolved
var blobReference = _cloudClient.GetContainerReference(BlobStorageContainerName).GetBlobReference(GitHubUsageFileName);

// Stream the JSON straight into the blob's write stream; no access condition is supplied.
using (var stream = await blobReference.OpenWriteAsync(accessCondition: null))
using (var streamWriter = new StreamWriter(stream))
using (var jsonTextWriter = new JsonTextWriter(streamWriter))
{
// NOTE(review): the content type is assigned after the write stream is opened; presumably it is
// applied when the stream is committed on dispose - confirm against the blob client wrapper.
blobReference.Properties.ContentType = "application/json";
serializer.Serialize(jsonTextWriter, finalList);
}
}

private RepositoryInformation ProcessSingleRepo(WritableRepositoryInformation repo)
{
if (_repoCache.TryGetCachedVersion(repo, out var cachedVersion))
Expand Down