From b4bae8cfd76e6b06152f4ba96f0787c91fe6f8f6 Mon Sep 17 00:00:00 2001 From: Joel Verhagen Date: Tue, 28 Jan 2020 11:36:50 -0800 Subject: [PATCH] Delete Db2Lucene, Catalog2Lucene, and associated tests Progress on https://github.com/NuGet/NuGetGallery/issues/7822 --- build.ps1 | 1 - sign.thirdparty.props | 12 - src/Ng/Arguments.cs | 28 - src/Ng/Catalog2Lucene.nuspec | 16 - src/Ng/CommandHelpers.cs | 114 - src/Ng/Eula.htm | 649 ----- src/Ng/Extensions/TaskExtensions.cs | 35 - src/Ng/Jobs/Catalog2LuceneJob.cs | 194 -- src/Ng/Jobs/CheckLuceneJob.cs | 66 - src/Ng/Jobs/ClearLuceneJob.cs | 58 - src/Ng/Jobs/CopyLuceneJob.cs | 76 - src/Ng/Jobs/Db2LuceneJob.cs | 55 - src/Ng/Json/JTokenExtensions.cs | 21 - src/Ng/LuceneCursor.cs | 55 - src/Ng/Ng.csproj | 10 - src/Ng/NgJobFactory.cs | 5 - src/Ng/Readme.html | 2190 ----------------- src/Ng/SearchIndexFromCatalogCollector.cs | 386 --- src/NuGet.Indexing/CamelCaseFilter.cs | 155 -- src/NuGet.Indexing/CustomSimilarity.cs | 25 - src/NuGet.Indexing/DescriptionAnalyzer.cs | 15 - src/NuGet.Indexing/DotTokenizer.cs | 36 - src/NuGet.Indexing/ExpandAcronymsFilter.cs | 127 - .../Extraction/CatalogNuspecReader.cs | 93 - .../CatalogPackageMetadataExtraction.cs | 347 --- .../Extraction/CatalogPackageReader.cs | 101 - .../Extraction/DocumentCreator.cs | 396 --- .../Extraction/JTokenExtensions.cs | 23 - .../Extraction/LuceneCommitMetadata.cs | 35 - .../Extraction/MetadataConstants.cs | 100 - .../IAcronymExpansionProvider.cs | 13 - src/NuGet.Indexing/IdentifierAnalyzer.cs | 17 - .../IdentifierAutocompleteAnalyzer.cs | 16 - .../IdentifierKeywordAnalyzer.cs | 15 - src/NuGet.Indexing/NuGet.Indexing.csproj | 35 - .../NuGetAcronymExpansionProvider.cs | 76 - src/NuGet.Indexing/NuGetMergePolicyApplyer.cs | 42 - src/NuGet.Indexing/OwnerAnalyzer.cs | 15 - src/NuGet.Indexing/PackageAnalyzer.cs | 37 - src/NuGet.Indexing/Resources/Acronyms.json | 35 - src/NuGet.Indexing/SemanticVersionFilter.cs | 43 - .../ShingledIdentifierAnalyzer.cs | 16 - src/NuGet.Indexing/Sql2Lucene.cs | 313 --- src/NuGet.Indexing/TagsAnalyzer.cs | 16 - src/NuGet.Indexing/TokenizingHelper.cs | 23 - src/NuGet.Indexing/VersionAnalyzer.cs | 15 - .../NuGet.Services.SearchService.csproj | 12 - tests/NgTests/NgTests.csproj | 5 - .../SearchIndexFromCatalogCollectorTests.cs | 147 -- tests/NgTests/StuckIndexWriter.cs | 42 - .../CamelCaseFilterTests.cs | 252 -- .../DescriptionAnalyzerTests.cs | 153 -- .../NuGet.IndexingTests/DotTokenizerTests.cs | 61 - .../ExpandAcronymsFilterTests.cs | 92 - .../CatalogPackageMetadataExtractorTests.cs | 793 ------ .../Extraction/DocumentCreatorTests.cs | 279 --- .../IdentifierAnalyzerTests.cs | 63 - .../IdentifierAutocompleteAnalyzerTests.cs | 119 - .../IdentifierKeywordAnalyzerTests.cs | 46 - .../NuGet.IndexingTests.csproj | 21 - .../NuGet.IndexingTests/OwnerAnalyzerTests.cs | 43 - .../PackageAnalyzerTests.cs | 137 -- .../ShingledIdentifierAnalyzerTests.cs | 80 - .../NuGet.IndexingTests/TagsAnalyzerTests.cs | 68 - .../TestSupport/Constants.cs | 80 - .../TestSupport/MockObjectFactory.cs | 78 - .../TestSupport/TokenAttributes.cs | 58 - .../TestSupport/TokenStreamExtensions.cs | 45 - .../VersionAnalyzerTests.cs | 46 - 69 files changed, 8871 deletions(-) delete mode 100644 src/Ng/Catalog2Lucene.nuspec delete mode 100644 src/Ng/Eula.htm delete mode 100644 src/Ng/Extensions/TaskExtensions.cs delete mode 100644 src/Ng/Jobs/Catalog2LuceneJob.cs delete mode 100644 src/Ng/Jobs/CheckLuceneJob.cs delete mode 100644 src/Ng/Jobs/ClearLuceneJob.cs delete mode 100644 src/Ng/Jobs/CopyLuceneJob.cs delete mode 100644 src/Ng/Jobs/Db2LuceneJob.cs delete mode 100644 src/Ng/Json/JTokenExtensions.cs delete mode 100644 src/Ng/LuceneCursor.cs delete mode 100644 src/Ng/Readme.html delete mode 100644 src/Ng/SearchIndexFromCatalogCollector.cs delete mode 100644 src/NuGet.Indexing/CamelCaseFilter.cs delete mode 100644 src/NuGet.Indexing/CustomSimilarity.cs delete mode 100644 src/NuGet.Indexing/DescriptionAnalyzer.cs delete mode 100644 src/NuGet.Indexing/DotTokenizer.cs delete mode 100644 src/NuGet.Indexing/ExpandAcronymsFilter.cs delete mode 100644 src/NuGet.Indexing/Extraction/CatalogNuspecReader.cs delete mode 100644 src/NuGet.Indexing/Extraction/CatalogPackageMetadataExtraction.cs delete mode 100644 src/NuGet.Indexing/Extraction/CatalogPackageReader.cs delete mode 100644 src/NuGet.Indexing/Extraction/DocumentCreator.cs delete mode 100644 src/NuGet.Indexing/Extraction/JTokenExtensions.cs delete mode 100644 src/NuGet.Indexing/Extraction/LuceneCommitMetadata.cs delete mode 100644 src/NuGet.Indexing/Extraction/MetadataConstants.cs delete mode 100644 src/NuGet.Indexing/IAcronymExpansionProvider.cs delete mode 100644 src/NuGet.Indexing/IdentifierAnalyzer.cs delete mode 100644 src/NuGet.Indexing/IdentifierAutocompleteAnalyzer.cs delete mode 100644 src/NuGet.Indexing/IdentifierKeywordAnalyzer.cs delete mode 100644 src/NuGet.Indexing/NuGetAcronymExpansionProvider.cs delete mode 100644 src/NuGet.Indexing/NuGetMergePolicyApplyer.cs delete mode 100644 src/NuGet.Indexing/OwnerAnalyzer.cs delete mode 100644 src/NuGet.Indexing/PackageAnalyzer.cs delete mode 100644 src/NuGet.Indexing/Resources/Acronyms.json delete mode 100644 src/NuGet.Indexing/SemanticVersionFilter.cs delete mode 100644 src/NuGet.Indexing/ShingledIdentifierAnalyzer.cs delete mode 100644 src/NuGet.Indexing/Sql2Lucene.cs delete mode 100644 src/NuGet.Indexing/TagsAnalyzer.cs delete mode 100644 src/NuGet.Indexing/TokenizingHelper.cs delete mode 100644 src/NuGet.Indexing/VersionAnalyzer.cs delete mode 100644 tests/NgTests/SearchIndexFromCatalogCollectorTests.cs delete mode 100644 tests/NgTests/StuckIndexWriter.cs delete mode 100644 tests/NuGet.IndexingTests/CamelCaseFilterTests.cs delete mode 100644 tests/NuGet.IndexingTests/DescriptionAnalyzerTests.cs delete mode 100644 tests/NuGet.IndexingTests/DotTokenizerTests.cs delete mode 100644 tests/NuGet.IndexingTests/ExpandAcronymsFilterTests.cs delete mode 100644 tests/NuGet.IndexingTests/Extraction/CatalogPackageMetadataExtractorTests.cs delete mode 100644 tests/NuGet.IndexingTests/Extraction/DocumentCreatorTests.cs delete mode 100644 tests/NuGet.IndexingTests/IdentifierAnalyzerTests.cs delete mode 100644 tests/NuGet.IndexingTests/IdentifierAutocompleteAnalyzerTests.cs delete mode 100644 tests/NuGet.IndexingTests/IdentifierKeywordAnalyzerTests.cs delete mode 100644 tests/NuGet.IndexingTests/OwnerAnalyzerTests.cs delete mode 100644 tests/NuGet.IndexingTests/PackageAnalyzerTests.cs delete mode 100644 tests/NuGet.IndexingTests/ShingledIdentifierAnalyzerTests.cs delete mode 100644 tests/NuGet.IndexingTests/TagsAnalyzerTests.cs delete mode 100644 tests/NuGet.IndexingTests/TestSupport/Constants.cs delete mode 100644 tests/NuGet.IndexingTests/TestSupport/MockObjectFactory.cs delete mode 100644 tests/NuGet.IndexingTests/TestSupport/TokenAttributes.cs delete mode 100644 tests/NuGet.IndexingTests/TestSupport/TokenStreamExtensions.cs delete mode 100644 tests/NuGet.IndexingTests/VersionAnalyzerTests.cs diff --git a/build.ps1 b/build.ps1 index 2b4efbd8c..7c6778a94 100644 --- a/build.ps1 +++ b/build.ps1 @@ -129,7 +129,6 @@ Invoke-BuildStep 'Creating artifacts' { $nuspecPackages = ` "src\Ng\Catalog2Dnx.nuspec", ` "src\Ng\Catalog2icon.nuspec", ` - "src\Ng\Catalog2Lucene.nuspec", ` "src\Ng\Catalog2Monitoring.nuspec", ` "src\Ng\Catalog2Registration.nuspec", ` "src\Ng\Db2Catalog.nuspec", ` diff --git a/sign.thirdparty.props b/sign.thirdparty.props index 58882052c..d80a59cbb 100644 --- a/sign.thirdparty.props +++ b/sign.thirdparty.props @@ -10,18 +10,6 @@ - - - - - - - - - - - - diff --git a/src/Ng/Arguments.cs b/src/Ng/Arguments.cs index 472ea03b3..637f62247 100644 --- a/src/Ng/Arguments.cs +++ b/src/Ng/Arguments.cs @@ -27,12 +27,6 @@ public static class Arguments public const int DefaultReinitializeIntervalSec = 60 * 60; // 1 hour public const string ReinitializeIntervalSec = "ReinitializeIntervalSec"; - public const string LuceneDirectoryType = "luceneDirectoryType"; - public const string LucenePath = "lucenePath"; - public const string LuceneStorageAccountName = "luceneStorageAccountName"; - public const string LuceneStorageContainer = "luceneStorageContainer"; - public const string LuceneStorageKeyValue = "luceneStorageKeyValue"; - public const string AzureStorageType = "azure"; public const string FileStorageType = "file"; @@ -65,13 +59,6 @@ public static class Arguments #endregion - #region Catalog2Lucene - public const string CatalogBaseAddress = "catalogBaseAddress"; - public const string Registration = "registration"; - public const string CommitTimeoutInSeconds = "commitTimeoutInSeconds"; - public const string FlatContainerBaseAddress = "flatContainerBaseAddress"; - #endregion - #region Catalog2Registration public const string CompressedStorageAccountName = "compressedStorageAccountName"; public const string CompressedStorageBaseAddress = "compressedStorageBaseAddress"; @@ -99,21 +86,6 @@ public static class Arguments public const string Verify = "verify"; #endregion - #region CopyLucene - public const string DestDirectoryType = "destDirectoryType"; - public const string DestPath = "destPath"; - public const string DestStorageAccountName = "destStorageAccountName"; - public const string DestStorageContainer = "destStorageContainer"; - public const string DestStorageKeyValue = "destStorageKeyValue"; - - public const string SrcDirectoryType = "srcDirectoryType"; - public const string SrcPath = "srcPath"; - public const string SrcStorageAccountName = "srcStorageAccountName"; - public const string SrcStorageContainer = "srcStorageContainer"; - public const string SrcStorageKeyValue = "srcStorageKeyValue"; - - #endregion - #region Db2Catalog public const string StartDate = "startDate"; public const string PackageContentUrlFormat = "packageContentUrlFormat"; diff --git a/src/Ng/Catalog2Lucene.nuspec b/src/Ng/Catalog2Lucene.nuspec deleted file mode 100644 index 2b567eb4a..000000000 --- a/src/Ng/Catalog2Lucene.nuspec +++ /dev/null @@ -1,16 +0,0 @@ - - - - Catalog2Lucene - $version$ - .NET Foundation - .NET Foundation - The Catalog2Lucene job. - Copyright .NET Foundation - - - - - - - \ No newline at end of file diff --git a/src/Ng/CommandHelpers.cs b/src/Ng/CommandHelpers.cs index 2acb92ebd..a5335560e 100644 --- a/src/Ng/CommandHelpers.cs +++ b/src/Ng/CommandHelpers.cs @@ -4,13 +4,10 @@ using System; using System.Collections.Generic; using System.Diagnostics; -using System.IO; using System.Linq; using System.Net; using System.Net.Http; using System.Security.Cryptography.X509Certificates; -using Lucene.Net.Store; -using Lucene.Net.Store.Azure; using Microsoft.Extensions.Logging; using Microsoft.WindowsAzure.Storage; using Microsoft.WindowsAzure.Storage.Auth; @@ -310,117 +307,6 @@ private static TimeSpan MaxExecutionTime(int seconds) return TimeSpan.FromSeconds(seconds); } - public static Lucene.Net.Store.Directory GetLuceneDirectory( - IDictionary arguments, - bool required = true) - { - return GetLuceneDirectory(arguments, out var destination, required); - } - - public static Lucene.Net.Store.Directory GetLuceneDirectory( - IDictionary arguments, - out string destination, - bool required = true) - { - IDictionary names = new Dictionary - { - { Arguments.DirectoryType, Arguments.LuceneDirectoryType }, - { Arguments.Path, Arguments.LucenePath }, - { Arguments.StorageAccountName, Arguments.LuceneStorageAccountName }, - { Arguments.StorageKeyValue, Arguments.LuceneStorageKeyValue }, - { Arguments.StorageContainer, Arguments.LuceneStorageContainer } - }; - - return GetLuceneDirectoryImpl(arguments, names, out destination, required); - } - - public static Lucene.Net.Store.Directory GetCopySrcLuceneDirectory(IDictionary arguments, bool required = true) - { - IDictionary names = new Dictionary - { - { Arguments.DirectoryType, Arguments.SrcDirectoryType }, - { Arguments.Path, Arguments.SrcPath }, - { Arguments.StorageAccountName, Arguments.SrcStorageAccountName }, - { Arguments.StorageKeyValue, Arguments.SrcStorageKeyValue }, - { Arguments.StorageContainer, Arguments.SrcStorageContainer } - }; - - return GetLuceneDirectoryImpl(arguments, names, out var destination, required); - } - - public static Lucene.Net.Store.Directory GetCopyDestLuceneDirectory(IDictionary arguments, bool required = true) - { - IDictionary names = new Dictionary - { - { Arguments.DirectoryType, Arguments.DestDirectoryType }, - { Arguments.Path, Arguments.DestPath }, - { Arguments.StorageAccountName, Arguments.DestStorageAccountName }, - { Arguments.StorageKeyValue, Arguments.DestStorageKeyValue }, - { Arguments.StorageContainer, Arguments.DestStorageContainer } - }; - - return GetLuceneDirectoryImpl(arguments, names, out var destination, required); - } - - public static Lucene.Net.Store.Directory GetLuceneDirectoryImpl( - IDictionary arguments, - IDictionary argumentNameMap, - out string destination, - bool required = true) - { - destination = null; - - try - { - var luceneDirectoryType = arguments.GetOrThrow(argumentNameMap[Arguments.DirectoryType]); - - if (luceneDirectoryType.Equals(Arguments.FileStorageType, StringComparison.InvariantCultureIgnoreCase)) - { - var lucenePath = arguments.GetOrThrow(argumentNameMap[Arguments.Path]); - - var directoryInfo = new DirectoryInfo(lucenePath); - - destination = lucenePath; - - if (directoryInfo.Exists) - { - return new SimpleFSDirectory(directoryInfo); - } - - directoryInfo.Create(); - directoryInfo.Refresh(); - - return new SimpleFSDirectory(directoryInfo); - } - if (luceneDirectoryType.Equals(Arguments.AzureStorageType, StringComparison.InvariantCultureIgnoreCase)) - { - var luceneStorageAccountName = arguments.GetOrThrow(argumentNameMap[Arguments.StorageAccountName]); - - var luceneStorageKeyValue = arguments.GetOrThrow(argumentNameMap[Arguments.StorageKeyValue]); - - var luceneStorageContainer = arguments.GetOrThrow(argumentNameMap[Arguments.StorageContainer]); - - var credentials = new StorageCredentials(luceneStorageAccountName, luceneStorageKeyValue); - var account = new CloudStorageAccount(credentials, useHttps: true); - - destination = luceneStorageContainer; - - return new AzureDirectory(account, luceneStorageContainer); - } - Trace.TraceError("Unrecognized Lucene Directory Type \"{0}\"", luceneDirectoryType); - return null; - } - catch (ArgumentException) - { - if (required) - { - throw; - } - - return null; - } - } - public static Func GetHttpMessageHandlerFactory( ITelemetryService telemetryService, bool verbose, diff --git a/src/Ng/Eula.htm b/src/Ng/Eula.htm deleted file mode 100644 index 4860f99d9..000000000 --- a/src/Ng/Eula.htm +++ /dev/null @@ -1,649 +0,0 @@ - - - - - - - - - - - - - - - - - -
- -
- -
- -

Azure Library for Lucene.Net

- -
- -
- -
- -
-
- -
- -
- -

License: -Microsoft Public License (Ms-PL)

- - - - - -
-
-
-

Microsoft Public - License (Ms-PL)
-
- This license governs use of the accompanying software. If you use the - software, you accept this license. If you do not accept the license, do not - use the software.
-
- 1. Definitions
-
- The terms "reproduce," "reproduction," "derivative - works," and "distribution" have the same meaning here as under - U.S. copyright law.
-
- A "contribution" is the original software, - or any additions or changes to the software.
-
- A "contributor" is any person that distributes its contribution - under this license.
-
- "Licensed patents" are a contributor's patent claims that read - directly on its contribution.
-
- 2. Grant of Rights
-
- (A) Copyright Grant- Subject to the terms of this license, including the - license conditions and limitations in section 3, each contributor grants you - a non-exclusive, worldwide, royalty-free copyright license to reproduce its - contribution, prepare derivative works of its contribution, and distribute - its contribution or any derivative works that you create.
-
- (B) Patent Grant- Subject to the terms of this license, including the license - conditions and limitations in section 3, each contributor grants you a - non-exclusive, worldwide, royalty-free license under its licensed patents to - make, have made, use, sell, offer for sale, import, and/or otherwise dispose - of its contribution in the software or derivative works of the contribution in - the software.
-
- 3. Conditions and Limitations
-
- (A) No Trademark License- This license does not grant you rights to use any - contributors' name, logo, or trademarks.
-
- (B) If you bring a patent claim against any contributor over patents that you - claim are infringed by the software, your patent license from such - contributor to the software ends automatically.
-
- (C) If you distribute any portion of the software, you must retain all - copyright, patent, trademark, and attribution notices that are present in the - software.
-
- (D) If you distribute any portion of the software in source code form, you - may do so only under this license by including a complete copy of this - license with your distribution. If you distribute any portion of the software - in compiled or object code form, you may only do so under a license that - complies with this license.
-
- (E) The software is licensed "as-is." You bear the risk of using - it. The contributors give no express warranties, guarantees or conditions. - You may have additional consumer rights under your local laws which this - license cannot change. To the extent permitted under your local laws, the - contributors exclude the implied warranties of merchantability, fitness for a - particular purpose and non-infringement

-
-
-
- - - -

 

- -
- -
- - - - diff --git a/src/Ng/Extensions/TaskExtensions.cs b/src/Ng/Extensions/TaskExtensions.cs deleted file mode 100644 index 7091875db..000000000 --- a/src/Ng/Extensions/TaskExtensions.cs +++ /dev/null @@ -1,35 +0,0 @@ -// Copyright (c) .NET Foundation. All rights reserved. -// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. - -using System; -using System.Threading; -using System.Threading.Tasks; - -namespace Ng.Extensions -{ - public static class TaskExtensions - { - // From: https://github.com/davidfowl/AspNetCoreDiagnosticScenarios/blob/master/AsyncGuidance.md#cancelling-uncancellable-operations - public static async Task TimeoutAfter(this Task task, TimeSpan timeout) - { - using (var cts = new CancellationTokenSource()) - { - var delayTask = Task.Delay(timeout, cts.Token); - - var resultTask = await Task.WhenAny(task, delayTask); - if (resultTask == delayTask) - { - // Operation cancelled - throw new OperationCanceledException(); - } - else - { - // Cancel the timer task so that it does not fire - cts.Cancel(); - } - - await task; - } - } - } -} diff --git a/src/Ng/Jobs/Catalog2LuceneJob.cs b/src/Ng/Jobs/Catalog2LuceneJob.cs deleted file mode 100644 index 827e2d8ea..000000000 --- a/src/Ng/Jobs/Catalog2LuceneJob.cs +++ /dev/null @@ -1,194 +0,0 @@ -// Copyright (c) .NET Foundation. All rights reserved. -// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. - -using System; -using System.Collections.Generic; -using System.Linq; -using System.Net.Http; -using System.Threading; -using System.Threading.Tasks; -using Lucene.Net.Index; -using Microsoft.Extensions.Logging; -using NuGet.Indexing; -using NuGet.Services.Configuration; -using NuGet.Services.Logging; -using NuGet.Services.Metadata.Catalog; - -namespace Ng.Jobs -{ - public class Catalog2LuceneJob : LoopingNgJob - { - private bool _verbose; - private string _source; - private string _registration; - private Lucene.Net.Store.Directory _directory; - private string _catalogBaseAddress; - private string _storageBaseAddress; - private string _galleryBaseAddress; - private Uri _flatContainerBaseAddress; - private string _flatContainerContainerName; - private TimeSpan? _commitTimeout; - private Func _handlerFunc; - private string _destination; - - public Catalog2LuceneJob( - ILoggerFactory loggerFactory, - ITelemetryClient telemetryClient, - IDictionary telemetryGlobalDimensions) - : base(loggerFactory, telemetryClient, telemetryGlobalDimensions) - { - } - - public override string GetUsage() - { - return "Usage: ng catalog2lucene " - + $"-{Arguments.Source} " - + $"[-{Arguments.Registration} . Multiple registration cursors are supported, separated by ';'.] " - + $"-{Arguments.LuceneDirectoryType} file|azure " - + $"[-{Arguments.LucenePath} ] " - + "|" - + $"[-{Arguments.LuceneStorageAccountName} " - + $"-{Arguments.LuceneStorageKeyValue} " - + $"-{Arguments.LuceneStorageContainer} " - + $"-{Arguments.CommitTimeoutInSeconds} " - + $"[-{Arguments.VaultName} " - + $"-{Arguments.ClientId} " - + $"-{Arguments.CertificateThumbprint} " - + $"[-{Arguments.ValidateCertificate} true|false]]] " - + $"[-{Arguments.Verbose} true|false] " - + $"[-{Arguments.Interval} ] " - + $"[-{Arguments.GalleryBaseAddress} ] " - + $"-{Arguments.FlatContainerBaseAddress} " - + $"-{Arguments.FlatContainerName} "; - } - - protected override void Init(IDictionary arguments, CancellationToken cancellationToken) - { - _directory = CommandHelpers.GetLuceneDirectory(arguments, out var destination); - _source = arguments.GetOrThrow(Arguments.Source); - _verbose = arguments.GetOrDefault(Arguments.Verbose, false); - - _registration = arguments.GetOrDefault(Arguments.Registration); - if (_registration == null) - { - Logger.LogInformation("Lucene index will be created up to the end of the catalog (alternatively if you provide a registration it will not pass that)"); - } - - _catalogBaseAddress = arguments.GetOrDefault(Arguments.CatalogBaseAddress); - if (_catalogBaseAddress == null) - { - Logger.LogInformation("No catalogBaseAddress was specified so the Lucene index will NOT contain the storage paths"); - } - - _storageBaseAddress = arguments.GetOrDefault(Arguments.StorageBaseAddress); - _galleryBaseAddress = arguments.GetOrDefault(Arguments.GalleryBaseAddress); - _flatContainerBaseAddress = arguments.GetOrThrow(Arguments.FlatContainerBaseAddress); - _flatContainerContainerName = arguments.GetOrThrow(Arguments.FlatContainerName); - - if (!_flatContainerBaseAddress.IsAbsoluteUri) - { - throw new InvalidOperationException($"{Arguments.FlatContainerBaseAddress} value is not an absolute URL: '{_flatContainerBaseAddress}'"); - } - - if (_flatContainerBaseAddress.Scheme != Uri.UriSchemeHttps) - { - throw new InvalidOperationException($"Only https scheme is supported for {Arguments.FlatContainerBaseAddress}"); - } - - var commitTimeoutInSeconds = arguments.GetOrDefault(Arguments.CommitTimeoutInSeconds); - if (commitTimeoutInSeconds.HasValue) - { - _commitTimeout = TimeSpan.FromSeconds(commitTimeoutInSeconds.Value); - } - else - { - _commitTimeout = null; - } - - Logger.LogInformation("CONFIG source: {ConfigSource} registration: {Registration}" + - " catalogBaseAddress: {CatalogBaseAddress} storageBaseAddress: {StorageBaseAddress} commitTimeout: {CommmitTimeout}" + - " flatContainerBaseAddress: {FlatContainerBaseAddress}", - _source, - _registration ?? "(null)", - _catalogBaseAddress ?? "(null)", - _storageBaseAddress ?? "(null)", - _galleryBaseAddress ?? "(null)", - _commitTimeout?.ToString() ?? "(null)", - _flatContainerBaseAddress); - - _handlerFunc = CommandHelpers.GetHttpMessageHandlerFactory( - TelemetryService, - _verbose, - _catalogBaseAddress, - _storageBaseAddress); - - _destination = destination; - TelemetryService.GlobalDimensions[TelemetryConstants.Destination] = _destination; - } - - protected override async Task RunInternalAsync(CancellationToken cancellationToken) - { - using (Logger.BeginScope($"Logging for {{{TelemetryConstants.Destination}}}", _destination)) - using (TelemetryService.TrackDuration(TelemetryConstants.JobLoopSeconds)) - using (var indexWriter = CreateIndexWriter(_directory)) - { - var collector = new SearchIndexFromCatalogCollector( - index: new Uri(_source), - indexWriter: indexWriter, - commitEachBatch: false, - commitTimeout: _commitTimeout, - baseAddress: _catalogBaseAddress, - galleryBaseAddress: _galleryBaseAddress == null ? null : new Uri(_galleryBaseAddress), - flatContainerBaseAddress: _flatContainerBaseAddress, - flatContainerContainerName: _flatContainerContainerName, - telemetryService: TelemetryService, - logger: Logger, - handlerFunc: _handlerFunc); - - ReadWriteCursor front = new LuceneCursor(indexWriter, MemoryCursor.MinValue); - var back = _registration == null - ? (ReadCursor)MemoryCursor.CreateMax() - : GetTheLeastAdvancedRegistrationCursor(_registration, cancellationToken); - - bool run; - do - { - run = await collector.RunAsync(front, back, cancellationToken); - - await collector.EnsureCommittedAsync(); // commit after each catalog page - } - while (run); - } - } - - private ReadCursor GetTheLeastAdvancedRegistrationCursor(string registrationArg, CancellationToken cancellationToken) - { - string[] registrations = registrationArg.Split(';'); - - return new AggregateCursor(registrations.Select(r => new HttpReadCursor(new Uri(r), _handlerFunc))); - } - - public static IndexWriter CreateIndexWriter(Lucene.Net.Store.Directory directory) - { - var create = !IndexReader.IndexExists(directory); - - directory.EnsureOpen(); - - if (!create) - { - if (IndexWriter.IsLocked(directory)) - { - IndexWriter.Unlock(directory); - } - } - - var indexWriter = new IndexWriter(directory, new PackageAnalyzer(), create, IndexWriter.MaxFieldLength.UNLIMITED); - - NuGetMergePolicyApplyer.ApplyTo(indexWriter); - - indexWriter.SetSimilarity(new CustomSimilarity()); - - return indexWriter; - } - } -} \ No newline at end of file diff --git a/src/Ng/Jobs/CheckLuceneJob.cs b/src/Ng/Jobs/CheckLuceneJob.cs deleted file mode 100644 index e3a3c2d37..000000000 --- a/src/Ng/Jobs/CheckLuceneJob.cs +++ /dev/null @@ -1,66 +0,0 @@ -// Copyright (c) .NET Foundation. All rights reserved. -// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. - -using System.Collections.Generic; -using System.Threading; -using System.Threading.Tasks; -using Lucene.Net.Index; -using Microsoft.Extensions.Logging; -using NuGet.Services.Logging; - -namespace Ng.Jobs -{ - public class CheckLuceneJob : NgJob - { - private Lucene.Net.Store.Directory _directory; - - public CheckLuceneJob( - ILoggerFactory loggerFactory, - ITelemetryClient telemetryClient, - IDictionary telemetryGlobalDimensions) - : base(loggerFactory, telemetryClient, telemetryGlobalDimensions) - { - } - - public override string GetUsage() - { - return "Usage: ng checklucene " - + $"-{Arguments.LuceneDirectoryType} file|azure " - + $"[-{Arguments.LucenePath} ]" - + $"|" - + $"[-{Arguments.LuceneStorageAccountName} " - + $"-{Arguments.LuceneStorageKeyValue} " - + $"-{Arguments.LuceneStorageContainer} ]"; - } - - protected override void Init(IDictionary arguments, CancellationToken cancellationToken) - { - _directory = CommandHelpers.GetLuceneDirectory(arguments); - } - - protected override Task RunInternalAsync(CancellationToken cancellationToken) - { - using (var reader = IndexReader.Open(_directory, true)) - { - Logger.LogInformation("Lucene index contains: {numDocs} documents", reader.NumDocs()); - - var commitUserData = reader.CommitUserData; - - if (commitUserData == null) - { - Logger.LogWarning("commitUserData is null"); - } - else - { - Logger.LogInformation("commitUserData:"); - foreach (var entry in commitUserData) - { - Logger.LogInformation(" {EntryKey} = {EntryValue}", entry.Key, entry.Value); - } - } - } - - return Task.FromResult(false); - } - } -} diff --git a/src/Ng/Jobs/ClearLuceneJob.cs b/src/Ng/Jobs/ClearLuceneJob.cs deleted file mode 100644 index 28e898607..000000000 --- a/src/Ng/Jobs/ClearLuceneJob.cs +++ /dev/null @@ -1,58 +0,0 @@ -// Copyright (c) .NET Foundation. All rights reserved. -// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. - -using System.Collections.Generic; -using System.Threading; -using System.Threading.Tasks; -using Lucene.Net.Analysis.Standard; -using Lucene.Net.Index; -using Microsoft.Extensions.Logging; -using NuGet.Services.Logging; - -namespace Ng.Jobs -{ - public class ClearLuceneJob : NgJob - { - private Lucene.Net.Store.Directory _directory; - - public ClearLuceneJob( - ILoggerFactory loggerFactory, - ITelemetryClient telemetryClient, - IDictionary telemetryGlobalDimensions) - : base(loggerFactory, telemetryClient, telemetryGlobalDimensions) - { - } - - public override string GetUsage() - { - return "Usage: ng clearlucene " - + $"-{Arguments.LuceneDirectoryType} file|azure " - + $"[-{Arguments.LucenePath} ]" - + "|" - + $"[-{Arguments.LuceneStorageAccountName} " - + $"-{Arguments.LuceneStorageKeyValue} " - + $"-{Arguments.LuceneStorageContainer} ]"; - } - - protected override void Init(IDictionary arguments, CancellationToken cancellationToken) - { - _directory = CommandHelpers.GetLuceneDirectory(arguments); - } - - protected override Task RunInternalAsync(CancellationToken cancellationToken) - { - if (IndexReader.IndexExists(_directory)) - { - using (var writer = new IndexWriter(_directory, new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30), true, IndexWriter.MaxFieldLength.UNLIMITED)) - { - writer.DeleteAll(); - writer.Commit(new Dictionary()); - } - } - - Logger.LogInformation("All Done"); - - return Task.FromResult(false); - } - } -} diff --git a/src/Ng/Jobs/CopyLuceneJob.cs b/src/Ng/Jobs/CopyLuceneJob.cs deleted file mode 100644 index 2dbc52d38..000000000 --- a/src/Ng/Jobs/CopyLuceneJob.cs +++ /dev/null @@ -1,76 +0,0 @@ -// Copyright (c) .NET Foundation. All rights reserved. -// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. - -using System; -using System.Collections.Generic; -using System.Linq; -using System.Threading; -using System.Threading.Tasks; -using Lucene.Net.Store.Azure; -using Microsoft.Extensions.Logging; -using NuGet.Services.Logging; - -namespace Ng.Jobs -{ - public class CopyLuceneJob : NgJob - { - private Lucene.Net.Store.Directory _srcDirectory; - private Lucene.Net.Store.Directory _destDirectory; - - public CopyLuceneJob( - ILoggerFactory loggerFactory, - ITelemetryClient telemetryClient, - IDictionary telemetryGlobalDimensions) - : base(loggerFactory, telemetryClient, telemetryGlobalDimensions) - { - } - - public override string GetUsage() - { - return "Usage: ng copylucene " - + $"-{Arguments.SrcDirectoryType} file|azure " - + $"[-{Arguments.SrcPath} ]" - + "|" - + $"[-{Arguments.SrcStorageAccountName} " - + $"-{Arguments.SrcStorageKeyValue} " - + $"-{Arguments.SrcStorageContainer} ] " - + $"-{Arguments.DestDirectoryType} file|azure " - + $"[-{Arguments.DestPath} ]" - + "|" - + $"[-{Arguments.DestStorageAccountName} " - + $"-{Arguments.DestStorageKeyValue} " - + $"-{Arguments.DestStorageContainer} ] " - + $"[-{Arguments.VaultName} " - + $"-{Arguments.ClientId} " - + $"-{Arguments.CertificateThumbprint} " - + $"[-{Arguments.ValidateCertificate} true|false]]"; - } - - protected override void Init(IDictionary arguments, CancellationToken cancellationToken) - { - _srcDirectory = CommandHelpers.GetCopySrcLuceneDirectory(arguments); - _destDirectory = CommandHelpers.GetCopyDestLuceneDirectory(arguments); - } - - protected override Task RunInternalAsync(CancellationToken cancellationToken) - { - Lucene.Net.Store.Directory.Copy(_srcDirectory, _destDirectory, true); - - if (_destDirectory is AzureDirectory) - { - // When the destination directory is an AzureDirectory, - // create an empty write.lock to prevent writers from crashing. - if (!_destDirectory.ListAll().Any(f => - string.Equals(f, "write.lock", StringComparison.OrdinalIgnoreCase))) - { - var writeLock = _destDirectory.CreateOutput("write.lock"); - writeLock.Dispose(); - } - } - - Logger.LogInformation("All Done"); - - return Task.FromResult(false); - } - } -} diff --git a/src/Ng/Jobs/Db2LuceneJob.cs b/src/Ng/Jobs/Db2LuceneJob.cs deleted file mode 100644 index c137c9452..000000000 --- a/src/Ng/Jobs/Db2LuceneJob.cs +++ /dev/null @@ -1,55 +0,0 @@ -// Copyright (c) .NET Foundation. All rights reserved. -// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. - -using System; -using System.Collections.Generic; -using System.Threading; -using System.Threading.Tasks; -using Microsoft.Extensions.Logging; -using NuGet.Indexing; -using NuGet.Services.Configuration; -using NuGet.Services.Logging; - -namespace Ng.Jobs -{ - public class Db2LuceneJob : NgJob - { - private string _connectionString; - private string _path; - private string _source; - private Uri _catalogIndexUrl; - - public Db2LuceneJob( - ILoggerFactory loggerFactory, - ITelemetryClient telemetryClient, - IDictionary telemetryGlobalDimensions) - : base(loggerFactory, telemetryClient, telemetryGlobalDimensions) - { - } - - public override string GetUsage() - { - return "Usage: ng db2lucene " - + $"-{Arguments.ConnectionString} " - + $"-{Arguments.Source} " - + $"-{Arguments.Path} " - + $"[-{Arguments.Verbose} true|false]"; - } - - protected override void Init(IDictionary arguments, CancellationToken cancellationToken) - { - _connectionString = arguments.GetOrThrow(Arguments.ConnectionString); - _source = arguments.GetOrThrow(Arguments.Source); - _path = arguments.GetOrThrow(Arguments.Path); - - _catalogIndexUrl = new Uri(_source); - } - - protected override Task RunInternalAsync(CancellationToken cancellationToken) - { - Sql2Lucene.Export(_connectionString, _catalogIndexUrl, _path, LoggerFactory); - - return Task.FromResult(false); - } - } -} diff --git a/src/Ng/Json/JTokenExtensions.cs b/src/Ng/Json/JTokenExtensions.cs deleted file mode 100644 index c99ba03b4..000000000 --- a/src/Ng/Json/JTokenExtensions.cs +++ /dev/null @@ -1,21 +0,0 @@ -// Copyright (c) .NET Foundation. All rights reserved. -// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. - -using Newtonsoft.Json; -using Newtonsoft.Json.Linq; - -namespace Ng.Json -{ - public static class JTokenExtensions - { - public static JToken SkipClone(this JToken original, string[] jsonPaths) - { - return JToken.Load(original.CreateReader().SkipPaths(jsonPaths)); - } - - public static JsonReader SkipPaths(this JsonReader original, string[] jsonPaths) - { - return new PropertySkippingJsonReader(original, jsonPaths); - } - } -} \ No newline at end of file diff --git a/src/Ng/LuceneCursor.cs b/src/Ng/LuceneCursor.cs deleted file mode 100644 index 05a95f5a1..000000000 --- a/src/Ng/LuceneCursor.cs +++ /dev/null @@ -1,55 +0,0 @@ -// Copyright (c) .NET Foundation. All rights reserved. -// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. - -using System; -using System.Collections.Generic; -using System.Diagnostics; -using System.Globalization; -using System.Threading; -using System.Threading.Tasks; -using Lucene.Net.Index; -using NuGet.Services.Metadata.Catalog; - -namespace Ng -{ - public class LuceneCursor : ReadWriteCursor - { - private readonly IndexWriter _indexWriter; - private readonly DateTime _defaultValue; - - public LuceneCursor(IndexWriter indexWriter, DateTime defaultValue) - { - _indexWriter = indexWriter; - _defaultValue = defaultValue; - } - - public override Task SaveAsync(CancellationToken cancellationToken) - { - // no-op because we will do the Save in the Lucene.Commit - - return Task.FromResult(true); - } - - public override Task LoadAsync(CancellationToken cancellationToken) - { - IDictionary commitUserData; - using (var reader = _indexWriter.GetReader()) - { - commitUserData = reader.CommitUserData; - } - - string value; - if (commitUserData != null && commitUserData.TryGetValue("commitTimeStamp", out value)) - { - Value = DateTime.ParseExact(value, "o", CultureInfo.InvariantCulture, DateTimeStyles.RoundtripKind); - } - else - { - Value = _defaultValue; - } - - Trace.TraceInformation("LuceneCursor.Load: {0}", this); - return Task.FromResult(true); - } - } -} \ No newline at end of file diff --git a/src/Ng/Ng.csproj b/src/Ng/Ng.csproj index 4da36abc2..eb5dae27e 100644 --- a/src/Ng/Ng.csproj +++ b/src/Ng/Ng.csproj @@ -61,19 +61,13 @@ - - - - - - @@ -82,16 +76,13 @@ - - - @@ -101,7 +92,6 @@ Designer - diff --git a/src/Ng/NgJobFactory.cs b/src/Ng/NgJobFactory.cs index 83578750d..e45aac81d 100644 --- a/src/Ng/NgJobFactory.cs +++ b/src/Ng/NgJobFactory.cs @@ -16,12 +16,7 @@ public static class NgJobFactory { "db2catalog", typeof(Db2CatalogJob) }, { "db2monitoring", typeof(Db2MonitoringJob) }, { "catalog2registration", typeof(Catalog2RegistrationJob) }, - { "catalog2lucene", typeof(Catalog2LuceneJob) }, { "catalog2dnx", typeof(Catalog2DnxJob) }, - { "copylucene", typeof(CopyLuceneJob) }, - { "checklucene", typeof(CheckLuceneJob) }, - { "clearlucene", typeof(ClearLuceneJob) }, - { "db2lucene", typeof(Db2LuceneJob) }, { "lightning", typeof(LightningJob) }, { "catalog2monitoring", typeof(Catalog2MonitoringJob) }, { "monitoring2monitoring", typeof(Monitoring2MonitoringJob) }, diff --git a/src/Ng/Readme.html b/src/Ng/Readme.html deleted file mode 100644 index e48d28a41..000000000 --- a/src/Ng/Readme.html +++ /dev/null @@ -1,2190 +0,0 @@ - - - - - - - - - - - - - - - - - - -
- -

AzureDirectory Library for Lucene.Net

- -

Project -description

- -

Lucene.Net is a robust open source search technology which -has an abstract interface called a Directory for defining how the index is -stored. AzureDirectory is an implementation of that -interface for Windows Azure Blob Storage.

- -

About -

- -

This project allows you to create Lucene Indexes and use them in Azure.
-
-This project implements a low level Lucene Directory -object called AzureDirectory around Windows Azure BlobStorage.

- -

Background -

- -

Lucene.NET -

- -

Lucene is a mature Java based open source full text indexing and -search engine and property store.
-Lucene.NET is a mature port of that library to C#.
-Lucene/Lucene.Net provides:

- -

*       -Super simple API for storing -documents with arbitrary properties

- -

*       -Complete control over what is -indexed and what is stored for retrieval

- -

*       -Robust control over where and how -things are indexed, how much memory is used, etc.

- -

*       -Superfast and super rich query -capabilities

- -

o    -Sorted results

- -

o    -Rich constraint semantics AND/OR/NOT -etc.

- -

o    -Rich text semantics (phrase match, -wildcard match, near, fuzzy match etc)

- -

o    -Text query syntax (example: -Title:(dog AND cat) OR Body:Lucen* )

- -

o    -Programmatic expressions

- -

o    -Ranked results with custom ranking -algorithms

- -

 

- -

AzureDirectory

- -

AzureDirectory smartly uses a local Directory to cache files as they are -created and automatically pushes them to Azure blob storage as appropriate. -Likewise, it smartly caches blob files on the client when they change. This -provides with a nice blend of just in time syncing of data local to indexers or -searchers across multiple machines.
-
-With the flexibility that Lucene provides over data -in memory versus storage and the just in time blob transfer that AzureDirectory provides you have great control over the composibility of where data is indexed and how it is -consumed.
-
-To be more concrete: you can have 1..N worker roles adding documents to an -index, and 1..N searcher webroles searching over the -catalog in near real time.

- -

Usage -

- -


-To use you need to create a blob storage account on http://azure.com .
-
-Create an App.Config or Web.Config -and configure your accountinfo:

- -
- -

         <?xml version="1.0" encoding="utf-8" ?>

- -

         <configuration>

- -

           -<appSettings>

- -

                 <!-- azure SETTINGS -->

- -

                 <add key="BlobStorageEndpoint" value="http://YOURACCOUNT.blob.core.windows.net"/>

- -

                 <add key="AccountName" value="YOURACCOUNTNAME"/>

- -

                 <add key="AccountSharedKey" value="YOURACCOUNTKEY"/>

- -

           -</appSettings>

- -

         </configuration>

- -

 

- -
- -


-To add documents to a catalog is as simple as
-
-

- -
- -

            AzureDirectory -azureDirectory = new AzureDirectory("TestCatalog");

- -

            IndexWriter -indexWriter = new IndexWriter(azureDirectory, new StandardAnalyzer(), -true);

- -

            Document doc = new Document();

- -

            doc.Add(new Field("id", DateTime.Now.ToFileTimeUtc().ToString(), -Field.Store.YES, Field.Index.TOKENIZED, -Field.TermVector.NO));

- -

            doc.Add(new Field("Title", “this is my title”, Field.Store.YES, Field.Index.TOKENIZED, -Field.TermVector.NO));

- -

            doc.Add(new Field("Body", “This is my body”, Field.Store.YES, Field.Index.TOKENIZED, -Field.TermVector.NO));

- -

            indexWriter.AddDocument(doc);

- -

            indexWriter.Close();

- -

}

- -

 

- -
- -


-And searching is as easy as:
-
-

- -
- -

            IndexSearcher -searcher = new IndexSearcher(azureDirectory);               

- -

            Lucene.Net.QueryParsers.QueryParser -parser = QueryParser("Title", new StandardAnalyzer());

- -

            Lucene.Net.Search.Query -query = parser.Parse("Title:(Dog AND -Cat)");

- -

 

- -

            Hits hits -= searcher.Search(query);

- -

            for (int i = 0; i < hits.Length(); -i++)

- -

            {

- -

                Document doc = hits.Doc(i);

- -

                Console.WriteLine(doc.GetField("Title").StringValue());

- -

            }

- -

 

- -
- -

 

- -

Caching -and Compression

- -


-AzureDirectory compresses blobs before sent to the -blob storage. Blobs are automatically cached local to reduce roundtrips for -blobs which haven't changed.
-
-By default AzureDirectory stores this local cache in -a temporary folder. You can easily control where the local cache is stored by -passing in a Directory object for whatever type and location of storage you -want.
-
-This example stores the cache in a ram directory:

- -
- -

      AzureDirectory azureDirectory = new AzureDirectory("MyIndex", new RAMDirectory());

- -

 

- -
- -


-And this example stores in the file system in C:\myindex

- -
- -

      AzureDirectory azureDirectory = new AzureDirectory("MyIndex", new FSDirectory(@"c:\myindex"));

- -

 

- -
- -


-
-

- -

Notes -on settings

- -


-Just like a normal Lucene index, calling optimize too -often causes a lot of churn and not calling it enough causes too many segment -files to be created, so call it "just enough" times. That will -totally depend on your application and the nature of your pattern of adding and -updating items to determine (which is why Lucene -provides so many knobs to configure its behavior).
-
-The default compound file support that Lucene uses reduces -the number of files that are generated...this means it deletes and merges files -regularly which causes churn on the blob storage. Calling indexWriter.SetCompoundFiles(false) -will give better performance.
-
-The version of Lucene.NET checked in as a binary is Version 2.3.1, but you can -use any version of Lucene.NET you want by simply enlisting from the above open -source site.

- -

FAQ

- -

Related -

- -

There is a LINQ to Lucene provider http://linqtoLucene.codeplex.com/Wiki/View.aspx?title=Project%20Documentation  -on codeplex which allows you to define your schema as -a strongly typed object and execute LINQ expressions against the index.

- -

 

- -
- - - - diff --git a/src/Ng/SearchIndexFromCatalogCollector.cs b/src/Ng/SearchIndexFromCatalogCollector.cs deleted file mode 100644 index 52c73be73..000000000 --- a/src/Ng/SearchIndexFromCatalogCollector.cs +++ /dev/null @@ -1,386 +0,0 @@ -// Copyright (c) .NET Foundation. All rights reserved. -// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. - -using System; -using System.Collections.Generic; -using System.Diagnostics; -using System.Globalization; -using System.Linq; -using System.Net.Http; -using System.Threading; -using System.Threading.Tasks; -using Lucene.Net.Documents; -using Lucene.Net.Index; -using Lucene.Net.Search; -using Microsoft.Extensions.Logging; -using Newtonsoft.Json.Linq; -using Ng.Extensions; -using NuGet.Indexing; -using NuGet.Services.Metadata.Catalog; -using NuGet.Versioning; - -namespace Ng -{ - public class SearchIndexFromCatalogCollector : CommitCollector - { - private readonly string _baseAddress; - private readonly Uri _galleryBaseAddress; - private readonly Uri _flatContainerBaseAddress; - private readonly string _flatContainerContainerName; - private readonly IndexWriter _indexWriter; - private readonly bool _commitEachBatch; - private readonly TimeSpan? _commitTimeout; - private readonly ILogger _logger; - - private LuceneCommitMetadata _metadataForNextCommit; - - public SearchIndexFromCatalogCollector( - Uri index, - IndexWriter indexWriter, - bool commitEachBatch, - TimeSpan? commitTimeout, - string baseAddress, - Uri galleryBaseAddress, - Uri flatContainerBaseAddress, - string flatContainerContainerName, - ITelemetryService telemetryService, - ILogger logger, - Func handlerFunc = null, - IHttpRetryStrategy httpRetryStrategy = null) - : base(index, telemetryService, handlerFunc, httpRetryStrategy: httpRetryStrategy) - { - _indexWriter = indexWriter; - _commitEachBatch = commitEachBatch; - _commitTimeout = commitTimeout; - _baseAddress = baseAddress; - _galleryBaseAddress = galleryBaseAddress; - _flatContainerBaseAddress = flatContainerBaseAddress ?? throw new ArgumentNullException(nameof(flatContainerBaseAddress)); - _flatContainerContainerName = flatContainerContainerName ?? throw new ArgumentNullException(nameof(flatContainerContainerName)); - _logger = logger; - } - - protected override async Task OnProcessBatchAsync( - CollectorHttpClient client, - IEnumerable items, - JToken context, - DateTime commitTimeStamp, - bool isLastBatch, - CancellationToken cancellationToken) - { - JObject catalogIndex = null; - if (_baseAddress != null) - { - var stopwatch = Stopwatch.StartNew(); - catalogIndex = await client.GetJObjectAsync(Index, cancellationToken); - _telemetryService.TrackCatalogIndexReadDuration(stopwatch.Elapsed, Index); - } - - IEnumerable catalogItems = await FetchCatalogItemsAsync(client, items, cancellationToken); - - var numDocs = _indexWriter.NumDocs(); - _logger.LogInformation(string.Format("Index contains {0} documents.", _indexWriter.NumDocs())); - - ProcessCatalogIndex(_indexWriter, catalogIndex, _baseAddress); - ProcessCatalogItems(_indexWriter, catalogItems, _baseAddress); - - var docsDifference = _indexWriter.NumDocs() - numDocs; - - UpdateCommitMetadata(commitTimeStamp, docsDifference); - - _logger.LogInformation(string.Format("Processed catalog items. Index now contains {0} documents. (total uncommitted {1}, batch {2})", - _indexWriter.NumDocs(), _metadataForNextCommit.Count, docsDifference)); - - if (_commitEachBatch || isLastBatch) - { - await EnsureCommittedAsync(); - } - - return true; - } - - private void UpdateCommitMetadata(DateTime commitTimeStamp, int docsDifference) - { - var count = docsDifference; - if (_metadataForNextCommit != null) - { - // we want the total for the entire commit, so add to the number we already have - count += _metadataForNextCommit.Count; - } - - _metadataForNextCommit = DocumentCreator.CreateCommitMetadata( - commitTimeStamp, "from catalog", count, Guid.NewGuid().ToString()); - } - - public async Task EnsureCommittedAsync() - { - if (_metadataForNextCommit == null) - { - // this means no changes have been made to the index - no need to commit - _logger.LogInformation(string.Format("SKIP COMMIT No changes. Index contains {0} documents.", _indexWriter.NumDocs())); - return; - } - - try - { - var commitTask = CommitIndexAsync(); - - // Ensure that the commit finishes within the configured timeout. If the timeout - // threshold is reached, an OperationCanceledException will be thrown and will cause - // the process to crash. The process MUST be ended as otherwise the commit task may - // finish in the background. - if (_commitTimeout.HasValue) - { - commitTask = commitTask.TimeoutAfter(_commitTimeout.Value); - } - - await commitTask; - } - catch (OperationCanceledException) - { - _telemetryService.TrackIndexCommitTimeout(); - _logger.LogError("TIMEOUT Committing index containing {0} documents. Metadata: commitTimeStamp {CommitTimeStamp}; change count {ChangeCount}; trace {CommitTrace}", - _indexWriter.NumDocs(), _metadataForNextCommit.CommitTimeStamp.ToString("O"), _metadataForNextCommit.Count, _metadataForNextCommit.Trace); - - Environment.Exit(exitCode: 1); - } - } - - private async Task CommitIndexAsync() - { - // This method commits to the index synchronously and may hang. Yield the current context to allow cancellation. - await Task.Yield(); - - using (_telemetryService.TrackIndexCommitDuration()) - { - _logger.LogInformation("COMMITTING index contains {0} documents. Metadata: commitTimeStamp {CommitTimeStamp}; change count {ChangeCount}; trace {CommitTrace}", - _indexWriter.NumDocs(), _metadataForNextCommit.CommitTimeStamp.ToString("O"), _metadataForNextCommit.Count, _metadataForNextCommit.Trace); - - _indexWriter.ExpungeDeletes(); - _indexWriter.Commit(_metadataForNextCommit.ToDictionary()); - - _logger.LogInformation("COMMIT index contains {0} documents. Metadata: commitTimeStamp {CommitTimeStamp}; change count {ChangeCount}; trace {CommitTrace}", - _indexWriter.NumDocs(), _metadataForNextCommit.CommitTimeStamp.ToString("O"), _metadataForNextCommit.Count, _metadataForNextCommit.Trace); - - _metadataForNextCommit = null; - } - } - - private static async Task> FetchCatalogItemsAsync( - CollectorHttpClient client, - IEnumerable items, - CancellationToken cancellationToken) - { - var tasks = new List>(); - - foreach (var item in items) - { - tasks.Add(client.GetJObjectAsync(item.Uri, cancellationToken)); - } - - await Task.WhenAll(tasks); - - return tasks.Select(t => t.Result); - } - - private static void ProcessCatalogIndex(IndexWriter indexWriter, JObject catalogIndex, string baseAddress) - { - indexWriter.DeleteDocuments(new Term("@type", Schema.DataTypes.CatalogInfastructure.AbsoluteUri)); - - Document doc = new Document(); - - Add(doc, "@type", Schema.DataTypes.CatalogInfastructure.AbsoluteUri, Field.Store.YES, Field.Index.NOT_ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); - Add(doc, "Visibility", "Public", Field.Store.YES, Field.Index.NOT_ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); - - if (catalogIndex != null) - { - IEnumerable storagePaths = GetCatalogStoragePaths(catalogIndex); - AddStoragePaths(doc, storagePaths, baseAddress); - } - - indexWriter.AddDocument(doc); - } - - private void ProcessCatalogItems(IndexWriter indexWriter, IEnumerable catalogItems, string baseAddress) - { - int count = 0; - - foreach (JObject catalogItem in catalogItems) - { - _logger.LogInformation("Process CatalogItem {CatalogItem}", catalogItem["@id"].ToString()); - - NormalizeId(catalogItem); - - if (Utils.IsType(GetContext(catalogItem), catalogItem, Schema.DataTypes.PackageDetails)) - { - var properties = GetTelemetryProperties(catalogItem); - - using (_telemetryService.TrackDuration(TelemetryConstants.ProcessPackageDetailsSeconds, properties)) - { - ProcessPackageDetails(indexWriter, catalogItem); - } - } - else if (Utils.IsType(GetContext(catalogItem), catalogItem, Schema.DataTypes.PackageDelete)) - { - var properties = GetTelemetryProperties(catalogItem); - - using (_telemetryService.TrackDuration(TelemetryConstants.ProcessPackageDeleteSeconds, properties)) - { - ProcessPackageDelete(indexWriter, catalogItem); - } - } - else - { - _logger.LogInformation("Unrecognized @type ignoring CatalogItem"); - } - - count++; - } - - _logger.LogInformation(string.Format("Processed {0} CatalogItems", count)); - } - - private static Dictionary GetTelemetryProperties(JObject catalogItem) - { - var packageId = catalogItem["id"].ToString().ToLowerInvariant(); - var packageVersion = catalogItem["version"].ToString().ToLowerInvariant(); - - return new Dictionary() - { - { TelemetryConstants.Id, packageId }, - { TelemetryConstants.Version, packageVersion } - }; - } - - private static void NormalizeId(JObject catalogItem) - { - // for now, for apiapps, we have prepended the id in the catalog with the namespace, however we don't want this to impact the Lucene index - JToken originalId = catalogItem["originalId"]; - if (originalId != null) - { - catalogItem["id"] = originalId.ToString(); - } - } - - private static JToken GetContext(JObject catalogItem) - { - return catalogItem["@context"]; - } - - private void ProcessPackageDetails(IndexWriter indexWriter, JObject catalogItem) - { - _logger.LogDebug("ProcessPackageDetails"); - - indexWriter.DeleteDocuments(CreateDeleteQuery(catalogItem)); - - var package = CatalogPackageMetadataExtraction.MakePackageMetadata(catalogItem, _galleryBaseAddress, _flatContainerBaseAddress, _flatContainerContainerName); - var document = DocumentCreator.CreateDocument(package); - indexWriter.AddDocument(document); - } - - private void ProcessPackageDelete(IndexWriter indexWriter, JObject catalogItem) - { - _logger.LogDebug("ProcessPackageDelete"); - - indexWriter.DeleteDocuments(CreateDeleteQuery(catalogItem)); - } - - private static Query CreateDeleteQuery(JObject catalogItem) - { - string id = catalogItem["id"].ToString(); - string version = catalogItem["version"].ToString(); - - // note as we are not using the QueryParser we are not running this data through the analyzer so we need to mimic its behavior - string analyzedId = id.ToLowerInvariant(); - string analyzedVersion = NuGetVersion.Parse(version).ToNormalizedString(); - - JToken nsJToken; - if (catalogItem.TryGetValue("namespace", out nsJToken)) - { - string ns = nsJToken.ToString(); - - BooleanQuery query = new BooleanQuery(); - query.Add(new BooleanClause(new TermQuery(new Term("Id", analyzedId)), Occur.MUST)); - query.Add(new BooleanClause(new TermQuery(new Term("Version", analyzedVersion)), Occur.MUST)); - query.Add(new BooleanClause(new TermQuery(new Term("Namespace", ns)), Occur.MUST)); - return query; - } - else - { - BooleanQuery query = new BooleanQuery(); - query.Add(new BooleanClause(new TermQuery(new Term("Id", analyzedId)), Occur.MUST)); - query.Add(new BooleanClause(new TermQuery(new Term("Version", analyzedVersion)), Occur.MUST)); - return query; - } - } - - private static void Add(Document doc, string name, string value, Field.Store store, Field.Index index, Field.TermVector termVector, float boost = 1.0f) - { - if (value == null) - { - return; - } - - Field newField = new Field(name, value, store, index, termVector); - newField.Boost = boost; - doc.Add(newField); - } - - private static void Add(Document doc, string name, int value, Field.Store store, Field.Index index, Field.TermVector termVector, float boost = 1.0f) - { - Add(doc, name, value.ToString(CultureInfo.InvariantCulture), store, index, termVector, boost); - } - - private static float DetermineLanguageBoost(string id, string language) - { - if (!string.IsNullOrWhiteSpace(language)) - { - string languageSuffix = "." + language.Trim(); - if (id.EndsWith(languageSuffix, StringComparison.InvariantCultureIgnoreCase)) - { - return 0.1f; - } - } - return 1.0f; - } - - private static void AddStoragePaths(Document doc, IEnumerable storagePaths, string baseAddress) - { - int len = baseAddress.Length; - foreach (string storagePath in storagePaths) - { - if (storagePath.StartsWith(baseAddress)) - { - string relativePath = storagePath.Substring(len); - doc.Add(new Field("StoragePath", relativePath, Field.Store.YES, Field.Index.NOT_ANALYZED)); - } - } - } - - private static IEnumerable GetStoragePaths(JObject package) - { - IList storagePaths = new List(); - storagePaths.Add(package["@id"].ToString()); - storagePaths.Add(package["packageContent"].ToString()); - - foreach (JObject entry in package["entries"]) - { - storagePaths.Add(entry["location"].ToString()); - } - - return storagePaths; - } - - private static IEnumerable GetCatalogStoragePaths(JObject index) - { - IList storagePaths = new List(); - storagePaths.Add(index["@id"].ToString()); - - foreach (JObject page in index["items"]) - { - storagePaths.Add(page["@id"].ToString()); - } - - return storagePaths; - } - } -} \ No newline at end of file diff --git a/src/NuGet.Indexing/CamelCaseFilter.cs b/src/NuGet.Indexing/CamelCaseFilter.cs deleted file mode 100644 index f703a831b..000000000 --- a/src/NuGet.Indexing/CamelCaseFilter.cs +++ /dev/null @@ -1,155 +0,0 @@ -// Copyright (c) .NET Foundation. All rights reserved. -// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. - -using System; -using System.Collections.Generic; -using System.Linq; -using Lucene.Net.Analysis; -using Lucene.Net.Analysis.Tokenattributes; - -namespace NuGet.Indexing -{ - public class CamelCaseFilter : TokenFilter - { - private readonly ITermAttribute _termAttribute; - private readonly IOffsetAttribute _offsetAttribute; - private readonly IPositionIncrementAttribute _positionIncrementAttribute; - - private readonly Queue _queue = new Queue(); - - public CamelCaseFilter(TokenStream stream) - : base(stream) - { - _termAttribute = AddAttribute(); - _offsetAttribute = AddAttribute(); - _positionIncrementAttribute = AddAttribute(); - } - - public override bool IncrementToken() - { - if (_queue.Count > 0) - { - SetAttributes(_queue.Dequeue()); - return true; - } - - if (!input.IncrementToken()) - { - return false; - } - - _queue.Enqueue(new TokenAttributes - { - TermBuffer = _termAttribute.Term, - StartOffset = _offsetAttribute.StartOffset, - EndOffset = _offsetAttribute.EndOffset, - PositionIncrement = _positionIncrementAttribute.PositionIncrement - }); - - string term = _termAttribute.Term; - int start = _offsetAttribute.StartOffset; - int prevStart = start; - int positionIncrement = 0; - string prev = string.Empty; - - foreach (string subTerm in CamelCaseSplit(term)) - { - if (prev != string.Empty) - { - string shingle = string.Format("{0}{1}", prev, subTerm); - - if (shingle != term) - { - _queue.Enqueue(new TokenAttributes - { - TermBuffer = shingle, - StartOffset = prevStart, - EndOffset = prevStart + shingle.Length, - PositionIncrement = 0 - }); - } - } - - if (subTerm != term && !subTerm.Any(c => Char.IsNumber(c))) - { - _queue.Enqueue(new TokenAttributes - { - TermBuffer = subTerm, - StartOffset = start, - EndOffset = start + subTerm.Length, - PositionIncrement = positionIncrement - }); - } - - positionIncrement = 1; - prevStart = start; - start += subTerm.Length; - prev = subTerm; - } - - if (_queue.Count > 0) - { - SetAttributes(_queue.Dequeue()); - return true; - } - - return false; - } - - public static IEnumerable CamelCaseSplit(string term) - { - if (term.Length == 0) - { - yield break; - } - - if (term.Length == 1) - { - yield return term; - yield break; - } - - int beginWordIndex = 0; - int length = 1; - bool lastIsUpper = Char.IsUpper(term[0]); - bool lastIsLetter = Char.IsLetter(term[0]); - - for (int i = 1; i < term.Length; i++) - { - bool currentIsUpper = Char.IsUpper(term[i]); - bool currentIsLetter = Char.IsLetter(term[i]); - bool currentIsNumber = Char.IsNumber(term[i]); - - if ((lastIsLetter && currentIsLetter) && (!lastIsUpper && currentIsUpper) || - (lastIsLetter == currentIsNumber)) - { - yield return term.Substring(beginWordIndex, length); - length = 0; - beginWordIndex = i; - } - - length++; - - lastIsUpper = currentIsUpper; - lastIsLetter = currentIsLetter; - } - - yield return term.Substring(beginWordIndex, length); - } - - private void SetAttributes(TokenAttributes next) - { - _termAttribute.SetTermBuffer(next.TermBuffer); - _offsetAttribute.SetOffset(next.StartOffset, next.EndOffset); - _positionIncrementAttribute.PositionIncrement = next.PositionIncrement; - } - - private class TokenAttributes - { - public string TermBuffer { get; set; } - public int StartOffset { get; set; } - public int EndOffset { get; set; } - public int PositionIncrement { get; set; } - } - } -} diff --git a/src/NuGet.Indexing/CustomSimilarity.cs b/src/NuGet.Indexing/CustomSimilarity.cs deleted file mode 100644 index 7e9b27702..000000000 --- a/src/NuGet.Indexing/CustomSimilarity.cs +++ /dev/null @@ -1,25 +0,0 @@ -// Copyright (c) .NET Foundation. All rights reserved. -// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. -using Lucene.Net.Search; - -namespace NuGet.Indexing -{ - public class CustomSimilarity : DefaultSimilarity - { - public override float LengthNorm(string fieldName, int numTerms) - { - if (fieldName == "TokenizedId" || fieldName == "ShingledId" || fieldName == "Title") - { - return 1; - } - else if (fieldName == "Tags" && numTerms <= 15) - { - return 1; - } - else - { - return base.LengthNorm(fieldName, numTerms); - } - } - } -} diff --git a/src/NuGet.Indexing/DescriptionAnalyzer.cs b/src/NuGet.Indexing/DescriptionAnalyzer.cs deleted file mode 100644 index e23a9461b..000000000 --- a/src/NuGet.Indexing/DescriptionAnalyzer.cs +++ /dev/null @@ -1,15 +0,0 @@ -// Copyright (c) .NET Foundation. All rights reserved. -// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. -using Lucene.Net.Analysis; -using System.IO; - -namespace NuGet.Indexing -{ - public class DescriptionAnalyzer : Analyzer - { - public override TokenStream TokenStream(string fieldName, TextReader reader) - { - return new StopFilter(true, new LowerCaseFilter(new CamelCaseFilter(new DotTokenizer(reader))), TokenizingHelper.GetStopWords()); - } - } -} diff --git a/src/NuGet.Indexing/DotTokenizer.cs b/src/NuGet.Indexing/DotTokenizer.cs deleted file mode 100644 index 3af61574d..000000000 --- a/src/NuGet.Indexing/DotTokenizer.cs +++ /dev/null @@ -1,36 +0,0 @@ -// Copyright (c) .NET Foundation. All rights reserved. -// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. -using Lucene.Net.Analysis; -using System; -using System.IO; - -namespace NuGet.Indexing -{ - public class DotTokenizer : CharTokenizer - { - public DotTokenizer(TextReader input) - : base(input) - { - } - - protected override bool IsTokenChar(char c) - { - return !(Char.IsWhiteSpace(c) - || c == '.' - || c == '-' - || c == ',' - || c == ';' - || c == ':' - || c == '\'' - || c == '*' - || c == '#' - || c == '!' - || c == '~' - || c == '+' - || c == '-' - || c == '(' || c == ')' - || c == '[' || c == ']' - || c == '{' || c == '}'); - } - } -} diff --git a/src/NuGet.Indexing/ExpandAcronymsFilter.cs b/src/NuGet.Indexing/ExpandAcronymsFilter.cs deleted file mode 100644 index 316010940..000000000 --- a/src/NuGet.Indexing/ExpandAcronymsFilter.cs +++ /dev/null @@ -1,127 +0,0 @@ -// Copyright (c) .NET Foundation. All rights reserved. -// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. - -using System; -using System.Collections.Generic; -using System.Linq; -using System.Text; -using Lucene.Net.Analysis; -using Lucene.Net.Analysis.Tokenattributes; - -namespace NuGet.Indexing -{ - public class ExpandAcronymsFilter : TokenFilter - { - private readonly IAcronymExpansionProvider _acronymExpansionProvider; - - private readonly ITermAttribute _termAttribute; - private readonly IPositionIncrementAttribute _positionIncrementAttribute; - private readonly Queue _tokenSet; - private readonly HashSet _recognizedTokens; - private State _currentState; - - public ExpandAcronymsFilter(TokenStream input, IAcronymExpansionProvider acronymExpansionProvider) - : base(input) - { - _acronymExpansionProvider = acronymExpansionProvider; - - _termAttribute = AddAttribute(); - _positionIncrementAttribute = AddAttribute(); - _tokenSet = new Queue(); - _recognizedTokens = new HashSet(); - } - - public override bool IncrementToken() - { - if (_tokenSet.Count > 0) - { - RestoreState(_currentState); - _termAttribute.SetTermBuffer(_tokenSet.Dequeue()); - _positionIncrementAttribute.PositionIncrement = 0; - - return true; - } - - try - { - if (!input.IncrementToken()) // end of stream; no more tokens on input stream - { - return false; - } - } - catch (Exception) - { - return false; - } - - if (!string.IsNullOrEmpty(_termAttribute.Term)) - { - var acronyms = _acronymExpansionProvider.GetKnownAcronyms() - .Where(a => _termAttribute.Term.IndexOf(a, StringComparison.OrdinalIgnoreCase) >= 0); - - foreach (var acronym in acronyms) - { - // Add expanded acronym (ef => entity;framework) - foreach (var expansion in _acronymExpansionProvider.Expand(acronym)) - { - if (_recognizedTokens.Add(expansion)) - { - _tokenSet.Enqueue(expansion); - } - } - - // Add original term without the acronym (xamlbehaviors with xaml acronym => behaviors) - var termWithoutAcronym = RemoveSubstring(_termAttribute.Term, acronym); - if (!string.IsNullOrEmpty(termWithoutAcronym)) - { - if (_recognizedTokens.Add(termWithoutAcronym)) - { - _tokenSet.Enqueue(termWithoutAcronym); - } - } - } - } - - _currentState = CaptureState(); - return true; - } - - /// - /// This method removes a substring from a given string. For example given "foobar", "foo", it will return "bar". - /// It ignores case, so "FOOba", "foo" will also return "bar". - /// - /// Original string - /// Substring to reove from original string - /// Original string with occurrences of substring removed - public static string RemoveSubstring(string original, string substring) - { - if (string.IsNullOrEmpty(original) || string.IsNullOrEmpty(substring)) - { - return original; - } - - var result = new StringBuilder(original.Length); - - int substringLength = substring.Length; - int substringStartIndex = -1; - int lastCharacterIndex = 0; - - do - { - substringStartIndex = original.IndexOf(substring, substringStartIndex + 1, StringComparison.OrdinalIgnoreCase); - - if (substringStartIndex >= 0) - { - result.Append(original, lastCharacterIndex, substringStartIndex - lastCharacterIndex); - - lastCharacterIndex = substringStartIndex + substringLength; - } - } - while (substringStartIndex >= 0); - - result.Append(original, lastCharacterIndex, original.Length - lastCharacterIndex); - - return result.ToString(); - } - } -} \ No newline at end of file diff --git a/src/NuGet.Indexing/Extraction/CatalogNuspecReader.cs b/src/NuGet.Indexing/Extraction/CatalogNuspecReader.cs deleted file mode 100644 index 9670fdc80..000000000 --- a/src/NuGet.Indexing/Extraction/CatalogNuspecReader.cs +++ /dev/null @@ -1,93 +0,0 @@ -using System; -using System.IO; -using System.Xml.Linq; -using Newtonsoft.Json.Linq; -using NuGet.Packaging; - -namespace NuGet.Indexing -{ - public class CatalogNuspecReader - : NuspecReader, IDisposable - { - private readonly JObject _catalogItem; - - public MemoryStream NuspecStream { get; } - - public CatalogNuspecReader(JObject catalogItem) - : base(CreateXDocument(catalogItem)) - { - _catalogItem = catalogItem; - - NuspecStream = new MemoryStream(); - CreateXDocument(catalogItem).Save(NuspecStream, SaveOptions.DisableFormatting); - NuspecStream.Position = 0; - } - - private static XDocument CreateXDocument(JObject catalogItem) - { - var document = new XDocument(); - var package = new XElement("metadata"); - var metadata = new XElement("metadata"); - - // identity - var id = new XElement("id", (string)catalogItem["id"]); - var version = new XElement("version", (string)(catalogItem["verbatimVersion"] ?? catalogItem["version"])); - - // dependencies - var dependencies = new XElement("dependencies"); - foreach (var group in catalogItem.GetJArray("dependencyGroups")) - { - var groupElement = new XElement("group"); - groupElement.SetAttributeValue("targetFramework", (string)group["targetFramework"]); - foreach (var dependency in group.GetJArray("dependencies")) - { - var dependencyElement = new XElement("dependency"); - // Skip the bad dependency, this will result in dependency not be visible in search service. - // Issue: https://github.com/NuGet/NuGetGallery/issues/4866 - if (string.IsNullOrWhiteSpace((string)dependency["id"])) - { - continue; - } - - dependencyElement.SetAttributeValue("id", (string)dependency["id"]); - dependencyElement.SetAttributeValue("version", (string)dependency["range"]); - groupElement.Add(dependencyElement); - } - - dependencies.Add(groupElement); - } - - // framework assemblies - var frameworkAssemblies = new XElement("frameworkAssemblies"); - foreach (var group in catalogItem.GetJArray("frameworkAssemblyGroup")) - { - var element = new XElement("frameworkAssembly"); - element.SetAttributeValue("targetFramework", (string)group["targetFramework"]); - frameworkAssemblies.Add(element); - } - - metadata.Add(id); - metadata.Add(version); - metadata.Add(dependencies); - metadata.Add(frameworkAssemblies); - package.Add(metadata); - document.Add(package); - - return document; - } - - public void Dispose() - { - Dispose(true); - GC.SuppressFinalize(this); - } - - private void Dispose(bool disposing) - { - if (disposing) - { - NuspecStream?.Dispose(); - } - } - } -} \ No newline at end of file diff --git a/src/NuGet.Indexing/Extraction/CatalogPackageMetadataExtraction.cs b/src/NuGet.Indexing/Extraction/CatalogPackageMetadataExtraction.cs deleted file mode 100644 index d6100ca13..000000000 --- a/src/NuGet.Indexing/Extraction/CatalogPackageMetadataExtraction.cs +++ /dev/null @@ -1,347 +0,0 @@ -// Copyright (c) .NET Foundation. All rights reserved. -// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. - -using System; -using System.Collections.Generic; -using System.Diagnostics; -using System.Linq; -using System.Text; -using Newtonsoft.Json.Linq; -using NuGet.Frameworks; -using NuGet.Packaging; -using NuGet.Packaging.Core; -using NuGet.Versioning; -using NuGet.Services.Metadata.Catalog; -using NuGet.Services.Metadata.Catalog.Registration; - -namespace NuGet.Indexing -{ - public static class CatalogPackageMetadataExtraction - { - public static IDictionary MakePackageMetadata( - JObject catalogItem, - Uri galleryBaseAddress, - Uri flatContainerBaseAddress, - string flatContainerContainerName) - { - if (catalogItem == null) - { - throw new ArgumentNullException(nameof(catalogItem)); - } - - if (flatContainerBaseAddress == null) - { - throw new ArgumentNullException(nameof(flatContainerBaseAddress)); - } - - if (flatContainerContainerName == null) - { - throw new ArgumentNullException(nameof(flatContainerContainerName)); - } - - var extractor = new Extractor(); - return extractor.Extract(catalogItem, galleryBaseAddress, flatContainerBaseAddress, flatContainerContainerName); - } - - private class Extractor - { - private JObject _catalog; - private CatalogPackageReader _reader; - private Dictionary _metadata; - private Uri _galleryBaseAddress; - - public IDictionary Extract(JObject catalog, Uri galleryBaseAddress, Uri flatContainerBaseAddress, string flatContainerContainerName) - { - _catalog = catalog; - _reader = new CatalogPackageReader(_catalog); - _metadata = new Dictionary(); - _galleryBaseAddress = galleryBaseAddress; - - AddString(MetadataConstants.IdPropertyName); - AddString(MetadataConstants.NormalizedVersionPropertyName); - AddString(MetadataConstants.VerbatimVersionPropertyName); - AddString(MetadataConstants.TitlePropertyName); - AddString(MetadataConstants.DescriptionPropertyName); - AddString(MetadataConstants.SummaryPropertyName); - AddString(MetadataConstants.AuthorsPropertyName); - AddStringArray(MetadataConstants.TagsPropertyName); - - AddListed(); - AddSemVerLevelKey(); - AddString(MetadataConstants.CreatedPropertyName); - AddString(MetadataConstants.PublishedPropertyName); - AddString(MetadataConstants.LastEditedPropertyName); - - AddString(MetadataConstants.ProjectUrlPropertyName); - AddString(MetadataConstants.MinClientVersionPropertyName); - AddString(MetadataConstants.ReleaseNotesPropertyName); - AddString(MetadataConstants.CopyrightPropertyName); - AddString(MetadataConstants.LanguagePropertyName); - AddString(MetadataConstants.PackageHashPropertyName); - AddString(MetadataConstants.PackageHashAlgorithmPropertyName); - AddString(MetadataConstants.PackageSizePropertyName); - AddString(MetadataConstants.CatalogMetadata.RequiresLicenseAcceptancePropertyName, MetadataConstants.RequiresLicenseAcceptancePropertyName); - - AddLicenseUrl(); - AddIconUrl(flatContainerBaseAddress, flatContainerContainerName); - AddFlattenedDependencies(); - AddSupportedFrameworks(); - - return _metadata; - } - - private void AddIconUrl(Uri flatContainerBaseAddress, string flatContainerContainerName) - { - var iconFile = JTokenToString(_catalog[MetadataConstants.IconFilePropertyName]); - if (!string.IsNullOrWhiteSpace(iconFile)) - { - var packageId = JTokenToString(_catalog[MetadataConstants.IdPropertyName]); - var packageVersion = JTokenToString(_catalog[MetadataConstants.NormalizedVersionPropertyName]); - - var pathProvider = new FlatContainerPackagePathProvider(flatContainerContainerName); - var path = pathProvider.GetIconPath(packageId, packageVersion); - var flatContainerIconUrl = new Uri(flatContainerBaseAddress, path); - _metadata[MetadataConstants.IconUrlPropertyName] = flatContainerIconUrl.AbsoluteUri; - } - else - { - AddString(MetadataConstants.IconUrlPropertyName); - } - } - - private void AddLicenseUrl() - { - var packageId = JTokenToString(_catalog[MetadataConstants.IdPropertyName]); - var packageVersion = JTokenToString(_catalog[MetadataConstants.NormalizedVersionPropertyName]); - - if (_galleryBaseAddress != null && - !string.IsNullOrWhiteSpace(packageId) && - !string.IsNullOrWhiteSpace(packageVersion) && - (_catalog.Value(MetadataConstants.LicenseExpressionPropertyName) != null || - _catalog.Value(MetadataConstants.LicenseFilePropertyName) != null)) - { - _metadata[MetadataConstants.LicenseUrlPropertyName] = LicenseHelper.GetGalleryLicenseUrl(packageId, packageVersion, _galleryBaseAddress); - } - else - { - AddString(MetadataConstants.LicenseUrlPropertyName); - } - } - - private void AddString(string source, string destination = null) - { - var value = _catalog[source]; - if (value == null) - { - return; - } - - _metadata[destination ?? source] = JTokenToString(value); - } - - private string JTokenToString(JToken value) - { - if (value == null) - { - return null; - } - - if (value.Type == JTokenType.Date) - { - return value.Value().ToString("o"); - } - else - { - return (string)value; - } - } - - private void AddStringArray(string source, string destination = null) - { - var value = _catalog[source]; - if (value == null) - { - return; - } - - string joined = string.Join(" ", value.Select(JTokenToString)); - _metadata[destination ?? source] = joined; - } - - private void AddListed() - { - var listed = (string)_catalog[MetadataConstants.ListedPropertyName]; - var published = _catalog[MetadataConstants.PublishedPropertyName]; - if (listed == null) - { - if (published != null && ((DateTime)published).ToString("yyyyMMdd") == "19000101") - { - listed = "false"; - } - else - { - listed = "true"; - } - } - - _metadata[MetadataConstants.ListedPropertyName] = listed; - } - - private void AddSemVerLevelKey() - { - var version = (string)_catalog[MetadataConstants.VerbatimVersionPropertyName]; - if (version != null) - { - NuGetVersion packageOriginalVersion; - if (NuGetVersion.TryParse(version, out packageOriginalVersion)) - { - if (packageOriginalVersion.IsSemVer2) - { - _metadata[MetadataConstants.SemVerLevelKeyPropertyName] = MetadataConstants.SemVerLevel2Value; - return; - } - } - } - - var dependencyGroups = _reader.GetPackageDependencies().ToList(); - foreach (var dependencyGroup in dependencyGroups) - { - foreach (var packageDependency in dependencyGroup.Packages) - { - var versionRange = packageDependency.VersionRange; - if ((versionRange.MaxVersion != null && versionRange.MaxVersion.IsSemVer2) - || (versionRange.MinVersion != null && versionRange.MinVersion.IsSemVer2)) - { - _metadata[MetadataConstants.SemVerLevelKeyPropertyName] = MetadataConstants.SemVerLevel2Value; - return; - } - } - } - } - - private void AddFlattenedDependencies() - { - var dependencyGroups = _reader.GetPackageDependencies().ToList(); - - var builder = new StringBuilder(); - foreach (var dependencyGroup in dependencyGroups) - { - if (dependencyGroup.Packages.Any()) - { - // Add packages list - foreach (var packageDependency in dependencyGroup.Packages) - { - AddFlattenedPackageDependency(dependencyGroup, packageDependency, builder); - } - } - else - { - // Add empty framework dependency - if (builder.Length > 0) - { - builder.Append("|"); - } - - builder.Append(":"); - AddFlattenedFrameworkDependency(dependencyGroup, builder); - } - } - - if (builder.Length > 0) - { - _metadata[MetadataConstants.FlattenedDependenciesPropertyName] = builder.ToString(); - } - } - - private void AddFlattenedPackageDependency( - PackageDependencyGroup dependencyGroup, - Packaging.Core.PackageDependency packageDependency, - StringBuilder builder) - { - if (builder.Length > 0) - { - builder.Append("|"); - } - - builder.Append(packageDependency.Id); - builder.Append(":"); - if (!packageDependency.VersionRange.Equals(VersionRange.All)) - { - builder.Append(packageDependency.VersionRange?.ToString("S", new VersionRangeFormatter())); - } - - AddFlattenedFrameworkDependency(dependencyGroup, builder); - } - - private void AddFlattenedFrameworkDependency(PackageDependencyGroup dependencyGroup, StringBuilder builder) - { - if (!SpecialFrameworks.Contains(dependencyGroup.TargetFramework)) - { - try - { - builder.Append(":"); - builder.Append(dependencyGroup.TargetFramework?.GetShortFolderName()); - } - catch (FrameworkException) - { - // ignoring FrameworkException on purpose - we don't want the job crashing - // whenever someone uploads an unsupported framework - } - } - } - - private void AddSupportedFrameworks() - { - // Parse files for framework names - List supportedFrameworksFromReader = null; - try - { - supportedFrameworksFromReader = _reader - .GetSupportedFrameworks() - .ToList(); - } - catch (Exception ex) when ( - (ex is ArgumentException && ex.Message.ToLowerInvariant().StartsWith("invalid portable")) - || ex is FrameworkException - || ex is PackagingException) - { - // Ignore exceptions indicating invalid frameworks. Since the package is already accepted, it's - // better to have no supported frameworks than to block the pipeline. - Trace.TraceWarning($"{nameof(CatalogPackageMetadataExtraction)}.{nameof(AddSupportedFrameworks)} exception: " + ex); - return; - } - - // Filter out special frameworks + get short framework names - var supportedFrameworks = supportedFrameworksFromReader - .Except(SpecialFrameworks) - .Select(f => - { - try - { - return f.GetShortFolderName(); - } - catch (FrameworkException) - { - // ignoring FrameworkException on purpose - we don't want the job crashing - // whenever someone uploads an unsupported framework - return null; - } - }) - .Where(f => !String.IsNullOrEmpty(f)) - .ToArray(); - - if (supportedFrameworks.Any()) - { - _metadata[MetadataConstants.SupportedFrameworksPropertyName] = string.Join("|", supportedFrameworks); - } - } - - private IEnumerable SpecialFrameworks => new[] - { - NuGetFramework.AnyFramework, - NuGetFramework.AgnosticFramework, - NuGetFramework.UnsupportedFramework - }; - } - } -} \ No newline at end of file diff --git a/src/NuGet.Indexing/Extraction/CatalogPackageReader.cs b/src/NuGet.Indexing/Extraction/CatalogPackageReader.cs deleted file mode 100644 index 45b8d62d0..000000000 --- a/src/NuGet.Indexing/Extraction/CatalogPackageReader.cs +++ /dev/null @@ -1,101 +0,0 @@ -using System; -using System.Collections.Generic; -using System.IO; -using System.Linq; -using System.Threading; -using System.Threading.Tasks; -using Newtonsoft.Json.Linq; -using NuGet.Common; -using NuGet.Frameworks; -using NuGet.Packaging; -using NuGet.Packaging.Core; -using NuGet.Packaging.Signing; - -namespace NuGet.Indexing -{ - public class CatalogPackageReader - : PackageReaderBase, IDisposable - { - private readonly JObject _catalogItem; - private readonly CatalogNuspecReader _catalogNuspecReader; - - public CatalogPackageReader(JObject catalogItem) : base(DefaultFrameworkNameProvider.Instance, DefaultCompatibilityProvider.Instance) - { - _catalogItem = catalogItem; - _catalogNuspecReader = new CatalogNuspecReader(_catalogItem); - } - - public override Stream GetStream(string path) - { - throw new NotSupportedException(); - } - - public override Stream GetNuspec() - { - return _catalogNuspecReader.NuspecStream; - } - - public override IEnumerable GetFiles() - { - var array = _catalogItem.GetJArray("packageEntries"); - if (array == null) - { - yield break; - } - - foreach (var entry in array) - { - yield return (string)entry["fullName"]; - } - } - - public override IEnumerable GetFiles(string folder) - { - return GetFiles().Where(f => f.StartsWith(folder + "/", StringComparison.OrdinalIgnoreCase)); - } - - public override IEnumerable CopyFiles(string destination, IEnumerable packageFiles, ExtractPackageFileDelegate extractFile, - Common.ILogger logger, CancellationToken token) - { - throw new NotImplementedException(); - } - - protected override void Dispose(bool disposing) - { - if (disposing) - { - _catalogNuspecReader.Dispose(); - } - } - - public override Task GetPrimarySignatureAsync(CancellationToken token) - { - throw new NotImplementedException(); - } - - public override Task IsSignedAsync(CancellationToken token) - { - throw new NotImplementedException(); - } - - public override Task ValidateIntegrityAsync(SignatureContent signatureContent, CancellationToken token) - { - throw new NotImplementedException(); - } - - public override Task GetArchiveHashAsync(HashAlgorithmName hashAlgorithm, CancellationToken token) - { - throw new NotImplementedException(); - } - - public override bool CanVerifySignedPackages(SignedPackageVerifierSettings verifierSettings) - { - throw new NotImplementedException(); - } - - public override string GetContentHash(CancellationToken token, Func GetUnsignedPackageHash = null) - { - throw new NotImplementedException(); - } - } -} \ No newline at end of file diff --git a/src/NuGet.Indexing/Extraction/DocumentCreator.cs b/src/NuGet.Indexing/Extraction/DocumentCreator.cs deleted file mode 100644 index 817e332bb..000000000 --- a/src/NuGet.Indexing/Extraction/DocumentCreator.cs +++ /dev/null @@ -1,396 +0,0 @@ -// Copyright (c) .NET Foundation. All rights reserved. -// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. - -using System; -using System.Collections.Generic; -using System.IO; -using System.Text; -using Lucene.Net.Documents; -using Lucene.Net.Index; -using Newtonsoft.Json; -using NuGet.Versioning; -using LuceneConstants = NuGet.Indexing.MetadataConstants.LuceneMetadata; - -namespace NuGet.Indexing -{ - public static class DocumentCreator - { - const int MergeFactor = 10; // Define the size of a file in a level (exponentially) and the count of files that constitue a level - const int MaxMergeDocs = 7999; // Except never merge segments that have more docs than this - - public static IndexWriter CreateIndexWriter(Lucene.Net.Store.Directory directory, bool create) - { - IndexWriter indexWriter = new IndexWriter(directory, new PackageAnalyzer(), create, IndexWriter.MaxFieldLength.UNLIMITED); - indexWriter.MergeFactor = MergeFactor; - indexWriter.MaxMergeDocs = MaxMergeDocs; - - indexWriter.SetSimilarity(new CustomSimilarity()); - return indexWriter; - } - - public static LuceneCommitMetadata CreateCommitMetadata(DateTime commitTimeStamp, string description, int count, string trace) - { - return new LuceneCommitMetadata(commitTimeStamp, description, count, trace); - } - - public static Document CreateDocument(IDictionary package) - { - var errors = new List(); - var document = new Document(); - - // add fields used by search queries - AddId(document, package, errors); - AddVersion(document, package, errors); - AddTitle(document, package); - AddField(document, LuceneConstants.DescriptionPropertyName, package, MetadataConstants.DescriptionPropertyName, Field.Index.ANALYZED); - AddField(document, LuceneConstants.SummaryPropertyName, package, MetadataConstants.SummaryPropertyName, Field.Index.ANALYZED); - AddField(document, LuceneConstants.TagsPropertyName, package, MetadataConstants.TagsPropertyName, Field.Index.ANALYZED, 2.0f); - AddField(document, LuceneConstants.AuthorsPropertyName, package, MetadataConstants.AuthorsPropertyName, Field.Index.ANALYZED); - - // add fields used by filtering and sorting - AddField(document, LuceneConstants.SemVerLevelPropertyName, package, MetadataConstants.SemVerLevelKeyPropertyName, Field.Index.ANALYZED); - AddListed(document, package, errors); - AddDates(document, package, errors); - AddSortableTitle(document, package); - - // add fields used when materializing the result - AddField(document, LuceneConstants.IconUrlPropertyName, package, MetadataConstants.IconUrlPropertyName, Field.Index.NOT_ANALYZED); - AddField(document, LuceneConstants.ProjectUrlPropertyName, package, MetadataConstants.ProjectUrlPropertyName, Field.Index.NOT_ANALYZED); - AddField(document, LuceneConstants.MinClientVersionPropertyName, package, MetadataConstants.MinClientVersionPropertyName, Field.Index.NOT_ANALYZED); - AddField(document, LuceneConstants.ReleaseNotesPropertyName, package, MetadataConstants.ReleaseNotesPropertyName, Field.Index.NOT_ANALYZED); - AddField(document, LuceneConstants.CopyrightPropertyName, package, MetadataConstants.CopyrightPropertyName, Field.Index.NOT_ANALYZED); - AddField(document, LuceneConstants.LanguagePropertyName, package, MetadataConstants.LanguagePropertyName, Field.Index.NOT_ANALYZED); - AddField(document, LuceneConstants.LicenseUrlPropertyName, package, MetadataConstants.LicenseUrlPropertyName, Field.Index.NOT_ANALYZED); - AddField(document, LuceneConstants.PackageHashPropertyName, package, MetadataConstants.PackageHashPropertyName, Field.Index.NOT_ANALYZED); - AddField(document, LuceneConstants.PackageHashAlgorithmPropertyName, package, MetadataConstants.PackageHashAlgorithmPropertyName, Field.Index.NOT_ANALYZED); - AddPackageSize(document, package, errors); - AddRequiresLicenseAcceptance(document, package, errors); - AddDependencies(document, package); - AddSupportedFrameworks(document, package); - - DetermineLanguageBoost(document, package); - CheckErrors(errors); - - return document; - } - - private static void AddId(Document document, IDictionary package, List errors) - { - string value; - if (package.TryGetValue(MetadataConstants.IdPropertyName, out value)) - { - float boost = 2.0f; - if (!package.ContainsKey(MetadataConstants.TagsPropertyName)) - { - boost += 0.5f; - } - - AddField(document, LuceneConstants.IdPropertyName, value, Field.Index.ANALYZED, boost); - AddField(document, LuceneConstants.IdAutocompletePropertyName, value, Field.Index.ANALYZED, boost); - AddField(document, LuceneConstants.TokenizedIdPropertyName, value, Field.Index.ANALYZED, boost); - AddField(document, LuceneConstants.ShingledIdPropertyName, value, Field.Index.ANALYZED, boost); - } - else - { - errors.Add($"Required property '{MetadataConstants.IdPropertyName}' not found."); - } - } - - private static void AddVersion(Document document, IDictionary package, List errors) - { - string verbatimVersion; - if (package.TryGetValue(MetadataConstants.VerbatimVersionPropertyName, out verbatimVersion)) - { - AddField(document, LuceneConstants.VerbatimVersionPropertyName, verbatimVersion, Field.Index.NOT_ANALYZED); - - NuGetVersion parsedVerbatimVersion; - if (NuGetVersion.TryParse(verbatimVersion, out parsedVerbatimVersion)) - { - AddField(document, LuceneConstants.NormalizedVersionPropertyName, parsedVerbatimVersion.ToNormalizedString(), Field.Index.ANALYZED); - AddField(document, LuceneConstants.FullVersionPropertyName, parsedVerbatimVersion.ToFullString(), Field.Index.NOT_ANALYZED); - } - else - { - errors.Add($"Unable to parse '{MetadataConstants.VerbatimVersionPropertyName}' as NuGetVersion."); - } - } - else - { - errors.Add($"Required property '{MetadataConstants.VerbatimVersionPropertyName}' not found."); - } - } - - private static void AddTitle(Document document, IDictionary package) - { - string value; - - package.TryGetValue(MetadataConstants.TitlePropertyName, out value); - - if (string.IsNullOrEmpty(value)) - { - package.TryGetValue(MetadataConstants.IdPropertyName, out value); - } - - AddField(document, LuceneConstants.TitlePropertyName, value ?? string.Empty, Field.Index.ANALYZED); - } - - private static void AddListed(Document document, IDictionary package, List errors) - { - string value; - if (package.TryGetValue(MetadataConstants.ListedPropertyName, out value)) - { - bool listed; - if (bool.TryParse(value, out listed)) - { - AddField(document, LuceneConstants.ListedPropertyName, value, Field.Index.ANALYZED); - } - else - { - errors.Add($"Unable to parse '{MetadataConstants.ListedPropertyName}' as Boolean."); - } - } - else - { - errors.Add($"Required property '{MetadataConstants.ListedPropertyName}' not found."); - } - } - - private static void AddSortableTitle(Document document, IDictionary package) - { - string value; - - package.TryGetValue(MetadataConstants.TitlePropertyName, out value); - - if (string.IsNullOrEmpty(value)) - { - package.TryGetValue(MetadataConstants.IdPropertyName, out value); - } - - AddField(document, LuceneConstants.SortableTitlePropertyName, (value ?? string.Empty).Trim().ToLower(), Field.Index.NOT_ANALYZED); - } - - private static void AddDates(Document document, IDictionary package, List errors) - { - string created; - if (package.TryGetValue(MetadataConstants.CreatedPropertyName, out created)) - { - AddField(document, LuceneConstants.OriginalCreatedPropertyName, created, Field.Index.NOT_ANALYZED); - } - - string published; - if (package.TryGetValue(MetadataConstants.PublishedPropertyName, out published)) - { - AddField(document, LuceneConstants.OriginalPublishedPropertyName, published, Field.Index.NOT_ANALYZED); - - DateTimeOffset publishedDateTime; - if (DateTimeOffset.TryParse(published, out publishedDateTime)) - { - AddDateField(document, LuceneConstants.PublishedDatePropertyName, publishedDateTime); - } - else - { - errors.Add($"Unable to parse '{MetadataConstants.PublishedPropertyName}' as DateTime."); - } - - string lastEdited; - if (package.TryGetValue(MetadataConstants.LastEditedPropertyName, out lastEdited) && lastEdited != MetadataConstants.DateTimeZeroStringValue) - { - AddField(document, LuceneConstants.OriginalLastEditedPropertyName, lastEdited, Field.Index.NOT_ANALYZED); - } - else - { - lastEdited = publishedDateTime.ToString("O"); - } - - DateTimeOffset lastEditedDateTime; - if (DateTimeOffset.TryParse(lastEdited, out lastEditedDateTime)) - { - AddDateField(document, LuceneConstants.LastEditedDatePropertyName, lastEditedDateTime); - } - else - { - errors.Add($"Unable to parse '{MetadataConstants.LastEditedPropertyName}' as DateTime."); - } - } - else - { - errors.Add($"Required property '{MetadataConstants.PublishedPropertyName}' not found."); - } - } - - private static void AddPackageSize(Document document, IDictionary package, List errors) - { - string value; - if (package.TryGetValue(MetadataConstants.PackageSizePropertyName, out value)) - { - int packageSize; - if (int.TryParse(value, out packageSize)) - { - AddField(document, LuceneConstants.PackageSizePropertyName, value, Field.Index.NOT_ANALYZED); - } - else - { - errors.Add($"Unable to parse '{MetadataConstants.PackageSizePropertyName}' as Int32."); - } - } - } - - private static void AddRequiresLicenseAcceptance(Document document, IDictionary package, List errors) - { - string value; - if (package.TryGetValue(MetadataConstants.RequiresLicenseAcceptancePropertyName, out value)) - { - bool requiresLicenseAcceptance; - if (bool.TryParse(value, out requiresLicenseAcceptance)) - { - AddField(document, LuceneConstants.RequiresLicenseAcceptancePropertyName, value, Field.Index.NOT_ANALYZED); - } - else - { - errors.Add($"Unable to parse '{MetadataConstants.RequiresLicenseAcceptancePropertyName}' as Boolean."); - } - } - } - - private static void AddDependencies(Document document, IDictionary package) - { - string value; - if (package.TryGetValue(MetadataConstants.FlattenedDependenciesPropertyName, out value)) - { - AddField(document, LuceneConstants.FlattenedDependenciesPropertyName, value, Field.Index.NOT_ANALYZED); - - if (!string.IsNullOrWhiteSpace(value)) - { - using (var textWriter = new StringWriter()) - { - using (var jsonWriter = new JsonTextWriter(textWriter)) - { - jsonWriter.WriteStartArray(); - - foreach (var dependency in value.Split('|')) - { - string[] fields = dependency.Split(':'); - if (fields.Length > 0) - { - jsonWriter.WriteStartObject(); - jsonWriter.WritePropertyName("Id"); - jsonWriter.WriteValue(fields[0]); - if (fields.Length > 1) - { - jsonWriter.WritePropertyName("VersionSpec"); - jsonWriter.WriteValue(fields[1]); - } - if (fields.Length > 2) - { - jsonWriter.WritePropertyName("TargetFramework"); - jsonWriter.WriteValue(fields[2]); - } - jsonWriter.WriteEndObject(); - } - } - jsonWriter.WriteEndArray(); - jsonWriter.Flush(); - textWriter.Flush(); - string dependencies = textWriter.ToString(); - - AddField(document, LuceneConstants.DependenciesPropertyName, dependencies, Field.Index.NOT_ANALYZED); - } - } - } - } - } - - private static void AddSupportedFrameworks(Document document, IDictionary package) - { - string value; - if (package.TryGetValue(MetadataConstants.SupportedFrameworksPropertyName, out value)) - { - using (var textWriter = new StringWriter()) - { - using (var jsonWriter = new JsonTextWriter(textWriter)) - { - jsonWriter.WriteStartArray(); - foreach (var s in value.Split('|')) - { - jsonWriter.WriteValue(s); - } - jsonWriter.WriteEndArray(); - jsonWriter.Flush(); - textWriter.Flush(); - string supportedFrameworks = textWriter.ToString(); - - document.Add(new Field(LuceneConstants.SupportedFrameworksPropertyName, supportedFrameworks, Field.Store.YES, Field.Index.NOT_ANALYZED)); - } - } - } - } - - private static void DetermineLanguageBoost(Document document, IDictionary package) - { - string id; - string language; - if (package.TryGetValue(MetadataConstants.IdPropertyName, out id) && package.TryGetValue(MetadataConstants.LanguagePropertyName, out language)) - { - if (!string.IsNullOrWhiteSpace(language)) - { - string languageSuffix = "." + language.Trim(); - if (id.EndsWith(languageSuffix, StringComparison.OrdinalIgnoreCase)) - { - document.Boost = 0.1f; - } - } - document.Boost = 1.0f; - } - } - - private static void CheckErrors(List errors) - { - if (errors.Count > 0) - { - var sb = new StringBuilder(); - foreach (string error in errors) - { - sb.AppendLine(error); - } - throw new Exception(sb.ToString()); - } - } - - private static void AddField(Document document, string destination, IDictionary package, string source, Field.Index index, float boost = 1.0f) - { - string value; - if (package.TryGetValue(source, out value)) - { - AddField(document, destination, value, index, boost); - } - else if (index == Field.Index.ANALYZED) - { - /* - * Analyzed fields are those that are used in queries. There is a problem in the ParallelReader that - * cases a KeyNotFoundException to be thrown when querying for a field that does not exist in a - * document. Therefore, we add an empty value for fields that would otherwise not be present in the - * document. - */ - AddField(document, destination, string.Empty, index, boost); - } - } - - private static void AddDateField(Document document, string destination, DateTimeOffset date) - { - document.Add(new NumericField(destination, Field.Store.YES, true).SetIntValue(int.Parse(date.ToString("yyyyMMdd")))); - } - - private static void AddField(Document document, string destination, string value, Field.Index index, float boost = 1.0f) - { - var termVector = index == Field.Index.ANALYZED - ? Field.TermVector.WITH_POSITIONS_OFFSETS - : Field.TermVector.NO; - - document.Add( - new Field(destination, value, Field.Store.YES, index, termVector) - { - Boost = boost - }); - } - } -} diff --git a/src/NuGet.Indexing/Extraction/JTokenExtensions.cs b/src/NuGet.Indexing/Extraction/JTokenExtensions.cs deleted file mode 100644 index d5f35aade..000000000 --- a/src/NuGet.Indexing/Extraction/JTokenExtensions.cs +++ /dev/null @@ -1,23 +0,0 @@ -using Newtonsoft.Json.Linq; - -namespace NuGet.Indexing -{ - public static class JTokenExtensions - { - public static JArray GetJArray(this JToken token, string key) - { - var array = token[key]; - if (array == null) - { - return new JArray(); - } - - if (!(array is JArray)) - { - array = new JArray(array); - } - - return (JArray)array; - } - } -} \ No newline at end of file diff --git a/src/NuGet.Indexing/Extraction/LuceneCommitMetadata.cs b/src/NuGet.Indexing/Extraction/LuceneCommitMetadata.cs deleted file mode 100644 index a796bddba..000000000 --- a/src/NuGet.Indexing/Extraction/LuceneCommitMetadata.cs +++ /dev/null @@ -1,35 +0,0 @@ -// Copyright (c) .NET Foundation. All rights reserved. -// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. - -using System; -using System.Collections.Generic; - -namespace NuGet.Indexing -{ - public class LuceneCommitMetadata - { - public DateTime CommitTimeStamp { get; set; } - public string Description { get; set; } - public int Count { get; set; } - public string Trace { get; set; } - - public LuceneCommitMetadata(DateTime commitTimeStamp, string description, int count, string trace) - { - CommitTimeStamp = commitTimeStamp; - Description = description; - Count = count; - Trace = trace; - } - - public IDictionary ToDictionary() - { - IDictionary commitMetadata = new Dictionary(); - commitMetadata.Add("commitTimeStamp", CommitTimeStamp.ToString("O")); - commitMetadata.Add("commit-time-stamp", CommitTimeStamp.ToString("O")); - commitMetadata.Add("description", Description); - commitMetadata.Add("count", Count.ToString()); - commitMetadata.Add("trace", Trace); - return commitMetadata; - } - } -} \ No newline at end of file diff --git a/src/NuGet.Indexing/Extraction/MetadataConstants.cs b/src/NuGet.Indexing/Extraction/MetadataConstants.cs deleted file mode 100644 index ad417023d..000000000 --- a/src/NuGet.Indexing/Extraction/MetadataConstants.cs +++ /dev/null @@ -1,100 +0,0 @@ -// Copyright (c) .NET Foundation. All rights reserved. -// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. - -namespace NuGet.Indexing -{ - public static class MetadataConstants - { - public static class LuceneMetadata - { - // Id Properties - public const string IdPropertyName = "Id"; - public const string IdAutocompletePropertyName = "IdAutocomplete"; - public const string ShingledIdPropertyName = "ShingledId"; - public const string TokenizedIdPropertyName = "TokenizedId"; - - // Version Properties - public const string NormalizedVersionPropertyName = "Version"; - public const string FullVersionPropertyName = "FullVersion"; - public const string VerbatimVersionPropertyName = "OriginalVersion"; - - // Date Properties - public const string LastEditedDatePropertyName = "LastEditedDate"; - public const string OriginalCreatedPropertyName = "OriginalCreated"; - public const string OriginalPublishedPropertyName = "OriginalPublished"; - public const string OriginalLastEditedPropertyName = "OriginalLastEdited"; - public const string PublishedDatePropertyName = "PublishedDate"; - - // Other Properties - public const string AuthorsPropertyName = "Authors"; - public const string CopyrightPropertyName = "Copyright"; - public const string DependenciesPropertyName = "Dependencies"; - public const string DescriptionPropertyName = "Description"; - public const string FlattenedDependenciesPropertyName = "FlattenedDependencies"; - public const string IconUrlPropertyName = "IconUrl"; - public const string LanguagePropertyName = "Language"; - public const string LicenseUrlPropertyName = "LicenseUrl"; - public const string ListedPropertyName = "Listed"; - public const string MinClientVersionPropertyName = "MinClientVersion"; - public const string PackageHashPropertyName = "PackageHash"; - public const string PackageHashAlgorithmPropertyName = "PackageHashAlgorithm"; - public const string PackageSizePropertyName = "PackageSize"; - public const string ProjectUrlPropertyName = "ProjectUrl"; - public const string ReleaseNotesPropertyName = "ReleaseNotes"; - public const string RequiresLicenseAcceptancePropertyName = "RequiresLicenseAcceptance"; - public const string SemVerLevelPropertyName = "SemVerLevel"; - public const string SortableTitlePropertyName = "SortableTitle"; - public const string SummaryPropertyName = "Summary"; - public const string SupportedFrameworksPropertyName = "SupportedFrameworks"; - public const string TagsPropertyName = "Tags"; - public const string TitlePropertyName = "Title"; - } - - // These are here to account for the minor variations when extracting data from various sources. - public static class NuPkgMetadata - { - public const string VersionPropertyName = "version"; - } - - public static class CatalogMetadata - { - public const string RequiresLicenseAcceptancePropertyName = "requireLicenseAcceptance"; - } - - // Shared Property names - public const string AuthorsPropertyName = "authors"; - public const string CopyrightPropertyName = "copyright"; - public const string CreatedPropertyName = "created"; - public const string DescriptionPropertyName = "description"; - public const string FlattenedDependenciesPropertyName = "flattenedDependencies"; - public const string IconUrlPropertyName = "iconUrl"; - public const string IconFilePropertyName = "iconFile"; - public const string IdPropertyName = "id"; - public const string LanguagePropertyName = "language"; - public const string LastEditedPropertyName = "lastEdited"; - public const string LicenseUrlPropertyName = "licenseUrl"; - public const string LicenseExpressionPropertyName = "licenseExpression"; - public const string LicenseFilePropertyName = "licenseFile"; - public const string ListedPropertyName = "listed"; - public const string MinClientVersionPropertyName = "minClientVersion"; - public const string NormalizedVersionPropertyName = "version"; - public const string PackageHashPropertyName = "packageHash"; - public const string PackageHashAlgorithmPropertyName = "packageHashAlgorithm"; - public const string PackageSizePropertyName = "packageSize"; - public const string ProjectUrlPropertyName = "projectUrl"; - public const string PublishedPropertyName = "published"; - public const string ReleaseNotesPropertyName = "releaseNotes"; - public const string RequiresLicenseAcceptancePropertyName = "requiresLicenseAcceptance"; - public const string SemVerLevelKeyPropertyName = "semVerLevelKey"; - public const string SummaryPropertyName = "summary"; - public const string SupportedFrameworksPropertyName = "supportedFrameworks"; - public const string TagsPropertyName = "tags"; - public const string TitlePropertyName = "title"; - public const string VerbatimVersionPropertyName = "verbatimVersion"; - - // Constant Values - public const string DateTimeZeroStringValue = "01/01/0001 00:00:00"; - public const string SemVerLevel2Value = "2"; - public const string HashAlgorithmValue = "SHA512"; - } -} diff --git a/src/NuGet.Indexing/IAcronymExpansionProvider.cs b/src/NuGet.Indexing/IAcronymExpansionProvider.cs deleted file mode 100644 index 26a01aac8..000000000 --- a/src/NuGet.Indexing/IAcronymExpansionProvider.cs +++ /dev/null @@ -1,13 +0,0 @@ -// Copyright (c) .NET Foundation. All rights reserved. -// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. - -using System.Collections.Generic; - -namespace NuGet.Indexing -{ - public interface IAcronymExpansionProvider - { - IEnumerable GetKnownAcronyms(); - IEnumerable Expand(string acronym); - } -} \ No newline at end of file diff --git a/src/NuGet.Indexing/IdentifierAnalyzer.cs b/src/NuGet.Indexing/IdentifierAnalyzer.cs deleted file mode 100644 index 4003c774d..000000000 --- a/src/NuGet.Indexing/IdentifierAnalyzer.cs +++ /dev/null @@ -1,17 +0,0 @@ -// Copyright (c) .NET Foundation. All rights reserved. -// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. -using Lucene.Net.Analysis; -using System.IO; - -namespace NuGet.Indexing -{ - public class IdentifierAnalyzer : Analyzer - { - public override TokenStream TokenStream(string fieldName, TextReader reader) - { - return new LowerCaseFilter( - new ExpandAcronymsFilter( - new CamelCaseFilter(new DotTokenizer(reader)), NuGetAcronymExpansionProvider.Instance)); - } - } -} diff --git a/src/NuGet.Indexing/IdentifierAutocompleteAnalyzer.cs b/src/NuGet.Indexing/IdentifierAutocompleteAnalyzer.cs deleted file mode 100644 index f286881cd..000000000 --- a/src/NuGet.Indexing/IdentifierAutocompleteAnalyzer.cs +++ /dev/null @@ -1,16 +0,0 @@ -// Copyright (c) .NET Foundation. All rights reserved. -// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. -using Lucene.Net.Analysis; -using Lucene.Net.Analysis.NGram; -using System.IO; - -namespace NuGet.Indexing -{ - public class IdentifierAutocompleteAnalyzer : Analyzer - { - public override TokenStream TokenStream(string fieldName, TextReader reader) - { - return new EdgeNGramTokenFilter(new LowerCaseFilter(new CamelCaseFilter(new DotTokenizer(reader))), Side.FRONT, 1, 8); - } - } -} \ No newline at end of file diff --git a/src/NuGet.Indexing/IdentifierKeywordAnalyzer.cs b/src/NuGet.Indexing/IdentifierKeywordAnalyzer.cs deleted file mode 100644 index 180db4646..000000000 --- a/src/NuGet.Indexing/IdentifierKeywordAnalyzer.cs +++ /dev/null @@ -1,15 +0,0 @@ -// Copyright (c) .NET Foundation. All rights reserved. -// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. -using Lucene.Net.Analysis; -using System.IO; - -namespace NuGet.Indexing -{ - public class IdentifierKeywordAnalyzer : Analyzer - { - public override TokenStream TokenStream(string fieldName, TextReader reader) - { - return new LowerCaseFilter(new KeywordTokenizer(reader)); - } - } -} diff --git a/src/NuGet.Indexing/NuGet.Indexing.csproj b/src/NuGet.Indexing/NuGet.Indexing.csproj index 21f677fe3..747ec49fe 100644 --- a/src/NuGet.Indexing/NuGet.Indexing.csproj +++ b/src/NuGet.Indexing/NuGet.Indexing.csproj @@ -62,51 +62,16 @@
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 3.0.3 - - - 3.0.6262.31186 - 0.3.0 runtime; build; native; contentfiles; analyzers diff --git a/src/NuGet.Indexing/NuGetAcronymExpansionProvider.cs b/src/NuGet.Indexing/NuGetAcronymExpansionProvider.cs deleted file mode 100644 index e8d30e5ca..000000000 --- a/src/NuGet.Indexing/NuGetAcronymExpansionProvider.cs +++ /dev/null @@ -1,76 +0,0 @@ -// Copyright (c) .NET Foundation. All rights reserved. -// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. - -using System.Collections.Generic; -using System.IO; -using System.Linq; -using Newtonsoft.Json; - -namespace NuGet.Indexing -{ - /// - /// This class loads a file that contains the acronyms we want to expand. - /// - /// This file could look like: - /// - /// { - /// "ef": [ - /// "ef", - /// "entity framework" - /// ] - /// } - /// - /// The above file expands "ef" into both "ef" and "entity framework". - /// - /// The acronym itself must be repeated in the list of expansions. This - /// approach allows us to "replace" acronyms as well. For example, imagine - /// we want to treat "mvc5" as just "mvc". the expansion could look like: - /// - /// { - /// "mvc5": [ - /// "mvc" - /// ] - /// } - /// - public class NuGetAcronymExpansionProvider : IAcronymExpansionProvider - { - public static readonly IAcronymExpansionProvider Instance = new NuGetAcronymExpansionProvider(); - - private static readonly Dictionary Acronyms; - - private NuGetAcronymExpansionProvider() - { - } - - static NuGetAcronymExpansionProvider() - { - var assembly = typeof(NuGetAcronymExpansionProvider).Assembly; - var assemblyName = assembly.GetName().Name; - - using (var stream = assembly.GetManifestResourceStream(assemblyName + ".Resources.Acronyms.json")) - using (var streamReader = new StreamReader(stream)) - using (var reader = new JsonTextReader(streamReader)) - { - var serializer = new JsonSerializer(); - Acronyms = serializer.Deserialize>(reader); - } - } - - public IEnumerable GetKnownAcronyms() - { - return Acronyms.Keys; - } - - public IEnumerable Expand(string acronym) - { - string[] expanded; - - if (Acronyms.TryGetValue(acronym, out expanded)) - { - return expanded; - } - - return Enumerable.Empty(); - } - } -} \ No newline at end of file diff --git a/src/NuGet.Indexing/NuGetMergePolicyApplyer.cs b/src/NuGet.Indexing/NuGetMergePolicyApplyer.cs deleted file mode 100644 index afb8e9cea..000000000 --- a/src/NuGet.Indexing/NuGetMergePolicyApplyer.cs +++ /dev/null @@ -1,42 +0,0 @@ -// Copyright (c) .NET Foundation. All rights reserved. -// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. - -using Lucene.Net.Index; - -namespace NuGet.Indexing -{ - public class NuGetMergePolicyApplyer - { - // Define the size of a file in a level (exponentially) and the count of files that constitue a level - public const int MergeFactor = 10; - - // Except never merge segments that have more docs than this - public const int MaxMergeDocs = 7999; - - // What is the size a segment must be in order to be merged? - // Don't set this too high or we'll end up with lots of small segments. - // ReSharper disable once InconsistentNaming - public const double MinMergeMB = 1.0; - - // From what segment size do we want to ignore merges? - // (or put differently: if the max segment size we want is 100 MB, MaxMergeMB should be 100 MB / 2) - // Note this does not apply when calling .Optimize() - // ReSharper disable once InconsistentNaming - public const double MaxMergeMB = 50.0; - - public static void ApplyTo(IndexWriter writer) - { - writer.MergeFactor = MergeFactor; - writer.MaxMergeDocs = MaxMergeDocs; - - var mergePolicy = new LogByteSizeMergePolicy(writer) - { - MaxMergeDocs = MaxMergeDocs, - MergeFactor = MergeFactor, - MinMergeMB = MinMergeMB, - MaxMergeMB = MaxMergeMB - }; - writer.SetMergePolicy(mergePolicy); - } - } -} \ No newline at end of file diff --git a/src/NuGet.Indexing/OwnerAnalyzer.cs b/src/NuGet.Indexing/OwnerAnalyzer.cs deleted file mode 100644 index 7e105e6f4..000000000 --- a/src/NuGet.Indexing/OwnerAnalyzer.cs +++ /dev/null @@ -1,15 +0,0 @@ -// Copyright (c) .NET Foundation. All rights reserved. -// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. -using Lucene.Net.Analysis; -using System.IO; - -namespace NuGet.Indexing -{ - public class OwnerAnalyzer : Analyzer - { - public override TokenStream TokenStream(string fieldName, TextReader reader) - { - return new LowerCaseFilter(new KeywordTokenizer(reader)); - } - } -} diff --git a/src/NuGet.Indexing/PackageAnalyzer.cs b/src/NuGet.Indexing/PackageAnalyzer.cs deleted file mode 100644 index 0b2f28348..000000000 --- a/src/NuGet.Indexing/PackageAnalyzer.cs +++ /dev/null @@ -1,37 +0,0 @@ -// Copyright (c) .NET Foundation. All rights reserved. -// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. -using Lucene.Net.Analysis; -using Lucene.Net.Analysis.Standard; -using System; -using System.Collections.Generic; - -namespace NuGet.Indexing -{ - public class PackageAnalyzer : PerFieldAnalyzerWrapper - { - static readonly IDictionary _fieldAnalyzers; - - static PackageAnalyzer() - { - _fieldAnalyzers = new Dictionary(StringComparer.OrdinalIgnoreCase) - { - { "Id", new IdentifierKeywordAnalyzer() }, - { "IdAutocomplete", new IdentifierAutocompleteAnalyzer() }, - { "TokenizedId", new IdentifierAnalyzer() }, - { "ShingledId", new ShingledIdentifierAnalyzer() }, - { "Version", new VersionAnalyzer() }, - { "Title", new DescriptionAnalyzer() }, - { "Description", new DescriptionAnalyzer() }, - { "Summary", new DescriptionAnalyzer() }, - { "Authors", new DescriptionAnalyzer() }, - { "Owner", new OwnerAnalyzer() }, - { "Tags", new TagsAnalyzer() } - }; - } - - public PackageAnalyzer() - : base(new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30), _fieldAnalyzers) - { - } - } -} diff --git a/src/NuGet.Indexing/Resources/Acronyms.json b/src/NuGet.Indexing/Resources/Acronyms.json deleted file mode 100644 index adde3b61b..000000000 --- a/src/NuGet.Indexing/Resources/Acronyms.json +++ /dev/null @@ -1,35 +0,0 @@ -{ - "xaml": [ - "xaml" - ], - "mvc": [ - "mvc" - ], - "wpf": [ - "wpf" - ], - "ef": [ - "ef", - "entity framework" - ], - "uwp": [ - "uwp", - "universal windows platform" - ], - "uap": [ - "uwp", - "universal windows platform" - ], - "markdown": [ - "markdown" - ], - "http": [ - "http" - ], - "https": [ - "https" - ], - "ftp": [ - "ftp" - ] -} \ No newline at end of file diff --git a/src/NuGet.Indexing/SemanticVersionFilter.cs b/src/NuGet.Indexing/SemanticVersionFilter.cs deleted file mode 100644 index 928485105..000000000 --- a/src/NuGet.Indexing/SemanticVersionFilter.cs +++ /dev/null @@ -1,43 +0,0 @@ -// Copyright (c) .NET Foundation. All rights reserved. -// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. - -using System.Collections.Generic; -using Lucene.Net.Analysis; -using Lucene.Net.Analysis.Tokenattributes; -using Lucene.Net.Index; -using Lucene.Net.Search; -using NuGet.Versioning; - -namespace NuGet.Indexing -{ - public class SemanticVersionFilter : TokenFilter - { - ITermAttribute _termAttribute; - - public SemanticVersionFilter(TokenStream stream) - : base(stream) - { - _termAttribute = AddAttribute(); - } - - public override bool IncrementToken() - { - if (!input.IncrementToken()) - { - return false; - } - - string version = _termAttribute.Term; - - NuGetVersion nuGetVersion; - if (NuGetVersion.TryParse(version, out nuGetVersion)) - { - version = nuGetVersion.ToNormalizedString(); - } - - _termAttribute.SetTermBuffer(version); - - return true; - } - } -} diff --git a/src/NuGet.Indexing/ShingledIdentifierAnalyzer.cs b/src/NuGet.Indexing/ShingledIdentifierAnalyzer.cs deleted file mode 100644 index 5805051e9..000000000 --- a/src/NuGet.Indexing/ShingledIdentifierAnalyzer.cs +++ /dev/null @@ -1,16 +0,0 @@ -// Copyright (c) .NET Foundation. All rights reserved. -// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. -using Lucene.Net.Analysis; -using Lucene.Net.Analysis.Shingle; -using System.IO; - -namespace NuGet.Indexing -{ - public class ShingledIdentifierAnalyzer : Analyzer - { - public override TokenStream TokenStream(string fieldName, TextReader reader) - { - return new LowerCaseFilter(new ShingleFilter(new DotTokenizer(reader))); - } - } -} diff --git a/src/NuGet.Indexing/Sql2Lucene.cs b/src/NuGet.Indexing/Sql2Lucene.cs deleted file mode 100644 index fd8a3298b..000000000 --- a/src/NuGet.Indexing/Sql2Lucene.cs +++ /dev/null @@ -1,313 +0,0 @@ -// Copyright (c) .NET Foundation. All rights reserved. -// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. - -using Lucene.Net.Documents; -using Lucene.Net.Store; -using System; -using System.Collections.Generic; -using System.Data.SqlClient; -using System.Diagnostics; -using System.IO; -using System.Linq; -using System.Threading.Tasks; -using Microsoft.Extensions.Logging; -using Newtonsoft.Json.Linq; - -namespace NuGet.Indexing -{ - public class Sql2Lucene - { - static Document CreateDocument(SqlDataReader reader, IDictionary> packageFrameworks) - { - var package = new Dictionary(); - for (var i = 0; i < reader.FieldCount; i++) - { - if (!reader.IsDBNull(i)) - { - string name = reader.GetName(i); - object obj = reader.GetValue(i); - - if (name == "key") - { - var key = (int)obj; - List targetFrameworks; - if (packageFrameworks.TryGetValue(key, out targetFrameworks)) - { - package.Add("supportedFrameworks", string.Join("|", targetFrameworks)); - } - } - - var value = (obj is DateTime) ? ((DateTime)obj).ToUniversalTime().ToString("O") : obj.ToString(); - - package.Add(name, value); - } - } - - return DocumentCreator.CreateDocument(package); - } - - static string IndexBatch(string path, string connectionString, IDictionary> packageFrameworks, int beginKey, int endKey) - { - var folder = string.Format(@"{0}\index_{1}_{2}", path, beginKey, endKey); - - var directoryInfo = new DirectoryInfo(folder); - directoryInfo.Create(); - - using (var connection = new SqlConnection(connectionString)) - { - connection.Open(); - - var cmdText = @" - SELECT - Packages.[Key] 'key', - PackageRegistrations.Id 'id', - Packages.[Version] 'verbatimVersion', - Packages.NormalizedVersion 'version', - Packages.Title 'title', - Packages.Tags 'tags', - Packages.[Description] 'description', - Packages.FlattenedAuthors 'authors', - Packages.Summary 'summary', - Packages.IconUrl 'iconUrl', - Packages.ProjectUrl 'projectUrl', - Packages.MinClientVersion 'minClientVersion', - Packages.ReleaseNotes 'releaseNotes', - Packages.Copyright 'copyright', - Packages.[Language] 'language', - Packages.LicenseUrl 'licenseUrl', - Packages.RequiresLicenseAcceptance 'requireLicenseAcceptance', - Packages.[Hash] 'packageHash', - Packages.HashAlgorithm 'packageHashAlgorithm', - Packages.PackageFileSize 'packageSize', - Packages.FlattenedDependencies 'flattenedDependencies', - Packages.Created 'created', - Packages.LastEdited 'lastEdited', - Packages.Published 'published', - Packages.Listed 'listed', - Packages.SemVerLevelKey 'semVerLevelKey' - FROM Packages - INNER JOIN PackageRegistrations ON Packages.PackageRegistrationKey = PackageRegistrations.[Key] - AND Packages.[Key] >= @BeginKey - AND Packages.[Key] < @EndKey - WHERE Packages.PackageStatusKey = 0 - ORDER BY Packages.[Key] - "; - - var command = new SqlCommand(cmdText, connection); - command.CommandTimeout = (int)TimeSpan.FromMinutes(15).TotalSeconds; - command.Parameters.AddWithValue("BeginKey", beginKey); - command.Parameters.AddWithValue("EndKey", endKey); - - var reader = command.ExecuteReader(); - - var batch = 0; - - var directory = new SimpleFSDirectory(directoryInfo); - - using (var writer = DocumentCreator.CreateIndexWriter(directory, true)) - { - while (reader.Read()) - { - var document = CreateDocument(reader, packageFrameworks); - - writer.AddDocument(document); - - if (batch++ == 1000) - { - writer.Commit(); - batch = 0; - } - } - - if (batch > 0) - { - writer.Commit(); - } - } - } - - return folder; - } - - static List> CalculateBatches(string connectionString) - { - var batches = new List>(); - - using (var connection = new SqlConnection(connectionString)) - { - connection.Open(); - - string cmdText = @" - SELECT Packages.[Key] - FROM Packages - INNER JOIN PackageRegistrations ON Packages.PackageRegistrationKey = PackageRegistrations.[Key] - WHERE Packages.PackageStatusKey = 0 - ORDER BY Packages.[Key] - "; - - var command = new SqlCommand(cmdText, connection); - command.CommandTimeout = (int)TimeSpan.FromMinutes(15).TotalSeconds; - - var reader = command.ExecuteReader(); - - var list = new List(); - - while (reader.Read()) - { - list.Add(reader.GetInt32(0)); - } - - int batch = 0; - - int beginKey = list.First(); - int endKey = 0; - - foreach (int x in list) - { - endKey = x; - - if (batch++ == 50000) - { - batches.Add(Tuple.Create(beginKey, endKey)); - batch = 0; - beginKey = endKey; - } - } - - batches.Add(Tuple.Create(beginKey, endKey + 1)); - } - - return batches; - } - - static IDictionary> LoadPackageFrameworks(string connectionString) - { - var result = new Dictionary>(); - - using (var connection = new SqlConnection(connectionString)) - { - connection.Open(); - - var cmdText = @"SELECT Package_Key, TargetFramework FROM PackageFrameworks"; - - var command = new SqlCommand(cmdText, connection); - command.CommandTimeout = (int)TimeSpan.FromMinutes(15).TotalSeconds; - - var reader = command.ExecuteReader(); - - while (reader.Read()) - { - if (reader.IsDBNull(0) || reader.IsDBNull(1)) - { - continue; - } - - int packageKey = reader.GetInt32(0); - string targetFramework = reader.GetString(1); - - List targetFrameworks; - if (!result.TryGetValue(packageKey, out targetFrameworks)) - { - targetFrameworks = new List(); - result.Add(packageKey, targetFrameworks); - } - - targetFrameworks.Add(targetFramework); - } - } - - return result; - } - - public static void Export(string sourceConnectionString, Uri catalogIndexUrl, string destinationPath, ILoggerFactory loggerFactory) - { - var logger = loggerFactory.CreateLogger(); - var stopwatch = new Stopwatch(); - - // Get the commit timestamp from catalog index page for lucene index - var initTime = GetCommitTimestampFromCatalogAsync(catalogIndexUrl, logger).Result; - - stopwatch.Start(); - - var batches = CalculateBatches(sourceConnectionString); - logger.LogInformation("Calculated {BatchCount} batches (took {BatchCalculationTime} seconds)", batches.Count, stopwatch.Elapsed.TotalSeconds); - - stopwatch.Restart(); - - var packageFrameworks = LoadPackageFrameworks(sourceConnectionString); - logger.LogInformation("Loaded package frameworks (took {PackageFrameworksLoadTime} seconds)", stopwatch.Elapsed.TotalSeconds); - - stopwatch.Restart(); - - var tasks = new List>(); - foreach (var batch in batches) - { - tasks.Add(Task.Run(() => { return IndexBatch(destinationPath + @"\batches", sourceConnectionString, packageFrameworks, batch.Item1, batch.Item2); })); - } - - try - { - Task.WaitAll(tasks.ToArray()); - } - catch (AggregateException ex) - { - logger.LogError("An AggregateException occurred while running batches.", ex); - - throw; - } - - logger.LogInformation("Partition indexes generated (took {PartitionIndexGenerationTime} seconds", stopwatch.Elapsed.TotalSeconds); - - stopwatch.Restart(); - - using (var directory = new SimpleFSDirectory(new DirectoryInfo(destinationPath))) - { - using (var writer = DocumentCreator.CreateIndexWriter(directory, true)) - { - NuGetMergePolicyApplyer.ApplyTo(writer); - - var partitions = tasks.Select(t => new SimpleFSDirectory(new DirectoryInfo(t.Result))).ToArray(); - - writer.AddIndexesNoOptimize(partitions); - - foreach (var partition in partitions) - { - partition.Dispose(); - } - - writer.Commit(DocumentCreator.CreateCommitMetadata(initTime, "from SQL", writer.NumDocs(), Guid.NewGuid().ToString()) - .ToDictionary()); - } - } - - logger.LogInformation("Sql2Lucene.Export done (took {Sql2LuceneExportTime} seconds)", stopwatch.Elapsed.TotalSeconds); - - stopwatch.Reset(); - } - - private static async Task GetCommitTimestampFromCatalogAsync(Uri indexUrl, ILogger logger) - { - DateTime commitTime = DateTime.UtcNow; - try - { - using (var client = new System.Net.Http.HttpClient()) - using (var response = await client.GetAsync(indexUrl)) - { - logger.LogInformation("Fetching catalog index page: {0}", response.StatusCode); - response.EnsureSuccessStatusCode(); - - string json = response.Content.ReadAsStringAsync().Result; - JObject obj = JObject.Parse(json); - commitTime = obj["commitTimeStamp"].ToObject(); - } - } - catch (Exception ex) - { - logger.LogWarning("Error retrieving timestamp from catalog index({0})! Defaulting to current time! {1}", indexUrl.ToString(), ex); - } - - return commitTime; - } - - } -} diff --git a/src/NuGet.Indexing/TagsAnalyzer.cs b/src/NuGet.Indexing/TagsAnalyzer.cs deleted file mode 100644 index 4d8fa37a1..000000000 --- a/src/NuGet.Indexing/TagsAnalyzer.cs +++ /dev/null @@ -1,16 +0,0 @@ -// Copyright (c) .NET Foundation. All rights reserved. -// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. - -using Lucene.Net.Analysis; -using System.IO; - -namespace NuGet.Indexing -{ - public class TagsAnalyzer : Analyzer - { - public override TokenStream TokenStream(string fieldName, TextReader reader) - { - return new LowerCaseFilter(new DotTokenizer(reader)); - } - } -} diff --git a/src/NuGet.Indexing/TokenizingHelper.cs b/src/NuGet.Indexing/TokenizingHelper.cs deleted file mode 100644 index 1177c83ee..000000000 --- a/src/NuGet.Indexing/TokenizingHelper.cs +++ /dev/null @@ -1,23 +0,0 @@ -// Copyright (c) .NET Foundation. All rights reserved. -// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. - -using System.Collections.Generic; - -namespace NuGet.Indexing -{ - public static class TokenizingHelper - { - private static ISet _stopWords = new HashSet - { - "a", "an", "and", "are", "as", "at", "be", "but", "by", "for", "i", - "if", "in", "into", "is", "it", "its", "no", "not", "of", "on", "or", "s", "such", - "that", "the", "their", "then", "there", "these", "they", "this", "to", - "was", "we", "will", "with" - }; - - public static ISet GetStopWords() - { - return _stopWords; - } - } -} diff --git a/src/NuGet.Indexing/VersionAnalyzer.cs b/src/NuGet.Indexing/VersionAnalyzer.cs deleted file mode 100644 index 1b6d4d2f9..000000000 --- a/src/NuGet.Indexing/VersionAnalyzer.cs +++ /dev/null @@ -1,15 +0,0 @@ -// Copyright (c) .NET Foundation. All rights reserved. -// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. -using Lucene.Net.Analysis; -using System.IO; - -namespace NuGet.Indexing -{ - public class VersionAnalyzer : Analyzer - { - public override TokenStream TokenStream(string fieldName, TextReader reader) - { - return new SemanticVersionFilter(new KeywordTokenizer(reader)); - } - } -} diff --git a/src/NuGet.Services.SearchService/NuGet.Services.SearchService.csproj b/src/NuGet.Services.SearchService/NuGet.Services.SearchService.csproj index e8d53d943..ba5e75eca 100644 --- a/src/NuGet.Services.SearchService/NuGet.Services.SearchService.csproj +++ b/src/NuGet.Services.SearchService/NuGet.Services.SearchService.csproj @@ -129,18 +129,6 @@ - - - - - - - - - - - - diff --git a/tests/NgTests/NgTests.csproj b/tests/NgTests/NgTests.csproj index 7e4b6bcdc..bec520a57 100644 --- a/tests/NgTests/NgTests.csproj +++ b/tests/NgTests/NgTests.csproj @@ -88,13 +88,11 @@ - - @@ -192,9 +190,6 @@ - - 3.0.3 - 4.10.1 diff --git a/tests/NgTests/SearchIndexFromCatalogCollectorTests.cs b/tests/NgTests/SearchIndexFromCatalogCollectorTests.cs deleted file mode 100644 index 040680988..000000000 --- a/tests/NgTests/SearchIndexFromCatalogCollectorTests.cs +++ /dev/null @@ -1,147 +0,0 @@ -// Copyright (c) .NET Foundation. All rights reserved. -// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. - -using System; -using System.IO; -using System.Linq; -using System.Net; -using System.Net.Http; -using System.Threading; -using System.Threading.Tasks; -using Lucene.Net.Store; -using Moq; -using Ng; -using Ng.Jobs; -using NgTests.Data; -using NgTests.Infrastructure; -using NuGet.Services.Metadata.Catalog; -using Xunit; -using Xunit.Abstractions; -using Constants = NuGet.IndexingTests.TestSupport.Constants; - -namespace NgTests -{ - public class SearchIndexFromCatalogCollectorTests - { - private readonly ITestOutputHelper _testOutputHelper; - - public SearchIndexFromCatalogCollectorTests(ITestOutputHelper testOutputHelper) - { - _testOutputHelper = testOutputHelper; - } - - [Theory] - [InlineData("/data/2015.10.12.10.08.54/unlistedpackage.1.0.0.json", null)] - [InlineData("/data/2015.10.12.10.08.55/listedpackage.1.0.1.json", "2015-10-12T10:08:54.1506742Z")] - [InlineData("/data/2015.10.12.10.08.55/anotherpackage.1.0.0.json", "2015-10-12T10:08:54.1506742Z")] - public async Task DoesNotSkipPackagesWhenExceptionOccurs(string catalogUri, string expectedCursorBeforeRetry) - { - // Arrange - var storage = new MemoryStorage(); - var storageFactory = new TestStorageFactory(name => storage.WithName(name)); - - MockServerHttpClientHandler mockServer; - mockServer = new MockServerHttpClientHandler(); - mockServer.SetAction("/", request => Task.FromResult(new HttpResponseMessage(HttpStatusCode.OK))); - - var catalogStorage = Catalogs.CreateTestCatalogWithCommitThenTwoPackageCommit(); - await mockServer.AddStorageAsync(catalogStorage); - - // Make the first request for a catalog leaf node fail. This will cause the registration collector - // to fail the first time but pass the second time. - FailFirstRequest(mockServer, catalogUri); - - expectedCursorBeforeRetry = expectedCursorBeforeRetry ?? MemoryCursor.MinValue.ToString("O"); - - ReadWriteCursor front = new DurableCursor( - storage.ResolveUri("cursor.json"), - storage, - MemoryCursor.MinValue); - ReadCursor back = MemoryCursor.CreateMax(); - - var telemetryService = new Mock(); - var indexCommitDurationMetric = new Mock(); - telemetryService.Setup(t => t.TrackIndexCommitDuration()).Returns(indexCommitDurationMetric.Object); - - using (var testDirectory = TestDirectory.Create()) - { - var luceneDirectory = new SimpleFSDirectory(new DirectoryInfo(testDirectory)); - using (var indexWriter = Catalog2LuceneJob.CreateIndexWriter(luceneDirectory)) - { - var target = new SearchIndexFromCatalogCollector( - new Uri("http://tempuri.org/index.json"), - indexWriter, - commitEachBatch: true, - commitTimeout: Timeout.InfiniteTimeSpan, - baseAddress: null, - galleryBaseAddress: null, - flatContainerBaseAddress: new Uri("http://test"), - flatContainerContainerName: "fc", - telemetryService: telemetryService.Object, - logger: new TestLogger(_testOutputHelper), - handlerFunc: () => mockServer, - httpRetryStrategy: new NoRetryStrategy()); - - // Act - await Assert.ThrowsAsync(() => target.RunAsync(front, back, CancellationToken.None)); - var cursorBeforeRetry = front.Value; - await target.RunAsync(front, back, CancellationToken.None); - var cursorAfterRetry = front.Value; - - // Assert - var reader = indexWriter.GetReader(); - var documents = Enumerable - .Range(0, reader.NumDeletedDocs + reader.NumDocs()) - .Where(i => !reader.IsDeleted(i)) - .Select(i => reader.Document(i)) - .ToList(); - Assert.Equal(4, documents.Count); - - var documentsByType = documents - .ToLookup(doc => doc - .fields_ForNUnit - .FirstOrDefault(f => f.Name == "@type")? - .StringValue); - var commitDocuments = documentsByType[Schema.DataTypes.CatalogInfastructure.AbsoluteUri.ToString()].ToList(); - var packageDocuments = documentsByType[null].ToList(); - Assert.Single(commitDocuments); - Assert.Equal(3, packageDocuments.Count); - - Assert.Equal( - "UnlistedPackage", - packageDocuments[0].fields_ForNUnit.FirstOrDefault(x => x.Name == Constants.LucenePropertyId)?.StringValue); - Assert.Equal( - "ListedPackage", - packageDocuments[1].fields_ForNUnit.FirstOrDefault(x => x.Name == Constants.LucenePropertyId)?.StringValue); - Assert.Equal( - "AnotherPackage", - packageDocuments[2].fields_ForNUnit.FirstOrDefault(x => x.Name == Constants.LucenePropertyId)?.StringValue); - - Assert.Equal(DateTime.Parse(expectedCursorBeforeRetry).ToUniversalTime(), cursorBeforeRetry); - Assert.Equal(DateTime.Parse("2015-10-12T10:08:55.3335317Z").ToUniversalTime(), cursorAfterRetry); - - telemetryService.Verify(t => t.TrackIndexCommitDuration(), Times.Exactly(2)); - telemetryService.Verify(t => t.TrackIndexCommitTimeout(), Times.Never); - indexCommitDurationMetric.Verify(m => m.Dispose(), Times.Exactly(2)); - } - } - } - - private void FailFirstRequest(MockServerHttpClientHandler mockServer, string relativeUri) - { - var originalAction = mockServer.Actions[relativeUri]; - var hasFailed = false; - Func> failFirst = request => - { - if (!hasFailed) - { - hasFailed = true; - throw new HttpRequestException("Simulated HTTP failure."); - } - - return originalAction(request); - }; - mockServer.SetAction(relativeUri, failFirst); - } - } -} \ No newline at end of file diff --git a/tests/NgTests/StuckIndexWriter.cs b/tests/NgTests/StuckIndexWriter.cs deleted file mode 100644 index 0256927b3..000000000 --- a/tests/NgTests/StuckIndexWriter.cs +++ /dev/null @@ -1,42 +0,0 @@ -// Copyright (c) .NET Foundation. All rights reserved. -// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. - -using System; -using System.Diagnostics; -using Lucene.Net.Analysis; -using Lucene.Net.Index; -using Lucene.Net.Store; - -namespace NgTests -{ - public class StuckIndexWriter : IndexWriter - { - private readonly TimeSpan _stuckDuration; - - private StuckIndexWriter(Directory directory, Analyzer analyzer, TimeSpan stuckDuration) - : base(directory, analyzer, MaxFieldLength.UNLIMITED) - { - _stuckDuration = stuckDuration; - } - - public static IndexWriter FromIndexWriter(IndexWriter originalWriter, TimeSpan stuckDuration) - { - var directory = originalWriter.Directory; - var analyzer = originalWriter.Analyzer; - originalWriter.Dispose(); - - return new StuckIndexWriter(directory, analyzer, stuckDuration); - } - - public override void ExpungeDeletes() - { - var stopwatch = Stopwatch.StartNew(); - - while (stopwatch.Elapsed < _stuckDuration) - { - } - - stopwatch.Stop(); - } - } -} diff --git a/tests/NuGet.IndexingTests/CamelCaseFilterTests.cs b/tests/NuGet.IndexingTests/CamelCaseFilterTests.cs deleted file mode 100644 index 908b43748..000000000 --- a/tests/NuGet.IndexingTests/CamelCaseFilterTests.cs +++ /dev/null @@ -1,252 +0,0 @@ -// Copyright (c) .NET Foundation. All rights reserved. -// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. - -using System.Collections.Generic; -using System.IO; -using System.Linq; -using Lucene.Net.Analysis.Standard; -using Lucene.Net.Util; -using NuGet.Indexing; -using NuGet.IndexingTests.TestSupport; -using Xunit; - -namespace NuGet.IndexingTests -{ - public class CamelCaseFilterTests - { - [Theory] - [MemberData(nameof(SplitterSamples))] - public void SplitterTests(string input, string[] expected) - { - var splits = CamelCaseFilter.CamelCaseSplit(input); - - Assert.Equal(expected, splits); - } - - public static IEnumerable SplitterSamples - { - get - { - yield return new object[] - { - "DotNetZipFoo", - new[] { "Dot", "Net", "Zip", "Foo" }, - }; - - // shingle depth two with three terms - yield return new object[] - { - "DotNetZip", - new[] { "Dot", "Net", "Zip"}, - }; - - // two terms - yield return new object[] - { - "DotNet", - new[] { "Dot", "Net" }, - }; - - // one term - yield return new object[] - { - "Dot", - new[] { "Dot" }, - }; - - // empty query - yield return new object[] - { - string.Empty, - new object[0], - }; - - // maintain case - yield return new object[] - { - "DOT", - new[] { "DOT" }, - }; - - // camel case transition is only when the characters go from lowercase to uppercase - yield return new object[] - { - "DOTNet", - new[] { "DOTNet" }, - }; - - // one character camel case - yield return new object[] - { - "DotN", - new[] { "Dot", "N" }, - }; - - // Split on number - yield return new object[] - { - "Mvc5", - new[] { "Mvc", "5" }, - }; - - // Split on two numbers - yield return new object[] - { - "Log4Net", - new[] { "Log", "4", "Net" }, - }; - - // Split on two numbers and one at the end - yield return new object[] - { - "Log4Net5", - new[] { "Log", "4", "Net", "5" }, - }; - - // Split on more than one digit number - yield return new object[] - { - "Log44Net", - new[] { "Log", "44", "Net" }, - }; - } - } - - [Theory] - [MemberData(nameof(TokenizingReturnsExpectedTermAndOffsetsData))] - public void TokenizingReturnsExpectedTermAndOffsets(string text, TokenAttributes[] expected) - { - // arrange - var tokenStream = new StandardTokenizer(Version.LUCENE_30, new StringReader(text)); - var filter = new CamelCaseFilter(tokenStream); - - // act - var actual = filter.Tokenize().ToArray(); - - // assert - Assert.Equal(expected, actual); - } - - public static IEnumerable TokenizingReturnsExpectedTermAndOffsetsData - { - get - { - // shingle depth two with four terms - yield return new object[] - { - "DotNetZipFoo", - new[] - { - new TokenAttributes("DotNetZipFoo", 0, 12, 1), - new TokenAttributes("Dot", 0, 3, 0), - new TokenAttributes("DotNet", 0, 6, 0), - new TokenAttributes("Net", 3, 6, 1), - new TokenAttributes("NetZip", 3, 9, 0), - new TokenAttributes("Zip", 6, 9, 1), - new TokenAttributes("ZipFoo", 6, 12, 0), - new TokenAttributes("Foo", 9, 12, 1) - } - }; - - // shingle depth two with three terms - yield return new object[] - { - "DotNetZip", - new[] - { - new TokenAttributes("DotNetZip", 0, 9, 1), - new TokenAttributes("Dot", 0, 3, 0), - new TokenAttributes("DotNet", 0, 6, 0), - new TokenAttributes("Net", 3, 6, 1), - new TokenAttributes("NetZip", 3, 9, 0), - new TokenAttributes("Zip", 6, 9, 1), - } - }; - - // two terms - yield return new object[] - { - "DotNet", - new[] - { - new TokenAttributes("DotNet", 0, 6, 1), - new TokenAttributes("Dot", 0, 3, 0), - new TokenAttributes("Net", 3, 6, 1) - } - }; - - // one term - yield return new object[] - { - "Dot", - new[] - { - new TokenAttributes("Dot", 0, 3, 1) - } - }; - - // empty query - yield return new object[] - { - string.Empty, - new object[0] - }; - - // maintain case - yield return new object[] - { - "DOT", - new[] - { - new TokenAttributes("DOT", 0, 3, 1) - } - }; - - // camel case transition is only when the characters go from lowercase to uppercase - yield return new object[] - { - "DOTNet", - new[] - { - new TokenAttributes("DOTNet", 0, 6, 1) - } - }; - - // one character camel case - yield return new object[] - { - "DotN", - new[] - { - new TokenAttributes("DotN", 0, 4, 1), - new TokenAttributes("Dot", 0, 3, 0), - new TokenAttributes("N", 3, 4, 1) - } - }; - - yield return new object[] - { - "Mvc5", - new[] - { - new TokenAttributes("Mvc5", 0, 4, 1), - new TokenAttributes("Mvc", 0, 3 ,0 ), - } - }; - - yield return new object[] - { - "Log4Net", - new[] - { - new TokenAttributes("Log4Net", 0, 7, 1), - new TokenAttributes("Log", 0, 3, 0), - new TokenAttributes("Log4", 0, 4, 0), - new TokenAttributes("4Net", 3, 7, 0), - new TokenAttributes("Net", 4, 7, 1), - } - }; - } - } - } -} diff --git a/tests/NuGet.IndexingTests/DescriptionAnalyzerTests.cs b/tests/NuGet.IndexingTests/DescriptionAnalyzerTests.cs deleted file mode 100644 index 0fcc415b5..000000000 --- a/tests/NuGet.IndexingTests/DescriptionAnalyzerTests.cs +++ /dev/null @@ -1,153 +0,0 @@ -// Copyright (c) .NET Foundation. All rights reserved. -// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. - -using System.Collections.Generic; -using NuGet.Indexing; -using NuGet.IndexingTests.TestSupport; -using Xunit; - -namespace NuGet.IndexingTests -{ - public class DescriptionAnalyzerTests - { - [Theory] - [MemberData(nameof(TokenizerLowercasesCamelCasesAndRemovesStopWordsInputData))] - public void TokenizerLowercasesCamelCasesAndRemovesStopWordsInput(string text, TokenAttributes[] expected) - { - // arrange, act - var actual = new DescriptionAnalyzer().Tokenize(text); - - // assert - Assert.Equal(expected, actual); - } - - [Theory] - [MemberData(nameof(TokenizerRemovesCorrectStopWordsData))] - public void TokenizerRemovesCorrectStopWords(string stopWord) - { - // arrange, act - var text = string.Format("stop {0} word", stopWord); - var actual = new DescriptionAnalyzer().Tokenize(text); - var expected = new[] - { - new TokenAttributes("stop", 0, 4, 1), - new TokenAttributes("word", 6 + stopWord.Length, 10 + stopWord.Length, 2) - }; - - // assert - Assert.Equal(expected, actual); - } - - public static IEnumerable TokenizerLowercasesCamelCasesAndRemovesStopWordsInputData - { - get - { - // split by DotTokenizer - yield return new object[] - { - "Split sentence.", - new[] - { - new TokenAttributes("split", 0, 5, 1), - new TokenAttributes("sentence", 6, 14, 1) - } - }; - - // split on camel case - yield return new object[] - { - "DotNet", - new[] - { - new TokenAttributes("dotnet", 0, 6, 1), - new TokenAttributes("dot", 0, 3, 0), - new TokenAttributes("net", 3, 6, 1) - } - }; - - // lower case - yield return new object[] - { - "D", - new[] - { - new TokenAttributes("d", 0, 1, 1) - } - }; - - // remove stop words - yield return new object[] - { - "This is a sentence full of stop words.", - new[] - { - new TokenAttributes("sentence", 10, 18, 4), - new TokenAttributes("full", 19, 23, 1), - new TokenAttributes("stop", 27, 31, 2), - new TokenAttributes("words", 32, 37, 1) - } - }; - - // combined - yield return new object[] - { - "This is a half-baked sentence is describing DotNet.", - new[] - { - new TokenAttributes("half", 10, 14, 4), - new TokenAttributes("baked", 15, 20, 1), - new TokenAttributes("sentence", 21, 29, 1), - new TokenAttributes("describing", 33, 43, 2), - new TokenAttributes("dotnet", 44, 50, 1), - new TokenAttributes("dot", 44, 47, 0), - new TokenAttributes("net", 47, 50, 1) - } - }; - } - } - - public static IEnumerable TokenizerRemovesCorrectStopWordsData - { - get - { - yield return new object[] { "a" }; - yield return new object[] { "an" }; - yield return new object[] { "and" }; - yield return new object[] { "are" }; - yield return new object[] { "as" }; - yield return new object[] { "at" }; - yield return new object[] { "be" }; - yield return new object[] { "but" }; - yield return new object[] { "by" }; - yield return new object[] { "for" }; - yield return new object[] { "i" }; - yield return new object[] { "if" }; - yield return new object[] { "in" }; - yield return new object[] { "into" }; - yield return new object[] { "is" }; - yield return new object[] { "it" }; - yield return new object[] { "its" }; - yield return new object[] { "no" }; - yield return new object[] { "not" }; - yield return new object[] { "of" }; - yield return new object[] { "on" }; - yield return new object[] { "or" }; - yield return new object[] { "s" }; - yield return new object[] { "such" }; - yield return new object[] { "that" }; - yield return new object[] { "the" }; - yield return new object[] { "their" }; - yield return new object[] { "then" }; - yield return new object[] { "there" }; - yield return new object[] { "these" }; - yield return new object[] { "they" }; - yield return new object[] { "this" }; - yield return new object[] { "to" }; - yield return new object[] { "was" }; - yield return new object[] { "we" }; - yield return new object[] { "will" }; - yield return new object[] { "with" }; - } - } - } -} diff --git a/tests/NuGet.IndexingTests/DotTokenizerTests.cs b/tests/NuGet.IndexingTests/DotTokenizerTests.cs deleted file mode 100644 index 58ab531ef..000000000 --- a/tests/NuGet.IndexingTests/DotTokenizerTests.cs +++ /dev/null @@ -1,61 +0,0 @@ -// Copyright (c) .NET Foundation. All rights reserved. -// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. - -using System.Collections.Generic; -using System.IO; -using System.Linq; -using NuGet.Indexing; -using NuGet.IndexingTests.TestSupport; -using Xunit; - -namespace NuGet.IndexingTests -{ - public class DotTokenizerTests - { - [Theory] - [MemberData(nameof(SplitsTextIntoTokensOnCorrectCharactersData))] - public void SplitsTextIntoTokensOnCorrectCharacters(char seperator) - { - // arrange - var text = $"Dot{seperator}NET"; - var tokenizer = new DotTokenizer(new StringReader(text)); - var expected = new[] { new TokenAttributes("Dot", 0, 3), new TokenAttributes("NET", 4, 7) }; - - // act - var actual = tokenizer.Tokenize().ToArray(); - - // assert - Assert.Equal(expected, actual); - } - - public static IEnumerable SplitsTextIntoTokensOnCorrectCharactersData - { - get - { - yield return new object[] { ' ' }; - yield return new object[] { '\t' }; - yield return new object[] { '\r' }; - yield return new object[] { '\n' }; - yield return new object[] { '.' }; - yield return new object[] { '-' }; - yield return new object[] { ',' }; - yield return new object[] { ';' }; - yield return new object[] { ':' }; - yield return new object[] { '\'' }; - yield return new object[] { '*' }; - yield return new object[] { '#' }; - yield return new object[] { '!' }; - yield return new object[] { '~' }; - yield return new object[] { '+' }; - yield return new object[] { '-' }; - yield return new object[] { '(' }; - yield return new object[] { ')' }; - yield return new object[] { '[' }; - yield return new object[] { ']' }; - yield return new object[] { '{' }; - yield return new object[] { '}' }; - } - } - - } -} diff --git a/tests/NuGet.IndexingTests/ExpandAcronymsFilterTests.cs b/tests/NuGet.IndexingTests/ExpandAcronymsFilterTests.cs deleted file mode 100644 index 244acec1c..000000000 --- a/tests/NuGet.IndexingTests/ExpandAcronymsFilterTests.cs +++ /dev/null @@ -1,92 +0,0 @@ -// Copyright (c) .NET Foundation. All rights reserved. -// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. - -using System.Collections.Generic; -using System.IO; -using System.Linq; -using Lucene.Net.Analysis.Standard; -using Lucene.Net.Util; -using NuGet.Indexing; -using NuGet.IndexingTests.TestSupport; -using Xunit; - -namespace NuGet.IndexingTests -{ - public class ExpandAcronymsFilterTests - { - [Theory] - [MemberData(nameof(TokenizingReturnsExpectedTermsData))] - public void TokenizingReturnsExpectedTerms(string text, TokenAttributes[] expected) - { - // Arrange - var tokenStream = new StandardTokenizer(Version.LUCENE_30, new StringReader(text)); - var filter = new ExpandAcronymsFilter(tokenStream, NuGetAcronymExpansionProvider.Instance); - - // Act - var actual = filter.Tokenize().ToArray(); - - // Assert - Assert.Equal(expected, actual); - } - - [Theory] - [InlineData("foobar", "foo", "bar")] - [InlineData("foobarfoo", "foo", "bar")] - [InlineData("foobarfoobar", "foo", "barbar")] - [InlineData("fOObar", "foo", "bar")] - [InlineData("fOObarFOO", "foo", "bar")] - [InlineData("FOObarfoobar", "foo", "barbar")] - [InlineData("fooBAR", "foo", "BAR")] - [InlineData("fooBAR", "FOO", "BAR")] - [InlineData("FOOBAR", "foo", "BAR")] - public void RemoveSubstringRemovesSubstringFromString(string original, string substringToRemove, string expected) - { - // Act - var result = ExpandAcronymsFilter.RemoveSubstring(original, substringToRemove); - - // Assert - Assert.Equal(expected, result); - } - - public static IEnumerable TokenizingReturnsExpectedTermsData - { - get - { - yield return new object[] - { - "xamlbehaviors", - new[] - { - new TokenAttributes("xamlbehaviors", 0, 13, 1), - new TokenAttributes("xaml", 0, 13, 0), - new TokenAttributes("behaviors", 0, 13, 0) - } - }; - - yield return new object[] - { - "uwpef", - new[] - { - new TokenAttributes("uwpef", 0, 5, 1), - new TokenAttributes("ef", 0, 5, 0), - new TokenAttributes("entity framework", 0, 5, 0), - new TokenAttributes("uwp", 0, 5, 0), - new TokenAttributes("universal windows platform", 0, 5, 0) - } - }; - - yield return new object[] - { - "mvc5", - new[] - { - new TokenAttributes("mvc5", 0, 4, 1), - new TokenAttributes("mvc", 0, 4, 0), - new TokenAttributes("5", 0, 4, 0), - } - }; - } - } - } -} \ No newline at end of file diff --git a/tests/NuGet.IndexingTests/Extraction/CatalogPackageMetadataExtractorTests.cs b/tests/NuGet.IndexingTests/Extraction/CatalogPackageMetadataExtractorTests.cs deleted file mode 100644 index 9cb562aaf..000000000 --- a/tests/NuGet.IndexingTests/Extraction/CatalogPackageMetadataExtractorTests.cs +++ /dev/null @@ -1,793 +0,0 @@ -// Copyright (c) .NET Foundation. All rights reserved. -// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. - -using System; -using System.Collections.Generic; -using System.Linq; -using Newtonsoft.Json.Linq; -using NuGet.Indexing; -using Xunit; - -namespace NuGet.IndexingTests.Extraction -{ - public class CatalogPackageMetadataExtractorTests - { - [Fact] - public void ThrowsWhenCatalogItemIsNull() - { - var ex = Assert.Throws(() => - CatalogPackageMetadataExtraction.MakePackageMetadata( - catalogItem: null, - galleryBaseAddress: new Uri("https://test"), - flatContainerBaseAddress: new Uri("https://test"), - flatContainerContainerName: "fc")); - - Assert.Equal("catalogItem", ex.ParamName); - } - - [Fact] - public void DoesNotThrowWhenGalleryBaseUrlIsNull() - { - var ex = Record.Exception(() => - CatalogPackageMetadataExtraction.MakePackageMetadata( - catalogItem: CatalogEntry(new { }), - galleryBaseAddress: null, - flatContainerBaseAddress: new Uri("https://test"), - flatContainerContainerName: "fc")); - Assert.Null(ex); - } - - [Fact] - public void ThrowsWhenFlatContainerBaseAddressIsNull() - { - var ex = Assert.Throws(() => - CatalogPackageMetadataExtraction.MakePackageMetadata( - catalogItem: CatalogEntry(new { }), - galleryBaseAddress: new Uri("https://test"), - flatContainerBaseAddress: null, - flatContainerContainerName: "fc")); - - Assert.Equal("flatContainerBaseAddress", ex.ParamName); - } - - [Fact] - public void ThrowsWhenFlatContainerContainerNameIsNull() - { - var ex = Assert.Throws(() => - CatalogPackageMetadataExtraction.MakePackageMetadata( - catalogItem: CatalogEntry(new { }), - galleryBaseAddress: new Uri("https://test"), - flatContainerBaseAddress: new Uri("https://test"), - flatContainerContainerName: null)); - - Assert.Equal("flatContainerContainerName", ex.ParamName); - } - - [Theory, MemberData(nameof(AddsListedData))] - public void AddsListed(object catalogEntry, string expected) - { - // Arrange - var catalogEntryJObject = CatalogEntry(catalogEntry); - - // Act - var metadata = CatalogPackageMetadataExtraction.MakePackageMetadata(catalogEntryJObject, null, new Uri("https://test"), "fc"); - - // Assert - Assert.Contains(MetadataConstants.ListedPropertyName, metadata.Keys); - Assert.Equal(expected, metadata[MetadataConstants.ListedPropertyName]); - } - - [Theory, MemberData(nameof(AddsSemVerLevelKeyData))] - public void AddsSemVerLevelKey(object catalogEntry, bool expectedToContainKey, string expected) - { - // Arrange - var catalogEntryJObject = CatalogEntry(catalogEntry); - - // Act - var metadata = CatalogPackageMetadataExtraction.MakePackageMetadata(catalogEntryJObject, null, new Uri("https://test"), "fc"); - - - // Assert - Assert.Equal(expectedToContainKey, metadata.Keys.Contains(MetadataConstants.SemVerLevelKeyPropertyName)); - if (expectedToContainKey) - { - Assert.Equal(expected, metadata[MetadataConstants.SemVerLevelKeyPropertyName]); - } - } - - [Theory, MemberData(nameof(AddsSupportedFrameworksData))] - public void AddsSupportedFrameworks(object catalogEntry, string expected) - { - // Arrange - var catalogEntryJObject = CatalogEntry(catalogEntry); - - // Act - var metadata = CatalogPackageMetadataExtraction.MakePackageMetadata(catalogEntryJObject, null, new Uri("https://test"), "fc"); - - // Assert - if (expected != null) - { - Assert.Contains(MetadataConstants.SupportedFrameworksPropertyName, metadata.Keys); - Assert.Equal(expected.Split('|').OrderBy(f => f), metadata[MetadataConstants.SupportedFrameworksPropertyName].Split('|').OrderBy(f => f)); - } - else - { - Assert.DoesNotContain(MetadataConstants.SupportedFrameworksPropertyName, metadata.Keys); - } - } - - [Theory, MemberData(nameof(AddsFlattenedDependenciesData))] - public void AddsFlattenedDependencies(object catalogEntry, string expected) - { - // Arrange - var catalogEntryJObject = CatalogEntry(catalogEntry); - - // Act - var metadata = CatalogPackageMetadataExtraction.MakePackageMetadata(catalogEntryJObject, null, new Uri("https://test"), "fc"); - - // Assert - Assert.Contains(MetadataConstants.FlattenedDependenciesPropertyName, metadata.Keys); - Assert.Equal(expected, metadata[MetadataConstants.FlattenedDependenciesPropertyName]); - } - - [Fact] - public void AllowsMissingVerbatimVersion() - { - // Arrange - // We add the invalid portable package entry folder name since this causes a failure which reads the ID and - // version from the generated .nuspec. - var catalogEntryJObject = JObject.FromObject(new - { - id = "NuGet.Versioning", - version = "4.6.0", - packageEntries = new object[] - { - new { fullName = "lib/net45/something.dll" }, - new { fullName = "lib/portable-win-wpa8/something-else.dll" }, - }, - }); - - // Act - var metadata = CatalogPackageMetadataExtraction.MakePackageMetadata(catalogEntryJObject, null, new Uri("https://test"), "fc"); - - // Assert - Assert.Equal(new[] { "id", "listed", "version" }, metadata.Keys.OrderBy(x => x)); - Assert.Equal("4.6.0", metadata["version"]); - } - - [Theory, MemberData(nameof(AddsLicensesData))] - public void AddsLicensesUrl(object catalogEntry, Uri galleryBaseAddress, string expectedLicenseurl) - { - // Arrange - var catalogEntryJObject = CatalogEntry(catalogEntry); - - // Act - var metadata = CatalogPackageMetadataExtraction.MakePackageMetadata(catalogEntryJObject, galleryBaseAddress, new Uri("https://test"), "fc"); - - // Assert - Assert.Contains(MetadataConstants.LicenseUrlPropertyName, metadata.Keys); - Assert.Equal(expectedLicenseurl, metadata[MetadataConstants.LicenseUrlPropertyName]); - } - - [Theory] - [InlineData("testPackage", "1.0.0", null, null, "https://fc.test", "fc", null)] - [InlineData("testPackage", "1.0.0", "http://icon.test", null, "https://fc.test", "fc", "http://icon.test")] - [InlineData("testPackage", "1.0.0", "", null, "https://fc.test", "fc", "")] - [InlineData("testPackage", "1.0.0", null, "iconfile", "https://fc.test", "fc", "https://fc.test/fc/testpackage/1.0.0/icon")] - [InlineData("testPackage", "1.0.0", null, "", "https://fc.test", "fc", null)] - [InlineData("testPackage", "1.0.0", "http://icon.test", "iconfile", "https://fc.test", "fc", "https://fc.test/fc/testpackage/1.0.0/icon")] - [InlineData("testPackage", "1.0.0", "", "", "https://fc.test", "fc", "")] - public void AddsIconUrl(string packageId, string packageVersion, string iconUrl, string iconFile, string flatContainerBase, string flatContainerContainerName, string expectedIconUrl) - { - var catalogEntryJObject = CatalogEntry(GetCatalogObject(packageId, packageVersion, iconUrl, iconFile)); - - var metadata = CatalogPackageMetadataExtraction.MakePackageMetadata(catalogEntryJObject, null, new Uri(flatContainerBase), flatContainerContainerName); - - if (expectedIconUrl == null) - { - Assert.False(metadata.ContainsKey(MetadataConstants.IconUrlPropertyName)); - } - else - { - Assert.True(metadata.ContainsKey(MetadataConstants.IconUrlPropertyName)); - Assert.Equal(expectedIconUrl, metadata[MetadataConstants.IconUrlPropertyName]); - } - } - - private static object GetCatalogObject(string packageId, string packageVersion, string iconUrl, string iconFile) - { - if (iconUrl == null && iconFile == null) - { - return new { id = packageId, version = packageVersion }; - } - if (iconUrl == null && iconFile != null) - { - return new { id = packageId, version = packageVersion, iconFile }; - } - if (iconUrl != null && iconFile == null) - { - return new { id = packageId, version = packageVersion, iconUrl }; - } - - return new { id = packageId, version = packageVersion, iconUrl, iconFile }; - } - - [Theory] - [InlineData("testPackage", "1.0.0", null, null, null)] - [InlineData("testPackage", "1.0.0", null, null, "https://testnuget/")] - [InlineData("testPackage", "1.0.0", "MIT", null, null)] - [InlineData("testPackage", "1.0.0", null, "license.txt", null)] - [InlineData(null, "1.0.0", "MIT", null, "https://testnuget/")] - [InlineData("testPackage", null, "MIT", null, "https://testnuget/")] - [InlineData(null, "1.0.0", null, "license.txt", "https://testnuget/")] - [InlineData("testPackage", null, null, "license.txt", "https://testnuget/")] - public void AddsNoLicensesUrl(string packageId, string packageVersion, string licenseExpression, string licenseFile, string galleryBaseAddress) - { - // Arrange - var catalogEntryJObject = CatalogEntry(new { id = packageId, version = packageVersion, licenseExpression, licenseFile }); - - // Act - var metadata = CatalogPackageMetadataExtraction.MakePackageMetadata(catalogEntryJObject, galleryBaseAddress == null ? null : new Uri(galleryBaseAddress), new Uri("https://test"), "fc"); - - // Assert - Assert.False(metadata.ContainsKey(MetadataConstants.LicenseUrlPropertyName)); - } - - public static IEnumerable AddsListedData - { - get - { - yield return new object[] { new { }, "true" }; - yield return new object[] { new { listed = (string)null }, "true" }; - yield return new object[] { new { listed = "TRUE" }, "TRUE" }; - yield return new object[] { new { listed = "False" }, "False" }; - yield return new object[] { new { listed = "Bad" }, "Bad" }; // validation is not done at this stage - yield return new object[] { new { published = "1900-01-01T00:00:00" }, "false" }; - yield return new object[] { new { published = "1900-01-02T00:00:00" }, "true" }; - yield return new object[] { new { published = "1900-01-01T00:00:00", listed = "True" }, "True" }; - } - } - - public static IEnumerable AddsSemVerLevelKeyData - { - get - { - // no dependencies - yield return new object[] { new { verbatimVersion = "1.0.0" }, false, null }; - yield return new object[] { new { verbatimVersion = "1.0.0-semver1" }, false, null }; - yield return new object[] { new { verbatimVersion = "1.0.0-semver2.0" }, true, "2" }; - yield return new object[] { new { verbatimVersion = "1.0.0-semver2.0+again" }, true, "2" }; - yield return new object[] { new { verbatimVersion = "1.0.0+aThirdTime" }, true, "2" }; - - // dependencies - yield return new object[] - { - new - { - verbatimVersion = "1.0.0", - dependencyGroups = new object[] - { - new - { - dependencies = new object[] - { - new { id = "Newtonsoft.Json", range = "4.5.11" }, - new { id = "Microsoft.Data.OData", range = "5.6.2" } - } - } - }, - }, - false, - null - }; - - yield return new object[] - { - new - { - verbatimVersion = "1.0.0+semver2", - dependencyGroups = new object[] - { - new - { - dependencies = new object[] - { - new { id = "Newtonsoft.Json", range = "4.5.11" }, - new { id = "Microsoft.Data.OData", range = "5.6.2" } - } - } - }, - }, - true, - "2" - }; - - // dependencies show semver2 - yield return new object[] - { - new - { - verbatimVersion = "1.0.0", - dependencyGroups = new object[] - { - new - { - dependencies = new object[] - { - new { id = "Newtonsoft.Json", range = "4.5.11-semver2.0.dep" }, - new { id = "Microsoft.Data.OData", range = "5.6.2" } - } - } - }, - }, - true, - "2" - }; - - yield return new object[] - { - new - { - verbatimVersion = "1.0.0", - dependencyGroups = new object[] - { - new - { - dependencies = new object[] - { - new { id = "Newtonsoft.Json", range = "4.5.11-semver2.0.dep+meta" }, - new { id = "Microsoft.Data.OData", range = "5.6.2" } - } - } - }, - }, - true, - "2" - }; - - // semver2 in real ranges - yield return new object[] - { - new - { - verbatimVersion = "1.0.0", - dependencyGroups = new object[] - { - new - { - dependencies = new object[] - { - new { id = "Newtonsoft.Json", range = "(4.5.11, 6.0.0-semver.2]" }, - new { id = "Microsoft.Data.OData", range = "5.6.2" } - } - } - }, - }, - true, - "2" - }; - - yield return new object[] - { - new - { - verbatimVersion = "1.0.0", - dependencyGroups = new object[] - { - new - { - dependencies = new object[] - { - new { id = "Newtonsoft.Json", range = "(4.5.11-semver.2, 6.0.0]" }, - new { id = "Microsoft.Data.OData", range = "5.6.2" } - } - } - }, - }, - true, - "2" - }; - - yield return new object[] - { - new - { - verbatimVersion = "1.0.0", - dependencyGroups = new object[] - { - new - { - dependencies = new object[] - { - new { id = "Newtonsoft.Json", range = "(4.5.11-semver.2, ]" }, - new { id = "Microsoft.Data.OData", range = "5.6.2" } - } - } - }, - }, - true, - "2" - }; - - yield return new object[] - { - new - { - verbatimVersion = "1.0.0", - dependencyGroups = new object[] - { - new - { - dependencies = new object[] - { - new { id = "Newtonsoft.Json", range = "(, 6.0.0-semver.2]" }, - new { id = "Microsoft.Data.OData", range = "5.6.2" } - } - } - }, - }, - true, - "2" - }; - - yield return new object[] - { - new - { - verbatimVersion = "1.0.0", - dependencyGroups = new object[] - { - new - { - dependencies = new object[] - { - new { id = "Newtonsoft.Json", range = "(, 6.0.0]" }, - new { id = "Microsoft.Data.OData", range = "5.6.2" } - } - } - }, - }, - false, - null - }; - } - } - - public static IEnumerable AddsSupportedFrameworksData - { - get - { - // framework assembly group - yield return new object[] { WithFrameworkAssemblyGroup(".NETFramework4.0-Client"), "net40-client" }; - yield return new object[] { WithFrameworkAssemblyGroup(".NETFramework4.0-Client, .NETFramework4.5"), "net40-client|net45" }; - yield return new object[] { WithFrameworkAssemblyGroup(" .NETFramework4.0-Client, , , .NETFramework4.5 ,,"), "net40-client|net45" }; - yield return new object[] - { - new - { - frameworkAssemblyGroup = new object[] - { - new { targetFramework = ".NETFramework4.0-Client" }, - new { targetFramework = ".NETFramework4.0, .NETFramework4.5" }, - new { targetFramework = " " } - } - }, - "net40-client|net40|net45" - }; - - // a single framework assembly - yield return new object[] - { - new - { - frameworkAssemblyGroup = new { targetFramework = ".NETFramework4.0, .NETFramework4.5" } - }, - "net40|net45" - }; - - // package entries - yield return new object[] { WithPackageEntry("lib/net40/something.dll"), "net40" }; - yield return new object[] { WithPackageEntry("lib/portable-net45%2Bwin%2Bwpa81%2Bwp80%2BMonoAndroid10%2BXamarin.iOS10%2BMonoTouch10/something.dll"), "portable-net45+win8+wp8+wpa81" }; - yield return new object[] - { - new - { - packageEntries = new object[] - { - new { fullName = "lib/net45/something.dll" }, - new { fullName = "lib/net40/something-else.dll" }, - new { fullName = "bad" } - } - }, - "net45|net40" - }; - - // invalid PCL TFM - yield return new object[] - { - new - { - packageEntries = new object[] - { - new { fullName = "lib/net45/something.dll" }, - new { fullName = "lib/portable-win-wpa8/something-else.dll" } - } - }, - null - }; - - // a single package entry - yield return new object[] { new { packageEntries = new { fullName = "lib/net40/something.dll" } }, "net40" }; - - // not target framework folder name - yield return new object[] - { - new - { - packageEntries = new object[] - { - new { fullName = "lib/something.dll" }, - new { fullName = "lib/net40/something-else.dll" } - } - }, - "net40" - }; - - // both - yield return new object[] - { - new - { - frameworkAssemblyGroup = new object[] - { - new { targetFramework = ".NETFramework4.0-Client" }, - new { targetFramework = ".NETFramework4.0, .NETFramework4.5" }, - new { targetFramework = " " } - }, - packageEntries = new object[] - { - new { fullName = "lib/net45/something.dll" }, - new { fullName = "lib/net20/something.dll" }, - new { fullName = "bad" } - } - }, - "net40-client|net40|net45|net20" - }; - } - } - - public static IEnumerable AddsFlattenedDependenciesData - { - get - { - // multiple packages - yield return new object[] - { - new - { - dependencyGroups = new object[] - { - new - { - dependencies = new object[] - { - new { id = "Newtonsoft.Json", range = "4.5.11" }, - new { id = "Microsoft.Data.OData", range = "5.6.2" } - } - } - }, - - }, - "Newtonsoft.Json:4.5.11|Microsoft.Data.OData:5.6.2" - }; - - // multiple target frameworks - yield return new object[] - { - new - { - dependencyGroups = new object[] - { - new - { - dependencies = new object[] - { - new { id = "Newtonsoft.Json", range = "4.5.11" } - }, - targetFramework = ".NETFramework4.5" - }, - new - { - dependencies = new object[] - { - new { id = "Microsoft.Data.OData", range = "5.6.2" } - }, - targetFramework = ".NETFramework4.0-client" - }, - new - { - dependencies = new object[] - { - new { id = "Microsoft.Data.OData", range = "5.6.2" }, - new { id = "", range = "" } - } - } - }, - - }, - "Newtonsoft.Json:4.5.11:net45|Microsoft.Data.OData:5.6.2:net40-client|Microsoft.Data.OData:5.6.2" - }; - - // multiple target frameworks without direct package dependencies - yield return new object[] - { - new - { - dependencyGroups = new object[] - { - new - { - dependencies = new object[0], - targetFramework = ".NETFramework4.5" - }, - new - { - dependencies = new object[] - { - new { id = "Microsoft.Data.OData", range = "5.6.2" } - }, - targetFramework = ".NETFramework4.0-client" - }, - new - { - dependencies = new object[] - { - new { id = "Microsoft.Data.OData", range = "5.6.2" } - } - } - }, - - }, - "::net45|Microsoft.Data.OData:5.6.2:net40-client|Microsoft.Data.OData:5.6.2" - }; - - // a single item - yield return new object[] - { - new - { - dependencyGroups = new - { - dependencies = new object[] - { - new { id = "Newtonsoft.Json", range = "4.5.11" }, - new { id = "Microsoft.Data.OData", range = "5.6.2" } - } - } - }, - "Newtonsoft.Json:4.5.11|Microsoft.Data.OData:5.6.2" - }; - - // different target framework format - yield return new object[] { WithDependency("Newtonsoft.Json", "4.5.11", ".NETFramework4.5"), "Newtonsoft.Json:4.5.11:net45" }; - yield return new object[] { WithDependency("Newtonsoft.Json", "4.5.11", ".NETFramework4.0"), "Newtonsoft.Json:4.5.11:net40" }; - yield return new object[] { WithDependency("Newtonsoft.Json", "4.5.11", string.Empty), "Newtonsoft.Json:4.5.11" }; - yield return new object[] { WithDependency("Newtonsoft.Json", "4.5.11", null), "Newtonsoft.Json:4.5.11" }; - yield return new object[] - { - new - { - dependencyGroups = new object[] - { - new - { - dependencies = new object[] - { - new - { - id = "Newtonsoft.Json", - range = "4.5.11" - } - } - } - } - }, - "Newtonsoft.Json:4.5.11" - }; - } - } - - public static IEnumerable AddsLicensesData - { - get - { // licenseExpression licenseFile licenseUrl - yield return new object[] { - new { id = "testPackage", version = "1.0.0", licenseExpression = "MIT", licenseUrl = "https://testlicenseurl"}, - new Uri("https://testnuget"), - "https://testnuget/packages/testPackage/1.0.0/license" }; - yield return new object[] { - new { id = "testPackage", version = "1.0.0", licenseFile = "license.txt", licenseUrl = "https://testlicenseurl"}, - new Uri("https://testnuget"), - "https://testnuget/packages/testPackage/1.0.0/license" }; - yield return new object[] { - new { id = "testPackage", version = "1.0.0", licenseUrl = "https://testlicenseurl"}, - new Uri("https://testnuget"), - "https://testlicenseurl" }; - yield return new object[] { - new { id = "testPackage", version = "1.0.0", licenseExpression = "MIT", licenseUrl = "https://testlicenseurl"}, - null, - "https://testlicenseurl" }; - yield return new object[] { - new { id = "testPackage", version = "1.0.0", licenseFile = "license.txt", licenseUrl = "https://testlicenseurl"}, - null, - "https://testlicenseurl" }; - yield return new object[] { - new { id = "testPackage", version = "1.0.0", licenseExpression = "MIT"}, - new Uri("https://testnuget"), - "https://testnuget/packages/testPackage/1.0.0/license" }; - yield return new object[] { - new { id = "testPackage", version = "1.0.0", licenseFile = "license.txt"}, - new Uri("https://testnuget"), - "https://testnuget/packages/testPackage/1.0.0/license" }; - } - } - - private static object WithDependency(string id, string range, string targetFramework) - { - return new - { - dependencyGroups = new object[] - { - new - { - dependencies = new object[] - { - new { id, range } - }, - targetFramework - } - } - }; - } - - private static object WithFrameworkAssemblyGroup(string targetFramework) - { - return new - { - frameworkAssemblyGroup = new object[] - { - new { targetFramework } - } - }; - } - - private static object WithPackageEntry(string fullName) - { - return new - { - packageEntries = new object[] - { - new { fullName } - } - }; - } - - private static JObject CatalogEntry(object obj) - { - var json = JObject.FromObject(obj); - - // Add required properties if they are missing. - if (json[MetadataConstants.IdPropertyName] == null) - { - json[MetadataConstants.IdPropertyName] = "NuGet.Versioning"; - } - - if (json[MetadataConstants.VerbatimVersionPropertyName] == null) - { - json[MetadataConstants.VerbatimVersionPropertyName] = "4.6.2"; - } - - return json; - } - } -} diff --git a/tests/NuGet.IndexingTests/Extraction/DocumentCreatorTests.cs b/tests/NuGet.IndexingTests/Extraction/DocumentCreatorTests.cs deleted file mode 100644 index 0badd6dc7..000000000 --- a/tests/NuGet.IndexingTests/Extraction/DocumentCreatorTests.cs +++ /dev/null @@ -1,279 +0,0 @@ -// Copyright (c) .NET Foundation. All rights reserved. -// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. - -using System; -using System.Collections.Generic; -using System.Linq; -using Lucene.Net.Documents; -using NuGet.Indexing; -using Xunit; -using LuceneConstants = NuGet.Indexing.MetadataConstants.LuceneMetadata; - -namespace NuGet.IndexingTests -{ - public class DocumentCreatorTests - { - [Theory, MemberData(nameof(VerifiesRequiredPropertiesAreProvidedData))] - public void VerifiesRequiredPropertiesAreProvided(string key, string expected) - { - // Arrange - var package = GetPackage(); - package.Remove(key); - - // Act, Assert - var exception = Assert.Throws(() => DocumentCreator.CreateDocument(package)); - Assert.Equal(expected, exception.Message); - } - - [Theory, MemberData(nameof(ValidatesPropertiesThatAreNotStringsData))] - public void ValidatesPropertiesThatAreNotStrings(string key, string expected) - { - // Arrange - var package = GetPackage(); - package[key] = "bad"; - - // Act, Assert - var exception = Assert.Throws(() => DocumentCreator.CreateDocument(package)); - Assert.Equal(expected, exception.Message); - } - - [Fact] - public void RejectsMissingVersionAndOriginalVersion() - { - // Arrange - var package = GetPackage(); - package.Remove(MetadataConstants.NormalizedVersionPropertyName); - package.Remove(MetadataConstants.VerbatimVersionPropertyName); - - // Act, Assert - var exception = Assert.Throws(() => DocumentCreator.CreateDocument(package)); - Assert.Equal($"Required property '{MetadataConstants.VerbatimVersionPropertyName}' not found.\r\n", exception.Message); - } - - [Fact] - public void RejectsInvalidOriginalVersionWhenVersionIsNotProvided() - { - // Arrange - var package = GetPackage(); - package.Remove(MetadataConstants.NormalizedVersionPropertyName); - package[MetadataConstants.VerbatimVersionPropertyName] = "bad"; - - // Act, Assert - var exception = Assert.Throws(() => DocumentCreator.CreateDocument(package)); - Assert.Equal($"Unable to parse '{MetadataConstants.VerbatimVersionPropertyName}' as NuGetVersion.\r\n", exception.Message); - } - - [Fact] - public void AllAnalyzedFieldsHasPositionsAndOffsetsStored() - { - // Arrange - var package = GetPackage(); - - // Act - var document = DocumentCreator.CreateDocument(package); - - // Assert - foreach (var fieldable in document.GetFields().Where(f => f.IsTokenized && !(f is NumericField))) - { - Assert.True(fieldable.IsTermVectorStored, $"{fieldable.Name} should have its term vector stored."); - Assert.True(fieldable.IsStoreOffsetWithTermVector, $"{fieldable.Name} should store offsets with its term vector."); - Assert.True(fieldable.IsStorePositionWithTermVector, $"{fieldable.Name} should store positions with its term vector."); - } - } - - [Fact] - public void AllFieldsAreStoredAndIndexed() - { - // Arrange - var package = GetPackage(); - - // Act - var document = DocumentCreator.CreateDocument(package); - - // Assert - foreach (var field in document.GetFields()) - { - Assert.True(field.IsStored, $"{field.Name} should be stored."); - Assert.True(field.IsIndexed, $"{field.Name} should be indexed."); - } - } - - [Fact] - public void DefaultsMissingTitleToValueOfId() - { - // Arrange - var package = GetPackage(); - package.Remove(MetadataConstants.TitlePropertyName); - - // Act - var document = DocumentCreator.CreateDocument(package); - - // Assert - Assert.Equal("DotNetZip", document.GetFieldable(LuceneConstants.TitlePropertyName).StringValue); - Assert.Equal("dotnetzip", document.GetFieldable(LuceneConstants.SortableTitlePropertyName).StringValue); - } - - [Fact] - public void DefaultsEmptyTitleToValueOfId() - { - // Arrange - var package = GetPackage(); - package[MetadataConstants.TitlePropertyName] = string.Empty; - - // Act - var document = DocumentCreator.CreateDocument(package); - - // Assert - Assert.Equal("DotNetZip", document.GetFieldable(LuceneConstants.TitlePropertyName).StringValue); - Assert.Equal("dotnetzip", document.GetFieldable(LuceneConstants.SortableTitlePropertyName).StringValue); - } - - [Fact] - public void DefaultsMissingLastEditedToValueOfPublished() - { - // Arrange - var package = GetPackage(); - package.Remove(MetadataConstants.LastEditedPropertyName); - - // Act - var document = DocumentCreator.CreateDocument(package); - - // Assert - Assert.Equal("2002-02-02T00:00:00.0000000Z", document.GetField(LuceneConstants.OriginalPublishedPropertyName).StringValue); - Assert.Null(document.GetField(LuceneConstants.OriginalLastEditedPropertyName)); - Assert.Equal("20020202", document.GetFieldable(LuceneConstants.PublishedDatePropertyName).StringValue); - Assert.Equal("20020202", document.GetFieldable(LuceneConstants.LastEditedDatePropertyName).StringValue); - } - - [Fact] - public void DefaultsMissingVersionToParsedOriginalVersion() - { - // Arrange - var package = GetPackage(); - package.Remove(MetadataConstants.NormalizedVersionPropertyName); - package[MetadataConstants.VerbatimVersionPropertyName] = "1.02.003"; - - // Act - var document = DocumentCreator.CreateDocument(package); - - // Assert - Assert.Equal("1.02.003", document.GetFieldable(LuceneConstants.VerbatimVersionPropertyName).StringValue); - Assert.Equal("1.2.3", document.GetFieldable(LuceneConstants.NormalizedVersionPropertyName).StringValue); - } - - [Fact] - public void HasExpectedFieldNamesAndValues() - { - // Arrange - var package = GetPackage(); - var expected = new[] - { - new KeyValuePair(LuceneConstants.IdPropertyName, "DotNetZip"), - new KeyValuePair(LuceneConstants.IdAutocompletePropertyName, "DotNetZip"), - new KeyValuePair(LuceneConstants.TokenizedIdPropertyName, "DotNetZip"), - new KeyValuePair(LuceneConstants.ShingledIdPropertyName, "DotNetZip"), - new KeyValuePair(LuceneConstants.VerbatimVersionPropertyName, "1.00.000"), - new KeyValuePair(LuceneConstants.NormalizedVersionPropertyName, "1.0.0"), - new KeyValuePair(LuceneConstants.FullVersionPropertyName, "1.0.0"), - new KeyValuePair(LuceneConstants.TitlePropertyName, "The Famous DotNetZip"), - new KeyValuePair(LuceneConstants.DescriptionPropertyName, "The description."), - new KeyValuePair(LuceneConstants.SummaryPropertyName, "The summary."), - new KeyValuePair(LuceneConstants.TagsPropertyName, "dot net zip"), - new KeyValuePair(LuceneConstants.AuthorsPropertyName, "Justin Bieber, Nick Jonas"), - new KeyValuePair(LuceneConstants.SemVerLevelPropertyName, ""), - new KeyValuePair(LuceneConstants.ListedPropertyName, "true"), - new KeyValuePair(LuceneConstants.OriginalCreatedPropertyName, "2001-01-01T00:00:00.0000000Z"), - new KeyValuePair(LuceneConstants.OriginalPublishedPropertyName, "2002-02-02T00:00:00.0000000Z"), - new KeyValuePair(LuceneConstants.PublishedDatePropertyName, "20020202"), - new KeyValuePair(LuceneConstants.OriginalLastEditedPropertyName, "2003-03-03T00:00:00.0000000Z"), - new KeyValuePair(LuceneConstants.LastEditedDatePropertyName, "20030303"), - new KeyValuePair(LuceneConstants.SortableTitlePropertyName, "the famous dotnetzip"), - new KeyValuePair(LuceneConstants.IconUrlPropertyName, "http://example/icon.png"), - new KeyValuePair(LuceneConstants.ProjectUrlPropertyName, "http://example/"), - new KeyValuePair(LuceneConstants.MinClientVersionPropertyName, "2.0.0"), - new KeyValuePair(LuceneConstants.ReleaseNotesPropertyName, "The release notes."), - new KeyValuePair(LuceneConstants.CopyrightPropertyName, "The copyright."), - new KeyValuePair(LuceneConstants.LanguagePropertyName, "English"), - new KeyValuePair(LuceneConstants.LicenseUrlPropertyName, "http://example/license.txt"), - new KeyValuePair(LuceneConstants.PackageHashPropertyName, "0123456789ABCDEF"), - new KeyValuePair(LuceneConstants.PackageHashAlgorithmPropertyName, "SHA1"), - new KeyValuePair(LuceneConstants.PackageSizePropertyName, "1200"), - new KeyValuePair(LuceneConstants.RequiresLicenseAcceptancePropertyName, "true"), - new KeyValuePair(LuceneConstants.FlattenedDependenciesPropertyName, "Lucene.Net:2.9.4.1|WindowsAzure.Storage:1.6"), - new KeyValuePair(LuceneConstants.DependenciesPropertyName, "[{\"Id\":\"Lucene.Net\",\"VersionSpec\":\"2.9.4.1\"},{\"Id\":\"WindowsAzure.Storage\",\"VersionSpec\":\"1.6\"}]"), - new KeyValuePair(LuceneConstants.SupportedFrameworksPropertyName, "[\"net40\",\"aspnet99\"]") - }; - - // Act - var document = DocumentCreator.CreateDocument(package); - - // Assert - var actual = document.GetFields().Select(f => new KeyValuePair(f.Name, f.StringValue)).ToArray(); - Assert.Equal(expected.Length, actual.Length); - for (int i = 0; i < expected.Length; i++) - { - Assert.Equal(expected[i], actual[i]); - } - } - - public static IEnumerable VerifiesRequiredPropertiesAreProvidedData - { - get - { - yield return new[] { MetadataConstants.IdPropertyName, "Required property 'id' not found.\r\n" }; - yield return new[] { MetadataConstants.ListedPropertyName, "Required property 'listed' not found.\r\n" }; - yield return new[] { MetadataConstants.PublishedPropertyName, "Required property 'published' not found.\r\n" }; - } - } - - public static IEnumerable ValidatesPropertiesThatAreNotStringsData - { - get - { - yield return new[] { MetadataConstants.ListedPropertyName, "Unable to parse 'listed' as Boolean.\r\n" }; - yield return new[] { MetadataConstants.PublishedPropertyName, "Unable to parse 'published' as DateTime.\r\n" }; - yield return new[] { MetadataConstants.LastEditedPropertyName, "Unable to parse 'lastEdited' as DateTime.\r\n" }; - yield return new[] { MetadataConstants.RequiresLicenseAcceptancePropertyName, "Unable to parse 'requiresLicenseAcceptance' as Boolean.\r\n" }; - yield return new[] { MetadataConstants.PackageSizePropertyName, "Unable to parse 'packageSize' as Int32.\r\n" }; - } - } - - private static IDictionary GetPackage() - { - return new Dictionary - { - // required - { MetadataConstants.IdPropertyName, "DotNetZip" }, - { MetadataConstants.NormalizedVersionPropertyName, "1.0.0" }, - { MetadataConstants.ListedPropertyName, "true" }, - { MetadataConstants.PublishedPropertyName, new DateTime(2002, 2, 2, 0, 0, 0, DateTimeKind.Utc).ToString("O") }, - - // not required but validated - { MetadataConstants.LastEditedPropertyName, new DateTime(2003, 3, 3, 0, 0, 0, DateTimeKind.Utc).ToString("O") }, - { MetadataConstants.PackageSizePropertyName, "1200" }, - { MetadataConstants.RequiresLicenseAcceptancePropertyName, "true" }, - - // not required - { MetadataConstants.SemVerLevelKeyPropertyName, "" }, - { MetadataConstants.VerbatimVersionPropertyName, "1.00.000" }, - { MetadataConstants.TitlePropertyName, "The Famous DotNetZip" }, - { MetadataConstants.DescriptionPropertyName, "The description." }, - { MetadataConstants.SummaryPropertyName, "The summary." }, - { MetadataConstants.TagsPropertyName, "dot net zip" }, - { MetadataConstants.AuthorsPropertyName, "Justin Bieber, Nick Jonas" }, - { MetadataConstants.CreatedPropertyName, new DateTime(2001, 1, 1, 0, 0, 0, DateTimeKind.Utc).ToString("O") }, - { MetadataConstants.IconUrlPropertyName, "http://example/icon.png" }, - { MetadataConstants.ProjectUrlPropertyName, "http://example/" }, - { MetadataConstants.MinClientVersionPropertyName, "2.0.0" }, - { MetadataConstants.ReleaseNotesPropertyName, "The release notes." }, - { MetadataConstants.CopyrightPropertyName, "The copyright." }, - { MetadataConstants.LanguagePropertyName, "English" }, - { MetadataConstants.LicenseUrlPropertyName, "http://example/license.txt" }, - { MetadataConstants.PackageHashPropertyName, "0123456789ABCDEF" }, - { MetadataConstants.PackageHashAlgorithmPropertyName, "SHA1" }, - { MetadataConstants.FlattenedDependenciesPropertyName, "Lucene.Net:2.9.4.1|WindowsAzure.Storage:1.6" }, - { MetadataConstants.SupportedFrameworksPropertyName, "net40|aspnet99" } - }; - } - } -} diff --git a/tests/NuGet.IndexingTests/IdentifierAnalyzerTests.cs b/tests/NuGet.IndexingTests/IdentifierAnalyzerTests.cs deleted file mode 100644 index daf6f9b03..000000000 --- a/tests/NuGet.IndexingTests/IdentifierAnalyzerTests.cs +++ /dev/null @@ -1,63 +0,0 @@ -// Copyright (c) .NET Foundation. All rights reserved. -// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. - -using System.Collections.Generic; -using NuGet.Indexing; -using NuGet.IndexingTests.TestSupport; -using Xunit; - -namespace NuGet.IndexingTests -{ - public class IdentifierAnalyzerTests - { - [Theory] - [MemberData(nameof(TokenizerLowercasesAndCamelCasesInputData))] - public void TokenizerLowercasesAndCamelCasesInput(string text, TokenAttributes[] expected) - { - // arrange, act - var actual = new IdentifierAnalyzer().Tokenize(text); - - // assert - Assert.Equal(expected, actual); - } - - public static IEnumerable TokenizerLowercasesAndCamelCasesInputData - { - get - { - // split by DotTokenizer - yield return new object[] - { - "a.b", - new[] - { - new TokenAttributes("a", 0, 1, 1), - new TokenAttributes("b", 2, 3, 1) - } - }; - - // split on camel case - yield return new object[] - { - "DotNet", - new[] - { - new TokenAttributes("dotnet", 0, 6, 1), - new TokenAttributes("dot", 0, 3, 0), - new TokenAttributes("net", 3, 6, 1) - } - }; - - // lower case - yield return new object[] - { - "D", - new[] - { - new TokenAttributes("d", 0, 1, 1) - } - }; - } - } - } -} diff --git a/tests/NuGet.IndexingTests/IdentifierAutocompleteAnalyzerTests.cs b/tests/NuGet.IndexingTests/IdentifierAutocompleteAnalyzerTests.cs deleted file mode 100644 index c09bfcdb0..000000000 --- a/tests/NuGet.IndexingTests/IdentifierAutocompleteAnalyzerTests.cs +++ /dev/null @@ -1,119 +0,0 @@ -// Copyright (c) .NET Foundation. All rights reserved. -// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. - -using System.Collections.Generic; -using NuGet.Indexing; -using NuGet.IndexingTests.TestSupport; -using Xunit; - -namespace NuGet.IndexingTests -{ - public class IdentifierAutocompleteAnalyzerTests - { - [Theory] - [MemberData(nameof(TokenizerLowercasesNGramsAndCamelCasesInputData))] - public void TokenizerLowercasesNGramsAndCamelCasesInput(string text, TokenAttributes[] expected) - { - // arrange, act - var actual = new IdentifierAutocompleteAnalyzer().Tokenize(text); - - // assert - Assert.Equal(expected, actual); - } - - public static IEnumerable TokenizerLowercasesNGramsAndCamelCasesInputData - { - get - { - // split by DotTokenizer - yield return new object[] - { - "a.b", - new[] - { - new TokenAttributes("a", 0, 1, 1), - new TokenAttributes("b", 2, 3, 1) - } - }; - - // split on camel case - yield return new object[] - { - "DotNet", - new[] - { - new TokenAttributes("d", 0, 1, 1), - new TokenAttributes("do", 0, 2, 1), - new TokenAttributes("dot", 0, 3, 1), - new TokenAttributes("dotn", 0, 4, 1), - new TokenAttributes("dotne", 0, 5, 1), - new TokenAttributes("dotnet", 0, 6, 1), - new TokenAttributes("d", 0, 1, 1), - new TokenAttributes("do", 0, 2, 1), - new TokenAttributes("dot", 0, 3, 1), - new TokenAttributes("n", 3, 4, 1), - new TokenAttributes("ne", 3, 5, 1), - new TokenAttributes("net", 3, 6, 1) - } - }; - - // lower case - yield return new object[] - { - "D", - new[] - { - new TokenAttributes("d", 0, 1, 1) - } - }; - - // ngram size one to eight - yield return new object[] - { - "DOTNETZIP NET", - new[] - { - new TokenAttributes("d", 0, 1, 1), - new TokenAttributes("do", 0, 2, 1), - new TokenAttributes("dot", 0, 3, 1), - new TokenAttributes("dotn", 0, 4, 1), - new TokenAttributes("dotne", 0, 5, 1), - new TokenAttributes("dotnet", 0, 6, 1), - new TokenAttributes("dotnetz", 0, 7, 1), - new TokenAttributes("dotnetzi", 0, 8, 1), - new TokenAttributes("n", 10, 11, 1), - new TokenAttributes("ne", 10, 12, 1), - new TokenAttributes("net", 10, 13, 1) - } - }; - - // combined - yield return new object[] - { - "DotNet.ZIP-new", - new[] - { - new TokenAttributes("d", 0, 1, 1), - new TokenAttributes("do", 0, 2, 1), - new TokenAttributes("dot", 0, 3, 1), - new TokenAttributes("dotn", 0, 4, 1), - new TokenAttributes("dotne", 0, 5, 1), - new TokenAttributes("dotnet", 0, 6, 1), - new TokenAttributes("d", 0, 1, 1), - new TokenAttributes("do", 0, 2, 1), - new TokenAttributes("dot", 0, 3, 1), - new TokenAttributes("n", 3, 4, 1), - new TokenAttributes("ne", 3, 5, 1), - new TokenAttributes("net", 3, 6, 1), - new TokenAttributes("z", 7, 8, 1), - new TokenAttributes("zi", 7, 9, 1), - new TokenAttributes("zip", 7, 10, 1), - new TokenAttributes("n", 11, 12, 1), - new TokenAttributes("ne", 11, 13, 1), - new TokenAttributes("new", 11, 14, 1) - } - }; - } - } - } -} diff --git a/tests/NuGet.IndexingTests/IdentifierKeywordAnalyzerTests.cs b/tests/NuGet.IndexingTests/IdentifierKeywordAnalyzerTests.cs deleted file mode 100644 index 32fa957ea..000000000 --- a/tests/NuGet.IndexingTests/IdentifierKeywordAnalyzerTests.cs +++ /dev/null @@ -1,46 +0,0 @@ -// Copyright (c) .NET Foundation. All rights reserved. -// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. - -using System.Collections.Generic; -using NuGet.Indexing; -using NuGet.IndexingTests.TestSupport; -using Xunit; - -namespace NuGet.IndexingTests -{ - public class IdentifierKeywordAnalyzerTests - { - [Theory] - [MemberData(nameof(TokenizerOnlyLowercasesInputData))] - public void TokenizerOnlyLowercasesInput(string text, TokenAttributes expected) - { - // arrange, act - var actual = new IdentifierKeywordAnalyzer().Tokenize(text); - - // assert - Assert.Equal(new[] { expected }, actual); - } - - public static IEnumerable TokenizerOnlyLowercasesInputData - { - get - { - // all uppercase - yield return new object[] { "DOTNET", new TokenAttributes("dotnet", 0, 6) }; - - // camel case - yield return new object[] { "DotNet", new TokenAttributes("dotnet", 0, 6) }; - - // lower case - yield return new object[] { "dotnet", new TokenAttributes("dotnet", 0, 6) }; - - // stop words and spaces - yield return new object[] { "A BAD identifier.", new TokenAttributes("a bad identifier.", 0, 17) }; - - // mixed - yield return new object[] { "DotNet.ZIP-Unofficial is a BAD identifier.", new TokenAttributes("dotnet.zip-unofficial is a bad identifier.", 0, 42) }; - } - - } - } -} diff --git a/tests/NuGet.IndexingTests/NuGet.IndexingTests.csproj b/tests/NuGet.IndexingTests/NuGet.IndexingTests.csproj index d46c909c9..e38153072 100644 --- a/tests/NuGet.IndexingTests/NuGet.IndexingTests.csproj +++ b/tests/NuGet.IndexingTests/NuGet.IndexingTests.csproj @@ -50,27 +50,9 @@ - - - - - - - - - - - - - - - - - - @@ -82,9 +64,6 @@ - - 3.0.3 - 4.10.1 diff --git a/tests/NuGet.IndexingTests/OwnerAnalyzerTests.cs b/tests/NuGet.IndexingTests/OwnerAnalyzerTests.cs deleted file mode 100644 index f5b113a1b..000000000 --- a/tests/NuGet.IndexingTests/OwnerAnalyzerTests.cs +++ /dev/null @@ -1,43 +0,0 @@ -// Copyright (c) .NET Foundation. All rights reserved. -// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. - -using System.Collections.Generic; -using NuGet.Indexing; -using NuGet.IndexingTests.TestSupport; -using Xunit; - -namespace NuGet.IndexingTests -{ - public class OwnerAnalyzerTests - { - [Theory] - [MemberData(nameof(TokenizerOnlyLowercasesOwnerInputData))] - public void TokenizerOnlyLowercasesOwnerInput(string text, TokenAttributes expected) - { - // arrange, act - var actual = new OwnerAnalyzer().Tokenize(text); - - // assert - Assert.Equal(new[] { expected }, actual); - } - - public static IEnumerable TokenizerOnlyLowercasesOwnerInputData - { - get - { - // all upper case - yield return new object[] { "MICROSOFT-OWNER", new TokenAttributes("microsoft-owner", 0, 15) }; - - // title case - yield return new object[] { "Microsoft.Owner", new TokenAttributes("microsoft.owner", 0, 15) }; - - // camel case - yield return new object[] { "MicrosoftOwner", new TokenAttributes("microsoftowner", 0, 14) }; - - // mixed - yield return new object[] { "a Microsoft OWNER.", new TokenAttributes("a microsoft owner.", 0, 18) }; - } - - } - } -} diff --git a/tests/NuGet.IndexingTests/PackageAnalyzerTests.cs b/tests/NuGet.IndexingTests/PackageAnalyzerTests.cs deleted file mode 100644 index 7c9bdec6b..000000000 --- a/tests/NuGet.IndexingTests/PackageAnalyzerTests.cs +++ /dev/null @@ -1,137 +0,0 @@ -// Copyright (c) .NET Foundation. All rights reserved. -// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. - -using System.Collections.Generic; -using System.IO; -using System.Linq; -using NuGet.Indexing; -using NuGet.IndexingTests.TestSupport; -using Xunit; - -namespace NuGet.IndexingTests -{ - public class PackageAnalyzerTests - { - [Theory] - [MemberData(nameof(AddsCorrectFieldAnalyzersData))] - public void AddsCorrectFieldAnalyzers(string field, string text, TokenAttributes[] expected) - { - // arrange - var analyzer = new PackageAnalyzer(); - - // act - var tokenStream = analyzer.TokenStream(field, new StringReader(text)); - var actual = tokenStream.Tokenize().ToArray(); - - // assert - Assert.Equal(expected, actual); - } - - public static IEnumerable AddsCorrectFieldAnalyzersData - { - get - { - yield return new object[] - { - "Id", - "DotNetZip", - new[] - { - new TokenAttributes("dotnetzip", 0, 9) - } - }; - - yield return new object[] - { - "IdAutocomplete", - "DotNet", - new[] - { - new TokenAttributes("d", 0, 1, 1), - new TokenAttributes("do", 0, 2, 1), - new TokenAttributes("dot", 0, 3, 1), - new TokenAttributes("dotn", 0, 4, 1), - new TokenAttributes("dotne", 0, 5, 1), - new TokenAttributes("dotnet", 0, 6, 1), - new TokenAttributes("d", 0, 1, 1), - new TokenAttributes("do", 0, 2, 1), - new TokenAttributes("dot", 0, 3, 1), - new TokenAttributes("n", 3, 4, 1), - new TokenAttributes("ne", 3, 5, 1), - new TokenAttributes("net", 3, 6, 1), - - } - }; - - yield return new object[] - { - "TokenizedId", - "DotNet", - new[] - { - new TokenAttributes("dotnet", 0, 6, 1), - new TokenAttributes("dot", 0, 3, 0), - new TokenAttributes("net", 3, 6, 1) - } - }; - - yield return new object[] - { - "Version", - "01.002.0003", - new[] - { - new TokenAttributes("1.2.3", 0, 11) - } - }; - - yield return GetDescriptionTestCase("Title"); - yield return GetDescriptionTestCase("Description"); - yield return GetDescriptionTestCase("Summary"); - yield return GetDescriptionTestCase("Authors"); - - yield return new object[] - { - "Owner", - "Microsoft", - new[] - { - new TokenAttributes("microsoft", 0, 9) - } - }; - - yield return new object[] - { - "Tags", - "DOT Net zip", - new[] - { - new TokenAttributes("dot", 0, 3, null), - new TokenAttributes("net", 4, 7, null), - new TokenAttributes("zip", 8, 11, null), - } - }; - } - } - - private static object[] GetDescriptionTestCase(string field) - { - return new object[] - { - field, - "There is a package called DotNetZip.", - new[] - { - new TokenAttributes("package", 11, 18, 4), - new TokenAttributes("called", 19, 25, 1), - new TokenAttributes("dotnetzip", 26, 35, 1), - new TokenAttributes("dot", 26, 29, 0), - new TokenAttributes("dotnet", 26, 32, 0), - new TokenAttributes("net", 29, 32, 1), - new TokenAttributes("netzip", 29, 35, 0), - new TokenAttributes("zip", 32, 35, 1) - } - }; - } - } -} diff --git a/tests/NuGet.IndexingTests/ShingledIdentifierAnalyzerTests.cs b/tests/NuGet.IndexingTests/ShingledIdentifierAnalyzerTests.cs deleted file mode 100644 index 696d8d192..000000000 --- a/tests/NuGet.IndexingTests/ShingledIdentifierAnalyzerTests.cs +++ /dev/null @@ -1,80 +0,0 @@ -// Copyright (c) .NET Foundation. All rights reserved. -// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. - -using System.Collections.Generic; -using NuGet.Indexing; -using NuGet.IndexingTests.TestSupport; -using Xunit; - -namespace NuGet.IndexingTests -{ - public class ShingledIdentifierAnalyzerTests - { - [Theory] - [MemberData(nameof(TokenizerShinglesAndLowercasesInputData))] - public void TokenizerShinglesAndLowercasesInput(string text, TokenAttributes[] expected) - { - // arrange, act - var actual = new ShingledIdentifierAnalyzer().Tokenize(text); - - // assert - Assert.Equal(expected, actual); - } - - public static IEnumerable TokenizerShinglesAndLowercasesInputData - { - get - { - // split by DotTokenizer - yield return new object[] - { - "a.b", - new[] - { - new TokenAttributes("a", 0, 1, 1), - new TokenAttributes("a b", 0, 3, 0), - new TokenAttributes("b", 2, 3, 1) - } - }; - - // lower case - yield return new object[] - { - "D", - new[] - { - new TokenAttributes("d", 0, 1, 1) - } - }; - - // consecutive seperators - yield return new object[] - { - "a.....b", - new[] - { - new TokenAttributes("a", 0, 1, 1), - new TokenAttributes("a b", 0, 7, 0), - new TokenAttributes("b", 6, 7, 1) - } - }; - - // shingle up to two - yield return new object[] - { - "a.b.c.d", - new[] - { - new TokenAttributes("a", 0, 1, 1), - new TokenAttributes("a b", 0, 3, 0), - new TokenAttributes("b", 2, 3, 1), - new TokenAttributes("b c", 2, 5, 0), - new TokenAttributes("c", 4, 5, 1), - new TokenAttributes("c d", 4, 7, 0), - new TokenAttributes("d", 6, 7, 1), - } - }; - } - } - } -} diff --git a/tests/NuGet.IndexingTests/TagsAnalyzerTests.cs b/tests/NuGet.IndexingTests/TagsAnalyzerTests.cs deleted file mode 100644 index 0d87c873c..000000000 --- a/tests/NuGet.IndexingTests/TagsAnalyzerTests.cs +++ /dev/null @@ -1,68 +0,0 @@ -// Copyright (c) .NET Foundation. All rights reserved. -// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. - -using System.Collections.Generic; -using NuGet.Indexing; -using NuGet.IndexingTests.TestSupport; -using Xunit; - -namespace NuGet.IndexingTests -{ - public class TagsAnalyzerTests - { - [Theory] - [MemberData(nameof(TokenizerLowercasesAndSplitsInputData))] - public void TokenizerLowercasesAndSplitsInput(string text, TokenAttributes[] expected) - { - // arrange, act - var actual = new TagsAnalyzer().Tokenize(text); - - // assert - Assert.Equal(expected, actual); - } - - public static IEnumerable TokenizerLowercasesAndSplitsInputData - { - get - { - // split by DotTokenizer - yield return new object[] - { - "Split sentence.", - new[] - { - new TokenAttributes("split", 0, 5), - new TokenAttributes("sentence", 6, 14) - } - }; - - // lower case - yield return new object[] - { - "D", - new[] - { - new TokenAttributes("d", 0, 1) - } - }; - - // leaves stop words - yield return new object[] - { - "This is a sentence full of stop words.", - new[] - { - new TokenAttributes("this", 0, 4), - new TokenAttributes("is", 5, 7), - new TokenAttributes("a", 8, 9), - new TokenAttributes("sentence", 10, 18), - new TokenAttributes("full", 19, 23), - new TokenAttributes("of", 24, 26), - new TokenAttributes("stop", 27, 31), - new TokenAttributes("words", 32, 37) - } - }; - } - } - } -} diff --git a/tests/NuGet.IndexingTests/TestSupport/Constants.cs b/tests/NuGet.IndexingTests/TestSupport/Constants.cs deleted file mode 100644 index 4b7de6af3..000000000 --- a/tests/NuGet.IndexingTests/TestSupport/Constants.cs +++ /dev/null @@ -1,80 +0,0 @@ -// Copyright (c) .NET Foundation. All rights reserved. -// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. - -using System.Collections.Generic; -using Lucene.Net.Search; -using Lucene.Net.Util; -using Lucene.Net.Documents; -using NuGet.Indexing; -using NuGet.Versioning; - -namespace NuGet.IndexingTests.TestSupport -{ - public class Constants - { - public static readonly string BaseUriHttp = "http://testuri/"; - public static readonly string BaseUriHttps = "https://testuri/"; - public static readonly string BaseUriSemVer2Http = "http://testurisemver2/"; - public static readonly string BaseUriSemVer2Https = "https://testurisemver2/"; - public static readonly string LucenePropertyIconUrl = "IconUrl"; - public static readonly string LucenePropertyId = "Id"; - public static readonly string LucenePropertyDescription = "Description"; - public static readonly string LucenePropertyLicenseUrl = "LicenseUrl"; - public static readonly string LucenePropertyListed = "Listed"; - public static readonly string LucenePropertyProjectUrl = "ProjectUrl"; - public static readonly string LucenePropertySummary = "Summary"; - public static readonly string LucenePropertyTitle = "Title"; - public static readonly string LucenePropertyFullVersion = "FullVersion"; - public static readonly string LucenePropertyNormalizedVersion = "Version"; - public static readonly string LucenePropertyOriginalVersion = "OriginalVersion"; - public static readonly string LucenePropertySemVerLevel = "SemVerLevel"; - public static readonly string MockBase = "Mock"; - public static readonly string MockExplanationBase = MockBase + "Explanation"; - public static readonly string Query = "test"; - public static readonly string RankingsIdPrefix = "testId"; - public static readonly string RankingsSegmentName = "testReader"; - public static readonly string SchemeNameHttp = "http"; - public static readonly string SchemeNameHttps = "https"; - public static readonly string SegmentReaderPrefix = "SegmentReader"; - public static readonly string SemVerLevel2Value = "2"; - - public static readonly ScoreDoc[] ScoreDocs = { - new ScoreDoc(0, (float)1.0), - new ScoreDoc(1, (float)0.5) - }; - - - // These two lists should be used together to create the full matrix of documents - public static readonly Document[] FullDocMatrix = - { - MockObjectFactory.GetBasicDocument(0, listed:true), - MockObjectFactory.GetBasicDocument(1, listed:false), - MockObjectFactory.GetSemVerDocument(2, listed: true), - MockObjectFactory.GetSemVerDocument(3, listed: false), - MockObjectFactory.GetBasicDocument(4, listed:true), - MockObjectFactory.GetBasicDocument(5, listed:false), - MockObjectFactory.GetSemVerDocument(6, listed: true), - MockObjectFactory.GetSemVerDocument(7, listed: false) - }; - - public static readonly NuGetVersion[] FullVersionMatrix = - { - new NuGetVersion("1.0.0"), - new NuGetVersion("1.0.0"), - new NuGetVersion("1.0.0"), - new NuGetVersion("1.0.0"), - new NuGetVersion("1.0.0-something"), - new NuGetVersion("1.0.0-something"), - new NuGetVersion("1.0.0-something"), - new NuGetVersion("1.0.0-something") - }; - - public static readonly OpenBitSet LatestBitSet = new OpenBitSet(10); - - public static readonly OpenBitSet LatestStableBitSet = new OpenBitSet(10); - - public static readonly OpenBitSet LatestSemVer2BitSet = new OpenBitSet(10); - - public static readonly OpenBitSet LatestStableSemVer2BitSet = new OpenBitSet(10); - } -} diff --git a/tests/NuGet.IndexingTests/TestSupport/MockObjectFactory.cs b/tests/NuGet.IndexingTests/TestSupport/MockObjectFactory.cs deleted file mode 100644 index 9299cefe3..000000000 --- a/tests/NuGet.IndexingTests/TestSupport/MockObjectFactory.cs +++ /dev/null @@ -1,78 +0,0 @@ -// Copyright (c) .NET Foundation. All rights reserved. -// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. - -using Lucene.Net.Documents; -using Lucene.Net.Index; -using Microsoft.Extensions.Logging; -using Moq; - -namespace NuGet.IndexingTests.TestSupport -{ - public static class MockObjectFactory - { - public static string MockPrefix = "Mock"; - - public static Document GetSemVerDocument(int MockId, bool listed = true, string semVerLevel = null) - { - var mockDocument = GetBasicDocument(MockId, listed); - mockDocument.Add(new Field(Constants.LucenePropertySemVerLevel, semVerLevel == null ? Constants.SemVerLevel2Value : semVerLevel, Field.Store.YES, Field.Index.NO)); - - return mockDocument; - } - - public static Document GetBasicDocument(int MockId, bool listed = true) - { - var mockDocument = new Document(); - mockDocument.Add(new Field(Constants.LucenePropertyId, MockPrefix + Constants.LucenePropertyId + MockId, Field.Store.YES, Field.Index.NO)); - mockDocument.Add(new Field(Constants.LucenePropertyFullVersion, MockPrefix + Constants.LucenePropertyFullVersion + MockId, Field.Store.YES, Field.Index.NO)); - mockDocument.Add(new Field(Constants.LucenePropertyNormalizedVersion, MockPrefix + Constants.LucenePropertyNormalizedVersion + MockId, Field.Store.YES, Field.Index.NO)); - mockDocument.Add(new Field(Constants.LucenePropertyOriginalVersion, MockPrefix + Constants.LucenePropertyOriginalVersion + MockId, Field.Store.YES, Field.Index.NO)); - mockDocument.Add(new Field(Constants.LucenePropertyDescription, MockPrefix + Constants.LucenePropertyDescription + MockId, Field.Store.YES, Field.Index.NO)); - mockDocument.Add(new Field(Constants.LucenePropertySummary, MockPrefix + Constants.LucenePropertySummary + MockId, Field.Store.YES, Field.Index.NO)); - mockDocument.Add(new Field(Constants.LucenePropertyTitle, MockPrefix + Constants.LucenePropertyTitle + MockId, Field.Store.YES, Field.Index.NO)); - mockDocument.Add(new Field(Constants.LucenePropertyIconUrl, MockPrefix + Constants.LucenePropertyIconUrl + MockId, Field.Store.YES, Field.Index.NO)); - mockDocument.Add(new Field(Constants.LucenePropertyLicenseUrl, MockPrefix + Constants.LucenePropertyLicenseUrl + MockId, Field.Store.YES, Field.Index.NO)); - mockDocument.Add(new Field(Constants.LucenePropertyProjectUrl, MockPrefix + Constants.LucenePropertyProjectUrl + MockId, Field.Store.YES, Field.Index.NO)); - mockDocument.Add(new Field(Constants.LucenePropertyListed, listed ? "true" : "false", Field.Store.YES, Field.Index.NO)); - - return mockDocument; - } - - public static Mock CreateMockIndexReader(int numberOfDocs, int numberOfSubReaders = 0) - { - var mockIndexReader = new Mock(); - var numDocs = numberOfDocs; - - mockIndexReader.Setup(x => x.MaxDoc).Returns(numDocs); - mockIndexReader.Setup(x => x.TermDocs()).Returns((TermDocs)null); - mockIndexReader.Setup(x => x.NumDocs()).Returns(numDocs); - mockIndexReader.Setup(x => x.GetSequentialSubReaders()).Returns(MakeFakeSubReaders(numberOfSubReaders)); - - return mockIndexReader; - } - - private static SegmentReader[] MakeFakeSubReaders(int numberOfReaders) - { - if (numberOfReaders <= 0) - { - return null; - } - - var readers = new SegmentReader[numberOfReaders]; - - for (var i = 0; i < numberOfReaders; i ++) - { - var mockReader = new Mock(); - mockReader.Setup(x => x.SegmentName).Returns(Constants.SegmentReaderPrefix + i); - readers[i] = mockReader.Object; - } - - return readers; - } - - public static Mock CreateMockLogger() - { - return new Mock(); - } - } -} diff --git a/tests/NuGet.IndexingTests/TestSupport/TokenAttributes.cs b/tests/NuGet.IndexingTests/TestSupport/TokenAttributes.cs deleted file mode 100644 index ebe9d32e5..000000000 --- a/tests/NuGet.IndexingTests/TestSupport/TokenAttributes.cs +++ /dev/null @@ -1,58 +0,0 @@ -// Copyright (c) .NET Foundation. All rights reserved. -// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. - -namespace NuGet.IndexingTests.TestSupport -{ - public class TokenAttributes - { - /// - /// - /// - /// The value extracted. - /// Starting position of the token in the original input string, that was used to extract the term. - /// End position of the token in the original input string, that was used to extract the term. - /// Need to expand on the meaning of this. - public TokenAttributes(string term, int startOffset, int endOffset, int? positionIncrement = null) - { - Term = term; - StartOffset = startOffset; - EndOffset = endOffset; - PositionIncrement = positionIncrement; - } - - public string Term { get; set; } - public int StartOffset { get; set; } - public int EndOffset { get; set; } - public int? PositionIncrement { get; set; } - - public override string ToString() - { - return - "{" + - $"Term: '{Term}', " + - $"Offset: ({StartOffset}, {EndOffset}), " + - $"PositionIncrement: {PositionIncrement?.ToString() ?? "null"}" + - "}"; - } - - public override bool Equals(object obj) - { - var other = obj as TokenAttributes; - - if (other == null) - { - return false; - } - - return string.Equals(Term, other.Term) && - StartOffset == other.StartOffset && - EndOffset == other.EndOffset & - PositionIncrement == other.PositionIncrement; - } - - public override int GetHashCode() - { - return ToString().GetHashCode(); - } - } -} \ No newline at end of file diff --git a/tests/NuGet.IndexingTests/TestSupport/TokenStreamExtensions.cs b/tests/NuGet.IndexingTests/TestSupport/TokenStreamExtensions.cs deleted file mode 100644 index f7970ba0c..000000000 --- a/tests/NuGet.IndexingTests/TestSupport/TokenStreamExtensions.cs +++ /dev/null @@ -1,45 +0,0 @@ -// Copyright (c) .NET Foundation. All rights reserved. -// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. - -using System.Collections.Generic; -using System.IO; -using System.Linq; -using Lucene.Net.Analysis; -using Lucene.Net.Analysis.Tokenattributes; - -namespace NuGet.IndexingTests.TestSupport -{ - public static class TokenStreamExtensions - { - public static IEnumerable Tokenize(this TokenStream tokenStream) - { - var term = tokenStream.GetAttribute(); - var offset = tokenStream.GetAttribute(); - - IPositionIncrementAttribute positionIncrement = null; - - if (tokenStream.HasAttribute()) - { - positionIncrement = tokenStream.GetAttribute(); - } - - while (tokenStream.IncrementToken()) - { - var tokenAttributes = new TokenAttributes(term.Term, offset.StartOffset, offset.EndOffset); - - if (positionIncrement != null) - { - tokenAttributes.PositionIncrement = positionIncrement.PositionIncrement; - } - - yield return tokenAttributes; - } - } - - public static TokenAttributes[] Tokenize(this Analyzer analyzer, string text) - { - var tokenStream = analyzer.TokenStream(null, new StringReader(text)); - return tokenStream.Tokenize().ToArray(); - } - } -} diff --git a/tests/NuGet.IndexingTests/VersionAnalyzerTests.cs b/tests/NuGet.IndexingTests/VersionAnalyzerTests.cs deleted file mode 100644 index 09b249779..000000000 --- a/tests/NuGet.IndexingTests/VersionAnalyzerTests.cs +++ /dev/null @@ -1,46 +0,0 @@ -// Copyright (c) .NET Foundation. All rights reserved. -// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. - -using System.Collections.Generic; -using NuGet.Indexing; -using NuGet.IndexingTests.TestSupport; -using Xunit; - -namespace NuGet.IndexingTests -{ - public class VersionAnalyzerTests - { - [Theory] - [MemberData(nameof(TokenizerNormalizesVersionInputData))] - public void TokenizerNormalizesVersionInput(string text, TokenAttributes expected) - { - // arrange, act - var actual = new VersionAnalyzer().Tokenize(text); - - // assert - Assert.Equal(new[] { expected }, actual); - } - - public static IEnumerable TokenizerNormalizesVersionInputData - { - get - { - // one dot - yield return new object[] { "1.0", new TokenAttributes("1.0.0", 0, 3) }; - - // extra zeros - yield return new object[] { "2.003.1", new TokenAttributes("2.3.1", 0, 7) }; - - // empty - yield return new object[] { string.Empty, new TokenAttributes(string.Empty, 0, 0) }; - - // lots of digits - yield return new object[] { "1.02.3456789", new TokenAttributes("1.2.3456789", 0, 12) }; - - // trim - yield return new object[] { " 1.02.03 ", new TokenAttributes("1.2.3", 0, 12) }; - } - - } - } -}