diff --git a/src/Nest/Aggregations/AggregateDictionary.cs b/src/Nest/Aggregations/AggregateDictionary.cs index c6c4686b759..5f1cf895d88 100644 --- a/src/Nest/Aggregations/AggregateDictionary.cs +++ b/src/Nest/Aggregations/AggregateDictionary.cs @@ -178,6 +178,20 @@ public TermsAggregate Terms(string key) public MultiBucketAggregate> AdjacencyMatrix(string key) => GetMultiKeyedBucketAggregate(key); + public MultiBucketAggregate> RareTerms(string key) + { + var bucket = TryGet(key); + return bucket == null + ? null + : new MultiBucketAggregate> + { + Buckets = GetRareTermsBuckets(bucket.Items).ToList(), + Meta = bucket.Meta + }; + } + + public MultiBucketAggregate> RareTerms(string key) => RareTerms(key); + public MultiBucketAggregate Range(string key) => GetMultiBucketAggregate(key); public MultiBucketAggregate DateRange(string key) => GetMultiBucketAggregate(key); @@ -275,5 +289,17 @@ private IEnumerable> GetSignificantTermsBuckets> GetRareTermsBuckets(IEnumerable items) + { + var buckets = items.Cast>(); + + foreach (var bucket in buckets) + yield return new RareTermsBucket(bucket.BackingDictionary) + { + Key = (TKey)Convert.ChangeType(bucket.Key, typeof(TKey)), + DocCount = bucket.DocCount.GetValueOrDefault(0) + }; + } } } diff --git a/src/Nest/Aggregations/AggregationContainer.cs b/src/Nest/Aggregations/AggregationContainer.cs index be83f72d23c..ecadb8dd8e4 100644 --- a/src/Nest/Aggregations/AggregationContainer.cs +++ b/src/Nest/Aggregations/AggregationContainer.cs @@ -208,6 +208,9 @@ public interface IAggregationContainer [DataMember(Name = "range")] IRangeAggregation Range { get; set; } + [DataMember(Name = "rare_terms")] + IRareTermsAggregation RareTerms { get; set; } + [DataMember(Name = "reverse_nested")] IReverseNestedAggregation ReverseNested { get; set; } @@ -339,6 +342,8 @@ public class AggregationContainer : IAggregationContainer public IRangeAggregation Range { get; set; } + public IRareTermsAggregation RareTerms { get; set; } + public IReverseNestedAggregation ReverseNested { get; set; } public ISamplerAggregation Sampler { get; set; } @@ -481,6 +486,8 @@ public class AggregationContainerDescriptor : DescriptorBase, IRangeAggregation> selector ) => _SetInnerAggregation(name, selector, (a, d) => a.Range = d); + public AggregationContainerDescriptor RareTerms(string name, + Func, IRareTermsAggregation> selector + ) => + _SetInnerAggregation(name, selector, (a, d) => a.RareTerms = d); + public AggregationContainerDescriptor Stats(string name, Func, IStatsAggregation> selector ) => diff --git a/src/Nest/Aggregations/Bucket/RareTerms/RareTermsAggregation.cs b/src/Nest/Aggregations/Bucket/RareTerms/RareTermsAggregation.cs new file mode 100644 index 00000000000..c9558c8a649 --- /dev/null +++ b/src/Nest/Aggregations/Bucket/RareTerms/RareTermsAggregation.cs @@ -0,0 +1,127 @@ +using System; +using System.Collections.Generic; +using System.Linq.Expressions; +using System.Runtime.Serialization; +using Elasticsearch.Net.Utf8Json; + +namespace Nest +{ + /// + /// A multi-bucket value source based aggregation which finds "rare" terms — terms that are at the long-tail of the distribution + /// and are not frequent. Conceptually, this is like a terms aggregation that is sorted by _count ascending. + /// + [InterfaceDataContract] + [ReadAs(typeof(RareTermsAggregation))] + public interface IRareTermsAggregation : IBucketAggregation + { + /// + /// Terms that should be excluded from the aggregation + /// + [DataMember(Name = "exclude")] + TermsExclude Exclude { get; set; } + + /// + /// The field to find rare terms in + /// + [DataMember(Name = "field")] + Field Field { get; set; } + + /// + /// Terms that should be included in the aggregation + /// + [DataMember(Name = "include")] + TermsInclude Include { get; set; } + + /// + /// The maximum number of documents a term should appear in. + /// Defaults to 1 + /// + [DataMember(Name = "max_doc_count")] + long? MaximumDocumentCount { get; set; } + + /// + /// The value that should be used if a document does not have the field being aggregated + /// + [DataMember(Name = "missing")] + object Missing { get; set; } + + /// + /// The precision of the internal CuckooFilters. Smaller precision leads to better approximation, + /// but higher memory usage. Cannot be smaller than 0.00001. Defaults to 0.01 + /// + [DataMember(Name = "precision")] + double? Precision { get; set; } + } + + /// + public class RareTermsAggregation : BucketAggregationBase, IRareTermsAggregation + { + internal RareTermsAggregation() { } + + public RareTermsAggregation(string name) : base(name) { } + + /// + public TermsExclude Exclude { get; set; } + /// + public Field Field { get; set; } + /// + public TermsInclude Include { get; set; } + /// + public long? MaximumDocumentCount { get; set; } + /// + public object Missing { get; set; } + /// + public double? Precision { get; set; } + + internal override void WrapInContainer(AggregationContainer c) => c.RareTerms = this; + } + + /// + public class RareTermsAggregationDescriptor + : BucketAggregationDescriptorBase, IRareTermsAggregation, T>, IRareTermsAggregation + where T : class + { + TermsExclude IRareTermsAggregation.Exclude { get; set; } + Field IRareTermsAggregation.Field { get; set; } + TermsInclude IRareTermsAggregation.Include { get; set; } + long? IRareTermsAggregation.MaximumDocumentCount { get; set; } + object IRareTermsAggregation.Missing { get; set; } + double? IRareTermsAggregation.Precision { get; set; } + + /// + public RareTermsAggregationDescriptor Field(Field field) => Assign(field, (a, v) => a.Field = v); + + /// + public RareTermsAggregationDescriptor Field(Expression> field) => Assign(field, (a, v) => a.Field = v); + + /// + public RareTermsAggregationDescriptor MaximumDocumentCount(long? maximumDocumentCount) => + Assign(maximumDocumentCount, (a, v) => a.MaximumDocumentCount = v); + + /// + public RareTermsAggregationDescriptor Include(long partition, long numberOfPartitions) => + Assign(new TermsInclude(partition, numberOfPartitions), (a, v) => a.Include = v); + + /// + public RareTermsAggregationDescriptor Include(string includePattern) => + Assign(new TermsInclude(includePattern), (a, v) => a.Include = v); + + /// + public RareTermsAggregationDescriptor Include(IEnumerable values) => + Assign(new TermsInclude(values), (a, v) => a.Include = v); + + /// + public RareTermsAggregationDescriptor Exclude(string excludePattern) => + Assign(new TermsExclude(excludePattern), (a, v) => a.Exclude = v); + + /// + public RareTermsAggregationDescriptor Exclude(IEnumerable values) => + Assign(new TermsExclude(values), (a, v) => a.Exclude = v); + + /// + public RareTermsAggregationDescriptor Missing(object missing) => Assign(missing, (a, v) => a.Missing = v); + + /// + public RareTermsAggregationDescriptor Precision(double? precision) => Assign(precision, (a, v) => a.Precision = v); + } +} diff --git a/src/Nest/Aggregations/Bucket/RareTerms/RareTermsBucket.cs b/src/Nest/Aggregations/Bucket/RareTerms/RareTermsBucket.cs new file mode 100644 index 00000000000..25cd2e7e4af --- /dev/null +++ b/src/Nest/Aggregations/Bucket/RareTerms/RareTermsBucket.cs @@ -0,0 +1,13 @@ +using System.Collections.Generic; + +namespace Nest +{ + public class RareTermsBucket : BucketBase + { + public RareTermsBucket(IReadOnlyDictionary dict) : base(dict) { } + + public long DocCount { get; set; } + + public TKey Key { get; set; } + } +} diff --git a/src/Nest/Aggregations/Visitor/AggregationVisitor.cs b/src/Nest/Aggregations/Visitor/AggregationVisitor.cs index bf13ad588ee..0b60e2f617a 100644 --- a/src/Nest/Aggregations/Visitor/AggregationVisitor.cs +++ b/src/Nest/Aggregations/Visitor/AggregationVisitor.cs @@ -74,6 +74,8 @@ public interface IAggregationVisitor void Visit(IRangeAggregation aggregation); + void Visit(IRareTermsAggregation aggregation); + void Visit(ITermsAggregation aggregation); void Visit(ISignificantTermsAggregation aggregation); @@ -201,6 +203,8 @@ public virtual void Visit(ISignificantTermsAggregation aggregation) { } public virtual void Visit(IRangeAggregation aggregation) { } + public virtual void Visit(IRareTermsAggregation aggregation) { } + public virtual void Visit(INestedAggregation aggregation) { } public virtual void Visit(IParentAggregation aggregation) { } diff --git a/src/Nest/Aggregations/Visitor/AggregationWalker.cs b/src/Nest/Aggregations/Visitor/AggregationWalker.cs index f62b7b5e225..510ff903bac 100644 --- a/src/Nest/Aggregations/Visitor/AggregationWalker.cs +++ b/src/Nest/Aggregations/Visitor/AggregationWalker.cs @@ -128,6 +128,11 @@ public void Walk(IAggregationContainer aggregation, IAggregationVisitor visitor) v.Visit(d); Accept(v, d.Aggregations); }); + AcceptAggregation(aggregation.RareTerms, visitor, (v, d) => + { + v.Visit(d); + Accept(v, d.Aggregations); + }); AcceptAggregation(aggregation.ReverseNested, visitor, (v, d) => { v.Visit(d); diff --git a/src/Tests/Tests/Aggregations/Bucket/RareTerms/RareTermsAggregationUsageTests.cs b/src/Tests/Tests/Aggregations/Bucket/RareTerms/RareTermsAggregationUsageTests.cs new file mode 100644 index 00000000000..9e5560e58d3 --- /dev/null +++ b/src/Tests/Tests/Aggregations/Bucket/RareTerms/RareTermsAggregationUsageTests.cs @@ -0,0 +1,79 @@ +using System; +using System.Collections.Generic; +using FluentAssertions; +using Nest; +using Tests.Core.Extensions; +using Tests.Core.ManagedElasticsearch.Clusters; +using Tests.Domain; +using Tests.Framework.EndpointTests.TestState; + +namespace Tests.Aggregations.Bucket.RareTerms +{ + /** + * A multi-bucket value source based aggregation which finds "rare" terms — terms that are at the long-tail of the + * distribution and are not frequent. Conceptually, this is like a terms aggregation that is sorted by _count ascending. + * As noted in the terms aggregation docs, actually ordering a terms agg by count ascending has unbounded error. + * Instead, you should use the rare_terms aggregation + * + * See the Elasticsearch documentation on {ref_current}/search-aggregations-bucket-rare-terms-aggregation.html[rare terms aggregation] for more detail. + */ + public class RareTermsAggregationUsageTests : AggregationUsageTestBase + { + public RareTermsAggregationUsageTests(ReadOnlyCluster i, EndpointUsage usage) : base(i, usage) { } + + protected override object AggregationJson => new + { + names = new + { + meta = new + { + foo = "bar" + }, + rare_terms = new + { + field = "name", + max_doc_count = 5, + missing = "n/a", + precision = 0.001 + } + } + }; + + protected override Func, IAggregationContainer> FluentAggs => a => a + .RareTerms("names", st => st + .Field(p => p.Name) + .Missing("n/a") + .MaximumDocumentCount(5) + .Precision(0.001) + .Meta(m => m + .Add("foo", "bar") + ) + ); + + protected override AggregationDictionary InitializerAggs => + new RareTermsAggregation("names") + { + Field = Infer.Field(p => p.Name), + MaximumDocumentCount = 5, + Precision = 0.001, + Missing = "n/a", + Meta = new Dictionary { { "foo", "bar" } } + }; + + protected override void ExpectResponse(ISearchResponse response) + { + response.ShouldBeValid(); + var rareTerms = response.Aggregations.RareTerms("names"); + rareTerms.Should().NotBeNull(); + rareTerms.Buckets.Should().NotBeNull(); + rareTerms.Buckets.Count.Should().BeGreaterThan(0); + foreach (var item in rareTerms.Buckets) + { + item.Key.Should().NotBeNullOrEmpty(); + item.DocCount.Should().BeGreaterOrEqualTo(1); + } + rareTerms.Meta.Should().NotBeNull().And.HaveCount(1); + rareTerms.Meta["foo"].Should().Be("bar"); + } + } +}