-
Notifications
You must be signed in to change notification settings - Fork 1.1k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Implement Rare Terms aggregation (#4054)
Relates: #4001. This commit implements the Rare Terms aggregation. Rare terms buckets expose only key and doc_count, so a new RareTermsBucket<TKey> type is used.
- Loading branch information
Showing
7 changed files
with
266 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
127 changes: 127 additions & 0 deletions
127
src/Nest/Aggregations/Bucket/RareTerms/RareTermsAggregation.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,127 @@ | ||
using System; | ||
using System.Collections.Generic; | ||
using System.Linq.Expressions; | ||
using System.Runtime.Serialization; | ||
using Elasticsearch.Net.Utf8Json; | ||
|
||
namespace Nest | ||
{ | ||
	/// <summary> | ||
	/// A multi-bucket value source based aggregation which finds "rare" terms — terms that are at the long-tail of the distribution | ||
	/// and are not frequent. Conceptually, this is like a terms aggregation that is sorted by _count ascending. | ||
	/// </summary> | ||
	/// <remarks> | ||
	/// The <c>ReadAs</c> attribute names <see cref="RareTermsAggregation"/> as the concrete type for this interface; | ||
	/// presumably that is the type instantiated when the aggregation is read back - confirm against the serializer. | ||
	/// </remarks> | ||
	[InterfaceDataContract] | ||
	[ReadAs(typeof(RareTermsAggregation))] | ||
	public interface IRareTermsAggregation : IBucketAggregation | ||
	{ | ||
		/// <summary> | ||
		/// Terms that should be excluded from the aggregation. | ||
		/// Mapped to the <c>exclude</c> data member. | ||
		/// </summary> | ||
		[DataMember(Name = "exclude")] | ||
		TermsExclude Exclude { get; set; } | ||
|
||
		/// <summary> | ||
		/// The field to find rare terms in. | ||
		/// Mapped to the <c>field</c> data member. | ||
		/// </summary> | ||
		[DataMember(Name = "field")] | ||
		Field Field { get; set; } | ||
|
||
		/// <summary> | ||
		/// Terms that should be included in the aggregation. | ||
		/// Mapped to the <c>include</c> data member. | ||
		/// </summary> | ||
		[DataMember(Name = "include")] | ||
		TermsInclude Include { get; set; } | ||
|
||
		/// <summary> | ||
		/// The maximum number of documents a term should appear in. | ||
		/// Defaults to <c>1</c>. | ||
		/// Mapped to the <c>max_doc_count</c> data member. | ||
		/// </summary> | ||
		[DataMember(Name = "max_doc_count")] | ||
		long? MaximumDocumentCount { get; set; } | ||
|
||
		/// <summary> | ||
		/// The value that should be used if a document does not have the field being aggregated. | ||
		/// Mapped to the <c>missing</c> data member. | ||
		/// </summary> | ||
		[DataMember(Name = "missing")] | ||
		object Missing { get; set; } | ||
|
||
		/// <summary> | ||
		/// The precision of the internal CuckooFilters. Smaller precision leads to better approximation, | ||
		/// but higher memory usage. Cannot be smaller than 0.00001. Defaults to 0.01. | ||
		/// Mapped to the <c>precision</c> data member. | ||
		/// </summary> | ||
		[DataMember(Name = "precision")] | ||
		double? Precision { get; set; } | ||
	} | ||
|
||
	/// <inheritdoc cref="IRareTermsAggregation"/> | ||
	public class RareTermsAggregation : BucketAggregationBase, IRareTermsAggregation | ||
	{ | ||
		// Parameterless constructor is internal, so it cannot be called from outside this assembly. | ||
		// NOTE(review): presumably exists so the type can be materialised without a name - confirm. | ||
		internal RareTermsAggregation() { } | ||
|
||
		/// <summary> | ||
		/// Creates a rare terms aggregation with the given name. | ||
		/// </summary> | ||
		/// <param name="name">The aggregation name, forwarded to the base class constructor</param> | ||
		public RareTermsAggregation(string name) : base(name) { } | ||
|
||
		/// <inheritdoc /> | ||
		public TermsExclude Exclude { get; set; } | ||
		/// <inheritdoc /> | ||
		public Field Field { get; set; } | ||
		/// <inheritdoc /> | ||
		public TermsInclude Include { get; set; } | ||
		/// <inheritdoc /> | ||
		public long? MaximumDocumentCount { get; set; } | ||
		/// <inheritdoc /> | ||
		public object Missing { get; set; } | ||
		/// <inheritdoc /> | ||
		public double? Precision { get; set; } | ||
|
||
		// Registers this instance as the container's rare terms aggregation. | ||
		internal override void WrapInContainer(AggregationContainer c) => c.RareTerms = this; | ||
	} | ||
|
||
/// <inheritdoc cref="IRareTermsAggregation"/> | ||
public class RareTermsAggregationDescriptor<T> | ||
: BucketAggregationDescriptorBase<RareTermsAggregationDescriptor<T>, IRareTermsAggregation, T>, IRareTermsAggregation | ||
where T : class | ||
{ | ||
TermsExclude IRareTermsAggregation.Exclude { get; set; } | ||
Field IRareTermsAggregation.Field { get; set; } | ||
TermsInclude IRareTermsAggregation.Include { get; set; } | ||
long? IRareTermsAggregation.MaximumDocumentCount { get; set; } | ||
object IRareTermsAggregation.Missing { get; set; } | ||
double? IRareTermsAggregation.Precision { get; set; } | ||
|
||
/// <inheritdoc cref="IRareTermsAggregation.Field" /> | ||
public RareTermsAggregationDescriptor<T> Field(Field field) => Assign(field, (a, v) => a.Field = v); | ||
|
||
/// <inheritdoc cref="IRareTermsAggregation.Field" /> | ||
public RareTermsAggregationDescriptor<T> Field<TValue>(Expression<Func<T, TValue>> field) => Assign(field, (a, v) => a.Field = v); | ||
|
||
/// <inheritdoc cref="IRareTermsAggregation.MaximumDocumentCount" /> | ||
public RareTermsAggregationDescriptor<T> MaximumDocumentCount(long? maximumDocumentCount) => | ||
Assign(maximumDocumentCount, (a, v) => a.MaximumDocumentCount = v); | ||
|
||
/// <inheritdoc cref="IRareTermsAggregation.Include" /> | ||
public RareTermsAggregationDescriptor<T> Include(long partition, long numberOfPartitions) => | ||
Assign(new TermsInclude(partition, numberOfPartitions), (a, v) => a.Include = v); | ||
|
||
/// <inheritdoc cref="IRareTermsAggregation.Include" /> | ||
public RareTermsAggregationDescriptor<T> Include(string includePattern) => | ||
Assign(new TermsInclude(includePattern), (a, v) => a.Include = v); | ||
|
||
/// <inheritdoc cref="IRareTermsAggregation.Include" /> | ||
public RareTermsAggregationDescriptor<T> Include(IEnumerable<string> values) => | ||
Assign(new TermsInclude(values), (a, v) => a.Include = v); | ||
|
||
/// <inheritdoc cref="IRareTermsAggregation.Exclude" /> | ||
public RareTermsAggregationDescriptor<T> Exclude(string excludePattern) => | ||
Assign(new TermsExclude(excludePattern), (a, v) => a.Exclude = v); | ||
|
||
/// <inheritdoc cref="IRareTermsAggregation.Exclude" /> | ||
public RareTermsAggregationDescriptor<T> Exclude(IEnumerable<string> values) => | ||
Assign(new TermsExclude(values), (a, v) => a.Exclude = v); | ||
|
||
/// <inheritdoc cref="IRareTermsAggregation.Missing" /> | ||
public RareTermsAggregationDescriptor<T> Missing(object missing) => Assign(missing, (a, v) => a.Missing = v); | ||
|
||
/// <inheritdoc cref="IRareTermsAggregation.Precision" /> | ||
public RareTermsAggregationDescriptor<T> Precision(double? precision) => Assign(precision, (a, v) => a.Precision = v); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
using System.Collections.Generic; | ||
|
||
namespace Nest | ||
{ | ||
	/// <summary> | ||
	/// A bucket of a rare terms aggregation. It exposes only a <see cref="Key"/> and a <see cref="DocCount"/>. | ||
	/// </summary> | ||
	/// <typeparam name="TKey">The type of the bucket key</typeparam> | ||
	public class RareTermsBucket<TKey> : BucketBase | ||
	{ | ||
		/// <summary> | ||
		/// Creates a bucket over the given aggregates. | ||
		/// </summary> | ||
		/// <param name="dict">The aggregates keyed by name, forwarded to the base class constructor</param> | ||
		public RareTermsBucket(IReadOnlyDictionary<string, IAggregate> dict) : base(dict) { } | ||
|
||
		/// <summary> | ||
		/// The <c>doc_count</c> of the bucket. | ||
		/// </summary> | ||
		public long DocCount { get; set; } | ||
|
||
		/// <summary> | ||
		/// The key of the bucket. | ||
		/// </summary> | ||
		public TKey Key { get; set; } | ||
	} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
79 changes: 79 additions & 0 deletions
79
src/Tests/Tests/Aggregations/Bucket/RareTerms/RareTermsAggregationUsageTests.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
using System; | ||
using System.Collections.Generic; | ||
using FluentAssertions; | ||
using Nest; | ||
using Tests.Core.Extensions; | ||
using Tests.Core.ManagedElasticsearch.Clusters; | ||
using Tests.Domain; | ||
using Tests.Framework.EndpointTests.TestState; | ||
|
||
namespace Tests.Aggregations.Bucket.RareTerms | ||
{ | ||
/** | ||
* A multi-bucket value source based aggregation which finds "rare" terms — terms that are at the long-tail of the | ||
* distribution and are not frequent. Conceptually, this is like a terms aggregation that is sorted by _count ascending. | ||
* As noted in the terms aggregation docs, actually ordering a terms agg by count ascending has unbounded error. | ||
* Instead, you should use the rare_terms aggregation | ||
* | ||
* See the Elasticsearch documentation on {ref_current}/search-aggregations-bucket-rare-terms-aggregation.html[rare terms aggregation] for more detail. | ||
*/ | ||
public class RareTermsAggregationUsageTests : AggregationUsageTestBase | ||
{ | ||
public RareTermsAggregationUsageTests(ReadOnlyCluster i, EndpointUsage usage) : base(i, usage) { } | ||
|
||
protected override object AggregationJson => new | ||
{ | ||
names = new | ||
{ | ||
meta = new | ||
{ | ||
foo = "bar" | ||
}, | ||
rare_terms = new | ||
{ | ||
field = "name", | ||
max_doc_count = 5, | ||
missing = "n/a", | ||
precision = 0.001 | ||
} | ||
} | ||
}; | ||
|
||
protected override Func<AggregationContainerDescriptor<Project>, IAggregationContainer> FluentAggs => a => a | ||
.RareTerms("names", st => st | ||
.Field(p => p.Name) | ||
.Missing("n/a") | ||
.MaximumDocumentCount(5) | ||
.Precision(0.001) | ||
.Meta(m => m | ||
.Add("foo", "bar") | ||
) | ||
); | ||
|
||
protected override AggregationDictionary InitializerAggs => | ||
new RareTermsAggregation("names") | ||
{ | ||
Field = Infer.Field<Project>(p => p.Name), | ||
MaximumDocumentCount = 5, | ||
Precision = 0.001, | ||
Missing = "n/a", | ||
Meta = new Dictionary<string, object> { { "foo", "bar" } } | ||
}; | ||
|
||
protected override void ExpectResponse(ISearchResponse<Project> response) | ||
{ | ||
response.ShouldBeValid(); | ||
var rareTerms = response.Aggregations.RareTerms("names"); | ||
rareTerms.Should().NotBeNull(); | ||
rareTerms.Buckets.Should().NotBeNull(); | ||
rareTerms.Buckets.Count.Should().BeGreaterThan(0); | ||
foreach (var item in rareTerms.Buckets) | ||
{ | ||
item.Key.Should().NotBeNullOrEmpty(); | ||
item.DocCount.Should().BeGreaterOrEqualTo(1); | ||
} | ||
rareTerms.Meta.Should().NotBeNull().And.HaveCount(1); | ||
rareTerms.Meta["foo"].Should().Be("bar"); | ||
} | ||
} | ||
} |