From 19b76e205fe9494f8dd151b8befc73b2c235f1a5 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Fri, 31 Jul 2020 11:08:01 +1000 Subject: [PATCH] Add normalize aggregation (#4886) (#4895) Relates: elastic/elasticsearch#56399 This commit adds the normalize aggregation to the high level client. Co-authored-by: Russ Cam --- docs/aggregations.asciidoc | 4 + .../normalize-aggregation-usage.asciidoc | 106 ++++++++++++++++++ docs/code-standards/descriptors.asciidoc | 1 + src/Nest/Aggregations/AggregateDictionary.cs | 2 + src/Nest/Aggregations/AggregationContainer.cs | 15 +++ .../Normalize/NormalizeAggregation.cs | 91 +++++++++++++++ .../Visitor/AggregationVisitor.cs | 4 + .../Aggregations/Visitor/AggregationWalker.cs | 4 + .../NormalizeAggregationUsageTests.cs | 104 +++++++++++++++++ tests/Tests/CodeStandards/Descriptors.doc.cs | 1 + 10 files changed, 332 insertions(+) create mode 100644 docs/aggregations/pipeline/normalize/normalize-aggregation-usage.asciidoc create mode 100644 src/Nest/Aggregations/Pipeline/Normalize/NormalizeAggregation.cs create mode 100644 tests/Tests/Aggregations/Pipeline/Normalize/NormalizeAggregationUsageTests.cs diff --git a/docs/aggregations.asciidoc b/docs/aggregations.asciidoc index 74edf088ee3..0d4fd0098cb 100644 --- a/docs/aggregations.asciidoc +++ b/docs/aggregations.asciidoc @@ -277,6 +277,8 @@ There are many different types of pipeline aggregation, each computing different * <> +* <> + * <> * <> @@ -321,6 +323,8 @@ include::aggregations/pipeline/moving-average/moving-average-simple-aggregation- include::aggregations/pipeline/moving-function/moving-function-aggregation-usage.asciidoc[] +include::aggregations/pipeline/normalize/normalize-aggregation-usage.asciidoc[] + include::aggregations/pipeline/percentiles-bucket/percentiles-bucket-aggregation-usage.asciidoc[] include::aggregations/pipeline/serial-differencing/serial-differencing-aggregation-usage.asciidoc[] diff --git 
a/docs/aggregations/pipeline/normalize/normalize-aggregation-usage.asciidoc b/docs/aggregations/pipeline/normalize/normalize-aggregation-usage.asciidoc new file mode 100644 index 00000000000..2000172c00a --- /dev/null +++ b/docs/aggregations/pipeline/normalize/normalize-aggregation-usage.asciidoc @@ -0,0 +1,106 @@ +:ref_current: https://www.elastic.co/guide/en/elasticsearch/reference/master + +:github: https://github.com/elastic/elasticsearch-net + +:nuget: https://www.nuget.org/packages + +//// +IMPORTANT NOTE +============== +This file has been generated from https://github.com/elastic/elasticsearch-net/tree/master/src/Tests/Tests/Aggregations/Pipeline/Normalize/NormalizeAggregationUsageTests.cs. +If you wish to submit a PR for any spelling mistakes, typos or grammatical errors for this file, +please modify the original csharp file found at the link and submit the PR with that change. Thanks! +//// + +[[normalize-aggregation-usage]] +=== Normalize Aggregation Usage + +A parent pipeline aggregation which calculates the specific normalized/rescaled value for a specific bucket value. +Values that cannot be normalized, will be skipped using the skip gap policy. 
+ +NOTE: Valid for Elasticsearch 7.9.0+ with at least basic license level + +==== Fluent DSL example + +[source,csharp] +---- +a => a +.DateHistogram("projects_started_per_month", dh => dh + .Field(p => p.StartedOn) + .CalendarInterval(DateInterval.Month) + .Aggregations(aa => aa + .Sum("commits", sm => sm + .Field(p => p.NumberOfCommits) + ) + .Normalize("percent_of_commits", aaa => aaa + .BucketsPath("commits") + .Method(NormalizeMethod.PercentOfSum) + .Format("00.00%") + ) + ) +) +---- + +==== Object Initializer syntax example + +[source,csharp] +---- +new DateHistogramAggregation("projects_started_per_month") +{ + Field = "startedOn", + CalendarInterval = DateInterval.Month, + Aggregations = new SumAggregation("commits", "numberOfCommits") && + new NormalizeAggregation("percent_of_commits", "commits") + { + Method = NormalizeMethod.PercentOfSum, + Format = "00.00%" + } +} +---- + +[source,javascript] +.Example json output +---- +{ + "projects_started_per_month": { + "date_histogram": { + "field": "startedOn", + "calendar_interval": "month" + }, + "aggs": { + "commits": { + "sum": { + "field": "numberOfCommits" + } + }, + "percent_of_commits": { + "normalize": { + "buckets_path": "commits", + "method": "percent_of_sum", + "format": "00.00%" + } + } + } + } +} +---- + +==== Handling Responses + +[source,csharp] +---- +response.ShouldBeValid(); + +var projectsPerMonth = response.Aggregations.DateHistogram("projects_started_per_month"); +projectsPerMonth.Should().NotBeNull(); +projectsPerMonth.Buckets.Should().NotBeNull(); +projectsPerMonth.Buckets.Count.Should().BeGreaterThan(0); + +foreach (var bucket in projectsPerMonth.Buckets) +{ + var normalize = bucket.Normalize("percent_of_commits"); + normalize.Value.Should().BeGreaterOrEqualTo(0); + normalize.ValueAsString.Should().NotBeNullOrEmpty(); +} +---- + diff --git a/docs/code-standards/descriptors.asciidoc b/docs/code-standards/descriptors.asciidoc index fa82ee1fece..d19733e70e0 100644 --- 
a/docs/code-standards/descriptors.asciidoc +++ b/docs/code-standards/descriptors.asciidoc @@ -211,6 +211,7 @@ var methods = from d in YieldAllDescriptors() where !(m.Name == nameof(RankFeatureSigmoidFunctionDescriptor.Pivot) && dt == typeof(RankFeatureSigmoidFunctionDescriptor)) where !(m.Name == nameof(DateHistogramGroupSourceDescriptor.CalendarInterval) && dt == typeof(DateHistogramGroupSourceDescriptor<>)) where !(m.Name == nameof(DateHistogramGroupSourceDescriptor.FixedInterval) && dt == typeof(DateHistogramGroupSourceDescriptor<>)) + where !(m.Name == nameof(NormalizeAggregationDescriptor.Method) && dt == typeof(NormalizeAggregationDescriptor)) select new {m, d, p}; diff --git a/src/Nest/Aggregations/AggregateDictionary.cs b/src/Nest/Aggregations/AggregateDictionary.cs index 4ca68ae0bd1..88e27c919a3 100644 --- a/src/Nest/Aggregations/AggregateDictionary.cs +++ b/src/Nest/Aggregations/AggregateDictionary.cs @@ -122,6 +122,8 @@ public FiltersAggregate Filters(string key) public SingleBucketAggregate Nested(string key) => TryGet(key); + public ValueAggregate Normalize(string key) => TryGet(key); + public SingleBucketAggregate ReverseNested(string key) => TryGet(key); public SingleBucketAggregate Children(string key) => TryGet(key); diff --git a/src/Nest/Aggregations/AggregationContainer.cs b/src/Nest/Aggregations/AggregationContainer.cs index eae4420b7be..94691baf6e1 100644 --- a/src/Nest/Aggregations/AggregationContainer.cs +++ b/src/Nest/Aggregations/AggregationContainer.cs @@ -202,6 +202,10 @@ public interface IAggregationContainer [DataMember(Name = "nested")] INestedAggregation Nested { get; set; } + /// + [DataMember(Name = "normalize")] + INormalizeAggregation Normalize { get; set; } + /// [DataMember(Name = "parent")] IParentAggregation Parent { get; set; } @@ -356,6 +360,9 @@ public class AggregationContainer : IAggregationContainer public INestedAggregation Nested { get; set; } + /// + public INormalizeAggregation Normalize { get; set; } + /// public 
IParentAggregation Parent { get; set; } @@ -513,6 +520,8 @@ public class AggregationContainerDescriptor : DescriptorBase, INestedAggregation> selector ) => _SetInnerAggregation(name, selector, (a, d) => a.Nested = d); + /// + public AggregationContainerDescriptor Normalize(string name, + Func selector + ) => + _SetInnerAggregation(name, selector, (a, d) => a.Normalize = d); + /// public AggregationContainerDescriptor Parent(string name, Func, IParentAggregation> selector diff --git a/src/Nest/Aggregations/Pipeline/Normalize/NormalizeAggregation.cs b/src/Nest/Aggregations/Pipeline/Normalize/NormalizeAggregation.cs new file mode 100644 index 00000000000..7953ea05ca1 --- /dev/null +++ b/src/Nest/Aggregations/Pipeline/Normalize/NormalizeAggregation.cs @@ -0,0 +1,91 @@ +// Licensed to Elasticsearch B.V under one or more agreements. +// Elasticsearch B.V licenses this file to you under the Apache 2.0 License. +// See the LICENSE file in the project root for more information + +using System.Runtime.Serialization; +using Elasticsearch.Net; +using Elasticsearch.Net.Utf8Json; + +namespace Nest +{ + /// + /// A parent pipeline aggregation which calculates the specific normalized/rescaled value for a specific bucket value. + /// Values that cannot be normalized, will be skipped using the skip gap policy. + /// + /// Valid in Elasticsearch 7.9.0+ with at least basic license level. 
+ /// + [InterfaceDataContract] + [ReadAs(typeof(NormalizeAggregation))] + public interface INormalizeAggregation : IPipelineAggregation + { + [DataMember(Name = "method")] + NormalizeMethod Method { get; set; } + } + + /// + public class NormalizeAggregation + : PipelineAggregationBase, INormalizeAggregation + { + internal NormalizeAggregation() { } + + public NormalizeAggregation(string name, SingleBucketsPath bucketsPath) + : base(name, bucketsPath) { } + + internal override void WrapInContainer(AggregationContainer c) => c.Normalize = this; + + /// + public NormalizeMethod Method { get; set; } + } + + /// + public class NormalizeAggregationDescriptor + : PipelineAggregationDescriptorBase + , INormalizeAggregation + { + NormalizeMethod INormalizeAggregation.Method { get; set; } + + /// + public NormalizeAggregationDescriptor Method(NormalizeMethod method) => + Assign(method, (a, v) => a.Method = v); + } + + [StringEnum] + public enum NormalizeMethod + { + /// + /// rescales the data such that the minimum number is zero, and the maximum number is 1, with the rest normalized linearly in-between. + /// + [EnumMember(Value = "rescale_0_1")] + RescaleZeroToOne, + + /// + /// rescales the data such that the minimum number is zero, and the maximum number is 100, with the rest normalized linearly in-between. + /// + [EnumMember(Value = "rescale_0_100")] + RescaleZeroToOneHundred, + + /// + /// normalizes each value so that it represents a percentage of the total sum it attributes to. + /// + [EnumMember(Value = "percent_of_sum")] + PercentOfSum, + + /// + /// normalizes such that each value is normalized by how much it differs from the average. 
+ /// + [EnumMember(Value = "mean")] + Mean, + + /// + /// normalizes such that each value represents how far it is from the mean relative to the standard deviation + /// + [EnumMember(Value = "zscore")] + Zscore, + + /// + /// normalizes such that each value is exponentiated and relative to the sum of the exponents of the original values. + /// + [EnumMember(Value = "softmax")] + Softmax + } +} diff --git a/src/Nest/Aggregations/Visitor/AggregationVisitor.cs b/src/Nest/Aggregations/Visitor/AggregationVisitor.cs index b4572bdba9d..7580c22fef8 100644 --- a/src/Nest/Aggregations/Visitor/AggregationVisitor.cs +++ b/src/Nest/Aggregations/Visitor/AggregationVisitor.cs @@ -72,6 +72,8 @@ public interface IAggregationVisitor void Visit(INestedAggregation aggregation); + void Visit(INormalizeAggregation aggregation); + void Visit(IParentAggregation aggregation); void Visit(IReverseNestedAggregation aggregation); @@ -233,6 +235,8 @@ public virtual void Visit(IRareTermsAggregation aggregation) { } public virtual void Visit(INestedAggregation aggregation) { } + public virtual void Visit(INormalizeAggregation aggregation) { } + public virtual void Visit(IParentAggregation aggregation) { } public virtual void Visit(ICardinalityAggregation aggregation) { } diff --git a/src/Nest/Aggregations/Visitor/AggregationWalker.cs b/src/Nest/Aggregations/Visitor/AggregationWalker.cs index 2aa24ab3ce1..ea57de3b90e 100644 --- a/src/Nest/Aggregations/Visitor/AggregationWalker.cs +++ b/src/Nest/Aggregations/Visitor/AggregationWalker.cs @@ -122,6 +122,10 @@ public void Walk(IAggregationContainer aggregation, IAggregationVisitor visitor) v.Visit(d); Accept(v, d.Aggregations); }); + AcceptAggregation(aggregation.Normalize, visitor, (v, d) => + { + v.Visit(d); + }); AcceptAggregation(aggregation.Parent, visitor, (v, d) => { v.Visit(d); diff --git a/tests/Tests/Aggregations/Pipeline/Normalize/NormalizeAggregationUsageTests.cs 
b/tests/Tests/Aggregations/Pipeline/Normalize/NormalizeAggregationUsageTests.cs new file mode 100644 index 00000000000..ca8c58e377f --- /dev/null +++ b/tests/Tests/Aggregations/Pipeline/Normalize/NormalizeAggregationUsageTests.cs @@ -0,0 +1,104 @@ +// Licensed to Elasticsearch B.V under one or more agreements. +// Elasticsearch B.V licenses this file to you under the Apache 2.0 License. +// See the LICENSE file in the project root for more information + + using System; + using Elastic.Elasticsearch.Xunit.XunitPlumbing; + using FluentAssertions; +using Nest; +using Tests.Core.Extensions; +using Tests.Core.ManagedElasticsearch.Clusters; +using Tests.Domain; +using Tests.Framework.EndpointTests.TestState; + +namespace Tests.Aggregations.Pipeline.Normalize +{ + /** + * A parent pipeline aggregation which calculates the specific normalized/rescaled value for a specific bucket value. + * Values that cannot be normalized, will be skipped using the skip gap policy. + * + * NOTE: Valid for Elasticsearch 7.9.0+ with at least basic license level + */ + [SkipVersion("<7.9.0", "Introduced in 7.9.0")] + public class NormalizeAggregationUsageTests : AggregationUsageTestBase + { + public NormalizeAggregationUsageTests(ReadOnlyCluster cluster, EndpointUsage usage) : base(cluster, usage) { } + + protected override object AggregationJson => new + { + projects_started_per_month = new + { + date_histogram = new + { + field = "startedOn", + calendar_interval = "month", + }, + aggs = new + { + commits = new + { + sum = new + { + field = "numberOfCommits" + } + }, + percent_of_commits = new + { + normalize = new + { + buckets_path = "commits", + method = "percent_of_sum", + format = "00.00%" + } + } + } + } + }; + + protected override Func, IAggregationContainer> FluentAggs => a => a + .DateHistogram("projects_started_per_month", dh => dh + .Field(p => p.StartedOn) + .CalendarInterval(DateInterval.Month) + .Aggregations(aa => aa + .Sum("commits", sm => sm + .Field(p => p.NumberOfCommits) 
+ ) + .Normalize("percent_of_commits", aaa => aaa + .BucketsPath("commits") + .Method(NormalizeMethod.PercentOfSum) + .Format("00.00%") + ) + ) + ); + + protected override AggregationDictionary InitializerAggs => + new DateHistogramAggregation("projects_started_per_month") + { + Field = "startedOn", + CalendarInterval = DateInterval.Month, + Aggregations = new SumAggregation("commits", "numberOfCommits") && + new NormalizeAggregation("percent_of_commits", "commits") + { + Method = NormalizeMethod.PercentOfSum, + Format = "00.00%" + } + }; + + protected override void ExpectResponse(ISearchResponse response) + { + response.ShouldBeValid(); + + var projectsPerMonth = response.Aggregations.DateHistogram("projects_started_per_month"); + projectsPerMonth.Should().NotBeNull(); + projectsPerMonth.Buckets.Should().NotBeNull(); + projectsPerMonth.Buckets.Count.Should().BeGreaterThan(0); + + foreach (var bucket in projectsPerMonth.Buckets) + { + var normalize = bucket.Normalize("percent_of_commits"); + normalize.Value.Should().BeGreaterOrEqualTo(0); + normalize.ValueAsString.Should().NotBeNullOrEmpty(); + } + } + } +} diff --git a/tests/Tests/CodeStandards/Descriptors.doc.cs b/tests/Tests/CodeStandards/Descriptors.doc.cs index 38953df8f8a..8e684436339 100644 --- a/tests/Tests/CodeStandards/Descriptors.doc.cs +++ b/tests/Tests/CodeStandards/Descriptors.doc.cs @@ -227,6 +227,7 @@ from m in d.GetMethods() where !(m.Name == nameof(RankFeatureSigmoidFunctionDescriptor.Pivot) && dt == typeof(RankFeatureSigmoidFunctionDescriptor)) where !(m.Name == nameof(DateHistogramGroupSourceDescriptor.CalendarInterval) && dt == typeof(DateHistogramGroupSourceDescriptor<>)) where !(m.Name == nameof(DateHistogramGroupSourceDescriptor.FixedInterval) && dt == typeof(DateHistogramGroupSourceDescriptor<>)) + where !(m.Name == nameof(NormalizeAggregationDescriptor.Method) && dt == typeof(NormalizeAggregationDescriptor)) select new {m, d, p};