From 0683c90dedef5a71a12674361a5bd6182a37e8be Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Fri, 26 Aug 2022 14:18:46 -0400 Subject: [PATCH] REST tests for normalize agg (#89629) This adds a REST test for the normalize pipeline agg so we have backwards compatibility tests for it. --- .../pipeline/normalize-aggregation.asciidoc | 2 +- .../test/analytics/normalize.yml | 237 +++++++++++++++++- 2 files changed, 229 insertions(+), 10 deletions(-) diff --git a/docs/reference/aggregations/pipeline/normalize-aggregation.asciidoc b/docs/reference/aggregations/pipeline/normalize-aggregation.asciidoc index 63384d5994af9..8989b2c66c7f2 100644 --- a/docs/reference/aggregations/pipeline/normalize-aggregation.asciidoc +++ b/docs/reference/aggregations/pipeline/normalize-aggregation.asciidoc @@ -72,7 +72,7 @@ _mean_:: [4.63, 4.63, 9.63, 49.63, 9.63, 9.63, 19.63] -_zscore_:: +_z-score_:: This method normalizes such that each value represents how far it is from the mean relative to the standard deviation x' = (x - mean_x) / stdev_x diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/analytics/normalize.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/analytics/normalize.yml index 6c7766a75cfae..de9edb8d3af0e 100644 --- a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/analytics/normalize.yml +++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/analytics/normalize.yml @@ -48,32 +48,251 @@ setup: user: "d" --- -"Basic Search": +rescale_0_1: + - skip: + features: close_to + + - do: + search: + index: foo + body: + size: 0 + aggs: + users_by_day: + date_histogram: + field: timestamp + calendar_interval: day + aggs: + percent_of_total_users: + normalize: + buckets_path: _count + method: rescale_0_1 + + - length: { aggregations.users_by_day.buckets: 3 } + - match: { aggregations.users_by_day.buckets.0.key_as_string: "2017-01-01T00:00:00.000Z" } + - match: { aggregations.users_by_day.buckets.0.doc_count: 3 } + - close_to: { aggregations.users_by_day.buckets.0.percent_of_total_users.value: { value: 1.0, error: 0.05 } } + - match: { aggregations.users_by_day.buckets.1.key_as_string: "2017-01-02T00:00:00.000Z" } + - match: { aggregations.users_by_day.buckets.1.doc_count: 2 } + - close_to: { aggregations.users_by_day.buckets.1.percent_of_total_users.value: { value: 0.5, error: 0.05 }} + - match: { aggregations.users_by_day.buckets.2.key_as_string: "2017-01-03T00:00:00.000Z" } + - match: { aggregations.users_by_day.buckets.2.doc_count: 1 } + - close_to: { aggregations.users_by_day.buckets.2.percent_of_total_users.value: { value: 0.0, error: 0.05 }} + +--- +rescale_0_100: + - skip: + features: close_to + + - do: + search: + index: foo + body: + size: 0 + aggs: + users_by_day: + date_histogram: + field: timestamp + calendar_interval: day + aggs: + percent_of_total_users: + normalize: + buckets_path: _count + method: rescale_0_100 + + - length: { aggregations.users_by_day.buckets: 3 } + - match: { aggregations.users_by_day.buckets.0.key_as_string: "2017-01-01T00:00:00.000Z" } + - match: { aggregations.users_by_day.buckets.0.doc_count: 3 } + - close_to: { aggregations.users_by_day.buckets.0.percent_of_total_users.value: { value: 100, error: 0.5 }} + - match: { aggregations.users_by_day.buckets.1.key_as_string: "2017-01-02T00:00:00.000Z" } + - match: { aggregations.users_by_day.buckets.1.doc_count: 2 } + - close_to: { aggregations.users_by_day.buckets.1.percent_of_total_users.value: { value: 50, error: 0.5 }} + - match: { aggregations.users_by_day.buckets.2.key_as_string: "2017-01-03T00:00:00.000Z" } + - match: { aggregations.users_by_day.buckets.2.doc_count: 1 } + - close_to: { aggregations.users_by_day.buckets.2.percent_of_total_users.value: { value: 0, error: 0.5 }} + +--- +percent_of_sum: + - skip: + features: close_to + + - do: + search: + index: foo + body: + size: 0 + aggs: + users_by_day: + date_histogram: + field: timestamp + calendar_interval: day + aggs: + percent_of_total_users: + normalize: + buckets_path: _count + method: percent_of_sum + + - length: { aggregations.users_by_day.buckets: 3 } + - match: { aggregations.users_by_day.buckets.0.key_as_string: "2017-01-01T00:00:00.000Z" } + - match: { aggregations.users_by_day.buckets.0.doc_count: 3 } + - close_to: { aggregations.users_by_day.buckets.0.percent_of_total_users.value: { value: 0.5, error: 0.05 }} + - match: { aggregations.users_by_day.buckets.1.key_as_string: "2017-01-02T00:00:00.000Z" } + - match: { aggregations.users_by_day.buckets.1.doc_count: 2 } + - close_to: { aggregations.users_by_day.buckets.1.percent_of_total_users.value: { value: 0.3, error: 0.05 }} + - match: { aggregations.users_by_day.buckets.2.key_as_string: "2017-01-03T00:00:00.000Z" } + - match: { aggregations.users_by_day.buckets.2.doc_count: 1 } + - close_to: { aggregations.users_by_day.buckets.2.percent_of_total_users.value: { value: 0.2, error: 0.05 }} + +--- +mean: + - skip: + features: close_to + + - do: + search: + index: foo + body: + size: 0 + aggs: + users_by_day: + date_histogram: + field: timestamp + calendar_interval: day + aggs: + percent_of_total_users: + normalize: + buckets_path: _count + method: mean + + - length: { aggregations.users_by_day.buckets: 3 } + - match: { aggregations.users_by_day.buckets.0.key_as_string: "2017-01-01T00:00:00.000Z" } + - match: { aggregations.users_by_day.buckets.0.doc_count: 3 } + - close_to: { aggregations.users_by_day.buckets.0.percent_of_total_users.value: { value: 0.5, error: 0.05 }} + - match: { aggregations.users_by_day.buckets.1.key_as_string: "2017-01-02T00:00:00.000Z" } + - match: { aggregations.users_by_day.buckets.1.doc_count: 2 } + - close_to: { aggregations.users_by_day.buckets.1.percent_of_total_users.value: { value: 0.0, error: 0.05 }} + - match: { aggregations.users_by_day.buckets.2.key_as_string: "2017-01-03T00:00:00.000Z" } + - match: { aggregations.users_by_day.buckets.2.doc_count: 1 } + - close_to: { aggregations.users_by_day.buckets.2.percent_of_total_users.value: { value: -0.5, error: 0.05 }} + +--- +zscore: + - skip: + features: close_to - do: search: - index: "foo" + index: foo body: size: 0 aggs: users_by_day: date_histogram: - field: "timestamp" - calendar_interval: "day" + field: timestamp + calendar_interval: day aggs: percent_of_total_users: normalize: - buckets_path: "_count" - method: "percent_of_sum" + buckets_path: _count + method: z-score - length: { aggregations.users_by_day.buckets: 3 } - match: { aggregations.users_by_day.buckets.0.key_as_string: "2017-01-01T00:00:00.000Z" } - match: { aggregations.users_by_day.buckets.0.doc_count: 3 } - - match: { aggregations.users_by_day.buckets.0.percent_of_total_users.value: 0.5 } + - close_to: { aggregations.users_by_day.buckets.0.percent_of_total_users.value: { value: 1.2, error: 0.05 }} - match: { aggregations.users_by_day.buckets.1.key_as_string: "2017-01-02T00:00:00.000Z" } - match: { aggregations.users_by_day.buckets.1.doc_count: 2 } - - match: { aggregations.users_by_day.buckets.1.percent_of_total_users.value: 0.3333333333333333 } + - close_to: { aggregations.users_by_day.buckets.1.percent_of_total_users.value: { value: 0.0, error: 0.05 }} - match: { aggregations.users_by_day.buckets.2.key_as_string: "2017-01-03T00:00:00.000Z" } - match: { aggregations.users_by_day.buckets.2.doc_count: 1 } - - match: { aggregations.users_by_day.buckets.2.percent_of_total_users.value: 0.16666666666666666 } + - close_to: { aggregations.users_by_day.buckets.2.percent_of_total_users.value: { value: -1.22, error: 0.05 }} + +--- +softmax: + - skip: + features: close_to + - do: + search: + index: foo + body: + size: 0 + aggs: + users_by_day: + date_histogram: + field: timestamp + calendar_interval: day + aggs: + percent_of_total_users: + normalize: + buckets_path: _count + method: softmax + + - length: { aggregations.users_by_day.buckets: 3 } + - match: { aggregations.users_by_day.buckets.0.key_as_string: "2017-01-01T00:00:00.000Z" } + - match: { aggregations.users_by_day.buckets.0.doc_count: 3 } + - close_to: { aggregations.users_by_day.buckets.0.percent_of_total_users.value: { value: 0.67, error: 0.05 }} + - match: { aggregations.users_by_day.buckets.1.key_as_string: "2017-01-02T00:00:00.000Z" } + - match: { aggregations.users_by_day.buckets.1.doc_count: 2 } + - close_to: { aggregations.users_by_day.buckets.1.percent_of_total_users.value: { value: 0.24, error: 0.05 }} + - match: { aggregations.users_by_day.buckets.2.key_as_string: "2017-01-03T00:00:00.000Z" } + - match: { aggregations.users_by_day.buckets.2.doc_count: 1 } + - close_to: { aggregations.users_by_day.buckets.2.percent_of_total_users.value: { value: 0.09, error: 0.05 }} + +--- +format: + - skip: + features: close_to + + - do: + search: + index: foo + body: + size: 0 + aggs: + users_by_day: + date_histogram: + field: timestamp + calendar_interval: day + aggs: + percent_of_total_users: + normalize: + buckets_path: _count + method: percent_of_sum + format: 00.00% + + - length: { aggregations.users_by_day.buckets: 3 } + - match: { aggregations.users_by_day.buckets.0.key_as_string: "2017-01-01T00:00:00.000Z" } + - match: { aggregations.users_by_day.buckets.0.doc_count: 3 } + - close_to: { aggregations.users_by_day.buckets.0.percent_of_total_users.value: { value: 0.5, error: 0.05 }} + - match: { aggregations.users_by_day.buckets.0.percent_of_total_users.value_as_string: 50.00% } + - match: { aggregations.users_by_day.buckets.1.key_as_string: "2017-01-02T00:00:00.000Z" } + - match: { aggregations.users_by_day.buckets.1.doc_count: 2 } + - close_to: { aggregations.users_by_day.buckets.1.percent_of_total_users.value: { value: 0.3, error: 0.05 }} + - match: { aggregations.users_by_day.buckets.1.percent_of_total_users.value_as_string: 33.33% } + - match: { aggregations.users_by_day.buckets.2.key_as_string: "2017-01-03T00:00:00.000Z" } + - match: { aggregations.users_by_day.buckets.2.doc_count: 1 } + - close_to: { aggregations.users_by_day.buckets.2.percent_of_total_users.value: { value: 0.2, error: 0.05 }} + - match: { aggregations.users_by_day.buckets.2.percent_of_total_users.value_as_string: 16.67% } + +--- +bad path: + - skip: + features: close_to + + - do: + catch: /No aggregation found for path \[badpath\]/ + search: + index: foo + body: + size: 0 + aggs: + users_by_day: + date_histogram: + field: timestamp + calendar_interval: day + aggs: + percent_of_total_users: + normalize: + buckets_path: badpath + method: rescale_0_1