From 4a69973464b6b36afc39cf8443e88e2f2e91ce3d Mon Sep 17 00:00:00 2001 From: Christopher Wolff Date: Thu, 25 May 2023 17:36:28 -0700 Subject: [PATCH 1/2] fix: make histogramQuantile handle case of zero samples --- libflux/go/libflux/buildinfo.gen.go | 4 +- .../prometheus_histogramQuantile_test.flux | 31 ++++++ stdlib/universe/histogram_quantile.go | 100 +++++++++++++----- stdlib/universe/histogram_quantile_test.flux | 80 ++++++++++++++ 4 files changed, 186 insertions(+), 29 deletions(-) diff --git a/libflux/go/libflux/buildinfo.gen.go b/libflux/go/libflux/buildinfo.gen.go index d47c820454..577f76dd99 100644 --- a/libflux/go/libflux/buildinfo.gen.go +++ b/libflux/go/libflux/buildinfo.gen.go @@ -215,7 +215,7 @@ var sourceHashes = map[string]string{ "stdlib/experimental/polyline/polyline_test.flux": "16473dce4f71dcdbe1e3f90b350ab1e56a513679cb5c3f872e0f2d7678d42d1f", "stdlib/experimental/preview_test.flux": "cca570d25b17ed201a0ecc7ebf9e547ccff2aa0814a3ac49f12faa938cbdaf73", "stdlib/experimental/prometheus/prometheus.flux": "dc01322d3c0655661a8c2279c69acf50d02791a670fb0d681947af6726bc2f4d", - "stdlib/experimental/prometheus/prometheus_histogramQuantile_test.flux": "15da11b9c9d43cbc1c579de7064d7f3870b1c77b610ce3c567e8eb14f40ee090", + "stdlib/experimental/prometheus/prometheus_histogramQuantile_test.flux": "95a708f68ebd42b7d88f1ec62e8f8aa2f08995f64ed280e5bf41f910a0502057", "stdlib/experimental/quantile_test.flux": "e3cf2ee9716c1179139d0a388c24b24f1c25ca329b27bebaeb53b7e1b608ead2", "stdlib/experimental/query/from.flux": "713b7feb6904d64cf4cbd235396ff6ff20e1eb96578190c0ac3be4f37df7c362", "stdlib/experimental/record/record.flux": "273eebb2ee5cb9b153940ca0f42ed5b97b3d86de07d91c96a75508682d84feae", @@ -493,7 +493,7 @@ var sourceHashes = map[string]string{ "stdlib/universe/highestAverage_test.flux": "65744d16b7c5d8ac2f4e3c47621195de1840ad72b12bfbb692d4f645e71a00a8", "stdlib/universe/highestCurrent_test.flux": "c285ff40a7d8789d2c20bcf90d8063b710499ddc24621d627f6693c30351ebe5", "stdlib/universe/highestMax_test.flux": "fff9f21422d2607afb9ff2786d67d173c7cd797418eb7b3521f8cf7e49443b88", - "stdlib/universe/histogram_quantile_test.flux": "3ce3d5e6ce27fd8bb2e84da031faca2dafef2566737842e534fbd667ffa8a4f6", + "stdlib/universe/histogram_quantile_test.flux": "f7f6799d6787e3e291ccdbbbd1787ca3ca505f0a901b99f4381b0f3ee1b62c77", "stdlib/universe/histogram_test.flux": "e9e9775f80ac7c2a76044e6e0e8a89045d736c6ab618e8de6d7d1ebe9848938e", "stdlib/universe/holt_winters_panic_test.flux": "204eb8044d634e5350a364eac466eb51e7f549e4ac7f454de7b244ba272b248f", "stdlib/universe/holt_winters_test.flux": "9bc8441527867b6c075d003034a3def1748766df478ba8b434e2a2297ead0ec0", diff --git a/stdlib/experimental/prometheus/prometheus_histogramQuantile_test.flux b/stdlib/experimental/prometheus/prometheus_histogramQuantile_test.flux index 1f0038148a..7195b25e8c 100644 --- a/stdlib/experimental/prometheus/prometheus_histogramQuantile_test.flux +++ b/stdlib/experimental/prometheus/prometheus_histogramQuantile_test.flux @@ -375,3 +375,34 @@ testcase prometheus_histogramQuantile_onNonmonotonicDrop { testing.diff(got: got, want: want) } + +testcase prometheus_histogramQuantile_zeroSamples { + inData = + "#group,false,false,true,true,false,false,true,true +#datatype,string,long,string,string,dateTime:RFC3339,double,string,string +#default,_result,,,,,,, +,result,table,_field,_measurement,_time,_value,le,org +,,0,qc_all_duration_seconds,prometheus,2021-10-08T00:00:00.412729Z,0,0.001,0001 +,,2,qc_all_duration_seconds,prometheus,2021-10-08T00:00:00.412729Z,0,0.005,0001 +,,3,qc_all_duration_seconds,prometheus,2021-10-08T00:00:00.412729Z,0,0.025,0001 +,,4,qc_all_duration_seconds,prometheus,2021-10-08T00:00:00.412729Z,0,0.125,0001 +,,4,qc_all_duration_seconds,prometheus,2021-10-08T00:00:00.412729Z,0,0.625,0001 +,,5,qc_all_duration_seconds,prometheus,2021-10-08T00:00:00.412729Z,0,3.125,0001 +,,6,qc_all_duration_seconds,prometheus,2021-10-08T00:00:00.412729Z,0,15.625,0001 +,,7,qc_all_duration_seconds,prometheus,2021-10-08T00:00:00.412729Z,0,+Inf,0001 +" + outData = + "#group,false,false,true,true,true,true,false,true,false,true +#datatype,string,long,string,string,dateTime:RFC3339,dateTime:RFC3339,dateTime:RFC3339,string,double,string +#default,_result,,,,,,,,, +,result,table,_field,_measurement,_start,_stop,_time,org,_value,quantile +,,0,qc_all_duration_seconds,prometheus,2021-10-08T00:00:00Z,2021-10-08T00:01:00Z,2021-10-08T00:00:00.412729Z,0001,,0.99 +" + want = csv.from(csv: outData) + got = + csv.from(csv: inData) + |> range(start: 2021-10-08T00:00:00Z, stop: 2021-10-08T00:01:00Z) + |> prometheus.histogramQuantile(quantile: 0.99, metricVersion: 2) + + testing.diff(got: got, want: want) +} diff --git a/stdlib/universe/histogram_quantile.go b/stdlib/universe/histogram_quantile.go index 51d4d4b3a7..533ed71ea9 100644 --- a/stdlib/universe/histogram_quantile.go +++ b/stdlib/universe/histogram_quantile.go @@ -251,51 +251,97 @@ func (t histogramQuantileTransformation) Process(id execute.DatasetID, tbl flux. }) } - q, ok, err := t.computeQuantile(cdf) + result, err := t.computeQuantile(cdf) if err != nil { return err } - if !ok { + if result.action == drop { return nil } if err := execute.AppendKeyValues(tbl.Key(), builder); err != nil { return err } - if err := builder.AppendFloat(valueIdx, q); err != nil { - return err + if result.action == appendValue { + if err := builder.AppendFloat(valueIdx, result.v); err != nil { + return err + } + } else { + // action is appendNil + if err := builder.AppendNil(valueIdx); err != nil { + return err + } + } return nil } -func (t *histogramQuantileTransformation) computeQuantile(cdf []bucket) (float64, bool, error) { +type quantileAction int + +const ( + appendValue quantileAction = iota + appendNil + drop +) + +type quantileResult struct { + action quantileAction + v float64 +} + +// isMonotonic will check if the buckets are monotonic and +// return true if so. +// +// If force is set, it will force them to be monotonic +// by assuming no increase from the previous bucket. +// When force is is set, this function will always return true. +func isMonotonic(force bool, cdf []bucket) bool { + prevCount := 0.0 + for i := range cdf { + if cdf[i].count < prevCount { + if force { + cdf[i].count = prevCount + } else { + return false + } + } else { + prevCount = cdf[i].count + } + } + return true +} + +func (t *histogramQuantileTransformation) computeQuantile(cdf []bucket) (quantileResult, error) { if len(cdf) == 0 { - return 0, false, errors.New(codes.FailedPrecondition, "histogram is empty") + return quantileResult{}, errors.New(codes.FailedPrecondition, "histogram is empty") } + + if !isMonotonic(t.spec.OnNonmonotonic == onNonmonotonicForce, cdf) { + switch t.spec.OnNonmonotonic { + case onNonmonotonicError: + return quantileResult{}, errors.New(codes.FailedPrecondition, "histogram records counts are not monotonic") + case onNonmonotonicDrop: + return quantileResult{action: drop}, nil + default: + // "force" is not possible because isMonotonic will fix the buckets + return quantileResult{}, errors.Newf(codes.Internal, "unknown or unexpected value for onNonmonotonic: %q", t.spec.OnNonmonotonic) + } + } + // Find rank index and check counts are monotonic - prevCount := 0.0 totalCount := cdf[len(cdf)-1].count + if totalCount == 0 { + // Produce a null value if there were no samples + return quantileResult{action: appendNil}, nil + } + rank := t.spec.Quantile * totalCount rankIdx := -1 for i, b := range cdf { - if b.count < prevCount { - switch t.spec.OnNonmonotonic { - case onNonmonotonicError: - return 0, false, errors.New(codes.FailedPrecondition, "histogram records counts are not monotonic") - case onNonmonotonicForce: - b.count = prevCount - case onNonmonotonicDrop: - return 0, false, nil - default: - return 0, false, errors.Newf(codes.Internal, "unknown value for onNonmonotonic: %q", t.spec.OnNonmonotonic) - } - } else { - prevCount = b.count - } - if rank >= b.count { rankIdx = i } } + var ( lowerCount, lowerBound, @@ -311,7 +357,7 @@ func (t *histogramQuantileTransformation) computeQuantile(cdf []bucket) (float64 upperBound = cdf[0].upperBound case len(cdf) - 1: // Quantile is above the highest upper bound, simply return it as it must be finite - return cdf[len(cdf)-1].upperBound, true, nil + return quantileResult{action: appendValue, v: cdf[len(cdf)-1].upperBound}, nil default: lowerCount = cdf[rankIdx].count lowerBound = cdf[rankIdx].upperBound @@ -320,19 +366,19 @@ func (t *histogramQuantileTransformation) computeQuantile(cdf []bucket) (float64 } if rank == lowerCount { // No need to interpolate - return lowerBound, true, nil + return quantileResult{action: appendValue, v: lowerBound}, nil } if math.IsInf(lowerBound, -1) { // We cannot interpolate with infinity - return upperBound, true, nil + return quantileResult{action: appendValue, v: upperBound}, nil } if math.IsInf(upperBound, 1) { // We cannot interpolate with infinity - return lowerBound, true, nil + return quantileResult{action: appendValue, v: lowerBound}, nil } // Compute quantile using linear interpolation scale := (rank - lowerCount) / (upperCount - lowerCount) - return lowerBound + (upperBound-lowerBound)*scale, true, nil + return quantileResult{action: appendValue, v: lowerBound + (upperBound-lowerBound)*scale}, nil } func (t histogramQuantileTransformation) UpdateWatermark(id execute.DatasetID, mark execute.Time) error { diff --git a/stdlib/universe/histogram_quantile_test.flux b/stdlib/universe/histogram_quantile_test.flux index 01e4425920..2a8ce0c3fc 100644 --- a/stdlib/universe/histogram_quantile_test.flux +++ b/stdlib/universe/histogram_quantile_test.flux @@ -172,6 +172,42 @@ testcase histogramQuantileOnNonmonotonicForce { testing.diff(got, want) } +testcase histogramQuantileOnNonmonotonicForceLastBucket { + inData = + " +#datatype,string,long,dateTime:RFC3339,string,double,double,string +#group,false,false,true,true,false,false,true +#default,_result,,,,,, +,result,table,_time,_field,_value,le,_measurement +,,0,2018-05-22T19:53:00Z,x_duration_seconds,1,0.1,l +,,0,2018-05-22T19:53:00Z,x_duration_seconds,2,0.2,l +,,0,2018-05-22T19:53:00Z,x_duration_seconds,2,0.3,l +,,0,2018-05-22T19:53:00Z,x_duration_seconds,2,0.4,l +,,0,2018-05-22T19:53:00Z,x_duration_seconds,2,0.5,l +,,0,2018-05-22T19:53:00Z,x_duration_seconds,2,0.6,l +,,0,2018-05-22T19:53:00Z,x_duration_seconds,2,0.7,l +,,0,2018-05-22T19:53:00Z,x_duration_seconds,8,0.8,l +,,0,2018-05-22T19:53:00Z,x_duration_seconds,10,0.9,l +,,0,2018-05-22T19:53:00Z,x_duration_seconds,0,+Inf,l +" + outData = + " +#datatype,string,long,dateTime:RFC3339,dateTime:RFC3339,dateTime:RFC3339,string,double,string +#group,false,false,true,true,true,true,false,true +#default,_result,,,,,,, +,result,table,_start,_stop,_time,_field,_value,_measurement +,,0,2018-05-22T19:53:00Z,2030-01-01T00:00:00Z,2018-05-22T19:53:00Z,x_duration_seconds,0.8500000000000001,l +" + + got = + csv.from(csv: inData) + |> range(start: 2018-05-22T19:53:00Z) + |> histogramQuantile(quantile: 0.9, onNonmonotonic: "force") + want = csv.from(csv: outData) + + testing.diff(got, want) +} + testcase histogramQuantileOnNonmonotonicDrop { inData = " @@ -214,3 +250,47 @@ testcase histogramQuantileOnNonmonotonicDrop { testing.diff(got, want) } + +testcase histogramQuantileNoSamples { + inData = + " +#datatype,string,long,dateTime:RFC3339,string,double,double,string +#group,false,false,true,true,false,false,true +#default,_result,,,,,, +,result,table,_time,_field,_value,le,_measurement +,,0,2018-05-22T19:53:00Z,x_duration_seconds,0,0.1,l +,,0,2018-05-22T19:53:00Z,x_duration_seconds,0,0.2,l +,,0,2018-05-22T19:53:00Z,x_duration_seconds,0,0.3,l +,,0,2018-05-22T19:53:00Z,x_duration_seconds,0,0.4,l +,,0,2018-05-22T19:53:00Z,x_duration_seconds,0,0.5,l +,,0,2018-05-22T19:53:00Z,x_duration_seconds,0,0.6,l +,,0,2018-05-22T19:53:00Z,x_duration_seconds,0,0.7,l +,,0,2018-05-22T19:53:00Z,x_duration_seconds,0,0.8,l +,,0,2018-05-22T19:53:00Z,x_duration_seconds,0,0.9,l +,,0,2018-05-22T19:53:00Z,x_duration_seconds,0,+Inf,l +,,1,2018-05-22T19:53:00Z,y_duration_seconds,0,-Inf,l +,,1,2018-05-22T19:53:00Z,y_duration_seconds,10,0.2,l +,,1,2018-05-22T19:53:00Z,y_duration_seconds,15,0.4,l +,,1,2018-05-22T19:53:00Z,y_duration_seconds,25,0.6,l +,,1,2018-05-22T19:53:00Z,y_duration_seconds,35,0.8,l +,,1,2018-05-22T19:53:00Z,y_duration_seconds,45,1,l +,,1,2018-05-22T19:53:00Z,y_duration_seconds,45,+Inf,l +" + outData = + " +#datatype,string,long,dateTime:RFC3339,dateTime:RFC3339,dateTime:RFC3339,string,double,string +#group,false,false,true,true,true,true,false,true +#default,_result,,,,,,, +,result,table,_start,_stop,_time,_field,_value,_measurement +,,0,2018-05-22T19:53:00Z,2030-01-01T00:00:00Z,2018-05-22T19:53:00Z,x_duration_seconds,,l +,,1,2018-05-22T19:53:00Z,2030-01-01T00:00:00Z,2018-05-22T19:53:00Z,y_duration_seconds,0.91,l +" + + got = + csv.from(csv: inData) + |> range(start: 2018-05-22T19:53:00Z) + |> histogramQuantile(quantile: 0.9, onNonmonotonic: "drop") + want = csv.from(csv: outData) + + testing.diff(got, want) +} From 8d1b45539bf02995608ed445a1bf1f2813946ca1 Mon Sep 17 00:00:00 2001 From: "Christopher M. Wolff" Date: Fri, 26 May 2023 10:32:15 -0700 Subject: [PATCH 2/2] chore: fix comment typo in stdlib/universe/histogram_quantile.go Co-authored-by: Gavin Cabbage --- stdlib/universe/histogram_quantile.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stdlib/universe/histogram_quantile.go b/stdlib/universe/histogram_quantile.go index 533ed71ea9..33b73e2660 100644 --- a/stdlib/universe/histogram_quantile.go +++ b/stdlib/universe/histogram_quantile.go @@ -293,7 +293,7 @@ type quantileResult struct { // // If force is set, it will force them to be monotonic // by assuming no increase from the previous bucket. -// When force is is set, this function will always return true. +// When force is set, this function will always return true. func isMonotonic(force bool, cdf []bucket) bool { prevCount := 0.0 for i := range cdf {