Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: make histogramQuantile handle case of zero samples #5419

Merged
merged 2 commits into from
May 26, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions libflux/go/libflux/buildinfo.gen.go
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,7 @@ var sourceHashes = map[string]string{
"stdlib/experimental/polyline/polyline_test.flux": "16473dce4f71dcdbe1e3f90b350ab1e56a513679cb5c3f872e0f2d7678d42d1f",
"stdlib/experimental/preview_test.flux": "cca570d25b17ed201a0ecc7ebf9e547ccff2aa0814a3ac49f12faa938cbdaf73",
"stdlib/experimental/prometheus/prometheus.flux": "dc01322d3c0655661a8c2279c69acf50d02791a670fb0d681947af6726bc2f4d",
"stdlib/experimental/prometheus/prometheus_histogramQuantile_test.flux": "15da11b9c9d43cbc1c579de7064d7f3870b1c77b610ce3c567e8eb14f40ee090",
"stdlib/experimental/prometheus/prometheus_histogramQuantile_test.flux": "95a708f68ebd42b7d88f1ec62e8f8aa2f08995f64ed280e5bf41f910a0502057",
"stdlib/experimental/quantile_test.flux": "e3cf2ee9716c1179139d0a388c24b24f1c25ca329b27bebaeb53b7e1b608ead2",
"stdlib/experimental/query/from.flux": "713b7feb6904d64cf4cbd235396ff6ff20e1eb96578190c0ac3be4f37df7c362",
"stdlib/experimental/record/record.flux": "273eebb2ee5cb9b153940ca0f42ed5b97b3d86de07d91c96a75508682d84feae",
Expand Down Expand Up @@ -493,7 +493,7 @@ var sourceHashes = map[string]string{
"stdlib/universe/highestAverage_test.flux": "65744d16b7c5d8ac2f4e3c47621195de1840ad72b12bfbb692d4f645e71a00a8",
"stdlib/universe/highestCurrent_test.flux": "c285ff40a7d8789d2c20bcf90d8063b710499ddc24621d627f6693c30351ebe5",
"stdlib/universe/highestMax_test.flux": "fff9f21422d2607afb9ff2786d67d173c7cd797418eb7b3521f8cf7e49443b88",
"stdlib/universe/histogram_quantile_test.flux": "3ce3d5e6ce27fd8bb2e84da031faca2dafef2566737842e534fbd667ffa8a4f6",
"stdlib/universe/histogram_quantile_test.flux": "f7f6799d6787e3e291ccdbbbd1787ca3ca505f0a901b99f4381b0f3ee1b62c77",
"stdlib/universe/histogram_test.flux": "e9e9775f80ac7c2a76044e6e0e8a89045d736c6ab618e8de6d7d1ebe9848938e",
"stdlib/universe/holt_winters_panic_test.flux": "204eb8044d634e5350a364eac466eb51e7f549e4ac7f454de7b244ba272b248f",
"stdlib/universe/holt_winters_test.flux": "9bc8441527867b6c075d003034a3def1748766df478ba8b434e2a2297ead0ec0",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -375,3 +375,34 @@ testcase prometheus_histogramQuantile_onNonmonotonicDrop {

testing.diff(got: got, want: want)
}

testcase prometheus_histogramQuantile_zeroSamples {
inData =
"#group,false,false,true,true,false,false,true,true
#datatype,string,long,string,string,dateTime:RFC3339,double,string,string
#default,_result,,,,,,,
,result,table,_field,_measurement,_time,_value,le,org
,,0,qc_all_duration_seconds,prometheus,2021-10-08T00:00:00.412729Z,0,0.001,0001
,,2,qc_all_duration_seconds,prometheus,2021-10-08T00:00:00.412729Z,0,0.005,0001
,,3,qc_all_duration_seconds,prometheus,2021-10-08T00:00:00.412729Z,0,0.025,0001
,,4,qc_all_duration_seconds,prometheus,2021-10-08T00:00:00.412729Z,0,0.125,0001
,,4,qc_all_duration_seconds,prometheus,2021-10-08T00:00:00.412729Z,0,0.625,0001
,,5,qc_all_duration_seconds,prometheus,2021-10-08T00:00:00.412729Z,0,3.125,0001
,,6,qc_all_duration_seconds,prometheus,2021-10-08T00:00:00.412729Z,0,15.625,0001
,,7,qc_all_duration_seconds,prometheus,2021-10-08T00:00:00.412729Z,0,+Inf,0001
"
outData =
"#group,false,false,true,true,true,true,false,true,false,true
#datatype,string,long,string,string,dateTime:RFC3339,dateTime:RFC3339,dateTime:RFC3339,string,double,string
#default,_result,,,,,,,,,
,result,table,_field,_measurement,_start,_stop,_time,org,_value,quantile
,,0,qc_all_duration_seconds,prometheus,2021-10-08T00:00:00Z,2021-10-08T00:01:00Z,2021-10-08T00:00:00.412729Z,0001,,0.99
"
want = csv.from(csv: outData)
got =
csv.from(csv: inData)
|> range(start: 2021-10-08T00:00:00Z, stop: 2021-10-08T00:01:00Z)
|> prometheus.histogramQuantile(quantile: 0.99, metricVersion: 2)

testing.diff(got: got, want: want)
}
100 changes: 73 additions & 27 deletions stdlib/universe/histogram_quantile.go
Original file line number Diff line number Diff line change
Expand Up @@ -251,51 +251,97 @@ func (t histogramQuantileTransformation) Process(id execute.DatasetID, tbl flux.
})
}

q, ok, err := t.computeQuantile(cdf)
result, err := t.computeQuantile(cdf)
if err != nil {
return err
}
if !ok {
if result.action == drop {
return nil
}
if err := execute.AppendKeyValues(tbl.Key(), builder); err != nil {
return err
}
if err := builder.AppendFloat(valueIdx, q); err != nil {
return err
if result.action == appendValue {
if err := builder.AppendFloat(valueIdx, result.v); err != nil {
return err
}
} else {
// action is appendNil
if err := builder.AppendNil(valueIdx); err != nil {
return err
}

}
return nil
}

func (t *histogramQuantileTransformation) computeQuantile(cdf []bucket) (float64, bool, error) {
type quantileAction int

const (
appendValue quantileAction = iota
appendNil
drop
)

type quantileResult struct {
action quantileAction
v float64
}

// isMonotonic will check if the buckets are monotonic and
// return true if so.
//
// If force is set, it will force them to be monotonic
// by assuming no increase from the previous bucket.
// When force is set, this function will always return true.
func isMonotonic(force bool, cdf []bucket) bool {
prevCount := 0.0
for i := range cdf {
if cdf[i].count < prevCount {
if force {
cdf[i].count = prevCount
} else {
return false
}
} else {
prevCount = cdf[i].count
}
}
return true
}

func (t *histogramQuantileTransformation) computeQuantile(cdf []bucket) (quantileResult, error) {
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The issue here was that the Flux stdlib function histogram_quantile gets a wrong answer for some input data.

This function accepts a cumulative distribution function (a cumulative histogram produced from the input table data) and produces the requested quantile.

When the cdf contains all zeroes, this function would return the bound of the last histogram bucket, which is incorrect. The right thing to do for that case is to return a null value, since we can't compute a quantile if we didn't actually receive any observations.

if len(cdf) == 0 {
return 0, false, errors.New(codes.FailedPrecondition, "histogram is empty")
return quantileResult{}, errors.New(codes.FailedPrecondition, "histogram is empty")
}

if !isMonotonic(t.spec.OnNonmonotonic == onNonmonotonicForce, cdf) {
switch t.spec.OnNonmonotonic {
case onNonmonotonicError:
return quantileResult{}, errors.New(codes.FailedPrecondition, "histogram records counts are not monotonic")
case onNonmonotonicDrop:
return quantileResult{action: drop}, nil
default:
// "force" is not possible because isMonotonic will fix the buckets
return quantileResult{}, errors.Newf(codes.Internal, "unknown or unexpected value for onNonmonotonic: %q", t.spec.OnNonmonotonic)
}
}
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sometimes the histogram buckets are not monotonic (which they should be if they are cumulative) due to late-arriving data on the edge. The OnNonmonotonic parameter describes what to do in this case.

Checking for monotonicity first (and fixing if needed and requested by the user) avoids a bug that occurred when the total observation count was pulled from the last bucket before it was "fixed" in the case of forcing monotonicity.

This is not really related to the issue the user found but I saw it here and fixed it. The test case histogramQuantileOnNonmonotonicForceLastBucket below verifies this fix.


// Find rank index and check counts are monotonic
prevCount := 0.0
totalCount := cdf[len(cdf)-1].count
if totalCount == 0 {
// Produce a null value if there were no samples
return quantileResult{action: appendNil}, nil
}
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here is where we bail and produce a null value for the case of zero observations.


rank := t.spec.Quantile * totalCount
rankIdx := -1
for i, b := range cdf {
if b.count < prevCount {
switch t.spec.OnNonmonotonic {
case onNonmonotonicError:
return 0, false, errors.New(codes.FailedPrecondition, "histogram records counts are not monotonic")
case onNonmonotonicForce:
b.count = prevCount
case onNonmonotonicDrop:
return 0, false, nil
default:
return 0, false, errors.Newf(codes.Internal, "unknown value for onNonmonotonic: %q", t.spec.OnNonmonotonic)
}
} else {
prevCount = b.count
}

if rank >= b.count {
rankIdx = i
}
}

var (
lowerCount,
lowerBound,
Expand All @@ -311,7 +357,7 @@ func (t *histogramQuantileTransformation) computeQuantile(cdf []bucket) (float64
upperBound = cdf[0].upperBound
case len(cdf) - 1:
// Quantile is above the highest upper bound, simply return it as it must be finite
return cdf[len(cdf)-1].upperBound, true, nil
return quantileResult{action: appendValue, v: cdf[len(cdf)-1].upperBound}, nil
default:
lowerCount = cdf[rankIdx].count
lowerBound = cdf[rankIdx].upperBound
Expand All @@ -320,19 +366,19 @@ func (t *histogramQuantileTransformation) computeQuantile(cdf []bucket) (float64
}
if rank == lowerCount {
// No need to interpolate
return lowerBound, true, nil
return quantileResult{action: appendValue, v: lowerBound}, nil
}
if math.IsInf(lowerBound, -1) {
// We cannot interpolate with infinity
return upperBound, true, nil
return quantileResult{action: appendValue, v: upperBound}, nil
}
if math.IsInf(upperBound, 1) {
// We cannot interpolate with infinity
return lowerBound, true, nil
return quantileResult{action: appendValue, v: lowerBound}, nil
}
// Compute quantile using linear interpolation
scale := (rank - lowerCount) / (upperCount - lowerCount)
return lowerBound + (upperBound-lowerBound)*scale, true, nil
return quantileResult{action: appendValue, v: lowerBound + (upperBound-lowerBound)*scale}, nil
}

func (t histogramQuantileTransformation) UpdateWatermark(id execute.DatasetID, mark execute.Time) error {
Expand Down
80 changes: 80 additions & 0 deletions stdlib/universe/histogram_quantile_test.flux
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,42 @@ testcase histogramQuantileOnNonmonotonicForce {
testing.diff(got, want)
}

testcase histogramQuantileOnNonmonotonicForceLastBucket {
inData =
"
#datatype,string,long,dateTime:RFC3339,string,double,double,string
#group,false,false,true,true,false,false,true
#default,_result,,,,,,
,result,table,_time,_field,_value,le,_measurement
,,0,2018-05-22T19:53:00Z,x_duration_seconds,1,0.1,l
,,0,2018-05-22T19:53:00Z,x_duration_seconds,2,0.2,l
,,0,2018-05-22T19:53:00Z,x_duration_seconds,2,0.3,l
,,0,2018-05-22T19:53:00Z,x_duration_seconds,2,0.4,l
,,0,2018-05-22T19:53:00Z,x_duration_seconds,2,0.5,l
,,0,2018-05-22T19:53:00Z,x_duration_seconds,2,0.6,l
,,0,2018-05-22T19:53:00Z,x_duration_seconds,2,0.7,l
,,0,2018-05-22T19:53:00Z,x_duration_seconds,8,0.8,l
,,0,2018-05-22T19:53:00Z,x_duration_seconds,10,0.9,l
,,0,2018-05-22T19:53:00Z,x_duration_seconds,0,+Inf,l
"
outData =
"
#datatype,string,long,dateTime:RFC3339,dateTime:RFC3339,dateTime:RFC3339,string,double,string
#group,false,false,true,true,true,true,false,true
#default,_result,,,,,,,
,result,table,_start,_stop,_time,_field,_value,_measurement
,,0,2018-05-22T19:53:00Z,2030-01-01T00:00:00Z,2018-05-22T19:53:00Z,x_duration_seconds,0.8500000000000001,l
"

got =
csv.from(csv: inData)
|> range(start: 2018-05-22T19:53:00Z)
|> histogramQuantile(quantile: 0.9, onNonmonotonic: "force")
want = csv.from(csv: outData)

testing.diff(got, want)
}

testcase histogramQuantileOnNonmonotonicDrop {
inData =
"
Expand Down Expand Up @@ -214,3 +250,47 @@ testcase histogramQuantileOnNonmonotonicDrop {

testing.diff(got, want)
}

testcase histogramQuantileNoSamples {
inData =
"
#datatype,string,long,dateTime:RFC3339,string,double,double,string
#group,false,false,true,true,false,false,true
#default,_result,,,,,,
,result,table,_time,_field,_value,le,_measurement
,,0,2018-05-22T19:53:00Z,x_duration_seconds,0,0.1,l
,,0,2018-05-22T19:53:00Z,x_duration_seconds,0,0.2,l
,,0,2018-05-22T19:53:00Z,x_duration_seconds,0,0.3,l
,,0,2018-05-22T19:53:00Z,x_duration_seconds,0,0.4,l
,,0,2018-05-22T19:53:00Z,x_duration_seconds,0,0.5,l
,,0,2018-05-22T19:53:00Z,x_duration_seconds,0,0.6,l
,,0,2018-05-22T19:53:00Z,x_duration_seconds,0,0.7,l
,,0,2018-05-22T19:53:00Z,x_duration_seconds,0,0.8,l
,,0,2018-05-22T19:53:00Z,x_duration_seconds,0,0.9,l
,,0,2018-05-22T19:53:00Z,x_duration_seconds,0,+Inf,l
,,1,2018-05-22T19:53:00Z,y_duration_seconds,0,-Inf,l
,,1,2018-05-22T19:53:00Z,y_duration_seconds,10,0.2,l
,,1,2018-05-22T19:53:00Z,y_duration_seconds,15,0.4,l
,,1,2018-05-22T19:53:00Z,y_duration_seconds,25,0.6,l
,,1,2018-05-22T19:53:00Z,y_duration_seconds,35,0.8,l
,,1,2018-05-22T19:53:00Z,y_duration_seconds,45,1,l
,,1,2018-05-22T19:53:00Z,y_duration_seconds,45,+Inf,l
"
outData =
"
#datatype,string,long,dateTime:RFC3339,dateTime:RFC3339,dateTime:RFC3339,string,double,string
#group,false,false,true,true,true,true,false,true
#default,_result,,,,,,,
,result,table,_start,_stop,_time,_field,_value,_measurement
,,0,2018-05-22T19:53:00Z,2030-01-01T00:00:00Z,2018-05-22T19:53:00Z,x_duration_seconds,,l
,,1,2018-05-22T19:53:00Z,2030-01-01T00:00:00Z,2018-05-22T19:53:00Z,y_duration_seconds,0.91,l
"

got =
csv.from(csv: inData)
|> range(start: 2018-05-22T19:53:00Z)
|> histogramQuantile(quantile: 0.9, onNonmonotonic: "drop")
want = csv.from(csv: outData)

testing.diff(got, want)
}