From a0419139502d57c4d2e2e12dd45e056533273e61 Mon Sep 17 00:00:00 2001 From: Marcus Gartner Date: Thu, 16 Sep 2021 11:08:44 -0400 Subject: [PATCH] opt: refactor histogram range calculation Histogram range calculation has been refactored into a single switch statement. This prohibits inconsistencies between the three previous functions `getRange`, `getRangeNonNumeric` and `isNonNumeric`. It also highlights the preference for calculating numeric ranges when possible. Release note: None --- pkg/sql/opt/props/histogram.go | 188 ++++++++++++++-------------- pkg/sql/opt/props/histogram_test.go | 2 +- 2 files changed, 93 insertions(+), 97 deletions(-) diff --git a/pkg/sql/opt/props/histogram.go b/pkg/sql/opt/props/histogram.go index 4cf2b417f2d1..8cf25e393a6f 100644 --- a/pkg/sql/opt/props/histogram.go +++ b/pkg/sql/opt/props/histogram.go @@ -713,95 +713,112 @@ func getFilteredBucket( // := 210,557,119,328,878,600 // func getRangesBeforeAndAfter( - bucketLowerBound, bucketUpperBound, spanLowerBound, spanUpperBound tree.Datum, swap bool, + beforeLowerBound, beforeUpperBound, afterLowerBound, afterUpperBound tree.Datum, swap bool, ) (rangeBefore, rangeAfter float64, ok bool) { // If the data types don't match, don't bother trying to calculate the range // sizes. This should almost never happen, but we want to avoid type // assertion errors below. typesMatch := - bucketLowerBound.ResolvedType().Equivalent(bucketUpperBound.ResolvedType()) && - bucketUpperBound.ResolvedType().Equivalent(spanLowerBound.ResolvedType()) && - spanLowerBound.ResolvedType().Equivalent(spanUpperBound.ResolvedType()) + beforeLowerBound.ResolvedType().Equivalent(beforeUpperBound.ResolvedType()) && + beforeUpperBound.ResolvedType().Equivalent(afterLowerBound.ResolvedType()) && + afterLowerBound.ResolvedType().Equivalent(afterUpperBound.ResolvedType()) if !typesMatch { return 0, 0, false } if swap { - bucketLowerBound, bucketUpperBound = bucketUpperBound, bucketLowerBound - spanLowerBound, spanUpperBound = spanUpperBound, spanLowerBound + beforeLowerBound, beforeUpperBound = beforeUpperBound, beforeLowerBound + afterLowerBound, afterUpperBound = afterUpperBound, afterLowerBound } // TODO(rytaft): handle more types here. // Note: the calculations below assume that bucketLowerBound is inclusive and // Span.PreferInclusive() has been called on the span. + switch beforeLowerBound.ResolvedType().Family() { + case types.IntFamily: + rangeBefore = float64(*beforeUpperBound.(*tree.DInt)) - float64(*beforeLowerBound.(*tree.DInt)) + rangeAfter = float64(*afterUpperBound.(*tree.DInt)) - float64(*afterLowerBound.(*tree.DInt)) + return rangeBefore, rangeAfter, true - getRange := func(lowerBound, upperBound tree.Datum) (rng float64, ok bool) { - switch lowerBound.ResolvedType().Family() { - case types.IntFamily: - rng = float64(*upperBound.(*tree.DInt)) - float64(*lowerBound.(*tree.DInt)) - return rng, true - - case types.DateFamily: - lower := lowerBound.(*tree.DDate) - upper := upperBound.(*tree.DDate) - if lower.IsFinite() && upper.IsFinite() { - rng = float64(upper.PGEpochDays()) - float64(lower.PGEpochDays()) - return rng, true - } - return 0, false - - case types.DecimalFamily: - lower, err := lowerBound.(*tree.DDecimal).Float64() - if err != nil { - return 0, false - } - upper, err := upperBound.(*tree.DDecimal).Float64() - if err != nil { - return 0, false - } - rng = upper - lower - return rng, true - - case types.FloatFamily: - rng = float64(*upperBound.(*tree.DFloat)) - float64(*lowerBound.(*tree.DFloat)) - return rng, true - - case types.TimestampFamily: - lower := lowerBound.(*tree.DTimestamp).Time - upper := upperBound.(*tree.DTimestamp).Time - rng = float64(upper.Sub(lower)) - return rng, true - - case types.TimestampTZFamily: - lower := lowerBound.(*tree.DTimestampTZ).Time - upper := upperBound.(*tree.DTimestampTZ).Time - rng = float64(upper.Sub(lower)) - return rng, true - - case types.TimeFamily: - lower := lowerBound.(*tree.DTime) - upper := upperBound.(*tree.DTime) - rng = float64(*upper) - float64(*lower) - return rng, true - - case types.TimeTZFamily: - lower := lowerBound.(*tree.DTimeTZ).TimeOfDay - upper := upperBound.(*tree.DTimeTZ).TimeOfDay - rng = float64(upper) - float64(lower) - return rng, true - - default: - return 0, false + case types.DateFamily: + lowerBefore := beforeLowerBound.(*tree.DDate) + upperBefore := beforeUpperBound.(*tree.DDate) + lowerAfter := afterLowerBound.(*tree.DDate) + upperAfter := afterUpperBound.(*tree.DDate) + if lowerBefore.IsFinite() && upperBefore.IsFinite() && lowerAfter.IsFinite() && upperAfter.IsFinite() { + rangeBefore = float64(upperBefore.PGEpochDays()) - float64(lowerBefore.PGEpochDays()) + rangeAfter = float64(upperAfter.PGEpochDays()) - float64(lowerAfter.PGEpochDays()) + return rangeBefore, rangeAfter, true } - } + return 0, 0, false - getRangeNonNumeric := func( - lowerBoundBefore, upperBoundBefore, lowerBoundAfter, upperBoundAfter tree.Datum, - ) (rngBefore, rngAfter float64, ok bool) { + case types.DecimalFamily: + lowerBefore, err := beforeLowerBound.(*tree.DDecimal).Float64() + if err != nil { + return 0, 0, false + } + upperBefore, err := beforeUpperBound.(*tree.DDecimal).Float64() + if err != nil { + return 0, 0, false + } + lowerAfter, err := afterLowerBound.(*tree.DDecimal).Float64() + if err != nil { + return 0, 0, false + } + upperAfter, err := afterUpperBound.(*tree.DDecimal).Float64() + if err != nil { + return 0, 0, false + } + rangeBefore = upperBefore - lowerBefore + rangeAfter = upperAfter - lowerAfter + return rangeBefore, rangeAfter, true + + case types.FloatFamily: + rangeBefore = float64(*beforeUpperBound.(*tree.DFloat)) - float64(*beforeLowerBound.(*tree.DFloat)) + rangeAfter = float64(*afterUpperBound.(*tree.DFloat)) - float64(*afterLowerBound.(*tree.DFloat)) + return rangeBefore, rangeAfter, true + + case types.TimestampFamily: + lowerBefore := beforeLowerBound.(*tree.DTimestamp).Time + upperBefore := beforeUpperBound.(*tree.DTimestamp).Time + lowerAfter := afterLowerBound.(*tree.DTimestamp).Time + upperAfter := afterUpperBound.(*tree.DTimestamp).Time + rangeBefore = float64(upperBefore.Sub(lowerBefore)) + rangeAfter = float64(upperAfter.Sub(lowerAfter)) + return rangeBefore, rangeAfter, true + + case types.TimestampTZFamily: + lowerBefore := beforeLowerBound.(*tree.DTimestampTZ).Time + upperBefore := beforeUpperBound.(*tree.DTimestampTZ).Time + lowerAfter := afterLowerBound.(*tree.DTimestampTZ).Time + upperAfter := afterUpperBound.(*tree.DTimestampTZ).Time + rangeBefore = float64(upperBefore.Sub(lowerBefore)) + rangeAfter = float64(upperAfter.Sub(lowerAfter)) + return rangeBefore, rangeAfter, true + + case types.TimeFamily: + lowerBefore := beforeLowerBound.(*tree.DTime) + upperBefore := beforeUpperBound.(*tree.DTime) + lowerAfter := afterLowerBound.(*tree.DTime) + upperAfter := afterUpperBound.(*tree.DTime) + rangeBefore = float64(*upperBefore) - float64(*lowerBefore) + rangeAfter = float64(*upperAfter) - float64(*lowerAfter) + return rangeBefore, rangeAfter, true + + case types.TimeTZFamily: + lowerBefore := beforeLowerBound.(*tree.DTimeTZ).TimeOfDay + upperBefore := beforeUpperBound.(*tree.DTimeTZ).TimeOfDay + lowerAfter := afterLowerBound.(*tree.DTimeTZ).TimeOfDay + upperAfter := afterUpperBound.(*tree.DTimeTZ).TimeOfDay + rangeBefore = float64(upperBefore) - float64(lowerBefore) + rangeAfter = float64(upperAfter) - float64(lowerAfter) + return rangeBefore, rangeAfter, true - // Utilizes an array to simplify number of repetitive calls. + case types.StringFamily, types.BytesFamily, types.UuidFamily, types.INetFamily: + // For non-numeric types, convert the datums to encoded keys to + // approximate the range. We utilize an array to reduce repetitive code. boundArr := [4]tree.Datum{ - lowerBoundBefore, upperBoundBefore, lowerBoundAfter, upperBoundAfter, + beforeLowerBound, beforeUpperBound, afterLowerBound, afterUpperBound, } var boundArrByte [4][]byte @@ -821,27 +838,17 @@ func getRangesBeforeAndAfter( boundArrByte[i] = getFixedLenArr(boundArrByte[i], ind, 8 /* fixLen */) } - rngBefore = float64(binary.BigEndian.Uint64(boundArrByte[1]) - + rangeBefore = float64(binary.BigEndian.Uint64(boundArrByte[1]) - binary.BigEndian.Uint64(boundArrByte[0])) - rngAfter = float64(binary.BigEndian.Uint64(boundArrByte[3]) - + rangeAfter = float64(binary.BigEndian.Uint64(boundArrByte[3]) - binary.BigEndian.Uint64(boundArrByte[2])) - return rngBefore, rngAfter, true - } + return rangeBefore, rangeAfter, true - // For non-numeric types, compute the prefix across all bucket/span bounds. - ok = false - if isNonNumeric(bucketLowerBound.ResolvedType()) { - rangeBefore, rangeAfter, ok = getRangeNonNumeric( - bucketLowerBound, bucketUpperBound, spanLowerBound, spanUpperBound, - ) - } else { - okBefore, okAfter := false, false - rangeBefore, okBefore = getRange(bucketLowerBound, bucketUpperBound) - rangeAfter, okAfter = getRange(spanLowerBound, spanUpperBound) - ok = okBefore && okAfter + default: + // Range calculations are not supported for the given type family. + return 0, 0, false } - return rangeBefore, rangeAfter, ok } // isDiscrete returns true if the given data type is discrete. @@ -853,17 +860,6 @@ func isDiscrete(typ *types.T) bool { return false } -// isNonNumeric returns true if the given data type is non-numeric. -// Note: this function does not support all non-numeric data-types within -// cockroach db. -func isNonNumeric(typ *types.T) bool { - switch typ.Family() { - case types.StringFamily, types.BytesFamily, types.UuidFamily, types.INetFamily: - return true - } - return false -} - // getCommonPrefix returns the first index where the value at said index differs // across all byte arrays in byteArr. byteArr must contain at least one element // to compute a common prefix. diff --git a/pkg/sql/opt/props/histogram_test.go b/pkg/sql/opt/props/histogram_test.go index ad7bf64e417a..583d7e66ba59 100644 --- a/pkg/sql/opt/props/histogram_test.go +++ b/pkg/sql/opt/props/histogram_test.go @@ -644,7 +644,7 @@ func TestFilterBucket(t *testing.T) { runTest(h, testData, types.TimeTZFamily) }) - t.Run("string/bytes", func(t *testing.T) { + t.Run("string-bytes", func(t *testing.T) { typesToTest := []struct { family types.Family createDatumFn func(string) tree.Datum