Skip to content

Commit

Permalink
opt: support BYTES for histogram range calculations
Browse files Browse the repository at this point in the history
Fixes #68346

Release note (performance improvement): The accuracy of histogram
calculations for BYTES types has been improved. As a result, the
optimizer should generate more efficient query plans in some cases.
  • Loading branch information
mgartner committed Aug 11, 2021
1 parent 6314f90 commit e2ae5b2
Show file tree
Hide file tree
Showing 3 changed files with 71 additions and 10 deletions.
2 changes: 2 additions & 0 deletions pkg/sql/opt/constraint/testutils.go
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,8 @@ func parseDatumPath(evalCtx *tree.EvalContext, str string, typs []types.Family)
val, _, err = tree.ParseDTimestampTZ(evalCtx, valStr, time.Microsecond)
case types.StringFamily:
val = tree.NewDString(valStr)
case types.BytesFamily:
val = tree.NewDBytes(tree.DBytes(valStr))
case types.OidFamily:
dInt, err := tree.ParseDInt(valStr)
if err == nil {
Expand Down
16 changes: 6 additions & 10 deletions pkg/sql/opt/props/histogram.go
Original file line number Diff line number Diff line change
Expand Up @@ -800,9 +800,10 @@ func getRangesBeforeAndAfter(
) (rngBefore, rngAfter float64, ok bool) {

// Utilizes an array to reduce the number of repetitive calls.
boundArr := []tree.Datum{lowerBoundBefore, upperBoundBefore, lowerBoundAfter,
upperBoundAfter}
boundArrByte := make([][]byte, 4)
boundArr := [4]tree.Datum{
lowerBoundBefore, upperBoundBefore, lowerBoundAfter, upperBoundAfter,
}
var boundArrByte [4][]byte

for i := range boundArr {
var err error
Expand Down Expand Up @@ -857,7 +858,7 @@ func isDiscrete(typ *types.T) bool {
// cockroach db.
func isNonNumeric(typ *types.T) bool {
switch typ.Family() {
case types.StringFamily, types.UuidFamily, types.INetFamily:
case types.StringFamily, types.BytesFamily, types.UuidFamily, types.INetFamily:
return true
}
return false
Expand All @@ -866,12 +867,7 @@ func isNonNumeric(typ *types.T) bool {
// getCommonPrefix returns the first index at which the byte arrays in byteArr
// do not all hold the same value. byteArr must contain at least one element
// to compute a common prefix.
func getCommonPrefix(byteArr [][]byte) int {

if len(byteArr) <= 0 {
panic(errors.AssertionFailedf("byteArr must have at least one element"))
}

func getCommonPrefix(byteArr [4][]byte) int {
// Checks if the current value at index is the same between all byte arrays.
currIndMatching := func(ind int) bool {
for i := 0; i < len(byteArr); i++ {
Expand Down
63 changes: 63 additions & 0 deletions pkg/sql/opt/props/histogram_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -707,6 +707,69 @@ func TestFilterBucket(t *testing.T) {
runTest(h3, t3, types.StringFamily)
})

t.Run("bytes", func(t *testing.T) {
h1 := &Histogram{evalCtx: &evalCtx, col: col, buckets: []cat.HistogramBucket{
{NumEq: 0, NumRange: 0, DistinctRange: 0, UpperBound: getPrevUpperBound(tree.NewDBytes("bear"))},
{NumEq: 5, NumRange: 10, DistinctRange: 10, UpperBound: tree.NewDBytes("bobcat")},
}}
h2 := &Histogram{evalCtx: &evalCtx, col: col, buckets: []cat.HistogramBucket{
{NumEq: 0, NumRange: 0, DistinctRange: 0, UpperBound: getPrevUpperBound(tree.NewDBytes("a"))},
{NumEq: 5, NumRange: 10, DistinctRange: 10, UpperBound: tree.NewDBytes("c")},
}}
h3 := &Histogram{evalCtx: &evalCtx, col: col, buckets: []cat.HistogramBucket{
{NumEq: 0, NumRange: 0, DistinctRange: 0, UpperBound: getPrevUpperBound(tree.NewDBytes("aaaaaaaaaaaa"))},
{NumEq: 5, NumRange: 10, DistinctRange: 10, UpperBound: tree.NewDBytes("cccccccccccc")},
}}

t1 := []testCase{
{
span: "[/bluejay - /boar]",
expected: &cat.HistogramBucket{NumEq: 0, NumRange: 2.92, DistinctRange: 2.92, UpperBound: tree.NewDBytes("boar")},
},
{
span: "[/beer - /bobcat]",
expected: &cat.HistogramBucket{NumEq: 5, NumRange: 9.98, DistinctRange: 9.98, UpperBound: tree.NewDBytes("bobcat")},
},
}

t2 := []testCase{
// Within the CRDB encoding, every null byte is followed by an escape byte
// (255), which is left in for the rangeAfter calculations. For this
// reason, the resulting NumRange is slightly lower than expected: 4.99
// instead of 5.
{
span: "[/a\x00 - /b]",
expected: &cat.HistogramBucket{NumEq: 0, NumRange: 4.99, DistinctRange: 4.99, UpperBound: tree.NewDBytes("b")},
},
{
span: "[/as - /b]",
expected: &cat.HistogramBucket{NumEq: 0, NumRange: 2.76, DistinctRange: 2.76, UpperBound: tree.NewDBytes("b")},
},
{
span: "[/as - /c]",
expected: &cat.HistogramBucket{NumEq: 5, NumRange: 7.77, DistinctRange: 7.77, UpperBound: tree.NewDBytes("c")},
},
{
span: "[/bs - /c]",
expected: &cat.HistogramBucket{NumEq: 5, NumRange: 2.76, DistinctRange: 2.76, UpperBound: tree.NewDBytes("c")},
},
}

// The first 8 bytes of the span's lowerBound and upperBound are the same.
// Hence, the resulting NumRange/DistinctRange should be 0, as rangeAfter
// only considers the first 8 bytes of the bounds.
t3 := []testCase{
{
span: "[/aaaaaaaabbbb - /aaaaaaaacccc]",
expected: &cat.HistogramBucket{NumEq: 0, NumRange: 0, DistinctRange: 0, UpperBound: tree.NewDBytes("aaaaaaaacccc")},
},
}

runTest(h1, t1, types.BytesFamily)
runTest(h2, t2, types.BytesFamily)
runTest(h3, t3, types.BytesFamily)
})

t.Run("uuid", func(t *testing.T) {
l1, err := tree.ParseDUuidFromString("2189ad07-52f2-4d60-83e8-4a8347fef718")
if err != nil {
Expand Down

0 comments on commit e2ae5b2

Please sign in to comment.