Skip to content

Commit

Permalink
cli: use more interesting values for statement-bundle recreate
Browse files Browse the repository at this point in the history
This commit introduces a couple of helper methods to `tree` package that
extend `tree.Datum.Prev` and `tree.Datum.Next` for certain datum types.
Methods of `tree.Datum` interface are pretty strict on some guarantees
(like lexicographical ordering), but in some places we don't need such
things, yet we want to get a "smaller" or a "larger" datum than any
given one if possible. This is now supported by the new utility methods.
These methods are now utilized by `debug statement-bundle recreate`
command when `--placeholder` options are used in order to generate all
possible plans. In one example of a stmt bundle the number of generated
plans increased from 14 to 19. This commit additionally improves the
relevant code when recreating the bundle a bit.

Epic: None

Release note: None
  • Loading branch information
yuzefovich committed Feb 15, 2023
1 parent 18ecb5d commit 4d9e721
Show file tree
Hide file tree
Showing 5 changed files with 238 additions and 12 deletions.
47 changes: 41 additions & 6 deletions pkg/cli/statement_bundle.go
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,7 @@ func runBundleRecreate(cmd *cobra.Command, args []string) (resErr error) {

if placeholderPairs != nil {
placeholderToColMap := make(map[int]string)
placeholderFQColNames := make(map[string]struct{})
for _, placeholderPairStr := range placeholderPairs {
pair := strings.Split(placeholderPairStr, "=")
if len(pair) != 2 {
Expand All @@ -159,8 +160,11 @@ func runBundleRecreate(cmd *cobra.Command, args []string) (resErr error) {
return err
}
placeholderToColMap[n] = pair[1]
placeholderFQColNames[pair[1]] = struct{}{}
}
inputs, outputs, err := getExplainCombinations(conn, explainPrefix, placeholderToColMap, bundle)
inputs, outputs, err := getExplainCombinations(
conn, explainPrefix, placeholderToColMap, placeholderFQColNames, bundle,
)
if err != nil {
return err
}
Expand Down Expand Up @@ -209,6 +213,7 @@ func getExplainCombinations(
conn clisqlclient.Conn,
explainPrefix string,
placeholderToColMap map[int]string,
placeholderFQColNames map[string]struct{},
bundle *statementBundle,
) (inputs [][]string, explainOutputs []string, err error) {

Expand Down Expand Up @@ -273,6 +278,11 @@ func getExplainCombinations(
}
col := columns[0]
fqColName := fmt.Sprintf("%s.%s", tableName, col)
if _, isPlaceholder := placeholderFQColNames[fqColName]; !isPlaceholder {
// This column is not one of the placeholder values, so simply
// ignore it.
continue
}
d, _, err := tree.ParseDTimestamp(nil, stat["created_at"].(string), time.Microsecond)
if err != nil {
panic(err)
Expand All @@ -285,7 +295,9 @@ func getExplainCombinations(

typ := stat["histo_col_type"].(string)
if typ == "" {
fmt.Println("Ignoring column with empty type ", col)
// Empty 'histo_col_type' is used when there is no histogram for
// the column, simply skip this stat (see stats/json.go for more
// details).
continue
}
colTypeRef, err := parser.GetTypeFromValidSQLSyntax(typ)
Expand All @@ -300,6 +312,10 @@ func getExplainCombinations(
continue
}
buckets := stat["histo_buckets"].([]interface{})
// addedNonExistent tracks whether we included at least one
// "previous" datum which - according to the histograms - is not
// present in the table.
var addedNonExistent bool
var maxUpperBound tree.Datum
for _, b := range buckets {
bucket := b.(map[string]interface{})
Expand All @@ -318,9 +334,24 @@ func getExplainCombinations(
if maxUpperBound == nil || maxUpperBound.Compare(&evalCtx, datum) < 0 {
maxUpperBound = datum
}
if numRange > 0 {
if prev, ok := datum.Prev(&evalCtx); ok {
// If we have any datums within the bucket (i.e. not equal to
// the upper bound), we always attempt to add a "previous" to
// the upper bound datum.
addPrevious := numRange > 0
if numRange == 0 && !addedNonExistent {
// If our bucket says that there are no values present in
// the table between the current upper bound and the upper
// bound of the previous histogram bucket, then we only
// attempt to add the "previous" non-existent datum if we
// haven't done so already (this is to avoid the redundant
// non-existent values which would get treated in the same
// fashion anyway).
addPrevious = true
}
if addPrevious {
if prev, ok := tree.DatumPrev(datum, &evalCtx, &evalCtx.CollationEnv); ok {
bucketMap[key] = append(bucketMap[key], tree.AsStringWithFlags(prev, fmtCtx))
addedNonExistent = addedNonExistent || numRange == 0
}
}
}
Expand All @@ -330,7 +361,7 @@ func getExplainCombinations(
}
// Create a value that's outside of histogram range by incrementing the
// max value that we've seen.
if outside, ok := maxUpperBound.Next(&evalCtx); ok {
if outside, ok := tree.DatumNext(maxUpperBound, &evalCtx, &evalCtx.CollationEnv); ok {
colSamples = append(colSamples, tree.AsStringWithFlags(outside, fmtCtx))
}
sort.Strings(colSamples)
Expand Down Expand Up @@ -386,7 +417,8 @@ func getExplainCombinations(
func getExplainOutputs(
conn clisqlclient.Conn, explainPrefix string, statement string, inputs [][]string,
) (explainStrings []string, err error) {
for _, values := range inputs {
fmt.Printf("trying %d placeholder combinations\n", len(inputs))
for i, values := range inputs {
// Run an explain for each possible input.
query := fmt.Sprintf("%s %s", explainPrefix, statement)
args := make([]interface{}, len(values))
Expand All @@ -409,6 +441,9 @@ func getExplainOutputs(
return nil, err
}
explainStrings = append(explainStrings, explainStr.String())
if (i+1)%1000 == 0 {
fmt.Printf("%d placeholder combinations are done\n", i+1)
}
}
return explainStrings, nil
}
Expand Down
17 changes: 12 additions & 5 deletions pkg/cli/statement_bundle_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,17 +32,22 @@ import (
func TestRunExplainCombinations(t *testing.T) {
defer leaktest.AfterTest(t)()
tests := []struct {
bundlePath string
placeholderToColMap map[int]string
expectedInputs [][]string
expectedOutputs []string
bundlePath string
placeholderToColMap map[int]string
placeholderFQColNames map[string]struct{}
expectedInputs [][]string
expectedOutputs []string
}{
{
bundlePath: "bundle",
placeholderToColMap: map[int]string{
1: "public.a.a",
2: "public.a.b",
},
placeholderFQColNames: map[string]struct{}{
"public.a.a": {},
"public.a.b": {},
},
expectedInputs: [][]string{{"999", "8"}},
expectedOutputs: []string{`select
├── scan a
Expand Down Expand Up @@ -82,7 +87,9 @@ func TestRunExplainCombinations(t *testing.T) {
}
}

inputs, outputs, err := getExplainCombinations(conn, "EXPLAIN(OPT)", test.placeholderToColMap, bundle)
inputs, outputs, err := getExplainCombinations(
conn, "EXPLAIN(OPT)", test.placeholderToColMap, test.placeholderFQColNames, bundle,
)
assert.NoError(t, err)
assert.Equal(t, test.expectedInputs, inputs)
assert.Equal(t, test.expectedOutputs, outputs)
Expand Down
1 change: 1 addition & 0 deletions pkg/sql/sem/tree/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,7 @@ go_test(
"constant_test.go",
"datum_integration_test.go",
"datum_invariants_test.go",
"datum_prev_next_test.go",
"datum_test.go",
"expr_test.go",
"format_test.go",
Expand Down
119 changes: 118 additions & 1 deletion pkg/sql/sem/tree/datum.go
Original file line number Diff line number Diff line change
Expand Up @@ -1629,7 +1629,7 @@ func (d *DEncodedKey) Prev(ctx CompareContext) (Datum, bool) {

// Next implements the Datum interface.
func (d *DEncodedKey) Next(ctx CompareContext) (Datum, bool) {
return nil, true
return nil, false
}

// IsMax implements the Datum interface.
Expand Down Expand Up @@ -5794,3 +5794,120 @@ func AdjustValueToType(typ *types.T, inVal Datum) (outVal Datum, err error) {
}
return inVal, nil
}

// DatumPrev returns a datum that is "previous" to the given one. For many types
// it just delegates to Datum.Prev, but for some types that don't have an
// implementation of that function this method makes the best effort to come up
// with a reasonable previous datum that is smaller than the given one.
//
// The return value is undefined if Datum.IsMin returns true or if the value is
// NaN of an infinity (for floats and decimals).
func DatumPrev(
datum Datum, cmpCtx CompareContext, collationEnv *CollationEnvironment,
) (Datum, bool) {
datum = UnwrapDOidWrapper(datum)
prevString := func(s string) (string, bool) {
// In order to obtain a previous string we subtract 1 from the last
// non-zero byte.
b := []byte(s)
lastNonZeroByteIdx := len(b) - 1
for ; lastNonZeroByteIdx >= 0 && b[lastNonZeroByteIdx] == 0; lastNonZeroByteIdx-- {
}
if lastNonZeroByteIdx < 0 {
return "", false
}
b[lastNonZeroByteIdx]--
return string(b), true
}
switch d := datum.(type) {
case *DDecimal:
var prev DDecimal
var sub apd.Decimal
_, err := sub.SetFloat64(1e-6)
if err != nil {
return nil, false
}
_, err = ExactCtx.Sub(&prev.Decimal, &d.Decimal, &sub)
if err != nil {
return nil, false
}
return &prev, true
case *DString:
prev, ok := prevString(string(*d))
if !ok {
return nil, false
}
return NewDString(prev), true
case *DCollatedString:
prev, ok := prevString(d.Contents)
if !ok {
return nil, false
}
c, err := NewDCollatedString(prev, d.Locale, collationEnv)
if err != nil {
return nil, false
}
return c, true
case *DBytes:
prev, ok := prevString(string(*d))
if !ok {
return nil, false
}
return NewDBytes(DBytes(prev)), true
case *DInterval:
// Subtract 1ms.
prev := d.Sub(duration.MakeDuration(1000000 /* nanos */, 0 /* days */, 0 /* months */))
return NewDInterval(prev, types.DefaultIntervalTypeMetadata), true
default:
// TODO(yuzefovich): consider adding support for other datums that don't
// have Datum.Prev implementation (DBitArray, DGeography, DGeometry,
// DBox2D, DJSON, DArray).
return datum.Prev(cmpCtx)
}
}

// DatumNext returns a datum that is "next" to the given one. For many types it
// just delegates to Datum.Next, but for some types that don't have an
// implementation of that function this method makes the best effort to come up
// with a reasonable next datum that is greater than the given one.
//
// The return value is undefined if Datum.IsMax returns true or if the value is
// NaN of an infinity (for floats and decimals).
func DatumNext(
datum Datum, cmpCtx CompareContext, collationEnv *CollationEnvironment,
) (Datum, bool) {
datum = UnwrapDOidWrapper(datum)
switch d := datum.(type) {
case *DDecimal:
var next DDecimal
var add apd.Decimal
_, err := add.SetFloat64(1e-6)
if err != nil {
return nil, false
}
_, err = ExactCtx.Add(&next.Decimal, &d.Decimal, &add)
if err != nil {
return nil, false
}
return &next, true
case *DCollatedString:
s := NewDString(d.Contents)
next, ok := s.Next(cmpCtx)
if !ok {
return nil, false
}
c, err := NewDCollatedString(string(*next.(*DString)), d.Locale, collationEnv)
if err != nil {
return nil, false
}
return c, true
case *DInterval:
next := d.Add(duration.MakeDuration(1000000 /* nanos */, 0 /* days */, 0 /* months */))
return NewDInterval(next, types.DefaultIntervalTypeMetadata), true
default:
// TODO(yuzefovich): consider adding support for other datums that don't
// have Datum.Next implementation (DGeography, DGeometry, DBox2D,
// DJSON).
return datum.Next(cmpCtx)
}
}
66 changes: 66 additions & 0 deletions pkg/sql/sem/tree/datum_prev_next_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
// Copyright 2023 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package tree_test

import (
"math"
"testing"

"github.com/cockroachdb/apd/v3"
"github.com/cockroachdb/cockroach/pkg/sql/randgen"
"github.com/cockroachdb/cockroach/pkg/sql/sem/eval"
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
"github.com/cockroachdb/cockroach/pkg/util/leaktest"
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/cockroach/pkg/util/randutil"
"github.com/stretchr/testify/require"
)

// TestDatumPrevNext verifies that tree.DatumPrev and tree.DatumNext return
// datums that are smaller and larger, respectively, than the given datum if
// ok=true is returned (modulo some edge cases).
func TestDatumPrevNext(t *testing.T) {
defer leaktest.AfterTest(t)()
defer log.Scope(t).Close(t)

rng, _ := randutil.NewTestRand()
var evalCtx eval.Context
const numRuns = 1000
for i := 0; i < numRuns; i++ {
typ := randgen.RandType(rng)
d := randgen.RandDatum(rng, typ, false /* nullOk */)
// Ignore NaNs and infinities.
if f, ok := d.(*tree.DFloat); ok {
if math.IsNaN(float64(*f)) || math.IsInf(float64(*f), 0) {
continue
}
}
if dec, ok := d.(*tree.DDecimal); ok {
if dec.Form == apd.NaN || dec.Form == apd.Infinite {
continue
}
}
if !d.IsMin(&evalCtx) {
if prev, ok := tree.DatumPrev(d, &evalCtx, &evalCtx.CollationEnv); ok {
cmp, err := d.CompareError(&evalCtx, prev)
require.NoError(t, err)
require.True(t, cmp > 0, "d=%s, prev=%s, type=%s", d.String(), prev.String(), d.ResolvedType().SQLString())
}
}
if !d.IsMax(&evalCtx) {
if next, ok := tree.DatumNext(d, &evalCtx, &evalCtx.CollationEnv); ok {
cmp, err := d.CompareError(&evalCtx, next)
require.NoError(t, err)
require.True(t, cmp < 0, "d=%s, next=%s, type=%s", d.String(), next.String(), d.ResolvedType().SQLString())
}
}
}
}

0 comments on commit 4d9e721

Please sign in to comment.