Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
136538: sql/stats: skip over histogram buckets for dropped enum values r=yuzefovich a=michae2

While decoding histogram upper bounds, if we discover that a physical representation of an enum value cannot be found in the enum type, assume that the value has been dropped and skip over the bucket. This allows us to continue using the most recent statistics after altering an enum type.

Fixes: #67050

Release note (bug fix): Fix a bug which causes the optimizer to use stale table statistics after altering an enum type used in the table.

136571: TEAMS: ping kv-triage for unittest failures r=tbg a=tbg

Currently unit test nightlies ping (at)cockroachdb/kv, our main handle.
This is not a team we'd like people to just leave to avoid notifications.
Github doesn't seem to offer a way to opt out of notifications for a team
one is on, besides, there are useful notifications going to that handle.

Our triage process is robust without these mentions. They can go to
(at)cockroachdb/kv-notifications for those inclined to consume a
firehose.

This change frees up our main alias (at)cockroachdb/kv for more
meaningful notifications.

136605: kvserver: deflake TestStoreRangeMergeAbandonedFollowersAutomaticallyGarbageCollected r=arulajmani a=arulajmani


Fixes #136597

Release note: None

Co-authored-by: Michael Erickson <[email protected]>
Co-authored-by: Tobias Grieger <[email protected]>
Co-authored-by: Arul Ajmani <[email protected]>
  • Loading branch information
4 people committed Dec 3, 2024
4 parents ef46c67 + 4f35347 + c775e5d + 4902e8b commit 9400dbb
Show file tree
Hide file tree
Showing 10 changed files with 93 additions and 33 deletions.
4 changes: 4 additions & 0 deletions TEAMS.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,10 @@ cockroachdb/sql-queries:
cockroachdb/kv:
aliases:
cockroachdb/kv-triage: roachtest
# This is a hack - we can't have the same key twice,
# but it also doesn't seem worth rewriting the aliases
# into slice form for just this.
'cockroachdb/kv-triage ': unittest
cockroachdb/kv-prs: other
triage_column_id: 14242655
label: T-kv
Expand Down
19 changes: 14 additions & 5 deletions pkg/cmd/bazci/githubpost/githubpost.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,12 +68,21 @@ func DefaultFormatter(ctx context.Context, f Failure) (issues.IssueFormatter, is
}
if len(teams) > 0 {
projColID = teams[0].TriageColumnID
for _, team := range teams {
if !team.SilenceMentions {
mentions = append(mentions, "@"+string(team.Name()))
for _, tm := range teams {
if !tm.SilenceMentions {
var hasAliases bool
for al, purp := range tm.Aliases {
if purp == team.PurposeUnittest {
hasAliases = true
mentions = append(mentions, "@"+strings.TrimSpace(string(al)))
}
}
if !hasAliases {
mentions = append(mentions, "@"+string(tm.Name()))
}
}
if team.Label != "" {
labels = append(labels, team.Label)
if tm.Label != "" {
labels = append(labels, tm.Label)
}
}
}
Expand Down
16 changes: 6 additions & 10 deletions pkg/cmd/bazci/githubpost/githubpost_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ func TestListFailuresFromJSON(t *testing.T) {
testName: "TestStopperWithCancelConcurrent",
title: "util/stop: TestStopperWithCancelConcurrent failed",
message: "this is just a testing issue",
mention: []string{"@cockroachdb/kv"},
mention: []string{"@cockroachdb/kv-triage"},
labels: []string{"C-test-failure", "release-blocker", "T-kv"},
hasProject: true,
}},
Expand All @@ -117,7 +117,7 @@ func TestListFailuresFromJSON(t *testing.T) {
testName: "TestStopperWithCancelConcurrent",
title: "util/stop: TestStopperWithCancelConcurrent failed",
message: "this is just a testing issue",
mention: []string{"@cockroachdb/kv"},
mention: []string{"@cockroachdb/kv-triage"},
labels: []string{"T-kv"},
hasProject: true,
}},
Expand All @@ -131,7 +131,7 @@ func TestListFailuresFromJSON(t *testing.T) {
testName: "TestReplicateQueueRebalance",
title: "kv/kvserver: TestReplicateQueueRebalance failed",
message: "replicate_queue_test.go:88: condition failed to evaluate within 45s: not balanced: [10 1 10 1 8]",
mention: []string{"@cockroachdb/kv"},
mention: []string{"@cockroachdb/kv-triage"},
labels: []string{"C-test-failure", "release-blocker", "T-kv"},
hasProject: true,
}},
Expand All @@ -145,7 +145,7 @@ func TestListFailuresFromJSON(t *testing.T) {
testName: "TestGossipHandlesReplacedNode",
title: "kv/kvserver: TestGossipHandlesReplacedNode failed",
message: "F180711 20:13:15.826193 83 storage/replica.go:1877 [n?,s1,r1/1:/M{in-ax}] on-disk and in-memory state diverged:",
mention: []string{"@cockroachdb/kv"},
mention: []string{"@cockroachdb/kv-triage"},
labels: []string{"C-test-failure", "release-blocker", "T-kv"},
hasProject: true,
}},
Expand Down Expand Up @@ -192,7 +192,7 @@ func TestListFailuresFromJSON(t *testing.T) {
testName: "TestTxnCoordSenderPipelining",
title: "kv/kvclient/kvcoord: TestTxnCoordSenderPipelining failed",
message: `injected failure`,
mention: []string{"@cockroachdb/kv"},
mention: []string{"@cockroachdb/kv-triage"},
labels: []string{"C-test-failure", "release-blocker", "T-kv"},
hasProject: true,
},
Expand All @@ -206,7 +206,7 @@ TestTxnCoordSenderPipelining - 1.00s
Slow passing tests:
TestAnchorKey - 1.01s
`,
mention: []string{"@cockroachdb/kv"},
mention: []string{"@cockroachdb/kv-triage"},
labels: []string{"C-test-failure", "release-blocker", "T-kv"},
hasProject: true,
},
Expand Down Expand Up @@ -414,7 +414,6 @@ func TestListFailuresFromTestXML(t *testing.T) {
fileName string
expPkg string
expIssues []issue
formatter Formatter
}{
{
fileName: "basic.xml",
Expand All @@ -429,7 +428,6 @@ func TestListFailuresFromTestXML(t *testing.T) {
--- FAIL: TestJSONErrors/frues (0.00s)`,
mention: []string{"@cockroachdb/unowned"},
}},
formatter: DefaultFormatter,
},
}

Expand Down Expand Up @@ -481,7 +479,6 @@ func TestPostGeneralFailure(t *testing.T) {
testCases := []struct {
fileName string
expIssues []issue
formatter Formatter
}{
{
fileName: "failed-build-output.txt",
Expand All @@ -490,7 +487,6 @@ func TestPostGeneralFailure(t *testing.T) {
mention: []string{"@cockroachdb/unowned"},
labels: []string{"C-test-failure", "release-blocker", "T-testeng"},
}},
formatter: DefaultFormatter,
},
}

Expand Down
4 changes: 4 additions & 0 deletions pkg/internal/team/TEAMS.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,10 @@ cockroachdb/sql-queries:
cockroachdb/kv:
aliases:
cockroachdb/kv-triage: roachtest
# This is a hack - we can't have the same key twice,
# but it also doesn't seem worth rewriting the aliases
# into slice form for just this.
'cockroachdb/kv-triage ': unittest
cockroachdb/kv-prs: other
triage_column_id: 14242655
label: T-kv
Expand Down
2 changes: 2 additions & 0 deletions pkg/internal/team/team.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,11 +89,13 @@ const (
// PurposeRoachtest indicates that the team handles that should be mentioned
// in roachtest issues should be returned.
PurposeRoachtest = Purpose("roachtest")
PurposeUnittest = Purpose("unittest")
)

var validPurposes = map[Purpose]struct{}{
PurposeOther: {},
PurposeRoachtest: {}, // mention in roachtest issues
PurposeUnittest: {}, // mention in unit test issues
}

// LoadTeams loads the teams from an io input.
Expand Down
4 changes: 3 additions & 1 deletion pkg/kv/kvserver/client_merge_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3000,7 +3000,9 @@ func TestStoreRangeMergeAbandonedFollowersAutomaticallyGarbageCollected(t *testi
tc := testcluster.StartTestCluster(t, 3,
base.TestClusterArgs{
ReplicationMode: base.ReplicationManual,
ServerArgs: base.TestServerArgs{},
ServerArgs: base.TestServerArgs{
Settings: st,
},
})
defer tc.Stopper().Stop(ctx)
scratch := tc.ScratchRange(t)
Expand Down
40 changes: 40 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/distsql_stats
Original file line number Diff line number Diff line change
Expand Up @@ -3612,3 +3612,43 @@ ORDER BY stat->>'name' DESC
----
__merged__ {"distinct_range": 0, "num_eq": 1, "num_range": 0, "upper_bound": "0"}
__forecast__ {"distinct_range": 0, "num_eq": 1, "num_range": 0, "upper_bound": "0"}

# Regression test for #67050: make sure we skip over enum values that have been
# dropped when decoding histograms.

statement ok
CREATE TYPE e67050 AS ENUM ('a', 'b', 'c')

statement ok
CREATE TABLE t67050 (x e67050 PRIMARY KEY)

statement ok
INSERT INTO t67050 VALUES ('a'), ('b'), ('c')

statement ok
ANALYZE t67050

statement ok
DELETE FROM t67050 WHERE x = 'a'

statement ok
ALTER TYPE e67050 DROP VALUE 'a'

query T
SELECT jsonb_pretty(statistics->0->'histo_buckets') FROM
[SHOW STATISTICS USING JSON FOR TABLE t67050]
----
[
{
"distinct_range": 0,
"num_eq": 1,
"num_range": 0,
"upper_bound": "b"
},
{
"distinct_range": 0,
"num_eq": 1,
"num_range": 0,
"upper_bound": "c"
}
]
14 changes: 7 additions & 7 deletions pkg/sql/stats/json.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,29 +64,29 @@ func (js *JSONStatistic) SetHistogram(h *HistogramData) error {
// done across databases. If it is a user-defined type, we need the type name
// resolution to be for the correct database.
js.HistogramColumnType = typ.SQLStringFullyQualified()
js.HistogramBuckets = make([]JSONHistoBucket, len(h.Buckets))
js.HistogramBuckets = make([]JSONHistoBucket, 0, len(h.Buckets))
js.HistogramVersion = h.Version
var a tree.DatumAlloc
for i := range h.Buckets {
b := &h.Buckets[i]
js.HistogramBuckets[i].NumEq = b.NumEq
js.HistogramBuckets[i].NumRange = b.NumRange
js.HistogramBuckets[i].DistinctRange = b.DistinctRange

if b.UpperBound == nil {
return fmt.Errorf("histogram bucket upper bound is unset")
}
datum, err := DecodeUpperBound(h.Version, typ, &a, b.UpperBound)
if err != nil {
if h.ColumnType.Family() == types.EnumFamily && errors.Is(err, types.EnumValueNotFound) {
// Skip over buckets for enum values that were dropped.
continue
}
return err
}

js.HistogramBuckets[i] = JSONHistoBucket{
js.HistogramBuckets = append(js.HistogramBuckets, JSONHistoBucket{
NumEq: b.NumEq,
NumRange: b.NumRange,
DistinctRange: b.DistinctRange,
UpperBound: tree.AsStringWithFlags(datum, tree.FmtExport),
}
})
}
return nil
}
Expand Down
19 changes: 10 additions & 9 deletions pkg/sql/stats/stats_cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -688,41 +688,42 @@ func (sc *TableStatisticsCache) parseStats(
// the resulting buckets into tabStat.Histogram.
func DecodeHistogramBuckets(tabStat *TableStatistic) error {
h := tabStat.HistogramData
var offset int
if tabStat.NullCount > 0 {
// A bucket for NULL is not persisted, but we create a fake one to
// make histograms easier to work with. The length of res.Histogram
// is therefore 1 greater than the length of the histogram data
// buckets.
// TODO(michae2): Combine this with setHistogramBuckets, especially if we
// need to change both after #6224 is fixed (NULLS LAST in index ordering).
tabStat.Histogram = make([]cat.HistogramBucket, len(h.Buckets)+1)
tabStat.Histogram = make([]cat.HistogramBucket, 1, len(h.Buckets)+1)
tabStat.Histogram[0] = cat.HistogramBucket{
NumEq: float64(tabStat.NullCount),
NumRange: 0,
DistinctRange: 0,
UpperBound: tree.DNull,
}
offset = 1
} else {
tabStat.Histogram = make([]cat.HistogramBucket, len(h.Buckets))
offset = 0
tabStat.Histogram = make([]cat.HistogramBucket, 0, len(h.Buckets))
}

// Decode the histogram data so that it's usable by the opt catalog.
var a tree.DatumAlloc
for i := offset; i < len(tabStat.Histogram); i++ {
bucket := &h.Buckets[i-offset]
for i := range h.Buckets {
bucket := &h.Buckets[i]
datum, err := DecodeUpperBound(h.Version, h.ColumnType, &a, bucket.UpperBound)
if err != nil {
if h.ColumnType.Family() == types.EnumFamily && errors.Is(err, types.EnumValueNotFound) {
// Skip over buckets for enum values that were dropped.
continue
}
return err
}
tabStat.Histogram[i] = cat.HistogramBucket{
tabStat.Histogram = append(tabStat.Histogram, cat.HistogramBucket{
NumEq: float64(bucket.NumEq),
NumRange: float64(bucket.NumRange),
DistinctRange: bucket.DistinctRange,
UpperBound: datum,
}
})
}
return nil
}
Expand Down
4 changes: 3 additions & 1 deletion pkg/sql/types/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -2823,6 +2823,8 @@ func (t *T) IsNumeric() bool {
}
}

var EnumValueNotFound = errors.New("could not find enum value")

// EnumGetIdxOfPhysical returns the index within the TypeMeta's slice of
// enum physical representations that matches the input byte slice.
func (t *T) EnumGetIdxOfPhysical(phys []byte) (int, error) {
Expand All @@ -2835,7 +2837,7 @@ func (t *T) EnumGetIdxOfPhysical(phys []byte) (int, error) {
return i, nil
}
}
err := errors.Newf(
err := errors.Wrapf(EnumValueNotFound,
"could not find %v in enum %q representation %s %s",
phys,
t.TypeMeta.Name.FQName(true /* explicitCatalog */),
Expand Down

0 comments on commit 9400dbb

Please sign in to comment.