diff --git a/pkg/ccl/logictestccl/testdata/logic_test/regional_by_row b/pkg/ccl/logictestccl/testdata/logic_test/regional_by_row index f6babcddc376..15116dde32a9 100644 --- a/pkg/ccl/logictestccl/testdata/logic_test/regional_by_row +++ b/pkg/ccl/logictestccl/testdata/logic_test/regional_by_row @@ -1469,49 +1469,49 @@ LIMIT 1] OFFSET 2 └── • union all │ columns: (a, b) │ ordering: +a,+b - │ estimated row count: 30 + │ estimated row count: 999,995 │ ├── • union all │ │ columns: (a, b) │ │ ordering: +a,+b - │ │ estimated row count: 20 + │ │ estimated row count: 666,663 │ │ │ ├── • filter │ │ │ columns: (a, b) │ │ │ ordering: +a,+b - │ │ │ estimated row count: 10 + │ │ │ estimated row count: 333,332 │ │ │ filter: b IS NOT NULL │ │ │ │ │ └── • scan │ │ columns: (a, b) │ │ ordering: +a,+b - │ │ estimated row count: 10 (<0.01% of the table; stats collected ago) + │ │ estimated row count: 333,333 (33% of the table; stats collected ago) │ │ table: t56201@key_a_b │ │ spans: /"@"/!NULL-/"@"/PrefixEnd │ │ │ └── • filter │ │ columns: (a, b) │ │ ordering: +a,+b - │ │ estimated row count: 10 + │ │ estimated row count: 333,332 │ │ filter: b IS NOT NULL │ │ │ └── • scan │ columns: (a, b) │ ordering: +a,+b - │ estimated row count: 10 (<0.01% of the table; stats collected ago) + │ estimated row count: 333,333 (33% of the table; stats collected ago) │ table: t56201@key_a_b │ spans: /"\x80"/!NULL-/"\x80"/PrefixEnd │ └── • filter │ columns: (a, b) │ ordering: +a,+b - │ estimated row count: 10 + │ estimated row count: 333,332 │ filter: b IS NOT NULL │ └── • scan columns: (a, b) ordering: +a,+b - estimated row count: 10 (<0.01% of the table; stats collected ago) + estimated row count: 333,333 (33% of the table; stats collected ago) table: t56201@key_a_b spans: /"\xc0"/!NULL-/"\xc0"/PrefixEnd @@ -1555,31 +1555,31 @@ LIMIT 1] OFFSET 2 └── • union all │ columns: (b, crdb_region, rowid) │ ordering: +b - │ estimated row count: 10 + │ estimated row count: 333,333 │ ├── • union all │ │ columns: (b, crdb_region, rowid) │ │ ordering: +b - │ │ estimated row count: 7 + │ │ estimated row count: 222,222 │ │ │ ├── • scan │ │ columns: (b, crdb_region, rowid) │ │ ordering: +b - │ │ estimated row count: 3 (<0.01% of the table; stats collected ago) + │ │ estimated row count: 111,111 (11% of the table; stats collected ago) │ │ table: t56201@key_b_partial (partial index) │ │ spans: /"@"/!NULL-/"@"/PrefixEnd │ │ │ └── • scan │ columns: (b, crdb_region, rowid) │ ordering: +b - │ estimated row count: 3 (<0.01% of the table; stats collected ago) + │ estimated row count: 111,111 (11% of the table; stats collected ago) │ table: t56201@key_b_partial (partial index) │ spans: /"\x80"/!NULL-/"\x80"/PrefixEnd │ └── • scan columns: (b, crdb_region, rowid) ordering: +b - estimated row count: 3 (<0.01% of the table; stats collected ago) + estimated row count: 111,111 (11% of the table; stats collected ago) table: t56201@key_b_partial (partial index) spans: /"\xc0"/!NULL-/"\xc0"/PrefixEnd @@ -1619,31 +1619,31 @@ LIMIT 1] OFFSET 2 └── • union all │ columns: (c) │ ordering: +c - │ estimated row count: 10 + │ estimated row count: 333,333 │ ├── • union all │ │ columns: (c) │ │ ordering: +c - │ │ estimated row count: 7 + │ │ estimated row count: 222,222 │ │ │ ├── • scan │ │ columns: (c) │ │ ordering: +c - │ │ estimated row count: 3 (<0.01% of the table; stats collected ago) + │ │ estimated row count: 111,111 (11% of the table; stats collected ago) │ │ table: t56201@key_c_partial (partial index) │ │ spans: /"@"-/"@"/PrefixEnd │ │ │ └── • scan │ columns: (c) │ ordering: +c - │ estimated row count: 3 (<0.01% of the table; stats collected ago) + │ estimated row count: 111,111 (11% of the table; stats collected ago) │ table: t56201@key_c_partial (partial index) │ spans: /"\x80"-/"\x80"/PrefixEnd │ └── • scan columns: (c) ordering: +c - estimated row count: 3 (<0.01% of the table; stats collected ago) + estimated row count: 111,111 (11% of the table; stats collected ago) table: t56201@key_c_partial (partial index) spans: /"\xc0"-/"\xc0"/PrefixEnd diff --git a/pkg/cmd/roachtest/tests/django_blocklist.go b/pkg/cmd/roachtest/tests/django_blocklist.go index a854c1fbf882..2982c702c0b8 100644 --- a/pkg/cmd/roachtest/tests/django_blocklist.go +++ b/pkg/cmd/roachtest/tests/django_blocklist.go @@ -168,15 +168,22 @@ var djangoBlocklists = blocklistsForVersion{ } // Maintain that this list is alphabetized. -var djangoBlocklist21_2 = blocklist{} +var djangoBlocklist21_2 = djangoBlocklist21_1 -var djangoBlocklist21_1 = blocklist{} +var djangoBlocklist21_1 = djangoBlocklist20_2 var djangoBlocklist20_2 = blocklist{} -var djangoIgnoreList21_2 = djangoIgnoreList21_1 +var djangoIgnoreList21_2 = blocklist{ + "migrations.test_operations.OperationTests.test_alter_fk_non_fk": "will be fixed in django-cockroachdb v3.2.2", + "schema.tests.SchemaTests.test_alter_field_db_collation": "will be fixed in django-cockroachdb v3.2.2", + "schema.tests.SchemaTests.test_alter_field_type_and_db_collation": "will be fixed in django-cockroachdb v3.2.2", +} -var djangoIgnoreList21_1 = djangoIgnoreList20_2 +var djangoIgnoreList21_1 = blocklist{ + "schema.tests.SchemaTests.test_alter_field_db_collation": "will be fixed in django-cockroachdb v3.2.2", + "schema.tests.SchemaTests.test_alter_field_type_and_db_collation": "will be fixed in django-cockroachdb v3.2.2", +} var djangoIgnoreList20_2 = blocklist{ "expressions.tests.BasicExpressionsTests.test_boolean_expression_combined": "unknown", diff --git a/pkg/kv/kvserver/metrics.go b/pkg/kv/kvserver/metrics.go index 02fe7da0a40b..b31d2c1ccbff 100644 --- a/pkg/kv/kvserver/metrics.go +++ b/pkg/kv/kvserver/metrics.go @@ -1051,6 +1051,14 @@ var ( Unit: metric.Unit_NANOSECONDS, } + // Export request counter. + metaExportEvalTotalDelay = metric.Metadata{ + Name: "exportrequest.delay.total", + Help: "Amount by which evaluation of Export requests was delayed", + Measurement: "Nanoseconds", + Unit: metric.Unit_NANOSECONDS, + } + // Encryption-at-rest metrics. // TODO(mberhault): metrics for key age, per-key file/bytes counts. metaEncryptionAlgorithm = metric.Metadata{ @@ -1282,6 +1290,9 @@ type StoreMetrics struct { AddSSTableProposalTotalDelay *metric.Counter AddSSTableProposalEngineDelay *metric.Counter + // Export request stats. + ExportRequestProposalTotalDelay *metric.Counter + // Encryption-at-rest stats. // EncryptionAlgorithm is an enum representing the cipher in use, so we use a gauge. EncryptionAlgorithm *metric.Gauge @@ -1672,6 +1683,9 @@ func newStoreMetrics(histogramWindow time.Duration) *StoreMetrics { AddSSTableProposalTotalDelay: metric.NewCounter(metaAddSSTableEvalTotalDelay), AddSSTableProposalEngineDelay: metric.NewCounter(metaAddSSTableEvalEngineDelay), + // ExportRequest proposal. + ExportRequestProposalTotalDelay: metric.NewCounter(metaExportEvalTotalDelay), + // Encryption-at-rest. EncryptionAlgorithm: metric.NewGauge(metaEncryptionAlgorithm), diff --git a/pkg/kv/kvserver/store_send.go b/pkg/kv/kvserver/store_send.go index 8a6cd885de58..cdb21b7349e4 100644 --- a/pkg/kv/kvserver/store_send.go +++ b/pkg/kv/kvserver/store_send.go @@ -299,6 +299,7 @@ func (s *Store) maybeThrottleBatch( } waited := timeutil.Since(before) + s.metrics.ExportRequestProposalTotalDelay.Inc(waited.Nanoseconds()) if waited > time.Second { log.Infof(ctx, "Export request was delayed by %v", waited) } diff --git a/pkg/sql/opt/constraint/constraint.go b/pkg/sql/opt/constraint/constraint.go index ebab73ef38c8..e57fe8a371f8 100644 --- a/pkg/sql/opt/constraint/constraint.go +++ b/pkg/sql/opt/constraint/constraint.go @@ -15,7 +15,6 @@ import ( "github.com/cockroachdb/cockroach/pkg/sql/opt" "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" - "github.com/cockroachdb/cockroach/pkg/sql/types" "github.com/cockroachdb/errors" ) @@ -703,54 +702,16 @@ func (c *Constraint) CalculateMaxResults( return 0, false } if prefix == numCols-1 { + keyCtx := MakeKeyContext(&c.Columns, evalCtx) // If the prefix does not include the last column, calculate the number of - // distinct values possible in the span. This is only supported for int - // and date types. + // distinct values possible in the span. for i := 0; i < c.Spans.Count(); i++ { sp := c.Spans.Get(i) - start := sp.StartKey() - end := sp.EndKey() - - // Ensure that the keys specify the last column. - if start.Length() != numCols || end.Length() != numCols { - return 0, false - } - - // TODO(asubiotto): This logic is very similar to - // updateDistinctCountsFromConstraint. It would be nice to extract this - // logic somewhere. - colIdx := numCols - 1 - startVal := start.Value(colIdx) - endVal := end.Value(colIdx) - var startIntVal, endIntVal int64 - if startVal.ResolvedType().Family() == types.IntFamily && - endVal.ResolvedType().Family() == types.IntFamily { - startIntVal = int64(*startVal.(*tree.DInt)) - endIntVal = int64(*endVal.(*tree.DInt)) - } else if startVal.ResolvedType().Family() == types.DateFamily && - endVal.ResolvedType().Family() == types.DateFamily { - startDate := startVal.(*tree.DDate) - endDate := endVal.(*tree.DDate) - if !startDate.IsFinite() || !endDate.IsFinite() { - // One of the boundaries is not finite, so we can't determine the - // distinct count for this column. - return 0, false - } - startIntVal = int64(startDate.PGEpochDays()) - endIntVal = int64(endDate.PGEpochDays()) - } else { + spanDistinctVals, ok := sp.KeyCount(&keyCtx, numCols) + if !ok { return 0, false } - - if c.Columns.Get(colIdx).Ascending() { - distinctVals += uint64(endIntVal - startIntVal) - } else { - distinctVals += uint64(startIntVal - endIntVal) - } - - // Add one since both start and end boundaries should be inclusive - // (due to Span.PreferInclusive). - distinctVals++ + distinctVals += uint64(spanDistinctVals) } } else { distinctVals = uint64(c.Spans.Count()) diff --git a/pkg/sql/opt/exec/execbuilder/testdata/enums b/pkg/sql/opt/exec/execbuilder/testdata/enums index e9271218336a..a241aa0c8e3c 100644 --- a/pkg/sql/opt/exec/execbuilder/testdata/enums +++ b/pkg/sql/opt/exec/execbuilder/testdata/enums @@ -10,6 +10,21 @@ CREATE TYPE greeting AS ENUM ('hello', 'howdy', 'hi'); CREATE TABLE t (x greeting PRIMARY KEY, y greeting, INDEX i (y), FAMILY (x, y)); INSERT INTO t VALUES ('hello', 'howdy'), ('howdy', 'hi') +# Test that we calculate the correct stats and cardinality. +query T +EXPLAIN (OPT,VERBOSE) SELECT * FROM t +---- +scan t + ├── columns: x:1 y:2 + ├── check constraint expressions + │ └── x:1 IN ('hello', 'howdy', 'hi') [outer=(1), constraints=(/1: [/'hello' - /'hello'] [/'howdy' - /'howdy'] [/'hi' - /'hi']; tight)] + ├── cardinality: [0 - 3] + ├── stats: [rows=3] + ├── cost: 18.05 + ├── key: (1) + ├── fd: (1)-->(2) + └── prune: (1,2) + query T EXPLAIN (OPT) SELECT * FROM t WHERE x = 'hello' ---- @@ -96,6 +111,26 @@ scan checks@checks_x_y_idx ├── [/'hi'/2 - /'hi'/2] └── [/'cheers'/2 - /'cheers'/2] +# Test that we calculate the correct stats and cardinality. +query T +EXPLAIN (OPT,VERBOSE) SELECT DISTINCT x FROM checks +---- +distinct-on + ├── columns: x:1 + ├── grouping columns: x:1 + ├── internal-ordering: +1 + ├── cardinality: [0 - 4] + ├── stats: [rows=4, distinct(1)=4, null(1)=0] + ├── cost: 1115.67 + ├── key: (1) + └── scan checks@checks_x_y_idx + ├── columns: x:1 + ├── stats: [rows=1000, distinct(1)=4, null(1)=0] + ├── cost: 1105.61 + ├── ordering: +1 + ├── prune: (1) + └── interesting orderings: (+1) + # Test that a limited, ordered scan is efficient. statement ok CREATE TABLE composite_key (x greeting, y INT, PRIMARY KEY (x, y), FAMILY (x, y)); @@ -120,3 +155,63 @@ limit │ ├── constraint: /26/27: [/'cheers' - /'cheers'] │ └── limit: 5 └── 5 + +statement ok +CREATE TABLE nulls (x greeting, y int, INDEX (x, y)) + +# Test that we calculate the correct stats and cardinality including null values. +query T +EXPLAIN (OPT,VERBOSE) SELECT x FROM nulls WHERE y < 0 UNION SELECT x FROM nulls WHERE y > 10 +---- +union + ├── columns: x:11 + ├── left columns: nulls.x:1 + ├── right columns: nulls.x:6 + ├── internal-ordering: +11 + ├── cardinality: [0 - 5] + ├── stats: [rows=5, distinct(11)=5, null(11)=1] + ├── cost: 2278.80667 + ├── key: (11) + ├── interesting orderings: (+11) + ├── project + │ ├── columns: nulls.x:1 + │ ├── stats: [rows=333.333333, distinct(1)=5, null(1)=3.33333333] + │ ├── cost: 1139.37333 + │ ├── ordering: +1 + │ ├── interesting orderings: (+1) + │ └── select + │ ├── columns: nulls.x:1 y:2 + │ ├── stats: [rows=333.333333, distinct(1)=5, null(1)=3.33333333, distinct(2)=33.3333333, null(2)=0] + │ ├── cost: 1136.03 + │ ├── ordering: +1 + │ ├── interesting orderings: (+1,+2) + │ ├── scan nulls@nulls_x_y_idx + │ │ ├── columns: nulls.x:1 y:2 + │ │ ├── stats: [rows=1000, distinct(1)=5, null(1)=10, distinct(2)=100, null(2)=10] + │ │ ├── cost: 1126.01 + │ │ ├── ordering: +1 + │ │ ├── prune: (1,2) + │ │ └── interesting orderings: (+1,+2) + │ └── filters + │ └── y:2 < 0 [outer=(2), constraints=(/2: (/NULL - /-1]; tight)] + └── project + ├── columns: nulls.x:6 + ├── stats: [rows=333.333333, distinct(6)=5, null(6)=3.33333333] + ├── cost: 1139.37333 + ├── ordering: +6 + ├── interesting orderings: (+6) + └── select + ├── columns: nulls.x:6 y:7 + ├── stats: [rows=333.333333, distinct(6)=5, null(6)=3.33333333, distinct(7)=33.3333333, null(7)=0] + ├── cost: 1136.03 + ├── ordering: +6 + ├── interesting orderings: (+6,+7) + ├── scan nulls@nulls_x_y_idx + │ ├── columns: nulls.x:6 y:7 + │ ├── stats: [rows=1000, distinct(6)=5, null(6)=10, distinct(7)=100, null(7)=10] + │ ├── cost: 1126.01 + │ ├── ordering: +6 + │ ├── prune: (6,7) + │ └── interesting orderings: (+6,+7) + └── filters + └── y:7 > 10 [outer=(7), constraints=(/7: [/11 - ]; tight)] diff --git a/pkg/sql/opt/exec/execbuilder/testdata/unique b/pkg/sql/opt/exec/execbuilder/testdata/unique index 0df614801e0c..fd8c4986cefb 100644 --- a/pkg/sql/opt/exec/execbuilder/testdata/unique +++ b/pkg/sql/opt/exec/execbuilder/testdata/unique @@ -2142,7 +2142,7 @@ vectorized: true # Test that we use the index when available for the update checks. query T -EXPLAIN (VERBOSE) UPDATE uniq_enum SET r = DEFAULT, s = 'baz', i = 3 WHERE r = 'eu-west' +EXPLAIN (VERBOSE) UPDATE uniq_enum SET r = DEFAULT, s = 'baz', i = 3 WHERE r = 'eu-west' AND i > 10 AND i <= 20 ---- distribution: local vectorized: true @@ -2162,7 +2162,7 @@ vectorized: true │ │ │ └── • render │ │ columns: (r, s, i, j, r_new, s_new, i_new, check1) -│ │ estimated row count: 10 (missing stats) +│ │ estimated row count: 9 (missing stats) │ │ render check1: r_new IN ('us-east', 'us-west', 'eu-west') │ │ render r: r │ │ render s: s @@ -2174,7 +2174,7 @@ vectorized: true │ │ │ └── • render │ │ columns: (r_new, s_new, i_new, r, s, i, j) -│ │ estimated row count: 10 (missing stats) +│ │ estimated row count: 9 (missing stats) │ │ render r_new: CASE (random() * 3.0)::INT8 WHEN 0 THEN 'us-east' WHEN 1 THEN 'us-west' ELSE 'eu-west' END │ │ render s_new: 'baz' │ │ render i_new: 3 @@ -2185,9 +2185,10 @@ vectorized: true │ │ │ └── • scan │ columns: (r, s, i, j) -│ estimated row count: 10 (missing stats) +│ estimated row count: 9 (missing stats) │ table: uniq_enum@primary -│ spans: /"\xc0"-/"\xc0"/PrefixEnd +│ spans: /"\xc0"/11-/"\xc0"/20/# +│ parallel │ locking strength: for update │ ├── • constraint-check @@ -2212,11 +2213,11 @@ vectorized: true │ │ │ └── • cross join (inner) │ │ columns: (r_new, i_new, "lookup_join_const_col_@17") -│ │ estimated row count: 30 (missing stats) +│ │ estimated row count: 28 (missing stats) │ │ │ ├── • project │ │ │ columns: (r_new, i_new) -│ │ │ estimated row count: 10 (missing stats) +│ │ │ estimated row count: 9 (missing stats) │ │ │ │ │ └── • scan buffer │ │ columns: (r, s, i, j, r_new, s_new, i_new, check1) @@ -2251,11 +2252,11 @@ vectorized: true │ └── • cross join (inner) │ columns: (r_new, s_new, i_new, j, "lookup_join_const_col_@27") - │ estimated row count: 30 (missing stats) + │ estimated row count: 28 (missing stats) │ ├── • project │ │ columns: (r_new, s_new, i_new, j) - │ │ estimated row count: 10 (missing stats) + │ │ estimated row count: 9 (missing stats) │ │ │ └── • scan buffer │ columns: (r, s, i, j, r_new, s_new, i_new, check1) diff --git a/pkg/sql/opt/memo/logical_props_builder.go b/pkg/sql/opt/memo/logical_props_builder.go index 3a176daa59e1..380636da416c 100644 --- a/pkg/sql/opt/memo/logical_props_builder.go +++ b/pkg/sql/opt/memo/logical_props_builder.go @@ -19,6 +19,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/sql/opt/constraint" "github.com/cockroachdb/cockroach/pkg/sql/opt/props" "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" + "github.com/cockroachdb/cockroach/pkg/sql/types" "github.com/cockroachdb/cockroach/pkg/util/log" "github.com/cockroachdb/errors" ) @@ -163,6 +164,7 @@ func (b *logicalPropsBuilder) buildScanProps(scan *ScanExpr, rel *props.Relation if pred != nil { b.updateCardinalityFromFilters(pred, rel) } + b.updateCardinalityFromTypes(rel.OutputCols, rel) } // Statistics @@ -640,6 +642,8 @@ func (b *logicalPropsBuilder) buildGroupingExprProps(groupExpr RelExpr, rel *pro rel.Cardinality = inputProps.Cardinality.AsLowAs(1) if rel.FuncDeps.HasMax1Row() { rel.Cardinality = rel.Cardinality.Limit(1) + } else { + b.updateCardinalityFromTypes(groupingCols, rel) } } @@ -761,6 +765,8 @@ func (b *logicalPropsBuilder) buildSetProps(setNode RelExpr, rel *props.Relation rel.Cardinality = b.makeSetCardinality(op, leftProps.Cardinality, rightProps.Cardinality) if rel.FuncDeps.HasMax1Row() { rel.Cardinality = rel.Cardinality.Limit(1) + } else { + b.updateCardinalityFromTypes(rel.OutputCols, rel) } // Statistics @@ -1952,6 +1958,64 @@ func (b *logicalPropsBuilder) updateCardinalityFromConstraint( } } +// updateCardinalityFromTypes determines whether a tight cardinality bound +// can be determined from the types of the given columns. This is possible +// if any of the columns is a strict key and has a type with a finite set +// of possible values (e.g., bool or enum type). +func (b *logicalPropsBuilder) updateCardinalityFromTypes(cols opt.ColSet, rel *props.Relational) { + cols.ForEach(func(col opt.ColumnID) { + // We need to check if this column is a strict key, since a lax key could + // include an arbitrary number of null values. + if !rel.FuncDeps.ColsAreStrictKey(opt.MakeColSet(col)) { + return + } + + md := b.mem.Metadata() + count, ok := distinctCountFromType(md, md.ColumnMeta(col).Type) + if ok && count < math.MaxUint32 { + if !rel.NotNullCols.Contains(col) { + // Add one for a possible null value. + count++ + } + rel.Cardinality = rel.Cardinality.Limit(uint32(count)) + } + }) +} + +// distinctCountFromType calculates the maximum number of distinct values in the +// given type. Returns the distinct count and ok=true if the type has a finite +// set of possible values (e.g., bool or enum type), and ok=false otherwise. +func distinctCountFromType(md *opt.Metadata, typ *types.T) (_ uint64, ok bool) { + // TODO(rytaft): Support other limited types such as INT2, BIT(N), VARBIT(N), + // CHAR(N), and VARCHAR(N). + switch typ.Family() { + case types.BoolFamily: + // There are maximum two distinct values: true and false. + return 2, true + + case types.EnumFamily: + typOid := typ.Oid() + var hydrated *types.T + // Find the hydrated type in the metadata. + for _, t := range md.AllUserDefinedTypes() { + if t.Oid() == typOid { + hydrated = t + break + } + } + if hydrated == nil { + // This can happen in rare cases if the user defined type is + // contained in an array. + // TODO(rytaft): This should really be an assertion failure. See #67434. + break + } + // Enum types have a well defined set of values. + return uint64(len(hydrated.TypeMeta.EnumData.PhysicalRepresentations)), true + } + + return 0, false +} + // ensureLookupJoinInputProps lazily populates the relational properties that // apply to the lookup side of the join, as if it were a Scan operator. func ensureLookupJoinInputProps(join *LookupJoinExpr, sb *statisticsBuilder) *props.Relational { diff --git a/pkg/sql/opt/memo/statistics_builder.go b/pkg/sql/opt/memo/statistics_builder.go index b927e12f2c7b..80fcaab3dff7 100644 --- a/pkg/sql/opt/memo/statistics_builder.go +++ b/pkg/sql/opt/memo/statistics_builder.go @@ -19,7 +19,6 @@ import ( "github.com/cockroachdb/cockroach/pkg/sql/opt/constraint" "github.com/cockroachdb/cockroach/pkg/sql/opt/props" "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" - "github.com/cockroachdb/cockroach/pkg/sql/types" "github.com/cockroachdb/cockroach/pkg/util/json" "github.com/cockroachdb/cockroach/pkg/util/log" "github.com/cockroachdb/errors" @@ -466,13 +465,15 @@ func (sb *statisticsBuilder) colStatLeaf( if notNullCols.Contains(col) { colStat.NullCount = 0 } - if sb.md.ColumnMeta(col).Type.Family() == types.BoolFamily { - // There are maximum three distinct values: true, false, and null. - maxDistinct := float64(2) + // Some types (e.g., bool and enum) have a known maximum number of distinct + // values. + maxDistinct, ok := distinctCountFromType(sb.md, sb.md.ColumnMeta(col).Type) + if ok { if colStat.NullCount > 0 { + // Add one for the null value. maxDistinct++ } - colStat.DistinctCount = min(colStat.DistinctCount, maxDistinct) + colStat.DistinctCount = min(colStat.DistinctCount, float64(maxDistinct)) } } else { distinctCount := 1.0 @@ -3288,6 +3289,7 @@ func (sb *statisticsBuilder) updateDistinctCountsFromConstraint( // All of the columns that are part of the prefix have a finite number of // distinct values. prefix := c.Prefix(sb.evalCtx) + keyCtx := constraint.MakeKeyContext(&c.Columns, sb.evalCtx) // If there are any other columns beyond the prefix, we may be able to // determine the number of distinct values for the first one. For example: @@ -3303,49 +3305,17 @@ func (sb *statisticsBuilder) updateDistinctCountsFromConstraint( countable := true for i := 0; i < c.Spans.Count(); i++ { sp := c.Spans.Get(i) - if sp.StartKey().Length() <= col || sp.EndKey().Length() <= col { - // We can't determine the distinct count for this column. For example, - // the number of distinct values for column b in the constraint - // /a/b: [/1/1 - /1] cannot be determined. + spanDistinctVals, ok := sp.KeyCount(&keyCtx, col+1) + if !ok { countable = false continue } + // Subtract 1 from the span distinct count since we started with + // distinctCount = 1 above and we increment for each new value below. + distinctCount += float64(spanDistinctVals) - 1 + startVal := sp.StartKey().Value(col) endVal := sp.EndKey().Value(col) - if startVal.Compare(sb.evalCtx, endVal) != 0 { - var start, end float64 - if startVal.ResolvedType().Family() == types.IntFamily && - endVal.ResolvedType().Family() == types.IntFamily { - start = float64(*startVal.(*tree.DInt)) - end = float64(*endVal.(*tree.DInt)) - } else if startVal.ResolvedType().Family() == types.DateFamily && - endVal.ResolvedType().Family() == types.DateFamily { - startDate := startVal.(*tree.DDate) - endDate := endVal.(*tree.DDate) - if !startDate.IsFinite() || !endDate.IsFinite() { - // One of the boundaries is not finite, so we can't determine the - // distinct count for this column. - countable = false - continue - } - start = float64(startDate.PGEpochDays()) - end = float64(endDate.PGEpochDays()) - } else { - // We can't determine the distinct count for this column. For example, - // the number of distinct values in the constraint - // /a: [/'cherry' - /'mango'] cannot be determined. - countable = false - continue - } - // We assume that both start and end boundaries are inclusive. This - // should be the case for integer and date columns (due to - // normalization by constraint.PreferInclusive). - if c.Columns.Get(col).Ascending() { - distinctCount += end - start - } else { - distinctCount += start - end - } - } if i != 0 && val != nil { compare := startVal.Compare(sb.evalCtx, val) ascending := c.Columns.Get(col).Ascending() diff --git a/pkg/sql/opt/memo/testdata/stats/project b/pkg/sql/opt/memo/testdata/stats/project index f772e6b85731..dbe21e679ad5 100644 --- a/pkg/sql/opt/memo/testdata/stats/project +++ b/pkg/sql/opt/memo/testdata/stats/project @@ -197,7 +197,8 @@ project ├── columns: column12:12(bool) ├── grouping columns: column12:12(bool) ├── outer: (3) - ├── stats: [rows=10, distinct(12)=10, null(12)=0] + ├── cardinality: [0 - 3] + ├── stats: [rows=3, distinct(12)=3, null(12)=0] ├── key: (12) └── project ├── columns: column12:12(bool) diff --git a/pkg/sql/opt/norm/testdata/rules/groupby b/pkg/sql/opt/norm/testdata/rules/groupby index 06f1114063a0..2f89d543975f 100644 --- a/pkg/sql/opt/norm/testdata/rules/groupby +++ b/pkg/sql/opt/norm/testdata/rules/groupby @@ -2712,6 +2712,7 @@ SELECT b, count(DISTINCT y) FROM xyzbs GROUP BY b group-by ├── columns: b:4!null count:8!null ├── grouping columns: b:4!null + ├── cardinality: [0 - 2] ├── key: (4) ├── fd: (4)-->(8) ├── distinct-on diff --git a/pkg/ts/catalog/chart_catalog.go b/pkg/ts/catalog/chart_catalog.go index 55f90aa746d5..975067f9e313 100644 --- a/pkg/ts/catalog/chart_catalog.go +++ b/pkg/ts/catalog/chart_catalog.go @@ -2305,6 +2305,17 @@ var charts = []sectionDescription{ }, }, }, + { + Organization: [][]string{{DistributionLayer, "Bulk", "Egress"}}, + Charts: []chartDescription{ + { + Title: "Export Delays", + Metrics: []string{ + "exportrequest.delay.total", + }, + }, + }, + }, { Organization: [][]string{{StorageLayer, "Storage", "KV"}}, Charts: []chartDescription{