From 963deb810e25b8f7e2f106eeb069306e6e8d5d30 Mon Sep 17 00:00:00 2001 From: Jordan Lewis Date: Fri, 22 Jul 2022 16:21:25 -0400 Subject: [PATCH] opt: permit coexistence of inv, forward histograms Previously, when trying to use persisted histograms to filter an inverted column, if the histogram read from disk was a "forward histogram", the operation would fail with an internal error. This is because inverted histograms and forward histograms use different datatypes on disk, and it's not possible to compare a forward histogram (represented as the datatype of the column) with the inverted constraint (represented as DBytes). Now, this problem is corrected by making sure that we only store forward histogram data in forward histogram column sets, and that inverted histogram data is likewise only stored in inverted histogram column sets. Release note: None --- .../testdata/logic_test/trigram_indexes | 51 +++ pkg/sql/opt/cat/table.go | 5 + pkg/sql/opt/memo/statistics_builder.go | 119 ++++-- pkg/sql/opt/memo/testdata/stats/inverted-geo | 2 +- .../opt/memo/testdata/stats/inverted-trigram | 351 ++++++++++++++++++ pkg/sql/opt/memo/testdata/stats/scan | 56 +++ pkg/sql/opt/testutils/testcat/test_catalog.go | 9 + pkg/sql/opt_catalog.go | 5 + 8 files changed, 557 insertions(+), 41 deletions(-) create mode 100644 pkg/sql/opt/memo/testdata/stats/inverted-trigram diff --git a/pkg/sql/logictest/testdata/logic_test/trigram_indexes b/pkg/sql/logictest/testdata/logic_test/trigram_indexes index 8960db395357..4cbacb3054f5 100644 --- a/pkg/sql/logictest/testdata/logic_test/trigram_indexes +++ b/pkg/sql/logictest/testdata/logic_test/trigram_indexes @@ -143,3 +143,54 @@ INSERT INTO pkt VALUES (1, 'abcd'), (2, 'bcde') statement error primary key column b cannot be present in an inverted index ALTER TABLE pkt ALTER PRIMARY KEY USING COLUMNS (b) + +# Ensure that it's okay to perform an inverted filter on a table with a trigram +# inverted index that only has a forward statistic collected on
the inverted +# column. + +statement ok +CREATE TABLE b (a) AS SELECT encode(set_byte('foobar ',1,g), 'escape') || g::text FROM generate_series(1,1000) g(g) + +statement ok +ANALYZE b + +statement ok +CREATE INVERTED INDEX ON b(a gin_trgm_ops) + +query T rowsort +SELECT * FROM b WHERE a LIKE '%foo%' +---- +foobar 111 +foobar 367 +foobar 623 +foobar 879 + +# Ensure that scans still work after we re-analyze. + +statement ok +ANALYZE b + +query T rowsort +SELECT * FROM b WHERE a LIKE '%foo%' +---- +foobar 111 +foobar 367 +foobar 623 +foobar 879 + +statement ok +CREATE INDEX on b(a); +ANALYZE b + +query T rowsort +SELECT * FROM b WHERE a LIKE '%foo%' +---- +foobar 111 +foobar 367 +foobar 623 +foobar 879 + +query T +SELECT * FROM b WHERE a = 'foobar 367' +---- +foobar 367 diff --git a/pkg/sql/opt/cat/table.go b/pkg/sql/opt/cat/table.go index bc37dab20ce1..2f7e0b674290 100644 --- a/pkg/sql/opt/cat/table.go +++ b/pkg/sql/opt/cat/table.go @@ -14,6 +14,7 @@ import ( "time" "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" + "github.com/cockroachdb/cockroach/pkg/sql/types" ) // Table is an interface to a database table, exposing only the information @@ -196,6 +197,10 @@ type TableStatistic interface { // and it represents the distribution of values for that column. // See HistogramBucket for more details. Histogram() []HistogramBucket + + // HistogramType returns the type that the histogram was created on. For + // inverted index histograms, this will always return types.Bytes. + HistogramType() *types.T } // HistogramBucket contains the data for a single histogram bucket. 
Note diff --git a/pkg/sql/opt/memo/statistics_builder.go b/pkg/sql/opt/memo/statistics_builder.go index 85d7008da745..88e3198dbe0c 100644 --- a/pkg/sql/opt/memo/statistics_builder.go +++ b/pkg/sql/opt/memo/statistics_builder.go @@ -25,6 +25,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/sql/sem/eval" "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" "github.com/cockroachdb/cockroach/pkg/sql/stats" + "github.com/cockroachdb/cockroach/pkg/sql/types" "github.com/cockroachdb/cockroach/pkg/util/buildutil" "github.com/cockroachdb/cockroach/pkg/util/encoding" "github.com/cockroachdb/cockroach/pkg/util/json" @@ -616,7 +617,7 @@ func (sb *statisticsBuilder) makeTableStatistics(tabID opt.TableID) *props.Stati // stats from a statistic's column to any associated inverted columns. // TODO(mgartner): It might be simpler to iterate over all the table columns // looking for inverted columns. - invertedIndexCols := make(map[int][]int) + invertedIndexCols := make(map[int]invertedIndexColInfo) for indexI, indexN := 0, tab.IndexCount(); indexI < indexN; indexI++ { index := tab.Index(indexI) if !index.IsInverted() { @@ -624,7 +625,9 @@ func (sb *statisticsBuilder) makeTableStatistics(tabID opt.TableID) *props.Stati } col := index.InvertedColumn() srcOrd := col.InvertedSourceColumnOrdinal() - invertedIndexCols[srcOrd] = append(invertedIndexCols[srcOrd], col.Ordinal()) + info := invertedIndexCols[srcOrd] + info.invIdxColOrds = append(info.invIdxColOrds, col.Ordinal()) + invertedIndexCols[srcOrd] = info } // Make now and annotate the metadata table with it for next time. 
@@ -656,49 +659,47 @@ func (sb *statisticsBuilder) makeTableStatistics(tabID opt.TableID) *props.Stati cols.Add(tabID.ColumnID(stat.ColumnOrdinal(i))) } - if colStat, ok := stats.ColStats.Add(cols); ok { + needHistogram := cols.Len() == 1 && stat.Histogram() != nil && + sb.evalCtx.SessionData().OptimizerUseHistograms + seenInvertedStat := false + invertedStatistic := false + var invertedColOrds []int + if needHistogram { + info := invertedIndexCols[stat.ColumnOrdinal(0)] + invertedColOrds = info.invIdxColOrds + seenInvertedStat = info.foundInvertedHistogram + // If some of the columns are inverted and the statistic is of type + // BYTES, it means we have an inverted statistic on this column set. + invertedStatistic = len(invertedColOrds) > 0 && stat.HistogramType().Family() == types.BytesFamily + } + + colStat, ok := stats.ColStats.Add(cols) + if ok || (colStat.Histogram == nil && !invertedStatistic && seenInvertedStat) { + // Set the statistic if either: + // 1. We have no statistic for the current colset at all + // 2. All of the following conditions hold: + // a. The previously found statistic for the colset has no histogram + // b. The current statistic is not inverted + // c. The previously found statistic for this colset was inverted + // If these conditions hold, it means that the previous histogram + // we found for the current colset was derived from an inverted + // histogram, and therefore the existing forward statistic doesn't have + // a histogram at all, and the new statistic we just found has a + // non-inverted histogram that we should be using instead.
colStat.DistinctCount = float64(stat.DistinctCount()) colStat.NullCount = float64(stat.NullCount()) colStat.AvgSize = float64(stat.AvgSize()) - if cols.Len() == 1 && stat.Histogram() != nil && - sb.evalCtx.SessionData().OptimizerUseHistograms { - col, _ := cols.Next(0) - - // If this column is invertable, the histogram describes the inverted index - // entries, and we need to create a new stat for it, and not apply a histogram - // to the source column. - invertedColOrds := invertedIndexCols[stat.ColumnOrdinal(0)] - if len(invertedColOrds) == 0 { - colStat.Histogram = &props.Histogram{} - colStat.Histogram.Init(sb.evalCtx, col, stat.Histogram()) - } else { - for _, invertedColOrd := range invertedColOrds { - invCol := tabID.ColumnID(invertedColOrd) - invCols := opt.MakeColSet(invCol) - if invColStat, ok := stats.ColStats.Add(invCols); ok { - invColStat.Histogram = &props.Histogram{} - invColStat.Histogram.Init(sb.evalCtx, invCol, stat.Histogram()) - // Set inverted entry counts from the histogram. Make sure the - // distinct count is at least 1, for the same reason as the row - // count above. - invColStat.DistinctCount = max(invColStat.Histogram.DistinctValuesCount(), 1) - // Inverted indexes don't have nulls. - invColStat.NullCount = 0 - if stat.AvgSize() == 0 { - invColStat.AvgSize = defaultColSize - } else { - invColStat.AvgSize = float64(stat.AvgSize()) - } - } - } - } + if needHistogram && !invertedStatistic { + // A statistic is inverted if the column is invertible and its + // histogram contains buckets of type BYTES. + // NOTE: this leaves an ambiguity which would surface if we ever + // permitted an inverted index on BYTES-type columns.
A deeper fix + // is tracked here: https://github.com/cockroachdb/cockroach/issues/50655 + col := cols.SingleColumn() + colStat.Histogram = &props.Histogram{} + colStat.Histogram.Init(sb.evalCtx, col, stat.Histogram()) } - // Fetch the colStat again since it may now have a different address due - // to calling stats.ColStats.Add() on any inverted column statistics - // created above. - colStat, _ = stats.ColStats.Lookup(cols) - // Make sure the distinct count is at least 1, for the same reason as // the row count above. colStat.DistinctCount = max(colStat.DistinctCount, 1) @@ -708,12 +709,50 @@ func (sb *statisticsBuilder) makeTableStatistics(tabID opt.TableID) *props.Stati // count). sb.finalizeFromRowCountAndDistinctCounts(colStat, stats) } + + // Add inverted histograms if necessary. + if needHistogram && invertedStatistic { + // Record the fact that we are adding an inverted statistic to this + // column set. + info := invertedIndexCols[stat.ColumnOrdinal(0)] + info.foundInvertedHistogram = true + invertedIndexCols[stat.ColumnOrdinal(0)] = info + for _, invertedColOrd := range invertedColOrds { + invCol := tabID.ColumnID(invertedColOrd) + invCols := opt.MakeColSet(invCol) + if invColStat, ok := stats.ColStats.Add(invCols); ok { + invColStat.Histogram = &props.Histogram{} + invColStat.Histogram.Init(sb.evalCtx, invCol, stat.Histogram()) + // Set inverted entry counts from the histogram. Make sure the + // distinct count is at least 1, for the same reason as the row + // count above. + invColStat.DistinctCount = max(invColStat.Histogram.DistinctValuesCount(), 1) + // Inverted indexes don't have nulls. + invColStat.NullCount = 0 + if stat.AvgSize() == 0 { + invColStat.AvgSize = defaultColSize + } else { + invColStat.AvgSize = float64(stat.AvgSize()) + } + } + } + } } } sb.md.SetTableAnnotation(tabID, statsAnnID, stats) return stats } +// invertedIndexColInfo is used to store information about an inverted column. 
+type invertedIndexColInfo struct { + // invIdxColOrds is the list of inverted index column ordinals for a given + // inverted column. + invIdxColOrds []int + // foundInvertedHistogram is set to true if we've found an inverted histogram + // for a given inverted column. + foundInvertedHistogram bool +} + func (sb *statisticsBuilder) colStatTable( tabID opt.TableID, colSet opt.ColSet, ) *props.ColumnStatistic { diff --git a/pkg/sql/opt/memo/testdata/stats/inverted-geo b/pkg/sql/opt/memo/testdata/stats/inverted-geo index d65b8aee74ef..f0ea820c9dfe 100644 --- a/pkg/sql/opt/memo/testdata/stats/inverted-geo +++ b/pkg/sql/opt/memo/testdata/stats/inverted-geo @@ -729,7 +729,7 @@ ALTER TABLE t62289 INJECT STATISTICS e'[ "upper_bound": "0102000020E61000000300000005D8E086BB6365C03F9E5737DD1A53C0C04ECDED673B55C06711C00C7C0240C0B8EABD96072856404A9D2C529FC74EC0" } ], - "histo_col_type": "GEOGRAPHY", + "histo_col_type": "BYTES", "name": "__auto__", "null_count": 0, "row_count": 0 diff --git a/pkg/sql/opt/memo/testdata/stats/inverted-trigram b/pkg/sql/opt/memo/testdata/stats/inverted-trigram new file mode 100644 index 000000000000..85e9be356b35 --- /dev/null +++ b/pkg/sql/opt/memo/testdata/stats/inverted-trigram @@ -0,0 +1,351 @@ +# Ensure that it's okay to perform an inverted filter on a table with a trigram +# inverted index that only has a forward statistic collected on the inverted +# column. + +exec-ddl +CREATE TABLE a (a TEXT) +---- + +exec-ddl +CREATE INDEX ON a(a) +---- + +exec-ddl +CREATE INVERTED INDEX ON a(a gin_trgm_ops) +---- + +# First, check both plans without stats. 
+opt +SELECT * FROM a WHERE a = 'foo' +---- +scan a@a_a_idx + ├── columns: a:1(string!null) + ├── constraint: /1/2: [/'foo' - /'foo'] + ├── stats: [rows=10, distinct(1)=1, null(1)=0, avgsize(1)=4] + └── fd: ()-->(1) + +opt +SELECT * FROM a WHERE a LIKE '%foo%' +---- +select + ├── columns: a:1(string!null) + ├── stats: [rows=330, distinct(1)=100, null(1)=0, avgsize(1)=4] + ├── index-join a + │ ├── columns: a:1(string) + │ ├── stats: [rows=111.1111] + │ └── scan a@a_a_idx1 + │ ├── columns: rowid:2(int!null) + │ ├── inverted constraint: /5/2 + │ │ └── spans: ["\x12foo\x00\x01", "\x12foo\x00\x01"] + │ ├── stats: [rows=111.1111, distinct(5)=100, null(5)=0, avgsize(5)=4] + │ └── key: (2) + └── filters + └── a:1 LIKE '%foo%' [type=bool, outer=(1), constraints=(/1: (/NULL - ])] + +# Inject forward statistics. +exec-ddl +ALTER TABLE a INJECT STATISTICS '[ + { + "columns": ["a"], + "created_at": "2018-01-01 1:00:00.00000+00:00", + "row_count": 1000, + "distinct_count": 10, + "null_count": 0, + "histo_col_type": "VARCHAR", + "histo_buckets": [ + { + "distinct_range": 0, + "num_eq": 10, + "num_range": 0, + "upper_bound": "blah" + }, + { + "distinct_range": 0, + "num_eq": 990, + "num_range": 0, + "upper_bound": "zooo" + } + ] + } +]' +---- + +# Check the plan for a forward scan. +opt +SELECT * FROM a WHERE a = 'foo' +---- +scan a@a_a_idx + ├── columns: a:1(string!null) + ├── constraint: /1/2: [/'foo' - /'foo'] + ├── stats: [rows=2e-07, distinct(1)=2e-07, null(1)=0, avgsize(1)=4] + │ histogram(1)= 0 0 + │ <--- 'foo' + └── fd: ()-->(1) + +# Make sure that this query doesn't have a problem, even though the inverted +# scan could see "forward histogram" data. 
+ +opt +SELECT * FROM a WHERE a LIKE '%foo%' +---- +select + ├── columns: a:1(string!null) + ├── stats: [rows=333.3333, distinct(1)=10, null(1)=0, avgsize(1)=4] + ├── index-join a + │ ├── columns: a:1(string) + │ ├── stats: [rows=111.1111] + │ └── scan a@a_a_idx1 + │ ├── columns: rowid:2(int!null) + │ ├── inverted constraint: /5/2 + │ │ └── spans: ["\x12foo\x00\x01", "\x12foo\x00\x01"] + │ ├── stats: [rows=111.1111, distinct(5)=100, null(5)=0, avgsize(5)=4] + │ └── key: (2) + └── filters + └── a:1 LIKE '%foo%' [type=bool, outer=(1), constraints=(/1: (/NULL - ])] + +# Now, inject inverted statistics with forward statistics also. +exec-ddl +ALTER TABLE a INJECT STATISTICS '[ + { + "columns": ["a"], + "created_at": "2018-01-01 1:00:00.00000+00:00", + "row_count": 1000, + "distinct_count": 1000, + "null_count": 0, + "histo_col_type": "BYTES", + "histo_buckets": [ + { + "distinct_range": 0, + "num_eq": 9, + "num_range": 0, + "upper_bound": "\\x122020310001" + }, + { + "distinct_range": 0, + "num_eq": 8, + "num_range": 0, + "upper_bound": "\\x122020320001" + }, + { + "distinct_range": 0, + "num_eq": 6, + "num_range": 0, + "upper_bound": "\\x122020330001" + }, + { + "distinct_range": 0, + "num_eq": 6, + "num_range": 0, + "upper_bound": "\\x127973200001" + } + ] + }, + { + "columns": ["a"], + "created_at": "2018-01-01 1:00:00.00000+00:00", + "row_count": 1000, + "distinct_count": 1000, + "null_count": 0, + "histo_col_type": "VARCHAR", + "histo_buckets": [ + { + "distinct_range": 0, + "num_eq": 10, + "num_range": 0, + "upper_bound": "blah" + }, + { + "distinct_range": 0, + "num_eq": 990, + "num_range": 0, + "upper_bound": "zooo" + } + ] + } +]' +---- + +# Test that we get a plan that uses the inverted index now that there are stats. 
+opt +SELECT * FROM a WHERE a LIKE '%foo%' +---- +select + ├── columns: a:1(string!null) + ├── stats: [rows=333.3333, distinct(1)=333.333, null(1)=0, avgsize(1)=4] + ├── index-join a + │ ├── columns: a:1(string) + │ ├── stats: [rows=5.8e-09] + │ └── scan a@a_a_idx1 + │ ├── columns: rowid:2(int!null) + │ ├── inverted constraint: /5/2 + │ │ └── spans: ["\x12foo\x00\x01", "\x12foo\x00\x01"] + │ ├── stats: [rows=5.8e-09, distinct(5)=5.8e-09, null(5)=0, avgsize(5)=4] + │ │ histogram(5)= 0 0 + │ │ <--- '\x12666f6f0002' + │ └── key: (2) + └── filters + └── a:1 LIKE '%foo%' [type=bool, outer=(1), constraints=(/1: (/NULL - ])] + +# Now, check what happens with a forward scan now that we have an inverted histogram. +opt +SELECT * FROM a WHERE a = 'foobarbaz' +---- +scan a@a_a_idx + ├── columns: a:1(string!null) + ├── constraint: /1/2: [/'foobarbaz' - /'foobarbaz'] + ├── stats: [rows=2e-07, distinct(1)=2e-07, null(1)=0, avgsize(1)=4] + │ histogram(1)= 0 0 + │ <--- 'foobarbaz' + └── fd: ()-->(1) + +# Finally, check what happens when there are only inverted stats. 
+exec-ddl +ALTER TABLE a INJECT STATISTICS '[ + { + "columns": ["a"], + "created_at": "2018-01-01 1:00:00.00000+00:00", + "row_count": 1000, + "distinct_count": 1000, + "null_count": 0, + "histo_col_type": "BYTES", + "histo_buckets": [ + { + "distinct_range": 0, + "num_eq": 9, + "num_range": 0, + "upper_bound": "\\x122020310001" + }, + { + "distinct_range": 0, + "num_eq": 8, + "num_range": 0, + "upper_bound": "\\x122020320001" + }, + { + "distinct_range": 0, + "num_eq": 6, + "num_range": 0, + "upper_bound": "\\x122020330001" + }, + { + "distinct_range": 0, + "num_eq": 6, + "num_range": 0, + "upper_bound": "\\x127973200001" + } + ] + } +]' +---- + +opt +SELECT * FROM a WHERE a LIKE '%foo%' +---- +select + ├── columns: a:1(string!null) + ├── stats: [rows=333.3333, distinct(1)=333.333, null(1)=0, avgsize(1)=4] + ├── index-join a + │ ├── columns: a:1(string) + │ ├── stats: [rows=5.8e-09] + │ └── scan a@a_a_idx1 + │ ├── columns: rowid:2(int!null) + │ ├── inverted constraint: /5/2 + │ │ └── spans: ["\x12foo\x00\x01", "\x12foo\x00\x01"] + │ ├── stats: [rows=5.8e-09, distinct(5)=5.8e-09, null(5)=0, avgsize(5)=4] + │ │ histogram(5)= 0 0 + │ │ <--- '\x12666f6f0002' + │ └── key: (2) + └── filters + └── a:1 LIKE '%foo%' [type=bool, outer=(1), constraints=(/1: (/NULL - ])] + +opt +SELECT * FROM a WHERE a = 'foobarbaz' +---- +scan a@a_a_idx + ├── columns: a:1(string!null) + ├── constraint: /1/2: [/'foobarbaz' - /'foobarbaz'] + ├── stats: [rows=1, distinct(1)=1, null(1)=0, avgsize(1)=4] + └── fd: ()-->(1) + +# Simulate truncate on an inverted column to ensure that the optimizer doesn't +# use stale stats. 
+exec-ddl +ALTER TABLE a INJECT STATISTICS '[ + { + "columns": ["a"], + "created_at": "2018-01-01 1:00:00.00000+00:00", + "row_count": 1000, + "distinct_count": 1000, + "null_count": 0, + "histo_col_type": "VARCHAR", + "histo_buckets": [ + { + "distinct_range": 0, + "num_eq": 10, + "num_range": 0, + "upper_bound": "blah" + }, + { + "distinct_range": 0, + "num_eq": 990, + "num_range": 0, + "upper_bound": "zooo" + } + ] + }, + { + "columns": ["a"], + "created_at": "2018-01-01 1:00:00.00000+00:00", + "row_count": 1000, + "distinct_count": 1000, + "null_count": 0, + "histo_col_type": "BYTES", + "histo_buckets": [ + { + "distinct_range": 0, + "num_eq": 9, + "num_range": 0, + "upper_bound": "\\x122020310001" + }, + { + "distinct_range": 0, + "num_eq": 8, + "num_range": 0, + "upper_bound": "\\x122020320001" + }, + { + "distinct_range": 0, + "num_eq": 6, + "num_range": 0, + "upper_bound": "\\x122020330001" + }, + { + "distinct_range": 0, + "num_eq": 6, + "num_range": 0, + "upper_bound": "\\x127973200001" + } + ] + }, + { + "columns": ["a"], + "created_at": "2018-01-01 2:00:00.00000+00:00", + "row_count": 0, + "distinct_count": 0, + "null_count": 0 + } +]' +---- + +# This explain should have no histogram, since the most recent statistic has no +# histogram, even though the column in question is inverted and older stats +# exist with histograms. 
+ +opt +SELECT * FROM a WHERE a = 'foobarbaz' +---- +scan a@a_a_idx + ├── columns: a:1(string!null) + ├── constraint: /1/2: [/'foobarbaz' - /'foobarbaz'] + ├── stats: [rows=1, distinct(1)=1, null(1)=0, avgsize(1)=4] + └── fd: ()-->(1) diff --git a/pkg/sql/opt/memo/testdata/stats/scan b/pkg/sql/opt/memo/testdata/stats/scan index 282344a420ea..8f5d7f53e52d 100644 --- a/pkg/sql/opt/memo/testdata/stats/scan +++ b/pkg/sql/opt/memo/testdata/stats/scan @@ -3084,3 +3084,59 @@ top-k │ <---- 3 -------- 5 ---- ├── key: (6,7) └── fd: ()-->(2,3,5), (6,7)-->(1,4) + +# Test that a statistic with a histogram followed in time by a statistic +# without a histogram does not cause the optimizer to use the old statistic +# incorrectly just because it has a histogram. + +exec-ddl +CREATE TABLE trunc (x INT PRIMARY KEY) +---- + +exec-ddl +ALTER TABLE trunc INJECT STATISTICS '[ + { + "columns": ["x"], + "created_at": "2018-01-01 1:00:00.00000+00:00", + "row_count": 1000, + "distinct_count": 40, + "avg_size": 2, + "histo_col_type": "int", + "histo_buckets": [ + {"num_eq": 0, "num_range": 0, "distinct_range": 0, "upper_bound": "0"}, + {"num_eq": 10, "num_range": 90, "distinct_range": 9, "upper_bound": "10"}, + {"num_eq": 20, "num_range": 180, "distinct_range": 9, "upper_bound": "20"}, + {"num_eq": 30, "num_range": 270, "distinct_range": 9, "upper_bound": "30"}, + {"num_eq": 40, "num_range": 360, "distinct_range": 9, "upper_bound": "40"} + ] + }, + { + "columns": ["x"], + "created_at": "2018-01-01 2:00:00.00000+00:00", + "row_count": 0, + "distinct_count": 0, + "avg_size": 0 + } +]' +---- + +# The following explain should have no histogram in it. 
+build +SELECT * FROM trunc WHERE x < 10 +---- +project + ├── columns: x:1(int!null) + ├── stats: [rows=1] + ├── key: (1) + └── select + ├── columns: x:1(int!null) crdb_internal_mvcc_timestamp:2(decimal) tableoid:3(oid) + ├── stats: [rows=1, distinct(1)=1, null(1)=0, avgsize(1)=4] + ├── key: (1) + ├── fd: (1)-->(2,3) + ├── scan trunc + │ ├── columns: x:1(int!null) crdb_internal_mvcc_timestamp:2(decimal) tableoid:3(oid) + │ ├── stats: [rows=1, distinct(1)=1, null(1)=0, avgsize(1)=4] + │ ├── key: (1) + │ └── fd: (1)-->(2,3) + └── filters + └── x:1 < 10 [type=bool, outer=(1), constraints=(/1: (/NULL - /9]; tight)] diff --git a/pkg/sql/opt/testutils/testcat/test_catalog.go b/pkg/sql/opt/testutils/testcat/test_catalog.go index e3cf4ae3fcad..01c6941c4677 100644 --- a/pkg/sql/opt/testutils/testcat/test_catalog.go +++ b/pkg/sql/opt/testutils/testcat/test_catalog.go @@ -1221,6 +1221,15 @@ func (ts *TableStat) Histogram() []cat.HistogramBucket { return histogram } +// HistogramType is part of the cat.TableStatistic interface. +func (ts *TableStat) HistogramType() *types.T { + colTypeRef, err := parser.GetTypeFromValidSQLSyntax(ts.js.HistogramColumnType) + if err != nil { + panic(err) + } + return tree.MustBeStaticallyKnownType(colTypeRef) +} + // TableStats is a slice of TableStat pointers. type TableStats []*TableStat diff --git a/pkg/sql/opt_catalog.go b/pkg/sql/opt_catalog.go index 55ea5c5aec56..5f10d787d085 100644 --- a/pkg/sql/opt_catalog.go +++ b/pkg/sql/opt_catalog.go @@ -1674,6 +1674,11 @@ func (os *optTableStat) Histogram() []cat.HistogramBucket { return os.stat.Histogram } +// HistogramType is part of the cat.TableStatistic interface. +func (os *optTableStat) HistogramType() *types.T { + return os.stat.HistogramData.ColumnType +} + // optFamily is a wrapper around descpb.ColumnFamilyDescriptor that keeps a // reference to the table wrapper. type optFamily struct {