From 05a9692bee96ce57b739ed668f857d656b13abc1 Mon Sep 17 00:00:00 2001 From: Marcus Gartner Date: Wed, 11 Aug 2021 16:06:16 -0700 Subject: [PATCH] opt: support BYTES for histogram range calculations Fixes #68346 Release note (performance improvement): The accuracy of histogram calculations for BYTES types has been improved. As a result, the optimizer should generate more efficient query plans in some cases. --- pkg/sql/opt/constraint/testutils.go | 2 + pkg/sql/opt/memo/testdata/stats/inverted-geo | 10 +- .../testdata/stats/inverted-geo-multi-column | 76 +++++------ .../memo/testdata/stats/partial-index-scan | 94 +++++++------- pkg/sql/opt/props/histogram.go | 16 +-- pkg/sql/opt/props/histogram_test.go | 121 ++++++++++-------- 6 files changed, 166 insertions(+), 153 deletions(-) diff --git a/pkg/sql/opt/constraint/testutils.go b/pkg/sql/opt/constraint/testutils.go index 4606baae4fcb..9fb231980987 100644 --- a/pkg/sql/opt/constraint/testutils.go +++ b/pkg/sql/opt/constraint/testutils.go @@ -151,6 +151,8 @@ func parseDatumPath(evalCtx *tree.EvalContext, str string, typs []types.Family) val, _, err = tree.ParseDTimestampTZ(evalCtx, valStr, time.Microsecond) case types.StringFamily: val = tree.NewDString(valStr) + case types.BytesFamily: + val = tree.NewDBytes(tree.DBytes(valStr)) case types.OidFamily: dInt, err := tree.ParseDInt(valStr) if err == nil { diff --git a/pkg/sql/opt/memo/testdata/stats/inverted-geo b/pkg/sql/opt/memo/testdata/stats/inverted-geo index f9359a8bd245..a6f0cd655b39 100644 --- a/pkg/sql/opt/memo/testdata/stats/inverted-geo +++ b/pkg/sql/opt/memo/testdata/stats/inverted-geo @@ -118,22 +118,22 @@ memo (optimized, ~11KB, required=[presentation: i:1]) │ └── cost: 2124.52 ├── G7: (filters G9) ├── G8: (index-join G10 t,cols=(1,2)) - │ ├── [ordering: +1] [limit hint: 13.50] + │ ├── [ordering: +1] [limit hint: 5.34] │ │ ├── best: (sort G8) - │ │ └── cost: 22165.12 + │ │ └── cost: 8755.79 │ └── [] │ ├── best: (index-join G10 t,cols=(1,2)) - │ └── cost: 21352.06 + │ └── cost: 8465.67 ├── G9: (function G11 st_intersects) ├── G10: (inverted-filter G12 g_inverted_key) │ └── [] │ ├── best: (inverted-filter G12 g_inverted_key) - │ └── cost: 3172.04 + │ └── cost: 1268.99 ├── G11: (scalar-list G13 G14) ├── G12: (scan t@secondary,cols=(3,5),constrained inverted) │ └── [] │ ├── best: (scan t@secondary,cols=(3,5),constrained inverted) - │ └── cost: 3142.02 + │ └── cost: 1257.09 ├── G13: (const '010200000002000000000000000000E03F000000000000E03F666666666666E63F666666666666E63F') └── G14: (variable g) diff --git a/pkg/sql/opt/memo/testdata/stats/inverted-geo-multi-column b/pkg/sql/opt/memo/testdata/stats/inverted-geo-multi-column index 50d38cde5686..a7b23b652111 100644 --- a/pkg/sql/opt/memo/testdata/stats/inverted-geo-multi-column +++ b/pkg/sql/opt/memo/testdata/stats/inverted-geo-multi-column @@ -83,7 +83,7 @@ project ├── fd: ()-->(3), (1)-->(2) ├── index-join t │ ├── columns: k:1(int!null) g:2(geometry) s:3(string) - │ ├── stats: [rows=153.552632] + │ ├── stats: [rows=60.7847521] │ ├── key: (1) │ ├── fd: (1)-->(2,3) │ └── inverted-filter @@ -96,7 +96,7 @@ project │ │ └── ["B\xfd\x14\x00\x00\x00\x00\x00\x00\x00", "B\xfd\x14\x00\x00\x00\x00\x00\x00\x00"] │ ├── pre-filterer expression │ │ └── st_intersects('010200000002000000000000000000E03F000000000000E03F666666666666E63F666666666666E63F', g:2) [type=bool] - │ ├── stats: [rows=153.552632] + │ ├── stats: [rows=60.7847521] │ ├── key: (1) │ └── scan t@m │ ├── columns: k:1(int!null) g_inverted_key:6(geometry!null) @@ -107,11 +107,11 @@ project │ │ ├── ["B\xfd\x10\x00\x00\x00\x00\x00\x00\x01", "B\xfd\x12\x00\x00\x00\x00\x00\x00\x00") │ │ └── ["B\xfd\x14\x00\x00\x00\x00\x00\x00\x00", "B\xfd\x14\x00\x00\x00\x00\x00\x00\x00"] │ ├── flags: force-index=m - │ ├── stats: [rows=153.552632, distinct(1)=43.8721804, null(1)=0, distinct(3)=1, null(3)=0, distinct(6)=3, null(6)=0, distinct(3,6)=3, null(3,6)=0] - │ │ histogram(3)= 0 100 + │ ├── stats: [rows=60.7847521, distinct(1)=17.367072, null(1)=0, distinct(3)=1, null(3)=0, distinct(6)=1.18756842, null(6)=0, distinct(3,6)=1.18756842, null(3,6)=0] + │ │ histogram(3)= 0 60.785 │ │ <--- 'banana' - │ │ histogram(6)= 0 0 25.592 0 25.592 51.184 25.592 0 0 0 25.592 0 - │ │ <--- '\x42fd1000000000000000' -------- '\x42fd1000000000000001' -------- '\x42fd1000000100000000' -------- '\x42fd1200000000000000' --- '\x42fd1400000000000000' -------- '\x42fd1400000000000001' + │ │ histogram(6)= 0 0 9.3283e-11 51.184 9.6005 0 0 0 + │ │ <--- '\x42fd1000000000000001' ------------ '\x42fd1000000100000000' -------- '\x42fd1200000000000000' --- '\x42fd1400000000000001' │ ├── key: (1) │ └── fd: (1)-->(6) └── filters @@ -135,7 +135,7 @@ project ├── fd: ()-->(3), (1)-->(2) ├── index-join t │ ├── columns: k:1(int!null) g:2(geometry) s:3(string) - │ ├── stats: [rows=153.552632] + │ ├── stats: [rows=60.7847521] │ ├── key: (1) │ ├── fd: (1)-->(2,3) │ └── inverted-filter @@ -148,7 +148,7 @@ project │ │ └── ["B\xfd\x14\x00\x00\x00\x00\x00\x00\x00", "B\xfd\x14\x00\x00\x00\x00\x00\x00\x00"] │ ├── pre-filterer expression │ │ └── st_intersects('010200000002000000000000000000E03F000000000000E03F666666666666E63F666666666666E63F', g:2) [type=bool] - │ ├── stats: [rows=153.552632] + │ ├── stats: [rows=60.7847521] │ ├── key: (1) │ └── scan t@p,partial │ ├── columns: k:1(int!null) g_inverted_key:7(geometry!null) @@ -158,11 +158,11 @@ project │ │ ├── ["B\xfd\x10\x00\x00\x00\x00\x00\x00\x01", "B\xfd\x12\x00\x00\x00\x00\x00\x00\x00") │ │ └── ["B\xfd\x14\x00\x00\x00\x00\x00\x00\x00", "B\xfd\x14\x00\x00\x00\x00\x00\x00\x00"] │ ├── flags: force-index=p - │ ├── stats: [rows=153.552632, distinct(1)=43.8721804, null(1)=0, distinct(3)=1, null(3)=0, distinct(7)=3, null(7)=0, distinct(3,7)=3, null(3,7)=0] - │ │ histogram(3)= 0 100 + │ ├── stats: [rows=60.7847521, distinct(1)=17.367072, null(1)=0, distinct(3)=1, null(3)=0, distinct(7)=1.18756842, null(7)=0, distinct(3,7)=1.18756842, null(3,7)=0] + │ │ histogram(3)= 0 60.785 │ │ <--- 'banana' - │ │ histogram(7)= 0 0 25.592 0 25.592 51.184 25.592 0 0 0 25.592 0 - │ │ <--- '\x42fd1000000000000000' -------- '\x42fd1000000000000001' -------- '\x42fd1000000100000000' -------- '\x42fd1200000000000000' --- '\x42fd1400000000000000' -------- '\x42fd1400000000000001' + │ │ histogram(7)= 0 0 9.3283e-11 51.184 9.6005 0 0 0 + │ │ <--- '\x42fd1000000000000001' ------------ '\x42fd1000000100000000' -------- '\x42fd1200000000000000' --- '\x42fd1400000000000001' │ ├── key: (1) │ └── fd: (1)-->(7) └── filters @@ -198,7 +198,7 @@ project ├── fd: (1)-->(2,3) ├── index-join t │ ├── columns: k:1(int!null) g:2(geometry) s:3(string) - │ ├── stats: [rows=307.105263] + │ ├── stats: [rows=121.569504] │ ├── key: (1) │ ├── fd: (1)-->(2,3) │ └── inverted-filter @@ -211,7 +211,7 @@ project │ │ └── ["B\xfd\x14\x00\x00\x00\x00\x00\x00\x00", "B\xfd\x14\x00\x00\x00\x00\x00\x00\x00"] │ ├── pre-filterer expression │ │ └── st_intersects('010200000002000000000000000000E03F000000000000E03F666666666666E63F666666666666E63F', g:2) [type=bool] - │ ├── stats: [rows=307.105263] + │ ├── stats: [rows=121.569504] │ ├── key: (1) │ └── scan t@m │ ├── columns: k:1(int!null) g_inverted_key:6(geometry!null) @@ -225,11 +225,11 @@ project │ │ ├── ["B\xfd\x10\x00\x00\x00\x00\x00\x00\x01", "B\xfd\x12\x00\x00\x00\x00\x00\x00\x00") │ │ └── ["B\xfd\x14\x00\x00\x00\x00\x00\x00\x00", "B\xfd\x14\x00\x00\x00\x00\x00\x00\x00"] │ ├── flags: force-index=m - │ ├── stats: [rows=307.105263, distinct(1)=87.7443609, null(1)=0, distinct(3)=2, null(3)=0, distinct(6)=3, null(6)=0, distinct(3,6)=6, null(3,6)=0] - │ │ histogram(3)= 0 100 0 100 + │ ├── stats: [rows=121.569504, distinct(1)=34.7341441, null(1)=0, distinct(3)=2, null(3)=0, distinct(6)=1.18756842, null(6)=0, distinct(3,6)=2.37513684, null(3,6)=0] + │ │ histogram(3)= 0 60.785 0 60.785 │ │ <--- 'banana' --- 'cherry' - │ │ histogram(6)= 0 0 51.184 0 51.184 102.37 51.184 0 0 0 51.184 0 - │ │ <--- '\x42fd1000000000000000' -------- '\x42fd1000000000000001' -------- '\x42fd1000000100000000' -------- '\x42fd1200000000000000' --- '\x42fd1400000000000000' -------- '\x42fd1400000000000001' + │ │ histogram(6)= 0 0 1.8657e-10 102.37 19.201 0 0 0 + │ │ <--- '\x42fd1000000000000001' ------------ '\x42fd1000000100000000' -------- '\x42fd1200000000000000' --- '\x42fd1400000000000001' │ ├── key: (1) │ └── fd: (1)-->(6) └── filters @@ -253,7 +253,7 @@ project ├── fd: (1)-->(2,3) ├── index-join t │ ├── columns: k:1(int!null) g:2(geometry) s:3(string) - │ ├── stats: [rows=307.105263] + │ ├── stats: [rows=121.569504] │ ├── key: (1) │ ├── fd: (1)-->(2,3) │ └── inverted-filter @@ -266,7 +266,7 @@ project │ │ └── ["B\xfd\x14\x00\x00\x00\x00\x00\x00\x00", "B\xfd\x14\x00\x00\x00\x00\x00\x00\x00"] │ ├── pre-filterer expression │ │ └── st_intersects('010200000002000000000000000000E03F000000000000E03F666666666666E63F666666666666E63F', g:2) [type=bool] - │ ├── stats: [rows=307.105263] + │ ├── stats: [rows=121.569504] │ ├── key: (1) │ └── scan t@p,partial │ ├── columns: k:1(int!null) g_inverted_key:8(geometry!null) @@ -276,11 +276,11 @@ project │ │ ├── ["B\xfd\x10\x00\x00\x00\x00\x00\x00\x01", "B\xfd\x12\x00\x00\x00\x00\x00\x00\x00") │ │ └── ["B\xfd\x14\x00\x00\x00\x00\x00\x00\x00", "B\xfd\x14\x00\x00\x00\x00\x00\x00\x00"] │ ├── flags: force-index=p - │ ├── stats: [rows=307.105263, distinct(1)=87.7443609, null(1)=0, distinct(3)=2, null(3)=0, distinct(8)=3, null(8)=0, distinct(3,8)=6, null(3,8)=0] - │ │ histogram(3)= 0 100 0 100 + │ ├── stats: [rows=121.569504, distinct(1)=34.7341441, null(1)=0, distinct(3)=2, null(3)=0, distinct(8)=1.18756842, null(8)=0, distinct(3,8)=2.37513684, null(3,8)=0] + │ │ histogram(3)= 0 60.785 0 60.785 │ │ <--- 'banana' --- 'cherry' - │ │ histogram(8)= 0 0 51.184 0 51.184 102.37 51.184 0 0 0 51.184 0 - │ │ <--- '\x42fd1000000000000000' -------- '\x42fd1000000000000001' -------- '\x42fd1000000100000000' -------- '\x42fd1200000000000000' --- '\x42fd1400000000000000' -------- '\x42fd1400000000000001' + │ │ histogram(8)= 0 0 1.8657e-10 102.37 19.201 0 0 0 + │ │ <--- '\x42fd1000000000000001' ------------ '\x42fd1000000100000000' -------- '\x42fd1200000000000000' --- '\x42fd1400000000000001' │ ├── key: (1) │ └── fd: (1)-->(8) └── filters @@ -316,7 +316,7 @@ project ├── fd: ()-->(4), (1)-->(2,3) ├── index-join t │ ├── columns: k:1(int!null) g:2(geometry) s:3(string) i:4(int) - │ ├── stats: [rows=24.0813118] + │ ├── stats: [rows=9.53273514] │ ├── key: (1) │ ├── fd: (1)-->(2-4) │ └── inverted-filter @@ -329,7 +329,7 @@ project │ │ └── ["B\xfd\x14\x00\x00\x00\x00\x00\x00\x00", "B\xfd\x14\x00\x00\x00\x00\x00\x00\x00"] │ ├── pre-filterer expression │ │ └── st_intersects('010200000002000000000000000000E03F000000000000E03F666666666666E63F666666666666E63F', g:2) [type=bool] - │ ├── stats: [rows=24.0813118] + │ ├── stats: [rows=9.53273514] │ ├── key: (1) │ └── scan t@mp,partial │ ├── columns: k:1(int!null) g_inverted_key:9(geometry!null) @@ -340,13 +340,13 @@ project │ │ ├── ["B\xfd\x10\x00\x00\x00\x00\x00\x00\x01", "B\xfd\x12\x00\x00\x00\x00\x00\x00\x00") │ │ └── ["B\xfd\x14\x00\x00\x00\x00\x00\x00\x00", "B\xfd\x14\x00\x00\x00\x00\x00\x00\x00"] │ ├── flags: force-index=mp - │ ├── stats: [rows=24.0813118, distinct(1)=6.88037479, null(1)=0, distinct(3)=2, null(3)=0, distinct(4)=1, null(4)=0, distinct(9)=3, null(9)=0, distinct(3,4,9)=6, null(3,4,9)=0] - │ │ histogram(3)= 0 12.041 0 12.041 + │ ├── stats: [rows=9.53273514, distinct(1)=2.72363861, null(1)=0, distinct(3)=2, null(3)=0, distinct(4)=1, null(4)=0, distinct(9)=1.18756842, null(9)=0, distinct(3,4,9)=2.37513684, null(3,4,9)=0] + │ │ histogram(3)= 0 4.7664 0 4.7664 │ │ <--- 'banana' --- 'cherry' - │ │ histogram(4)= 0 24.081 + │ │ histogram(4)= 0 9.5327 │ │ <--- 400 - - │ │ histogram(9)= 0 0 4.0136 0 4.0136 8.0271 4.0136 0 0 0 4.0136 0 - │ │ <--- '\x42fd1000000000000000' -------- '\x42fd1000000000000001' -------- '\x42fd1000000100000000' -------- '\x42fd1200000000000000' --- '\x42fd1400000000000000' -------- '\x42fd1400000000000001' + │ │ histogram(9)= 0 0 1.4629e-11 8.0271 1.5056 0 0 0 + │ │ <--- '\x42fd1000000000000001' ------------ '\x42fd1000000100000000' -------- '\x42fd1200000000000000' --- '\x42fd1400000000000001' │ ├── key: (1) │ └── fd: (1)-->(9) └── filters @@ -372,7 +372,7 @@ project ├── fd: (1)-->(2-4) ├── index-join t │ ├── columns: k:1(int!null) g:2(geometry) s:3(string) i:4(int) - │ ├── stats: [rows=48.1626236] + │ ├── stats: [rows=19.0654703] │ ├── key: (1) │ ├── fd: (1)-->(2-4) │ └── inverted-filter @@ -385,7 +385,7 @@ project │ │ └── ["B\xfd\x14\x00\x00\x00\x00\x00\x00\x00", "B\xfd\x14\x00\x00\x00\x00\x00\x00\x00"] │ ├── pre-filterer expression │ │ └── st_intersects('010200000002000000000000000000E03F000000000000E03F666666666666E63F666666666666E63F', g:2) [type=bool] - │ ├── stats: [rows=48.1626236] + │ ├── stats: [rows=19.0654703] │ ├── key: (1) │ └── scan t@mp,partial │ ├── columns: k:1(int!null) g_inverted_key:9(geometry!null) @@ -399,13 +399,13 @@ project │ │ ├── ["B\xfd\x10\x00\x00\x00\x00\x00\x00\x01", "B\xfd\x12\x00\x00\x00\x00\x00\x00\x00") │ │ └── ["B\xfd\x14\x00\x00\x00\x00\x00\x00\x00", "B\xfd\x14\x00\x00\x00\x00\x00\x00\x00"] │ ├── flags: force-index=mp - │ ├── stats: [rows=48.1626236, distinct(1)=13.7607496, null(1)=0, distinct(3)=2, null(3)=0, distinct(4)=3, null(4)=0, distinct(9)=3, null(9)=0, distinct(3,4,9)=18, null(3,4,9)=0] - │ │ histogram(3)= 0 24.081 0 24.081 + │ ├── stats: [rows=19.0654703, distinct(1)=5.44727722, null(1)=0, distinct(3)=2, null(3)=0, distinct(4)=3, null(4)=0, distinct(9)=1.18756842, null(9)=0, distinct(3,4,9)=7.12541053, null(3,4,9)=0] + │ │ histogram(3)= 0 9.5327 0 9.5327 │ │ <--- 'banana' --- 'cherry' - │ │ histogram(4)= 0 8.0271 0 16.054 0 24.081 + │ │ histogram(4)= 0 3.1776 0 6.3552 0 9.5327 │ │ <--- 200 ---- 300 ---- 400 - - │ │ histogram(9)= 0 0 8.0271 0 8.0271 16.054 8.0271 0 0 0 8.0271 0 - │ │ <--- '\x42fd1000000000000000' -------- '\x42fd1000000000000001' -------- '\x42fd1000000100000000' -------- '\x42fd1200000000000000' --- '\x42fd1400000000000000' -------- '\x42fd1400000000000001' + │ │ histogram(9)= 0 0 2.9259e-11 16.054 3.0113 0 0 0 + │ │ <--- '\x42fd1000000000000001' ------------ '\x42fd1000000100000000' -------- '\x42fd1200000000000000' --- '\x42fd1400000000000001' │ ├── key: (1) │ └── fd: (1)-->(9) └── filters diff --git a/pkg/sql/opt/memo/testdata/stats/partial-index-scan b/pkg/sql/opt/memo/testdata/stats/partial-index-scan index 24199fc50126..042e1a892ee0 100644 --- a/pkg/sql/opt/memo/testdata/stats/partial-index-scan +++ b/pkg/sql/opt/memo/testdata/stats/partial-index-scan @@ -1031,11 +1031,11 @@ project ├── inverted constraint: /6/1 │ └── spans: ["7g\x00\x01*\x0e\x00", "7g\x00\x01*\x0e\x00"] ├── flags: force-index=partial - ├── stats: [rows=184.108911, distinct(4)=2, null(4)=0, distinct(6)=50.5, null(6)=0, distinct(4,6)=101, null(4,6)=0] - │ histogram(4)= 0 92.054 0 92.054 + ├── stats: [rows=100, distinct(4)=2, null(4)=0, distinct(6)=1, null(6)=0, distinct(4,6)=2, null(4,6)=0] + │ histogram(4)= 0 50 0 50 │ <--- 'banana' --- 'cherry' - │ histogram(6)= 0 73.644 110.47 0 - │ <--- '\x376700012a0e00' -------- '\x376700012a0e01' + │ histogram(6)= 0 100 5.9476e-16 0 + │ <--- '\x376700012a0e00' ------------ '\x376700012a0e01' └── key: (1) opt @@ -1054,11 +1054,11 @@ index-join inv_hist ├── inverted constraint: /6/1 │ └── spans: ["7g\x00\x01*\x0e\x00", "7g\x00\x01*\x0e\x00"] ├── flags: force-index=partial - ├── stats: [rows=184.108911, distinct(4)=2, null(4)=0, distinct(6)=50.5, null(6)=0, distinct(4,6)=101, null(4,6)=0] - │ histogram(4)= 0 92.054 0 92.054 + ├── stats: [rows=100, distinct(4)=2, null(4)=0, distinct(6)=1, null(6)=0, distinct(4,6)=2, null(4,6)=0] + │ histogram(4)= 0 50 0 50 │ <--- 'banana' --- 'cherry' - │ histogram(6)= 0 73.644 110.47 0 - │ <--- '\x376700012a0e00' -------- '\x376700012a0e01' + │ histogram(6)= 0 100 5.9476e-16 0 + │ <--- '\x376700012a0e00' ------------ '\x376700012a0e01' └── key: (1) opt @@ -1079,7 +1079,7 @@ project ├── fd: ()-->(4), (1)-->(3) ├── index-join inv_hist │ ├── columns: k:1(int!null) j:3(jsonb) s:4(string) - │ ├── stats: [rows=184.108911] + │ ├── stats: [rows=100] │ ├── key: (1) │ ├── fd: (1)-->(3,4) │ └── scan inv_hist@partial,partial @@ -1087,11 +1087,11 @@ project │ ├── inverted constraint: /6/1 │ │ └── spans: ["7g\x00\x01*\x0e\x00", "7g\x00\x01*\x0e\x00"] │ ├── flags: force-index=partial - │ ├── stats: [rows=184.108911, distinct(4)=2, null(4)=0, distinct(6)=50.5, null(6)=0, distinct(4,6)=101, null(4,6)=0] - │ │ histogram(4)= 0 92.054 0 92.054 + │ ├── stats: [rows=100, distinct(4)=2, null(4)=0, distinct(6)=1, null(6)=0, distinct(4,6)=2, null(4,6)=0] + │ │ histogram(4)= 0 50 0 50 │ │ <--- 'banana' --- 'cherry' - │ │ histogram(6)= 0 73.644 110.47 0 - │ │ <--- '\x376700012a0e00' -------- '\x376700012a0e01' + │ │ histogram(6)= 0 100 5.9476e-16 0 + │ │ <--- '\x376700012a0e00' ------------ '\x376700012a0e01' │ └── key: (1) └── filters └── s:4 = 'banana' [type=bool, outer=(4), constraints=(/4: [/'banana' - /'banana']; tight), fd=()-->(4)] @@ -1109,7 +1109,7 @@ select ├── fd: ()-->(4), (1)-->(2,3) ├── index-join inv_hist │ ├── columns: k:1(int!null) i:2(int) j:3(jsonb) s:4(string) - │ ├── stats: [rows=184.108911] + │ ├── stats: [rows=100] │ ├── key: (1) │ ├── fd: (1)-->(2-4) │ └── scan inv_hist@partial,partial @@ -1117,11 +1117,11 @@ select │ ├── inverted constraint: /6/1 │ │ └── spans: ["7g\x00\x01*\x0e\x00", "7g\x00\x01*\x0e\x00"] │ ├── flags: force-index=partial - │ ├── stats: [rows=184.108911, distinct(4)=2, null(4)=0, distinct(6)=50.5, null(6)=0, distinct(4,6)=101, null(4,6)=0] - │ │ histogram(4)= 0 92.054 0 92.054 + │ ├── stats: [rows=100, distinct(4)=2, null(4)=0, distinct(6)=1, null(6)=0, distinct(4,6)=2, null(4,6)=0] + │ │ histogram(4)= 0 50 0 50 │ │ <--- 'banana' --- 'cherry' - │ │ histogram(6)= 0 73.644 110.47 0 - │ │ <--- '\x376700012a0e00' -------- '\x376700012a0e01' + │ │ histogram(6)= 0 100 5.9476e-16 0 + │ │ <--- '\x376700012a0e00' ------------ '\x376700012a0e01' │ └── key: (1) └── filters └── s:4 = 'banana' [type=bool, outer=(4), constraints=(/4: [/'banana' - /'banana']; tight), fd=()-->(4)] @@ -1141,7 +1141,7 @@ select ├── fd: (1)-->(2-4) ├── index-join inv_hist │ ├── columns: k:1(int!null) i:2(int) j:3(jsonb) s:4(string) - │ ├── stats: [rows=184.108911] + │ ├── stats: [rows=100] │ ├── key: (1) │ ├── fd: (1)-->(2-4) │ └── scan inv_hist@partial,partial @@ -1149,11 +1149,11 @@ select │ ├── inverted constraint: /6/1 │ │ └── spans: ["7g\x00\x01*\x0e\x00", "7g\x00\x01*\x0e\x00"] │ ├── flags: force-index=partial - │ ├── stats: [rows=184.108911, distinct(4)=2, null(4)=0, distinct(6)=50.5, null(6)=0, distinct(4,6)=101, null(4,6)=0] - │ │ histogram(4)= 0 92.054 0 92.054 + │ ├── stats: [rows=100, distinct(4)=2, null(4)=0, distinct(6)=1, null(6)=0, distinct(4,6)=2, null(4,6)=0] + │ │ histogram(4)= 0 50 0 50 │ │ <--- 'banana' --- 'cherry' - │ │ histogram(6)= 0 73.644 110.47 0 - │ │ <--- '\x376700012a0e00' -------- '\x376700012a0e01' + │ │ histogram(6)= 0 100 5.9476e-16 0 + │ │ <--- '\x376700012a0e00' ------------ '\x376700012a0e01' │ └── key: (1) └── filters └── (i:2 > 0) AND (i:2 <= 100) [type=bool, outer=(2), constraints=(/2: [/1 - /100]; tight)] @@ -1452,7 +1452,7 @@ project ├── fd: (1)-->(2,3) ├── index-join spatial │ ├── columns: k:1(int!null) g:2(geometry) s:3(string) - │ ├── stats: [rows=300] + │ ├── stats: [rows=118.756842] │ ├── key: (1) │ ├── fd: (1)-->(2,3) │ └── inverted-filter @@ -1465,7 +1465,7 @@ project │ │ └── ["B\xfd\x14\x00\x00\x00\x00\x00\x00\x00", "B\xfd\x14\x00\x00\x00\x00\x00\x00\x00"] │ ├── pre-filterer expression │ │ └── st_intersects('010200000002000000000000000000E03F000000000000E03F666666666666E63F666666666666E63F', g:2) [type=bool] - │ ├── stats: [rows=300] + │ ├── stats: [rows=118.756842] │ ├── key: (1) │ └── scan spatial@p,partial │ ├── columns: k:1(int!null) g_inverted_key:6(geometry!null) @@ -1474,11 +1474,11 @@ project │ │ ├── ["B\xfd\x10\x00\x00\x00\x00\x00\x00\x00", "B\xfd\x10\x00\x00\x00\x00\x00\x00\x00"] │ │ ├── ["B\xfd\x10\x00\x00\x00\x00\x00\x00\x01", "B\xfd\x12\x00\x00\x00\x00\x00\x00\x00") │ │ └── ["B\xfd\x14\x00\x00\x00\x00\x00\x00\x00", "B\xfd\x14\x00\x00\x00\x00\x00\x00\x00"] - │ ├── stats: [rows=300, distinct(1)=85.7142857, null(1)=0, distinct(3)=2, null(3)=0, distinct(6)=3, null(6)=0, distinct(3,6)=6, null(3,6)=0] - │ │ histogram(3)= 0 100 0 100 + │ ├── stats: [rows=118.756842, distinct(1)=33.9305263, null(1)=0, distinct(3)=2, null(3)=0, distinct(6)=1.18756842, null(6)=0, distinct(3,6)=2.37513684, null(3,6)=0] + │ │ histogram(3)= 0 59.378 0 59.378 │ │ <--- 'banana' --- 'cherry' - │ │ histogram(6)= 0 0 50 0 50 100 50 0 0 0 50 0 - │ │ <--- '\x42fd1000000000000000' ---- '\x42fd1000000000000001' ---- '\x42fd1000000100000000' ---- '\x42fd1200000000000000' --- '\x42fd1400000000000000' ---- '\x42fd1400000000000001' + │ │ histogram(6)= 0 0 1.8225e-10 100 18.757 0 0 0 + │ │ <--- '\x42fd1000000000000001' ------------ '\x42fd1000000100000000' -------- '\x42fd1200000000000000' --- '\x42fd1400000000000001' │ ├── key: (1) │ └── fd: (1)-->(6) └── filters @@ -1504,7 +1504,7 @@ project ├── fd: ()-->(3), (1)-->(2) ├── index-join spatial │ ├── columns: k:1(int!null) g:2(geometry) s:3(string) - │ ├── stats: [rows=300] + │ ├── stats: [rows=118.756842] │ ├── key: (1) │ ├── fd: (1)-->(2,3) │ └── inverted-filter @@ -1517,7 +1517,7 @@ project │ │ └── ["B\xfd\x14\x00\x00\x00\x00\x00\x00\x00", "B\xfd\x14\x00\x00\x00\x00\x00\x00\x00"] │ ├── pre-filterer expression │ │ └── st_intersects('010200000002000000000000000000E03F000000000000E03F666666666666E63F666666666666E63F', g:2) [type=bool] - │ ├── stats: [rows=300] + │ ├── stats: [rows=118.756842] │ ├── key: (1) │ └── scan spatial@p,partial │ ├── columns: k:1(int!null) g_inverted_key:6(geometry!null) @@ -1526,11 +1526,11 @@ project │ │ ├── ["B\xfd\x10\x00\x00\x00\x00\x00\x00\x00", "B\xfd\x10\x00\x00\x00\x00\x00\x00\x00"] │ │ ├── ["B\xfd\x10\x00\x00\x00\x00\x00\x00\x01", "B\xfd\x12\x00\x00\x00\x00\x00\x00\x00") │ │ └── ["B\xfd\x14\x00\x00\x00\x00\x00\x00\x00", "B\xfd\x14\x00\x00\x00\x00\x00\x00\x00"] - │ ├── stats: [rows=300, distinct(1)=85.7142857, null(1)=0, distinct(3)=2, null(3)=0, distinct(6)=3, null(6)=0, distinct(3,6)=6, null(3,6)=0] - │ │ histogram(3)= 0 100 0 100 + │ ├── stats: [rows=118.756842, distinct(1)=33.9305263, null(1)=0, distinct(3)=2, null(3)=0, distinct(6)=1.18756842, null(6)=0, distinct(3,6)=2.37513684, null(3,6)=0] + │ │ histogram(3)= 0 59.378 0 59.378 │ │ <--- 'banana' --- 'cherry' - │ │ histogram(6)= 0 0 50 0 50 100 50 0 0 0 50 0 - │ │ <--- '\x42fd1000000000000000' ---- '\x42fd1000000000000001' ---- '\x42fd1000000100000000' ---- '\x42fd1200000000000000' --- '\x42fd1400000000000000' ---- '\x42fd1400000000000001' + │ │ histogram(6)= 0 0 1.8225e-10 100 18.757 0 0 0 + │ │ <--- '\x42fd1000000000000001' ------------ '\x42fd1000000100000000' -------- '\x42fd1200000000000000' --- '\x42fd1400000000000001' │ ├── key: (1) │ └── fd: (1)-->(6) └── filters @@ -1590,7 +1590,7 @@ project ├── fd: (1)-->(2,3) ├── index-join spatial │ ├── columns: k:1(int!null) g:2(geometry) s:3(string) - │ ├── stats: [rows=307.105263] + │ ├── stats: [rows=121.569504] │ ├── key: (1) │ ├── fd: (1)-->(2,3) │ └── inverted-filter @@ -1603,7 +1603,7 @@ project │ │ └── ["B\xfd\x14\x00\x00\x00\x00\x00\x00\x00", "B\xfd\x14\x00\x00\x00\x00\x00\x00\x00"] │ ├── pre-filterer expression │ │ └── st_intersects('010200000002000000000000000000E03F000000000000E03F666666666666E63F666666666666E63F', g:2) [type=bool] - │ ├── stats: [rows=307.105263] + │ ├── stats: [rows=121.569504] │ ├── key: (1) │ └── scan spatial@p,partial │ ├── columns: k:1(int!null) g_inverted_key:6(geometry!null) @@ -1612,11 +1612,11 @@ project │ │ ├── ["B\xfd\x10\x00\x00\x00\x00\x00\x00\x00", "B\xfd\x10\x00\x00\x00\x00\x00\x00\x00"] │ │ ├── ["B\xfd\x10\x00\x00\x00\x00\x00\x00\x01", "B\xfd\x12\x00\x00\x00\x00\x00\x00\x00") │ │ └── ["B\xfd\x14\x00\x00\x00\x00\x00\x00\x00", "B\xfd\x14\x00\x00\x00\x00\x00\x00\x00"] - │ ├── stats: [rows=307.105263, distinct(1)=87.7443609, null(1)=0, distinct(3)=2, null(3)=0, distinct(6)=3, null(6)=0, distinct(3,6)=6, null(3,6)=0] - │ │ histogram(3)= 0 100 0 100 + │ ├── stats: [rows=121.569504, distinct(1)=34.7341441, null(1)=0, distinct(3)=2, null(3)=0, distinct(6)=1.18756842, null(6)=0, distinct(3,6)=2.37513684, null(3,6)=0] + │ │ histogram(3)= 0 60.785 0 60.785 │ │ <--- 'banana' --- 'cherry' - │ │ histogram(6)= 0 0 51.184 0 51.184 102.37 51.184 0 0 0 51.184 0 - │ │ <--- '\x42fd1000000000000000' -------- '\x42fd1000000000000001' -------- '\x42fd1000000100000000' -------- '\x42fd1200000000000000' --- '\x42fd1400000000000000' -------- '\x42fd1400000000000001' + │ │ histogram(6)= 0 0 1.8657e-10 102.37 19.201 0 0 0 + │ │ <--- '\x42fd1000000000000001' ------------ '\x42fd1000000100000000' -------- '\x42fd1200000000000000' --- '\x42fd1400000000000001' │ ├── key: (1) │ └── fd: (1)-->(6) └── filters @@ -1640,7 +1640,7 @@ project ├── fd: ()-->(3), (1)-->(2) ├── index-join spatial │ ├── columns: k:1(int!null) g:2(geometry) s:3(string) - │ ├── stats: [rows=307.105263] + │ ├── stats: [rows=121.569504] │ ├── key: (1) │ ├── fd: (1)-->(2,3) │ └── inverted-filter @@ -1653,7 +1653,7 @@ project │ │ └── ["B\xfd\x14\x00\x00\x00\x00\x00\x00\x00", "B\xfd\x14\x00\x00\x00\x00\x00\x00\x00"] │ ├── pre-filterer expression │ │ └── st_intersects('010200000002000000000000000000E03F000000000000E03F666666666666E63F666666666666E63F', g:2) [type=bool] - │ ├── stats: [rows=307.105263] + │ ├── stats: [rows=121.569504] │ ├── key: (1) │ └── scan spatial@p,partial │ ├── columns: k:1(int!null) g_inverted_key:6(geometry!null) @@ -1662,11 +1662,11 @@ project │ │ ├── ["B\xfd\x10\x00\x00\x00\x00\x00\x00\x00", "B\xfd\x10\x00\x00\x00\x00\x00\x00\x00"] │ │ ├── ["B\xfd\x10\x00\x00\x00\x00\x00\x00\x01", "B\xfd\x12\x00\x00\x00\x00\x00\x00\x00") │ │ └── ["B\xfd\x14\x00\x00\x00\x00\x00\x00\x00", "B\xfd\x14\x00\x00\x00\x00\x00\x00\x00"] - │ ├── stats: [rows=307.105263, distinct(1)=87.7443609, null(1)=0, distinct(3)=2, null(3)=0, distinct(6)=3, null(6)=0, distinct(3,6)=6, null(3,6)=0] - │ │ histogram(3)= 0 100 0 100 + │ ├── stats: [rows=121.569504, distinct(1)=34.7341441, null(1)=0, distinct(3)=2, null(3)=0, distinct(6)=1.18756842, null(6)=0, distinct(3,6)=2.37513684, null(3,6)=0] + │ │ histogram(3)= 0 60.785 0 60.785 │ │ <--- 'banana' --- 'cherry' - │ │ histogram(6)= 0 0 51.184 0 51.184 102.37 51.184 0 0 0 51.184 0 - │ │ <--- '\x42fd1000000000000000' -------- '\x42fd1000000000000001' -------- '\x42fd1000000100000000' -------- '\x42fd1200000000000000' --- '\x42fd1400000000000000' -------- '\x42fd1400000000000001' + │ │ histogram(6)= 0 0 1.8657e-10 102.37 19.201 0 0 0 + │ │ <--- '\x42fd1000000000000001' ------------ '\x42fd1000000100000000' -------- '\x42fd1200000000000000' --- '\x42fd1400000000000001' │ ├── key: (1) │ └── fd: (1)-->(6) └── filters diff --git a/pkg/sql/opt/props/histogram.go b/pkg/sql/opt/props/histogram.go index e29853a00dc2..54d7020f8bf3 100644 --- a/pkg/sql/opt/props/histogram.go +++ b/pkg/sql/opt/props/histogram.go @@ -817,9 +817,10 @@ func getRangesBeforeAndAfter( ) (rngBefore, rngAfter float64, ok bool) { // Utilizes an array to simplify number of repetitive calls. - boundArr := []tree.Datum{lowerBoundBefore, upperBoundBefore, lowerBoundAfter, - upperBoundAfter} - boundArrByte := make([][]byte, 4) + boundArr := [4]tree.Datum{ + lowerBoundBefore, upperBoundBefore, lowerBoundAfter, upperBoundAfter, + } + var boundArrByte [4][]byte for i := range boundArr { var err error @@ -874,7 +875,7 @@ func isDiscrete(typ *types.T) bool { // cockroach db. func isNonNumeric(typ *types.T) bool { switch typ.Family() { - case types.StringFamily, types.UuidFamily, types.INetFamily: + case types.StringFamily, types.BytesFamily, types.UuidFamily, types.INetFamily: return true } return false @@ -883,12 +884,7 @@ func isNonNumeric(typ *types.T) bool { // getCommonPrefix returns the first index where the value at said index differs // across all byte arrays in byteArr. byteArr must contain at least one element // to compute a common prefix. -func getCommonPrefix(byteArr [][]byte) int { - - if len(byteArr) <= 0 { - panic(errors.AssertionFailedf("byteArr must have at least one element")) - } - +func getCommonPrefix(byteArr [4][]byte) int { // Checks if the current value at index is the same between all byte arrays. currIndMatching := func(ind int) bool { for i := 0; i < len(byteArr); i++ { diff --git a/pkg/sql/opt/props/histogram_test.go b/pkg/sql/opt/props/histogram_test.go index 4b0095c15e20..0815548f3b7b 100644 --- a/pkg/sql/opt/props/histogram_test.go +++ b/pkg/sql/opt/props/histogram_test.go @@ -696,67 +696,82 @@ func TestFilterBucket(t *testing.T) { runTest(h2, testData2, 0 /* colOffset */, types.TimeTZFamily) }) - t.Run("string", func(t *testing.T) { - h1 := &Histogram{evalCtx: &evalCtx, col: col, buckets: []cat.HistogramBucket{ - {NumEq: 0, NumRange: 0, DistinctRange: 0, UpperBound: getPrevUpperBound(tree.NewDString("bear"))}, - {NumEq: 5, NumRange: 10, DistinctRange: 10, UpperBound: tree.NewDString("bobcat")}, - }} - h2 := &Histogram{evalCtx: &evalCtx, col: col, buckets: []cat.HistogramBucket{ - {NumEq: 0, NumRange: 0, DistinctRange: 0, UpperBound: getPrevUpperBound(tree.NewDString("a"))}, - {NumEq: 5, NumRange: 10, DistinctRange: 10, UpperBound: tree.NewDString("c")}, - }} - h3 := &Histogram{evalCtx: &evalCtx, col: col, buckets: []cat.HistogramBucket{ - {NumEq: 0, NumRange: 0, DistinctRange: 0, UpperBound: getPrevUpperBound(tree.NewDString("aaaaaaaaaaaa"))}, - {NumEq: 5, NumRange: 10, DistinctRange: 10, UpperBound: tree.NewDString("cccccccccccc")}, - }} - - t1 := []testCase{ + t.Run("string/bytes", func(t *testing.T) { + typesToTest := []struct { + family types.Family + createDatumFn func(string) tree.Datum + }{ { - span: "[/bluejay - /boar]", - expected: &cat.HistogramBucket{NumEq: 0, NumRange: 2.92, DistinctRange: 2.92, UpperBound: tree.NewDString("boar")}, + family: types.StringFamily, + createDatumFn: func(s string) tree.Datum { return tree.NewDString(s) }, }, { - span: "[/beer - /bobcat]", - expected: &cat.HistogramBucket{NumEq: 5, NumRange: 9.98, DistinctRange: 9.98, UpperBound: tree.NewDString("bobcat")}, + family: types.BytesFamily, + createDatumFn: func(s string) tree.Datum { return tree.NewDBytes(tree.DBytes(s)) }, }, } + for _, typ := range typesToTest { + h1 := &Histogram{evalCtx: &evalCtx, col: col, buckets: []cat.HistogramBucket{ + {NumEq: 0, NumRange: 0, DistinctRange: 0, UpperBound: getPrevUpperBound(typ.createDatumFn("bear"))}, + {NumEq: 5, NumRange: 10, DistinctRange: 10, UpperBound: typ.createDatumFn("bobcat")}, + }} + h2 := &Histogram{evalCtx: &evalCtx, col: col, buckets: []cat.HistogramBucket{ + {NumEq: 0, NumRange: 0, DistinctRange: 0, UpperBound: getPrevUpperBound(typ.createDatumFn("a"))}, + {NumEq: 5, NumRange: 10, DistinctRange: 10, UpperBound: typ.createDatumFn("c")}, + }} + h3 := &Histogram{evalCtx: &evalCtx, col: col, buckets: []cat.HistogramBucket{ + {NumEq: 0, NumRange: 0, DistinctRange: 0, UpperBound: getPrevUpperBound(typ.createDatumFn("aaaaaaaaaaaa"))}, + {NumEq: 5, NumRange: 10, DistinctRange: 10, UpperBound: typ.createDatumFn("cccccccccccc")}, + }} + + t1 := []testCase{ + { + span: "[/bluejay - /boar]", + expected: &cat.HistogramBucket{NumEq: 0, NumRange: 2.92, DistinctRange: 2.92, UpperBound: typ.createDatumFn("boar")}, + }, + { + span: "[/beer - /bobcat]", + expected: &cat.HistogramBucket{NumEq: 5, NumRange: 9.98, DistinctRange: 9.98, UpperBound: typ.createDatumFn("bobcat")}, + }, + } - t2 := []testCase{ - // Within the CRDB encoding, all null bytes are followed by an escape byte, - // (255) which are left in for the rangeAfter calculations. For this - // reason, the resulting NumRange is slightly lower than expected at 4.99 - // instead of 5. - { - span: "[/a\x00 - /b]", - expected: &cat.HistogramBucket{NumEq: 0, NumRange: 4.99, DistinctRange: 4.99, UpperBound: tree.NewDString("b")}, - }, - { - span: "[/as - /b]", - expected: &cat.HistogramBucket{NumEq: 0, NumRange: 2.76, DistinctRange: 2.76, UpperBound: tree.NewDString("b")}, - }, - { - span: "[/as - /c]", - expected: &cat.HistogramBucket{NumEq: 5, NumRange: 7.77, DistinctRange: 7.77, UpperBound: tree.NewDString("c")}, - }, - { - span: "[/bs - /c]", - expected: &cat.HistogramBucket{NumEq: 5, NumRange: 2.76, DistinctRange: 2.76, UpperBound: tree.NewDString("c")}, - }, - } + t2 := []testCase{ + // Within the CRDB encoding, all null bytes are followed by an escape byte, + // (255) which are left in for the rangeAfter calculations. For this + // reason, the resulting NumRange is slightly lower than expected at 4.99 + // instead of 5. + { + span: "[/a\x00 - /b]", + expected: &cat.HistogramBucket{NumEq: 0, NumRange: 4.99, DistinctRange: 4.99, UpperBound: typ.createDatumFn("b")}, + }, + { + span: "[/as - /b]", + expected: &cat.HistogramBucket{NumEq: 0, NumRange: 2.76, DistinctRange: 2.76, UpperBound: typ.createDatumFn("b")}, + }, + { + span: "[/as - /c]", + expected: &cat.HistogramBucket{NumEq: 5, NumRange: 7.77, DistinctRange: 7.77, UpperBound: typ.createDatumFn("c")}, + }, + { + span: "[/bs - /c]", + expected: &cat.HistogramBucket{NumEq: 5, NumRange: 2.76, DistinctRange: 2.76, UpperBound: typ.createDatumFn("c")}, + }, + } - // The initial 8 bytes for lowerBound and upperBound of the span is the same. - // Hence, the resulting NumRange/DistinctRange should be 0, as rangeAfter - // only considers the first 8 bytes of the bounds. - t3 := []testCase{ - { - span: "[/aaaaaaaabbbb - /aaaaaaaacccc]", - expected: &cat.HistogramBucket{NumEq: 0, NumRange: 0, DistinctRange: 0, UpperBound: tree.NewDString("aaaaaaaacccc")}, - }, - } + // The initial 8 bytes for lowerBound and upperBound of the span is the same. + // Hence, the resulting NumRange/DistinctRange should be 0, as rangeAfter + // only considers the first 8 bytes of the bounds. + t3 := []testCase{ + { + span: "[/aaaaaaaabbbb - /aaaaaaaacccc]", + expected: &cat.HistogramBucket{NumEq: 0, NumRange: 0, DistinctRange: 0, UpperBound: typ.createDatumFn("aaaaaaaacccc")}, + }, + } - runTest(h1, t1, 0 /* colOffset */, types.StringFamily) - runTest(h2, t2, 0 /* colOffset */, types.StringFamily) - runTest(h3, t3, 0 /* colOffset */, types.StringFamily) + runTest(h1, t1, 0 /* colOffset */, typ.family) + runTest(h2, t2, 0 /* colOffset */, typ.family) + runTest(h3, t3, 0 /* colOffset */, typ.family) + } }) t.Run("uuid", func(t *testing.T) {