diff --git a/pkg/kv/kvserver/client_tenant_test.go b/pkg/kv/kvserver/client_tenant_test.go index 41b92928de7f..327282a0dcb3 100644 --- a/pkg/kv/kvserver/client_tenant_test.go +++ b/pkg/kv/kvserver/client_tenant_test.go @@ -29,6 +29,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/storage/enginepb" "github.com/cockroachdb/cockroach/pkg/testutils" "github.com/cockroachdb/cockroach/pkg/testutils/serverutils" + "github.com/cockroachdb/cockroach/pkg/testutils/skip" "github.com/cockroachdb/cockroach/pkg/testutils/sqlutils" "github.com/cockroachdb/cockroach/pkg/util/encoding" "github.com/cockroachdb/cockroach/pkg/util/leaktest" @@ -142,6 +143,7 @@ func TestTenantsStorageMetricsOnSplit(t *testing.T) { // and report the correct metrics. func TestTenantRateLimiter(t *testing.T) { defer leaktest.AfterTest(t)() + skip.WithIssue(t, 70456, "flaky test") defer log.Scope(t).Close(t) // This test utilizes manual time to make the rate-limiting calculations more diff --git a/pkg/sql/opt/constraint/testutils.go b/pkg/sql/opt/constraint/testutils.go index 4606baae4fcb..9fb231980987 100644 --- a/pkg/sql/opt/constraint/testutils.go +++ b/pkg/sql/opt/constraint/testutils.go @@ -151,6 +151,8 @@ func parseDatumPath(evalCtx *tree.EvalContext, str string, typs []types.Family) val, _, err = tree.ParseDTimestampTZ(evalCtx, valStr, time.Microsecond) case types.StringFamily: val = tree.NewDString(valStr) + case types.BytesFamily: + val = tree.NewDBytes(tree.DBytes(valStr)) case types.OidFamily: dInt, err := tree.ParseDInt(valStr) if err == nil { diff --git a/pkg/sql/opt/memo/testdata/stats/inverted-geo b/pkg/sql/opt/memo/testdata/stats/inverted-geo index 0b8f3c06f4d3..c66887508e5c 100644 --- a/pkg/sql/opt/memo/testdata/stats/inverted-geo +++ b/pkg/sql/opt/memo/testdata/stats/inverted-geo @@ -118,22 +118,22 @@ memo (optimized, ~11KB, required=[presentation: i:1]) │ └── cost: 2124.52 ├── G7: (filters G9) ├── G8: (index-join G10 t,cols=(1,2)) - │ ├── [ordering: +1] [limit hint: 13.50] + │ ├── [ordering: +1] [limit hint: 5.34] │ │ ├── best: (sort G8) - │ │ └── cost: 22166.50 + │ │ └── cost: 8755.99 │ └── [] │ ├── best: (index-join G10 t,cols=(1,2)) - │ └── cost: 21352.06 + │ └── cost: 8465.67 ├── G9: (function G11 st_intersects) ├── G10: (inverted-filter G12 g_inverted_key) │ └── [] │ ├── best: (inverted-filter G12 g_inverted_key) - │ └── cost: 3172.04 + │ └── cost: 1268.99 ├── G11: (scalar-list G13 G14) ├── G12: (scan t@secondary,cols=(3,6),constrained inverted) │ └── [] │ ├── best: (scan t@secondary,cols=(3,6),constrained inverted) - │ └── cost: 3142.02 + │ └── cost: 1257.09 ├── G13: (const '010200000002000000000000000000E03F000000000000E03F666666666666E63F666666666666E63F') └── G14: (variable g) diff --git a/pkg/sql/opt/memo/testdata/stats/inverted-geo-multi-column b/pkg/sql/opt/memo/testdata/stats/inverted-geo-multi-column index a50409d7082a..94aa5f9ba41e 100644 --- a/pkg/sql/opt/memo/testdata/stats/inverted-geo-multi-column +++ b/pkg/sql/opt/memo/testdata/stats/inverted-geo-multi-column @@ -83,7 +83,7 @@ project ├── fd: ()-->(3), (1)-->(2) ├── index-join t │ ├── columns: k:1(int!null) g:2(geometry) s:3(string) - │ ├── stats: [rows=153.552632] + │ ├── stats: [rows=60.7847521] │ ├── key: (1) │ ├── fd: (1)-->(2,3) │ └── inverted-filter @@ -96,7 +96,7 @@ project │ │ └── ["B\xfd\x14\x00\x00\x00\x00\x00\x00\x00", "B\xfd\x14\x00\x00\x00\x00\x00\x00\x00"] │ ├── pre-filterer expression │ │ └── st_intersects('010200000002000000000000000000E03F000000000000E03F666666666666E63F666666666666E63F', g:2) [type=bool] - │ ├── stats: [rows=153.552632] + │ ├── stats: [rows=60.7847521] │ ├── key: (1) │ └── scan t@m │ ├── columns: k:1(int!null) g_inverted_key:7(geometry!null) @@ -107,11 +107,11 @@ project │ │ ├── ["B\xfd\x10\x00\x00\x00\x00\x00\x00\x01", "B\xfd\x12\x00\x00\x00\x00\x00\x00\x00") │ │ └── ["B\xfd\x14\x00\x00\x00\x00\x00\x00\x00", "B\xfd\x14\x00\x00\x00\x00\x00\x00\x00"] │ ├── flags: force-index=m - │ ├── stats: [rows=153.552632, distinct(1)=43.8721804, null(1)=0, distinct(3)=1, null(3)=0, distinct(7)=3, null(7)=0, distinct(3,7)=3, null(3,7)=0] - │ │ histogram(3)= 0 100 + │ ├── stats: [rows=60.7847521, distinct(1)=17.367072, null(1)=0, distinct(3)=1, null(3)=0, distinct(7)=1.18756842, null(7)=0, distinct(3,7)=1.18756842, null(3,7)=0] + │ │ histogram(3)= 0 60.785 │ │ <--- 'banana' - │ │ histogram(7)= 0 0 25.592 0 25.592 51.184 25.592 0 0 0 25.592 0 - │ │ <--- '\x42fd1000000000000000' -------- '\x42fd1000000000000001' -------- '\x42fd1000000100000000' -------- '\x42fd1200000000000000' --- '\x42fd1400000000000000' -------- '\x42fd1400000000000001' + │ │ histogram(7)= 0 0 9.3283e-11 51.184 9.6005 0 0 0 + │ │ <--- '\x42fd1000000000000001' ------------ '\x42fd1000000100000000' -------- '\x42fd1200000000000000' --- '\x42fd1400000000000001' │ ├── key: (1) │ └── fd: (1)-->(7) └── filters @@ -135,7 +135,7 @@ project ├── fd: ()-->(3), (1)-->(2) ├── index-join t │ ├── columns: k:1(int!null) g:2(geometry) s:3(string) - │ ├── stats: [rows=153.552632] + │ ├── stats: [rows=60.7847521] │ ├── key: (1) │ ├── fd: (1)-->(2,3) │ └── inverted-filter @@ -148,7 +148,7 @@ project │ │ └── ["B\xfd\x14\x00\x00\x00\x00\x00\x00\x00", "B\xfd\x14\x00\x00\x00\x00\x00\x00\x00"] │ ├── pre-filterer expression │ │ └── st_intersects('010200000002000000000000000000E03F000000000000E03F666666666666E63F666666666666E63F', g:2) [type=bool] - │ ├── stats: [rows=153.552632] + │ ├── stats: [rows=60.7847521] │ ├── key: (1) │ └── scan t@p,partial │ ├── columns: k:1(int!null) g_inverted_key:8(geometry!null) @@ -158,11 +158,11 @@ project │ │ ├── ["B\xfd\x10\x00\x00\x00\x00\x00\x00\x01", "B\xfd\x12\x00\x00\x00\x00\x00\x00\x00") │ │ └── ["B\xfd\x14\x00\x00\x00\x00\x00\x00\x00", "B\xfd\x14\x00\x00\x00\x00\x00\x00\x00"] │ ├── flags: force-index=p - │ ├── stats: [rows=153.552632, distinct(1)=43.8721804, null(1)=0, distinct(3)=1, null(3)=0, distinct(8)=3, null(8)=0, distinct(3,8)=3, null(3,8)=0] - │ │ histogram(3)= 0 100 + │ ├── stats: [rows=60.7847521, distinct(1)=17.367072, null(1)=0, distinct(3)=1, null(3)=0, distinct(8)=1.18756842, null(8)=0, distinct(3,8)=1.18756842, null(3,8)=0] + │ │ histogram(3)= 0 60.785 │ │ <--- 'banana' - │ │ histogram(8)= 0 0 25.592 0 25.592 51.184 25.592 0 0 0 25.592 0 - │ │ <--- '\x42fd1000000000000000' -------- '\x42fd1000000000000001' -------- '\x42fd1000000100000000' -------- '\x42fd1200000000000000' --- '\x42fd1400000000000000' -------- '\x42fd1400000000000001' + │ │ histogram(8)= 0 0 9.3283e-11 51.184 9.6005 0 0 0 + │ │ <--- '\x42fd1000000000000001' ------------ '\x42fd1000000100000000' -------- '\x42fd1200000000000000' --- '\x42fd1400000000000001' │ ├── key: (1) │ └── fd: (1)-->(8) └── filters @@ -198,7 +198,7 @@ project ├── fd: (1)-->(2,3) ├── index-join t │ ├── columns: k:1(int!null) g:2(geometry) s:3(string) - │ ├── stats: [rows=307.105263] + │ ├── stats: [rows=121.569504] │ ├── key: (1) │ ├── fd: (1)-->(2,3) │ └── inverted-filter @@ -211,7 +211,7 @@ project │ │ └── ["B\xfd\x14\x00\x00\x00\x00\x00\x00\x00", "B\xfd\x14\x00\x00\x00\x00\x00\x00\x00"] │ ├── pre-filterer expression │ │ └── st_intersects('010200000002000000000000000000E03F000000000000E03F666666666666E63F666666666666E63F', g:2) [type=bool] - │ ├── stats: [rows=307.105263] + │ ├── stats: [rows=121.569504] │ ├── key: (1) │ └── scan t@m │ ├── columns: k:1(int!null) g_inverted_key:7(geometry!null) @@ -225,11 +225,11 @@ project │ │ ├── ["B\xfd\x10\x00\x00\x00\x00\x00\x00\x01", "B\xfd\x12\x00\x00\x00\x00\x00\x00\x00") │ │ └── ["B\xfd\x14\x00\x00\x00\x00\x00\x00\x00", "B\xfd\x14\x00\x00\x00\x00\x00\x00\x00"] │ ├── flags: force-index=m - │ ├── stats: [rows=307.105263, distinct(1)=87.7443609, null(1)=0, distinct(3)=2, null(3)=0, distinct(7)=3, null(7)=0, distinct(3,7)=6, null(3,7)=0] - │ │ histogram(3)= 0 100 0 100 + │ ├── stats: [rows=121.569504, distinct(1)=34.7341441, null(1)=0, distinct(3)=2, null(3)=0, distinct(7)=1.18756842, null(7)=0, distinct(3,7)=2.37513684, null(3,7)=0] + │ │ histogram(3)= 0 60.785 0 60.785 │ │ <--- 'banana' --- 'cherry' - │ │ histogram(7)= 0 0 51.184 0 51.184 102.37 51.184 0 0 0 51.184 0 - │ │ <--- '\x42fd1000000000000000' -------- '\x42fd1000000000000001' -------- '\x42fd1000000100000000' -------- '\x42fd1200000000000000' --- '\x42fd1400000000000000' -------- '\x42fd1400000000000001' + │ │ histogram(7)= 0 0 1.8657e-10 102.37 19.201 0 0 0 + │ │ <--- '\x42fd1000000000000001' ------------ '\x42fd1000000100000000' -------- '\x42fd1200000000000000' --- '\x42fd1400000000000001' │ ├── key: (1) │ └── fd: (1)-->(7) └── filters @@ -253,7 +253,7 @@ project ├── fd: (1)-->(2,3) ├── index-join t │ ├── columns: k:1(int!null) g:2(geometry) s:3(string) - │ ├── stats: [rows=307.105263] + │ ├── stats: [rows=121.569504] │ ├── key: (1) │ ├── fd: (1)-->(2,3) │ └── inverted-filter @@ -266,7 +266,7 @@ project │ │ └── ["B\xfd\x14\x00\x00\x00\x00\x00\x00\x00", "B\xfd\x14\x00\x00\x00\x00\x00\x00\x00"] │ ├── pre-filterer expression │ │ └── st_intersects('010200000002000000000000000000E03F000000000000E03F666666666666E63F666666666666E63F', g:2) [type=bool] - │ ├── stats: [rows=307.105263] + │ ├── stats: [rows=121.569504] │ ├── key: (1) │ └── scan t@p,partial │ ├── columns: k:1(int!null) g_inverted_key:9(geometry!null) @@ -276,11 +276,11 @@ project │ │ ├── ["B\xfd\x10\x00\x00\x00\x00\x00\x00\x01", "B\xfd\x12\x00\x00\x00\x00\x00\x00\x00") │ │ └── ["B\xfd\x14\x00\x00\x00\x00\x00\x00\x00", "B\xfd\x14\x00\x00\x00\x00\x00\x00\x00"] │ ├── flags: force-index=p - │ ├── stats: [rows=307.105263, distinct(1)=87.7443609, null(1)=0, distinct(3)=2, null(3)=0, distinct(9)=3, null(9)=0, distinct(3,9)=6, null(3,9)=0] - │ │ histogram(3)= 0 100 0 100 + │ ├── stats: [rows=121.569504, distinct(1)=34.7341441, null(1)=0, distinct(3)=2, null(3)=0, distinct(9)=1.18756842, null(9)=0, distinct(3,9)=2.37513684, null(3,9)=0] + │ │ histogram(3)= 0 60.785 0 60.785 │ │ <--- 'banana' --- 'cherry' - │ │ histogram(9)= 0 0 51.184 0 51.184 102.37 51.184 0 0 0 51.184 0 - │ │ <--- '\x42fd1000000000000000' -------- '\x42fd1000000000000001' -------- '\x42fd1000000100000000' -------- '\x42fd1200000000000000' --- '\x42fd1400000000000000' -------- '\x42fd1400000000000001' + │ │ histogram(9)= 0 0 1.8657e-10 102.37 19.201 0 0 0 + │ │ <--- '\x42fd1000000000000001' ------------ '\x42fd1000000100000000' -------- '\x42fd1200000000000000' --- '\x42fd1400000000000001' │ ├── key: (1) │ └── fd: (1)-->(9) └── filters @@ -316,7 +316,7 @@ project ├── fd: ()-->(4), (1)-->(2,3) ├── index-join t │ ├── columns: k:1(int!null) g:2(geometry) s:3(string) i:4(int) - │ ├── stats: [rows=24.0813118] + │ ├── stats: [rows=9.53273514] │ ├── key: (1) │ ├── fd: (1)-->(2-4) │ └── inverted-filter @@ -329,7 +329,7 @@ project │ │ └── ["B\xfd\x14\x00\x00\x00\x00\x00\x00\x00", "B\xfd\x14\x00\x00\x00\x00\x00\x00\x00"] │ ├── pre-filterer expression │ │ └── st_intersects('010200000002000000000000000000E03F000000000000E03F666666666666E63F666666666666E63F', g:2) [type=bool] - │ ├── stats: [rows=24.0813118] + │ ├── stats: [rows=9.53273514] │ ├── key: (1) │ └── scan t@mp,partial │ ├── columns: k:1(int!null) g_inverted_key:10(geometry!null) @@ -340,13 +340,13 @@ project │ │ ├── ["B\xfd\x10\x00\x00\x00\x00\x00\x00\x01", "B\xfd\x12\x00\x00\x00\x00\x00\x00\x00") │ │ └── ["B\xfd\x14\x00\x00\x00\x00\x00\x00\x00", "B\xfd\x14\x00\x00\x00\x00\x00\x00\x00"] │ ├── flags: force-index=mp - │ ├── stats: [rows=24.0813118, distinct(1)=6.88037479, null(1)=0, distinct(3)=2, null(3)=0, distinct(4)=1, null(4)=0, distinct(10)=3, null(10)=0, distinct(3,4,10)=6, null(3,4,10)=0] - │ │ histogram(3)= 0 12.041 0 12.041 + │ ├── stats: [rows=9.53273514, distinct(1)=2.72363861, null(1)=0, distinct(3)=2, null(3)=0, distinct(4)=1, null(4)=0, distinct(10)=1.18756842, null(10)=0, distinct(3,4,10)=2.37513684, null(3,4,10)=0] + │ │ histogram(3)= 0 4.7664 0 4.7664 │ │ <--- 'banana' --- 'cherry' - │ │ histogram(4)= 0 24.081 + │ │ histogram(4)= 0 9.5327 │ │ <--- 400 - - │ │ histogram(10)= 0 0 4.0136 0 4.0136 8.0271 4.0136 0 0 0 4.0136 0 - │ │ <--- '\x42fd1000000000000000' -------- '\x42fd1000000000000001' -------- '\x42fd1000000100000000' -------- '\x42fd1200000000000000' --- '\x42fd1400000000000000' -------- '\x42fd1400000000000001' + │ │ histogram(10)= 0 0 1.4629e-11 8.0271 1.5056 0 0 0 + │ │ <--- '\x42fd1000000000000001' ------------ '\x42fd1000000100000000' -------- '\x42fd1200000000000000' --- '\x42fd1400000000000001' │ ├── key: (1) │ └── fd: (1)-->(10) └── filters @@ -372,7 +372,7 @@ project ├── fd: (1)-->(2-4) ├── index-join t │ ├── columns: k:1(int!null) g:2(geometry) s:3(string) i:4(int) - │ ├── stats: [rows=48.1626236] + │ ├── stats: [rows=19.0654703] │ ├── key: (1) │ ├── fd: (1)-->(2-4) │ └── inverted-filter @@ -385,7 +385,7 @@ project │ │ └── ["B\xfd\x14\x00\x00\x00\x00\x00\x00\x00", "B\xfd\x14\x00\x00\x00\x00\x00\x00\x00"] │ ├── pre-filterer expression │ │ └── st_intersects('010200000002000000000000000000E03F000000000000E03F666666666666E63F666666666666E63F', g:2) [type=bool] - │ ├── stats: [rows=48.1626236] + │ ├── stats: [rows=19.0654703] │ ├── key: (1) │ └── scan t@mp,partial │ ├── columns: k:1(int!null) g_inverted_key:10(geometry!null) @@ -399,13 +399,13 @@ project │ │ ├── ["B\xfd\x10\x00\x00\x00\x00\x00\x00\x01", "B\xfd\x12\x00\x00\x00\x00\x00\x00\x00") │ │ └── ["B\xfd\x14\x00\x00\x00\x00\x00\x00\x00", "B\xfd\x14\x00\x00\x00\x00\x00\x00\x00"] │ ├── flags: force-index=mp - │ ├── stats: [rows=48.1626236, distinct(1)=13.7607496, null(1)=0, distinct(3)=2, null(3)=0, distinct(4)=3, null(4)=0, distinct(10)=3, null(10)=0, distinct(3,4,10)=18, null(3,4,10)=0] - │ │ histogram(3)= 0 24.081 0 24.081 + │ ├── stats: [rows=19.0654703, distinct(1)=5.44727722, null(1)=0, distinct(3)=2, null(3)=0, distinct(4)=3, null(4)=0, distinct(10)=1.18756842, null(10)=0, distinct(3,4,10)=7.12541053, null(3,4,10)=0] + │ │ histogram(3)= 0 9.5327 0 9.5327 │ │ <--- 'banana' --- 'cherry' - │ │ histogram(4)= 0 8.0271 0 16.054 0 24.081 + │ │ histogram(4)= 0 3.1776 0 6.3552 0 9.5327 │ │ <--- 200 ---- 300 ---- 400 - - │ │ histogram(10)= 0 0 8.0271 0 8.0271 16.054 8.0271 0 0 0 8.0271 0 - │ │ <--- '\x42fd1000000000000000' -------- '\x42fd1000000000000001' -------- '\x42fd1000000100000000' -------- '\x42fd1200000000000000' --- '\x42fd1400000000000000' -------- '\x42fd1400000000000001' + │ │ histogram(10)= 0 0 2.9259e-11 16.054 3.0113 0 0 0 + │ │ <--- '\x42fd1000000000000001' ------------ '\x42fd1000000100000000' -------- '\x42fd1200000000000000' --- '\x42fd1400000000000001' │ ├── key: (1) │ └── fd: (1)-->(10) └── filters diff --git a/pkg/sql/opt/memo/testdata/stats/partial-index-scan b/pkg/sql/opt/memo/testdata/stats/partial-index-scan index b4d2f41299e0..ca9f6b828a9a 100644 --- a/pkg/sql/opt/memo/testdata/stats/partial-index-scan +++ b/pkg/sql/opt/memo/testdata/stats/partial-index-scan @@ -1036,11 +1036,11 @@ project ├── inverted constraint: /7/1 │ └── spans: ["7g\x00\x01*\x0e\x00", "7g\x00\x01*\x0e\x00"] ├── flags: force-index=partial - ├── stats: [rows=184.108911, distinct(4)=2, null(4)=0, distinct(7)=50.5, null(7)=0, distinct(4,7)=101, null(4,7)=0] - │ histogram(4)= 0 92.054 0 92.054 + ├── stats: [rows=100, distinct(4)=2, null(4)=0, distinct(7)=1, null(7)=0, distinct(4,7)=2, null(4,7)=0] + │ histogram(4)= 0 50 0 50 │ <--- 'banana' --- 'cherry' - │ histogram(7)= 0 73.644 110.47 0 - │ <--- '\x376700012a0e00' -------- '\x376700012a0e01' + │ histogram(7)= 0 100 5.9476e-16 0 + │ <--- '\x376700012a0e00' ------------ '\x376700012a0e01' └── key: (1) opt @@ -1059,11 +1059,11 @@ index-join inv_hist ├── inverted constraint: /7/1 │ └── spans: ["7g\x00\x01*\x0e\x00", "7g\x00\x01*\x0e\x00"] ├── flags: force-index=partial - ├── stats: [rows=184.108911, distinct(4)=2, null(4)=0, distinct(7)=50.5, null(7)=0, distinct(4,7)=101, null(4,7)=0] - │ histogram(4)= 0 92.054 0 92.054 + ├── stats: [rows=100, distinct(4)=2, null(4)=0, distinct(7)=1, null(7)=0, distinct(4,7)=2, null(4,7)=0] + │ histogram(4)= 0 50 0 50 │ <--- 'banana' --- 'cherry' - │ histogram(7)= 0 73.644 110.47 0 - │ <--- '\x376700012a0e00' -------- '\x376700012a0e01' + │ histogram(7)= 0 100 5.9476e-16 0 + │ <--- '\x376700012a0e00' ------------ '\x376700012a0e01' └── key: (1) opt @@ -1084,7 +1084,7 @@ project ├── fd: ()-->(4), (1)-->(3) ├── index-join inv_hist │ ├── columns: k:1(int!null) j:3(jsonb) s:4(string) - │ ├── stats: [rows=184.108911] + │ ├── stats: [rows=100] │ ├── key: (1) │ ├── fd: (1)-->(3,4) │ └── scan inv_hist@partial,partial @@ -1092,11 +1092,11 @@ project │ ├── inverted constraint: /7/1 │ │ └── spans: ["7g\x00\x01*\x0e\x00", "7g\x00\x01*\x0e\x00"] │ ├── flags: force-index=partial - │ ├── stats: [rows=184.108911, distinct(4)=2, null(4)=0, distinct(7)=50.5, null(7)=0, distinct(4,7)=101, null(4,7)=0] - │ │ histogram(4)= 0 92.054 0 92.054 + │ ├── stats: [rows=100, distinct(4)=2, null(4)=0, distinct(7)=1, null(7)=0, distinct(4,7)=2, null(4,7)=0] + │ │ histogram(4)= 0 50 0 50 │ │ <--- 'banana' --- 'cherry' - │ │ histogram(7)= 0 73.644 110.47 0 - │ │ <--- '\x376700012a0e00' -------- '\x376700012a0e01' + │ │ histogram(7)= 0 100 5.9476e-16 0 + │ │ <--- '\x376700012a0e00' ------------ '\x376700012a0e01' │ └── key: (1) └── filters └── s:4 = 'banana' [type=bool, outer=(4), constraints=(/4: [/'banana' - /'banana']; tight), fd=()-->(4)] @@ -1114,7 +1114,7 @@ select ├── fd: ()-->(4), (1)-->(2,3) ├── index-join inv_hist │ ├── columns: k:1(int!null) i:2(int) j:3(jsonb) s:4(string) - │ ├── stats: [rows=184.108911] + │ ├── stats: [rows=100] │ ├── key: (1) │ ├── fd: (1)-->(2-4) │ └── scan inv_hist@partial,partial @@ -1122,11 +1122,11 @@ select │ ├── inverted constraint: /7/1 │ │ └── spans: ["7g\x00\x01*\x0e\x00", "7g\x00\x01*\x0e\x00"] │ ├── flags: force-index=partial - │ ├── stats: [rows=184.108911, distinct(4)=2, null(4)=0, distinct(7)=50.5, null(7)=0, distinct(4,7)=101, null(4,7)=0] - │ │ histogram(4)= 0 92.054 0 92.054 + │ ├── stats: [rows=100, distinct(4)=2, null(4)=0, distinct(7)=1, null(7)=0, distinct(4,7)=2, null(4,7)=0] + │ │ histogram(4)= 0 50 0 50 │ │ <--- 'banana' --- 'cherry' - │ │ histogram(7)= 0 73.644 110.47 0 - │ │ <--- '\x376700012a0e00' -------- '\x376700012a0e01' + │ │ histogram(7)= 0 100 5.9476e-16 0 + │ │ <--- '\x376700012a0e00' ------------ '\x376700012a0e01' │ └── key: (1) └── filters └── s:4 = 'banana' [type=bool, outer=(4), constraints=(/4: [/'banana' - /'banana']; tight), fd=()-->(4)] @@ -1146,7 +1146,7 @@ select ├── fd: (1)-->(2-4) ├── index-join inv_hist │ ├── columns: k:1(int!null) i:2(int) j:3(jsonb) s:4(string) - │ ├── stats: [rows=184.108911] + │ ├── stats: [rows=100] │ ├── key: (1) │ ├── fd: (1)-->(2-4) │ └── scan inv_hist@partial,partial @@ -1154,11 +1154,11 @@ select │ ├── inverted constraint: /7/1 │ │ └── spans: ["7g\x00\x01*\x0e\x00", "7g\x00\x01*\x0e\x00"] │ ├── flags: force-index=partial - │ ├── stats: [rows=184.108911, distinct(4)=2, null(4)=0, distinct(7)=50.5, null(7)=0, distinct(4,7)=101, null(4,7)=0] - │ │ histogram(4)= 0 92.054 0 92.054 + │ ├── stats: [rows=100, distinct(4)=2, null(4)=0, distinct(7)=1, null(7)=0, distinct(4,7)=2, null(4,7)=0] + │ │ histogram(4)= 0 50 0 50 │ │ <--- 'banana' --- 'cherry' - │ │ histogram(7)= 0 73.644 110.47 0 - │ │ <--- '\x376700012a0e00' -------- '\x376700012a0e01' + │ │ histogram(7)= 0 100 5.9476e-16 0 + │ │ <--- '\x376700012a0e00' ------------ '\x376700012a0e01' │ └── key: (1) └── filters └── (i:2 > 0) AND (i:2 <= 100) [type=bool, outer=(2), constraints=(/2: [/1 - /100]; tight)] @@ -1457,7 +1457,7 @@ project ├── fd: (1)-->(2,3) ├── index-join spatial │ ├── columns: k:1(int!null) g:2(geometry) s:3(string) - │ ├── stats: [rows=300] + │ ├── stats: [rows=118.756842] │ ├── key: (1) │ ├── fd: (1)-->(2,3) │ └── inverted-filter @@ -1470,7 +1470,7 @@ project │ │ └── ["B\xfd\x14\x00\x00\x00\x00\x00\x00\x00", "B\xfd\x14\x00\x00\x00\x00\x00\x00\x00"] │ ├── pre-filterer expression │ │ └── st_intersects('010200000002000000000000000000E03F000000000000E03F666666666666E63F666666666666E63F', g:2) [type=bool] - │ ├── stats: [rows=300] + │ ├── stats: [rows=118.756842] │ ├── key: (1) │ └── scan spatial@p,partial │ ├── columns: k:1(int!null) g_inverted_key:7(geometry!null) @@ -1479,11 +1479,11 @@ project │ │ ├── ["B\xfd\x10\x00\x00\x00\x00\x00\x00\x00", "B\xfd\x10\x00\x00\x00\x00\x00\x00\x00"] │ │ ├── ["B\xfd\x10\x00\x00\x00\x00\x00\x00\x01", "B\xfd\x12\x00\x00\x00\x00\x00\x00\x00") │ │ └── ["B\xfd\x14\x00\x00\x00\x00\x00\x00\x00", "B\xfd\x14\x00\x00\x00\x00\x00\x00\x00"] - │ ├── stats: [rows=300, distinct(1)=85.7142857, null(1)=0, distinct(3)=2, null(3)=0, distinct(7)=3, null(7)=0, distinct(3,7)=6, null(3,7)=0] - │ │ histogram(3)= 0 100 0 100 + │ ├── stats: [rows=118.756842, distinct(1)=33.9305263, null(1)=0, distinct(3)=2, null(3)=0, distinct(7)=1.18756842, null(7)=0, distinct(3,7)=2.37513684, null(3,7)=0] + │ │ histogram(3)= 0 59.378 0 59.378 │ │ <--- 'banana' --- 'cherry' - │ │ histogram(7)= 0 0 50 0 50 100 50 0 0 0 50 0 - │ │ <--- '\x42fd1000000000000000' ---- '\x42fd1000000000000001' ---- '\x42fd1000000100000000' ---- '\x42fd1200000000000000' --- '\x42fd1400000000000000' ---- '\x42fd1400000000000001' + │ │ histogram(7)= 0 0 1.8225e-10 100 18.757 0 0 0 + │ │ <--- '\x42fd1000000000000001' ------------ '\x42fd1000000100000000' -------- '\x42fd1200000000000000' --- '\x42fd1400000000000001' │ ├── key: (1) │ └── fd: (1)-->(7) └── filters @@ -1509,7 +1509,7 @@ project ├── fd: ()-->(3), (1)-->(2) ├── index-join spatial │ ├── columns: k:1(int!null) g:2(geometry) s:3(string) - │ ├── stats: [rows=300] + │ ├── stats: [rows=118.756842] │ ├── key: (1) │ ├── fd: (1)-->(2,3) │ └── inverted-filter @@ -1522,7 +1522,7 @@ project │ │ └── ["B\xfd\x14\x00\x00\x00\x00\x00\x00\x00", "B\xfd\x14\x00\x00\x00\x00\x00\x00\x00"] │ ├── pre-filterer expression │ │ └── st_intersects('010200000002000000000000000000E03F000000000000E03F666666666666E63F666666666666E63F', g:2) [type=bool] - │ ├── stats: [rows=300] + │ ├── stats: [rows=118.756842] │ ├── key: (1) │ └── scan spatial@p,partial │ ├── columns: k:1(int!null) g_inverted_key:7(geometry!null) @@ -1531,11 +1531,11 @@ project │ │ ├── ["B\xfd\x10\x00\x00\x00\x00\x00\x00\x00", "B\xfd\x10\x00\x00\x00\x00\x00\x00\x00"] │ │ ├── ["B\xfd\x10\x00\x00\x00\x00\x00\x00\x01", "B\xfd\x12\x00\x00\x00\x00\x00\x00\x00") │ │ └── ["B\xfd\x14\x00\x00\x00\x00\x00\x00\x00", "B\xfd\x14\x00\x00\x00\x00\x00\x00\x00"] - │ ├── stats: [rows=300, distinct(1)=85.7142857, null(1)=0, distinct(3)=2, null(3)=0, distinct(7)=3, null(7)=0, distinct(3,7)=6, null(3,7)=0] - │ │ histogram(3)= 0 100 0 100 + │ ├── stats: [rows=118.756842, distinct(1)=33.9305263, null(1)=0, distinct(3)=2, null(3)=0, distinct(7)=1.18756842, null(7)=0, distinct(3,7)=2.37513684, null(3,7)=0] + │ │ histogram(3)= 0 59.378 0 59.378 │ │ <--- 'banana' --- 'cherry' - │ │ histogram(7)= 0 0 50 0 50 100 50 0 0 0 50 0 - │ │ <--- '\x42fd1000000000000000' ---- '\x42fd1000000000000001' ---- '\x42fd1000000100000000' ---- '\x42fd1200000000000000' --- '\x42fd1400000000000000' ---- '\x42fd1400000000000001' + │ │ histogram(7)= 0 0 1.8225e-10 100 18.757 0 0 0 + │ │ <--- '\x42fd1000000000000001' ------------ '\x42fd1000000100000000' -------- '\x42fd1200000000000000' --- '\x42fd1400000000000001' │ ├── key: (1) │ └── fd: (1)-->(7) └── filters @@ -1595,7 +1595,7 @@ project ├── fd: (1)-->(2,3) ├── index-join spatial │ ├── columns: k:1(int!null) g:2(geometry) s:3(string) - │ ├── stats: [rows=307.105263] + │ ├── stats: [rows=121.569504] │ ├── key: (1) │ ├── fd: (1)-->(2,3) │ └── inverted-filter @@ -1608,7 +1608,7 @@ project │ │ └── ["B\xfd\x14\x00\x00\x00\x00\x00\x00\x00", "B\xfd\x14\x00\x00\x00\x00\x00\x00\x00"] │ ├── pre-filterer expression │ │ └── st_intersects('010200000002000000000000000000E03F000000000000E03F666666666666E63F666666666666E63F', g:2) [type=bool] - │ ├── stats: [rows=307.105263] + │ ├── stats: [rows=121.569504] │ ├── key: (1) │ └── scan spatial@p,partial │ ├── columns: k:1(int!null) g_inverted_key:7(geometry!null) @@ -1617,11 +1617,11 @@ project │ │ ├── ["B\xfd\x10\x00\x00\x00\x00\x00\x00\x00", "B\xfd\x10\x00\x00\x00\x00\x00\x00\x00"] │ │ ├── ["B\xfd\x10\x00\x00\x00\x00\x00\x00\x01", "B\xfd\x12\x00\x00\x00\x00\x00\x00\x00") │ │ └── ["B\xfd\x14\x00\x00\x00\x00\x00\x00\x00", "B\xfd\x14\x00\x00\x00\x00\x00\x00\x00"] - │ ├── stats: [rows=307.105263, distinct(1)=87.7443609, null(1)=0, distinct(3)=2, null(3)=0, distinct(7)=3, null(7)=0, distinct(3,7)=6, null(3,7)=0] - │ │ histogram(3)= 0 100 0 100 + │ ├── stats: [rows=121.569504, distinct(1)=34.7341441, null(1)=0, distinct(3)=2, null(3)=0, distinct(7)=1.18756842, null(7)=0, distinct(3,7)=2.37513684, null(3,7)=0] + │ │ histogram(3)= 0 60.785 0 60.785 │ │ <--- 'banana' --- 'cherry' - │ │ histogram(7)= 0 0 51.184 0 51.184 102.37 51.184 0 0 0 51.184 0 - │ │ <--- '\x42fd1000000000000000' -------- '\x42fd1000000000000001' -------- '\x42fd1000000100000000' -------- '\x42fd1200000000000000' --- '\x42fd1400000000000000' -------- '\x42fd1400000000000001' + │ │ histogram(7)= 0 0 1.8657e-10 102.37 19.201 0 0 0 + │ │ <--- '\x42fd1000000000000001' ------------ '\x42fd1000000100000000' -------- '\x42fd1200000000000000' --- '\x42fd1400000000000001' │ ├── key: (1) │ └── fd: (1)-->(7) └── filters @@ -1645,7 +1645,7 @@ project ├── fd: ()-->(3), (1)-->(2) ├── index-join spatial │ ├── columns: k:1(int!null) g:2(geometry) s:3(string) - │ ├── stats: [rows=307.105263] + │ ├── stats: [rows=121.569504] │ ├── key: (1) │ ├── fd: (1)-->(2,3) │ └── inverted-filter @@ -1658,7 +1658,7 @@ project │ │ └── ["B\xfd\x14\x00\x00\x00\x00\x00\x00\x00", "B\xfd\x14\x00\x00\x00\x00\x00\x00\x00"] │ ├── pre-filterer expression │ │ └── st_intersects('010200000002000000000000000000E03F000000000000E03F666666666666E63F666666666666E63F', g:2) [type=bool] - │ ├── stats: [rows=307.105263] + │ ├── stats: [rows=121.569504] │ ├── key: (1) │ └── scan spatial@p,partial │ ├── columns: k:1(int!null) g_inverted_key:7(geometry!null) @@ -1667,11 +1667,11 @@ project │ │ ├── ["B\xfd\x10\x00\x00\x00\x00\x00\x00\x00", "B\xfd\x10\x00\x00\x00\x00\x00\x00\x00"] │ │ ├── ["B\xfd\x10\x00\x00\x00\x00\x00\x00\x01", "B\xfd\x12\x00\x00\x00\x00\x00\x00\x00") │ │ └── ["B\xfd\x14\x00\x00\x00\x00\x00\x00\x00", "B\xfd\x14\x00\x00\x00\x00\x00\x00\x00"] - │ ├── stats: [rows=307.105263, distinct(1)=87.7443609, null(1)=0, distinct(3)=2, null(3)=0, distinct(7)=3, null(7)=0, distinct(3,7)=6, null(3,7)=0] - │ │ histogram(3)= 0 100 0 100 + │ ├── stats: [rows=121.569504, distinct(1)=34.7341441, null(1)=0, distinct(3)=2, null(3)=0, distinct(7)=1.18756842, null(7)=0, distinct(3,7)=2.37513684, null(3,7)=0] + │ │ histogram(3)= 0 60.785 0 60.785 │ │ <--- 'banana' --- 'cherry' - │ │ histogram(7)= 0 0 51.184 0 51.184 102.37 51.184 0 0 0 51.184 0 - │ │ <--- '\x42fd1000000000000000' -------- '\x42fd1000000000000001' -------- '\x42fd1000000100000000' -------- '\x42fd1200000000000000' --- '\x42fd1400000000000000' -------- '\x42fd1400000000000001' + │ │ histogram(7)= 0 0 1.8657e-10 102.37 19.201 0 0 0 + │ │ <--- '\x42fd1000000000000001' ------------ '\x42fd1000000100000000' -------- '\x42fd1200000000000000' --- '\x42fd1400000000000001' │ ├── key: (1) │ └── fd: (1)-->(7) └── filters diff --git a/pkg/sql/opt/props/histogram.go b/pkg/sql/opt/props/histogram.go index 80a4704df2a8..7d5ceb86ab76 100644 --- a/pkg/sql/opt/props/histogram.go +++ b/pkg/sql/opt/props/histogram.go @@ -663,24 +663,24 @@ func getFilteredBucket( } } -// getRangesBeforeAndAfter returns the size of the ranges before and after the -// given bucket is filtered by the given span. If swap is true, the upper and -// lower bounds should be swapped for the bucket and the span. Returns ok=true -// if these range sizes are calculated successfully, and false otherwise. -// The calculations for rangeBefore and rangeAfter are datatype dependent. +// getRangesBeforeAndAfter returns the size of the before and after ranges based +// on the lower and upper bounds provided. If swap is true, the upper and lower +// bounds of both ranges are swapped. Returns ok=true if these range sizes are +// calculated successfully, and false otherwise. The calculations for +// rangeBefore and rangeAfter are datatype dependent. // -// For numeric types, we can simply find the difference between the bucket/span -// bounds for rangeBefore/rangeAfter. +// For numeric types, we can simply find the difference between the lower and +// upper bounds for rangeBefore/rangeAfter. // -// For non-numeric types, we can convert each bound into sorted key bytes -// (CRDB key representation) to find their range. As we do need a lot of -// precision in our range estimate, we can remove the common prefix between -// bucket/span bounds, and limit the byte array to 8 bytes. This also simplifies -// our implementation since we won't need to handle an arbitrary length of -// bounds. Following the conversion, we must zero extend the byte arrays to -// ensure the length is uniform between bucket/span bounds. This process is -// highlighted below, where [\bear - \bobcat] represents the original bucket and -// [\bluejay - \boar] represents the span. +// For non-numeric types, we can convert each bound into sorted key bytes (CRDB +// key representation) to find their range. As we do need a lot of precision in +// our range estimate, we can remove the common prefix between the lower and +// upper bounds, and limit the byte array to 8 bytes. This also simplifies our +// implementation since we won't need to handle an arbitrary length of bounds. +// Following the conversion, we must zero extend the byte arrays to ensure the +// length is uniform between lower and upper bounds. This process is highlighted +// below, where [\bear - \bobcat] represents the before range and +// [\bluejay - \boar] represents the after range. // // bear := [18 98 101 97 114 0 1 ] // => [101 97 114 0 0 0 0 0 ] @@ -695,7 +695,7 @@ func getFilteredBucket( // => [111 98 99 97 116 0 0 0 ] // // We can now find the range before/after by finding the difference between -// the bucket/span bounds: +// the lower and upper bounds: // // rangeBefore := [111 98 99 97 116 0 1 0] - // [101 97 114 0 1 0 0 0] @@ -713,96 +713,113 @@ func getFilteredBucket( // := 210,557,119,328,878,600 // func getRangesBeforeAndAfter( - bucketLowerBound, bucketUpperBound, spanLowerBound, spanUpperBound tree.Datum, swap bool, + beforeLowerBound, beforeUpperBound, afterLowerBound, afterUpperBound tree.Datum, swap bool, ) (rangeBefore, rangeAfter float64, ok bool) { // If the data types don't match, don't bother trying to calculate the range // sizes. This should almost never happen, but we want to avoid type // assertion errors below. typesMatch := - bucketLowerBound.ResolvedType().Equivalent(bucketUpperBound.ResolvedType()) && - bucketUpperBound.ResolvedType().Equivalent(spanLowerBound.ResolvedType()) && - spanLowerBound.ResolvedType().Equivalent(spanUpperBound.ResolvedType()) + beforeLowerBound.ResolvedType().Equivalent(beforeUpperBound.ResolvedType()) && + beforeUpperBound.ResolvedType().Equivalent(afterLowerBound.ResolvedType()) && + afterLowerBound.ResolvedType().Equivalent(afterUpperBound.ResolvedType()) if !typesMatch { return 0, 0, false } if swap { - bucketLowerBound, bucketUpperBound = bucketUpperBound, bucketLowerBound - spanLowerBound, spanUpperBound = spanUpperBound, spanLowerBound + beforeLowerBound, beforeUpperBound = beforeUpperBound, beforeLowerBound + afterLowerBound, afterUpperBound = afterUpperBound, afterLowerBound } + // The calculations below assume that all bounds are inclusive. // TODO(rytaft): handle more types here. - // Note: the calculations below assume that bucketLowerBound is inclusive and - // Span.PreferInclusive() has been called on the span. - - getRange := func(lowerBound, upperBound tree.Datum) (rng float64, ok bool) { - switch lowerBound.ResolvedType().Family() { - case types.IntFamily: - rng = float64(*upperBound.(*tree.DInt)) - float64(*lowerBound.(*tree.DInt)) - return rng, true - - case types.DateFamily: - lower := lowerBound.(*tree.DDate) - upper := upperBound.(*tree.DDate) - if lower.IsFinite() && upper.IsFinite() { - rng = float64(upper.PGEpochDays()) - float64(lower.PGEpochDays()) - return rng, true - } - return 0, false - - case types.DecimalFamily: - lower, err := lowerBound.(*tree.DDecimal).Float64() - if err != nil { - return 0, false - } - upper, err := upperBound.(*tree.DDecimal).Float64() - if err != nil { - return 0, false - } - rng = upper - lower - return rng, true - - case types.FloatFamily: - rng = float64(*upperBound.(*tree.DFloat)) - float64(*lowerBound.(*tree.DFloat)) - return rng, true - - case types.TimestampFamily: - lower := lowerBound.(*tree.DTimestamp).Time - upper := upperBound.(*tree.DTimestamp).Time - rng = float64(upper.Sub(lower)) - return rng, true - - case types.TimestampTZFamily: - lower := lowerBound.(*tree.DTimestampTZ).Time - upper := upperBound.(*tree.DTimestampTZ).Time - rng = float64(upper.Sub(lower)) - return rng, true - - case types.TimeFamily: - lower := lowerBound.(*tree.DTime) - upper := upperBound.(*tree.DTime) - rng = float64(*upper) - float64(*lower) - return rng, true - - case types.TimeTZFamily: - lower := lowerBound.(*tree.DTimeTZ).TimeOfDay - upper := upperBound.(*tree.DTimeTZ).TimeOfDay - rng = float64(upper) - float64(lower) - return rng, true + switch beforeLowerBound.ResolvedType().Family() { + case types.IntFamily: + rangeBefore = float64(*beforeUpperBound.(*tree.DInt)) - float64(*beforeLowerBound.(*tree.DInt)) + rangeAfter = float64(*afterUpperBound.(*tree.DInt)) - float64(*afterLowerBound.(*tree.DInt)) + return rangeBefore, rangeAfter, true - default: - return 0, false + case types.DateFamily: + lowerBefore := beforeLowerBound.(*tree.DDate) + upperBefore := beforeUpperBound.(*tree.DDate) + lowerAfter := afterLowerBound.(*tree.DDate) + upperAfter := afterUpperBound.(*tree.DDate) + if lowerBefore.IsFinite() && upperBefore.IsFinite() && lowerAfter.IsFinite() && upperAfter.IsFinite() { + rangeBefore = float64(upperBefore.PGEpochDays()) - float64(lowerBefore.PGEpochDays()) + rangeAfter = float64(upperAfter.PGEpochDays()) - float64(lowerAfter.PGEpochDays()) + return rangeBefore, rangeAfter, true } - } - - getRangeNonNumeric := func( - lowerBoundBefore, upperBoundBefore, lowerBoundAfter, upperBoundAfter tree.Datum, - ) (rngBefore, rngAfter float64, ok bool) { + return 0, 0, false - // Utilizes an array to simplify number of repetitive calls. - boundArr := []tree.Datum{lowerBoundBefore, upperBoundBefore, lowerBoundAfter, - upperBoundAfter} - boundArrByte := make([][]byte, 4) + case types.DecimalFamily: + lowerBefore, err := beforeLowerBound.(*tree.DDecimal).Float64() + if err != nil { + return 0, 0, false + } + upperBefore, err := beforeUpperBound.(*tree.DDecimal).Float64() + if err != nil { + return 0, 0, false + } + lowerAfter, err := afterLowerBound.(*tree.DDecimal).Float64() + if err != nil { + return 0, 0, false + } + upperAfter, err := afterUpperBound.(*tree.DDecimal).Float64() + if err != nil { + return 0, 0, false + } + rangeBefore = upperBefore - lowerBefore + rangeAfter = upperAfter - lowerAfter + return rangeBefore, rangeAfter, true + + case types.FloatFamily: + rangeBefore = float64(*beforeUpperBound.(*tree.DFloat)) - float64(*beforeLowerBound.(*tree.DFloat)) + rangeAfter = float64(*afterUpperBound.(*tree.DFloat)) - float64(*afterLowerBound.(*tree.DFloat)) + return rangeBefore, rangeAfter, true + + case types.TimestampFamily: + lowerBefore := beforeLowerBound.(*tree.DTimestamp).Time + upperBefore := beforeUpperBound.(*tree.DTimestamp).Time + lowerAfter := afterLowerBound.(*tree.DTimestamp).Time + upperAfter := afterUpperBound.(*tree.DTimestamp).Time + rangeBefore = float64(upperBefore.Sub(lowerBefore)) + rangeAfter = float64(upperAfter.Sub(lowerAfter)) + return rangeBefore, rangeAfter, true + + case types.TimestampTZFamily: + lowerBefore := beforeLowerBound.(*tree.DTimestampTZ).Time + upperBefore := beforeUpperBound.(*tree.DTimestampTZ).Time + lowerAfter := afterLowerBound.(*tree.DTimestampTZ).Time + upperAfter := afterUpperBound.(*tree.DTimestampTZ).Time + rangeBefore = float64(upperBefore.Sub(lowerBefore)) + rangeAfter = float64(upperAfter.Sub(lowerAfter)) + return rangeBefore, rangeAfter, true + + case types.TimeFamily: + lowerBefore := beforeLowerBound.(*tree.DTime) + upperBefore := beforeUpperBound.(*tree.DTime) + lowerAfter := afterLowerBound.(*tree.DTime) + upperAfter := afterUpperBound.(*tree.DTime) + rangeBefore = float64(*upperBefore) - float64(*lowerBefore) + rangeAfter = float64(*upperAfter) - float64(*lowerAfter) + return rangeBefore, rangeAfter, true + + case types.TimeTZFamily: + lowerBefore := beforeLowerBound.(*tree.DTimeTZ).TimeOfDay + upperBefore := beforeUpperBound.(*tree.DTimeTZ).TimeOfDay + lowerAfter := afterLowerBound.(*tree.DTimeTZ).TimeOfDay + upperAfter := afterUpperBound.(*tree.DTimeTZ).TimeOfDay + rangeBefore = float64(upperBefore) - float64(lowerBefore) + rangeAfter = float64(upperAfter) - float64(lowerAfter) + return rangeBefore, rangeAfter, true + + case types.StringFamily, types.BytesFamily, types.UuidFamily, types.INetFamily: + // For non-numeric types, convert the datums to encoded keys to + // approximate the range. We utilize an array to reduce repetitive code. + boundArr := [4]tree.Datum{ + beforeLowerBound, beforeUpperBound, afterLowerBound, afterUpperBound, + } + var boundArrByte [4][]byte for i := range boundArr { var err error @@ -820,27 +837,17 @@ func getRangesBeforeAndAfter( boundArrByte[i] = getFixedLenArr(boundArrByte[i], ind, 8 /* fixLen */) } - rngBefore = float64(binary.BigEndian.Uint64(boundArrByte[1]) - + rangeBefore = float64(binary.BigEndian.Uint64(boundArrByte[1]) - binary.BigEndian.Uint64(boundArrByte[0])) - rngAfter = float64(binary.BigEndian.Uint64(boundArrByte[3]) - + rangeAfter = float64(binary.BigEndian.Uint64(boundArrByte[3]) - binary.BigEndian.Uint64(boundArrByte[2])) - return rngBefore, rngAfter, true - } + return rangeBefore, rangeAfter, true - // For non-numeric types, compute the prefix across all bucket/span bounds. - ok = false - if isNonNumeric(bucketLowerBound.ResolvedType()) { - rangeBefore, rangeAfter, ok = getRangeNonNumeric( - bucketLowerBound, bucketUpperBound, spanLowerBound, spanUpperBound, - ) - } else { - okBefore, okAfter := false, false - rangeBefore, okBefore = getRange(bucketLowerBound, bucketUpperBound) - rangeAfter, okAfter = getRange(spanLowerBound, spanUpperBound) - ok = okBefore && okAfter + default: + // Range calculations are not supported for the given type family. + return 0, 0, false } - return rangeBefore, rangeAfter, ok } // isDiscrete returns true if the given data type is discrete. @@ -852,26 +859,10 @@ func isDiscrete(typ *types.T) bool { return false } -// isNonNumeric returns true if the given data type is non-numeric. -// Note: this function does not support all non-numeric data-types within -// cockroach db. -func isNonNumeric(typ *types.T) bool { - switch typ.Family() { - case types.StringFamily, types.UuidFamily, types.INetFamily: - return true - } - return false -} - // getCommonPrefix returns the first index where the value at said index differs // across all byte arrays in byteArr. byteArr must contain at least one element // to compute a common prefix. -func getCommonPrefix(byteArr [][]byte) int { - - if len(byteArr) <= 0 { - panic(errors.AssertionFailedf("byteArr must have at least one element")) - } - +func getCommonPrefix(byteArr [4][]byte) int { // Checks if the current value at index is the same between all byte arrays. currIndMatching := func(ind int) bool { for i := 0; i < len(byteArr); i++ { diff --git a/pkg/sql/opt/props/histogram_test.go b/pkg/sql/opt/props/histogram_test.go index eca72b201a3b..583d7e66ba59 100644 --- a/pkg/sql/opt/props/histogram_test.go +++ b/pkg/sql/opt/props/histogram_test.go @@ -644,67 +644,82 @@ func TestFilterBucket(t *testing.T) { runTest(h, testData, types.TimeTZFamily) }) - t.Run("string", func(t *testing.T) { - h1 := &Histogram{evalCtx: &evalCtx, col: col, buckets: []cat.HistogramBucket{ - {NumEq: 0, NumRange: 0, DistinctRange: 0, UpperBound: getPrevUpperBound(tree.NewDString("bear"))}, - {NumEq: 5, NumRange: 10, DistinctRange: 10, UpperBound: tree.NewDString("bobcat")}, - }} - h2 := &Histogram{evalCtx: &evalCtx, col: col, buckets: []cat.HistogramBucket{ - {NumEq: 0, NumRange: 0, DistinctRange: 0, UpperBound: getPrevUpperBound(tree.NewDString("a"))}, - {NumEq: 5, NumRange: 10, DistinctRange: 10, UpperBound: tree.NewDString("c")}, - }} - h3 := &Histogram{evalCtx: &evalCtx, col: col, buckets: []cat.HistogramBucket{ - {NumEq: 0, NumRange: 0, DistinctRange: 0, UpperBound: getPrevUpperBound(tree.NewDString("aaaaaaaaaaaa"))}, - {NumEq: 5, NumRange: 10, DistinctRange: 10, UpperBound: tree.NewDString("cccccccccccc")}, - }} + t.Run("string-bytes", func(t *testing.T) { + typesToTest := []struct { + family types.Family + createDatumFn func(string) tree.Datum + }{ + { + family: types.StringFamily, + createDatumFn: func(s string) tree.Datum { return tree.NewDString(s) }, + }, + { + family: types.BytesFamily, + createDatumFn: func(s string) tree.Datum { return tree.NewDBytes(tree.DBytes(s)) }, + }, + } + for _, typ := range typesToTest { + h1 := &Histogram{evalCtx: &evalCtx, col: col, buckets: []cat.HistogramBucket{ + {NumEq: 0, NumRange: 0, DistinctRange: 0, UpperBound: getPrevUpperBound(typ.createDatumFn("bear"))}, + {NumEq: 5, NumRange: 10, DistinctRange: 10, UpperBound: typ.createDatumFn("bobcat")}, + }} + h2 := &Histogram{evalCtx: &evalCtx, col: col, buckets: []cat.HistogramBucket{ + {NumEq: 0, NumRange: 0, DistinctRange: 0, UpperBound: getPrevUpperBound(typ.createDatumFn("a"))}, + {NumEq: 5, NumRange: 10, DistinctRange: 10, UpperBound: typ.createDatumFn("c")}, + }} + h3 := &Histogram{evalCtx: &evalCtx, col: col, buckets: []cat.HistogramBucket{ + {NumEq: 0, NumRange: 0, DistinctRange: 0, UpperBound: getPrevUpperBound(typ.createDatumFn("aaaaaaaaaaaa"))}, + {NumEq: 5, NumRange: 10, DistinctRange: 10, UpperBound: typ.createDatumFn("cccccccccccc")}, + }} + + t1 := []testCase{ + { + span: "[/bluejay - /boar]", + expected: &cat.HistogramBucket{NumEq: 0, NumRange: 2.92, DistinctRange: 2.92, UpperBound: typ.createDatumFn("boar")}, + }, + { + span: "[/beer - /bobcat]", + expected: &cat.HistogramBucket{NumEq: 5, NumRange: 9.98, DistinctRange: 9.98, UpperBound: typ.createDatumFn("bobcat")}, + }, + } - t1 := []testCase{ - { - span: "[/bluejay - /boar]", - expected: &cat.HistogramBucket{NumEq: 0, NumRange: 2.92, DistinctRange: 2.92, UpperBound: tree.NewDString("boar")}, - }, - { - span: "[/beer - /bobcat]", - expected: &cat.HistogramBucket{NumEq: 5, NumRange: 9.98, DistinctRange: 9.98, UpperBound: tree.NewDString("bobcat")}, - }, - } + t2 := []testCase{ + // Within the CRDB encoding, all null bytes are followed by an escape byte, + // (255) which are left in for the rangeAfter calculations. For this + // reason, the resulting NumRange is slightly lower than expected at 4.99 + // instead of 5. + { + span: "[/a\x00 - /b]", + expected: &cat.HistogramBucket{NumEq: 0, NumRange: 4.99, DistinctRange: 4.99, UpperBound: typ.createDatumFn("b")}, + }, + { + span: "[/as - /b]", + expected: &cat.HistogramBucket{NumEq: 0, NumRange: 2.76, DistinctRange: 2.76, UpperBound: typ.createDatumFn("b")}, + }, + { + span: "[/as - /c]", + expected: &cat.HistogramBucket{NumEq: 5, NumRange: 7.77, DistinctRange: 7.77, UpperBound: typ.createDatumFn("c")}, + }, + { + span: "[/bs - /c]", + expected: &cat.HistogramBucket{NumEq: 5, NumRange: 2.76, DistinctRange: 2.76, UpperBound: typ.createDatumFn("c")}, + }, + } - t2 := []testCase{ - // Within the CRDB encoding, all null bytes are followed by an escape byte, - // (255) which are left in for the rangeAfter calculations. For this - // reason, the resulting NumRange is slightly lower than expected at 4.99 - // instead of 5. - { - span: "[/a\x00 - /b]", - expected: &cat.HistogramBucket{NumEq: 0, NumRange: 4.99, DistinctRange: 4.99, UpperBound: tree.NewDString("b")}, - }, - { - span: "[/as - /b]", - expected: &cat.HistogramBucket{NumEq: 0, NumRange: 2.76, DistinctRange: 2.76, UpperBound: tree.NewDString("b")}, - }, - { - span: "[/as - /c]", - expected: &cat.HistogramBucket{NumEq: 5, NumRange: 7.77, DistinctRange: 7.77, UpperBound: tree.NewDString("c")}, - }, - { - span: "[/bs - /c]", - expected: &cat.HistogramBucket{NumEq: 5, NumRange: 2.76, DistinctRange: 2.76, UpperBound: tree.NewDString("c")}, - }, - } + // The initial 8 bytes for lowerBound and upperBound of the span is the same. + // Hence, the resulting NumRange/DistinctRange should be 0, as rangeAfter + // only considers the first 8 bytes of the bounds. + t3 := []testCase{ + { + span: "[/aaaaaaaabbbb - /aaaaaaaacccc]", + expected: &cat.HistogramBucket{NumEq: 0, NumRange: 0, DistinctRange: 0, UpperBound: typ.createDatumFn("aaaaaaaacccc")}, + }, + } - // The initial 8 bytes for lowerBound and upperBound of the span is the same. - // Hence, the resulting NumRange/DistinctRange should be 0, as rangeAfter - // only considers the first 8 bytes of the bounds. - t3 := []testCase{ - { - span: "[/aaaaaaaabbbb - /aaaaaaaacccc]", - expected: &cat.HistogramBucket{NumEq: 0, NumRange: 0, DistinctRange: 0, UpperBound: tree.NewDString("aaaaaaaacccc")}, - }, + runTest(h1, t1, typ.family) + runTest(h2, t2, typ.family) + runTest(h3, t3, typ.family) } - - runTest(h1, t1, types.StringFamily) - runTest(h2, t2, types.StringFamily) - runTest(h3, t3, types.StringFamily) }) t.Run("uuid", func(t *testing.T) {