-
Notifications
You must be signed in to change notification settings - Fork 3.9k
/
Copy pathselect_funcs.go
1653 lines (1493 loc) · 60.4 KB
/
select_funcs.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
// Copyright 2020 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.
package xform
import (
"sort"
"github.com/cockroachdb/cockroach/pkg/sql/inverted"
"github.com/cockroachdb/cockroach/pkg/sql/opt"
"github.com/cockroachdb/cockroach/pkg/sql/opt/cat"
"github.com/cockroachdb/cockroach/pkg/sql/opt/constraint"
"github.com/cockroachdb/cockroach/pkg/sql/opt/invertedidx"
"github.com/cockroachdb/cockroach/pkg/sql/opt/memo"
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
"github.com/cockroachdb/cockroach/pkg/sql/types"
"github.com/cockroachdb/errors"
)
// IsLocking returns true if the ScanPrivate is configured to use a row-level
// locking mode. This can be the case either because the Scan is in the scope of
// a SELECT .. FOR [KEY] UPDATE/SHARE clause or because the Scan was configured
// as part of the row retrieval of a DELETE or UPDATE statement.
func (c *CustomFuncs) IsLocking(scan *memo.ScanPrivate) bool {
return scan.IsLocking()
}
// GeneratePartialIndexScans generates unconstrained index scans over all
// non-inverted, partial indexes with predicates that are implied by the
// filters. Partial indexes with predicates which cannot be proven to be implied
// by the filters are disregarded.
//
// When a filter completely matches the predicate, the remaining filters are
// simplified so that they do not include the filter. A redundant filter is
// unnecessary to include in the remaining filters because a scan over the partial
// index implicitly filters the results.
//
// For every partial index that is implied by the filters, a Scan will be
// generated along with a combination of an IndexJoin and Selects. There are
// three questions to consider which determine which operators are generated.
//
// 1. Does the index "cover" the columns needed?
// 2. Are there any remaining filters to apply after the Scan?
// 3. If there are remaining filters does the index cover the referenced
// columns?
//
// If the index covers the columns needed, no IndexJoin is need. The two
// possible generated expressions are either a lone Scan or a Scan wrapped in a
// Select that applies any remaining filters.
//
// (Scan $scanDef)
//
// (Select (Scan $scanDef) $remainingFilters)
//
// If the index is not covering, then an IndexJoin is required to retrieve the
// needed columns. Some or all of the remaining filters may be required to be
// applied after the IndexJoin, because they reference columns not covered by
// the index. Therefore, Selects can be constructed before, after, or both
// before and after the IndexJoin depending on the columns referenced in the
// remaining filters.
//
// If the index is not covering, then an IndexJoin is required to retrieve the
// needed columns. Some of the remaining filters may be applied in a Select
// before the IndexJoin, if all the columns referenced in the filter are covered
// by the index. Some of the remaining filters may be applied in a Select after
// the IndexJoin, if their columns are not covered. Therefore, Selects can be
// constructed before, after, or both before and after the IndexJoin.
//
// (IndexJoin (Scan $scanDef) $indexJoinDef)
//
// (IndexJoin
// (Select (Scan $scanDef) $remainingFilters)
// $indexJoinDef
// )
//
// (Select
// (IndexJoin (Scan $scanDef) $indexJoinDef)
// $outerFilter
// )
//
// (Select
// (IndexJoin
// (Select (Scan $scanDef) $innerFilter)
// $indexJoinDef
// )
// $outerFilter
// )
//
func (c *CustomFuncs) GeneratePartialIndexScans(
grp memo.RelExpr, scanPrivate *memo.ScanPrivate, filters memo.FiltersExpr,
) {
// Iterate over all partial indexes.
var iter scanIndexIter
iter.Init(c.e.mem, &c.im, scanPrivate, filters, rejectNonPartialIndexes|rejectInvertedIndexes)
iter.ForEach(func(index cat.Index, remainingFilters memo.FiltersExpr, indexCols opt.ColSet, isCovering bool) {
var sb indexScanBuilder
sb.init(c, scanPrivate.Table)
newScanPrivate := *scanPrivate
newScanPrivate.Index = index.Ordinal()
// If index is covering, just add a Select with the remaining filters,
// if there are any.
if isCovering {
sb.setScan(&newScanPrivate)
sb.addSelect(remainingFilters)
sb.build(grp)
return
}
// If the index is not covering, scan the needed index columns plus
// primary key columns.
newScanPrivate.Cols = indexCols.Intersection(scanPrivate.Cols)
newScanPrivate.Cols.UnionWith(sb.primaryKeyCols())
sb.setScan(&newScanPrivate)
// Add a Select with any remaining filters that can be filtered before
// the IndexJoin. If there are no remaining filters this is a no-op. If
// all or parts of the remaining filters cannot be applied until after
// the IndexJoin, the new value of remainingFilters will contain those
// filters.
remainingFilters = sb.addSelectAfterSplit(remainingFilters, newScanPrivate.Cols)
// Add an IndexJoin to retrieve the columns not provided by the Scan.
sb.addIndexJoin(scanPrivate.Cols)
// Add a Select with any remaining filters.
sb.addSelect(remainingFilters)
sb.build(grp)
})
}
// GenerateConstrainedScans enumerates all non-inverted secondary indexes on the
// Scan operator's table and tries to push the given Select filter into new
// constrained Scan operators using those indexes. Since this only needs to be
// done once per table, GenerateConstrainedScans should only be called on the
// original unaltered primary index Scan operator (i.e. not constrained or
// limited).
//
// For each secondary index that "covers" the columns needed by the scan, there
// are three cases:
//
// - a filter that can be completely converted to a constraint over that index
// generates a single constrained Scan operator (to be added to the same
// group as the original Select operator):
//
// (Scan $scanDef)
//
// - a filter that can be partially converted to a constraint over that index
// generates a constrained Scan operator in a new memo group, wrapped in a
// Select operator having the remaining filter (to be added to the same group
// as the original Select operator):
//
// (Select (Scan $scanDef) $filter)
//
// - a filter that cannot be converted to a constraint generates nothing
//
// And for a secondary index that does not cover the needed columns:
//
// - a filter that can be completely converted to a constraint over that index
// generates a single constrained Scan operator in a new memo group, wrapped
// in an IndexJoin operator that looks up the remaining needed columns (and
// is added to the same group as the original Select operator)
//
// (IndexJoin (Scan $scanDef) $indexJoinDef)
//
// - a filter that can be partially converted to a constraint over that index
// generates a constrained Scan operator in a new memo group, wrapped in an
// IndexJoin operator that looks up the remaining needed columns; the
// remaining filter is distributed above and/or below the IndexJoin,
// depending on which columns it references:
//
// (IndexJoin
// (Select (Scan $scanDef) $filter)
// $indexJoinDef
// )
//
// (Select
// (IndexJoin (Scan $scanDef) $indexJoinDef)
// $filter
// )
//
// (Select
// (IndexJoin
// (Select (Scan $scanDef) $innerFilter)
// $indexJoinDef
// )
// $outerFilter
// )
//
// GenerateConstrainedScans will further constrain the enumerated index scans
// by trying to use the check constraints and computed columns that apply to the
// table being scanned, as well as the partitioning defined for the index. See
// comments above checkColumnFilters, computedColFilters, and
// partitionValuesFilters for more detail.
func (c *CustomFuncs) GenerateConstrainedScans(
grp memo.RelExpr, scanPrivate *memo.ScanPrivate, explicitFilters memo.FiltersExpr,
) {
var sb indexScanBuilder
sb.init(c, scanPrivate.Table)
// Generate implicit filters from constraints and computed columns as
// optional filters to help constrain an index scan.
optionalFilters := c.checkConstraintFilters(scanPrivate.Table)
computedColFilters := c.computedColFilters(scanPrivate, explicitFilters, optionalFilters)
optionalFilters = append(optionalFilters, computedColFilters...)
filterColumns := c.FilterOuterCols(explicitFilters)
filterColumns.UnionWith(c.FilterOuterCols(optionalFilters))
// Iterate over all non-inverted indexes.
md := c.e.mem.Metadata()
tabMeta := md.TableMeta(scanPrivate.Table)
var iter scanIndexIter
iter.Init(c.e.mem, &c.im, scanPrivate, explicitFilters, rejectInvertedIndexes)
iter.ForEach(func(index cat.Index, filters memo.FiltersExpr, indexCols opt.ColSet, isCovering bool) {
// We only consider the partition values when a particular index can otherwise
// not be constrained. For indexes that are constrained, the partitioned values
// add no benefit as they don't really constrain anything.
// Furthermore, if the filters don't take advantage of the index (use any of the
// index columns), using the partition values add no benefit.
//
// If the index is partitioned (by list), we generate two constraints and
// union them: the "main" constraint and the "in-between" constraint.The
// "main" constraint restricts the index to the known partition ranges. The
// "in-between" constraint restricts the index to the rest of the ranges
// (i.e. everything that falls in-between the main ranges); the in-between
// constraint is necessary for correctness (there can be rows outside of the
// partitioned ranges).
//
// For both constraints, the partition-related filters are passed as
// "optional" which guarantees that they return no remaining filters. This
// allows us to merge the remaining filters from both constraints.
//
// Consider the following index and its partition:
//
// CREATE INDEX orders_by_seq_num
// ON orders (region, seq_num, id)
// STORING (total)
// PARTITION BY LIST (region)
// (
// PARTITION us_east1 VALUES IN ('us-east1'),
// PARTITION us_west1 VALUES IN ('us-west1'),
// PARTITION europe_west2 VALUES IN ('europe-west2')
// )
//
// The constraint generated for the query:
// SELECT sum(total) FROM orders WHERE seq_num >= 100 AND seq_num < 200
// is:
// [/'europe-west2'/100 - /'europe-west2'/199]
// [/'us-east1'/100 - /'us-east1'/199]
// [/'us-west1'/100 - /'us-west1'/199]
//
// The spans before europe-west2, after us-west1 and in between the defined
// partitions are missing. We must add these spans now, appropriately
// constrained using the filters.
//
// It is important that we add these spans after the partition spans are generated
// because otherwise these spans would merge with the partition spans and would
// disallow the partition spans (and the in between ones) to be constrained further.
// Using the partitioning example and the query above, if we added the in between
// spans at the same time as the partitioned ones, we would end up with a span that
// looked like:
// [ - /'europe-west2'/99]
//
// Allowing the partition spans to be constrained further and then adding
// the spans give us a more constrained index scan as shown below:
// [ - /'europe-west2')
// [/'europe-west2'/100 - /'europe-west2'/199]
// [/e'europe-west2\x00'/100 - /'us-east1')
// [/'us-east1'/100 - /'us-east1'/199]
// [/e'us-east1\x00'/100 - /'us-west1')
// [/'us-west1'/100 - /'us-west1'/199]
// [/e'us-west1\x00'/100 - ]
//
// Notice how we 'skip' all the europe-west2 rows with seq_num < 100.
//
var partitionFilters, inBetweenFilters memo.FiltersExpr
indexColumns := tabMeta.IndexKeyColumns(index.Ordinal())
firstIndexCol := scanPrivate.Table.IndexColumnID(index, 0)
if !filterColumns.Contains(firstIndexCol) && indexColumns.Intersects(filterColumns) {
// Calculate any partition filters if appropriate (see below).
partitionFilters, inBetweenFilters = c.partitionValuesFilters(scanPrivate.Table, index)
}
// Check whether the filter (along with any partitioning filters) can constrain the index.
constraint, remainingFilters, ok := c.tryConstrainIndex(
filters,
append(optionalFilters, partitionFilters...),
scanPrivate.Table,
index.Ordinal(),
)
if !ok {
return
}
if len(partitionFilters) > 0 {
inBetweenConstraint, inBetweenRemainingFilters, ok := c.tryConstrainIndex(
filters,
append(optionalFilters, inBetweenFilters...),
scanPrivate.Table,
index.Ordinal(),
)
if !ok {
panic(errors.AssertionFailedf("in-between filters didn't yield a constraint"))
}
constraint.UnionWith(c.e.evalCtx, inBetweenConstraint)
// Even though the partitioned constraints and the inBetween constraints
// were consolidated, we must make sure their Union is as well.
constraint.ConsolidateSpans(c.e.evalCtx)
// Add all remaining filters that need to be present in the
// inBetween spans. Some of the remaining filters are common
// between them, so we must deduplicate them.
remainingFilters = c.ConcatFilters(remainingFilters, inBetweenRemainingFilters)
remainingFilters.Sort()
remainingFilters.Deduplicate()
}
// Construct new constrained ScanPrivate.
newScanPrivate := *scanPrivate
newScanPrivate.Index = index.Ordinal()
newScanPrivate.Constraint = constraint
// Record whether we were able to use partitions to constrain the scan.
newScanPrivate.PartitionConstrainedScan = (len(partitionFilters) > 0)
// If the alternate index includes the set of needed columns, then construct
// a new Scan operator using that index.
if isCovering {
sb.setScan(&newScanPrivate)
// If there are remaining filters, then the constrained Scan operator
// will be created in a new group, and a Select operator will be added
// to the same group as the original operator.
sb.addSelect(remainingFilters)
sb.build(grp)
return
}
// Otherwise, construct an IndexJoin operator that provides the columns
// missing from the index.
if scanPrivate.Flags.NoIndexJoin {
return
}
// Scan whatever columns we need which are available from the index, plus
// the PK columns.
newScanPrivate.Cols = indexCols.Intersection(scanPrivate.Cols)
newScanPrivate.Cols.UnionWith(sb.primaryKeyCols())
sb.setScan(&newScanPrivate)
// If remaining filter exists, split it into one part that can be pushed
// below the IndexJoin, and one part that needs to stay above.
remainingFilters = sb.addSelectAfterSplit(remainingFilters, newScanPrivate.Cols)
sb.addIndexJoin(scanPrivate.Cols)
sb.addSelect(remainingFilters)
sb.build(grp)
})
}
// tryFoldComputedCol tries to reduce the computed column with the given column
// ID into a constant value, by evaluating it with respect to a set of other
// columns that are constant. If the computed column is constant, enter it into
// the constCols map and return false. Otherwise, return false.
func (c *CustomFuncs) tryFoldComputedCol(
tabMeta *opt.TableMeta, computedColID opt.ColumnID, constCols map[opt.ColumnID]opt.ScalarExpr,
) bool {
// Check whether computed column has already been folded.
if _, ok := constCols[computedColID]; ok {
return true
}
var replace func(e opt.Expr) opt.Expr
replace = func(e opt.Expr) opt.Expr {
if variable, ok := e.(*memo.VariableExpr); ok {
// Can variable be folded?
if constVal, ok := constCols[variable.Col]; ok {
// Yes, so replace it with its constant value.
return constVal
}
// No, but that may be because the variable refers to a dependent
// computed column. In that case, try to recursively fold that
// computed column. There are no infinite loops possible because the
// dependency graph is guaranteed to be acyclic.
if _, ok := tabMeta.ComputedCols[variable.Col]; ok {
if c.tryFoldComputedCol(tabMeta, variable.Col, constCols) {
return constCols[variable.Col]
}
}
return e
}
return c.e.f.Replace(e, replace)
}
computedCol := tabMeta.ComputedCols[computedColID]
replaced := replace(computedCol).(opt.ScalarExpr)
// If the computed column is constant, enter it into the constCols map.
if opt.IsConstValueOp(replaced) {
constCols[computedColID] = replaced
return true
}
return false
}
// inBetweenFilters returns a set of filters that are required to cover all the
// in-between spans given a set of partition values. This is required for
// correctness reasons; although values are unlikely to exist between defined
// partitions, they may exist and so the constraints of the scan must incorporate
// these spans.
func (c *CustomFuncs) inBetweenFilters(
tabID opt.TableID, index cat.Index, partitionValues []tree.Datums,
) memo.FiltersExpr {
var inBetween memo.ScalarListExpr
if len(partitionValues) == 0 {
return memo.EmptyFiltersExpr
}
// Sort the partitionValues lexicographically.
sort.Slice(partitionValues, func(i, j int) bool {
return partitionValues[i].Compare(c.e.evalCtx, partitionValues[j]) < 0
})
// Add the beginning span.
beginExpr := c.columnComparison(tabID, index, partitionValues[0], -1)
inBetween = append(inBetween, beginExpr)
// Add the end span.
endExpr := c.columnComparison(tabID, index, partitionValues[len(partitionValues)-1], 1)
inBetween = append(inBetween, endExpr)
// Add the in-between spans.
for i := 1; i < len(partitionValues); i++ {
lowerPartition := partitionValues[i-1]
higherPartition := partitionValues[i]
// The between spans will be greater than the lower partition but smaller
// than the higher partition.
var largerThanLower opt.ScalarExpr
if c.isPrefixOf(lowerPartition, higherPartition) {
// Since the lower partition is a prefix of the higher partition, the span
// must begin with the values defined in the lower partition. Consider the
// partitions ('us') and ('us', 'cali'). In this case the in-between span
// should be [/'us - /'us'/'cali').
largerThanLower = c.columnComparison(tabID, index, lowerPartition, 0)
} else {
largerThanLower = c.columnComparison(tabID, index, lowerPartition, 1)
}
smallerThanHigher := c.columnComparison(tabID, index, higherPartition, -1)
// Add the in-between span to the list of inBetween spans.
betweenExpr := c.e.f.ConstructAnd(largerThanLower, smallerThanHigher)
inBetween = append(inBetween, betweenExpr)
}
// Return an Or expression between all the expressions.
return memo.FiltersExpr{c.e.f.ConstructFiltersItem(c.constructOr(inBetween))}
}
// constructOr constructs an expression that is an OR between all the
// provided conditions
func (c *CustomFuncs) constructOr(conditions memo.ScalarListExpr) opt.ScalarExpr {
if len(conditions) == 0 {
return c.e.f.ConstructFalse()
}
orExpr := conditions[0]
for i := 1; i < len(conditions); i++ {
orExpr = c.e.f.ConstructOr(conditions[i], orExpr)
}
return orExpr
}
// columnComparison returns a filter that compares the index columns to the
// given values. The comp parameter can be -1, 0 or 1 to indicate whether the
// comparison type of the filter should be a Lt, Eq or Gt.
func (c *CustomFuncs) columnComparison(
tabID opt.TableID, index cat.Index, values tree.Datums, comp int,
) opt.ScalarExpr {
colTypes := make([]*types.T, len(values))
for i := range values {
colTypes[i] = values[i].ResolvedType()
}
columnVariables := make(memo.ScalarListExpr, len(values))
scalarValues := make(memo.ScalarListExpr, len(values))
for i, val := range values {
colID := tabID.IndexColumnID(index, i)
columnVariables[i] = c.e.f.ConstructVariable(colID)
scalarValues[i] = c.e.f.ConstructConstVal(val, val.ResolvedType())
}
colsTuple := c.e.f.ConstructTuple(columnVariables, types.MakeTuple(colTypes))
valsTuple := c.e.f.ConstructTuple(scalarValues, types.MakeTuple(colTypes))
if comp == 0 {
return c.e.f.ConstructEq(colsTuple, valsTuple)
} else if comp > 0 {
return c.e.f.ConstructGt(colsTuple, valsTuple)
}
return c.e.f.ConstructLt(colsTuple, valsTuple)
}
// inPartitionFilters returns a FiltersExpr that is required to cover
// all the partition spans. For each partition defined, inPartitionFilters
// will contain a FilterItem that restricts the index columns by
// the partition values. Use inBetweenFilters to generate filters that
// cover all the spans that the partitions don't cover.
func (c *CustomFuncs) inPartitionFilters(
tabID opt.TableID, index cat.Index, partitionValues []tree.Datums,
) memo.FiltersExpr {
var partitions memo.ScalarListExpr
// Sort the partition values so the most selective ones are first.
sort.Slice(partitionValues, func(i, j int) bool {
return len(partitionValues[i]) >= len(partitionValues[j])
})
// Construct all the partition filters.
for i, partition := range partitionValues {
// Only add this partition if a more selective partition hasn't
// been defined on the same partition.
partitionSeen := false
for j, moreSelectivePartition := range partitionValues {
if j >= i {
break
}
// At this point we know whether the current partition was seen before.
partitionSeen = c.isPrefixOf(partition, moreSelectivePartition)
if partitionSeen {
break
}
}
// This partition is a prefix of a more selective partition and so,
// will be taken care of by the in-between partitions.
if partitionSeen {
continue
}
// Get an expression that restricts the values of the index to the
// partition values.
inPartition := c.columnComparison(tabID, index, partition, 0)
partitions = append(partitions, inPartition)
}
// Return an Or expression between all the expressions.
return memo.FiltersExpr{c.e.f.ConstructFiltersItem(c.constructOr(partitions))}
}
// isPrefixOf returns whether pre is a prefix of other.
func (c *CustomFuncs) isPrefixOf(pre []tree.Datum, other []tree.Datum) bool {
if len(pre) > len(other) {
// Pre can't be a prefix of other as it is larger.
return false
}
for i := range pre {
if pre[i].Compare(c.e.evalCtx, other[i]) != 0 {
return false
}
}
return true
}
// partitionValuesFilters constructs filters with the purpose of
// constraining an index scan using the partition values similar to
// the filters added from the check constraints (see
// checkConstraintFilters). It returns two sets of filters, one to
// create the partition spans, and one to create the spans for all
// the in between ranges that are not part of any partitions.
//
// For example consider the following table and partitioned index:
//
// CREATE TABLE orders (
// region STRING NOT NULL, id INT8 NOT NULL, total DECIMAL NOT NULL, seq_num INT NOT NULL,
// PRIMARY KEY (region, id)
// )
//
// CREATE INDEX orders_by_seq_num
// ON orders (region, seq_num, id)
// STORING (total)
// PARTITION BY LIST (region)
// (
// PARTITION us_east1 VALUES IN ('us-east1'),
// PARTITION us_west1 VALUES IN ('us-west1'),
// PARTITION europe_west2 VALUES IN ('europe-west2')
// )
//
// Now consider the following query:
// SELECT sum(total) FROM orders WHERE seq_num >= 100 AND seq_num < 200
//
// Normally, the index would not be utilized but because we know what the
// partition values are for the prefix of the index, we can generate
// filters that allow us to use the index (adding the appropriate in-between
// filters to catch all the values that are not part of the partitions).
// By doing so, we get the following plan:
// scalar-group-by
// ├── select
// │ ├── scan orders@orders_by_seq_num
// │ │ └── constraint: /1/4/2: [ - /'europe-west2')
// │ │ [/'europe-west2'/100 - /'europe-west2'/199]
// │ │ [/e'europe-west2\x00'/100 - /'us-east1')
// │ │ [/'us-east1'/100 - /'us-east1'/199]
// │ │ [/e'us-east1\x00'/100 - /'us-west1')
// │ │ [/'us-west1'/100 - /'us-west1'/199]
// │ │ [/e'us-west1\x00'/100 - ]
// │ └── filters
// │ └── (seq_num >= 100) AND (seq_num < 200)
// └── aggregations
// └── sum
// └── variable: total
//
func (c *CustomFuncs) partitionValuesFilters(
tabID opt.TableID, index cat.Index,
) (partitionFilter, inBetweenFilter memo.FiltersExpr) {
// Find all the partition values
partitionValues := index.PartitionByListPrefixes()
if len(partitionValues) == 0 {
return partitionFilter, inBetweenFilter
}
// Get the in partition expressions.
inPartition := c.inPartitionFilters(tabID, index, partitionValues)
// Get the in between expressions.
inBetween := c.inBetweenFilters(tabID, index, partitionValues)
return inPartition, inBetween
}
// GenerateInvertedIndexScans enumerates all inverted indexes on the Scan
// operator's table and generates an alternate Scan operator for each inverted
// index that can service the query.
//
// The resulting Scan operator is pre-constrained and requires an IndexJoin to
// project columns other than the primary key columns. The reason it's pre-
// constrained is that we cannot treat an inverted index in the same way as a
// regular index, since it does not actually contain the indexed column.
func (c *CustomFuncs) GenerateInvertedIndexScans(
grp memo.RelExpr, scanPrivate *memo.ScanPrivate, filters memo.FiltersExpr,
) {
var sb indexScanBuilder
sb.init(c, scanPrivate.Table)
tabMeta := c.e.mem.Metadata().TableMeta(scanPrivate.Table)
// Generate implicit filters from constraints and computed columns as
// optional filters to help constrain an index scan.
optionalFilters := c.checkConstraintFilters(scanPrivate.Table)
computedColFilters := c.computedColFilters(scanPrivate, filters, optionalFilters)
optionalFilters = append(optionalFilters, computedColFilters...)
// Iterate over all inverted indexes.
var iter scanIndexIter
iter.Init(c.e.mem, &c.im, scanPrivate, filters, rejectNonInvertedIndexes)
iter.ForEach(func(index cat.Index, filters memo.FiltersExpr, indexCols opt.ColSet, isCovering bool) {
// Check whether the filter can constrain the index.
spanExpr, constraint, remainingFilters, pfState, ok := invertedidx.TryFilterInvertedIndex(
c.e.evalCtx, c.e.f, filters, optionalFilters, scanPrivate.Table, index, tabMeta.ComputedCols,
)
if !ok {
// A span expression to constrain the inverted index could not be
// generated.
return
}
spansToRead := spanExpr.SpansToRead
// Override the filters with remainingFilters. If the index is a
// multi-column inverted index, the non-inverted prefix columns are
// constrained by the constraint. In this case, it may be possible to
// reduce the filters if the constraint fully describes some of
// sub-expressions. The remainingFilters are the filters that are not
// fully expressed by the constraint.
//
// Consider the example:
//
// CREATE TABLE t (a INT, b INT, g GEOMETRY, INVERTED INDEX (b, g))
//
// SELECT * FROM t WHERE a = 1 AND b = 2 AND ST_Intersects(.., g)
//
// The constraint would constrain b to [/2 - /2], guaranteeing that
// the inverted index scan would only produce rows where (b = 2).
// Reapplying the (b = 2) filter after the scan would be
// unnecessary, so the remainingFilters in this case would be
// (a = 1 AND ST_Intersects(.., g)).
filters = remainingFilters
// Construct new ScanOpDef with the new index and constraint.
newScanPrivate := *scanPrivate
newScanPrivate.Index = index.Ordinal()
newScanPrivate.Constraint = constraint
newScanPrivate.InvertedConstraint = spansToRead
// We will need an inverted filter above the scan if the spanExpr might
// produce duplicate primary keys or requires at least one UNION or
// INTERSECTION. In this case, we must scan both the primary key columns
// and the inverted key column.
needInvertedFilter := !spanExpr.Unique || spanExpr.Operator != inverted.None
pkCols := sb.primaryKeyCols()
newScanPrivate.Cols = pkCols.Copy()
var invertedCol opt.ColumnID
if needInvertedFilter {
invertedCol = scanPrivate.Table.ColumnID(index.VirtualInvertedColumn().Ordinal())
newScanPrivate.Cols.Add(invertedCol)
}
// The Scan operator always goes in a new group, since it's always nested
// underneath the IndexJoin. The IndexJoin may also go into its own group,
// if there's a remaining filter above it.
// TODO(mgartner): We don't always need to create an index join. The
// index join will be removed by EliminateIndexJoinInsideProject, but
// it'd be more efficient to not create the index join in the first
// place.
sb.setScan(&newScanPrivate)
// Add an inverted filter if needed.
if needInvertedFilter {
sb.addInvertedFilter(spanExpr, pfState, invertedCol)
}
// If remaining filter exists, split it into one part that can be pushed
// below the IndexJoin, and one part that needs to stay above.
filters = sb.addSelectAfterSplit(filters, pkCols)
sb.addIndexJoin(scanPrivate.Cols)
sb.addSelect(filters)
sb.build(grp)
})
}
// tryConstrainIndex tries to derive a constraint for the given index from the
// specified filter. If a constraint is derived, it is returned along with any
// filter remaining after extracting the constraint. If no constraint can be
// derived, then tryConstrainIndex returns ok = false.
func (c *CustomFuncs) tryConstrainIndex(
requiredFilters, optionalFilters memo.FiltersExpr, tabID opt.TableID, indexOrd int,
) (constraint *constraint.Constraint, remainingFilters memo.FiltersExpr, ok bool) {
// Start with fast check to rule out indexes that cannot be constrained.
if !c.canMaybeConstrainNonInvertedIndex(requiredFilters, tabID, indexOrd) &&
!c.canMaybeConstrainNonInvertedIndex(optionalFilters, tabID, indexOrd) {
return nil, nil, false
}
ic := c.initIdxConstraintForIndex(requiredFilters, optionalFilters, tabID, indexOrd)
constraint = ic.Constraint()
if constraint.IsUnconstrained() {
return nil, nil, false
}
// Return 0 if no remaining filter.
remaining := ic.RemainingFilters()
// Make copy of constraint so that idxconstraint instance is not referenced.
copy := *constraint
return ©, remaining, true
}
// canMaybeConstrainNonInvertedIndex returns true if we should try to constrain
// a given non-inverted index by the given filter. It returns false if it is
// impossible for the filter can constrain the scan.
//
// If any of the three following statements are true, then it is
// possible that the index can be constrained:
//
// 1. The filter references the first index column.
// 2. The constraints are not tight (see props.Scalar.TightConstraints).
// 3. Any of the filter's constraints start with the first index column.
//
func (c *CustomFuncs) canMaybeConstrainNonInvertedIndex(
filters memo.FiltersExpr, tabID opt.TableID, indexOrd int,
) bool {
md := c.e.mem.Metadata()
index := md.Table(tabID).Index(indexOrd)
for i := range filters {
filterProps := filters[i].ScalarProps()
// If the filter involves the first index column, then the index can
// possibly be constrained.
firstIndexCol := tabID.IndexColumnID(index, 0)
if filterProps.OuterCols.Contains(firstIndexCol) {
return true
}
// If the constraints are not tight, then the index can possibly be
// constrained, because index constraint generation supports more
// expressions than filter constraint generation.
if !filterProps.TightConstraints {
return true
}
// If any constraint involves the first index column, then the index can
// possibly be constrained.
cset := filterProps.Constraints
for i := 0; i < cset.Length(); i++ {
firstCol := cset.Constraint(i).Columns.Get(0).ID()
if firstCol == firstIndexCol {
return true
}
}
}
return false
}
// GenerateZigzagJoins generates zigzag joins for all pairs of indexes of the
// Scan table which contain one of the constant columns in the FiltersExpr as
// its prefix.
//
// Similar to the lookup join, if the selected index pair does not contain
// all the columns in the output of the scan, we wrap the zigzag join
// in another index join (implemented as a lookup join) on the primary index.
// The index join is implemented with a lookup join since the index join does
// not support arbitrary input sources that are not plain index scans.
func (c *CustomFuncs) GenerateZigzagJoins(
grp memo.RelExpr, scanPrivate *memo.ScanPrivate, filters memo.FiltersExpr,
) {
tab := c.e.mem.Metadata().Table(scanPrivate.Table)
// Short circuit unless zigzag joins are explicitly enabled.
if !c.e.evalCtx.SessionData.ZigzagJoinEnabled {
return
}
fixedCols := memo.ExtractConstColumns(filters, c.e.evalCtx)
if fixedCols.Len() == 0 {
// Zigzagging isn't helpful in the absence of fixed columns.
return
}
// Zigzag joins aren't currently equipped to produce system columns, so
// don't generate any if some system columns are requested.
foundSystemCol := false
scanPrivate.Cols.ForEach(func(colID opt.ColumnID) {
if tab.Column(scanPrivate.Table.ColumnOrdinal(colID)).Kind() == cat.System {
foundSystemCol = true
}
})
if foundSystemCol {
return
}
// Iterate through indexes, looking for those prefixed with fixedEq cols.
// Efficiently finding a set of indexes that make the most efficient zigzag
// join, with no limit on the number of indexes selected, is an instance of
// this NP-hard problem:
// https://en.wikipedia.org/wiki/Maximum_coverage_problem
//
// A formal definition would be: Suppose we have a set of fixed columns F
// (defined as fixedCols in the code above), and a set of indexes I. The
// "fixed prefix" of every index, in this context, refers to the longest
// prefix of each index's columns that is in F. In other words, we stop
// adding to the prefix when we come across the first non-fixed column
// in an index.
//
// We want to find at most k = 2 indexes from I (in the future k could be
// >= 2 when the zigzag joiner supports 2+ index zigzag joins) that cover
// the maximum number of columns in F. An index is defined to have covered
// a column if that column is in the index's fixed prefix.
//
// Since only 2-way zigzag joins are currently supported, the naive
// approach is bounded at n^2. For now, just do that - a quadratic
// iteration through all indexes.
//
// TODO(itsbilal): Implement the greedy or weighted version of the
// algorithm laid out here:
// https://en.wikipedia.org/wiki/Maximum_coverage_problem
//
// TODO(mgartner): We should consider primary indexes when it has multiple
// columns and only the first is being constrained.
var iter scanIndexIter
iter.Init(c.e.mem, &c.im, scanPrivate, filters, rejectPrimaryIndex|rejectInvertedIndexes)
iter.ForEach(func(leftIndex cat.Index, outerFilters memo.FiltersExpr, leftCols opt.ColSet, _ bool) {
leftFixed := c.indexConstrainedCols(leftIndex, scanPrivate.Table, fixedCols)
// Short-circuit quickly if the first column in the index is not a fixed
// column.
if leftFixed.Len() == 0 {
return
}
var iter2 scanIndexIter
iter2.Init(c.e.mem, &c.im, scanPrivate, outerFilters, rejectPrimaryIndex|rejectInvertedIndexes)
iter2.SetOriginalFilters(filters)
iter2.ForEachStartingAfter(leftIndex.Ordinal(), func(rightIndex cat.Index, innerFilters memo.FiltersExpr, rightCols opt.ColSet, _ bool) {
rightFixed := c.indexConstrainedCols(rightIndex, scanPrivate.Table, fixedCols)
// If neither side contributes a fixed column not contributed by the
// other, then there's no reason to zigzag on this pair of indexes.
if leftFixed.SubsetOf(rightFixed) || rightFixed.SubsetOf(leftFixed) {
return
}
// Columns that are in both indexes are, by definition, equal.
eqCols := leftCols.Intersection(rightCols)
eqCols.DifferenceWith(fixedCols)
if eqCols.Len() == 0 {
// A simple index join is more efficient in such cases.
return
}
// If there are any equalities across the columns of the two indexes,
// push them into the zigzag join spec.
leftEq, rightEq := memo.ExtractJoinEqualityColumns(
leftCols, rightCols, innerFilters,
)
leftEqCols, rightEqCols := eqColsForZigzag(
tab,
scanPrivate.Table,
leftIndex,
rightIndex,
fixedCols,
leftEq,
rightEq,
)
if len(leftEqCols) == 0 || len(rightEqCols) == 0 {
// One of the indexes is not sorted by any of the equality
// columns, because the equality columns do not immediately
// succeed the fixed columns. A zigzag join cannot be planned.
return
}
// Confirm the primary key columns are in both leftEqCols and
// rightEqCols. The conversion of a select with filters to a
// zigzag join requires the primary key columns to be in the output
// for output correctness; otherwise, we could be outputting more
// results than there should be (due to an equality on a non-unique
// non-required value).
pkIndex := tab.Index(cat.PrimaryIndex)
pkCols := make(opt.ColList, pkIndex.KeyColumnCount())
pkColsFound := true
for i := range pkCols {
pkCols[i] = scanPrivate.Table.IndexColumnID(pkIndex, i)
if _, ok := leftEqCols.Find(pkCols[i]); !ok {
pkColsFound = false
break
}
if _, ok := rightEqCols.Find(pkCols[i]); !ok {
pkColsFound = false
break
}
}
if !pkColsFound {
return
}
leftFixedCols, leftVals, leftTypes := c.fixedColsForZigzag(
leftIndex, scanPrivate.Table, innerFilters,
)
rightFixedCols, rightVals, rightTypes := c.fixedColsForZigzag(
rightIndex, scanPrivate.Table, innerFilters,
)
// If the fixed cols have been reduced during partial index
// implication, then a zigzag join cannot be planned. A single index
// scan should be more efficient.
if len(leftFixedCols) != leftFixed.Len() || len(rightFixedCols) != rightFixed.Len() {
return
}
zigzagJoin := memo.ZigzagJoinExpr{
On: innerFilters,
ZigzagJoinPrivate: memo.ZigzagJoinPrivate{
LeftTable: scanPrivate.Table,
LeftIndex: leftIndex.Ordinal(),
RightTable: scanPrivate.Table,
RightIndex: rightIndex.Ordinal(),
LeftEqCols: leftEqCols,
RightEqCols: rightEqCols,
LeftFixedCols: leftFixedCols,
RightFixedCols: rightFixedCols,
},
}
leftTupleTyp := types.MakeTuple(leftTypes)
rightTupleTyp := types.MakeTuple(rightTypes)
zigzagJoin.FixedVals = memo.ScalarListExpr{
c.e.f.ConstructTuple(leftVals, leftTupleTyp),
c.e.f.ConstructTuple(rightVals, rightTupleTyp),