-
Notifications
You must be signed in to change notification settings - Fork 3.8k
/
index_encoding.go
1626 lines (1516 loc) · 59.5 KB
/
index_encoding.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
// Copyright 2018 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.
package rowenc
import (
"context"
"sort"
"unsafe"
"github.com/cockroachdb/cockroach/pkg/geo/geoindex"
"github.com/cockroachdb/cockroach/pkg/geo/geopb"
"github.com/cockroachdb/cockroach/pkg/keys"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/sql/catalog"
"github.com/cockroachdb/cockroach/pkg/sql/catalog/catalogkeys"
"github.com/cockroachdb/cockroach/pkg/sql/catalog/catenumpb"
"github.com/cockroachdb/cockroach/pkg/sql/catalog/descpb"
"github.com/cockroachdb/cockroach/pkg/sql/catalog/fetchpb"
"github.com/cockroachdb/cockroach/pkg/sql/inverted"
"github.com/cockroachdb/cockroach/pkg/sql/rowenc/keyside"
"github.com/cockroachdb/cockroach/pkg/sql/rowenc/rowencpb"
"github.com/cockroachdb/cockroach/pkg/sql/rowenc/valueside"
"github.com/cockroachdb/cockroach/pkg/sql/sem/eval"
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
"github.com/cockroachdb/cockroach/pkg/sql/sqlerrors"
"github.com/cockroachdb/cockroach/pkg/sql/types"
"github.com/cockroachdb/cockroach/pkg/util/encoding"
"github.com/cockroachdb/cockroach/pkg/util/intsets"
"github.com/cockroachdb/cockroach/pkg/util/json"
"github.com/cockroachdb/cockroach/pkg/util/mon"
"github.com/cockroachdb/cockroach/pkg/util/protoutil"
"github.com/cockroachdb/cockroach/pkg/util/trigram"
"github.com/cockroachdb/cockroach/pkg/util/tsearch"
"github.com/cockroachdb/cockroach/pkg/util/unique"
"github.com/cockroachdb/errors"
)
// This file contains facilities to encode primary and secondary
// indexes on SQL tables.
// MakeIndexKeyPrefix builds, with the given codec, the key prefix used for the
// data of the index identified by tableID and indexID. If what you actually
// need is the corresponding Span, prefer desc.IndexSpan(indexID) or
// desc.PrimaryIndexSpan().
func MakeIndexKeyPrefix(codec keys.SQLCodec, tableID descpb.ID, indexID descpb.IndexID) []byte {
	rawTableID, rawIndexID := uint32(tableID), uint32(indexID)
	return codec.IndexPrefix(rawTableID, rawIndexID)
}
// EncodeIndexKey builds a key made of keyPrefix followed by the encodings of
// the index's key columns. It returns that key together with a flag reporting
// whether any of the encoded values was NULL.
//
// Key suffix columns are not encoded, so the result is not always a full
// index key.
func EncodeIndexKey(
	tableDesc catalog.TableDescriptor,
	index catalog.Index,
	colMap catalog.TableColMap,
	values []tree.Datum,
	keyPrefix []byte,
) (key []byte, containsNull bool, err error) {
	// Only the leading NumKeyColumns entries are key columns; whatever follows
	// them in the fetch spec is the key suffix, which is left out here.
	keyCols := tableDesc.IndexFetchSpecKeyAndSuffixColumns(index)[:index.NumKeyColumns()]
	encoded, sawNull, encErr := EncodePartialIndexKey(keyCols, colMap, values, keyPrefix)
	if encErr != nil {
		return nil, false, encErr
	}
	return encoded, sawNull, nil
}
// EncodePartialIndexSpan encodes a key exactly as EncodePartialIndexKey does
// and returns the minimal span for it: the span starts at the encoded key and
// ends at that key's PrefixEnd. The boolean result reports whether any encoded
// value was NULL.
func EncodePartialIndexSpan(
	keyCols []fetchpb.IndexFetchSpec_KeyColumn,
	colMap catalog.TableColMap,
	values []tree.Datum,
	keyPrefix []byte,
) (span roachpb.Span, containsNull bool, err error) {
	encoded, sawNull, encErr := EncodePartialIndexKey(keyCols, colMap, values, keyPrefix)
	if encErr != nil {
		return roachpb.Span{}, false, encErr
	}
	start := roachpb.Key(encoded)
	return roachpb.Span{Key: start, EndKey: start.PrefixEnd()}, sawNull, nil
}
// EncodePartialIndexKey encodes a partial index key: only the given key (or
// key suffix) columns are encoded, and they may be a prefix of the index key
// columns. The value of each column is looked up through colMap; a column
// without an entry in colMap is encoded as NULL. The boolean result reports
// whether any encoded value was NULL.
//
// Does not directly append to keyPrefix.
func EncodePartialIndexKey(
	keyCols []fetchpb.IndexFetchSpec_KeyColumn,
	colMap catalog.TableColMap,
	values []tree.Datum,
	keyPrefix []byte,
) (key []byte, containsNull bool, _ error) {
	// The buffer is about to be appended to, so size it generously up front:
	// the length of the prefix plus two bytes per column value. Two bytes is
	// an underestimate for everything except low integers.
	buf := growKey(keyPrefix, len(keyPrefix)+2*len(values))
	sawNull := false
	for i := range keyCols {
		col := &keyCols[i]
		datum := findColumnValue(col.ColumnID, colMap, values)
		if datum == tree.DNull {
			sawNull = true
		}
		dir, err := catalogkeys.IndexColumnEncodingDirection(col.Direction)
		if err != nil {
			return nil, false, err
		}
		buf, err = keyside.Encode(buf, datum, dir)
		if err != nil {
			return nil, false, err
		}
	}
	return buf, sawNull, nil
}
// Directions is a list of index column directions.
type Directions []catenumpb.IndexColumn_Direction

// Get returns the encoding direction for the i-th entry. Positions at or past
// the end of the list default to encoding.Ascending.
func (d Directions) Get(i int) (encoding.Direction, error) {
	if i >= len(d) {
		return encoding.Ascending, nil
	}
	return catalogkeys.IndexColumnEncodingDirection(d[i])
}
// MakeSpanFromEncDatums creates a minimal index key span on the input values.
// A minimal index key span is a span that includes the fewest possible keys
// after the start key generated by the input values.
//
// The start key is produced by MakeKeyFromEncDatums, i.e. keyPrefix followed
// by the encodings of the given EncDatum values; the end key is the start
// key's PrefixEnd. values and keyCols should be given in the order of the
// index key columns, and values may be a prefix of them. The boolean result
// reports whether any of the values was NULL.
func MakeSpanFromEncDatums(
	values EncDatumRow,
	keyCols []fetchpb.IndexFetchSpec_KeyColumn,
	alloc *tree.DatumAlloc,
	keyPrefix []byte,
) (_ roachpb.Span, containsNull bool, _ error) {
	start, sawNull, err := MakeKeyFromEncDatums(values, keyCols, alloc, keyPrefix)
	if err != nil {
		return roachpb.Span{}, false, err
	}
	span := roachpb.Span{Key: start, EndKey: start.PrefixEnd()}
	return span, sawNull, nil
}
// NeededColumnFamilyIDs returns the minimal set of column families required to
// retrieve the columns in neededColOrdinals for the specified table and index.
// The ordinals index into table.DeletableColumns(); ordinals at or beyond the
// number of those columns are treated as system columns. The returned
// descpb.FamilyIDs are in sorted order.
//
// The function panics with an assertion failure if the table has more than
// one family and none of them has ID 0.
func NeededColumnFamilyIDs(
neededColOrdinals intsets.Fast, table catalog.TableDescriptor, index catalog.Index,
) []descpb.FamilyID {
// With a single family there is nothing to choose from.
if table.NumFamilies() == 1 {
return []descpb.FamilyID{table.GetFamilies()[0].ID}
}
// Build some necessary data structures for column metadata.
columns := table.DeletableColumns()
colIdxMap := catalog.ColumnIDToOrdinalMap(columns)
// Ordinals of the index's key columns, composite columns and key suffix
// ("extra") columns, respectively.
var indexedCols intsets.Fast
var compositeCols intsets.Fast
var extraCols intsets.Fast
for i := 0; i < index.NumKeyColumns(); i++ {
columnID := index.GetKeyColumnID(i)
columnOrdinal := colIdxMap.GetDefault(columnID)
indexedCols.Add(columnOrdinal)
}
for i := 0; i < index.NumCompositeColumns(); i++ {
columnID := index.GetCompositeColumnID(i)
columnOrdinal := colIdxMap.GetDefault(columnID)
compositeCols.Add(columnOrdinal)
}
for i := 0; i < index.NumKeySuffixColumns(); i++ {
columnID := index.GetKeySuffixColumnID(i)
columnOrdinal := colIdxMap.GetDefault(columnID)
extraCols.Add(columnOrdinal)
}
// The column family with ID 0 is special because it always has a KV entry.
// Other column families will omit a value if all their columns are null, so
// we may need to retrieve family 0 to use as a sentinel for distinguishing
// between null values and the absence of a row. Also, secondary indexes store
// values here for composite and "extra" columns. ("Extra" means primary key
// columns which are not indexed.)
var family0 *descpb.ColumnFamilyDescriptor
hasSecondaryEncoding := index.GetEncodingType() == catenumpb.SecondaryIndexEncoding
// First iterate over the needed columns and look for a few special cases:
// * columns which can be decoded from the key and columns whose value is stored
// in family 0.
// * certain system columns, like the MVCC timestamp column require all of the
// column families to be scanned to produce a value.
family0Needed := false
mvccColumnRequested := false
// nc is a working copy of the needed ordinals. Columns that turn out not to
// require a family lookup of their own are removed from it below, so that
// the family scan further down only matches the remaining ones.
nc := neededColOrdinals.Copy()
neededColOrdinals.ForEach(func(columnOrdinal int) {
if indexedCols.Contains(columnOrdinal) && !compositeCols.Contains(columnOrdinal) {
// We can decode this column from the index key, so no particular family
// is needed.
nc.Remove(columnOrdinal)
}
if hasSecondaryEncoding && (compositeCols.Contains(columnOrdinal) ||
extraCols.Contains(columnOrdinal)) {
// Secondary indexes store composite and "extra" column values in family
// 0.
family0Needed = true
nc.Remove(columnOrdinal)
}
// System column ordinals are larger than the number of columns.
if columnOrdinal >= len(columns) {
mvccColumnRequested = true
}
})
// If the MVCC timestamp column was requested, then bail out. Note that the
// check above fires for any ordinal at or beyond len(columns), and that
// bailing out means returning every family of the table.
if mvccColumnRequested {
families := make([]descpb.FamilyID, 0, table.NumFamilies())
// The callback never returns an error, so the result is ignored.
_ = table.ForeachFamily(func(family *descpb.ColumnFamilyDescriptor) error {
families = append(families, family.ID)
return nil
})
return families
}
secondaryStoredColumnIDs := index.CollectSecondaryStoredColumnIDs()
// Iterate over the column families to find which ones contain needed columns.
// We also keep track of whether all of the needed families' columns are
// nullable, since this means we need column family 0 as a sentinel, even if
// none of its columns are needed.
var neededFamilyIDs []descpb.FamilyID
allFamiliesNullable := true
// The callback never returns an error, so the result is ignored.
_ = table.ForeachFamily(func(family *descpb.ColumnFamilyDescriptor) error {
needed := false
nullable := true
if family.ID == 0 {
// Set column family 0 aside in case we need it as a sentinel.
family0 = family
if family0Needed {
needed = true
}
// Family 0 is always treated as non-nullable.
nullable = false
}
for _, columnID := range family.ColumnIDs {
if needed && !nullable {
// Nothing left to check.
break
}
columnOrdinal := colIdxMap.GetDefault(columnID)
if nc.Contains(columnOrdinal) {
needed = true
}
if !columns[columnOrdinal].IsNullable() && !indexedCols.Contains(columnOrdinal) {
// This column is non-nullable and is not indexed, thus, if it
// is stored in the value part of the KV entry (which is the
// case for the primary indexes as well as when the column is
// included in STORING clause of the secondary index), the
// column family is non-nullable too.
//
// Note that for unique secondary indexes more columns might be
// included in the value part (namely "key suffix" columns when
// the indexed columns have a NULL value), but we choose to
// ignore those here. This is needed for correctness, and as a
// result we might fetch the zeroth column family when it turns
// out to be not needed.
if index.Primary() || secondaryStoredColumnIDs.Contains(columnID) {
nullable = false
}
}
}
if needed {
neededFamilyIDs = append(neededFamilyIDs, family.ID)
allFamiliesNullable = allFamiliesNullable && nullable
}
return nil
})
if family0 == nil {
panic(errors.AssertionFailedf("column family 0 not found"))
}
// If all the needed families are nullable, we also need family 0 as a
// sentinel. Note that this is only the case if family 0 was not already added
// to neededFamilyIDs (a needed family 0 is recorded as non-nullable above,
// which clears allFamiliesNullable).
if allFamiliesNullable {
// Prepend family 0: grow the slice by one, shift every element right by
// one position, then overwrite the first slot.
neededFamilyIDs = append(neededFamilyIDs, 0)
copy(neededFamilyIDs[1:], neededFamilyIDs)
neededFamilyIDs[0] = family0.ID
}
return neededFamilyIDs
}
// SplitRowKeyIntoFamilySpans splits a key representing a single row point
// lookup into separate disjoint spans that request only the column families
// listed in neededFamilies instead of all the families. It is up to the client
// to ensure the requested span represents a single row lookup and that the
// span splitting is appropriate (see CanSplitSpanIntoFamilySpans).
//
// A family whose ID directly follows the previous entry of neededFamilies is
// merged into the previous span by extending that span's EndKey. Spans that
// cover a single family key are returned without an EndKey.
//
// Note that this function will still return a family-specific span even if the
// input span is for a table that has just a single column family, so that the
// caller can have a precise key to send via a GetRequest if desired.
//
// The resulting spans are appended to appendTo, which is returned.
func SplitRowKeyIntoFamilySpans(
	appendTo roachpb.Spans, key roachpb.Key, neededFamilies []descpb.FamilyID,
) roachpb.Spans {
	// Cap the capacity at the length so that building family keys on top of
	// key can never write into memory shared with the caller.
	key = key[:len(key):len(key)]
	for i, familyID := range neededFamilies {
		familyKey := roachpb.Key(keys.MakeFamilyKey(key, uint32(familyID)))
		if i > 0 && familyID == neededFamilies[i-1]+1 {
			// Adjacent to the previous family: widen the previous span instead
			// of emitting a new one.
			appendTo[len(appendTo)-1].EndKey = familyKey.PrefixEnd()
			continue
		}
		// No EndKey is set here, because a column family on its own can be
		// fetched using a GetRequest.
		appendTo = append(appendTo, roachpb.Span{Key: familyKey})
	}
	return appendTo
}
// MakeKeyFromEncDatums creates an index key by concatenating keyPrefix with
// the encodings of the given EncDatum values. The i-th value is encoded with
// the type and direction of keyCols[i]. values may be a prefix of keyCols, but
// an error is returned if there are more values than key columns. The boolean
// result reports whether any of the values was NULL.
func MakeKeyFromEncDatums(
	values EncDatumRow,
	keyCols []fetchpb.IndexFetchSpec_KeyColumn,
	alloc *tree.DatumAlloc,
	keyPrefix []byte,
) (_ roachpb.Key, containsNull bool, _ error) {
	// Values may be a prefix of the index columns.
	if len(values) > len(keyCols) {
		return nil, false, errors.Errorf("%d values, %d key cols", len(values), len(keyCols))
	}
	// The key is about to be appended to, so start with twice the capacity
	// the prefix needs.
	key := append(make(roachpb.Key, 0, len(keyPrefix)*2), keyPrefix...)
	sawNull := false
	// Each value is deliberately ranged over by value, so that encoding
	// operates on a copy and not on the caller's row.
	for i, val := range values {
		col := &keyCols[i]
		enc := catenumpb.DatumEncoding_ASCENDING_KEY
		if col.Direction == catenumpb.IndexColumn_DESC {
			enc = catenumpb.DatumEncoding_DESCENDING_KEY
		}
		if val.IsNull() {
			sawNull = true
		}
		var err error
		if key, err = val.Encode(col.Type, alloc, enc, key); err != nil {
			return nil, false, err
		}
	}
	return key, sawNull, nil
}
// findColumnValue looks up the position of the column in colMap and returns
// the value stored at that position in values. A column that has no entry in
// colMap yields a NULL value.
func findColumnValue(
	column descpb.ColumnID, colMap catalog.TableColMap, values []tree.Datum,
) tree.Datum {
	idx, found := colMap.Get(column)
	if !found {
		return tree.DNull
	}
	// TODO(pmattis): Need to convert the values[idx] value to the type
	// expected by the column.
	return values[idx]
}
// DecodePartialTableIDIndexID decodes a table id followed by an index id from
// the front of key and returns the rest of the key along with both ids,
// converted to their descpb types. The input key must already have its tenant
// id removed.
func DecodePartialTableIDIndexID(key []byte) ([]byte, descpb.ID, descpb.IndexID, error) {
	rest, rawTableID, rawIndexID, err := keys.DecodeTableIDIndexID(key)
	return rest, descpb.ID(rawTableID), descpb.IndexID(rawIndexID), err
}
// DecodeIndexKeyPrefix decodes the prefix of an index key (tenant prefix,
// table id and index id) and returns the index id and a slice for the rest of
// the key. An error is returned if the table id found in the key differs from
// expectedTableID.
//
// Don't use this function in the scan "hot path".
func DecodeIndexKeyPrefix(
	codec keys.SQLCodec, expectedTableID descpb.ID, key []byte,
) (indexID descpb.IndexID, remaining []byte, err error) {
	withoutTenant, err := codec.StripTenantPrefix(key)
	if err != nil {
		return 0, nil, err
	}
	rest, tableID, decodedIndexID, err := DecodePartialTableIDIndexID(withoutTenant)
	if err != nil {
		return 0, nil, err
	}
	if tableID != expectedTableID {
		return 0, nil, errors.Errorf(
			"unexpected table ID %d, expected %d instead", tableID, expectedTableID)
	}
	return decodedIndexID, rest, nil
}
// DecodeIndexKey decodes the values that are a part of the specified index
// key (setting vals). The tenant prefix, table id and index id at the front
// of the key are skipped; the ids are not checked.
//
// The remaining bytes in the index key are returned. They will be one of: an
// encoded column ID for the primary key index; the primary key suffix for
// non-unique secondary indexes or unique secondary indexes containing NULL;
// or empty. The boolean result reports whether a NULL was found among the
// decoded values.
func DecodeIndexKey(
	codec keys.SQLCodec,
	types []*types.T,
	vals []EncDatum,
	colDirs []catenumpb.IndexColumn_Direction,
	key []byte,
) (remainingKey []byte, foundNull bool, _ error) {
	withoutTenant, err := codec.StripTenantPrefix(key)
	if err != nil {
		return nil, false, err
	}
	encodedVals, _, _, err := DecodePartialTableIDIndexID(withoutTenant)
	if err != nil {
		return nil, false, err
	}
	rest, sawNull, err := DecodeKeyVals(types, vals, colDirs, encodedVals)
	if err != nil {
		return nil, false, err
	}
	return rest, sawNull, nil
}
// DecodeKeyVals decodes the values that are part of the key, one per element
// of vals, and stores them in vals. The i-th value is decoded with types[i]
// and directions[i]. If directions is nil, every value is decoded as
// ascending; otherwise it must have the same length as vals.
//
// DecodeKeyVals returns the bytes left over after the last decoded value and
// whether or not NULL was encountered in the key.
func DecodeKeyVals(
	types []*types.T, vals []EncDatum, directions []catenumpb.IndexColumn_Direction, key []byte,
) (remainingKey []byte, foundNull bool, _ error) {
	if directions != nil && len(directions) != len(vals) {
		return nil, false, errors.Errorf("encoding directions doesn't parallel vals: %d vs %d.",
			len(directions), len(vals))
	}
	sawNull := false
	for i := range vals {
		enc := catenumpb.DatumEncoding_ASCENDING_KEY
		if directions != nil && directions[i] == catenumpb.IndexColumn_DESC {
			enc = catenumpb.DatumEncoding_DESCENDING_KEY
		}
		var err error
		if vals[i], key, err = EncDatumFromBuffer(types[i], enc, key); err != nil {
			return nil, false, err
		}
		if !sawNull && vals[i].IsNull() {
			sawNull = true
		}
	}
	return key, sawNull, nil
}
// DecodeKeyValsUsingSpec is a variant of DecodeKeyVals which takes the type
// and direction of each value from the corresponding
// fetchpb.IndexFetchSpec_KeyColumn. One value is decoded per element of vals.
// It returns the bytes left over after the last decoded value and whether a
// NULL was encountered.
func DecodeKeyValsUsingSpec(
	keyCols []fetchpb.IndexFetchSpec_KeyColumn, key []byte, vals []EncDatum,
) (remainingKey []byte, foundNull bool, _ error) {
	sawNull := false
	for i := range vals {
		col := &keyCols[i]
		enc := catenumpb.DatumEncoding_ASCENDING_KEY
		if col.Direction == catenumpb.IndexColumn_DESC {
			enc = catenumpb.DatumEncoding_DESCENDING_KEY
		}
		var err error
		if vals[i], key, err = EncDatumFromBuffer(col.Type, enc, key); err != nil {
			return nil, false, err
		}
		if !sawNull && vals[i].IsNull() {
			sawNull = true
		}
	}
	return key, sawNull, nil
}
// IndexEntry represents an encoded key/value for an index entry.
type IndexEntry struct {
// Key is the encoded key of the entry.
Key roachpb.Key
// Value is the encoded value of the entry.
Value roachpb.Value
// Family is only used for forward indexes.
Family descpb.FamilyID
}
// ValueEncodedColumn represents a composite or stored column of a secondary
// index.
type ValueEncodedColumn struct {
// ColID is the ID of the column.
ColID descpb.ColumnID
// IsComposite presumably distinguishes a composite column (true) from a
// stored column (false) — TODO confirm against the code that sets it.
IsComposite bool
}
// ByID implements sort.Interface for []ValueEncodedColumn, ordering the
// elements by their ColID field.
type ByID []ValueEncodedColumn

// Len returns the number of columns in the slice.
func (a ByID) Len() int {
	return len(a)
}

// Swap exchanges the columns at positions i and j.
func (a ByID) Swap(i, j int) {
	a[j], a[i] = a[i], a[j]
}

// Less reports whether the column at position i has a smaller ColID than the
// column at position j.
func (a ByID) Less(i, j int) bool {
	return a[j].ColID > a[i].ColID
}
// EncodeInvertedIndexKeys creates a list of inverted index keys for the given
// row. Each key consists of keyPrefix, followed by the encodings of any
// non-inverted prefix columns of the index, followed by an encoding derived
// from the value of the inverted column. A column that has no entry in colMap
// is treated as NULL. Indexes with a non-empty geo config are encoded with
// EncodeGeoInvertedIndexTableKeys, all others with
// EncodeInvertedIndexTableKeys.
func EncodeInvertedIndexKeys(
	index catalog.Index, colMap catalog.TableColMap, values []tree.Datum, keyPrefix []byte,
) (key [][]byte, err error) {
	prefix, err := EncodeInvertedIndexPrefixKeys(index, colMap, values, keyPrefix)
	if err != nil {
		return nil, err
	}
	invertedVal := findColumnValue(index.InvertedColumnID(), colMap, values)
	if geoConfig := index.GetGeoConfig(); !geoConfig.IsEmpty() {
		return EncodeGeoInvertedIndexTableKeys(invertedVal, prefix, geoConfig)
	}
	return EncodeInvertedIndexTableKeys(invertedVal, prefix, index.GetVersion())
}
// EncodeInvertedIndexPrefixKeys encodes the non-inverted prefix columns if
// the given index is a multi-column inverted index, and returns keyPrefix
// extended with those encodings. For an index with at most one key column,
// keyPrefix is returned unchanged.
func EncodeInvertedIndexPrefixKeys(
	index catalog.Index, colMap catalog.TableColMap, values []tree.Datum, keyPrefix []byte,
) (_ []byte, err error) {
	numColumns := index.NumKeyColumns()
	if numColumns <= 1 {
		// Not a multi-column inverted index: there is no prefix to encode.
		return keyPrefix, nil
	}
	// The last key column is the inverted column. It is not encoded by this
	// function; only the columns in front of it are.
	prefixColIDs := index.IndexDesc().KeyColumnIDs[:numColumns-1]
	dirs := Directions(index.IndexDesc().KeyColumnDirections)
	// Double the size of the key to make the imminent appends more
	// efficient.
	grown := growKey(keyPrefix, len(keyPrefix))
	encoded, _, err := EncodeColumns(prefixColIDs, dirs, colMap, values, grown)
	if err != nil {
		return nil, err
	}
	return encoded, nil
}
// EncodeInvertedIndexTableKeys produces the inverted index keys for the input
// datum, whose type family must be JSON, Array, String (trigram index) or
// TSVector; any other family results in an assertion error. For containers,
// one key is produced per element; for JSON, "element" means unique path
// through the document. Each output key is prefixed by inKey, and is
// guaranteed to be lexicographically sortable, but not guaranteed to be
// round-trippable during decoding. If the input Datum is (SQL) NULL, no
// inverted index keys will be produced, because inverted indexes cannot and do
// not need to satisfy the predicate col IS NULL.
//
// This function does not return keys for empty arrays or for NULL array
// elements unless the version is at least
// descpb.EmptyArraysInInvertedIndexesVersion. (Note that this only applies
// to arrays, not JSONs. This function returns keys for all non-null JSONs
// regardless of the version.)
func EncodeInvertedIndexTableKeys(
	val tree.Datum, inKey []byte, version descpb.IndexDescriptorVersion,
) (key [][]byte, err error) {
	if val == tree.DNull {
		return nil, nil
	}
	// TODO(yuzefovich): can val ever be a placeholder?
	//
	// val could be a DOidWrapper, so every type assertion below is made on
	// the unwrapped datum. Asserting on val itself would panic for a wrapped
	// value even though its resolved type family selected the case.
	datum := tree.UnwrapDOidWrapper(val)
	switch val.ResolvedType().Family() {
	case types.JsonFamily:
		// We do not need to pass the version for JSON types, since all prior
		// versions of JSON inverted indexes include keys for empty objects and
		// arrays.
		return json.EncodeInvertedIndexKeys(inKey, datum.(*tree.DJSON).JSON)
	case types.ArrayFamily:
		return encodeArrayInvertedIndexTableKeys(datum.(*tree.DArray), inKey, version, false /* excludeNulls */)
	case types.StringFamily:
		// TODO(jordan): Right now, this is just trigram inverted indexes. If we
		// want to support different types of inverted indexes on strings, we'll
		// need to pass in the inverted index column kind to this function.
		// We pad the keys when writing them to the index.
		// TODO(jordan): why are we doing this padding at all? Postgres does it.
		return encodeTrigramInvertedIndexTableKeys(string(*datum.(*tree.DString)), inKey, version, true /* pad */)
	case types.TSVectorFamily:
		return tsearch.EncodeInvertedIndexKeys(inKey, datum.(*tree.DTSVector).TSVector)
	}
	return nil, errors.AssertionFailedf("trying to apply inverted index to unsupported type %s", datum.ResolvedType())
}
// EncodeContainingInvertedIndexSpans returns the spans that must be scanned in
// the inverted index to evaluate a contains (@>) predicate with the given
// datum, which should be a container (either JSON or Array). These spans
// should be used to find the objects in the index that contain the given json
// or array. In other words, if we have a predicate x @> y, this function
// should use the value of y to find the spans to scan in an inverted index on
// x.
//
// A NULL val yields a nil expression and no error. A val of any other type
// family than JSON or Array yields an assertion error.
//
// The spans are returned in an inverted.SpanExpression, which represents the
// set operations that must be applied on the spans read during execution. See
// comments in the SpanExpression definition for details.
func EncodeContainingInvertedIndexSpans(
	ctx context.Context, evalCtx *eval.Context, val tree.Datum,
) (invertedExpr inverted.Expression, err error) {
	if val == tree.DNull {
		return nil, nil
	}
	// val may be a wrapper around the actual value, so the type assertions
	// below are made on the unwrapped datum; asserting on val itself would
	// panic for a wrapped value.
	datum := eval.UnwrapDatum(ctx, evalCtx, val)
	switch val.ResolvedType().Family() {
	case types.JsonFamily:
		return json.EncodeContainingInvertedIndexSpans(nil /* inKey */, datum.(*tree.DJSON).JSON)
	case types.ArrayFamily:
		return encodeContainingArrayInvertedIndexSpans(datum.(*tree.DArray), nil /* inKey */)
	default:
		return nil, errors.AssertionFailedf(
			"trying to apply inverted index to unsupported type %s", datum.ResolvedType(),
		)
	}
}
// EncodeContainedInvertedIndexSpans returns the spans that must be scanned in
// the inverted index to evaluate a contained by (<@) predicate with the given
// datum, which should be a container (either an Array or JSON). These spans
// should be used to find the objects in the index that could be contained by
// the given json or array. In other words, if we have a predicate x <@ y, this
// function should use the value of y to find the spans to scan in an inverted
// index on x.
//
// A NULL val yields a nil expression and no error. A val of any other type
// family than Array or JSON yields an assertion error.
//
// The spans are returned in an inverted.SpanExpression, which represents the
// set operations that must be applied on the spans read during execution. The
// span expression returned will never be tight. See comments in the
// SpanExpression definition for details.
func EncodeContainedInvertedIndexSpans(
	ctx context.Context, evalCtx *eval.Context, val tree.Datum,
) (invertedExpr inverted.Expression, err error) {
	if val == tree.DNull {
		return nil, nil
	}
	// val may be a wrapper around the actual value, so the type assertions
	// below are made on the unwrapped datum; asserting on val itself would
	// panic for a wrapped value.
	datum := eval.UnwrapDatum(ctx, evalCtx, val)
	switch val.ResolvedType().Family() {
	case types.ArrayFamily:
		return encodeContainedArrayInvertedIndexSpans(datum.(*tree.DArray), nil /* inKey */)
	case types.JsonFamily:
		return json.EncodeContainedInvertedIndexSpans(nil /* inKey */, datum.(*tree.DJSON).JSON)
	default:
		return nil, errors.AssertionFailedf(
			"trying to apply inverted index to unsupported type %s", datum.ResolvedType(),
		)
	}
}
// EncodeExistsInvertedIndexSpans returns the spans that must be scanned in
// the inverted index to evaluate an exists (?) predicate with the given
// string datum. These spans should be used to find the objects in the index
// that have the string datum as a top-level key.
//
// If val is an array (which must be an array of strings), then the inverted
// expression is a conjunction of the per-element expressions if all is true,
// and a disjunction otherwise. NULL elements of the array do not contribute
// to the expression. An array without any non-NULL element yields a nil
// expression.
//
// A NULL val yields a nil expression and no error. A val of any other type
// family yields an assertion error.
//
// The spans are returned in an inverted.SpanExpression, which represents the
// set operations that must be applied on the spans read during execution.
func EncodeExistsInvertedIndexSpans(
	ctx context.Context, evalCtx *eval.Context, val tree.Datum, all bool,
) (invertedExpr inverted.Expression, err error) {
	if val == tree.DNull {
		return nil, nil
	}
	// val could be a DOidWrapper, so the type assertions below are made on the
	// unwrapped datum; asserting on val itself would panic for a wrapped
	// value.
	datum := eval.UnwrapDatum(ctx, evalCtx, val)
	switch val.ResolvedType().Family() {
	case types.StringFamily:
		s := string(*datum.(*tree.DString))
		return json.EncodeExistsInvertedIndexSpans(nil /* inKey */, s)
	case types.ArrayFamily:
		if val.ResolvedType().ArrayContents().Family() != types.StringFamily {
			return nil, errors.AssertionFailedf(
				"trying to apply inverted index to unsupported type %s", datum.ResolvedType(),
			)
		}
		var expr inverted.Expression
		for _, d := range datum.(*tree.DArray).Array {
			if d == tree.DNull {
				// A NULL element is not a *tree.DString, so asserting on it
				// would panic. It names no key to look for, so it is skipped.
				// NOTE(review): confirm this matches how the ?| and ?&
				// operators treat NULL elements at evaluation time.
				continue
			}
			// The element itself could be a DOidWrapper around a string.
			s := string(*tree.UnwrapDOidWrapper(d).(*tree.DString))
			newExpr, err := json.EncodeExistsInvertedIndexSpans(nil /* inKey */, s)
			if err != nil {
				return nil, err
			}
			switch {
			case expr == nil:
				expr = newExpr
			case all:
				expr = inverted.And(expr, newExpr)
			default:
				expr = inverted.Or(expr, newExpr)
			}
		}
		return expr, nil
	default:
		return nil, errors.AssertionFailedf(
			"trying to apply inverted index to unsupported type %s", datum.ResolvedType(),
		)
	}
}
// EncodeOverlapsInvertedIndexSpans returns the spans that must be scanned in
// the inverted index to evaluate an overlaps (&&) predicate with the given
// datum, which should be an Array. These spans should be used to find the
// objects in the index that could overlap with the given array. In other
// words, if we have a predicate x && y, this function should use the value of
// y to find the spans to scan in an inverted index on x.
//
// A NULL val yields a nil expression and no error. A val of any other type
// family than Array yields an assertion error.
//
// The spans are returned in an inverted.SpanExpression, which represents the
// set operations that must be applied on the spans read during execution. The
// span expression returned will be tight. See comments in the
// SpanExpression definition for details.
func EncodeOverlapsInvertedIndexSpans(
	ctx context.Context, evalCtx *eval.Context, val tree.Datum,
) (invertedExpr inverted.Expression, err error) {
	if val == tree.DNull {
		return nil, nil
	}
	// val may be a wrapper around the actual value, so the type assertion
	// below is made on the unwrapped datum; asserting on val itself would
	// panic for a wrapped value.
	datum := eval.UnwrapDatum(ctx, evalCtx, val)
	switch val.ResolvedType().Family() {
	case types.ArrayFamily:
		return encodeOverlapsArrayInvertedIndexSpans(datum.(*tree.DArray), nil /* inKey */)
	default:
		return nil, errors.AssertionFailedf(
			"trying to apply inverted index to unsupported type %s", datum.ResolvedType(),
		)
	}
}
// encodeArrayInvertedIndexTableKeys returns a list of inverted index keys for
// the given input array, one per entry in the array, with duplicates removed
// through unique.UniquifyByteSlices. The input inKey is prefixed to all
// returned keys.
//
// This function does not return keys for empty arrays or for NULL array
// elements unless the version is at least
// descpb.EmptyArraysInInvertedIndexesVersion. It also does not return keys for
// NULL array elements if excludeNulls is true. This option is used by
// encodeContainedArrayInvertedIndexSpans, which builds index spans to evaluate
// <@ (contained by) expressions.
func encodeArrayInvertedIndexTableKeys(
	val *tree.DArray, inKey []byte, version descpb.IndexDescriptorVersion, excludeNulls bool,
) (key [][]byte, err error) {
	supportsEmptyAndNull := version >= descpb.EmptyArraysInInvertedIndexesVersion
	if val.Array.Len() == 0 && supportsEmptyAndNull {
		return [][]byte{encoding.EncodeEmptyArray(inKey)}, nil
	}
	skipNulls := !supportsEmptyAndNull || excludeNulls
	result := make([][]byte, 0, len(val.Array))
	for _, elem := range val.Array {
		if elem == tree.DNull && skipNulls {
			// Older versions did not include null elements, but we must include
			// them going forward since `SELECT ARRAY[NULL] @> ARRAY[]` returns
			// true.
			continue
		}
		// Every element is encoded onto its own copy of inKey.
		prefixCopy := make([]byte, len(inKey))
		copy(prefixCopy, inKey)
		encoded, err := keyside.Encode(prefixCopy, elem, encoding.Ascending)
		if err != nil {
			return nil, err
		}
		result = append(result, encoded)
	}
	return unique.UniquifyByteSlices(result), nil
}
// encodeContainingArrayInvertedIndexSpans builds the inverted expression used
// to evaluate a contains (@>) predicate against the given array. The input
// inKey is prefixed to every key that is scanned.
//
// Three cases are handled:
//   - An empty array: a single tight span over inKey is returned.
//   - An array with any NULL element: an empty, tight, unique span expression
//     is returned.
//   - Otherwise: one tight, unique single-value span per element key, combined
//     with inverted.And.
func encodeContainingArrayInvertedIndexSpans(
	val *tree.DArray, inKey []byte,
) (invertedExpr inverted.Expression, err error) {
	switch {
	case val.Array.Len() == 0:
		// All arrays contain the empty array. Return a SpanExpression that
		// requires a full scan of the inverted index.
		return inverted.ExprForSpan(inverted.MakeSingleValSpan(inKey), true /* tight */), nil
	case val.HasNulls:
		// If there are any nulls, return empty spans. This is needed to ensure
		// that `SELECT ARRAY[NULL, 2] @> ARRAY[NULL, 2]` is false.
		return &inverted.SpanExpression{Tight: true, Unique: true}, nil
	}

	elemKeys, err := encodeArrayInvertedIndexTableKeys(
		val, inKey, descpb.LatestIndexDescriptorVersion, false, /* excludeNulls */
	)
	if err != nil {
		return nil, err
	}

	// Every element must be present, so the per-element spans are intersected.
	for _, elemKey := range elemKeys {
		elemExpr := inverted.ExprForSpan(inverted.MakeSingleValSpan(elemKey), true /* tight */)
		elemExpr.Unique = true
		if invertedExpr == nil {
			invertedExpr = elemExpr
			continue
		}
		invertedExpr = inverted.And(invertedExpr, elemExpr)
	}
	return invertedExpr, nil
}
// encodeContainedArrayInvertedIndexSpans builds the inverted expression used
// to evaluate a contained by (<@) predicate against the given array. The
// input inKey is prefixed to every key that is scanned.
//
// The result is the union (inverted.Or) of the empty-array span and one
// single-value span per non-NULL element key. It is always marked as not
// tight before being returned, except when the given array is empty, in which
// case only the (non-tight, unique) empty-array span is returned.
func encodeContainedArrayInvertedIndexSpans(
	val *tree.DArray, inKey []byte,
) (invertedExpr inverted.Expression, err error) {
	// The empty array is contained by everything, so its span is always part
	// of the result.
	emptyArrKey := encoding.EncodeEmptyArray(inKey)
	emptyArrExpr := inverted.ExprForSpan(inverted.MakeSingleValSpan(emptyArrKey), false /* tight */)
	emptyArrExpr.Unique = true

	// Nothing else can be contained by an empty array.
	if len(val.Array) == 0 {
		return emptyArrExpr, nil
	}

	// We always exclude nulls from the list of keys when evaluating <@.
	// This is because an expression like ARRAY[NULL] <@ ARRAY[NULL] is false,
	// since NULL in SQL represents an unknown value.
	elemKeys, err := encodeArrayInvertedIndexTableKeys(
		val, inKey, descpb.LatestIndexDescriptorVersion, true, /* excludeNulls */
	)
	if err != nil {
		return nil, err
	}

	invertedExpr = emptyArrExpr
	for _, elemKey := range elemKeys {
		invertedExpr = inverted.Or(
			invertedExpr,
			inverted.ExprForSpan(inverted.MakeSingleValSpan(elemKey), false /* tight */),
		)
	}

	// The inverted expression produced for <@ will never be tight.
	// For example, if we are evaluating if indexed column x <@ ARRAY[1], the
	// inverted expression would scan for all arrays in x that contain the
	// empty array or ARRAY[1]. The resulting arrays could contain other values
	// and would need to be passed through an additional filter. For example,
	// ARRAY[1, 2, 3] would be returned by the scan, but it should be filtered
	// out since ARRAY[1, 2, 3] <@ ARRAY[1] is false.
	invertedExpr.SetNotTight()
	return invertedExpr, nil
}
// encodeOverlapsArrayInvertedIndexSpans builds the inverted expression used
// to evaluate an overlaps (&&) predicate against the given array. The input
// inKey is prefixed to every key that is scanned.
//
// For an array with at least one non-NULL element, the result is the union
// (inverted.Or) of one tight, unique single-value span per non-NULL element
// key. For an empty or all-NULL array, an inverted.NonInvertedColExpression
// is returned instead.
func encodeOverlapsArrayInvertedIndexSpans(
	val *tree.DArray, inKey []byte,
) (invertedExpr inverted.Expression, err error) {
	// If the given array is directly empty (i.e. Len == 0),
	// or contains only NULLs and thus has effective length 0,
	// we cannot generate an inverted expression.
	// TODO: This should be a contradiction which is treated as a no-op.
	if hasUsableElems := val.Array.Len() > 0 && val.HasNonNulls; !hasUsableElems {
		return inverted.NonInvertedColExpression{}, nil
	}

	// We always exclude nulls from the list of keys when evaluating &&.
	// This is because an expression like ARRAY[NULL] && ARRAY[NULL] is false,
	// since NULL in SQL represents an unknown value.
	//
	// The version passed here does not influence the result:
	// encodeArrayInvertedIndexTableKeys only consults it for empty arrays
	// (ruled out by the guard above) and for NULL elements (always skipped
	// because excludeNulls is true).
	elemKeys, err := encodeArrayInvertedIndexTableKeys(
		val, inKey, descpb.PrimaryIndexWithStoredColumnsVersion, true, /* excludeNulls */
	)
	if err != nil {
		return nil, err
	}

	// Any single shared element is enough for an overlap, so the per-element
	// spans are unioned.
	for _, elemKey := range elemKeys {
		elemExpr := inverted.ExprForSpan(inverted.MakeSingleValSpan(elemKey), true /* tight */)
		elemExpr.Unique = true
		if invertedExpr == nil {
			invertedExpr = elemExpr
			continue
		}
		invertedExpr = inverted.Or(invertedExpr, elemExpr)
	}
	return invertedExpr, nil
}
// EncodeTrigramSpans returns the inverted expression that must be evaluated to
// look up the trigrams present in the input string. If allMustMatch is true,
// the per-trigram spans are intersected, so a row must match every trigram in
// the input. Otherwise they are unioned, so a row may match any trigram in
// the input. An error is returned if the input produces no trigrams at all.
func EncodeTrigramSpans(s string, allMustMatch bool) (inverted.Expression, error) {
	// We do not pad the trigrams when allMustMatch is true. To see why, observe
	// the keys that we insert for a string "zfooz":
	//
	// "  z", " zf", "zfo", "foo", "ooz", "oz "
	//
	// If we were then searching for the string %foo%, and we padded the output
	// keys as well, we'd be searching for the key "  f", which doesn't exist
	// in the index for zfooz, even though zfooz is like %foo%.
	pad := !allMustMatch
	keys, err := encodeTrigramInvertedIndexTableKeys(
		s, nil /* inKey */, descpb.LatestIndexDescriptorVersion, pad,
	)
	if err != nil {
		return nil, err
	}
	if len(keys) == 0 {
		return nil, errors.New("no trigrams available to search with")
	}

	var ret inverted.Expression
	for _, key := range keys {
		spanExpr := inverted.ExprForSpan(inverted.MakeSingleValSpan(key), false /* tight */)
		switch {
		case ret == nil:
			// The first trigram (and only the first trigram) is unique.
			// TODO(jordan): we *could* make this first expression tight if we knew
			// for sure that the expression is something like `LIKE '%foo%'`. In this
			// case, we're sure that the returned row will pass the predicate because
			// the LIKE operator has wildcards on either side of the trigram. But
			// this is such a marginal case that it doesn't seem worth it to plumb
			// in this special case. For all other single-trigram cases, such as
			// `LIKE '%foo'` or `= 'foo'`, we don't have a tight span.
			spanExpr.Unique = true
			ret = spanExpr

		// As soon as we have more than one trigram to search for, we no longer
		// have a unique expression, since two separate trigrams could both
		// point at a single row. The combined expression is not tight either:
		// the trigrams being checked do not have to appear in the right order
		// within the string, so matching all of them does not guarantee that
		// the string passes the LIKE or % test.
		case allMustMatch:
			ret = inverted.And(ret, spanExpr)
		default:
			ret = inverted.Or(ret, spanExpr)
		}
	}
	return ret, nil
}
// EncodeGeoInvertedIndexTableKeys is the equivalent of
// EncodeInvertedIndexTableKeys for Geography and Geometry datums.
//
// A NULL val yields no keys and no error. For a Geography or Geometry datum,
// the matching S2 index is built from indexGeoConfig, its inverted index keys
// and bounding box are computed, and encodeGeoKeys encodes them onto inKey
// extended by encoding.EncodeGeoInvertedAscending. Any other type family
// results in an error.
func EncodeGeoInvertedIndexTableKeys(
	val tree.Datum, inKey []byte, indexGeoConfig geoindex.Config,
) (key [][]byte, err error) {
	if val == tree.DNull {
		return nil, nil
	}
	switch family := val.ResolvedType().Family(); family {
	case types.GeographyFamily:
		s2Index := geoindex.NewS2GeographyIndex(*indexGeoConfig.S2Geography)
		geog := val.(*tree.DGeography).Geography
		cellKeys, box, err := s2Index.InvertedIndexKeys(context.TODO(), geog)
		if err != nil {
			return nil, err
		}
		prefix := encoding.EncodeGeoInvertedAscending(inKey)
		return encodeGeoKeys(prefix, cellKeys, box)
	case types.GeometryFamily:
		s2Index := geoindex.NewS2GeometryIndex(*indexGeoConfig.S2Geometry)
		geom := val.(*tree.DGeometry).Geometry
		cellKeys, box, err := s2Index.InvertedIndexKeys(context.TODO(), geom)
		if err != nil {
			return nil, err
		}
		prefix := encoding.EncodeGeoInvertedAscending(inKey)
		return encodeGeoKeys(prefix, cellKeys, box)
	default:
		return nil, errors.Errorf("internal error: unexpected type: %s", family)
	}
}
func encodeGeoKeys(
inKey []byte, geoKeys []geoindex.Key, bbox geopb.BoundingBox,
) (keys [][]byte, err error) {
encodedBBox := make([]byte, 0, encoding.MaxGeoInvertedBBoxLen)