-
Notifications
You must be signed in to change notification settings - Fork 3.8k
/
sqltranslator.go
519 lines (477 loc) · 18.8 KB
/
sqltranslator.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
// Copyright 2021 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.
// Package spanconfigsqltranslator provides logic to translate SQL descriptors
// and their corresponding zone configurations into their constituent spans and
// span configurations.
package spanconfigsqltranslator
import (
"context"
"github.com/cockroachdb/cockroach/pkg/config/zonepb"
"github.com/cockroachdb/cockroach/pkg/keys"
"github.com/cockroachdb/cockroach/pkg/kv"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/spanconfig"
"github.com/cockroachdb/cockroach/pkg/sql"
"github.com/cockroachdb/cockroach/pkg/sql/catalog"
"github.com/cockroachdb/cockroach/pkg/sql/catalog/descpb"
"github.com/cockroachdb/cockroach/pkg/sql/catalog/descs"
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
"github.com/cockroachdb/cockroach/pkg/util/hlc"
"github.com/cockroachdb/errors"
)
// Compile-time assertion that *SQLTranslator satisfies the
// spanconfig.SQLTranslator interface.
var _ spanconfig.SQLTranslator = (*SQLTranslator)(nil)
// SQLTranslator is the concrete implementation of spanconfig.SQLTranslator.
// Instances are constructed using New.
type SQLTranslator struct {
// execCfg is handed to sql.DescsTxn to run the translation transaction.
execCfg *sql.ExecutorConfig
// codec is used to construct table and tenant key prefixes, and to tell
// whether we're translating on behalf of the system tenant.
codec keys.SQLCodec
// knobs holds the testing knobs consulted during translation (e.g.
// ExcludeDroppedDescriptorsFromLookup). New guarantees it is non-nil.
knobs *spanconfig.TestingKnobs
}
// New constructs and returns a SQLTranslator. A nil knobs argument is replaced
// by an empty spanconfig.TestingKnobs so that the translator can always
// dereference its knobs.
func New(
	execCfg *sql.ExecutorConfig, codec keys.SQLCodec, knobs *spanconfig.TestingKnobs,
) *SQLTranslator {
	translator := &SQLTranslator{execCfg: execCfg, codec: codec, knobs: knobs}
	if translator.knobs == nil {
		translator.knobs = &spanconfig.TestingKnobs{}
	}
	return translator
}
// Translate is part of the spanconfig.SQLTranslator interface.
//
// Every ID in ids is expanded to the leaf IDs (tables and named zones) below
// it in the zone configuration hierarchy; the leaf IDs are de-duplicated and a
// set of span config entries is generated for each of them. All of this
// happens inside a single sql.DescsTxn transaction. On success, the generated
// entries are returned together with the commit timestamp of that
// transaction; on failure the entries are nil and the timestamp is empty.
func (s *SQLTranslator) Translate(
	ctx context.Context, ids descpb.IDs,
) ([]roachpb.SpanConfigEntry, hlc.Timestamp, error) {
	var entries []roachpb.SpanConfigEntry

	// txn used to translate the IDs, so that we can get its commit timestamp
	// later.
	var translateTxn *kv.Txn
	if err := sql.DescsTxn(ctx, s.execCfg, func(
		ctx context.Context, txn *kv.Txn, descsCol *descs.Collection,
	) error {
		// We're in a retryable closure, so clear any entries from previous
		// attempts.
		entries = entries[:0]

		// For every ID we want to translate, first expand it to descendant leaf
		// IDs that have span configurations associated for them. We also
		// de-duplicate leaf IDs to not generate redundant entries.
		seen := make(map[descpb.ID]struct{})
		addedPseudoTableSpans := false
		var leafIDs descpb.IDs
		for _, id := range ids {
			descendantLeafIDs, err := s.findDescendantLeafIDs(ctx, id, txn, descsCol)
			if err != nil {
				return err
			}
			for _, descendantLeafID := range descendantLeafIDs {
				if _, found := seen[descendantLeafID]; !found {
					seen[descendantLeafID] = struct{}{}
					leafIDs = append(leafIDs, descendantLeafID)
				}
			}

			if (id == keys.SystemDatabaseID || id == keys.RootNamespaceID) && s.codec.ForSystemTenant() {
				// We have special handling for the system database (and RANGE
				// DEFAULT, which the system database inherits from). The system
				// config span infrastructure generates splits along (empty)
				// pseudo table boundaries[1] -- we do the same. Not doing so is
				// safe, but this helps reduce the differences between the two
				// subsystems which has practical implications for our bootstrap
				// code and tests that bake in assumptions about these splits.
				// While the two systems exist side-by-side, it's easier to just
				// minimize these differences (it also removes the tiny
				// re-splitting costs when switching between them). We can get
				// rid of this special handling once the system config span is
				// removed (#70560).
				//
				// [1]: Consider the liveness range [/System/NodeLiveness,
				//      /System/NodeLivenessMax). It's identified using the
				//      pseudo ID 22 (i.e. keys.LivenessRangesID). Because we're
				//      using a pseudo ID, what of [/Table/22-/Table/23)? This
				//      is a keyspan with no contents, yet one the system config
				//      span splits along to create an empty range. It's
				//      precisely this "feature" we're looking to emulate. As
				//      for what config to apply over said range -- we do as the
				//      system config span does, applying the config for the
				//      system database.
				if addedPseudoTableSpans {
					continue // nothing to do
				}

				// Every pseudo table gets the system database's zone config, so
				// look it up once instead of once per pseudo table ID.
				zone, err := sql.GetHydratedZoneConfigForDatabase(ctx, txn, s.codec, keys.SystemDatabaseID)
				if err != nil {
					return err
				}
				for _, pseudoTableID := range keys.PseudoTableIDs {
					tableStartKey := s.codec.TablePrefix(pseudoTableID)
					tableEndKey := tableStartKey.PrefixEnd()
					entries = append(entries, roachpb.SpanConfigEntry{
						Span: roachpb.Span{
							Key:    tableStartKey,
							EndKey: tableEndKey,
						},
						// Convert per entry so that entries don't share a
						// single span config value.
						Config: zone.AsSpanConfig(),
					})
				}
				addedPseudoTableSpans = true
			}
		}

		// For every unique leaf ID, generate span configurations.
		for _, leafID := range leafIDs {
			translatedEntries, err := s.generateSpanConfigurations(ctx, leafID, txn, descsCol)
			if err != nil {
				return err
			}
			entries = append(entries, translatedEntries...)
		}
		translateTxn = txn
		return nil
	}); err != nil {
		return nil, hlc.Timestamp{}, err
	}

	return entries, translateTxn.CommitTimestamp(), nil
}
// descLookupFlags is the set of look up flags used when fetching descriptors.
// It is passed to every GetImmutableDescriptorByID call in this file (see
// generateSpanConfigurations and findDescendantLeafIDsForDescriptor).
var descLookupFlags = tree.CommonLookupFlags{
// We act on errors being surfaced when the descriptor being looked up is
// not found: callers check for catalog.ErrDescriptorNotFound and treat it
// as "nothing to do".
Required: true,
// We can (do) generate span configurations for dropped and offline tables.
IncludeDropped: true,
IncludeOffline: true,
// We want consistent reads.
AvoidLeased: true,
}
// generateSpanConfigurations returns the span config entries for the object
// identified by id. The ID is expected to refer to something that carries a
// span configuration of its own: either a named zone or a table. Descriptors
// that can no longer be found (or dropped ones, when the
// ExcludeDroppedDescriptorsFromLookup testing knob is set) yield no entries
// and no error; any other non-table descriptor is an assertion failure.
func (s *SQLTranslator) generateSpanConfigurations(
	ctx context.Context, id descpb.ID, txn *kv.Txn, descsCol *descs.Collection,
) (entries []roachpb.SpanConfigEntry, err error) {
	if zonepb.IsNamedZoneID(id) {
		return s.generateSpanConfigurationsForNamedZone(ctx, txn, id)
	}

	// Not a named zone, so the ID must refer to a SQL descriptor.
	desc, lookupErr := descsCol.GetImmutableDescriptorByID(ctx, txn, id, descLookupFlags)
	switch {
	case lookupErr == nil:
		// Found it; carry on below.
	case errors.Is(lookupErr, catalog.ErrDescriptorNotFound):
		// The descriptor has been deleted; there's nothing to generate.
		return nil, nil
	default:
		return nil, lookupErr
	}

	if s.knobs.ExcludeDroppedDescriptorsFromLookup && desc.Dropped() {
		// Tests asked us to skip dropped descriptors.
		return nil, nil
	}

	if descType := desc.DescriptorType(); descType != catalog.Table {
		return nil, errors.AssertionFailedf(
			"can only generate span configurations for tables, but got %s", descType,
		)
	}
	return s.generateSpanConfigurationsForTable(ctx, txn, desc)
}
// generateSpanConfigurationsForNamedZone generates the span config entries
// for the named zone identified by id. It returns an assertion failure if the
// ID doesn't map to a named zone, if the named zone is unknown, or if a
// secondary tenant asks for any named zone other than RANGE DEFAULT.
func (s *SQLTranslator) generateSpanConfigurationsForNamedZone(
	ctx context.Context, txn *kv.Txn, id descpb.ID,
) ([]roachpb.SpanConfigEntry, error) {
	zoneName, isNamedZone := zonepb.NamedZonesByID[uint32(id)]
	if !isNamedZone {
		return nil, errors.AssertionFailedf("id %d does not belong to a named zone", id)
	}

	// Secondary tenants only know about RANGE DEFAULT; every other named zone
	// is exclusive to the system tenant.
	if !s.codec.ForSystemTenant() && zoneName != zonepb.DefaultZoneName {
		return nil,
			errors.AssertionFailedf("secondary tenants do not have the notion of %s named zone", zoneName)
	}

	// Work out which key spans the named zone governs.
	var zoneSpans []roachpb.Span
	switch zoneName {
	case zonepb.DefaultZoneName, zonepb.TenantsZoneName:
		// Neither of these maps to a key span directly.
	case zonepb.MetaZoneName:
		zoneSpans = []roachpb.Span{
			{Key: keys.Meta1Span.Key, EndKey: keys.NodeLivenessSpan.Key},
		}
	case zonepb.LivenessZoneName:
		zoneSpans = []roachpb.Span{keys.NodeLivenessSpan}
	case zonepb.TimeseriesZoneName:
		zoneSpans = []roachpb.Span{keys.TimeseriesSpan}
	case zonepb.SystemZoneName:
		// The system range minus the liveness and timeseries ranges, both of
		// which have named zones of their own. The liveness span sorts before
		// the rest of the system keyspace, which is why the first span picks
		// up where the liveness span ends.
		zoneSpans = []roachpb.Span{
			{Key: keys.NodeLivenessSpan.EndKey, EndKey: keys.TimeseriesSpan.Key},
			{Key: keys.TimeseriesSpan.EndKey, EndKey: keys.SystemMax},
		}
	default:
		return nil, errors.AssertionFailedf("unknown named zone config %s", zoneName)
	}

	hydratedZone, err := sql.GetHydratedZoneConfigForNamedZone(ctx, txn, s.codec, zoneName)
	if err != nil {
		return nil, err
	}
	if len(zoneSpans) == 0 {
		return nil, nil
	}

	// Every span of the named zone gets the same config.
	zoneSpanConfig := hydratedZone.AsSpanConfig()
	result := make([]roachpb.SpanConfigEntry, len(zoneSpans))
	for i := range zoneSpans {
		result[i] = roachpb.SpanConfigEntry{Span: zoneSpans[i], Config: zoneSpanConfig}
	}
	return result, nil
}
// generateSpanConfigurationsForTable generates the span config entries for
// the table described by desc, using the given transaction to read the
// table's hydrated zone configuration. The table's keyspace is carved up along
// its subzone spans: each subzone span gets the subzone's config, and every
// gap between (or around) them gets the table's own config.
// `system.descriptor` is special-cased to a single entry that starts before
// the table's own prefix.
func (s *SQLTranslator) generateSpanConfigurationsForTable(
	ctx context.Context, txn *kv.Txn, desc catalog.Descriptor,
) ([]roachpb.SpanConfigEntry, error) {
	if descType := desc.DescriptorType(); descType != catalog.Table {
		return nil, errors.AssertionFailedf(
			"expected table descriptor, but got descriptor of type %s", descType,
		)
	}

	tableID := desc.GetID()
	zone, err := sql.GetHydratedZoneConfigForTable(ctx, txn, s.codec, tableID)
	if err != nil {
		return nil, err
	}

	tableStartKey := s.codec.TablePrefix(uint32(tableID))
	tableEndKey := tableStartKey.PrefixEnd()
	tableSpanConfig := zone.AsSpanConfig()

	if tableID == keys.DescriptorTableID {
		// `system.descriptor` is the first non-empty table in every tenant's
		// keyspace, so we stretch its span backwards:
		//
		// - For the system tenant we start at /Table/0 (pretty printed as
		//   /Table/SystemConfigSpan/Start) instead of /Table/3. There's no
		//   data under /Table/{0-2}, so splitting at /Table/3 would be benign,
		//   but this way we match the gossip-backed subsystem -- useful for
		//   understandability and for avoiding the (tiny) re-splitting costs
		//   when switching between the two subsystems.
		// - For secondary tenants we start at the tenant prefix, which
		//   installs the split boundary at /Tenant/<id> rather than
		//   /Tenant/<id>/Table/3. There's no data within
		//   [/Tenant/<id>/ - /Tenant/<id>/Table/3), but the resulting range
		//   boundaries are slightly less confusing.
		descriptorSpanStart := keys.SystemConfigSpan.Key
		if !s.codec.ForSystemTenant() {
			descriptorSpanStart = s.codec.TenantPrefix()
		}

		// TODO(irfansharif): There's an attack vector here that we haven't
		// addressed satisfactorily. By splitting only on start keys of span
		// configs, a malicious tenant could augment their reconciliation
		// process to install configs starting much later in their addressable
		// keyspace. This could induce KV to consider a range boundary that
		// starts at the previous tenant's keyspace (albeit the tail end of it)
		// and ending within the malicious tenant's one -- that's no good. We
		// could do two things:
		// (i)  Have each tenant install a span config that demarcates the end
		//      of its keyspace (in our example the previous tenant would
		//      defensively prevent leaking its user data through this hard
		//      boundary);
		// (ii) Have KV enforce these hard boundaries in with keyspans that
		//      straddle tenant keyspaces.
		//
		// Doing (ii) feels saner, and we already do something similar when
		// seeding `system.span_configurations` for newly created tenants. We
		// could have secondary tenants still govern span configurations over
		// their keyspace, but we'd always split on the tenant boundary. For
		// malicious tenants, the config we'd apply over that range would be the
		// fallback one KV already uses for missing spans. For non-malicious
		// tenants, it could either be the config for (i) `system.descriptor` as
		// done above, or (ii) whatever the tenant's RANGE DEFAULT is.
		//
		// See #73749.
		return []roachpb.SpanConfigEntry{{
			Span:   roachpb.Span{Key: descriptorSpanStart, EndKey: tableEndKey},
			Config: tableSpanConfig,
		}}, nil
	}

	result := []roachpb.SpanConfigEntry{}
	// coveredUpTo tracks the end of the keyspace we've emitted entries for.
	coveredUpTo := tableStartKey
	for _, subzoneSpan := range zone.SubzoneSpans {
		// The keys stored in SubzoneSpans have the table prefix stripped (for
		// historical reasons), so we put it back.
		//
		// NB: Each key is appended onto a freshly generated table prefix.
		// Appending onto tableStartKey/coveredUpTo, or onto one shared prefix
		// variable, would be buggy -- the underlying buffer gets mutated by
		// the append, throwing everything else off below.
		subzoneStartKey := append(s.codec.TablePrefix(uint32(tableID)), subzoneSpan.Key...)

		// The zone config code stores a nil EndKey when it's equal to
		// `Key.PrefixEnd()`, so reconstruct it in that case.
		var subzoneEndKey roachpb.Key
		if subzoneSpan.EndKey == nil {
			subzoneEndKey = subzoneStartKey.PrefixEnd()
		} else {
			subzoneEndKey = append(s.codec.TablePrefix(uint32(tableID)), subzoneSpan.EndKey...)
		}

		// A "hole" between the previous entry and this subzone span is
		// governed by the parent (table) zone configuration.
		if !coveredUpTo.Equal(subzoneStartKey) {
			result = append(result, roachpb.SpanConfigEntry{
				Span:   roachpb.Span{Key: coveredUpTo, EndKey: subzoneStartKey},
				Config: tableSpanConfig,
			})
		}

		// The subzone span itself is governed by the subzone's configuration.
		result = append(result, roachpb.SpanConfigEntry{
			Span:   roachpb.Span{Key: subzoneStartKey, EndKey: subzoneEndKey},
			Config: zone.Subzones[subzoneSpan.SubzoneIndex].Config.AsSpanConfig(),
		})
		coveredUpTo = subzoneEndKey
	}

	// Whatever remains of the table's keyspace past the last subzone span is
	// governed by the table's zone configuration.
	if !coveredUpTo.Equal(tableEndKey) {
		result = append(result, roachpb.SpanConfigEntry{
			Span:   roachpb.Span{Key: coveredUpTo, EndKey: tableEndKey},
			Config: tableSpanConfig,
		})
	}
	return result, nil
}
// findDescendantLeafIDs returns the IDs of all leaves at or below the given
// ID in the zone configuration hierarchy. A leaf is either a table or a named
// zone (other than RANGE DEFAULT). The work is delegated based on whether id
// is a named zone ID or a descriptor ID.
func (s *SQLTranslator) findDescendantLeafIDs(
	ctx context.Context, id descpb.ID, txn *kv.Txn, descsCol *descs.Collection,
) (descpb.IDs, error) {
	if !zonepb.IsNamedZoneID(id) {
		// Anything that isn't a named zone is a SQL object.
		return s.findDescendantLeafIDsForDescriptor(ctx, id, txn, descsCol)
	}
	return s.findDescendantLeafIDsForNamedZone(ctx, id, txn, descsCol)
}
// findDescendantLeafIDsForDescriptor returns the leaf object IDs at or below
// the given descriptor ID in the zone configuration hierarchy. What that means
// depends on the type of the descriptor:
//   - Table: the table's own ID.
//   - Database: the IDs of all tables in the database (none if the database is
//     offline or dropped).
//   - Schema/Type: nothing; these carry no zone configurations and aren't part
//     of the zone configuration hierarchy.
//
// Descriptors that can't be found (or dropped ones, when the
// ExcludeDroppedDescriptorsFromLookup testing knob is set) yield no IDs and no
// error.
func (s *SQLTranslator) findDescendantLeafIDsForDescriptor(
	ctx context.Context, id descpb.ID, txn *kv.Txn, descsCol *descs.Collection,
) (descpb.IDs, error) {
	desc, lookupErr := descsCol.GetImmutableDescriptorByID(ctx, txn, id, descLookupFlags)
	switch {
	case lookupErr == nil:
		// Found it; carry on below.
	case errors.Is(lookupErr, catalog.ErrDescriptorNotFound):
		// The descriptor has been deleted; there's nothing below it.
		return nil, nil
	default:
		return nil, lookupErr
	}

	if s.knobs.ExcludeDroppedDescriptorsFromLookup && desc.Dropped() {
		// Tests asked us to skip dropped descriptors.
		return nil, nil
	}

	switch descType := desc.DescriptorType(); descType {
	case catalog.Table:
		// A table is itself a leaf in the zone configuration hierarchy.
		return descpb.IDs{id}, nil
	case catalog.Type, catalog.Schema:
		// Not part of the zone configuration hierarchy.
		return nil, nil
	case catalog.Database:
		// Databases are expanded to their tables after the switch. (An empty
		// case simply leaves the switch; it does not run the default clause.)
	default:
		return nil, errors.AssertionFailedf("unknown descriptor type: %s", descType)
	}

	// Offline and dropped databases have nothing for us to expand.
	if desc.Offline() || desc.Dropped() {
		return nil, nil
	}

	// Collect the ID of every table inside the database.
	tableDescs, err := descsCol.GetAllTableDescriptorsInDatabase(ctx, txn, desc.GetID())
	if err != nil {
		return nil, err
	}
	tableIDs := make(descpb.IDs, len(tableDescs))
	for i := range tableDescs {
		tableIDs[i] = tableDescs[i].GetID()
	}
	return tableIDs, nil
}
// findDescendantLeafIDsForNamedZone returns the leaf IDs at or below the
// given named zone ID in the zone configuration hierarchy:
//   - RANGE DEFAULT: the leaf IDs of every database, plus (for the system
//     tenant only) the IDs of all other named zones.
//   - Any other named zone: the ID of the named zone itself.
//
// An ID that doesn't map to a named zone is an assertion failure.
func (s *SQLTranslator) findDescendantLeafIDsForNamedZone(
	ctx context.Context, id descpb.ID, txn *kv.Txn, descsCol *descs.Collection,
) (descpb.IDs, error) {
	zoneName, isNamedZone := zonepb.NamedZonesByID[uint32(id)]
	switch {
	case !isNamedZone:
		return nil, errors.AssertionFailedf("id %d does not belong to a named zone", id)
	case zoneName != zonepb.DefaultZoneName:
		// Nothing sits below a named zone other than RANGE DEFAULT, so the
		// named zone is its own (and only) leaf.
		return descpb.IDs{id}, nil
	}

	// RANGE DEFAULT is inherited by every SQL object of the tenant, so expand
	// each database in turn.
	allDatabases, err := descsCol.GetAllDatabaseDescriptors(ctx, txn)
	if err != nil {
		return nil, err
	}
	var leafIDs descpb.IDs
	for _, db := range allDatabases {
		dbLeafIDs, err := s.findDescendantLeafIDsForDescriptor(ctx, db.GetID(), txn, descsCol)
		if err != nil {
			return nil, err
		}
		leafIDs = append(leafIDs, dbLeafIDs...)
	}

	// NB: Only the system tenant has named zones other than RANGE DEFAULT.
	if !s.codec.ForSystemTenant() {
		return leafIDs, nil
	}

	// Every named zone other than RANGE DEFAULT itself inherits from RANGE
	// DEFAULT, so each of them is a descendant too.
	for _, namedZone := range zonepb.NamedZonesList {
		if namedZone != zonepb.DefaultZoneName {
			leafIDs = append(leafIDs, descpb.ID(zonepb.NamedZones[namedZone]))
		}
	}
	return leafIDs, nil
}