// Copyright 2018 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.
package rangefeed
import (
"context"
"fmt"
"sync"
"time"
"github.com/cockroachdb/cockroach/pkg/kv/kvpb"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/storage/enginepb"
"github.com/cockroachdb/cockroach/pkg/util/future"
"github.com/cockroachdb/cockroach/pkg/util/hlc"
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/cockroach/pkg/util/stop"
"github.com/cockroachdb/errors"
)
const (
// defaultPushTxnsInterval is the default interval at which a Processor will
// push all transactions in the unresolvedIntentQueue that are above the age
// specified by PushTxnsAge.
defaultPushTxnsInterval = 250 * time.Millisecond
// defaultPushTxnsAge is the default age at which a Processor will begin to
// consider a transaction old enough to push.
defaultPushTxnsAge = 10 * time.Second
)
// newErrBufferCapacityExceeded creates an error that is returned to subscribers
// if the rangefeed processor is not able to keep up with the flow of incoming
// events and is forced to drop events in order to not block.
func newErrBufferCapacityExceeded() *kvpb.Error {
return kvpb.NewError(
kvpb.NewRangeFeedRetryError(kvpb.RangeFeedRetryError_REASON_SLOW_CONSUMER),
)
}
// Config encompasses the configuration required to create a Processor.
type Config struct {
log.AmbientContext
Clock *hlc.Clock
RangeID roachpb.RangeID
Span roachpb.RSpan
TxnPusher TxnPusher
// PushTxnsInterval specifies the interval at which a Processor will push
// all transactions in the unresolvedIntentQueue that are above the age
// specified by PushTxnsAge.
PushTxnsInterval time.Duration
// PushTxnsAge specifies the age at which a Processor will begin to consider
// a transaction old enough to push.
PushTxnsAge time.Duration
// EventChanCap specifies the capacity to give to the Processor's input
// channel.
EventChanCap int
// EventChanTimeout specifies the maximum time to wait when sending on the
// Processor's input channel before giving up and shutting down the Processor.
// 0 disables the timeout, backpressuring writers up through Raft (for tests).
EventChanTimeout time.Duration
// Metrics is for production monitoring of RangeFeeds.
Metrics *Metrics
// Optional Processor memory budget.
MemBudget *FeedBudget
}
// SetDefaults initializes unset fields in Config to values
// suitable for use by a Processor.
func (sc *Config) SetDefaults() {
if sc.TxnPusher == nil {
if sc.PushTxnsInterval != 0 {
panic("nil TxnPusher with non-zero PushTxnsInterval")
}
if sc.PushTxnsAge != 0 {
panic("nil TxnPusher with non-zero PushTxnsAge")
}
} else {
if sc.PushTxnsInterval == 0 {
sc.PushTxnsInterval = defaultPushTxnsInterval
}
if sc.PushTxnsAge == 0 {
sc.PushTxnsAge = defaultPushTxnsAge
}
}
}
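// A minimal sketch of how SetDefaults behaves when a TxnPusher is supplied
// (the pusher value here is hypothetical, not taken from a real call site):
//
//	cfg := Config{TxnPusher: somePusher} // PushTxnsInterval and PushTxnsAge left unset
//	cfg.SetDefaults()
//	// cfg.PushTxnsInterval == defaultPushTxnsInterval (250ms)
//	// cfg.PushTxnsAge == defaultPushTxnsAge (10s)
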
// Processor manages a set of rangefeed registrations and handles the routing of
// logical updates to these registrations. While routing logical updates to
// rangefeed registrations, the processor performs two important tasks:
// 1. it translates logical updates into rangefeed events.
// 2. it transforms a range-level closed timestamp to a rangefeed-level resolved
// timestamp.
type Processor interface {
// Lifecycle of processor.
// Start starts the processor's internal tasks and background initialization.
// It is ok to start registering streams before the background initialization
// completes.
//
// The provided iterator is used to initialize the rangefeed's resolved
// timestamp. It must obey the contract of an iterator used for an
// initResolvedTSScan. The Processor promises to clean up the iterator by
// calling its Close method when it is finished.
//
// Note that newRtsIter must be called under the same lock as the first
// registration to ensure that there are no missing events.
// This is currently achieved by Register function synchronizing with
// the work loop before the lock is released.
//
// If the iterator is nil then no initialization scan will be performed and
// the resolved timestamp will immediately be considered initialized.
Start(stopper *stop.Stopper, newRtsIter IntentScannerConstructor) error
// Stop processor and close all registrations.
//
// It is meant to be called by the replica when it finds that all streams have
// been stopped, before it removes its references to the processor.
//
// It is not valid to restart a processor after it has been stopped.
Stop()
// StopWithErr terminates all registrations with an error and then winds down
// any internal processor resources.
//
// It is not valid to restart a processor after it has been stopped.
StopWithErr(pErr *kvpb.Error)
// Lifecycle of registrations.
// Register registers the stream over the specified span of keys.
//
// The registration will not observe any events that were consumed before this
// method was called. It is undefined whether the registration will observe
// events that are consumed concurrently with this call. The channel will be
// provided an error when the registration closes.
//
// The optionally provided "catch-up" iterator is used to read changes from the
// engine which occurred after the provided start timestamp (exclusive).
//
// If the method returns false, the processor will have been stopped, so calling
// Stop is not necessary. If the method returns true, it will also return an
// updated operation filter that includes the operations required by the new
// registration.
//
// NB: startTS is exclusive; the first possible event will be at startTS.Next().
Register(
span roachpb.RSpan,
startTS hlc.Timestamp, // exclusive
catchUpIterConstructor CatchUpIteratorConstructor,
withDiff bool,
stream Stream,
disconnectFn func(),
done *future.ErrorFuture,
) (bool, *Filter)
// DisconnectSpanWithErr disconnects all rangefeed registrations that overlap
// the given span with the given error.
DisconnectSpanWithErr(span roachpb.Span, pErr *kvpb.Error)
// Filter returns a new operation filter based on the registrations attached to
// the processor. Returns nil if the processor has been stopped already.
Filter() *Filter
// Len returns the number of registrations attached to the processor.
Len() int
// Data flow.
// ConsumeLogicalOps informs the rangefeed processor of the set of logical
// operations. It returns false if consuming the operations hit a timeout, as
// specified by the EventChanTimeout configuration. If the method returns false,
// the processor will have been stopped, so calling Stop is not necessary.
ConsumeLogicalOps(ctx context.Context, ops ...enginepb.MVCCLogicalOp) bool
// ConsumeSSTable informs the rangefeed processor of an SSTable that was added
// via AddSSTable. It returns false if consuming the SSTable hit a timeout, as
// specified by the EventChanTimeout configuration. If the method returns false,
// the processor will have been stopped, so calling Stop is not necessary.
ConsumeSSTable(
ctx context.Context, sst []byte, sstSpan roachpb.Span, writeTS hlc.Timestamp,
) bool
// ForwardClosedTS indicates that the closed timestamp that serves as the basis
// for the rangefeed processor's resolved timestamp has advanced. It returns
// false if forwarding the closed timestamp hit a timeout, as specified by the
// EventChanTimeout configuration. If the method returns false, the processor
// will have been stopped, so calling Stop is not necessary.
ForwardClosedTS(ctx context.Context, closedTS hlc.Timestamp) bool
}
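// A rough lifecycle sketch for callers of the Processor interface. The stream,
// iterator constructors, spans, and timestamps below are hypothetical, not
// taken from real call sites:
//
//	p := NewProcessor(cfg)
//	if err := p.Start(stopper, rtsIterFunc); err != nil {
//		return err
//	}
//	ok, _ := p.Register(span, startTS, catchUpIterFunc, false /* withDiff */, stream, nil, &done)
//	if !ok {
//		// The processor has already been stopped.
//	}
//	_ = p.ConsumeLogicalOps(ctx, ops...)
//	_ = p.ForwardClosedTS(ctx, closedTS)
//	p.Stop()
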
type LegacyProcessor struct {
Config
reg registry
rts resolvedTimestamp
regC chan registration
unregC chan *registration
lenReqC chan struct{}
lenResC chan int
filterReqC chan struct{}
filterResC chan *Filter
eventC chan *event
spanErrC chan spanErr
stopC chan *kvpb.Error
stoppedC chan struct{}
}
var eventSyncPool = sync.Pool{
New: func() interface{} {
return new(event)
},
}
func getPooledEvent(ev event) *event {
e := eventSyncPool.Get().(*event)
*e = ev
return e
}
func putPooledEvent(ev *event) {
*ev = event{}
eventSyncPool.Put(ev)
}
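// The pool round trip used by sendEvent and the run loop is, roughly (sketch
// only):
//
//	ev := getPooledEvent(event{ops: ops}) // copy a stack value into a pooled *event
//	p.eventC <- ev                        // hand it to the processor goroutine, which
//	                                      // calls putPooledEvent(ev) once it has
//	                                      // consumed the event
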
// event is a union of different event types that the Processor goroutine needs
// to be informed of. It is used so that all events can be sent over the same
// channel, which is necessary to prevent reordering.
type event struct {
// Event variants. Only one set.
ops opsEvent
ct ctEvent
initRTS initRTSEvent
sst *sstEvent
sync *syncEvent
// Budget allocated to process the event.
alloc *SharedBudgetAllocation
}
type opsEvent []enginepb.MVCCLogicalOp
type ctEvent struct {
hlc.Timestamp
}
type initRTSEvent bool
type sstEvent struct {
data []byte
span roachpb.Span
ts hlc.Timestamp
}
type syncEvent struct {
c chan struct{}
// This setting is used in conjunction with c in tests in order to ensure that
// all registrations have fully finished outputting their buffers. This has to
// be done by the processor in order to avoid race conditions with the
// registry. Should be used only in tests.
testRegCatchupSpan *roachpb.Span
}
// spanErr is an error across a key span that will disconnect overlapping
// registrations.
type spanErr struct {
span roachpb.Span
pErr *kvpb.Error
}
// NewProcessor creates a new rangefeed Processor. The corresponding goroutine
// should be launched using the Start method.
func NewProcessor(cfg Config) Processor {
cfg.SetDefaults()
cfg.AmbientContext.AddLogTag("rangefeed", nil)
p := &LegacyProcessor{
Config: cfg,
reg: makeRegistry(cfg.Metrics),
rts: makeResolvedTimestamp(),
regC: make(chan registration),
unregC: make(chan *registration),
lenReqC: make(chan struct{}),
lenResC: make(chan int),
filterReqC: make(chan struct{}),
filterResC: make(chan *Filter),
eventC: make(chan *event, cfg.EventChanCap),
spanErrC: make(chan spanErr),
stopC: make(chan *kvpb.Error, 1),
stoppedC: make(chan struct{}),
}
return p
}
// IntentScannerConstructor is used to construct an IntentScanner. It
// should be called from underneath a stopper task to ensure that the
// engine has not been closed.
type IntentScannerConstructor func() IntentScanner
// CatchUpIteratorConstructor is used to construct an iterator that can be used
// for catch-up scans. It takes the key span and the exclusive start time for
// which to run the catch-up scan. It should be called from underneath a
// stopper task to ensure that the engine has not been closed.
type CatchUpIteratorConstructor func(roachpb.Span, hlc.Timestamp) (*CatchUpIterator, error)
// Start implements Processor interface.
//
// LegacyProcessor launches a goroutine to process rangefeed events and send
// them to registrations.
//
// Note that, to fulfill the newRtsIter contract, LegacyProcessor creates the
// iterator at the start of its work loop, prior to firing off the async task.
func (p *LegacyProcessor) Start(stopper *stop.Stopper, newRtsIter IntentScannerConstructor) error {
ctx := p.AnnotateCtx(context.Background())
if err := stopper.RunAsyncTask(ctx, "rangefeed.LegacyProcessor", func(ctx context.Context) {
p.run(ctx, p.RangeID, newRtsIter, stopper)
}); err != nil {
p.reg.DisconnectWithErr(all, kvpb.NewError(err))
close(p.stoppedC)
return err
}
return nil
}
// run is called from Start and runs the rangefeed.
func (p *LegacyProcessor) run(
ctx context.Context,
_forStacks roachpb.RangeID,
rtsIterFunc IntentScannerConstructor,
stopper *stop.Stopper,
) {
// Close the memory budget last, or there will be a period of time during
// which requests are still ongoing but will run into the closed budget,
// causing shutdown noise and busy retries.
// Closing the budget after stoppedC ensures that all other goroutines are
// (very close to being) shut down by the time the budget goes away.
defer p.MemBudget.Close(ctx)
defer close(p.stoppedC)
ctx, cancelOutputLoops := context.WithCancel(ctx)
defer cancelOutputLoops()
// Launch an async task to scan over the resolved timestamp iterator and
// initialize the unresolvedIntentQueue. Ignore error if quiescing.
if rtsIterFunc != nil {
rtsIter := rtsIterFunc()
initScan := newInitResolvedTSScan(p.Span, p, rtsIter)
err := stopper.RunAsyncTask(ctx, "rangefeed: init resolved ts", initScan.Run)
if err != nil {
initScan.Cancel()
}
} else {
p.initResolvedTS(ctx)
}
// txnPushTicker periodically pushes the transaction record of all
// unresolved intents that are above a certain age, helping to ensure
// that the resolved timestamp continues to make progress.
var txnPushTicker *time.Ticker
var txnPushTickerC <-chan time.Time
var txnPushAttemptC chan struct{}
if p.PushTxnsInterval > 0 {
txnPushTicker = time.NewTicker(p.PushTxnsInterval)
txnPushTickerC = txnPushTicker.C
defer txnPushTicker.Stop()
}
for {
select {
// Handle new registrations.
case r := <-p.regC:
if !p.Span.AsRawSpanWithNoLocals().Contains(r.span) {
log.Fatalf(ctx, "registration %s not in Processor's key range %v", r, p.Span)
}
// Construct the catchUpIter before notifying the registration that it
// has been registered. Note that if the catchUpScan is never run, then
// the iterator constructed here will be closed in disconnect.
if err := r.maybeConstructCatchUpIter(); err != nil {
r.disconnect(kvpb.NewError(err))
return
}
// Add the new registration to the registry.
p.reg.Register(&r)
// Publish an updated filter that includes the new registration.
p.filterResC <- p.reg.NewFilter()
// Immediately publish a checkpoint event to the registry. This will be the first event
// published to this registration after its initial catch-up scan completes. The resolved
// timestamp might be empty but the checkpoint event is still useful to indicate that the
// catch-up scan has completed. This allows clients to rely on stronger ordering semantics
// once they observe the first checkpoint event.
r.publish(ctx, p.newCheckpointEvent(), nil)
// Run an output loop for the registry.
runOutputLoop := func(ctx context.Context) {
r.runOutputLoop(ctx, p.RangeID)
select {
case p.unregC <- &r:
if r.unreg != nil {
r.unreg()
}
case <-p.stoppedC:
}
}
if err := stopper.RunAsyncTask(ctx, "rangefeed: output loop", runOutputLoop); err != nil {
r.disconnect(kvpb.NewError(err))
p.reg.Unregister(ctx, &r)
}
// Respond to unregistration requests; these come from registrations that
// encounter an error during their output loop.
case r := <-p.unregC:
p.reg.Unregister(ctx, r)
// Send errors to registrations overlapping the span and disconnect them.
// Requested via DisconnectSpanWithErr().
case e := <-p.spanErrC:
p.reg.DisconnectWithErr(e.span, e.pErr)
// Respond to requests about the processor goroutine state.
case <-p.lenReqC:
p.lenResC <- p.reg.Len()
// Respond to requests about which operations can be filtered before
// reaching the Processor.
case <-p.filterReqC:
p.filterResC <- p.reg.NewFilter()
// Transform and route events.
case e := <-p.eventC:
p.consumeEvent(ctx, e)
e.alloc.Release(ctx)
putPooledEvent(e)
// Check whether any unresolved intents need a push.
case <-txnPushTickerC:
// Don't perform transaction push attempts until the resolved
// timestamp has been initialized.
if !p.rts.IsInit() {
continue
}
now := p.Clock.Now()
before := now.Add(-p.PushTxnsAge.Nanoseconds(), 0)
oldTxns := p.rts.intentQ.Before(before)
if len(oldTxns) > 0 {
toPush := make([]enginepb.TxnMeta, len(oldTxns))
for i, txn := range oldTxns {
toPush[i] = txn.asTxnMeta()
}
// Set the ticker channel to nil so that it can't trigger a
// second concurrent push. Create a push attempt response
// channel that is closed when the push attempt completes.
txnPushTickerC = nil
txnPushAttemptC = make(chan struct{})
// Launch an async transaction push attempt that pushes the
// timestamp of all transactions beneath the push offset.
// Ignore error if quiescing.
pushTxns := newTxnPushAttempt(p.Span, p.TxnPusher, p, toPush, now, txnPushAttemptC)
err := stopper.RunAsyncTask(ctx, "rangefeed: pushing old txns", pushTxns.Run)
if err != nil {
pushTxns.Cancel()
}
}
// Update the resolved timestamp based on the push attempt.
case <-txnPushAttemptC:
// Reset the ticker channel so that it can trigger push attempts
// again. Set the push attempt channel back to nil.
txnPushTickerC = txnPushTicker.C
txnPushAttemptC = nil
// Close registrations and exit when signaled.
case pErr := <-p.stopC:
p.reg.DisconnectWithErr(all, pErr)
return
// Exit on stopper.
case <-stopper.ShouldQuiesce():
pErr := kvpb.NewError(&kvpb.NodeUnavailableError{})
p.reg.DisconnectWithErr(all, pErr)
return
}
}
}
// Stop implements Processor interface.
func (p *LegacyProcessor) Stop() {
p.StopWithErr(nil)
}
// StopWithErr implements Processor interface.
func (p *LegacyProcessor) StopWithErr(pErr *kvpb.Error) {
// Flush any remaining events before stopping.
p.syncEventC()
// Send the processor a stop signal.
p.sendStop(pErr)
}
// DisconnectSpanWithErr implements Processor interface.
func (p *LegacyProcessor) DisconnectSpanWithErr(span roachpb.Span, pErr *kvpb.Error) {
select {
case p.spanErrC <- spanErr{span: span, pErr: pErr}:
case <-p.stoppedC:
// Already stopped. Do nothing.
}
}
func (p *LegacyProcessor) sendStop(pErr *kvpb.Error) {
select {
case p.stopC <- pErr:
// stopC has non-zero capacity so this should not block unless
// multiple callers attempt to stop the Processor concurrently.
case <-p.stoppedC:
// Already stopped. Do nothing.
}
}
// Register implements Processor interface.
func (p *LegacyProcessor) Register(
span roachpb.RSpan,
startTS hlc.Timestamp,
catchUpIterConstructor CatchUpIteratorConstructor,
withDiff bool,
stream Stream,
disconnectFn func(),
done *future.ErrorFuture,
) (bool, *Filter) {
// Synchronize the event channel so that this registration doesn't see any
// events that were consumed before Register was called. Instead, it should
// see those events during its catch-up scan.
p.syncEventC()
blockWhenFull := p.Config.EventChanTimeout == 0 // for testing
r := newRegistration(
span.AsRawSpanWithNoLocals(), startTS, catchUpIterConstructor, withDiff,
p.Config.EventChanCap, blockWhenFull, p.Metrics, stream, disconnectFn, done,
)
select {
case p.regC <- r:
// Wait for response.
return true, <-p.filterResC
case <-p.stoppedC:
return false, nil
}
}
// Len implements Processor interface.
func (p *LegacyProcessor) Len() int {
// Ask the processor goroutine.
select {
case p.lenReqC <- struct{}{}:
// Wait for response.
return <-p.lenResC
case <-p.stoppedC:
return 0
}
}
// Filter implements Processor interface.
func (p *LegacyProcessor) Filter() *Filter {
// Ask the processor goroutine.
select {
case p.filterReqC <- struct{}{}:
// Wait for response.
return <-p.filterResC
case <-p.stoppedC:
return nil
}
}
// ConsumeLogicalOps implements Processor interface.
func (p *LegacyProcessor) ConsumeLogicalOps(
ctx context.Context, ops ...enginepb.MVCCLogicalOp,
) bool {
if len(ops) == 0 {
return true
}
return p.sendEvent(ctx, event{ops: ops}, p.EventChanTimeout)
}
// ConsumeSSTable implements Processor interface.
func (p *LegacyProcessor) ConsumeSSTable(
ctx context.Context, sst []byte, sstSpan roachpb.Span, writeTS hlc.Timestamp,
) bool {
return p.sendEvent(ctx, event{sst: &sstEvent{sst, sstSpan, writeTS}}, p.EventChanTimeout)
}
// ForwardClosedTS implements Processor interface.
func (p *LegacyProcessor) ForwardClosedTS(ctx context.Context, closedTS hlc.Timestamp) bool {
if closedTS.IsEmpty() {
return true
}
return p.sendEvent(ctx, event{ct: ctEvent{closedTS}}, p.EventChanTimeout)
}
// sendEvent informs the Processor of a new event. If a timeout is specified,
// the method will wait for no longer than that duration before giving up,
// shutting down the Processor, and returning false. 0 for no timeout.
func (p *LegacyProcessor) sendEvent(ctx context.Context, e event, timeout time.Duration) bool {
// The code is a bit unwieldy because we try to avoid any allocations on the
// fast path, where we have enough budget and the outgoing channel is free. If
// not, we set up a timeout for acquiring the budget and then reuse that
// timeout when inserting the value into the channel.
var alloc *SharedBudgetAllocation
if p.MemBudget != nil {
size := calculateDateEventSize(e)
if size > 0 {
var err error
// First, try the non-blocking fast path to allocate memory budget.
alloc, err = p.MemBudget.TryGet(ctx, size)
// If the budget is already closed, just let the event through because the
// processor is terminating.
if err != nil && !errors.Is(err, budgetClosedError) {
// Since we don't have enough budget, wait for allocations to be returned
// before failing.
if timeout > 0 {
var cancel context.CancelFunc
ctx, cancel = context.WithTimeout(ctx, timeout) // nolint:context
defer cancel()
// Reset the timeout here so that the subsequent channel write doesn't
// try to wait beyond what has already been set up.
timeout = 0
}
p.Metrics.RangeFeedBudgetBlocked.Inc(1)
alloc, err = p.MemBudget.WaitAndGet(ctx, size)
}
if err != nil && !errors.Is(err, budgetClosedError) {
p.Metrics.RangeFeedBudgetExhausted.Inc(1)
p.sendStop(newErrBufferCapacityExceeded())
return false
}
// Always release the allocation pointer after sending, as Release is nil-safe.
// In the normal case its value has been moved into the event; on allocation
// errors it is nil; on send errors it is non-nil, and this call ensures that
// the unused allocation is released.
defer func() {
alloc.Release(ctx)
}()
}
}
ev := getPooledEvent(e)
ev.alloc = alloc
if timeout == 0 {
// The timeout is zero if no timeout was requested or if one has already been
// set on the context by the budget allocation. Just try to write, using the
// context as the timeout.
select {
case p.eventC <- ev:
// Reset allocation after successful posting to prevent deferred cleanup
// from freeing it (see comment on defer for explanation).
alloc = nil
case <-p.stoppedC:
// Already stopped. Do nothing.
case <-ctx.Done():
p.sendStop(newErrBufferCapacityExceeded())
return false
}
} else {
// First try the fast path without blocking and without creating any
// contexts, in case the channel has spare capacity.
select {
case p.eventC <- ev:
// Reset allocation after successful posting to prevent deferred cleanup
// from freeing it (see comment on defer for explanation).
alloc = nil
case <-p.stoppedC:
// Already stopped. Do nothing.
default:
// The fast path failed since the channel has no spare capacity. Wait for
// slots to clear up using a context timeout.
var cancel context.CancelFunc
ctx, cancel = context.WithTimeout(ctx, timeout) // nolint:context
defer cancel()
select {
case p.eventC <- ev:
// Reset allocation after successful posting to prevent deferred cleanup
// from freeing it (see comment on defer for explanation).
alloc = nil
case <-p.stoppedC:
// Already stopped. Do nothing.
case <-ctx.Done():
// Sending on the eventC channel would have blocked.
// Instead, tear down the processor and return immediately.
p.sendStop(newErrBufferCapacityExceeded())
return false
}
}
}
return true
}
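// The channel writes above follow a common Go idiom: attempt a non-blocking
// send first, and only pay for a timeout context when the channel is full. In
// isolation the pattern looks roughly like this (generic sketch, not tied to
// the processor's types):
//
//	select {
//	case ch <- v: // fast path: the channel has spare capacity
//	default:
//		ctx, cancel := context.WithTimeout(ctx, timeout)
//		defer cancel()
//		select {
//		case ch <- v:
//		case <-ctx.Done():
//			// Give up and tear down.
//		}
//	}
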
// setResolvedTSInitialized informs the Processor that its resolved timestamp has
// all the information it needs to be considered initialized.
func (p *LegacyProcessor) setResolvedTSInitialized(ctx context.Context) {
p.sendEvent(ctx, event{initRTS: true}, 0)
}
// syncEventC synchronizes access to the Processor goroutine, allowing the
// caller to establish causality with actions taken by the Processor goroutine.
// It does so by flushing the event pipeline.
func (p *LegacyProcessor) syncEventC() {
syncC := make(chan struct{})
ev := getPooledEvent(event{sync: &syncEvent{c: syncC}})
select {
case p.eventC <- ev:
select {
case <-syncC:
// Synchronized.
case <-p.stoppedC:
// Already stopped. Do nothing.
}
case <-p.stoppedC:
// Already stopped. Do nothing.
}
}
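// Because events on eventC are consumed in order, a caller that runs
//
//	p.syncEventC()
//
// knows that every event sent before the call has been handled by the
// processor goroutine by the time it returns (unless the processor has
// stopped). Register relies on this so that a new registration only observes
// events consumed after it was added; earlier events are covered by its
// catch-up scan.
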
func (p *LegacyProcessor) consumeEvent(ctx context.Context, e *event) {
switch {
case e.ops != nil:
p.consumeLogicalOps(ctx, e.ops, e.alloc)
case !e.ct.IsEmpty():
p.forwardClosedTS(ctx, e.ct.Timestamp)
case bool(e.initRTS):
p.initResolvedTS(ctx)
case e.sst != nil:
p.consumeSSTable(ctx, e.sst.data, e.sst.span, e.sst.ts, e.alloc)
case e.sync != nil:
if e.sync.testRegCatchupSpan != nil {
if err := p.reg.waitForCaughtUp(*e.sync.testRegCatchupSpan); err != nil {
log.Errorf(
ctx,
"error waiting for registries to catch up during test, results might be impacted: %s",
err,
)
}
}
close(e.sync.c)
default:
panic(fmt.Sprintf("missing event variant: %+v", e))
}
}
func (p *LegacyProcessor) consumeLogicalOps(
ctx context.Context, ops []enginepb.MVCCLogicalOp, alloc *SharedBudgetAllocation,
) {
for _, op := range ops {
// Publish RangeFeedValue updates, if necessary.
switch t := op.GetValue().(type) {
case *enginepb.MVCCWriteValueOp:
// Publish the new value directly.
p.publishValue(ctx, t.Key, t.Timestamp, t.Value, t.PrevValue, alloc)
case *enginepb.MVCCDeleteRangeOp:
// Publish the range deletion directly.
p.publishDeleteRange(ctx, t.StartKey, t.EndKey, t.Timestamp, alloc)
case *enginepb.MVCCWriteIntentOp:
// No updates to publish.
case *enginepb.MVCCUpdateIntentOp:
// No updates to publish.
case *enginepb.MVCCCommitIntentOp:
// Publish the newly committed value.
p.publishValue(ctx, t.Key, t.Timestamp, t.Value, t.PrevValue, alloc)
case *enginepb.MVCCAbortIntentOp:
// No updates to publish.
case *enginepb.MVCCAbortTxnOp:
// No updates to publish.
default:
panic(errors.AssertionFailedf("unknown logical op %T", t))
}
// Determine whether the operation caused the resolved timestamp to
// move forward. If so, publish a RangeFeedCheckpoint notification.
if p.rts.ConsumeLogicalOp(op) {
p.publishCheckpoint(ctx)
}
}
}
func (p *LegacyProcessor) consumeSSTable(
ctx context.Context,
sst []byte,
sstSpan roachpb.Span,
sstWTS hlc.Timestamp,
alloc *SharedBudgetAllocation,
) {
p.publishSSTable(ctx, sst, sstSpan, sstWTS, alloc)
}
func (p *LegacyProcessor) forwardClosedTS(ctx context.Context, newClosedTS hlc.Timestamp) {
if p.rts.ForwardClosedTS(newClosedTS) {
p.publishCheckpoint(ctx)
}
}
func (p *LegacyProcessor) initResolvedTS(ctx context.Context) {
if p.rts.Init() {
p.publishCheckpoint(ctx)
}
}
func (p *LegacyProcessor) publishValue(
ctx context.Context,
key roachpb.Key,
timestamp hlc.Timestamp,
value, prevValue []byte,
alloc *SharedBudgetAllocation,
) {
if !p.Span.ContainsKey(roachpb.RKey(key)) {
log.Fatalf(ctx, "key %v not in Processor's key range %v", key, p.Span)
}
var prevVal roachpb.Value
if prevValue != nil {
prevVal.RawBytes = prevValue
}
var event kvpb.RangeFeedEvent
event.MustSetValue(&kvpb.RangeFeedValue{
Key: key,
Value: roachpb.Value{
RawBytes: value,
Timestamp: timestamp,
},
PrevValue: prevVal,
})
p.reg.PublishToOverlapping(ctx, roachpb.Span{Key: key}, &event, alloc)
}
func (p *LegacyProcessor) publishDeleteRange(
ctx context.Context,
startKey, endKey roachpb.Key,
timestamp hlc.Timestamp,
alloc *SharedBudgetAllocation,
) {
span := roachpb.Span{Key: startKey, EndKey: endKey}
if !p.Span.ContainsKeyRange(roachpb.RKey(startKey), roachpb.RKey(endKey)) {
log.Fatalf(ctx, "span %s not in Processor's key range %v", span, p.Span)
}
var event kvpb.RangeFeedEvent
event.MustSetValue(&kvpb.RangeFeedDeleteRange{
Span: span,
Timestamp: timestamp,
})
p.reg.PublishToOverlapping(ctx, span, &event, alloc)
}
func (p *LegacyProcessor) publishSSTable(
ctx context.Context,
sst []byte,
sstSpan roachpb.Span,
sstWTS hlc.Timestamp,
alloc *SharedBudgetAllocation,
) {
if sstSpan.Equal(roachpb.Span{}) {
panic(errors.AssertionFailedf("received SSTable without span"))
}
if sstWTS.IsEmpty() {
panic(errors.AssertionFailedf("received SSTable without write timestamp"))
}
p.reg.PublishToOverlapping(ctx, sstSpan, &kvpb.RangeFeedEvent{
SST: &kvpb.RangeFeedSSTable{
Data: sst,
Span: sstSpan,
WriteTS: sstWTS,
},
}, alloc)
}
func (p *LegacyProcessor) publishCheckpoint(ctx context.Context) {
// TODO(nvanbenschoten): persist resolvedTimestamp. Give Processor a client.DB.
// TODO(nvanbenschoten): rate limit these? send them periodically?
event := p.newCheckpointEvent()
p.reg.PublishToOverlapping(ctx, all, event, nil)
}
func (p *LegacyProcessor) newCheckpointEvent() *kvpb.RangeFeedEvent {
// Create a RangeFeedCheckpoint over the Processor's entire span. Each
// individual registration will trim this down to just the key span that
// it is listening on in registration.maybeStripEvent before publishing.
var event kvpb.RangeFeedEvent
event.MustSetValue(&kvpb.RangeFeedCheckpoint{
Span: p.Span.AsRawSpanWithNoLocals(),
ResolvedTS: p.rts.Get(),
})
return &event
}
// calculateDateEventSize returns the estimated size of events that contain
// actual data. We only account for logical ops and SSTs. Those events come
// from Raft and are budgeted. Other events come from processor jobs and only
// update timestamps; we don't take them into account because they are supposed
// to be small, and to avoid the complexity of having multiple producers
// drawing from the budget.
func calculateDateEventSize(e event) int64 {
var size int64
for _, op := range e.ops {
size += int64(op.Size())
}
if e.sst != nil {
size += int64(len(e.sst.data))
}
return size
}
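// For example, an event carrying two logical ops of 100 bytes each would be
// charged 200 bytes against the budget, while ct, initRTS, and sync events are
// charged nothing (illustrative figures only).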