// Copyright 2020 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.
package concurrency
import (
"context"
"sync"
"github.com/cockroachdb/cockroach/pkg/kv"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/concurrency/lock"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverpb"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/spanlatch"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/spanset"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/txnwait"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/settings"
"github.com/cockroachdb/cockroach/pkg/settings/cluster"
"github.com/cockroachdb/cockroach/pkg/storage/enginepb"
"github.com/cockroachdb/cockroach/pkg/util/hlc"
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/cockroach/pkg/util/metric"
"github.com/cockroachdb/cockroach/pkg/util/stop"
"github.com/cockroachdb/cockroach/pkg/util/uuid"
"github.com/cockroachdb/errors"
)
// MaxLockWaitQueueLength sets the maximum length of a lock wait-queue that the
// request is willing to enter and wait in. Used to provide a release valve and
// ensure some level of quality-of-service under severe per-key contention. If
// set to a non-zero value and an existing lock wait-queue is already equal to
// or exceeding this length, the request will be rejected eagerly instead of
// entering the queue and waiting.
var MaxLockWaitQueueLength = settings.RegisterIntSetting(
"kv.lock_table.maximum_lock_wait_queue_length",
"the maximum length of a lock wait-queue that requests are willing to enter "+
"and wait in. The setting can be used to ensure some level of quality-of-service "+
"under severe per-key contention. If set to a non-zero value and an existing lock "+
"wait-queue is already equal to or exceeding this length, requests will be rejected "+
"eagerly instead of entering the queue and waiting. Set to 0 to disable.",
0,
func(v int64) error {
if v < 0 {
return errors.Errorf("cannot be set to a negative value: %d", v)
}
if v == 0 {
return nil // disabled
}
// Don't let the setting be dropped below a reasonable value that we don't
// expect to impact internal transaction processing.
const minSafeMaxLength = 3
if v < minSafeMaxLength {
return errors.Errorf("cannot be set below %d: %d", minSafeMaxLength, v)
}
return nil
},
)
// DiscoveredLocksThresholdToConsultFinalizedTxnCache sets a threshold as
// mentioned in the description string. The default of 200 is somewhat
// arbitrary but should suffice for small OLTP transactions. Given the default
// 10,000 lock capacity of the lock table, 200 is small enough to not matter
// much against the capacity, which is desirable. We have seen examples with
// discoveredCount > 100,000, caused by stats collection, where we definitely
// want to avoid adding these locks to the lock table, if possible.
var DiscoveredLocksThresholdToConsultFinalizedTxnCache = settings.RegisterIntSetting(
"kv.lock_table.discovered_locks_threshold_for_consulting_finalized_txn_cache",
"the maximum number of discovered locks by a waiter, above which the finalized txn cache"+
"is consulted and resolvable locks are not added to the lock table -- this should be a small"+
"fraction of the maximum number of locks in the lock table",
200,
settings.NonNegativeInt,
)
// managerImpl implements the Manager interface.
type managerImpl struct {
st *cluster.Settings
// Synchronizes conflicting in-flight requests.
lm latchManager
// Synchronizes conflicting in-progress transactions.
lt lockTable
// Waits for locks that conflict with a request to be released.
ltw lockTableWaiter
// Waits for transaction completion and detects deadlocks.
twq txnWaitQueue
}
// Config contains the dependencies to construct a Manager.
type Config struct {
// Identification.
NodeDesc *roachpb.NodeDescriptor
RangeDesc *roachpb.RangeDescriptor
// Components.
Settings *cluster.Settings
DB *kv.DB
Clock *hlc.Clock
Stopper *stop.Stopper
IntentResolver IntentResolver
// Metrics.
TxnWaitMetrics *txnwait.Metrics
SlowLatchGauge *metric.Gauge
// Configs + Knobs.
MaxLockTableSize int64
DisableTxnPushing bool
OnContentionEvent func(*roachpb.ContentionEvent) // may be nil; allowed to mutate the event
TxnWaitKnobs txnwait.TestingKnobs
}
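// initDefaults fills in reasonable defaults for any knobs left unset;
// currently only MaxLockTableSize is defaulted.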
func (c *Config) initDefaults() {
if c.MaxLockTableSize == 0 {
c.MaxLockTableSize = defaultLockTableSize
}
}
// NewManager creates a new concurrency Manager structure.
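//
// A minimal construction sketch (illustrative only; it assumes that nodeDesc,
// rangeDesc, st, db, clock, stopper, and ir have been initialized elsewhere):
//
//   m := NewManager(Config{
//       NodeDesc:       nodeDesc,
//       RangeDesc:      rangeDesc,
//       Settings:       st,
//       DB:             db,
//       Clock:          clock,
//       Stopper:        stopper,
//       IntentResolver: ir,
//   })
//
// Unset knobs, such as MaxLockTableSize, are filled in by initDefaults.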
func NewManager(cfg Config) Manager {
cfg.initDefaults()
m := new(managerImpl)
lt := newLockTable(cfg.MaxLockTableSize)
*m = managerImpl{
st: cfg.Settings,
// TODO(nvanbenschoten): move pkg/storage/spanlatch to a new
// pkg/storage/concurrency/latch package. Make it implement the
// latchManager interface directly, if possible.
lm: &latchManagerImpl{
m: spanlatch.Make(
cfg.Stopper,
cfg.SlowLatchGauge,
),
},
lt: lt,
ltw: &lockTableWaiterImpl{
st: cfg.Settings,
clock: cfg.Clock,
stopper: cfg.Stopper,
ir: cfg.IntentResolver,
lt: lt,
disableTxnPushing: cfg.DisableTxnPushing,
onContentionEvent: cfg.OnContentionEvent,
},
// TODO(nvanbenschoten): move pkg/storage/txnwait to a new
// pkg/storage/concurrency/txnwait package.
twq: txnwait.NewQueue(txnwait.Config{
RangeDesc: cfg.RangeDesc,
DB: cfg.DB,
Clock: cfg.Clock,
Stopper: cfg.Stopper,
Metrics: cfg.TxnWaitMetrics,
Knobs: cfg.TxnWaitKnobs,
}),
}
return m
}
// SequenceReq implements the RequestSequencer interface.
func (m *managerImpl) SequenceReq(
ctx context.Context, prev *Guard, req Request, evalKind RequestEvalKind,
) (*Guard, Response, *Error) {
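// There are three sequencing situations to distinguish here: a new request
// (prev == nil) being sequenced either pessimistically or optimistically, and
// a request being re-sequenced with an existing guard, either after a conflict
// (PessimisticEval) or after a failed optimistic evaluation
// (PessimisticAfterFailedOptimisticEval), in which case it may still be
// holding latches.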
var g *Guard
if prev == nil {
switch evalKind {
case PessimisticEval:
log.Event(ctx, "sequencing request")
case OptimisticEval:
log.Event(ctx, "optimistically sequencing request")
case PessimisticAfterFailedOptimisticEval:
panic("retry should have non-nil guard")
}
g = newGuard(req)
} else {
g = prev
switch evalKind {
case PessimisticEval:
g.AssertNoLatches()
log.Event(ctx, "re-sequencing request")
case OptimisticEval:
panic("optimistic eval cannot happen when re-sequencing")
case PessimisticAfterFailedOptimisticEval:
if shouldAcquireLatches(req) {
g.AssertLatches()
}
log.Event(ctx, "re-sequencing request after optimistic sequencing failed")
}
}
g.EvalKind = evalKind
resp, err := m.sequenceReqWithGuard(ctx, g, req)
if resp != nil || err != nil {
// Ensure that we release the guard if we return a response or an error.
m.FinishReq(g)
return nil, resp, err
}
return g, nil, nil
}
// TODO(sumeer): we are using both g.Req and req, when the former should
// suffice. Remove the req parameter.
func (m *managerImpl) sequenceReqWithGuard(
ctx context.Context, g *Guard, req Request,
) (Response, *Error) {
// Some requests don't need to acquire latches at all.
if !shouldAcquireLatches(req) {
log.Event(ctx, "not acquiring latches")
return nil, nil
}
// Provide the manager with an opportunity to intercept the request. It
// may be able to serve the request directly, and even if not, it may be
// able to update its internal state based on the request.
resp, err := m.maybeInterceptReq(ctx, req)
if resp != nil || err != nil {
return resp, err
}
// Only the first iteration can already be holding latches (e.g. when
// re-sequencing after a failed optimistic evaluation); we assert this below.
first := true
for {
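// Each iteration acquires latches if they are not already held, scans the
// lock table for conflicting locks, and, if the scan indicates the request
// must wait, releases the latches, waits in the lock wait-queues, and
// retries from the top.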
if !first {
g.AssertNoLatches()
}
first = false
if !g.HoldingLatches() {
// TODO(sumeer): optimistic requests could register their need for
// latches, but not actually wait until acquisition.
// https://github.com/cockroachdb/cockroach/issues/9521
// Acquire latches for the request. This synchronizes the request
// with all conflicting in-flight requests.
log.Event(ctx, "acquiring latches")
g.lg, err = m.lm.Acquire(ctx, req)
if err != nil {
return nil, err
}
}
// Some requests don't need to wait on locks.
if req.LockSpans.Empty() {
return nil, nil
}
// Set the request's MaxWaitQueueLength based on the cluster setting.
g.Req.MaxLockWaitQueueLength = int(MaxLockWaitQueueLength.Get(&m.st.SV))
if g.EvalKind == OptimisticEval {
if g.ltg != nil {
panic("Optimistic locking should not have a non-nil lockTableGuard")
}
log.Event(ctx, "optimistically scanning lock table for conflicting locks")
g.ltg = m.lt.ScanOptimistic(g.Req)
} else {
// Scan for conflicting locks.
log.Event(ctx, "scanning lock table for conflicting locks")
g.ltg = m.lt.ScanAndEnqueue(g.Req, g.ltg)
}
// Wait on conflicting locks, if necessary.
if g.ltg.ShouldWait() {
m.lm.Release(g.moveLatchGuard())
log.Event(ctx, "waiting in lock wait-queues")
if err := m.ltw.WaitOn(ctx, g.Req, g.ltg); err != nil {
return nil, err
}
continue
}
return nil, nil
}
}
// maybeInterceptReq allows the concurrency manager to intercept requests before
// sequencing and evaluation so that it can immediately act on them. This allows
// the concurrency manager to route certain concurrency control-related requests
// into queues and optionally update its internal state based on the requests.
func (m *managerImpl) maybeInterceptReq(ctx context.Context, req Request) (Response, *Error) {
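// Only single-request PushTxn and QueryTxn batches are intercepted today;
// both may block in the txnWaitQueue before returning.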
switch {
case req.isSingle(roachpb.PushTxn):
// If necessary, wait in the txnWaitQueue for the pushee transaction to
// expire or to move to a finalized state.
t := req.Requests[0].GetPushTxn()
resp, err := m.twq.MaybeWaitForPush(ctx, t)
if err != nil {
return nil, err
} else if resp != nil {
return makeSingleResponse(resp), nil
}
case req.isSingle(roachpb.QueryTxn):
// If necessary, wait in the txnWaitQueue for a transaction state update
// or for a dependent transaction to change.
t := req.Requests[0].GetQueryTxn()
return nil, m.twq.MaybeWaitForQuery(ctx, t)
default:
// TODO(nvanbenschoten): in the future, use this hook to update the lock
// table to allow contending transactions to proceed.
// for _, arg := range req.Requests {
// switch t := arg.GetInner().(type) {
// case *roachpb.ResolveIntentRequest:
// _ = t
// case *roachpb.ResolveIntentRangeRequest:
// _ = t
// }
// }
}
return nil, nil
}
// shouldAcquireLatches determines whether the request should acquire latches
// before proceeding to evaluate. Latches are used to synchronize with other
// conflicting requests, based on the Spans collected for the request. Most
// request types will want to acquire latches.
func shouldAcquireLatches(req Request) bool {
switch {
case req.ReadConsistency != roachpb.CONSISTENT:
// Only acquire latches for consistent operations.
return false
case req.isSingle(roachpb.RequestLease):
// Do not acquire latches for lease requests. These requests are run on
// replicas that do not hold the lease, so acquiring latches wouldn't
// help synchronize with other requests.
return false
}
return true
}
// FinishReq implements the RequestSequencer interface.
func (m *managerImpl) FinishReq(g *Guard) {
// NOTE: we release latches _before_ exiting lock wait-queues deliberately.
// Either order would be correct, but the order here avoids non-determinism in
// cases where a request A holds both latches and lock wait-queue reservations
// and has a request B waiting on its reservations. If request A released its
// reservations before releasing its latches, it would be possible for B to
// beat A to the latch manager and end up blocking on its latches briefly. Not
// only is this confusing in traces, but it is slightly less efficient than if
// request A released latches before letting anyone waiting on it in the lock
// table proceed, ensuring that waiters do not hit its latches.
//
// Elsewhere, we relate the relationship between the latch manager and the
// lock-table to that of a mutex and condition variable pair. Following that
// analogy, this release ordering is akin to signaling a condition variable
// after releasing its associated mutex. Doing so ensures that whoever the
// signaler wakes up (if anyone) will never bump into its mutex immediately
// upon resumption.
if lg := g.moveLatchGuard(); lg != nil {
m.lm.Release(lg)
}
if ltg := g.moveLockTableGuard(); ltg != nil {
m.lt.Dequeue(ltg)
}
releaseGuard(g)
}
// HandleWriterIntentError implements the ContentionHandler interface.
func (m *managerImpl) HandleWriterIntentError(
ctx context.Context, g *Guard, seq roachpb.LeaseSequence, t *roachpb.WriteIntentError,
) (*Guard, *Error) {
if g.ltg == nil {
log.Fatalf(ctx, "cannot handle WriteIntentError %v for request without "+
"lockTableGuard; were lock spans declared for this request?", t)
}
// Add a discovered lock to the lock-table for each intent and enter each lock's
// wait-queue. If the lock-table is disabled and one or more of the intents
// are ignored then we immediately wait on all intents.
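// The finalized txn cache is only consulted when the number of discovered
// intents exceeds the threshold set by
// DiscoveredLocksThresholdToConsultFinalizedTxnCache, which keeps large
// batches of resolvable locks out of the lock table.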
consultFinalizedTxnCache :=
int64(len(t.Intents)) > DiscoveredLocksThresholdToConsultFinalizedTxnCache.Get(&m.st.SV)
wait := false
for i := range t.Intents {
intent := &t.Intents[i]
added, err := m.lt.AddDiscoveredLock(intent, seq, consultFinalizedTxnCache, g.ltg)
if err != nil {
log.Fatalf(ctx, "%v", err)
}
if !added {
wait = true
}
}
// Release the Guard's latches but continue to remain in lock wait-queues by
// not releasing lockWaitQueueGuards. We expect the caller of this method to
// then re-sequence the Request by calling SequenceReq with the un-latched
// Guard. This is analogous to iterating through the loop in SequenceReq.
m.lm.Release(g.moveLatchGuard())
// If the lockTable was disabled then we need to immediately wait on the
// intents to ensure that they are resolved and moved out of the request's
// way.
if wait {
for i := range t.Intents {
intent := &t.Intents[i]
if err := m.ltw.WaitOnLock(ctx, g.Req, intent); err != nil {
m.FinishReq(g)
return nil, err
}
}
} else {
if toResolve := g.ltg.ResolveBeforeScanning(); len(toResolve) > 0 {
if err := m.ltw.ResolveDeferredIntents(ctx, toResolve); err != nil {
m.FinishReq(g)
return nil, err
}
}
}
return g, nil
}
// HandleTransactionPushError implements the ContentionHandler interface.
func (m *managerImpl) HandleTransactionPushError(
ctx context.Context, g *Guard, t *roachpb.TransactionPushError,
) *Guard {
m.twq.EnqueueTxn(&t.PusheeTxn)
// Release the Guard's latches. The PushTxn request should not be in any
// lock wait-queues because it does not scan the lockTable. We expect the
// caller of this method to then re-sequence the Request by calling
// SequenceReq with the un-latched Guard. This is analogous to iterating
// through the loop in SequenceReq.
m.lm.Release(g.moveLatchGuard())
return g
}
// OnLockAcquired implements the LockManager interface.
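// Note that the lock is always registered with Exclusive strength here; the
// durability (replicated or unreplicated) is taken from the acquisition
// itself.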
func (m *managerImpl) OnLockAcquired(ctx context.Context, acq *roachpb.LockAcquisition) {
if err := m.lt.AcquireLock(&acq.Txn, acq.Key, lock.Exclusive, acq.Durability); err != nil {
log.Fatalf(ctx, "%v", err)
}
}
// OnLockUpdated implements the LockManager interface.
func (m *managerImpl) OnLockUpdated(ctx context.Context, up *roachpb.LockUpdate) {
if err := m.lt.UpdateLocks(up); err != nil {
log.Fatalf(ctx, "%v", err)
}
}
// OnTransactionUpdated implements the TransactionManager interface.
func (m *managerImpl) OnTransactionUpdated(ctx context.Context, txn *roachpb.Transaction) {
m.twq.UpdateTxn(ctx, txn)
}
// GetDependents implements the TransactionManager interface.
func (m *managerImpl) GetDependents(txnID uuid.UUID) []uuid.UUID {
return m.twq.GetDependents(txnID)
}
// OnRangeDescUpdated implements the RangeStateListener interface.
func (m *managerImpl) OnRangeDescUpdated(desc *roachpb.RangeDescriptor) {
m.twq.OnRangeDescUpdated(desc)
}
// OnRangeLeaseUpdated implements the RangeStateListener interface.
func (m *managerImpl) OnRangeLeaseUpdated(seq roachpb.LeaseSequence, isLeaseholder bool) {
if isLeaseholder {
m.lt.Enable(seq)
m.twq.Enable(seq)
} else {
// Disable all queues - the concurrency manager will no longer be
// informed about all state transitions to locks and transactions.
const disable = true
m.lt.Clear(disable)
m.twq.Clear(disable)
}
}
// OnRangeSplit implements the RangeStateListener interface.
func (m *managerImpl) OnRangeSplit() {
// TODO(nvanbenschoten): it is only essential that we clear the half of the
// lockTable which contains locks in the key range that is being split off
// from the current range. For now though, we clear it all.
const disable = false
m.lt.Clear(disable)
m.twq.Clear(disable)
}
// OnRangeMerge implements the RangeStateListener interface.
func (m *managerImpl) OnRangeMerge() {
// Disable all queues - the range is being merged into its LHS neighbor.
// It will no longer be informed about all state transitions to locks and
// transactions.
const disable = true
m.lt.Clear(disable)
m.twq.Clear(disable)
}
// OnReplicaSnapshotApplied implements the RangeStateListener interface.
func (m *managerImpl) OnReplicaSnapshotApplied() {
// A snapshot can cause discontinuities in raft entry application. The
// lockTable expects to observe all lock state transitions on the range
// through LockManager listener methods. If there's a chance it missed a
// state transition, it is safer to simply clear the lockTable and rebuild
// it from persistent intent state by allowing requests to discover locks
// and inform the manager through calls to HandleWriterIntentError.
//
// A range only maintains locks in the lockTable of its leaseholder replica
// even though it runs a concurrency manager on all replicas. Because of
// this, we expect it to be very rare that this actually clears any locks.
// Still, it is possible for the leaseholder replica to receive a snapshot
// when it is not also the raft leader.
const disable = false
m.lt.Clear(disable)
}
// LatchMetrics implements the MetricExporter interface.
func (m *managerImpl) LatchMetrics() (global, local kvserverpb.LatchManagerInfo) {
return m.lm.Info()
}
// LockTableDebug implements the MetricExporter interface.
func (m *managerImpl) LockTableDebug() string {
return m.lt.String()
}
// TxnWaitQueue implements the MetricExporter interface.
func (m *managerImpl) TxnWaitQueue() *txnwait.Queue {
return m.twq.(*txnwait.Queue)
}
func (r *Request) txnMeta() *enginepb.TxnMeta {
if r.Txn == nil {
return nil
}
return &r.Txn.TxnMeta
}
func (r *Request) isSingle(m roachpb.Method) bool {
if len(r.Requests) != 1 {
return false
}
return r.Requests[0].GetInner().Method() == m
}
// Used to avoid allocations.
var guardPool = sync.Pool{
New: func() interface{} { return new(Guard) },
}
func newGuard(req Request) *Guard {
g := guardPool.Get().(*Guard)
g.Req = req
return g
}
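// releaseGuard zeroes the Guard before returning it to the pool so that a
// pooled Guard does not retain references to a previous request or its
// latch/lock-table guards.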
func releaseGuard(g *Guard) {
*g = Guard{}
guardPool.Put(g)
}
// LatchSpans returns the maximal set of spans that the request will access.
func (g *Guard) LatchSpans() *spanset.SpanSet {
return g.Req.LatchSpans
}
// LockSpans returns the maximal set of lock spans that the request will access.
func (g *Guard) LockSpans() *spanset.SpanSet {
return g.Req.LockSpans
}
// HoldingLatches returns whether the guard is holding latches.
func (g *Guard) HoldingLatches() bool {
return g != nil && g.lg != nil
}
// AssertLatches asserts that the guard is non-nil and holding latches, if the
// request is supposed to hold latches while evaluating in the first place.
func (g *Guard) AssertLatches() {
if shouldAcquireLatches(g.Req) && !g.HoldingLatches() {
panic("expected latches held, found none")
}
}
// AssertNoLatches asserts that the guard is non-nil and not holding latches.
func (g *Guard) AssertNoLatches() {
if g.HoldingLatches() {
panic("unexpected latches held")
}
}
// CheckOptimisticNoConflicts checks that the lockSpansRead do not have a
// conflicting lock.
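// It is typically called after an optimistic evaluation with the spans that
// were actually read; a false result indicates that the caller should
// re-sequence the request, this time with pessimistic evaluation.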
func (g *Guard) CheckOptimisticNoConflicts(lockSpansRead *spanset.SpanSet) (ok bool) {
if g.EvalKind != OptimisticEval {
panic(errors.AssertionFailedf("unexpected EvalKind: %d", g.EvalKind))
}
if g.ltg == nil {
return true
}
return g.ltg.CheckOptimisticNoConflicts(lockSpansRead)
}
func (g *Guard) moveLatchGuard() latchGuard {
lg := g.lg
g.lg = nil
return lg
}
func (g *Guard) moveLockTableGuard() lockTableGuard {
ltg := g.ltg
g.ltg = nil
return ltg
}
func makeSingleResponse(r roachpb.Response) Response {
ru := make(Response, 1)
ru[0].MustSetInner(r)
return ru
}