-
Notifications
You must be signed in to change notification settings - Fork 3.8k
/
concurrency_manager.go
550 lines (498 loc) · 17.6 KB
/
concurrency_manager.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
// Copyright 2020 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.
package concurrency
import (
"context"
"sync"
"github.com/cockroachdb/cockroach/pkg/kv"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/concurrency/lock"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverpb"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/spanlatch"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/spanset"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/txnwait"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/settings"
"github.com/cockroachdb/cockroach/pkg/settings/cluster"
"github.com/cockroachdb/cockroach/pkg/storage/enginepb"
"github.com/cockroachdb/cockroach/pkg/util/hlc"
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/cockroach/pkg/util/metric"
"github.com/cockroachdb/cockroach/pkg/util/stop"
"github.com/cockroachdb/cockroach/pkg/util/uuid"
)
// DiscoveredLocksThresholdToConsultFinalizedTxnCache sets a threshold as
// mentioned in the description string. The default of 200 is somewhat
// arbitrary but should suffice for small OLTP transactions. Given the default
// 10,000 lock capacity of the lock table, 200 is small enough to not matter
// much against the capacity, which is desirable. We have seen examples with
// discoveredCount > 100,000, caused by stats collection, where we definitely
// want to avoid adding these locks to the lock table, if possible.
//
// NB: the description is assembled from several string literals; each
// non-final fragment must end in a space so that the words at the seams do
// not run together in the rendered text.
var DiscoveredLocksThresholdToConsultFinalizedTxnCache = settings.RegisterIntSetting(
	"kv.lock_table.discovered_locks_threshold_for_consulting_finalized_txn_cache",
	"the maximum number of discovered locks by a waiter, above which the finalized txn cache "+
		"is consulted and resolvable locks are not added to the lock table -- this should be a small "+
		"fraction of the maximum number of locks in the lock table",
	200,
	settings.NonNegativeInt,
)
// managerImpl implements the Manager interface. It bundles the cluster
// settings with the four cooperating components that NewManager constructs:
// a latch manager, a lock table, a lock-table waiter, and a txn wait-queue.
type managerImpl struct {
// Cluster settings. Read by HandleWriterIntentError to look up the
// discovered-locks threshold setting.
st *cluster.Settings
// Synchronizes conflicting in-flight requests.
lm latchManager
// Synchronizes conflicting in-progress transactions.
lt lockTable
// Waits for locks that conflict with a request to be released.
ltw lockTableWaiter
// Waits for transaction completion and detects deadlocks.
twq txnWaitQueue
}
// Config contains the dependencies to construct a Manager. It is consumed by
// NewManager, which hands the individual fields to the latch manager, the
// lock-table waiter, and the txn wait-queue.
type Config struct {
// Identification.
NodeDesc *roachpb.NodeDescriptor
RangeDesc *roachpb.RangeDescriptor
// Components.
Settings *cluster.Settings
DB *kv.DB
Clock *hlc.Clock
Stopper *stop.Stopper
IntentResolver IntentResolver
// Metrics.
TxnWaitMetrics *txnwait.Metrics
SlowLatchGauge *metric.Gauge
// Configs + Knobs.
// MaxLockTableSize is the size passed to newLockTable. A zero value is
// replaced with defaultLockTableSize by initDefaults.
MaxLockTableSize int64
// DisableTxnPushing is forwarded unchanged to the lock-table waiter.
DisableTxnPushing bool
OnContentionEvent func(*roachpb.ContentionEvent) // may be nil; allowed to mutate the event
// TxnWaitKnobs is forwarded unchanged to the txn wait-queue's config.
TxnWaitKnobs txnwait.TestingKnobs
}
// initDefaults populates configuration fields that were left at their zero
// value. Currently only MaxLockTableSize has a default.
func (c *Config) initDefaults() {
	if c.MaxLockTableSize != 0 {
		// An explicit size was provided; keep it.
		return
	}
	c.MaxLockTableSize = defaultLockTableSize
}
// NewManager builds a concurrency Manager from the supplied configuration.
// Unset configuration fields are defaulted first. The lock table is created
// once and shared between the manager and its lock-table waiter.
func NewManager(cfg Config) Manager {
	cfg.initDefaults()

	// The lock table is referenced by both the manager and the waiter.
	lockTbl := newLockTable(cfg.MaxLockTableSize)

	// TODO(nvanbenschoten): move pkg/storage/spanlatch to a new
	// pkg/storage/concurrency/latch package. Make it implement the
	// latchManager interface directly, if possible.
	latches := &latchManagerImpl{
		m: spanlatch.Make(cfg.Stopper, cfg.SlowLatchGauge),
	}

	waiter := &lockTableWaiterImpl{
		st:                cfg.Settings,
		clock:             cfg.Clock,
		stopper:           cfg.Stopper,
		ir:                cfg.IntentResolver,
		lt:                lockTbl,
		disableTxnPushing: cfg.DisableTxnPushing,
		onContentionEvent: cfg.OnContentionEvent,
	}

	// TODO(nvanbenschoten): move pkg/storage/txnwait to a new
	// pkg/storage/concurrency/txnwait package.
	waitQueue := txnwait.NewQueue(txnwait.Config{
		RangeDesc: cfg.RangeDesc,
		DB:        cfg.DB,
		Clock:     cfg.Clock,
		Stopper:   cfg.Stopper,
		Metrics:   cfg.TxnWaitMetrics,
		Knobs:     cfg.TxnWaitKnobs,
	})

	return &managerImpl{
		st:  cfg.Settings,
		lm:  latches,
		lt:  lockTbl,
		ltw: waiter,
		twq: waitQueue,
	}
}
// SequenceReq implements the RequestSequencer interface.
//
// A nil prev means this is the request's first sequencing attempt and a fresh
// Guard is allocated; otherwise prev is reused. The combination of prev and
// evalKind is validated up front:
//   - PessimisticEval: allowed with or without prev; a reused guard must not
//     be holding latches.
//   - OptimisticEval: only allowed without prev.
//   - PessimisticAfterFailedOptimisticEval: only allowed with prev, which
//     must still hold its latches if the request acquires latches at all.
//
// On success the guard is returned. If sequencing produced a response or an
// error, the guard is finished and (nil, resp, err) is returned instead.
func (m *managerImpl) SequenceReq(
	ctx context.Context, prev *Guard, req Request, evalKind RequestEvalKind,
) (*Guard, Response, *Error) {
	firstAttempt := prev == nil
	alreadyLatched := false

	switch evalKind {
	case PessimisticEval:
		if firstAttempt {
			log.Event(ctx, "sequencing request")
		} else {
			prev.AssertNoLatches()
			log.Event(ctx, "re-sequencing request")
		}
	case OptimisticEval:
		if !firstAttempt {
			panic("optimistic eval cannot happen when re-sequencing")
		}
		log.Event(ctx, "optimistically sequencing request")
	case PessimisticAfterFailedOptimisticEval:
		if firstAttempt {
			panic("retry should have non-nil guard")
		}
		if shouldAcquireLatches(req) {
			prev.AssertLatches()
			alreadyLatched = true
		}
		log.Event(ctx, "re-sequencing request after optimistic sequencing failed")
	}

	guard := prev
	if firstAttempt {
		guard = newGuard(req)
	}
	guard.EvalKind = evalKind

	resp, err := m.sequenceReqWithGuard(ctx, guard, req, alreadyLatched)
	if resp == nil && err == nil {
		return guard, nil, nil
	}
	// A response or an error ends the request here, so the guard must be
	// released before returning.
	m.FinishReq(guard)
	return nil, resp, err
}
// sequenceReqWithGuard runs the sequencing loop for req using guard g.
//
// holdsLatches reports whether g already holds latches on entry; if so, the
// first loop iteration skips latch acquisition. It returns a non-nil Response
// if the request was served directly by maybeInterceptReq, a non-nil *Error on
// failure, and (nil, nil) once the request may proceed. The guard is not
// finished here on error; SequenceReq does that.
//
// TODO(sumeer): we are using both g.Req and req, when the former should
// suffice. Remove the req parameter.
func (m *managerImpl) sequenceReqWithGuard(
ctx context.Context, g *Guard, req Request, holdsLatches bool,
) (Response, *Error) {
// Some requests don't need to acquire latches at all. Note that such
// requests return before the interception hook below is consulted.
if !shouldAcquireLatches(req) {
log.Event(ctx, "not acquiring latches")
return nil, nil
}
// Provide the manager with an opportunity to intercept the request. It
// may be able to serve the request directly, and even if not, it may be
// able to update its internal state based on the request.
resp, err := m.maybeInterceptReq(ctx, req)
if resp != nil || err != nil {
return resp, err
}
// Each iteration acquires latches (unless already held), scans the lock
// table, and either returns or waits on conflicting locks and retries.
for {
if !holdsLatches {
// TODO(sumeer): optimistic requests could register their need for
// latches, but not actually wait until acquisition.
// https://github.com/cockroachdb/cockroach/issues/9521
// Acquire latches for the request. This synchronizes the request
// with all conflicting in-flight requests.
log.Event(ctx, "acquiring latches")
g.lg, err = m.lm.Acquire(ctx, req)
if err != nil {
return nil, err
}
}
// For subsequent iterations. Latches are released below before waiting
// on locks, so any later iteration must re-acquire them.
holdsLatches = false
// Some requests don't want to wait on locks.
if req.LockSpans.Empty() {
return nil, nil
}
if g.EvalKind == OptimisticEval {
if g.ltg != nil {
panic("Optimistic locking should not have a non-nil lockTableGuard")
}
log.Event(ctx, "scanning lock table for conflicting locks")
g.ltg = m.lt.ScanOptimistic(g.Req)
} else {
// Scan for conflicting locks.
log.Event(ctx, "scanning lock table for conflicting locks")
g.ltg = m.lt.ScanAndEnqueue(g.Req, g.ltg)
}
// Wait on conflicting locks, if necessary.
if g.ltg.ShouldWait() {
// Drop latches before waiting; the lock-table guard is kept.
m.lm.Release(g.moveLatchGuard())
log.Event(ctx, "waiting in lock wait-queues")
if err := m.ltw.WaitOn(ctx, g.Req, g.ltg); err != nil {
return nil, err
}
continue
}
return nil, nil
}
}
// maybeInterceptReq gives the concurrency manager a chance to act on a
// request before it is sequenced and evaluated. Single-request PushTxn and
// QueryTxn batches are routed through the txn wait-queue; every other request
// passes through untouched.
//
// A non-nil Response means the request was served directly. A non-nil *Error
// means the wait failed. (nil, nil) means sequencing should continue.
func (m *managerImpl) maybeInterceptReq(ctx context.Context, req Request) (Response, *Error) {
	if req.isSingle(roachpb.PushTxn) {
		// If necessary, wait in the txnWaitQueue for the pushee transaction
		// to expire or to move to a finalized state.
		push := req.Requests[0].GetPushTxn()
		pushResp, pErr := m.twq.MaybeWaitForPush(ctx, push)
		if pErr != nil {
			return nil, pErr
		}
		if pushResp != nil {
			return makeSingleResponse(pushResp), nil
		}
		return nil, nil
	}

	if req.isSingle(roachpb.QueryTxn) {
		// If necessary, wait in the txnWaitQueue for a transaction state
		// update or for a dependent transaction to change.
		query := req.Requests[0].GetQueryTxn()
		return nil, m.twq.MaybeWaitForQuery(ctx, query)
	}

	// TODO(nvanbenschoten): in the future, use this hook to update the lock
	// table to allow contending transactions to proceed.
	// for _, arg := range req.Requests {
	// 	switch t := arg.GetInner().(type) {
	// 	case *roachpb.ResolveIntentRequest:
	// 		_ = t
	// 	case *roachpb.ResolveIntentRangeRequest:
	// 		_ = t
	// 	}
	// }
	return nil, nil
}
// shouldAcquireLatches reports whether the request must acquire latches
// before it proceeds to evaluation. Latches synchronize a request with other
// conflicting requests, based on the spans collected for it. Most request
// types acquire latches; the exceptions are listed below.
func shouldAcquireLatches(req Request) bool {
	// Only consistent operations acquire latches.
	if req.ReadConsistency != roachpb.CONSISTENT {
		return false
	}
	// Lease requests run on replicas that do not hold the lease, so
	// acquiring latches wouldn't help synchronize with other requests.
	if req.isSingle(roachpb.RequestLease) {
		return false
	}
	return true
}
// FinishReq implements the RequestSequencer interface. It dequeues the
// guard's lock-table guard (if any), releases its latches (if any), and then
// returns the guard to the pool. The guard must not be used afterwards.
func (m *managerImpl) FinishReq(g *Guard) {
	ltg := g.moveLockTableGuard()
	if ltg != nil {
		m.lt.Dequeue(ltg)
	}

	lg := g.moveLatchGuard()
	if lg != nil {
		m.lm.Release(lg)
	}

	releaseGuard(g)
}
// HandleWriterIntentError implements the ContentionHandler interface.
//
// Every intent carried by t is offered to the lock table as a discovered
// lock. The guard's latches are then released while its lock-table guard is
// kept, so the caller is expected to re-sequence the request by passing the
// returned guard back to SequenceReq. If a follow-up wait or resolution
// fails, the guard is finished and the error is returned with a nil guard.
func (m *managerImpl) HandleWriterIntentError(
	ctx context.Context, g *Guard, seq roachpb.LeaseSequence, t *roachpb.WriteIntentError,
) (*Guard, *Error) {
	if g.ltg == nil {
		log.Fatalf(ctx, "cannot handle WriteIntentError %v for request without "+
			"lockTableGuard; were lock spans declared for this request?", t)
	}

	// Above the configured threshold of discovered intents, ask the lock
	// table to consult the finalized txn cache while adding them.
	consultCache :=
		int64(len(t.Intents)) > DiscoveredLocksThresholdToConsultFinalizedTxnCache.Get(&m.st.SV)

	// Add a discovered lock to the lock table for each intent and enter each
	// lock's wait-queue. If any intent was not added, fall back to waiting on
	// all of the intents directly.
	mustWaitOnIntents := false
	for i := range t.Intents {
		added, err := m.lt.AddDiscoveredLock(&t.Intents[i], seq, consultCache, g.ltg)
		if err != nil {
			log.Fatalf(ctx, "%v", err)
		}
		mustWaitOnIntents = mustWaitOnIntents || !added
	}

	// Release the Guard's latches but remain in lock wait-queues by keeping
	// the lock-table guard. This is analogous to iterating through the loop
	// in SequenceReq.
	m.lm.Release(g.moveLatchGuard())

	if !mustWaitOnIntents {
		// All intents were added; resolve whatever the lock table asks to
		// have resolved before the next scan.
		toResolve := g.ltg.ResolveBeforeScanning()
		if len(toResolve) == 0 {
			return g, nil
		}
		if err := m.ltw.ResolveDeferredIntents(ctx, toResolve); err != nil {
			m.FinishReq(g)
			return nil, err
		}
		return g, nil
	}

	// Wait on each intent immediately so that they are resolved and moved out
	// of the request's way.
	for i := range t.Intents {
		if err := m.ltw.WaitOnLock(ctx, g.Req, &t.Intents[i]); err != nil {
			m.FinishReq(g)
			return nil, err
		}
	}
	return g, nil
}
// HandleTransactionPushError implements the ContentionHandler interface. It
// enqueues the pushee transaction in the txn wait-queue, releases the guard's
// latches, and returns the same guard for re-sequencing.
func (m *managerImpl) HandleTransactionPushError(
	ctx context.Context, g *Guard, t *roachpb.TransactionPushError,
) *Guard {
	pushee := &t.PusheeTxn
	m.twq.EnqueueTxn(pushee)

	// Release the Guard's latches. The PushTxn request should not be in any
	// lock wait-queues because it does not scan the lockTable. The caller is
	// expected to re-sequence the Request by calling SequenceReq with the
	// un-latched Guard, analogous to iterating through the loop in
	// SequenceReq.
	latches := g.moveLatchGuard()
	m.lm.Release(latches)
	return g
}
// OnLockAcquired implements the LockManager interface. The acquisition is
// recorded in the lock table as an exclusive lock; a failure to record it is
// fatal.
func (m *managerImpl) OnLockAcquired(ctx context.Context, acq *roachpb.LockAcquisition) {
	err := m.lt.AcquireLock(&acq.Txn, acq.Key, lock.Exclusive, acq.Durability)
	if err == nil {
		return
	}
	log.Fatalf(ctx, "%v", err)
}
// OnLockUpdated implements the LockManager interface. The update is applied
// to the lock table; a failure to apply it is fatal.
func (m *managerImpl) OnLockUpdated(ctx context.Context, up *roachpb.LockUpdate) {
	err := m.lt.UpdateLocks(up)
	if err == nil {
		return
	}
	log.Fatalf(ctx, "%v", err)
}
// OnTransactionUpdated implements the TransactionManager interface by
// forwarding the updated transaction to the txn wait-queue.
func (m *managerImpl) OnTransactionUpdated(ctx context.Context, txn *roachpb.Transaction) {
	waitQueue := m.twq
	waitQueue.UpdateTxn(ctx, txn)
}
// GetDependents implements the TransactionManager interface by returning
// whatever the txn wait-queue reports as dependents of txnID.
func (m *managerImpl) GetDependents(txnID uuid.UUID) []uuid.UUID {
	dependents := m.twq.GetDependents(txnID)
	return dependents
}
// OnRangeDescUpdated implements the RangeStateListener interface by passing
// the new range descriptor on to the txn wait-queue.
func (m *managerImpl) OnRangeDescUpdated(desc *roachpb.RangeDescriptor) {
	waitQueue := m.twq
	waitQueue.OnRangeDescUpdated(desc)
}
// OnRangeLeaseUpdated implements the RangeStateListener interface. When the
// replica is the leaseholder, the lock table and txn wait-queue are enabled
// under the given lease sequence; otherwise both are cleared and disabled.
func (m *managerImpl) OnRangeLeaseUpdated(seq roachpb.LeaseSequence, isLeaseholder bool) {
	if !isLeaseholder {
		// Disable all queues - the concurrency manager will no longer be
		// informed about all state transitions to locks and transactions.
		m.lt.Clear(true /* disable */)
		m.twq.Clear(true /* disable */)
		return
	}
	m.lt.Enable(seq)
	m.twq.Enable(seq)
}
// OnRangeSplit implements the RangeStateListener interface. It clears the
// lock table and the txn wait-queue without disabling them.
func (m *managerImpl) OnRangeSplit() {
	// TODO(nvanbenschoten): it is only essential that we clear the half of
	// the lockTable which contains locks in the key range that is being split
	// off from the current range. For now though, we clear it all.
	m.lt.Clear(false /* disable */)
	m.twq.Clear(false /* disable */)
}
// OnRangeMerge implements the RangeStateListener interface. It clears and
// disables both the lock table and the txn wait-queue.
func (m *managerImpl) OnRangeMerge() {
	// Disable all queues - the range is being merged into its LHS neighbor.
	// It will no longer be informed about all state transitions to locks and
	// transactions.
	m.lt.Clear(true /* disable */)
	m.twq.Clear(true /* disable */)
}
// OnReplicaSnapshotApplied implements the RangeStateListener interface. It
// clears the lock table without disabling it; the txn wait-queue is left
// untouched.
func (m *managerImpl) OnReplicaSnapshotApplied() {
	// A snapshot can cause discontinuities in raft entry application. The
	// lockTable expects to observe all lock state transitions on the range
	// through LockManager listener methods. If there's a chance it missed a
	// state transition, it is safer to simply clear the lockTable and rebuild
	// it from persistent intent state by allowing requests to discover locks
	// and inform the manager through calls to HandleWriterIntentError.
	//
	// A range only maintains locks in the lockTable of its leaseholder
	// replica even though it runs a concurrency manager on all replicas.
	// Because of this, we expect it to be very rare that this actually clears
	// any locks. Still, it is possible for the leaseholder replica to receive
	// a snapshot when it is not also the raft leader.
	m.lt.Clear(false /* disable */)
}
// LatchMetrics implements the MetricExporter interface by returning the two
// info values reported by the latch manager.
func (m *managerImpl) LatchMetrics() (global, local kvserverpb.LatchManagerInfo) {
	global, local = m.lm.Info()
	return global, local
}
// LockTableDebug implements the MetricExporter interface by returning the
// lock table's string representation.
func (m *managerImpl) LockTableDebug() string {
	dump := m.lt.String()
	return dump
}
// TxnWaitQueue implements the MetricExporter interface. It returns the
// manager's txn wait-queue as a concrete *txnwait.Queue and panics if the
// queue has a different dynamic type.
func (m *managerImpl) TxnWaitQueue() *txnwait.Queue {
	queue := m.twq.(*txnwait.Queue)
	return queue
}
// txnMeta returns a pointer to the TxnMeta of the request's transaction, or
// nil if the request has no transaction.
func (r *Request) txnMeta() *enginepb.TxnMeta {
	if txn := r.Txn; txn != nil {
		return &txn.TxnMeta
	}
	return nil
}
// isSingle reports whether the request consists of exactly one inner request
// and that request's method is m.
func (r *Request) isSingle(m roachpb.Method) bool {
	return len(r.Requests) == 1 && r.Requests[0].GetInner().Method() == m
}
// guardPool recycles Guard objects to avoid allocations. Guards are taken
// from it in newGuard and handed back in releaseGuard.
var guardPool = sync.Pool{
	New: func() interface{} {
		return &Guard{}
	},
}
// newGuard fetches a Guard from guardPool and associates it with req.
func newGuard(req Request) *Guard {
	guard := guardPool.Get().(*Guard)
	guard.Req = req
	return guard
}
// releaseGuard resets g to its zero value and returns it to guardPool. The
// caller must not use g afterwards.
func releaseGuard(g *Guard) {
	var zero Guard
	*g = zero
	guardPool.Put(g)
}
// LatchSpans returns the maximal set of spans that the request will access,
// as recorded in the guard's request.
func (g *Guard) LatchSpans() *spanset.SpanSet {
	spans := g.Req.LatchSpans
	return spans
}
// HoldingLatches returns whether the guard is holding latches or not. It is
// safe to call on a nil guard, which holds no latches.
func (g *Guard) HoldingLatches() bool {
	if g == nil {
		return false
	}
	return g.lg != nil
}
// AssertLatches panics if the guard's request is one that acquires latches
// (per shouldAcquireLatches) but the guard is not holding any. Requests that
// never acquire latches always pass.
func (g *Guard) AssertLatches() {
	if !shouldAcquireLatches(g.Req) {
		return
	}
	if g.HoldingLatches() {
		return
	}
	panic("expected latches held, found none")
}
// AssertNoLatches panics if the guard is holding latches. A nil guard holds
// no latches and therefore passes.
func (g *Guard) AssertNoLatches() {
	if !g.HoldingLatches() {
		return
	}
	panic("unexpected latches held")
}
// CheckOptimisticNoConflicts checks that the lockSpansRead do not have a
// conflicting lock. The check is delegated to the guard's lock-table guard;
// a guard without one trivially reports no conflicts.
func (g *Guard) CheckOptimisticNoConflicts(lockSpansRead *spanset.SpanSet) (ok bool) {
	if g.ltg != nil {
		return g.ltg.CheckOptimisticNoConflicts(lockSpansRead)
	}
	return true
}
// moveLatchGuard detaches the latch guard from g and returns it, leaving g
// without latches. The result is nil if g held none.
func (g *Guard) moveLatchGuard() latchGuard {
	var detached latchGuard
	detached, g.lg = g.lg, nil
	return detached
}
// moveLockTableGuard detaches the lock-table guard from g and returns it,
// leaving g without one. The result is nil if g had none.
func (g *Guard) moveLockTableGuard() lockTableGuard {
	var detached lockTableGuard
	detached, g.ltg = g.ltg, nil
	return detached
}
// makeSingleResponse wraps r in a Response of length one.
func makeSingleResponse(r roachpb.Response) Response {
	resp := make(Response, 1)
	only := &resp[0]
	only.MustSetInner(r)
	return resp
}