Skip to content

Commit

Permalink
concurrency: emit structured contention information to trace
Browse files Browse the repository at this point in the history
This is a (very dirty) first stab at cockroachdb#55583 that I am posting to answer
the following questions:

1. is this basic approach sane or should contention metadata be
generated elsewhere (i.e. not in `WaitOn`)? `WaitOn` does not
necessarily see all updates, though I would take the point of
view that a contenting transaction that a request never ends
up waiting on (presumably because it is busy handling other
conflicts) is not one we need to observe.
2. how to best structure the code so that contention events
are tracked. Completing the current approach would likely
result in spaghetti code that is easy to get wrong. We
seem to want to keep a bit of state around; more than we
do with the current structure which intentionally minimizes
state held across iterations of the wait loop. In particular,
it's unclear to me whether `state.txn` and/or `state.key` can
change within a single invocation of `WaitOn`. I think this
all means that I need to understand this code (in particular
that on the other side of newStateC) better.

Release note: None
  • Loading branch information
tbg committed Jan 5, 2021
1 parent 6de6fca commit aedaab4
Show file tree
Hide file tree
Showing 13 changed files with 745 additions and 664 deletions.
13 changes: 13 additions & 0 deletions pkg/kv/kvserver/concurrency/concurrency_manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/util/uuid"
"github.com/cockroachdb/datadriven"
"github.com/cockroachdb/errors"
"github.com/gogo/protobuf/types"
"github.com/maruel/panicparse/stack"
"github.com/petermattis/goid"
)
Expand Down Expand Up @@ -920,6 +921,18 @@ func (m *monitor) collectRecordings() string {
g.prevEvents++
}
}
var ev roachpb.ContentionEvent
for _, item := range span.InternalStructured {
if types.Is(item, &ev) {
_ = types.UnmarshalAny(item, &ev)
if ev.Duration != 0 {
ev.Duration = 123 // for determinism
}
logs = append(logs, logRecord{
g: g, value: fmt.Sprintf("contention metadata: %s @ %s", ev.TxnMeta.ID.Short(), ev.Key),
})
}
}
}
if atomic.LoadInt32(&g.finished) == 1 {
g.cancel()
Expand Down
24 changes: 24 additions & 0 deletions pkg/kv/kvserver/concurrency/lock_table_waiter.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/util/stop"
"github.com/cockroachdb/cockroach/pkg/util/syncutil"
"github.com/cockroachdb/cockroach/pkg/util/timeutil"
"github.com/cockroachdb/cockroach/pkg/util/tracing"
"github.com/cockroachdb/cockroach/pkg/util/uuid"
"github.com/cockroachdb/errors"
)
Expand Down Expand Up @@ -152,13 +153,22 @@ func (w *lockTableWaiterImpl) WaitOn(
// re-discover the intent(s) during evaluation and resolve them themselves.
var deferredResolution []roachpb.LockUpdate
defer w.resolveDeferredIntents(ctx, &err, &deferredResolution)
ev := &roachpb.ContentionEvent{}
tBegin := timeutil.Now()
for {
select {
case <-newStateC:
timerC = nil
state := guard.CurState()
switch state.kind {
case waitFor, waitForDistinguished:
// NB: TxnLockTableWaiterWithNonTxn has state.{key,txn}==nil, despite
// what the comments on the txn field claim; should probably make that
// test more realistic.
if state.key != nil {
ev.Key = state.key
ev.TxnMeta = *state.txn
}
if req.WaitPolicy == lock.WaitPolicy_Error {
// If the waiter has an Error wait policy, resolve the conflict
// immediately without waiting. If the conflict is a lock then
Expand Down Expand Up @@ -263,6 +273,13 @@ func (w *lockTableWaiterImpl) WaitOn(
// detail in #41720. Specifically, see mention of "contention
// footprint" and COMMITTED_BUT_NOT_REMOVABLE.
w.lm.OnLockUpdated(ctx, &deferredResolution[len(deferredResolution)-1])

ev.Duration = timeutil.Since(tBegin)
if sp := tracing.SpanFromContext(ctx); sp != nil && ev.TxnMeta.ID != uuid.Nil {
sp.LogStructured(ev)
}
ev = &roachpb.ContentionEvent{}
tBegin = timeutil.Now()
continue
}
}
Expand Down Expand Up @@ -334,6 +351,13 @@ func (w *lockTableWaiterImpl) WaitOn(
// waiting, re-acquire latches, and check the lockTable again for
// any new conflicts. If it find none, it can proceed with
// evaluation.

if ev.TxnMeta.ID != uuid.Nil {
ev.Duration = timeutil.Since(tBegin)
if sp := tracing.SpanFromContext(ctx); sp != nil {
sp.LogStructured(ev)
}
}
return nil

default:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ on-txn-updated txn=txn2 status=committed
[2] sequence req3: acquiring latches
[2] sequence req3: scanning lock table for conflicting locks
[2] sequence req3: sequencing complete, returned guard
[2] sequence req3: contention metadata: 00000002 @ "k"

debug-lock-table
----
Expand Down Expand Up @@ -218,9 +219,11 @@ on-txn-updated txn=txn2 status=pending ts=18,1
[2] sequence req5: acquiring latches
[2] sequence req5: scanning lock table for conflicting locks
[2] sequence req5: sequencing complete, returned guard
[2] sequence req5: contention metadata: 00000002 @ "k"
[3] sequence req6: acquiring latches
[3] sequence req6: scanning lock table for conflicting locks
[3] sequence req6: sequencing complete, returned guard
[3] sequence req6: contention metadata: 00000002 @ "k"

new-request name=req7 txn=none ts=12,1
put key=k value=v
Expand Down Expand Up @@ -251,6 +254,7 @@ on-txn-updated txn=txn2 status=committed
[4] sequence req7: acquiring latches
[4] sequence req7: scanning lock table for conflicting locks
[4] sequence req7: sequencing complete, returned guard
[4] sequence req7: contention metadata: 00000002 @ "k"

finish req=req7
----
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,15 @@ on-txn-updated txn=txn2 status=aborted
[3] sequence req1: acquiring latches
[3] sequence req1: scanning lock table for conflicting locks
[3] sequence req1: sequencing complete, returned guard
[3] sequence req1: contention metadata: 00000002 @ "b"
[3] sequence req1: contention metadata: 00000002 @ "c"
[3] sequence req1: contention metadata: 00000002 @ "d"
[3] sequence req1: contention metadata: 00000002 @ "e"
[3] sequence req1: contention metadata: 00000002 @ "f"
[3] sequence req1: contention metadata: 00000002 @ "g"
[3] sequence req1: contention metadata: 00000002 @ "h"
[3] sequence req1: contention metadata: 00000002 @ "i"
[3] sequence req1: contention metadata: 00000002 @ "j"

debug-lock-table
----
Expand Down Expand Up @@ -181,6 +190,7 @@ on-txn-updated txn=txn2 status=committed
[3] sequence req1: acquiring latches
[3] sequence req1: scanning lock table for conflicting locks
[3] sequence req1: sequencing complete, returned guard
[3] sequence req1: contention metadata: 00000002 @ "a"

handle-write-intent-error req=req1 lease-seq=1
intent txn=txn2 key=b
Expand Down Expand Up @@ -209,6 +219,7 @@ sequence req=req1
[5] sequence req1: acquiring latches
[5] sequence req1: scanning lock table for conflicting locks
[5] sequence req1: sequencing complete, returned guard
[5] sequence req1: contention metadata: 00000002 @ "b"

handle-write-intent-error req=req1 lease-seq=1
intent txn=txn2 key=c
Expand Down Expand Up @@ -239,6 +250,7 @@ sequence req=req1
[7] sequence req1: acquiring latches
[7] sequence req1: scanning lock table for conflicting locks
[7] sequence req1: sequencing complete, returned guard
[7] sequence req1: contention metadata: 00000002 @ "c"

debug-lock-table
----
Expand Down
11 changes: 11 additions & 0 deletions pkg/kv/kvserver/concurrency/testdata/concurrency_manager/deadlocks
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,7 @@ on-txn-updated txn=txn1 status=aborted
[6] sequence req3r: acquiring latches
[6] sequence req3r: scanning lock table for conflicting locks
[6] sequence req3r: sequencing complete, returned guard
[6] sequence req3r: contention metadata: 00000001 @ "a"

# Txn3 can proceed and eventually commit.
finish req=req3r
Expand All @@ -179,6 +180,7 @@ on-txn-updated txn=txn3 status=committed
[5] sequence req2r: acquiring latches
[5] sequence req2r: scanning lock table for conflicting locks
[5] sequence req2r: sequencing complete, returned guard
[5] sequence req2r: contention metadata: 00000003 @ "c"

# Txn2 can proceed and eventually commit.
finish req=req2r
Expand Down Expand Up @@ -379,6 +381,7 @@ on-txn-updated txn=txn1 status=aborted
[4] sequence req4w: acquiring latches
[4] sequence req4w: scanning lock table for conflicting locks
[4] sequence req4w: sequencing complete, returned guard
[4] sequence req4w: contention metadata: 00000001 @ "a"
[5] sequence req1w2: detected pusher aborted
[5] sequence req1w2: sequencing complete, returned error: TransactionAbortedError(ABORT_REASON_PUSHER_ABORTED): <nil>
[7] sequence req3w2: resolving intent "a" for txn 00000001 with ABORTED status
Expand All @@ -392,6 +395,7 @@ finish req=req4w
[7] sequence req3w2: acquiring latches
[7] sequence req3w2: scanning lock table for conflicting locks
[7] sequence req3w2: sequencing complete, returned guard
[7] sequence req3w2: contention metadata: 00000004 @ "a"

# Txn3 can proceed and eventually commit.
finish req=req3w2
Expand All @@ -405,6 +409,7 @@ on-txn-updated txn=txn3 status=committed
[6] sequence req2w2: acquiring latches
[6] sequence req2w2: scanning lock table for conflicting locks
[6] sequence req2w2: sequencing complete, returned guard
[6] sequence req2w2: contention metadata: 00000003 @ "c"

# Txn2 can proceed and eventually commit.
finish req=req2w2
Expand Down Expand Up @@ -616,6 +621,7 @@ on-txn-updated txn=txn4 status=aborted
[5] sequence req1w2: acquiring latches
[5] sequence req1w2: scanning lock table for conflicting locks
[5] sequence req1w2: sequencing complete, returned guard
[5] sequence req1w2: contention metadata: 00000004 @ "b"

# Txn1 can proceed and eventually commit.
finish req=req1w2
Expand All @@ -629,6 +635,7 @@ on-txn-updated txn=txn1 status=committed
[6] sequence req3w2: acquiring latches
[6] sequence req3w2: scanning lock table for conflicting locks
[6] sequence req3w2: sequencing complete, returned guard
[6] sequence req3w2: contention metadata: 00000001 @ "a"

# Txn3 can proceed and eventually commit.
finish req=req3w2
Expand Down Expand Up @@ -841,6 +848,7 @@ on-txn-updated txn=txn1 status=aborted
[6] sequence req3w2: acquiring latches
[6] sequence req3w2: scanning lock table for conflicting locks
[6] sequence req3w2: sequencing complete, returned guard
[6] sequence req3w2: contention metadata: 00000001 @ "a"

# Txn3 can proceed and eventually commit.
finish req=req3w2
Expand All @@ -854,6 +862,7 @@ on-txn-updated txn=txn3 status=committed
[4] sequence req4w: acquiring latches
[4] sequence req4w: scanning lock table for conflicting locks
[4] sequence req4w: sequencing complete, returned guard
[4] sequence req4w: contention metadata: 00000003 @ "c"

# Txn4 can proceed and eventually commit.
finish req=req4w
Expand Down Expand Up @@ -1085,6 +1094,7 @@ on-txn-updated txn=txn4 status=aborted
[6] sequence req3w2: acquiring latches
[6] sequence req3w2: scanning lock table for conflicting locks
[6] sequence req3w2: sequencing complete, returned guard
[6] sequence req3w2: contention metadata: 00000004 @ "a"

# Txn3 can proceed and eventually commit.
finish req=req3w2
Expand All @@ -1098,6 +1108,7 @@ on-txn-updated txn=txn3 status=committed
[4] sequence req5w: acquiring latches
[4] sequence req5w: scanning lock table for conflicting locks
[4] sequence req5w: sequencing complete, returned guard
[4] sequence req5w: contention metadata: 00000003 @ "c"

# Txn5 can proceed and eventually commit.
finish req=req5w
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -183,10 +183,12 @@ on-txn-updated txn=txn3 status=committed
[5] sequence req4: acquiring latches
[5] sequence req4: scanning lock table for conflicting locks
[5] sequence req4: sequencing complete, returned guard
[5] sequence req4: contention metadata: 00000003 @ "k"
[7] sequence req2: resolving intent "k" for txn 00000003 with COMMITTED status
[7] sequence req2: acquiring latches
[7] sequence req2: scanning lock table for conflicting locks
[7] sequence req2: sequencing complete, returned guard
[7] sequence req2: contention metadata: 00000003 @ "k"

finish req=req2
----
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ on-txn-updated txn=txn1 status=aborted
[3] sequence req1: acquiring latches
[3] sequence req1: scanning lock table for conflicting locks
[3] sequence req1: sequencing complete, returned guard
[3] sequence req1: contention metadata: 00000001 @ "k"

finish req=req1
----
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ on-lease-updated leaseholder=false lease-seq=2
[2] sequence req2: acquiring latches
[2] sequence req2: scanning lock table for conflicting locks
[2] sequence req2: sequencing complete, returned guard
[2] sequence req2: contention metadata: 00000001 @ "k"

debug-lock-table
----
Expand Down Expand Up @@ -262,12 +263,14 @@ on-lock-updated req=reqRes2 txn=txn2 key=k status=committed
[-] update lock: committing txn 00000002 @ k
[11] sequence req3: acquiring latches
[11] sequence req3: blocked on select in spanlatch.(*Manager).waitForSignal
[11] sequence req3: contention metadata: 00000002 @ "k"

finish req=reqRes2
----
[-] finish reqRes2: finishing request
[11] sequence req3: scanning lock table for conflicting locks
[11] sequence req3: sequencing complete, returned guard
[11] sequence req3: contention metadata: 00000002 @ "k"

debug-lock-table
----
Expand Down Expand Up @@ -377,6 +380,7 @@ on-split
[2] sequence req2: acquiring latches
[2] sequence req2: scanning lock table for conflicting locks
[2] sequence req2: sequencing complete, returned guard
[2] sequence req2: contention metadata: 00000001 @ "k"

debug-lock-table
----
Expand Down Expand Up @@ -420,12 +424,14 @@ on-lock-updated req=reqRes1 txn=txn1 key=k status=committed
[-] update lock: committing txn 00000001 @ k
[4] sequence req2: acquiring latches
[4] sequence req2: blocked on select in spanlatch.(*Manager).waitForSignal
[4] sequence req2: contention metadata: 00000001 @ "k"

finish req=reqRes1
----
[-] finish reqRes1: finishing request
[4] sequence req2: scanning lock table for conflicting locks
[4] sequence req2: sequencing complete, returned guard
[4] sequence req2: contention metadata: 00000001 @ "k"

debug-lock-table
----
Expand Down Expand Up @@ -547,6 +553,7 @@ on-merge
[2] sequence req2: acquiring latches
[2] sequence req2: scanning lock table for conflicting locks
[2] sequence req2: sequencing complete, returned guard
[2] sequence req2: contention metadata: 00000001 @ "k"

debug-lock-table
----
Expand Down Expand Up @@ -695,6 +702,7 @@ on-snapshot-applied
[2] sequence req2: acquiring latches
[2] sequence req2: scanning lock table for conflicting locks
[2] sequence req2: sequencing complete, returned guard
[2] sequence req2: contention metadata: 00000001 @ "k"

debug-lock-table
----
Expand Down Expand Up @@ -738,12 +746,14 @@ on-lock-updated req=reqRes1 txn=txn1 key=k status=committed
[-] update lock: committing txn 00000001 @ k
[4] sequence req2: acquiring latches
[4] sequence req2: blocked on select in spanlatch.(*Manager).waitForSignal
[4] sequence req2: contention metadata: 00000001 @ "k"

finish req=reqRes1
----
[-] finish reqRes1: finishing request
[4] sequence req2: scanning lock table for conflicting locks
[4] sequence req2: sequencing complete, returned guard
[4] sequence req2: contention metadata: 00000001 @ "k"

debug-lock-table
----
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ on-txn-updated txn=txn1 status=pending ts=15,2
[3] sequence req1: acquiring latches
[3] sequence req1: scanning lock table for conflicting locks
[3] sequence req1: sequencing complete, returned guard
[3] sequence req1: contention metadata: 00000001 @ "k"

finish req=req1
----
Expand Down Expand Up @@ -111,6 +112,7 @@ on-txn-updated txn=txn1 status=pending ts=15,2
[3] sequence req1: acquiring latches
[3] sequence req1: scanning lock table for conflicting locks
[3] sequence req1: sequencing complete, returned guard
[3] sequence req1: contention metadata: 00000001 @ "k"

finish req=req1
----
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ on-txn-updated txn=txn1 status=pending ts=12,2
[2] sequence req2: acquiring latches
[2] sequence req2: scanning lock table for conflicting locks
[2] sequence req2: sequencing complete, returned guard
[2] sequence req2: contention metadata: 00000001 @ "k"

finish req=req2
----
Expand Down Expand Up @@ -191,6 +192,7 @@ on-txn-updated txn=txn1 status=pending ts=12,2
[2] sequence req2: acquiring latches
[2] sequence req2: scanning lock table for conflicting locks
[2] sequence req2: sequencing complete, returned guard
[2] sequence req2: contention metadata: 00000001 @ "k"

finish req=req2
----
Expand Down Expand Up @@ -318,6 +320,7 @@ on-txn-updated txn=txn1 status=pending ts=12,2
[2] sequence req2: acquiring latches
[2] sequence req2: scanning lock table for conflicting locks
[2] sequence req2: sequencing complete, returned guard
[2] sequence req2: contention metadata: 00000001 @ "k"

finish req=req2
----
Expand Down Expand Up @@ -389,6 +392,7 @@ on-txn-updated txn=txn1 status=committed
[3] sequence req4: acquiring latches
[3] sequence req4: scanning lock table for conflicting locks
[3] sequence req4: sequencing complete, returned guard
[3] sequence req4: contention metadata: 00000001 @ "k"

finish req=req4
----
Expand Down
Loading

0 comments on commit aedaab4

Please sign in to comment.