-
Notifications
You must be signed in to change notification settings - Fork 3.9k
/
Copy pathtxn_interceptor_seq_num_allocator.go
162 lines (143 loc) · 6.4 KB
/
txn_interceptor_seq_num_allocator.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
// Copyright 2018 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.
package kv
import (
"context"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/storage/engine/enginepb"
"github.com/cockroachdb/errors"
)
// txnSeqNumAllocator is a txnInterceptor in charge of allocating sequence
// numbers to all the individual requests in batches.
//
// Sequence numbers serve a few roles in the transaction model:
//
// 1. they are used to enforce an ordering between read and write operations in a
// single transaction that go to the same key. Each read request that travels
// through the interceptor is assigned the sequence number of the most recent
// write. Each write request that travels through the interceptor is assigned
// a sequence number larger than any previously allocated.
//
// This is true even for leaf transaction coordinators. In their case, they are
// provided the sequence number of the most recent write during construction.
// Because they only perform read operations and never issue writes, they assign
// each read this sequence number without ever incrementing their own counter.
// In this way, sequence numbers are maintained correctly across a distributed
// tree of transaction coordinators.
//
// 2. they are used to uniquely identify write operations. Because every write
// request is given a new sequence number, the tuple (txn_id, txn_epoch, seq)
// uniquely identifies a write operation across an entire cluster. This property
// is exploited when determining the status of an individual write by looking
// for its intent. We perform such an operation using the QueryIntent request
// type when pipelining transactional writes. We will do something similar
// during the recovery stage of implicitly committed transactions.
//
// 3. they are used to determine whether a batch contains the entire write set
// for a transaction. See BatchRequest.IsCompleteTransaction.
//
// 4. they are used to provide idempotency for replays and re-issues. The MVCC
// layer is sequence number-aware and ensures that reads at a given sequence
// number ignore writes in the same transaction at larger sequence numbers.
// Likewise, writes at a sequence number become no-ops if an intent with the
// same sequence is already present. If an intent with the same sequence is not
// already present but an intent with a larger sequence number is, an error is
// returned. Likewise, if an intent with the same sequence is present but its
// value is different than what we recompute, an error is returned.
//
type txnSeqNumAllocator struct {
	// wrapped is the sender that SendLocked forwards each batch to once
	// the batch's requests have been stamped with sequence numbers.
	wrapped lockedSender

	// writeSeq holds the sequence number most recently handed out to a
	// write operation. Its value is 0 up to the point where the first
	// write operation passes through SendLocked.
	writeSeq enginepb.TxnSeq

	// readSeq is the sequence number stamped on read-only requests while
	// steppingModeEnabled is set. It is not consulted otherwise.
	readSeq enginepb.TxnSeq

	// steppingModeEnabled selects between the two read behaviors:
	//
	//   - false ("read-own-writes"): read-only requests are performed at
	//     the latest write seqnum (writeSeq).
	//   - true ("stepping"): read-only requests are performed at the
	//     fixed readSeq, which only moves when stepLocked is called.
	steppingModeEnabled bool
}
// SendLocked is part of the txnInterceptor interface.
//
// Every request in the batch gets a sequence number written into its
// header, after which the batch is passed on to the wrapped sender and
// that sender's result is returned unchanged.
func (s *txnSeqNumAllocator) SendLocked(
	ctx context.Context, ba roachpb.BatchRequest,
) (*roachpb.BatchResponse, *roachpb.Error) {
	for _, union := range ba.Requests {
		inner := union.GetInner()

		// The write seqnum advances only for requests that will leave
		// intents and for the request that ends the transaction. Keeping
		// it still for everything else is what lets
		// ba.IsCompleteTransaction work properly.
		advance := roachpb.IsTransactionWrite(inner) || inner.Method() == roachpb.EndTxn
		if advance {
			s.writeSeq++
		}

		// Only read-only requests are allowed to operate at a past
		// seqnum, and only in stepping mode. Anything else, including
		// combined read/write requests such as CPut, uses the latest
		// write seqnum.
		hdr := inner.Header()
		if s.steppingModeEnabled && roachpb.IsReadOnly(inner) {
			hdr.Sequence = s.readSeq
		} else {
			hdr.Sequence = s.writeSeq
		}
		inner.SetHeader(hdr)
	}

	return s.wrapped.SendLocked(ctx, ba)
}
// setWrapped is part of the txnInterceptor interface. It records the
// sender that SendLocked hands batches to.
func (s *txnSeqNumAllocator) setWrapped(wrapped lockedSender) {
	s.wrapped = wrapped
}
// populateLeafInputState is part of the txnInterceptor interface. It
// copies the allocator's stepping configuration, read seqnum and current
// write seqnum into the given leaf input state.
func (s *txnSeqNumAllocator) populateLeafInputState(tis *roachpb.LeafTxnInputState) {
	// Stepping configuration.
	tis.SteppingModeEnabled, tis.ReadSeqNum = s.steppingModeEnabled, s.readSeq
	// The txn record carries the latest write seqnum.
	tis.Txn.Sequence = s.writeSeq
}
// initializeLeaf sets up the allocator of a leaf transaction from the
// given input state: both the stepping mode flag and the read seqnum are
// taken from it. The write seqnum is not touched here.
func (s *txnSeqNumAllocator) initializeLeaf(tis *roachpb.LeafTxnInputState) {
	s.steppingModeEnabled, s.readSeq = tis.SteppingModeEnabled, tis.ReadSeqNum
}
// populateLeafFinalState is part of the txnInterceptor interface. The
// sequence number allocator contributes nothing to the leaf final state.
func (s *txnSeqNumAllocator) populateLeafFinalState(tfs *roachpb.LeafTxnFinalState) {
	// Intentionally empty.
}
// importLeafFinalState is part of the txnInterceptor interface. The
// sequence number allocator takes nothing from the leaf final state.
func (s *txnSeqNumAllocator) importLeafFinalState(tfs *roachpb.LeafTxnFinalState) {
	// Intentionally empty.
}
// stepLocked advances the read seqnum so that it equals the current write
// seqnum. It backs the TxnCoordSender's Step() method.
//
// An assertion failure error is returned, and no state is changed, when
// stepping mode is not enabled or when the read seqnum is already ahead
// of the write seqnum.
func (s *txnSeqNumAllocator) stepLocked() error {
	switch {
	case !s.steppingModeEnabled:
		return errors.AssertionFailedf("stepping mode is not enabled")
	case s.readSeq > s.writeSeq:
		// The read seqnum may never run ahead of the write seqnum.
		return errors.AssertionFailedf(
			"cannot step() after mistaken initialization (%d,%d)", s.writeSeq, s.readSeq)
	}
	s.readSeq = s.writeSeq
	return nil
}
// configureSteppingLocked switches stepping mode on or off and reports
// whether it was on before the call. It backs the TxnCoordSender's
// ConfigureStepping() method.
//
// On an off-to-on transition the read seqnum is set to the current write
// seqnum. In every other case the read seqnum is left as it is.
func (s *txnSeqNumAllocator) configureSteppingLocked(enabled bool) (prevEnabled bool) {
	prevEnabled, s.steppingModeEnabled = s.steppingModeEnabled, enabled
	if enabled && !prevEnabled {
		s.readSeq = s.writeSeq
	}
	return prevEnabled
}
// epochBumpedLocked is part of the txnInterceptor interface. It returns
// the allocator to its initial state: stepping mode is switched off and
// both the write and the read seqnum go back to 0.
func (s *txnSeqNumAllocator) epochBumpedLocked() {
	s.steppingModeEnabled = false
	s.writeSeq, s.readSeq = 0, 0
}
// closeLocked is part of the txnInterceptor interface. It is a no-op for
// the sequence number allocator.
func (*txnSeqNumAllocator) closeLocked() {
	// Nothing to do.
}