// Copyright 2015 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.
package kv
import (
"context"
"fmt"
"time"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/closedts"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/settings"
"github.com/cockroachdb/cockroach/pkg/sql/sessiondatapb"
"github.com/cockroachdb/cockroach/pkg/storage/enginepb"
"github.com/cockroachdb/cockroach/pkg/util/admission"
"github.com/cockroachdb/cockroach/pkg/util/contextutil"
"github.com/cockroachdb/cockroach/pkg/util/errorutil/unimplemented"
"github.com/cockroachdb/cockroach/pkg/util/hlc"
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/cockroach/pkg/util/protoutil"
"github.com/cockroachdb/cockroach/pkg/util/syncutil"
"github.com/cockroachdb/cockroach/pkg/util/uuid"
"github.com/cockroachdb/errors"
)
// asyncRollbackTimeout is the context timeout during rollback() for a client
// who has already disconnected. This is needed to asynchronously clean up the
// client's intents and txn record. If the intent resolver has spare async task
// capacity, this timeout only needs to be long enough for the EndTxn request to
// make it through Raft, but if the cleanup task is synchronous (to backpressure
// clients) then cleanup will be abandoned when the timeout expires.
//
// We generally want to clean up if possible, so we set it high at 1 minute. If
// the transaction is very large or cleanup is very costly (e.g. hits a slow
// path for some reason), and the async pool is full (i.e. the system is
// under load), then it makes sense to abandon the cleanup before too long.
const asyncRollbackTimeout = time.Minute
// Txn is an in-progress distributed database transaction. A Txn is safe for
// concurrent use by multiple goroutines.
type Txn struct {
db *DB
// typ indicates the type of transaction.
typ TxnType
// gatewayNodeID, if != 0, is the ID of the node on whose behalf this
// transaction is running. Normally this is the current node, but in the case
// of Txns created on remote nodes by DistSQL this will be the gateway.
// It will be attached to all requests sent through this transaction.
gatewayNodeID roachpb.NodeID
// The following fields are not safe for concurrent modification.
// They should be set before operating on the transaction.
// commitTriggers are run upon successful commit.
commitTriggers []func(ctx context.Context)
// systemConfigTrigger is set to true when modifying keys from the SystemConfig
// span. This sets the SystemConfigTrigger on EndTxnRequest.
systemConfigTrigger bool
// mu holds fields that need to be synchronized for concurrent request execution.
mu struct {
syncutil.Mutex
ID uuid.UUID
debugName string
userPriority roachpb.UserPriority
// previousIDs holds the set of all previous IDs that the Txn's Proto has
// had across transaction aborts. This allows us to determine if a given
// response was meant for any incarnation of this transaction. This is
// useful for catching retriable errors that have escaped inner
// transactions, so that they don't cause a retry of an outer transaction.
previousIDs map[uuid.UUID]struct{}
// sender is a stateful sender for use with transactions (usually a
// TxnCoordSender). A new sender is created on transaction restarts (not
// retries).
sender TxnSender
// The txn has to be committed by this deadline. A nil value indicates no
// deadline.
deadline *hlc.Timestamp
}
// admissionHeader is used for admission control for work done in this
// transaction. Only certain paths initialize this properly, and the
// remaining just use the zero value. The set of code paths that initialize
// this are expected to expand over time.
admissionHeader roachpb.AdmissionHeader
}
// NewTxn returns a new RootTxn.
// Note: for SQL usage, prefer NewTxnWithSteppingEnabled() below.
// Note: for KV usage that should be subject to admission control, prefer
// NewTxnRootKV() below.
//
// If the transaction is used to send any operations, CommitOrCleanup() or
// CleanupOnError() should eventually be called to commit/rollback the
// transaction (including stopping the heartbeat loop).
//
// gatewayNodeID: If != 0, this is the ID of the node on whose behalf this
// transaction is running. Normally this is the current node, but in the case
// of Txns created on remote nodes by DistSQL this will be the gateway.
// If 0 is passed, no value is filled in the batches sent through this txn,
// which has the effect that the DistSender will fill in the batch with the
// current node's ID.
// If the gatewayNodeID is set and this is a root transaction, we optimize
// away any clock uncertainty for our own node, as our clock is accessible.
//
// See also db.NewTxn().
func NewTxn(ctx context.Context, db *DB, gatewayNodeID roachpb.NodeID) *Txn {
return NewTxnWithAdmissionControl(
ctx, db, gatewayNodeID, roachpb.AdmissionHeader_OTHER, admission.NormalPri)
}
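// A minimal usage sketch (hypothetical caller code, not part of this file;
// assumes ctx and db are in scope), following the commit/cleanup guidance in
// the comment above:
//
//	txn := NewTxn(ctx, db, 0 /* gatewayNodeID */)
//	if err := txn.Put(ctx, "k", "v"); err != nil {
//		txn.CleanupOnError(ctx, err)
//		return err
//	}
//	return txn.CommitOrCleanup(ctx)
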
// NewTxnWithAdmissionControl creates a new transaction with the specified
// admission control source and priority. See NewTxn() for details.
func NewTxnWithAdmissionControl(
ctx context.Context,
db *DB,
gatewayNodeID roachpb.NodeID,
source roachpb.AdmissionHeader_Source,
priority admission.WorkPriority,
) *Txn {
if db == nil {
panic(errors.WithContextTags(
errors.AssertionFailedf("attempting to create txn with nil db"), ctx))
}
now := db.clock.NowAsClockTimestamp()
kvTxn := roachpb.MakeTransaction(
"unnamed",
nil, // baseKey
roachpb.NormalUserPriority,
now.ToTimestamp(),
db.clock.MaxOffset().Nanoseconds(),
int32(db.ctx.NodeID.SQLInstanceID()),
)
txn := NewTxnFromProto(ctx, db, gatewayNodeID, now, RootTxn, &kvTxn)
txn.admissionHeader = roachpb.AdmissionHeader{
CreateTime: db.clock.PhysicalNow(),
Priority: int32(priority),
Source: source,
}
return txn
}
// NewTxnWithSteppingEnabled is like NewTxn but suitable for use by SQL. Note
// that this initializes Txn.admissionHeader to specify that the source is
// FROM_SQL.
// qualityOfService is the QoSLevel to use in admission control, whose value
// also corresponds exactly with the admission.WorkPriority to use.
func NewTxnWithSteppingEnabled(
ctx context.Context,
db *DB,
gatewayNodeID roachpb.NodeID,
qualityOfService sessiondatapb.QoSLevel,
) *Txn {
txn := NewTxnWithAdmissionControl(ctx, db, gatewayNodeID,
roachpb.AdmissionHeader_FROM_SQL, admission.WorkPriority(qualityOfService))
_ = txn.ConfigureStepping(ctx, SteppingEnabled)
return txn
}
// NewTxnRootKV is like NewTxn but specifically represents a transaction
// originating within KV and that is at the root of the tree of requests. For KV
// usage that should be subject to admission control. Do not use this for
// executing transactions originating in SQL. This distinction only causes this
// transaction to undergo admission control. See AdmissionHeader_Source for more
// details.
func NewTxnRootKV(ctx context.Context, db *DB, gatewayNodeID roachpb.NodeID) *Txn {
return NewTxnWithAdmissionControl(
ctx, db, gatewayNodeID, roachpb.AdmissionHeader_ROOT_KV, admission.NormalPri)
}
// NewTxnFromProto is like NewTxn but assumes the Transaction object is already initialized.
// Do not use this directly; use NewTxn() instead.
// This function exists for testing only.
func NewTxnFromProto(
ctx context.Context,
db *DB,
gatewayNodeID roachpb.NodeID,
now hlc.ClockTimestamp,
typ TxnType,
proto *roachpb.Transaction,
) *Txn {
// Ensure the gateway node ID is marked as free from clock offset.
if gatewayNodeID != 0 && typ == RootTxn {
proto.UpdateObservedTimestamp(gatewayNodeID, now)
}
txn := &Txn{db: db, typ: typ, gatewayNodeID: gatewayNodeID}
txn.mu.ID = proto.ID
txn.mu.userPriority = roachpb.NormalUserPriority
txn.mu.sender = db.factory.RootTransactionalSender(proto, txn.mu.userPriority)
return txn
}
// NewLeafTxn instantiates a new leaf transaction.
func NewLeafTxn(
ctx context.Context, db *DB, gatewayNodeID roachpb.NodeID, tis *roachpb.LeafTxnInputState,
) *Txn {
if db == nil {
panic(errors.WithContextTags(
errors.AssertionFailedf("attempting to create leaf txn with nil db for Transaction: %s", tis.Txn), ctx))
}
if tis.Txn.Status != roachpb.PENDING {
panic(errors.WithContextTags(
errors.AssertionFailedf("can't create leaf txn with non-PENDING proto: %s", tis.Txn), ctx))
}
tis.Txn.AssertInitialized(ctx)
txn := &Txn{db: db, typ: LeafTxn, gatewayNodeID: gatewayNodeID}
txn.mu.ID = tis.Txn.ID
txn.mu.userPriority = roachpb.NormalUserPriority
txn.mu.sender = db.factory.LeafTransactionalSender(tis)
return txn
}
// DB returns a transaction's DB.
func (txn *Txn) DB() *DB {
return txn.db
}
// Sender returns a transaction's TxnSender.
func (txn *Txn) Sender() TxnSender {
txn.mu.Lock()
defer txn.mu.Unlock()
return txn.mu.sender
}
// ID returns the current ID of the transaction.
func (txn *Txn) ID() uuid.UUID {
txn.mu.Lock()
defer txn.mu.Unlock()
return txn.mu.ID
}
// Epoch exports the txn's epoch.
func (txn *Txn) Epoch() enginepb.TxnEpoch {
txn.mu.Lock()
defer txn.mu.Unlock()
return txn.mu.sender.Epoch()
}
// statusLocked returns the txn proto status field.
func (txn *Txn) statusLocked() roachpb.TransactionStatus {
return txn.mu.sender.TxnStatus()
}
// IsCommitted returns true iff the transaction has the committed status.
func (txn *Txn) IsCommitted() bool {
txn.mu.Lock()
defer txn.mu.Unlock()
return txn.statusLocked() == roachpb.COMMITTED
}
// IsAborted returns true iff the transaction has the aborted status.
func (txn *Txn) IsAborted() bool {
txn.mu.Lock()
defer txn.mu.Unlock()
return txn.statusLocked() == roachpb.ABORTED
}
// IsOpen returns true iff the transaction is in the open state where
// it can accept further commands.
func (txn *Txn) IsOpen() bool {
txn.mu.Lock()
defer txn.mu.Unlock()
return txn.statusLocked() == roachpb.PENDING
}
// SetUserPriority sets the transaction's user priority. Transactions default to
// normal user priority. The user priority must be set before any operations are
// performed on the transaction.
func (txn *Txn) SetUserPriority(userPriority roachpb.UserPriority) error {
if txn.typ != RootTxn {
panic(errors.AssertionFailedf("SetUserPriority() called on leaf txn"))
}
txn.mu.Lock()
defer txn.mu.Unlock()
if txn.mu.userPriority == userPriority {
return nil
}
if userPriority < roachpb.MinUserPriority || userPriority > roachpb.MaxUserPriority {
return errors.AssertionFailedf("the given user priority %f is out of the allowed range [%f, %f]",
userPriority, roachpb.MinUserPriority, roachpb.MaxUserPriority)
}
txn.mu.userPriority = userPriority
return txn.mu.sender.SetUserPriority(userPriority)
}
// TestingSetPriority sets the transaction priority. It is intended for
// internal (testing) use only.
func (txn *Txn) TestingSetPriority(priority enginepb.TxnPriority) {
txn.mu.Lock()
// The negative user priority is translated on the server into a positive,
// non-randomized, priority for the transaction.
txn.mu.userPriority = roachpb.UserPriority(-priority)
if err := txn.mu.sender.SetUserPriority(txn.mu.userPriority); err != nil {
log.Fatalf(context.TODO(), "%+v", err)
}
txn.mu.Unlock()
}
// UserPriority returns the transaction's user priority.
func (txn *Txn) UserPriority() roachpb.UserPriority {
txn.mu.Lock()
defer txn.mu.Unlock()
return txn.mu.userPriority
}
// SetDebugName sets the debug name associated with the transaction which will
// appear in log files and the web UI.
func (txn *Txn) SetDebugName(name string) {
if txn.typ != RootTxn {
panic(errors.AssertionFailedf("SetDebugName() called on leaf txn"))
}
txn.mu.Lock()
defer txn.mu.Unlock()
txn.mu.sender.SetDebugName(name)
txn.mu.debugName = name
}
// DebugName returns the debug name associated with the transaction.
func (txn *Txn) DebugName() string {
txn.mu.Lock()
defer txn.mu.Unlock()
return txn.debugNameLocked()
}
func (txn *Txn) debugNameLocked() string {
return fmt.Sprintf("%s (id: %s)", txn.mu.debugName, txn.mu.ID)
}
// String returns a string version of this transaction.
func (txn *Txn) String() string {
txn.mu.Lock()
defer txn.mu.Unlock()
return txn.mu.sender.String()
}
// ReadTimestamp returns the transaction's current read timestamp.
// Note a transaction can be internally pushed forward in time before
// committing so this is not guaranteed to be the commit timestamp.
// Use CommitTimestamp() when needed.
func (txn *Txn) ReadTimestamp() hlc.Timestamp {
txn.mu.Lock()
defer txn.mu.Unlock()
return txn.readTimestampLocked()
}
func (txn *Txn) readTimestampLocked() hlc.Timestamp {
return txn.mu.sender.ReadTimestamp()
}
// CommitTimestamp returns the transaction's start timestamp.
// The start timestamp can get pushed, but using this method
// guarantees that if a timestamp push is needed, the commit
// will fail with a retryable error.
func (txn *Txn) CommitTimestamp() hlc.Timestamp {
txn.mu.Lock()
defer txn.mu.Unlock()
return txn.mu.sender.CommitTimestamp()
}
// CommitTimestampFixed returns true if the commit timestamp has
// been fixed to the start timestamp and cannot be pushed forward.
func (txn *Txn) CommitTimestampFixed() bool {
txn.mu.Lock()
defer txn.mu.Unlock()
return txn.mu.sender.CommitTimestampFixed()
}
// ProvisionalCommitTimestamp returns the transaction's provisional
// commit timestamp. This can evolve throughout a txn's lifecycle. See
// the comment on the WriteTimestamp field of TxnMeta for details.
func (txn *Txn) ProvisionalCommitTimestamp() hlc.Timestamp {
txn.mu.Lock()
defer txn.mu.Unlock()
return txn.mu.sender.ProvisionalCommitTimestamp()
}
// RequiredFrontier returns the largest timestamp at which the transaction may
// read values when performing a read-only operation.
func (txn *Txn) RequiredFrontier() hlc.Timestamp {
txn.mu.Lock()
defer txn.mu.Unlock()
return txn.mu.sender.RequiredFrontier()
}
// DeprecatedSetSystemConfigTrigger sets the system db trigger to true on this transaction.
// This will impact the EndTxnRequest. Note that this method takes a boolean
// argument indicating whether this transaction is intended for the system
// tenant. Only transactions for the system tenant need to set the system config
// trigger which is used to gossip updates to the system config to KV servers.
// The KV servers need access to an up-to-date system config in order to
// determine split points and zone configurations.
func (txn *Txn) DeprecatedSetSystemConfigTrigger(forSystemTenant bool) error {
if txn.typ != RootTxn {
return errors.AssertionFailedf("DeprecatedSetSystemConfigTrigger() called on leaf txn")
}
if !forSystemTenant {
return nil
}
txn.mu.Lock()
defer txn.mu.Unlock()
if err := txn.mu.sender.AnchorOnSystemConfigRange(); err != nil {
return err
}
txn.systemConfigTrigger = true
return nil
}
// DisablePipelining instructs the transaction not to pipeline requests. It
// should rarely be necessary to call this method.
//
// DisablePipelining must be called before any operations are performed on the
// transaction.
func (txn *Txn) DisablePipelining() error {
if txn.typ != RootTxn {
return errors.AssertionFailedf("DisablePipelining() called on leaf txn")
}
txn.mu.Lock()
defer txn.mu.Unlock()
return txn.mu.sender.DisablePipelining()
}
// NewBatch creates and returns a new empty batch object for use with the Txn.
func (txn *Txn) NewBatch() *Batch {
return &Batch{txn: txn, AdmissionHeader: txn.AdmissionHeader()}
}
// Get retrieves the value for a key, returning the retrieved key/value or an
// error. It is not considered an error for the key to not exist.
//
// r, err := txn.Get("a")
// // string(r.Key) == "a"
//
// key can be either a byte slice or a string.
func (txn *Txn) Get(ctx context.Context, key interface{}) (KeyValue, error) {
b := txn.NewBatch()
b.Get(key)
return getOneRow(txn.Run(ctx, b), b)
}
// GetForUpdate retrieves the value for a key, returning the retrieved key/value
// or an error. An unreplicated, exclusive lock is acquired on the key, if it
// exists. It is not considered an error for the key to not exist.
//
// r, err := txn.GetForUpdate("a")
// // string(r.Key) == "a"
//
// key can be either a byte slice or a string.
func (txn *Txn) GetForUpdate(ctx context.Context, key interface{}) (KeyValue, error) {
b := txn.NewBatch()
b.GetForUpdate(key)
return getOneRow(txn.Run(ctx, b), b)
}
// GetProto retrieves the value for a key and decodes the result as a proto
// message. If the key doesn't exist, the proto will simply be reset.
//
// key can be either a byte slice or a string.
func (txn *Txn) GetProto(ctx context.Context, key interface{}, msg protoutil.Message) error {
_, err := txn.GetProtoTs(ctx, key, msg)
return err
}
// GetProtoTs retrieves the value for a key and decodes the result as a proto
// message. It additionally returns the timestamp at which the key was read.
// If the key doesn't exist, the proto will simply be reset and a zero timestamp
// will be returned. A zero timestamp will also be returned if unmarshaling
// fails.
//
// key can be either a byte slice or a string.
func (txn *Txn) GetProtoTs(
ctx context.Context, key interface{}, msg protoutil.Message,
) (hlc.Timestamp, error) {
r, err := txn.Get(ctx, key)
if err != nil {
return hlc.Timestamp{}, err
}
if err := r.ValueProto(msg); err != nil || r.Value == nil {
return hlc.Timestamp{}, err
}
return r.Value.Timestamp, nil
}
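// A retrieval sketch (hypothetical caller code; any protoutil.Message
// implementation works, roachpb.RangeDescriptor is just an example):
//
//	var desc roachpb.RangeDescriptor
//	ts, err := txn.GetProtoTs(ctx, "some-key", &desc)
//	if err != nil {
//		return err
//	}
//	if ts.IsEmpty() {
//		// The key did not exist; desc has been reset.
//	}
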
// Put sets the value for a key.
//
// key can be either a byte slice or a string. value can be any key type, a
// protoutil.Message or any Go primitive type (bool, int, etc).
func (txn *Txn) Put(ctx context.Context, key, value interface{}) error {
b := txn.NewBatch()
b.Put(key, value)
return getOneErr(txn.Run(ctx, b), b)
}
// CPut conditionally sets the value for a key if the existing value is equal to
// expValue. To conditionally set a value only if the key doesn't currently
// exist, pass an empty expValue.
//
// Returns a ConditionFailedError if the existing value is not equal to expValue.
//
// key can be either a byte slice or a string. value can be any key type, a
// protoutil.Message or any Go primitive type (bool, int, etc).
//
// An empty expValue means that the key is expected to not exist. If not empty,
// expValue needs to correspond to a Value.TagAndDataBytes() - i.e. a key's
// value without the checksum (as the checksum includes the key too).
//
// Note that, as an exception to the general rule, it's ok to send more requests
// after getting a ConditionFailedError. See comments on ConditionalPutRequest
// for more info.
func (txn *Txn) CPut(ctx context.Context, key, value interface{}, expValue []byte) error {
b := txn.NewBatch()
b.CPut(key, value, expValue)
return getOneErr(txn.Run(ctx, b), b)
}
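// A read-modify-write sketch using CPut (hypothetical caller code). Per the
// comment above, the expected value is the TagAndDataBytes() of the value
// previously read, or empty if the key is expected not to exist:
//
//	r, err := txn.Get(ctx, "k")
//	if err != nil {
//		return err
//	}
//	var expValue []byte
//	if r.Value != nil {
//		expValue = r.Value.TagAndDataBytes()
//	}
//	// Fails with ConditionFailedError if "k" changed since the read.
//	return txn.CPut(ctx, "k", "new-v", expValue)
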
// InitPut sets the first value for a key to value. An error is reported if a
// value already exists for the key and it's not equal to the value passed in.
// If failOnTombstones is set to true, tombstones count as mismatched values
// and will cause a ConditionFailedError.
//
// key can be either a byte slice or a string. value can be any key type, a
// protoutil.Message or any Go primitive type (bool, int, etc). It is illegal to
// set value to nil.
func (txn *Txn) InitPut(ctx context.Context, key, value interface{}, failOnTombstones bool) error {
b := txn.NewBatch()
b.InitPut(key, value, failOnTombstones)
return getOneErr(txn.Run(ctx, b), b)
}
// Inc increments the integer value at key. If the key does not exist, it will
// be created with an initial value of 0, which will then be incremented. If
// the key exists but was set using Put or CPut, an error will be returned.
//
// The returned Result will contain a single row and Result.Err will indicate
// success or failure.
//
// key can be either a byte slice or a string.
func (txn *Txn) Inc(ctx context.Context, key interface{}, value int64) (KeyValue, error) {
b := txn.NewBatch()
b.Inc(key, value)
return getOneRow(txn.Run(ctx, b), b)
}
func (txn *Txn) scan(
ctx context.Context, begin, end interface{}, maxRows int64, isReverse, forUpdate bool,
) ([]KeyValue, error) {
b := txn.NewBatch()
if maxRows > 0 {
b.Header.MaxSpanRequestKeys = maxRows
}
b.scan(begin, end, isReverse, forUpdate)
r, err := getOneResult(txn.Run(ctx, b), b)
return r.Rows, err
}
// Scan retrieves the rows between begin (inclusive) and end (exclusive) in
// ascending order.
//
// The returned []KeyValue will contain up to maxRows elements (or all results
// when zero is supplied).
//
// key can be either a byte slice or a string.
func (txn *Txn) Scan(
ctx context.Context, begin, end interface{}, maxRows int64,
) ([]KeyValue, error) {
return txn.scan(ctx, begin, end, maxRows, false /* isReverse */, false /* forUpdate */)
}
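// A scan sketch (hypothetical caller code): fetch up to 10 rows in ["a", "z").
//
//	rows, err := txn.Scan(ctx, "a", "z", 10 /* maxRows */)
//	if err != nil {
//		return err
//	}
//	for _, kv := range rows {
//		// Use kv.Key and kv.Value.
//	}
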
// ScanForUpdate retrieves the rows between begin (inclusive) and end
// (exclusive) in ascending order. Unreplicated, exclusive locks are acquired on
// each of the returned keys.
//
// The returned []KeyValue will contain up to maxRows elements (or all results
// when zero is supplied).
//
// key can be either a byte slice or a string.
func (txn *Txn) ScanForUpdate(
ctx context.Context, begin, end interface{}, maxRows int64,
) ([]KeyValue, error) {
return txn.scan(ctx, begin, end, maxRows, false /* isReverse */, true /* forUpdate */)
}
// ReverseScan retrieves the rows between begin (inclusive) and end (exclusive)
// in descending order.
//
// The returned []KeyValue will contain up to maxRows elements (or all results
// when zero is supplied).
//
// key can be either a byte slice or a string.
func (txn *Txn) ReverseScan(
ctx context.Context, begin, end interface{}, maxRows int64,
) ([]KeyValue, error) {
return txn.scan(ctx, begin, end, maxRows, true /* isReverse */, false /* forUpdate */)
}
// ReverseScanForUpdate retrieves the rows between begin (inclusive) and end
// (exclusive) in descending order. Unreplicated, exclusive locks are acquired
// on each of the returned keys.
//
// The returned []KeyValue will contain up to maxRows elements (or all results
// when zero is supplied).
//
// key can be either a byte slice or a string.
func (txn *Txn) ReverseScanForUpdate(
ctx context.Context, begin, end interface{}, maxRows int64,
) ([]KeyValue, error) {
return txn.scan(ctx, begin, end, maxRows, true /* isReverse */, true /* forUpdate */)
}
// Iterate performs a paginated scan, applying the function f to every page.
// The semantics of retrieval and ordering are the same as for Scan. Note that
// Txn auto-retries the transaction if necessary. Hence, the paginated data
// must not be used for side-effects before the txn has committed.
func (txn *Txn) Iterate(
ctx context.Context, begin, end interface{}, pageSize int, f func([]KeyValue) error,
) error {
for {
rows, err := txn.Scan(ctx, begin, end, int64(pageSize))
if err != nil {
return err
}
if len(rows) == 0 {
return nil
}
if err := f(rows); err != nil {
return errors.Wrap(err, "running iterate callback")
}
if len(rows) < pageSize {
return nil
}
begin = rows[len(rows)-1].Key.Next()
}
}
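// A pagination sketch (hypothetical caller code): process all rows in
// ["a", "z") 128 at a time. Since the txn may auto-retry, the callback must
// be free of side effects until the txn commits, per the comment above.
//
//	err := txn.Iterate(ctx, "a", "z", 128, func(rows []KeyValue) error {
//		for _, kv := range rows {
//			// Buffer kv; do not act on it until after commit.
//		}
//		return nil
//	})
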
// Del deletes one or more keys.
//
// key can be either a byte slice or a string.
func (txn *Txn) Del(ctx context.Context, keys ...interface{}) error {
b := txn.NewBatch()
b.Del(keys...)
return getOneErr(txn.Run(ctx, b), b)
}
// DelRange deletes the rows between begin (inclusive) and end (exclusive).
//
// The returned []roachpb.Key will contain the keys deleted if the returnKeys
// parameter is true, or will be nil if the parameter is false, and Result.Err
// will indicate success or failure.
//
// key can be either a byte slice or a string.
func (txn *Txn) DelRange(
ctx context.Context, begin, end interface{}, returnKeys bool,
) ([]roachpb.Key, error) {
b := txn.NewBatch()
b.DelRange(begin, end, returnKeys)
r, err := getOneResult(txn.Run(ctx, b), b)
return r.Keys, err
}
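// A deletion sketch (hypothetical caller code): point deletes followed by a
// range delete that reports the deleted keys.
//
//	if err := txn.Del(ctx, "a", "b"); err != nil {
//		return err
//	}
//	deleted, err := txn.DelRange(ctx, "c", "d", true /* returnKeys */)
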
// Run executes the operations queued up within a batch. Before executing any
// of the operations the batch is first checked to see if there were any errors
// during its construction (e.g. failure to marshal a proto message).
//
// The operations within a batch are run in parallel and the order is
// non-deterministic. It is an unspecified behavior to modify and retrieve the
// same key within a batch.
//
// Upon completion, Batch.Results will contain the results for each
// operation. The order of the results matches the order the operations were
// added to the batch.
func (txn *Txn) Run(ctx context.Context, b *Batch) error {
if err := b.validate(); err != nil {
return err
}
return sendAndFill(ctx, txn.Send, b)
}
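// A batching sketch (hypothetical caller code): a write and a read in one
// round trip; Results are ordered to match the operations added.
//
//	b := txn.NewBatch()
//	b.Put("k1", "v1")
//	b.Get("k2")
//	if err := txn.Run(ctx, b); err != nil {
//		return err
//	}
//	kv := b.Results[1].Rows[0] // result of the Get
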
func (txn *Txn) commit(ctx context.Context) error {
// A batch with only endTxnReq is not subject to admission control, in order
// to reduce contention by releasing locks. In multi-tenant settings, it
// will be subject to admission control, and the zero CreateTime will give
// it preference within the tenant.
et := endTxnReq(true /* commit */, txn.deadline(), txn.systemConfigTrigger)
ba := roachpb.BatchRequest{Requests: et.unionArr[:]}
_, pErr := txn.Send(ctx, ba)
if pErr == nil {
for _, t := range txn.commitTriggers {
t(ctx)
}
}
return pErr.GoError()
}
// CleanupOnError cleans up the transaction as a result of an error.
func (txn *Txn) CleanupOnError(ctx context.Context, err error) {
if txn.typ != RootTxn {
panic(errors.WithContextTags(errors.AssertionFailedf("CleanupOnError() called on leaf txn"), ctx))
}
if err == nil {
panic(errors.WithContextTags(errors.AssertionFailedf("CleanupOnError() called with nil error"), ctx))
}
if replyErr := txn.rollback(ctx); replyErr != nil {
if _, ok := replyErr.GetDetail().(*roachpb.TransactionStatusError); ok || txn.IsAborted() {
log.Eventf(ctx, "failure aborting transaction: %s; abort caused by: %s", replyErr, err)
} else {
log.Warningf(ctx, "failure aborting transaction: %s; abort caused by: %s", replyErr, err)
}
}
}
// Commit is the same as CommitOrCleanup but will not attempt to clean
// up on failure. This can be used when the caller is prepared to do proper
// cleanup.
func (txn *Txn) Commit(ctx context.Context) error {
if txn.typ != RootTxn {
return errors.WithContextTags(errors.AssertionFailedf("Commit() called on leaf txn"), ctx)
}
return txn.commit(ctx)
}
// CommitInBatch executes the operations queued up within a batch and
// commits the transaction. Explicitly committing a transaction is
// optional, but more efficient than relying on the implicit commit
// performed when the transaction function returns without error.
// The batch must be created by this transaction.
// If the command completes successfully, the txn is considered finalized. On
// error, no attempt is made to clean up the (possibly still pending)
// transaction.
func (txn *Txn) CommitInBatch(ctx context.Context, b *Batch) error {
if txn.typ != RootTxn {
return errors.WithContextTags(errors.AssertionFailedf("CommitInBatch() called on leaf txn"), ctx)
}
if txn != b.txn {
return errors.Errorf("a batch b can only be committed by b.txn")
}
et := endTxnReq(true /* commit */, txn.deadline(), txn.systemConfigTrigger)
b.growReqs(1)
b.reqs[len(b.reqs)-1].Value = &et.union
b.initResult(1 /* calls */, 0, b.raw, nil)
return txn.Run(ctx, b)
}
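// A commit-in-batch sketch (hypothetical caller code): the final writes and
// the EndTxn are sent together, saving a round trip.
//
//	b := txn.NewBatch()
//	b.Put("k", "v")
//	if err := txn.CommitInBatch(ctx, b); err != nil {
//		return err
//	}
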
// CommitOrCleanup sends an EndTxnRequest with Commit=true.
// If that fails, an attempt to rollback is made.
// txn should not be used to send any more commands after this call.
func (txn *Txn) CommitOrCleanup(ctx context.Context) error {
if txn.typ != RootTxn {
return errors.WithContextTags(errors.AssertionFailedf("CommitOrCleanup() called on leaf txn"), ctx)
}
err := txn.commit(ctx)
if err != nil {
txn.CleanupOnError(ctx, err)
}
return err
}
// UpdateDeadline sets the transaction's deadline to the passed deadline.
// It may move the deadline to any timestamp above the current read timestamp.
// If the deadline is below the current provisional commit timestamp (write timestamp),
// then the transaction will fail with a deadline error during the commit.
// The deadline cannot be lower than txn.ReadTimestamp and we make the assumption
// the read timestamp will not change during execution, which is valid today.
func (txn *Txn) UpdateDeadline(ctx context.Context, deadline hlc.Timestamp) error {
if txn.typ != RootTxn {
panic(errors.WithContextTags(errors.AssertionFailedf("UpdateDeadline() called on leaf txn"), ctx))
}
txn.mu.Lock()
defer txn.mu.Unlock()
readTimestamp := txn.readTimestampLocked()
if deadline.Less(readTimestamp) {
return errors.AssertionFailedf("deadline below read timestamp is nonsensical; "+
"txn has would have no chance to commit. Deadline: %s. Read timestamp: %s Previous Deadline: %s.",
deadline, readTimestamp, txn.mu.deadline)
}
txn.mu.deadline = new(hlc.Timestamp)
*txn.mu.deadline = deadline
return nil
}
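// A deadline sketch (hypothetical caller code): set a deadline one minute
// above the read timestamp, which always satisfies the lower bound above.
//
//	dl := txn.ReadTimestamp().Add(time.Minute.Nanoseconds(), 0)
//	if err := txn.UpdateDeadline(ctx, dl); err != nil {
//		return err
//	}
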
// DeadlineLikelySufficient returns true if there currently is a deadline and
// that deadline is earlier than either the ProvisionalCommitTimestamp or
// the current reading of the node's HLC clock. The second condition is a
// conservative optimization to deal with the fact that the provisional
// commit timestamp may not represent the true commit timestamp; the
// transaction may have been pushed but not yet discovered that fact.
// Transactions that will still write can get pushed further, whereas for
// transactions that are done writing it is less clear how they would get
// pushed.
// Deadlines, in general, should not commonly be at risk of expiring near
// the current time, except in extraordinary circumstances. In cases where
// considering it helps, it helps a lot. In cases where considering it
// does not help, it does not hurt much.
func (txn *Txn) DeadlineLikelySufficient(sv *settings.Values) bool {
txn.mu.Lock()
defer txn.mu.Unlock()
// Instead of using the current HLC clock we will
// use the current time with a fudge factor because:
// 1) The clocks are desynchronized, so we may have
// been pushed above the current time.
// 2) There is a potential to race against concurrent pushes,
// which a future timestamp will help against.
// 3) If we are writing to non-blocking ranges then any
// push will be into the future.
getTargetTS := func() hlc.Timestamp {
now := txn.db.Clock().NowAsClockTimestamp()
maxClockOffset := txn.db.Clock().MaxOffset()
lagTargetDuration := closedts.TargetDuration.Get(sv)
leadTargetOverride := closedts.LeadForGlobalReadsOverride.Get(sv)
sideTransportCloseInterval := closedts.SideTransportCloseInterval.Get(sv)
return closedts.TargetForPolicy(now, maxClockOffset,
lagTargetDuration, leadTargetOverride, sideTransportCloseInterval,
roachpb.LEAD_FOR_GLOBAL_READS).Add(int64(time.Second), 0)
}
return txn.mu.deadline != nil &&
!txn.mu.deadline.IsEmpty() &&
// Avoid trying to get the txn mutex again by directly
// invoking ProvisionalCommitTimestamp versus calling
// ProvisionalCommitTimestampLocked on the Txn.
(txn.mu.deadline.Less(txn.mu.sender.ProvisionalCommitTimestamp()) ||
// In case the transaction gets pushed and the push is not observed,
// we cautiously also indicate that the deadline may have expired if
// the current HLC clock (with a fudge factor) exceeds the deadline.
txn.mu.deadline.Less(getTargetTS()))
}
// resetDeadlineLocked resets the deadline.
func (txn *Txn) resetDeadlineLocked() {
txn.mu.deadline = nil
}
// Rollback sends an EndTxnRequest with Commit=false.
// txn is considered finalized and cannot be used to send any more commands.
func (txn *Txn) Rollback(ctx context.Context) error {
if txn.typ != RootTxn {
return errors.WithContextTags(errors.AssertionFailedf("Rollback() called on leaf txn"), ctx)
}
return txn.rollback(ctx).GoError()
}
func (txn *Txn) rollback(ctx context.Context) *roachpb.Error {
log.VEventf(ctx, 2, "rolling back transaction")
// If the client has already disconnected, fall back to asynchronous cleanup
// below. Note that this is the common path when a client disconnects in the
// middle of an open transaction or during statement execution.
if ctx.Err() == nil {
// A batch with only endTxnReq is not subject to admission control, in
// order to reduce contention by releasing locks. In multi-tenant
// settings, it will be subject to admission control, and the zero
// CreateTime will give it preference within the tenant.
et := endTxnReq(false /* commit */, nil /* deadline */, false /* systemConfigTrigger */)
ba := roachpb.BatchRequest{Requests: et.unionArr[:]}
_, pErr := txn.Send(ctx, ba)
if pErr == nil {
return nil
}
// If rollback errored and the ctx was canceled during rollback, assume
// ctx cancellation caused the error and try again async below.
if ctx.Err() == nil {
return pErr
}
}
// We don't have a client whose context we can attach to, but we do want to
// limit how long this request is going to be around for to avoid leaking a
// goroutine (in case of a long-lived network partition). If it gets through
// Raft, and the intent resolver has free async task capacity, the actual
// cleanup will be independent of this context.
stopper := txn.db.ctx.Stopper
ctx, cancel := stopper.WithCancelOnQuiesce(txn.db.AnnotateCtx(context.Background()))
if err := stopper.RunAsyncTask(ctx, "async-rollback", func(ctx context.Context) {
defer cancel()
// A batch with only endTxnReq is not subject to admission control, in
// order to reduce contention by releasing locks. In multi-tenant
// settings, it will be subject to admission control, and the zero
// CreateTime will give it preference within the tenant.
et := endTxnReq(false /* commit */, nil /* deadline */, false /* systemConfigTrigger */)
ba := roachpb.BatchRequest{Requests: et.unionArr[:]}
_ = contextutil.RunWithTimeout(ctx, "async txn rollback", asyncRollbackTimeout,
func(ctx context.Context) error {
if _, pErr := txn.Send(ctx, ba); pErr != nil {
if statusErr, ok := pErr.GetDetail().(*roachpb.TransactionStatusError); ok &&
statusErr.Reason == roachpb.TransactionStatusError_REASON_TXN_COMMITTED {
// A common cause of these async rollbacks failing is when they're
// triggered by a ctx canceled while a commit is in-flight (and it's too
// late for it to be canceled), and so the rollback finds the txn to be
// already committed. We don't spam the logs with those.
log.VEventf(ctx, 2, "async rollback failed: %s", pErr)
} else {
log.Infof(ctx, "async rollback failed: %s", pErr)
}
}
return nil
})
}); err != nil {
cancel()
return roachpb.NewError(err)
}
return nil
}
// AddCommitTrigger adds a closure to be executed on successful commit
// of the transaction.
func (txn *Txn) AddCommitTrigger(trigger func(ctx context.Context)) {
if txn.typ != RootTxn {
panic(errors.AssertionFailedf("AddCommitTrigger() called on leaf txn"))
}
txn.commitTriggers = append(txn.commitTriggers, trigger)
}
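// A trigger sketch (hypothetical caller code): run a side effect only once
// the commit has succeeded.
//
//	txn.AddCommitTrigger(func(ctx context.Context) {
//		log.Infof(ctx, "transaction committed")
//	})
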
// endTxnReqAlloc is used to batch the heap allocations of an EndTxn request.
type endTxnReqAlloc struct {
req roachpb.EndTxnRequest
union roachpb.RequestUnion_EndTxn
unionArr [1]roachpb.RequestUnion
}
func endTxnReq(commit bool, deadline *hlc.Timestamp, hasTrigger bool) *endTxnReqAlloc {
alloc := new(endTxnReqAlloc)
alloc.req.Commit = commit
alloc.req.Deadline = deadline
if hasTrigger {
alloc.req.InternalCommitTrigger = &roachpb.InternalCommitTrigger{
ModifiedSpanTrigger: &roachpb.ModifiedSpanTrigger{
SystemConfigSpan: true,
},
}
}
alloc.union.EndTxn = &alloc.req
alloc.unionArr[0].Value = &alloc.union
return alloc
}
// AutoCommitError wraps a non-retryable error coming from auto-commit.
type AutoCommitError struct {
cause error
}
// Cause implements errors.Causer.
func (e *AutoCommitError) Cause() error {
return e.cause
}
func (e *AutoCommitError) Error() string {
return e.cause.Error()
}
// exec executes fn in the context of a distributed transaction. The closure is
// retried on retriable errors.
// If no error is returned by the closure, an attempt to commit the txn is made.
//
// When this method returns, txn might be in any state; exec does not attempt
// to clean up the transaction before returning an error. In case of
// TransactionAbortedError, txn is reset to a fresh transaction, ready to be
// used.
func (txn *Txn) exec(ctx context.Context, fn func(context.Context, *Txn) error) (err error) {
// Run fn in a retry loop until we encounter a success or
// error condition this loop isn't capable of handling.
for {
if err := ctx.Err(); err != nil {
return err
}
err = fn(ctx, txn)
// Commit on success, unless the txn has already been committed by the
// closure. We allow that, as the closure might want to run 1PC transactions.
if err == nil {
if !txn.IsCommitted() {
err = txn.Commit(ctx)
log.Eventf(ctx, "client.Txn did AutoCommit. err: %v", err)