From 0668efbf891e36e77e14d24cf5837fac0c51a5f1 Mon Sep 17 00:00:00 2001 From: Andrei Matei Date: Thu, 28 Jan 2021 19:56:19 -0500 Subject: [PATCH] kvserver: introduce a Raft-based transport for closedts This patch introduces a replacement for the existing closed timestamp mechanism / transport. The new mechanism is gated by a cluster version. Raft commands now carry increasing closed timestamps generated by the propBuf by using the recent request Tracker for synchronizing with in-flight requests (i.e. not closing timestamps below them). Raft commands get a closed ts field, and the range state gets the field as well. The propBuf pays attention to the range's closed timestamp policy for deciding whether to close lagging or leading timestamps. Fixes #57395, #57396 Touches #57405 Release note: None --- pkg/clusterversion/cockroach_versions.go | 7 + pkg/clusterversion/key_string.go | 5 +- pkg/kv/kvserver/BUILD.bazel | 2 + pkg/kv/kvserver/below_raft_protos_test.go | 7 +- pkg/kv/kvserver/client_replica_test.go | 2 +- .../kvserver/closedts/tracker/heap_tracker.go | 7 + .../closedts/tracker/lockfree_tracker.go | 9 + pkg/kv/kvserver/closedts/tracker/tracker.go | 5 + pkg/kv/kvserver/kvserverpb/proposer_kv.go | 20 +- pkg/kv/kvserver/kvserverpb/proposer_kv.pb.go | 427 ++++++++++++----- pkg/kv/kvserver/kvserverpb/proposer_kv.proto | 23 + pkg/kv/kvserver/kvserverpb/state.pb.go | 230 +++++---- pkg/kv/kvserver/kvserverpb/state.proto | 8 + pkg/kv/kvserver/replica.go | 4 + pkg/kv/kvserver/replica_application_result.go | 12 +- .../replica_application_state_machine.go | 49 +- pkg/kv/kvserver/replica_follower_read.go | 6 + pkg/kv/kvserver/replica_init.go | 4 +- pkg/kv/kvserver/replica_proposal.go | 29 +- pkg/kv/kvserver/replica_proposal_buf.go | 331 ++++++++++++- pkg/kv/kvserver/replica_proposal_buf_test.go | 447 ++++++++++++++++-- pkg/kv/kvserver/replica_raft.go | 32 +- pkg/kv/kvserver/replica_rangefeed_test.go | 1 + pkg/kv/kvserver/replica_test.go | 53 ++- 
pkg/kv/kvserver/replica_write.go | 21 +- pkg/kv/kvserver/stateloader/stateloader.go | 32 +- pkg/kv/kvserver/store_split.go | 13 +- pkg/kv/kvserver/testing_knobs.go | 3 + pkg/roachpb/batch.go | 2 +- pkg/storage/enginepb/mvcc3.go | 5 + pkg/storage/enginepb/mvcc3.pb.go | 314 +++++++----- pkg/storage/enginepb/mvcc3.proto | 13 + pkg/util/hlc/timestamp.go | 13 + 33 files changed, 1718 insertions(+), 418 deletions(-) diff --git a/pkg/clusterversion/cockroach_versions.go b/pkg/clusterversion/cockroach_versions.go index 0a769a31522d..acd7ccd8fd86 100644 --- a/pkg/clusterversion/cockroach_versions.go +++ b/pkg/clusterversion/cockroach_versions.go @@ -252,6 +252,9 @@ const ( // database, such as adding REGIONS to a DATABASE or setting the LOCALITY // on a TABLE. MultiRegionFeatures + // ClosedTimestampsRaftTransport enables the Raft transport for closed + // timestamps and disables the previous per-node transport. + ClosedTimestampsRaftTransport // Step (1): Add new versions here. ) @@ -426,6 +429,10 @@ var versionsSingleton = keyedVersions([]keyedVersion{ Key: MultiRegionFeatures, Version: roachpb.Version{Major: 20, Minor: 2, Internal: 34}, }, + { + Key: ClosedTimestampsRaftTransport, + Version: roachpb.Version{Major: 20, Minor: 2, Internal: 36}, + }, // Step (2): Add new versions here. 
}) diff --git a/pkg/clusterversion/key_string.go b/pkg/clusterversion/key_string.go index 1ff13fc74478..ad864ce4c6ca 100644 --- a/pkg/clusterversion/key_string.go +++ b/pkg/clusterversion/key_string.go @@ -45,11 +45,12 @@ func _() { _ = x[SequencesRegclass-34] _ = x[ImplicitColumnPartitioning-35] _ = x[MultiRegionFeatures-36] + _ = x[ClosedTimestampsRaftTransport-37] } -const _Key_name = "NamespaceTableWithSchemasStart20_2GeospatialTypeEnumsRangefeedLeasesAlterColumnTypeGeneralAlterSystemJobsAddCreatedByColumnsAddScheduledJobsTableUserDefinedSchemasNoOriginFKIndexesNodeMembershipStatusMinPasswordLengthAbortSpanBytesAlterSystemJobsAddSqllivenessColumnsAddNewSystemSqllivenessTableMaterializedViewsBox2DTypeUpdateScheduledJobsSchemaCreateLoginPrivilegeHBAForNonTLSV20_2Start21_1EmptyArraysInInvertedIndexesUniqueWithoutIndexConstraintsVirtualComputedColumnsCPutInlineReplicaVersionsreplacedTruncatedAndRangeAppliedStateMigrationreplacedPostTruncatedAndRangeAppliedStateMigrationNewSchemaChangerLongRunningMigrationsTruncatedAndRangeAppliedStateMigrationPostTruncatedAndRangeAppliedStateMigrationSeparatedIntentsTracingVerbosityIndependentSemanticsSequencesRegclassImplicitColumnPartitioningMultiRegionFeatures" +const _Key_name = 
"NamespaceTableWithSchemasStart20_2GeospatialTypeEnumsRangefeedLeasesAlterColumnTypeGeneralAlterSystemJobsAddCreatedByColumnsAddScheduledJobsTableUserDefinedSchemasNoOriginFKIndexesNodeMembershipStatusMinPasswordLengthAbortSpanBytesAlterSystemJobsAddSqllivenessColumnsAddNewSystemSqllivenessTableMaterializedViewsBox2DTypeUpdateScheduledJobsSchemaCreateLoginPrivilegeHBAForNonTLSV20_2Start21_1EmptyArraysInInvertedIndexesUniqueWithoutIndexConstraintsVirtualComputedColumnsCPutInlineReplicaVersionsreplacedTruncatedAndRangeAppliedStateMigrationreplacedPostTruncatedAndRangeAppliedStateMigrationNewSchemaChangerLongRunningMigrationsTruncatedAndRangeAppliedStateMigrationPostTruncatedAndRangeAppliedStateMigrationSeparatedIntentsTracingVerbosityIndependentSemanticsSequencesRegclassImplicitColumnPartitioningMultiRegionFeaturesClosedTimestampsRaftTransport" -var _Key_index = [...]uint16{0, 25, 34, 48, 53, 68, 90, 124, 145, 163, 180, 200, 217, 231, 295, 312, 321, 346, 366, 378, 383, 392, 420, 449, 471, 481, 496, 542, 592, 608, 629, 667, 709, 725, 761, 778, 804, 823} +var _Key_index = [...]uint16{0, 25, 34, 48, 53, 68, 90, 124, 145, 163, 180, 200, 217, 231, 295, 312, 321, 346, 366, 378, 383, 392, 420, 449, 471, 481, 496, 542, 592, 608, 629, 667, 709, 725, 761, 778, 804, 823, 852} func (i Key) String() string { if i < 0 || i >= Key(len(_Key_index)-1) { diff --git a/pkg/kv/kvserver/BUILD.bazel b/pkg/kv/kvserver/BUILD.bazel index 402d71680912..ca306e3d1fc1 100644 --- a/pkg/kv/kvserver/BUILD.bazel +++ b/pkg/kv/kvserver/BUILD.bazel @@ -117,6 +117,7 @@ go_library( "//pkg/kv/kvserver/closedts/container", "//pkg/kv/kvserver/closedts/ctpb", "//pkg/kv/kvserver/closedts/storage", + "//pkg/kv/kvserver/closedts/tracker", "//pkg/kv/kvserver/concurrency", "//pkg/kv/kvserver/constraint", "//pkg/kv/kvserver/gc", @@ -299,6 +300,7 @@ go_test( "//pkg/kv/kvserver/batcheval/result", "//pkg/kv/kvserver/closedts", "//pkg/kv/kvserver/closedts/ctpb", + "//pkg/kv/kvserver/closedts/tracker", 
"//pkg/kv/kvserver/concurrency", "//pkg/kv/kvserver/concurrency/lock", "//pkg/kv/kvserver/constraint", diff --git a/pkg/kv/kvserver/below_raft_protos_test.go b/pkg/kv/kvserver/below_raft_protos_test.go index a3e04b28ea0e..00652ba4853e 100644 --- a/pkg/kv/kvserver/below_raft_protos_test.go +++ b/pkg/kv/kvserver/below_raft_protos_test.go @@ -77,13 +77,8 @@ var belowRaftGoldenProtos = map[reflect.Type]fixture{ populatedConstructor: func(r *rand.Rand) protoutil.Message { return enginepb.NewPopulatedRangeAppliedState(r, false) }, - // The populatedSum has changed from 10390885694280604642 to - // 7958815789228166749, as of 21.1, due to the addition of the - // SeparatedIntentCount field in MVCCStats. This field will not actually - // be populated until all nodes are on 21.1, so there isn't a risk of - // divergence. emptySum: 615555020845646359, - populatedSum: 7958815789228166749, + populatedSum: 3253881774919630461, }, reflect.TypeOf(&raftpb.HardState{}): { populatedConstructor: func(r *rand.Rand) protoutil.Message { diff --git a/pkg/kv/kvserver/client_replica_test.go b/pkg/kv/kvserver/client_replica_test.go index 50836e8ec237..d453d9fbee53 100644 --- a/pkg/kv/kvserver/client_replica_test.go +++ b/pkg/kv/kvserver/client_replica_test.go @@ -3332,7 +3332,7 @@ func TestProposalOverhead(t *testing.T) { // overhead is that users ranges do not have rangefeeds on by default whereas // system ranges do. const ( - expectedUserOverhead uint32 = 42 + expectedUserOverhead uint32 = 45 ) t.Run("user-key overhead", func(t *testing.T) { userKey := tc.ScratchRange(t) diff --git a/pkg/kv/kvserver/closedts/tracker/heap_tracker.go b/pkg/kv/kvserver/closedts/tracker/heap_tracker.go index 620bd05462a6..a78a90caea83 100644 --- a/pkg/kv/kvserver/closedts/tracker/heap_tracker.go +++ b/pkg/kv/kvserver/closedts/tracker/heap_tracker.go @@ -124,3 +124,10 @@ func (h *heapTracker) LowerBound(ctx context.Context) hlc.Timestamp { } return h.mu.rs[0].ts } + +// Count is part of the Tracker interface. 
+func (h *heapTracker) Count() int { + h.mu.Lock() + defer h.mu.Unlock() + return h.mu.rs.Len() +} diff --git a/pkg/kv/kvserver/closedts/tracker/lockfree_tracker.go b/pkg/kv/kvserver/closedts/tracker/lockfree_tracker.go index b9cd8bced181..86cdb0ff9c57 100644 --- a/pkg/kv/kvserver/closedts/tracker/lockfree_tracker.go +++ b/pkg/kv/kvserver/closedts/tracker/lockfree_tracker.go @@ -16,6 +16,7 @@ import ( "sync/atomic" "github.com/cockroachdb/cockroach/pkg/util/hlc" + "github.com/cockroachdb/cockroach/pkg/util/log" "github.com/cockroachdb/cockroach/pkg/util/timeutil" ) @@ -169,6 +170,9 @@ func (t *lockfreeTracker) Untrack(ctx context.Context, tok RemovalToken) { b := tok.(lockfreeToken).b // Note that atomic ops are not required here, as we hold the exclusive lock. b.refcnt-- + if b.refcnt < 0 { + log.Fatalf(ctx, "negative bucket refcount: %d", b.refcnt) + } if b.refcnt == 0 { // Reset the bucket, so that future Track() calls can create a new one. b.ts = 0 @@ -198,6 +202,11 @@ func (t *lockfreeTracker) LowerBound(ctx context.Context) hlc.Timestamp { } } +// Count is part of the Tracker interface. +func (t *lockfreeTracker) Count() int { + return int(t.b1.refcnt) + int(t.b2.refcnt) +} + // bucket represent a Tracker bucket: a data structure that coalesces a number // of timestamps, keeping track only of their count and minimum. // diff --git a/pkg/kv/kvserver/closedts/tracker/tracker.go b/pkg/kv/kvserver/closedts/tracker/tracker.go index 4fc41d429bff..e00f1b41af7f 100644 --- a/pkg/kv/kvserver/closedts/tracker/tracker.go +++ b/pkg/kv/kvserver/closedts/tracker/tracker.go @@ -85,6 +85,11 @@ type Tracker interface { // make is that, if no synthethic timestamp is inserted into the tracked set // for a while, eventually the LowerBound value will not be synthetic. LowerBound(context.Context) hlc.Timestamp + + // Count returns the current size of the tracked set. + // + // Count cannot be called concurrently with other methods. 
+ Count() int } // RemovalToken represents the result of Track: a token to be later used with diff --git a/pkg/kv/kvserver/kvserverpb/proposer_kv.go b/pkg/kv/kvserver/kvserverpb/proposer_kv.go index 04a4000420e2..14fc1ca2d064 100644 --- a/pkg/kv/kvserver/kvserverpb/proposer_kv.go +++ b/pkg/kv/kvserver/kvserverpb/proposer_kv.go @@ -10,18 +10,36 @@ package kvserverpb -import "math" +import ( + "math" + + "github.com/cockroachdb/cockroach/pkg/util/hlc" +) var maxRaftCommandFooterSize = (&RaftCommandFooter{ MaxLeaseIndex: math.MaxUint64, }).Size() +var maxClosedTimestampFooterSize = (&ClosedTimestampFooter{ + ClosedTimestamp: hlc.Timestamp{ + WallTime: math.MaxInt64, + Logical: math.MaxInt32, + Synthetic: true, + }, +}).Size() + // MaxRaftCommandFooterSize returns the maximum possible size of an // encoded RaftCommandFooter proto. func MaxRaftCommandFooterSize() int { return maxRaftCommandFooterSize } +// MaxClosedTimestampFooterSize returns the maximum possible size of an encoded +// ClosedTimestampFooter. +func MaxClosedTimestampFooterSize() int { + return maxClosedTimestampFooterSize +} + // IsZero returns whether all fields are set to their zero value. 
func (r ReplicatedEvalResult) IsZero() bool { return r == ReplicatedEvalResult{} diff --git a/pkg/kv/kvserver/kvserverpb/proposer_kv.pb.go b/pkg/kv/kvserver/kvserverpb/proposer_kv.pb.go index 8b43941018d9..ae64003650e3 100644 --- a/pkg/kv/kvserver/kvserverpb/proposer_kv.pb.go +++ b/pkg/kv/kvserver/kvserverpb/proposer_kv.pb.go @@ -48,7 +48,7 @@ func (m *Split) Reset() { *m = Split{} } func (m *Split) String() string { return proto.CompactTextString(m) } func (*Split) ProtoMessage() {} func (*Split) Descriptor() ([]byte, []int) { - return fileDescriptor_proposer_kv_0c8837b323bf7b92, []int{0} + return fileDescriptor_proposer_kv_0b3536bd0bf3d98c, []int{0} } func (m *Split) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -83,7 +83,7 @@ func (m *Merge) Reset() { *m = Merge{} } func (m *Merge) String() string { return proto.CompactTextString(m) } func (*Merge) ProtoMessage() {} func (*Merge) Descriptor() ([]byte, []int) { - return fileDescriptor_proposer_kv_0c8837b323bf7b92, []int{1} + return fileDescriptor_proposer_kv_0b3536bd0bf3d98c, []int{1} } func (m *Merge) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -117,7 +117,7 @@ type ChangeReplicas struct { func (m *ChangeReplicas) Reset() { *m = ChangeReplicas{} } func (*ChangeReplicas) ProtoMessage() {} func (*ChangeReplicas) Descriptor() ([]byte, []int) { - return fileDescriptor_proposer_kv_0c8837b323bf7b92, []int{2} + return fileDescriptor_proposer_kv_0b3536bd0bf3d98c, []int{2} } func (m *ChangeReplicas) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -169,7 +169,7 @@ func (m *ComputeChecksum) Reset() { *m = ComputeChecksum{} } func (m *ComputeChecksum) String() string { return proto.CompactTextString(m) } func (*ComputeChecksum) ProtoMessage() {} func (*ComputeChecksum) Descriptor() ([]byte, []int) { - return fileDescriptor_proposer_kv_0c8837b323bf7b92, []int{3} + return fileDescriptor_proposer_kv_0b3536bd0bf3d98c, []int{3} } func (m *ComputeChecksum) XXX_Unmarshal(b []byte) error { return 
m.Unmarshal(b) @@ -206,7 +206,7 @@ func (m *Compaction) Reset() { *m = Compaction{} } func (m *Compaction) String() string { return proto.CompactTextString(m) } func (*Compaction) ProtoMessage() {} func (*Compaction) Descriptor() ([]byte, []int) { - return fileDescriptor_proposer_kv_0c8837b323bf7b92, []int{4} + return fileDescriptor_proposer_kv_0b3536bd0bf3d98c, []int{4} } func (m *Compaction) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -243,7 +243,7 @@ func (m *SuggestedCompaction) Reset() { *m = SuggestedCompaction{} } func (m *SuggestedCompaction) String() string { return proto.CompactTextString(m) } func (*SuggestedCompaction) ProtoMessage() {} func (*SuggestedCompaction) Descriptor() ([]byte, []int) { - return fileDescriptor_proposer_kv_0c8837b323bf7b92, []int{5} + return fileDescriptor_proposer_kv_0b3536bd0bf3d98c, []int{5} } func (m *SuggestedCompaction) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -305,7 +305,7 @@ func (m *ReplicatedEvalResult) Reset() { *m = ReplicatedEvalResult{} } func (m *ReplicatedEvalResult) String() string { return proto.CompactTextString(m) } func (*ReplicatedEvalResult) ProtoMessage() {} func (*ReplicatedEvalResult) Descriptor() ([]byte, []int) { - return fileDescriptor_proposer_kv_0c8837b323bf7b92, []int{6} + return fileDescriptor_proposer_kv_0b3536bd0bf3d98c, []int{6} } func (m *ReplicatedEvalResult) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -349,7 +349,7 @@ func (m *ReplicatedEvalResult_AddSSTable) Reset() { *m = ReplicatedEvalR func (m *ReplicatedEvalResult_AddSSTable) String() string { return proto.CompactTextString(m) } func (*ReplicatedEvalResult_AddSSTable) ProtoMessage() {} func (*ReplicatedEvalResult_AddSSTable) Descriptor() ([]byte, []int) { - return fileDescriptor_proposer_kv_0c8837b323bf7b92, []int{6, 0} + return fileDescriptor_proposer_kv_0b3536bd0bf3d98c, []int{6, 0} } func (m *ReplicatedEvalResult_AddSSTable) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -386,7 
+386,7 @@ func (m *WriteBatch) Reset() { *m = WriteBatch{} } func (m *WriteBatch) String() string { return proto.CompactTextString(m) } func (*WriteBatch) ProtoMessage() {} func (*WriteBatch) Descriptor() ([]byte, []int) { - return fileDescriptor_proposer_kv_0c8837b323bf7b92, []int{7} + return fileDescriptor_proposer_kv_0b3536bd0bf3d98c, []int{7} } func (m *WriteBatch) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -423,7 +423,7 @@ func (m *LogicalOpLog) Reset() { *m = LogicalOpLog{} } func (m *LogicalOpLog) String() string { return proto.CompactTextString(m) } func (*LogicalOpLog) ProtoMessage() {} func (*LogicalOpLog) Descriptor() ([]byte, []int) { - return fileDescriptor_proposer_kv_0c8837b323bf7b92, []int{8} + return fileDescriptor_proposer_kv_0b3536bd0bf3d98c, []int{8} } func (m *LogicalOpLog) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -505,7 +505,22 @@ type RaftCommand struct { // been added after it, and on removal, the assignment counters must be // updated accordingly. Managing retry of proposals becomes trickier as // well as that uproots whatever ordering was originally envisioned. + // + // This field is set through RaftCommandFooter hackery. MaxLeaseIndex uint64 `protobuf:"varint,4,opt,name=max_lease_index,json=maxLeaseIndex,proto3" json:"max_lease_index,omitempty"` + // The closed timestamp carried by this command. Once a follower is told to + // apply this command, it knows that there will be no further writes at + // timestamps <= closed_timestamp. Note that the command itself might + // represent a write at a lower timestamp, so the closed timestamp can only be + // used after this command is applied. + // + // The field can be zero, which is to be interpreted as no closed timestamp + // update. Some commands (lease requests) implicitly carry a closed timestamp + // in a command-specific way. If the value is not zero, the value is greater + // or equal to that of the previous commands (and all before it). 
+ // + // This field is set through ClosedTimestampFooter hackery. + ClosedTimestamp hlc.Timestamp `protobuf:"bytes,17,opt,name=closed_timestamp,json=closedTimestamp,proto3" json:"closed_timestamp"` // replicated_eval_result is a set of structured information that instructs // replicated state changes to the part of a Range's replicated state machine // that exists outside of RocksDB. @@ -529,7 +544,7 @@ func (m *RaftCommand) Reset() { *m = RaftCommand{} } func (m *RaftCommand) String() string { return proto.CompactTextString(m) } func (*RaftCommand) ProtoMessage() {} func (*RaftCommand) Descriptor() ([]byte, []int) { - return fileDescriptor_proposer_kv_0c8837b323bf7b92, []int{9} + return fileDescriptor_proposer_kv_0b3536bd0bf3d98c, []int{9} } func (m *RaftCommand) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -568,7 +583,7 @@ func (m *RaftCommandFooter) Reset() { *m = RaftCommandFooter{} } func (m *RaftCommandFooter) String() string { return proto.CompactTextString(m) } func (*RaftCommandFooter) ProtoMessage() {} func (*RaftCommandFooter) Descriptor() ([]byte, []int) { - return fileDescriptor_proposer_kv_0c8837b323bf7b92, []int{10} + return fileDescriptor_proposer_kv_0b3536bd0bf3d98c, []int{10} } func (m *RaftCommandFooter) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -593,6 +608,42 @@ func (m *RaftCommandFooter) XXX_DiscardUnknown() { var xxx_messageInfo_RaftCommandFooter proto.InternalMessageInfo +// ClosedTimestampFooter is similar to RaftCommandFooter, allowing the proposal +// buffer to fill in the closed_timestamp field after most of the proto has been +// marshaled already. 
+type ClosedTimestampFooter struct { + ClosedTimestamp hlc.Timestamp `protobuf:"bytes,17,opt,name=closed_timestamp,json=closedTimestamp,proto3" json:"closed_timestamp"` +} + +func (m *ClosedTimestampFooter) Reset() { *m = ClosedTimestampFooter{} } +func (m *ClosedTimestampFooter) String() string { return proto.CompactTextString(m) } +func (*ClosedTimestampFooter) ProtoMessage() {} +func (*ClosedTimestampFooter) Descriptor() ([]byte, []int) { + return fileDescriptor_proposer_kv_0b3536bd0bf3d98c, []int{11} +} +func (m *ClosedTimestampFooter) XXX_Unmarshal(b []byte) error { + return m.Unmarshal(b) +} +func (m *ClosedTimestampFooter) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + b = b[:cap(b)] + n, err := m.MarshalTo(b) + if err != nil { + return nil, err + } + return b[:n], nil +} +func (dst *ClosedTimestampFooter) XXX_Merge(src proto.Message) { + xxx_messageInfo_ClosedTimestampFooter.Merge(dst, src) +} +func (m *ClosedTimestampFooter) XXX_Size() int { + return m.Size() +} +func (m *ClosedTimestampFooter) XXX_DiscardUnknown() { + xxx_messageInfo_ClosedTimestampFooter.DiscardUnknown(m) +} + +var xxx_messageInfo_ClosedTimestampFooter proto.InternalMessageInfo + func init() { proto.RegisterType((*Split)(nil), "cockroach.kv.kvserver.storagepb.Split") proto.RegisterType((*Merge)(nil), "cockroach.kv.kvserver.storagepb.Merge") @@ -607,6 +658,7 @@ func init() { proto.RegisterType((*RaftCommand)(nil), "cockroach.kv.kvserver.storagepb.RaftCommand") proto.RegisterMapType((map[string]string)(nil), "cockroach.kv.kvserver.storagepb.RaftCommand.TraceDataEntry") proto.RegisterType((*RaftCommandFooter)(nil), "cockroach.kv.kvserver.storagepb.RaftCommandFooter") + proto.RegisterType((*ClosedTimestampFooter)(nil), "cockroach.kv.kvserver.storagepb.ClosedTimestampFooter") } func (this *Split) Equal(that interface{}) bool { if that == nil { @@ -1314,6 +1366,16 @@ func (m *RaftCommand) MarshalTo(dAtA []byte) (int, error) { i += copy(dAtA[i:], v) } } + dAtA[i] = 0x8a + i++ + 
dAtA[i] = 0x1 + i++ + i = encodeVarintProposerKv(dAtA, i, uint64(m.ClosedTimestamp.Size())) + n21, err := m.ClosedTimestamp.MarshalTo(dAtA[i:]) + if err != nil { + return 0, err + } + i += n21 return i, nil } @@ -1340,6 +1402,34 @@ func (m *RaftCommandFooter) MarshalTo(dAtA []byte) (int, error) { return i, nil } +func (m *ClosedTimestampFooter) Marshal() (dAtA []byte, err error) { + size := m.Size() + dAtA = make([]byte, size) + n, err := m.MarshalTo(dAtA) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *ClosedTimestampFooter) MarshalTo(dAtA []byte) (int, error) { + var i int + _ = i + var l int + _ = l + dAtA[i] = 0x8a + i++ + dAtA[i] = 0x1 + i++ + i = encodeVarintProposerKv(dAtA, i, uint64(m.ClosedTimestamp.Size())) + n22, err := m.ClosedTimestamp.MarshalTo(dAtA[i:]) + if err != nil { + return 0, err + } + i += n22 + return i, nil +} + func encodeVarintProposerKv(dAtA []byte, offset int, v uint64) int { for v >= 1<<7 { dAtA[offset] = uint8(v&0x7f | 0x80) @@ -1576,6 +1666,8 @@ func (m *RaftCommand) Size() (n int) { n += mapEntrySize + 2 + sovProposerKv(uint64(mapEntrySize)) } } + l = m.ClosedTimestamp.Size() + n += 2 + l + sovProposerKv(uint64(l)) return n } @@ -1591,6 +1683,17 @@ func (m *RaftCommandFooter) Size() (n int) { return n } +func (m *ClosedTimestampFooter) Size() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + l = m.ClosedTimestamp.Size() + n += 2 + l + sovProposerKv(uint64(l)) + return n +} + func sovProposerKv(x uint64) (n int) { for { n++ @@ -3282,6 +3385,36 @@ func (m *RaftCommand) Unmarshal(dAtA []byte) error { } m.TraceData[mapkey] = mapvalue iNdEx = postIndex + case 17: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field ClosedTimestamp", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowProposerKv + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= (int(b) & 0x7F) << 
shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return ErrInvalidLengthProposerKv + } + postIndex := iNdEx + msglen + if postIndex > l { + return io.ErrUnexpectedEOF + } + if err := m.ClosedTimestamp.Unmarshal(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex default: iNdEx = preIndex skippy, err := skipProposerKv(dAtA[iNdEx:]) @@ -3372,6 +3505,86 @@ func (m *RaftCommandFooter) Unmarshal(dAtA []byte) error { } return nil } +func (m *ClosedTimestampFooter) Unmarshal(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowProposerKv + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: ClosedTimestampFooter: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: ClosedTimestampFooter: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 17: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field ClosedTimestamp", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowProposerKv + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= (int(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return ErrInvalidLengthProposerKv + } + postIndex := iNdEx + msglen + if postIndex > l { + return io.ErrUnexpectedEOF + } + if err := m.ClosedTimestamp.Unmarshal(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex + default: + iNdEx = preIndex + skippy, err := skipProposerKv(dAtA[iNdEx:]) + if err != nil { + return err + } + if (skippy < 0) || (iNdEx+skippy) < 0 { + return 
ErrInvalidLengthProposerKv + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} func skipProposerKv(dAtA []byte) (n int, err error) { l := len(dAtA) iNdEx := 0 @@ -3478,98 +3691,100 @@ var ( ) func init() { - proto.RegisterFile("kv/kvserver/kvserverpb/proposer_kv.proto", fileDescriptor_proposer_kv_0c8837b323bf7b92) -} - -var fileDescriptor_proposer_kv_0c8837b323bf7b92 = []byte{ - // 1424 bytes of a gzipped FileDescriptorProto - 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xa4, 0x57, 0x4f, 0x6f, 0xdb, 0x46, - 0x16, 0xb7, 0x2c, 0xc9, 0xa6, 0x9e, 0x6c, 0x89, 0x9e, 0x38, 0x09, 0xd7, 0xbb, 0x2b, 0x19, 0xda, - 0x6c, 0xe0, 0xdd, 0xcd, 0x52, 0x81, 0xbd, 0x0b, 0x14, 0x49, 0x50, 0xc4, 0x92, 0x93, 0xc6, 0x8a, - 0xed, 0x26, 0x23, 0x27, 0x2d, 0xd2, 0x03, 0x31, 0x22, 0x27, 0x14, 0x2b, 0x8a, 0x64, 0x86, 0x23, - 0x25, 0xfe, 0x14, 0x6d, 0x81, 0x1e, 0x7a, 0x6a, 0x73, 0xec, 0xd7, 0xe8, 0x2d, 0x97, 0x02, 0x39, - 0x06, 0x3d, 0x08, 0x8d, 0x73, 0xe9, 0x67, 0xc8, 0xa9, 0x98, 0xe1, 0x50, 0x92, 0x0b, 0xa7, 0x56, - 0xda, 0xdb, 0xf0, 0xcd, 0xbc, 0xdf, 0x7b, 0xf3, 0xfe, 0xfc, 0xde, 0x10, 0x36, 0x7a, 0xc3, 0x7a, - 0x6f, 0x18, 0x53, 0x36, 0xa4, 0x6c, 0xbc, 0x88, 0x3a, 0xf5, 0x88, 0x85, 0x51, 0x18, 0x53, 0x66, - 0xf5, 0x86, 0x66, 0xc4, 0x42, 0x1e, 0xa2, 0xaa, 0x1d, 0xda, 0x3d, 0x16, 0x12, 0xbb, 0x6b, 0xf6, - 0x86, 0x66, 0x7a, 0xd4, 0x8c, 0x79, 0xc8, 0x88, 0x4b, 0xa3, 0xce, 0xda, 0x8a, 0xdc, 0x8c, 0x3a, - 0x75, 0x12, 0x79, 0x89, 0xce, 0x1a, 0x4a, 0x45, 0x0e, 0xe1, 0x44, 0xc9, 0x2e, 0xa4, 0xb2, 0x3e, - 0xe5, 0x64, 0x4a, 0xfe, 0x57, 0x85, 0x54, 0xa7, 0x81, 0xeb, 0x05, 0x54, 0x1c, 0x18, 0xda, 0xb6, - 0xda, 0xfc, 0xdb, 0xa9, 0x9b, 0x5b, 0x6a, 0xb7, 0xf6, 0x8e, 0x4b, 0xc4, 0x9c, 0x70, 0xaa, 0xce, - 0x18, 0x03, 0xee, 0xf9, 0xf5, 0xae, 0x6f, 0xd7, 0xb9, 0xd7, 0xa7, 0x31, 0x27, 0xfd, 0x48, 0xed, - 0xac, 0xba, 0xa1, 0x1b, 0xca, 0x65, 0x5d, 0xac, 0x12, 0x69, 0xed, 0xfb, 0x0c, 0xe4, 0xdb, 
0x91, - 0xef, 0x71, 0xd4, 0x84, 0x45, 0xce, 0x3c, 0xd7, 0xa5, 0xcc, 0xc8, 0xac, 0x67, 0x36, 0x8a, 0x9b, - 0x55, 0x73, 0x12, 0x0a, 0x75, 0x19, 0x53, 0x1e, 0x3d, 0x4c, 0x8e, 0x35, 0xb4, 0x17, 0xa3, 0xea, - 0xdc, 0xcb, 0x51, 0x35, 0x83, 0x53, 0x4d, 0x74, 0x08, 0x05, 0xd6, 0x8d, 0x2d, 0x87, 0xfa, 0x9c, - 0x18, 0xf3, 0x12, 0xe6, 0x9f, 0x53, 0x30, 0xea, 0x7a, 0x66, 0x7a, 0x3d, 0x73, 0xff, 0x61, 0xb3, - 0xd9, 0xe6, 0x84, 0xc7, 0x0d, 0x5d, 0x80, 0x1d, 0x8f, 0xaa, 0x1a, 0xbe, 0xd3, 0xde, 0x11, 0xea, - 0x58, 0x63, 0xdd, 0x58, 0xae, 0xae, 0xe5, 0x7e, 0x79, 0x5e, 0xcd, 0xd4, 0x30, 0xe4, 0xf7, 0x29, - 0x73, 0xe9, 0x6c, 0x9e, 0xca, 0xa3, 0xef, 0xf6, 0x54, 0x61, 0x3a, 0x50, 0x6a, 0x76, 0x49, 0xe0, - 0x52, 0x4c, 0x23, 0xdf, 0xb3, 0x49, 0x8c, 0xf6, 0x7e, 0x0b, 0xbe, 0x71, 0x0a, 0xf8, 0x49, 0x9d, - 0xdf, 0xb3, 0xf2, 0xcd, 0xf3, 0xea, 0x5c, 0xed, 0xf5, 0x3c, 0x94, 0x9b, 0x61, 0x3f, 0x1a, 0x70, - 0xda, 0xec, 0x52, 0xbb, 0x17, 0x0f, 0xfa, 0xe8, 0x73, 0x28, 0xda, 0x6a, 0x6d, 0x79, 0x8e, 0xb4, - 0xb5, 0xd4, 0xd8, 0x15, 0x08, 0x3f, 0x8d, 0xaa, 0x5b, 0xae, 0xc7, 0xbb, 0x83, 0x8e, 0x69, 0x87, - 0xfd, 0xfa, 0xd8, 0xba, 0xd3, 0x99, 0xac, 0xeb, 0x51, 0xcf, 0xad, 0xcb, 0x54, 0x0f, 0x06, 0x9e, - 0x63, 0x3e, 0x78, 0xb0, 0xbb, 0x73, 0x3c, 0xaa, 0x42, 0x8a, 0xbe, 0xbb, 0x83, 0x21, 0x45, 0xdf, - 0x75, 0xd0, 0x3f, 0x60, 0x39, 0x26, 0x43, 0x6a, 0xc5, 0x01, 0x89, 0xe2, 0x6e, 0xc8, 0x65, 0x66, - 0x34, 0xbc, 0x24, 0x84, 0x6d, 0x25, 0x43, 0x5b, 0x90, 0xeb, 0x87, 0x0e, 0x35, 0xb2, 0xeb, 0x99, - 0x8d, 0xd2, 0xa9, 0x21, 0x4d, 0xd1, 0xf7, 0x43, 0x87, 0x62, 0x79, 0x18, 0x55, 0x20, 0xb1, 0x13, - 0x85, 0x5e, 0xc0, 0x8d, 0x9c, 0x84, 0x9d, 0x92, 0x20, 0x03, 0x16, 0x87, 0x94, 0xc5, 0x5e, 0x18, - 0x18, 0xf9, 0xf5, 0xcc, 0xc6, 0x32, 0x4e, 0x3f, 0xd1, 0x1d, 0x28, 0x70, 0xca, 0xfa, 0x5e, 0x40, - 0x38, 0x35, 0x16, 0xd6, 0xb3, 0x1b, 0xc5, 0xcd, 0x4b, 0xa7, 0xd8, 0x54, 0x31, 0xde, 0xa1, 0xb1, - 0xcd, 0xbc, 0x88, 0x87, 0xac, 0x91, 0x13, 0x31, 0xc2, 0x13, 0x65, 0x95, 0xc9, 0x87, 0x00, 0x22, - 0xc4, 0xc4, 0xe6, 0x02, 0x7d, 
0x15, 0xf2, 0x9d, 0x23, 0x4e, 0x63, 0x19, 0xd7, 0x2c, 0x4e, 0x3e, - 0xd0, 0x15, 0x40, 0xf1, 0xc0, 0x75, 0x69, 0xcc, 0xa9, 0x63, 0x11, 0x6e, 0x05, 0x24, 0x08, 0x63, - 0x19, 0x8c, 0x2c, 0xd6, 0xc7, 0x3b, 0xdb, 0xfc, 0x40, 0xc8, 0x15, 0xee, 0xd7, 0xf3, 0x70, 0xae, - 0x9d, 0x6e, 0x4d, 0x59, 0xb8, 0x0f, 0x85, 0x98, 0x13, 0xc6, 0xad, 0x1e, 0x3d, 0x52, 0xd9, 0xfb, - 0xdf, 0xdb, 0x51, 0xf5, 0xea, 0x4c, 0x99, 0x4b, 0x6f, 0x77, 0x97, 0x1e, 0x61, 0x4d, 0xc2, 0xdc, - 0xa5, 0x47, 0x68, 0x1f, 0x16, 0x69, 0xe0, 0x48, 0xc0, 0xf9, 0x3f, 0x01, 0xb8, 0x40, 0x03, 0x47, - 0xc0, 0x3d, 0x00, 0xb0, 0xc7, 0xfe, 0xca, 0xb4, 0x16, 0x37, 0xff, 0x63, 0x9e, 0x41, 0x6f, 0xe6, - 0xe4, 0x8a, 0x53, 0xf5, 0x3c, 0x05, 0xa4, 0xc2, 0xf2, 0x83, 0x06, 0xab, 0x2a, 0x37, 0x9c, 0x3a, - 0xb7, 0x86, 0xc4, 0xc7, 0x34, 0x1e, 0xf8, 0x82, 0x46, 0xf2, 0x92, 0x8f, 0x54, 0xf7, 0xff, 0xf7, - 0x4c, 0x83, 0x0a, 0x45, 0xb0, 0x00, 0xc5, 0x89, 0x2e, 0xba, 0x01, 0xf9, 0x58, 0x30, 0x8d, 0xf2, - 0xfa, 0xf2, 0x99, 0x20, 0x92, 0x97, 0x70, 0xa2, 0x24, 0xb4, 0xfb, 0xa2, 0xfb, 0x65, 0x3d, 0xce, - 0xa2, 0x2d, 0xb9, 0x02, 0x27, 0x4a, 0x68, 0x03, 0x74, 0x2f, 0xb6, 0x7c, 0x4a, 0x62, 0x6a, 0x31, - 0xfa, 0x64, 0x40, 0x63, 0x6e, 0x2c, 0xc8, 0xc2, 0x2e, 0x79, 0xf1, 0x9e, 0x10, 0xe3, 0x44, 0x8a, - 0xb6, 0xa1, 0x30, 0x26, 0x59, 0x43, 0x93, 0xb6, 0xfe, 0x3e, 0x65, 0x4b, 0xb4, 0xa7, 0xd9, 0xf5, - 0x6d, 0xf3, 0x30, 0x3d, 0x34, 0xae, 0xdd, 0x54, 0x80, 0xee, 0x81, 0xee, 0xd0, 0x88, 0x51, 0x19, - 0x45, 0x45, 0x9b, 0xf0, 0x1e, 0xb4, 0x89, 0xcb, 0x13, 0x75, 0xc9, 0x95, 0xe8, 0x53, 0x28, 0xdb, - 0x92, 0x9d, 0x2c, 0xa6, 0xe8, 0xc9, 0x58, 0x92, 0x80, 0xf5, 0xb3, 0x53, 0x7f, 0x82, 0xd5, 0x70, - 0xc9, 0x3e, 0xc9, 0x8c, 0x97, 0xa0, 0xc4, 0xc8, 0x63, 0x6e, 0xf9, 0xa1, 0xab, 0x3c, 0x5d, 0x96, - 0x9d, 0xb3, 0x24, 0xa4, 0x7b, 0xa1, 0x9b, 0xd8, 0x7f, 0x02, 0x45, 0xe2, 0x38, 0x56, 0x1c, 0x73, - 0xd2, 0xf1, 0xa9, 0xb1, 0x22, 0x6d, 0xdf, 0x9c, 0xb5, 0x0a, 0x4e, 0xd4, 0x92, 0xb9, 0xed, 0x38, - 0xed, 0xf6, 0xa1, 0xc0, 0x69, 0x94, 0x04, 0xbd, 0x4d, 0xbe, 0x31, 
0x10, 0xc7, 0x69, 0x27, 0x36, - 0xd0, 0x6d, 0xc8, 0x27, 0xfe, 0x20, 0x69, 0xec, 0xdf, 0x33, 0x45, 0x4e, 0x7a, 0xab, 0x12, 0x92, - 0xa8, 0xa3, 0x2f, 0x32, 0x70, 0x2e, 0x62, 0x74, 0xa8, 0x92, 0x9f, 0xbc, 0x0d, 0x88, 0x6f, 0xac, - 0xce, 0x92, 0xda, 0x9b, 0x6f, 0x47, 0xd5, 0x1b, 0xb3, 0xd3, 0xb6, 0x50, 0x6e, 0xfa, 0xa1, 0xdd, - 0x1b, 0x23, 0xe0, 0x15, 0x61, 0x5b, 0x16, 0xd8, 0x3d, 0x65, 0x19, 0x7d, 0x06, 0xba, 0x9d, 0xcc, - 0x0d, 0x2b, 0xa5, 0x73, 0xe3, 0xbc, 0xf4, 0xe6, 0xea, 0x4c, 0x8d, 0x3c, 0x35, 0x70, 0x70, 0xd9, - 0x3e, 0x29, 0x58, 0xfb, 0x08, 0xa6, 0x02, 0x8a, 0x10, 0xe4, 0xc4, 0x2b, 0x25, 0xa1, 0x32, 0x2c, - 0xd7, 0xa8, 0x0a, 0x79, 0x9b, 0xd9, 0x5b, 0x9b, 0xb2, 0x97, 0x97, 0x1b, 0x85, 0xe3, 0x51, 0x35, - 0xdf, 0xc4, 0xcd, 0xad, 0x4d, 0x9c, 0xc8, 0x13, 0x2e, 0x68, 0xe5, 0xb4, 0x8c, 0x3e, 0xdf, 0xca, - 0x69, 0x79, 0x7d, 0xa1, 0x95, 0xd3, 0x16, 0x75, 0xad, 0x95, 0xd3, 0x0a, 0x3a, 0xb4, 0x72, 0x5a, - 0x49, 0x2f, 0xb7, 0x72, 0x5a, 0x59, 0xd7, 0x5b, 0x39, 0x4d, 0xd7, 0x57, 0x5a, 0x39, 0xed, 0x9c, - 0xbe, 0xda, 0x5a, 0xd0, 0xbe, 0x3a, 0xd0, 0xbf, 0x3d, 0xa8, 0xad, 0x03, 0x7c, 0xc2, 0x3c, 0x4e, - 0x1b, 0x84, 0xdb, 0xdd, 0xd3, 0x1c, 0xa8, 0xdd, 0x87, 0xa5, 0xbd, 0xd0, 0xf5, 0x6c, 0xe2, 0x7f, - 0x1c, 0xed, 0x85, 0x2e, 0xda, 0x86, 0x6c, 0x18, 0x09, 0x52, 0x17, 0xe3, 0xe2, 0x5f, 0x67, 0xe5, - 0x79, 0xac, 0xaa, 0xd2, 0x2c, 0x74, 0x6b, 0x3f, 0xe6, 0xa1, 0x88, 0xc9, 0x63, 0xde, 0x0c, 0xfb, - 0x7d, 0x12, 0x38, 0xe8, 0x32, 0x94, 0xfb, 0xe4, 0x99, 0x4a, 0xb9, 0x17, 0x38, 0xf4, 0x99, 0xa4, - 0x8d, 0x1c, 0x5e, 0xee, 0x93, 0x67, 0x32, 0x1b, 0xbb, 0x42, 0x88, 0x0e, 0xe1, 0x2f, 0x53, 0x9d, - 0x3a, 0x7e, 0x37, 0x4a, 0x3d, 0x39, 0xdb, 0x8a, 0x9b, 0xc6, 0x29, 0xf3, 0x2b, 0x21, 0x8c, 0x8b, - 0x13, 0xd5, 0x7b, 0x4a, 0x53, 0x6e, 0xa0, 0x21, 0x5c, 0x3c, 0x09, 0x65, 0xc5, 0x82, 0x5c, 0x02, - 0x9b, 0x4a, 0xce, 0xc9, 0x36, 0x3e, 0x7c, 0x3b, 0xaa, 0x5e, 0x7b, 0xaf, 0x11, 0x20, 0x81, 0xdb, - 0x0a, 0x05, 0x9f, 0x8f, 0xa6, 0xed, 0xa5, 0x62, 0xf4, 0x04, 0x2e, 0xb0, 0x71, 0xc7, 0x59, 0x74, - 0x48, 
0x7c, 0x8b, 0xc9, 0x9e, 0x93, 0x3d, 0x5d, 0xdc, 0xfc, 0xff, 0x1f, 0x6a, 0x58, 0x15, 0xe7, - 0x55, 0x76, 0xda, 0x60, 0xd8, 0x83, 0xe2, 0x53, 0x91, 0x6d, 0xab, 0x23, 0xd2, 0x6d, 0x94, 0x66, - 0x9c, 0x47, 0x93, 0x0a, 0xc1, 0xf0, 0x74, 0x52, 0x2d, 0x6d, 0x28, 0xf9, 0x49, 0x7a, 0xad, 0x30, - 0x12, 0x94, 0x64, 0x94, 0x67, 0x9c, 0x37, 0xd3, 0x05, 0x85, 0x97, 0xfc, 0xe9, 0xf2, 0x7a, 0x04, - 0xc0, 0x19, 0xb1, 0xa9, 0x25, 0x0b, 0x51, 0x97, 0x55, 0x76, 0xfd, 0xec, 0x48, 0x4c, 0xaa, 0xc9, - 0x3c, 0x14, 0xea, 0x3b, 0x84, 0x93, 0x5b, 0x01, 0x67, 0x47, 0xb8, 0xc0, 0xd3, 0xef, 0xb5, 0x1b, - 0x50, 0x3a, 0xb9, 0x89, 0x74, 0xc8, 0xa6, 0x6f, 0x87, 0x02, 0x16, 0x4b, 0xf1, 0x6a, 0x19, 0x12, - 0x7f, 0x90, 0xcc, 0xce, 0x02, 0x4e, 0x3e, 0xae, 0xcd, 0x7f, 0x20, 0x5a, 0x2c, 0xab, 0xe7, 0xc6, - 0x8d, 0x36, 0xaf, 0x67, 0x93, 0x26, 0xfa, 0xee, 0xa0, 0x76, 0x1d, 0x56, 0xa6, 0x1c, 0xb8, 0x1d, - 0x86, 0x9c, 0xb2, 0x59, 0x8b, 0xba, 0x71, 0xe5, 0xc5, 0xeb, 0xca, 0xdc, 0x8b, 0xe3, 0x4a, 0xe6, - 0xe5, 0x71, 0x25, 0xf3, 0xea, 0xb8, 0x92, 0xf9, 0xf9, 0xb8, 0x92, 0xf9, 0xf2, 0x4d, 0x65, 0xee, - 0xe5, 0x9b, 0xca, 0xdc, 0xab, 0x37, 0x95, 0xb9, 0x47, 0x30, 0xf9, 0xcf, 0xe8, 0x2c, 0xc8, 0x5f, - 0x86, 0xad, 0x5f, 0x03, 0x00, 0x00, 0xff, 0xff, 0xf1, 0xe4, 0x48, 0xb2, 0x4d, 0x0d, 0x00, 0x00, + proto.RegisterFile("kv/kvserver/kvserverpb/proposer_kv.proto", fileDescriptor_proposer_kv_0b3536bd0bf3d98c) +} + +var fileDescriptor_proposer_kv_0b3536bd0bf3d98c = []byte{ + // 1453 bytes of a gzipped FileDescriptorProto + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xac, 0x57, 0x4f, 0x73, 0x13, 0xc7, + 0x12, 0xb7, 0x2c, 0xc9, 0x5e, 0xb5, 0x6c, 0x69, 0x3d, 0x18, 0xd8, 0xe7, 0xf7, 0x9e, 0xe4, 0xd2, + 0xe3, 0x51, 0x4e, 0x42, 0x56, 0x94, 0x9d, 0x54, 0xa5, 0x80, 0x4a, 0x61, 0xc9, 0x10, 0x2c, 0x6c, + 0x07, 0x46, 0x86, 0xa4, 0xc8, 0x61, 0x6b, 0xb4, 0x3b, 0xac, 0x36, 0x5a, 0x69, 0x97, 0x9d, 0x91, + 0xc0, 0x9f, 0x22, 0x49, 0x55, 0x0e, 0xb9, 0x24, 0xe1, 0x98, 0xaf, 0x91, 0x1b, 0x47, 0x8e, 0x54, + 0x0e, 0xaa, 0x60, 
0x2e, 0xf9, 0x0c, 0x9c, 0x52, 0x33, 0x3b, 0xab, 0x3f, 0x29, 0x13, 0x8b, 0x24, + 0xb7, 0xd9, 0x9e, 0xe9, 0x5f, 0xf7, 0xf4, 0x9f, 0x5f, 0xcf, 0xc2, 0x46, 0x67, 0x50, 0xed, 0x0c, + 0x18, 0x8d, 0x06, 0x34, 0x1a, 0x2d, 0xc2, 0x56, 0x35, 0x8c, 0x82, 0x30, 0x60, 0x34, 0xb2, 0x3a, + 0x03, 0x33, 0x8c, 0x02, 0x1e, 0xa0, 0xb2, 0x1d, 0xd8, 0x9d, 0x28, 0x20, 0x76, 0xdb, 0xec, 0x0c, + 0xcc, 0xe4, 0xa8, 0xc9, 0x78, 0x10, 0x11, 0x97, 0x86, 0xad, 0xb5, 0x15, 0xb9, 0x19, 0xb6, 0xaa, + 0x24, 0xf4, 0x62, 0x9d, 0x35, 0x94, 0x88, 0x1c, 0xc2, 0x89, 0x92, 0x9d, 0x4b, 0x64, 0x5d, 0xca, + 0xc9, 0x84, 0xfc, 0xdf, 0x0a, 0xa9, 0x4a, 0x7b, 0xae, 0xd7, 0xa3, 0xe2, 0xc0, 0xc0, 0xb6, 0xd5, + 0xe6, 0x7f, 0x4e, 0xdc, 0xdc, 0x52, 0xbb, 0x95, 0x37, 0x5c, 0x82, 0x71, 0xc2, 0xa9, 0x3a, 0x63, + 0xf4, 0xb9, 0xe7, 0x57, 0xdb, 0xbe, 0x5d, 0xe5, 0x5e, 0x97, 0x32, 0x4e, 0xba, 0xa1, 0xda, 0x59, + 0x75, 0x03, 0x37, 0x90, 0xcb, 0xaa, 0x58, 0xc5, 0xd2, 0xca, 0x4f, 0x29, 0xc8, 0x36, 0x43, 0xdf, + 0xe3, 0xa8, 0x0e, 0x8b, 0x3c, 0xf2, 0x5c, 0x97, 0x46, 0x46, 0x6a, 0x3d, 0xb5, 0x91, 0xdf, 0x2c, + 0x9b, 0xe3, 0x50, 0xa8, 0xcb, 0x98, 0xf2, 0xe8, 0x61, 0x7c, 0xac, 0xa6, 0x3d, 0x1b, 0x96, 0xe7, + 0x9e, 0x0f, 0xcb, 0x29, 0x9c, 0x68, 0xa2, 0x43, 0xc8, 0x45, 0x6d, 0x66, 0x39, 0xd4, 0xe7, 0xc4, + 0x98, 0x97, 0x30, 0xff, 0x9f, 0x80, 0x51, 0xd7, 0x33, 0x93, 0xeb, 0x99, 0xfb, 0xf7, 0xeb, 0xf5, + 0x26, 0x27, 0x9c, 0xd5, 0x74, 0x01, 0x76, 0x3c, 0x2c, 0x6b, 0xf8, 0x56, 0x73, 0x47, 0xa8, 0x63, + 0x2d, 0x6a, 0x33, 0xb9, 0xba, 0x92, 0xf9, 0xed, 0x69, 0x39, 0x55, 0xc1, 0x90, 0xdd, 0xa7, 0x91, + 0x4b, 0x67, 0xf3, 0x54, 0x1e, 0x7d, 0xb3, 0xa7, 0x0a, 0xd3, 0x81, 0x42, 0xbd, 0x4d, 0x7a, 0x2e, + 0xc5, 0x34, 0xf4, 0x3d, 0x9b, 0x30, 0xb4, 0xf7, 0x47, 0xf0, 0x8d, 0x13, 0xc0, 0xa7, 0x75, 0xfe, + 0xcc, 0xca, 0x77, 0x4f, 0xcb, 0x73, 0x95, 0x97, 0xf3, 0x50, 0xac, 0x07, 0xdd, 0xb0, 0xcf, 0x69, + 0xbd, 0x4d, 0xed, 0x0e, 0xeb, 0x77, 0xd1, 0x97, 0x90, 0xb7, 0xd5, 0xda, 0xf2, 0x1c, 0x69, 0x6b, + 0xa9, 0xb6, 0x2b, 0x10, 0x7e, 0x19, 0x96, 0xb7, 0x5c, 
0x8f, 0xb7, 0xfb, 0x2d, 0xd3, 0x0e, 0xba, + 0xd5, 0x91, 0x75, 0xa7, 0x35, 0x5e, 0x57, 0xc3, 0x8e, 0x5b, 0x95, 0xa9, 0xee, 0xf7, 0x3d, 0xc7, + 0xbc, 0x77, 0x6f, 0x77, 0xe7, 0x78, 0x58, 0x86, 0x04, 0x7d, 0x77, 0x07, 0x43, 0x82, 0xbe, 0xeb, + 0xa0, 0xff, 0xc1, 0x32, 0x23, 0x03, 0x6a, 0xb1, 0x1e, 0x09, 0x59, 0x3b, 0xe0, 0x32, 0x33, 0x1a, + 0x5e, 0x12, 0xc2, 0xa6, 0x92, 0xa1, 0x2d, 0xc8, 0x74, 0x03, 0x87, 0x1a, 0xe9, 0xf5, 0xd4, 0x46, + 0xe1, 0xc4, 0x90, 0x26, 0xe8, 0xfb, 0x81, 0x43, 0xb1, 0x3c, 0x8c, 0x4a, 0x10, 0xdb, 0x09, 0x03, + 0xaf, 0xc7, 0x8d, 0x8c, 0x84, 0x9d, 0x90, 0x20, 0x03, 0x16, 0x07, 0x34, 0x62, 0x5e, 0xd0, 0x33, + 0xb2, 0xeb, 0xa9, 0x8d, 0x65, 0x9c, 0x7c, 0xa2, 0x5b, 0x90, 0xe3, 0x34, 0xea, 0x7a, 0x3d, 0xc2, + 0xa9, 0xb1, 0xb0, 0x9e, 0xde, 0xc8, 0x6f, 0x5e, 0x38, 0xc1, 0xa6, 0x8a, 0xf1, 0x0e, 0x65, 0x76, + 0xe4, 0x85, 0x3c, 0x88, 0x6a, 0x19, 0x11, 0x23, 0x3c, 0x56, 0x56, 0x99, 0xbc, 0x0f, 0x20, 0x42, + 0x4c, 0x6c, 0x2e, 0xd0, 0x57, 0x21, 0xdb, 0x3a, 0xe2, 0x94, 0xc9, 0xb8, 0xa6, 0x71, 0xfc, 0x81, + 0x2e, 0x01, 0x62, 0x7d, 0xd7, 0xa5, 0x8c, 0x53, 0xc7, 0x22, 0xdc, 0xea, 0x91, 0x5e, 0xc0, 0x64, + 0x30, 0xd2, 0x58, 0x1f, 0xed, 0x6c, 0xf3, 0x03, 0x21, 0x57, 0xb8, 0xdf, 0xce, 0xc3, 0x99, 0x66, + 0xb2, 0x35, 0x61, 0xe1, 0x2e, 0xe4, 0x18, 0x27, 0x11, 0xb7, 0x3a, 0xf4, 0x48, 0x65, 0xef, 0x83, + 0xd7, 0xc3, 0xf2, 0xe5, 0x99, 0x32, 0x97, 0xdc, 0xee, 0x36, 0x3d, 0xc2, 0x9a, 0x84, 0xb9, 0x4d, + 0x8f, 0xd0, 0x3e, 0x2c, 0xd2, 0x9e, 0x23, 0x01, 0xe7, 0xff, 0x06, 0xe0, 0x02, 0xed, 0x39, 0x02, + 0xee, 0x1e, 0x80, 0x3d, 0xf2, 0x57, 0xa6, 0x35, 0xbf, 0xf9, 0x9e, 0x79, 0x0a, 0xbd, 0x99, 0xe3, + 0x2b, 0x4e, 0xd4, 0xf3, 0x04, 0x90, 0x0a, 0xcb, 0xcf, 0x1a, 0xac, 0xaa, 0xdc, 0x70, 0xea, 0xdc, + 0x18, 0x10, 0x1f, 0x53, 0xd6, 0xf7, 0x05, 0x8d, 0x64, 0x25, 0x1f, 0xa9, 0xee, 0x7f, 0xff, 0x54, + 0x83, 0x0a, 0x45, 0xb0, 0x00, 0xc5, 0xb1, 0x2e, 0xba, 0x06, 0x59, 0x26, 0x98, 0x46, 0x79, 0x7d, + 0xf1, 0x54, 0x10, 0xc9, 0x4b, 0x38, 0x56, 0x12, 0xda, 0x5d, 0xd1, 0xfd, 0xb2, 0x1e, 0x67, 
0xd1, + 0x96, 0x5c, 0x81, 0x63, 0x25, 0xb4, 0x01, 0xba, 0xc7, 0x2c, 0x9f, 0x12, 0x46, 0xad, 0x88, 0x3e, + 0xea, 0x53, 0xc6, 0x8d, 0x05, 0x59, 0xd8, 0x05, 0x8f, 0xed, 0x09, 0x31, 0x8e, 0xa5, 0x68, 0x1b, + 0x72, 0x23, 0x92, 0x35, 0x34, 0x69, 0xeb, 0xbf, 0x13, 0xb6, 0x44, 0x7b, 0x9a, 0x6d, 0xdf, 0x36, + 0x0f, 0x93, 0x43, 0xa3, 0xda, 0x4d, 0x04, 0xe8, 0x0e, 0xe8, 0x0e, 0x0d, 0x23, 0x2a, 0xa3, 0xa8, + 0x68, 0x13, 0xde, 0x82, 0x36, 0x71, 0x71, 0xac, 0x2e, 0xb9, 0x12, 0x7d, 0x0e, 0x45, 0x5b, 0xb2, + 0x93, 0x15, 0x29, 0x7a, 0x32, 0x96, 0x24, 0x60, 0xf5, 0xf4, 0xd4, 0x4f, 0xb1, 0x1a, 0x2e, 0xd8, + 0xd3, 0xcc, 0x78, 0x01, 0x0a, 0x11, 0x79, 0xc8, 0x2d, 0x3f, 0x70, 0x95, 0xa7, 0xcb, 0xb2, 0x73, + 0x96, 0x84, 0x74, 0x2f, 0x70, 0x63, 0xfb, 0x8f, 0x20, 0x4f, 0x1c, 0xc7, 0x62, 0x8c, 0x93, 0x96, + 0x4f, 0x8d, 0x15, 0x69, 0xfb, 0xfa, 0xac, 0x55, 0x30, 0x55, 0x4b, 0xe6, 0xb6, 0xe3, 0x34, 0x9b, + 0x87, 0x02, 0xa7, 0x56, 0x10, 0xf4, 0x36, 0xfe, 0xc6, 0x40, 0x1c, 0xa7, 0x19, 0xdb, 0x40, 0x37, + 0x21, 0x1b, 0xfb, 0x83, 0xa4, 0xb1, 0x77, 0x67, 0x8a, 0x9c, 0xf4, 0x56, 0x25, 0x24, 0x56, 0x47, + 0x5f, 0xa5, 0xe0, 0x4c, 0x18, 0xd1, 0x81, 0x4a, 0x7e, 0xfc, 0x36, 0x20, 0xbe, 0xb1, 0x3a, 0x4b, + 0x6a, 0xaf, 0xbf, 0x1e, 0x96, 0xaf, 0xcd, 0x4e, 0xdb, 0x42, 0xb9, 0xee, 0x07, 0x76, 0x67, 0x84, + 0x80, 0x57, 0x84, 0x6d, 0x59, 0x60, 0x77, 0x94, 0x65, 0xf4, 0x05, 0xe8, 0x76, 0x3c, 0x37, 0xac, + 0x84, 0xce, 0x8d, 0xb3, 0xd2, 0x9b, 0xcb, 0x33, 0x35, 0xf2, 0xc4, 0xc0, 0xc1, 0x45, 0x7b, 0x5a, + 0xb0, 0xf6, 0x09, 0x4c, 0x04, 0x14, 0x21, 0xc8, 0x88, 0x57, 0x4a, 0x4c, 0x65, 0x58, 0xae, 0x51, + 0x19, 0xb2, 0x76, 0x64, 0x6f, 0x6d, 0xca, 0x5e, 0x5e, 0xae, 0xe5, 0x8e, 0x87, 0xe5, 0x6c, 0x1d, + 0xd7, 0xb7, 0x36, 0x71, 0x2c, 0x8f, 0xb9, 0xa0, 0x91, 0xd1, 0x52, 0xfa, 0x7c, 0x23, 0xa3, 0x65, + 0xf5, 0x85, 0x46, 0x46, 0x5b, 0xd4, 0xb5, 0x46, 0x46, 0xcb, 0xe9, 0xd0, 0xc8, 0x68, 0x05, 0xbd, + 0xd8, 0xc8, 0x68, 0x45, 0x5d, 0x6f, 0x64, 0x34, 0x5d, 0x5f, 0x69, 0x64, 0xb4, 0x33, 0xfa, 0x6a, + 0x63, 0x41, 0xfb, 0xe6, 0x40, 
0xff, 0xe1, 0xa0, 0xb2, 0x0e, 0xf0, 0x59, 0xe4, 0x71, 0x5a, 0x23, + 0xdc, 0x6e, 0x9f, 0xe4, 0x40, 0xe5, 0x2e, 0x2c, 0xed, 0x05, 0xae, 0x67, 0x13, 0xff, 0xd3, 0x70, + 0x2f, 0x70, 0xd1, 0x36, 0xa4, 0x83, 0x50, 0x90, 0xba, 0x18, 0x17, 0xef, 0x9c, 0x96, 0xe7, 0x91, + 0xaa, 0x4a, 0xb3, 0xd0, 0xad, 0x7c, 0xbf, 0x00, 0x79, 0x4c, 0x1e, 0xf2, 0x7a, 0xd0, 0xed, 0x92, + 0x9e, 0x83, 0x2e, 0x42, 0xb1, 0x4b, 0x9e, 0xa8, 0x94, 0x7b, 0x3d, 0x87, 0x3e, 0x91, 0xb4, 0x91, + 0xc1, 0xcb, 0x5d, 0xf2, 0x44, 0x66, 0x63, 0x57, 0x08, 0xd1, 0x21, 0xfc, 0x6b, 0xa2, 0x53, 0x47, + 0xef, 0x46, 0xa9, 0x27, 0x67, 0x5b, 0x7e, 0xd3, 0x38, 0x61, 0x7e, 0xc5, 0x84, 0x71, 0x7e, 0xac, + 0x7a, 0x47, 0x69, 0xca, 0x0d, 0x34, 0x80, 0xf3, 0xd3, 0x50, 0x16, 0x13, 0xe4, 0xd2, 0xb3, 0xa9, + 0xe4, 0x9c, 0x74, 0xed, 0xe3, 0xd7, 0xc3, 0xf2, 0x95, 0xb7, 0x1a, 0x01, 0x12, 0xb8, 0xa9, 0x50, + 0xf0, 0xd9, 0x70, 0xd2, 0x5e, 0x22, 0x46, 0x8f, 0xe0, 0x5c, 0x34, 0xea, 0x38, 0x8b, 0x0e, 0x88, + 0x6f, 0x45, 0xb2, 0xe7, 0x64, 0x4f, 0xe7, 0x37, 0x3f, 0xfc, 0x4b, 0x0d, 0xab, 0xe2, 0xbc, 0x1a, + 0x9d, 0x34, 0x18, 0xf6, 0x20, 0xff, 0x58, 0x64, 0xdb, 0x6a, 0x89, 0x74, 0x1b, 0x85, 0x19, 0xe7, + 0xd1, 0xb8, 0x42, 0x30, 0x3c, 0x1e, 0x57, 0x4b, 0x13, 0x0a, 0x7e, 0x9c, 0x5e, 0x2b, 0x08, 0x05, + 0x25, 0x19, 0xc5, 0x19, 0xe7, 0xcd, 0x64, 0x41, 0xe1, 0x25, 0x7f, 0xb2, 0xbc, 0x1e, 0x00, 0xf0, + 0x88, 0xd8, 0xd4, 0x92, 0x85, 0xa8, 0xcb, 0x2a, 0xbb, 0x7a, 0x7a, 0x24, 0xc6, 0xd5, 0x64, 0x1e, + 0x0a, 0xf5, 0x1d, 0xc2, 0xc9, 0x8d, 0x1e, 0x8f, 0x8e, 0x70, 0x8e, 0x27, 0xdf, 0xe8, 0x00, 0x74, + 0xdb, 0x0f, 0x18, 0x75, 0xac, 0xf1, 0xcc, 0x58, 0x99, 0x7d, 0x66, 0x14, 0x63, 0xe5, 0x91, 0x78, + 0xed, 0x1a, 0x14, 0xa6, 0x8d, 0x21, 0x1d, 0xd2, 0xc9, 0x5b, 0x24, 0x87, 0xc5, 0x52, 0xbc, 0x82, + 0x06, 0xc4, 0xef, 0xc7, 0xb3, 0x38, 0x87, 0xe3, 0x8f, 0x2b, 0xf3, 0x1f, 0x89, 0x96, 0x4d, 0xeb, + 0x99, 0x51, 0xe3, 0xce, 0xeb, 0xe9, 0xb8, 0x29, 0x7f, 0x3c, 0xa8, 0x5c, 0x85, 0x95, 0x89, 0x0b, + 0xdd, 0x0c, 0x02, 0x4e, 0xa3, 0x59, 0x9b, 0xa4, 0xe2, 0xc2, 0xd9, 
0xfa, 0xb4, 0x9f, 0x0a, 0xe0, + 0x1f, 0xbe, 0x7d, 0xed, 0xd2, 0xb3, 0x97, 0xa5, 0xb9, 0x67, 0xc7, 0xa5, 0xd4, 0xf3, 0xe3, 0x52, + 0xea, 0xc5, 0x71, 0x29, 0xf5, 0xeb, 0x71, 0x29, 0xf5, 0xf5, 0xab, 0xd2, 0xdc, 0xf3, 0x57, 0xa5, + 0xb9, 0x17, 0xaf, 0x4a, 0x73, 0x0f, 0x60, 0xfc, 0x83, 0xd4, 0x5a, 0x90, 0xff, 0x3a, 0x5b, 0xbf, + 0x07, 0x00, 0x00, 0xff, 0xff, 0x38, 0xb4, 0xb7, 0xb4, 0x06, 0x0e, 0x00, 0x00, } diff --git a/pkg/kv/kvserver/kvserverpb/proposer_kv.proto b/pkg/kv/kvserver/kvserverpb/proposer_kv.proto index 3267b2a7b4a8..249b4e45ca58 100644 --- a/pkg/kv/kvserver/kvserverpb/proposer_kv.proto +++ b/pkg/kv/kvserver/kvserverpb/proposer_kv.proto @@ -240,8 +240,24 @@ message RaftCommand { // been added after it, and on removal, the assignment counters must be // updated accordingly. Managing retry of proposals becomes trickier as // well as that uproots whatever ordering was originally envisioned. + // + // This field is set through RaftCommandFooter hackery. uint64 max_lease_index = 4; + // The closed timestamp carried by this command. Once a follower is told to + // apply this command, it knows that there will be no further writes at + // timestamps <= closed_timestamp. Note that the command itself might + // represent a write at a lower timestamp, so the closed timestamp can only be + // used after this command is applied. + // + // The field can be zero, which is to be interpreted as no closed timestamp + // update. Some commands (lease requests) implicitly carry a closed timestamp + // in a command-specific way. If the value is not zero, the value is greater + // than or equal to that of the previous command (and all before it). + // + // This field is set through ClosedTimestampFooter hackery. + util.hlc.Timestamp closed_timestamp = 17 [(gogoproto.nullable) = false]; + reserved 3; // Proposer-evaluated KV mode. 
@@ -277,3 +293,10 @@ message RaftCommand { message RaftCommandFooter { uint64 max_lease_index = 4; } + +// ClosedTimestampFooter is similar to RaftCommandFooter, allowing the proposal +// buffer to fill in the closed_timestamp field after most of the proto has been +// marshaled already. +message ClosedTimestampFooter { + util.hlc.Timestamp closed_timestamp = 17 [(gogoproto.nullable) = false]; +} diff --git a/pkg/kv/kvserver/kvserverpb/state.pb.go b/pkg/kv/kvserver/kvserverpb/state.pb.go index d33ed0c2ccae..b81a0c1f0d0d 100644 --- a/pkg/kv/kvserver/kvserverpb/state.pb.go +++ b/pkg/kv/kvserver/kvserverpb/state.pb.go @@ -102,13 +102,20 @@ type ReplicaState struct { // [1]: migration.Manager // [2]: PurgeOutdatedReplicas Version *roachpb.Version `protobuf:"bytes,12,opt,name=version,proto3" json:"version,omitempty"` + // closed_timestamp is the largest timestamp that is known to have been + // closed. This means that the current leaseholder (if any) and any future + // leaseholder will not evaluate writes at or below this timestamp, and also + // that any in-flight commands that can still apply are writing at higher + // timestamps. Non-leaseholder replicas are free to serve "follower reads" at + // or below this timestamp. 
+ ClosedTimestamp hlc.Timestamp `protobuf:"bytes,13,opt,name=closed_timestamp,json=closedTimestamp,proto3" json:"closed_timestamp"` } func (m *ReplicaState) Reset() { *m = ReplicaState{} } func (m *ReplicaState) String() string { return proto.CompactTextString(m) } func (*ReplicaState) ProtoMessage() {} func (*ReplicaState) Descriptor() ([]byte, []int) { - return fileDescriptor_state_acd314f0f91777f8, []int{0} + return fileDescriptor_state_884b07f3590284d4, []int{0} } func (m *ReplicaState) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -180,7 +187,7 @@ func (m *RangeInfo) Reset() { *m = RangeInfo{} } func (m *RangeInfo) String() string { return proto.CompactTextString(m) } func (*RangeInfo) ProtoMessage() {} func (*RangeInfo) Descriptor() ([]byte, []int) { - return fileDescriptor_state_acd314f0f91777f8, []int{1} + return fileDescriptor_state_884b07f3590284d4, []int{1} } func (m *RangeInfo) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -216,7 +223,7 @@ func (m *RangeInfo_CTEntry) Reset() { *m = RangeInfo_CTEntry{} } func (m *RangeInfo_CTEntry) String() string { return proto.CompactTextString(m) } func (*RangeInfo_CTEntry) ProtoMessage() {} func (*RangeInfo_CTEntry) Descriptor() ([]byte, []int) { - return fileDescriptor_state_acd314f0f91777f8, []int{1, 0} + return fileDescriptor_state_884b07f3590284d4, []int{1, 0} } func (m *RangeInfo_CTEntry) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -252,7 +259,7 @@ func (m *LatchManagerInfo) Reset() { *m = LatchManagerInfo{} } func (m *LatchManagerInfo) String() string { return proto.CompactTextString(m) } func (*LatchManagerInfo) ProtoMessage() {} func (*LatchManagerInfo) Descriptor() ([]byte, []int) { - return fileDescriptor_state_acd314f0f91777f8, []int{2} + return fileDescriptor_state_884b07f3590284d4, []int{2} } func (m *LatchManagerInfo) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -329,6 +336,9 @@ func (this *ReplicaState) Equal(that interface{}) bool { if 
!this.Version.Equal(that1.Version) { return false } + if !this.ClosedTimestamp.Equal(&that1.ClosedTimestamp) { + return false + } return true } func (this *RangeInfo) Equal(that interface{}) bool { @@ -530,6 +540,14 @@ func (m *ReplicaState) MarshalTo(dAtA []byte) (int, error) { } i += n6 } + dAtA[i] = 0x6a + i++ + i = encodeVarintState(dAtA, i, uint64(m.ClosedTimestamp.Size())) + n7, err := m.ClosedTimestamp.MarshalTo(dAtA[i:]) + if err != nil { + return 0, err + } + i += n7 return i, nil } @@ -551,11 +569,11 @@ func (m *RangeInfo) MarshalTo(dAtA []byte) (int, error) { dAtA[i] = 0xa i++ i = encodeVarintState(dAtA, i, uint64(m.ReplicaState.Size())) - n7, err := m.ReplicaState.MarshalTo(dAtA[i:]) + n8, err := m.ReplicaState.MarshalTo(dAtA[i:]) if err != nil { return 0, err } - i += n7 + i += n8 if m.LastIndex != 0 { dAtA[i] = 0x10 i++ @@ -599,19 +617,19 @@ func (m *RangeInfo) MarshalTo(dAtA []byte) (int, error) { dAtA[i] = 0x5a i++ i = encodeVarintState(dAtA, i, uint64(m.NewestClosedTimestamp.Size())) - n8, err := m.NewestClosedTimestamp.MarshalTo(dAtA[i:]) + n9, err := m.NewestClosedTimestamp.MarshalTo(dAtA[i:]) if err != nil { return 0, err } - i += n8 + i += n9 dAtA[i] = 0x62 i++ i = encodeVarintState(dAtA, i, uint64(m.ActiveClosedTimestamp.Size())) - n9, err := m.ActiveClosedTimestamp.MarshalTo(dAtA[i:]) + n10, err := m.ActiveClosedTimestamp.MarshalTo(dAtA[i:]) if err != nil { return 0, err } - i += n9 + i += n10 if m.RangefeedRegistrations != 0 { dAtA[i] = 0x68 i++ @@ -623,22 +641,22 @@ func (m *RangeInfo) MarshalTo(dAtA []byte) (int, error) { i = encodeVarintState(dAtA, i, uint64(m.ProposalQuotaBaseIndex)) } if len(m.ProposalQuotaReleaseQueue) > 0 { - dAtA11 := make([]byte, len(m.ProposalQuotaReleaseQueue)*10) - var j10 int + dAtA12 := make([]byte, len(m.ProposalQuotaReleaseQueue)*10) + var j11 int for _, num1 := range m.ProposalQuotaReleaseQueue { num := uint64(num1) for num >= 1<<7 { - dAtA11[j10] = uint8(uint64(num)&0x7f | 0x80) + dAtA12[j11] = 
uint8(uint64(num)&0x7f | 0x80) num >>= 7 - j10++ + j11++ } - dAtA11[j10] = uint8(num) - j10++ + dAtA12[j11] = uint8(num) + j11++ } dAtA[i] = 0x7a i++ - i = encodeVarintState(dAtA, i, uint64(j10)) - i += copy(dAtA[i:], dAtA11[:j10]) + i = encodeVarintState(dAtA, i, uint64(j11)) + i += copy(dAtA[i:], dAtA12[:j11]) } if m.TenantID != 0 { dAtA[i] = 0x80 @@ -681,11 +699,11 @@ func (m *RangeInfo_CTEntry) MarshalTo(dAtA []byte) (int, error) { dAtA[i] = 0x12 i++ i = encodeVarintState(dAtA, i, uint64(m.ClosedTimestamp.Size())) - n12, err := m.ClosedTimestamp.MarshalTo(dAtA[i:]) + n13, err := m.ClosedTimestamp.MarshalTo(dAtA[i:]) if err != nil { return 0, err } - i += n12 + i += n13 if m.MLAI != 0 { dAtA[i] = 0x18 i++ @@ -775,6 +793,8 @@ func (m *ReplicaState) Size() (n int) { l = m.Version.Size() n += 1 + l + sovState(uint64(l)) } + l = m.ClosedTimestamp.Size() + n += 1 + l + sovState(uint64(l)) return n } @@ -1167,6 +1187,36 @@ func (m *ReplicaState) Unmarshal(dAtA []byte) error { return err } iNdEx = postIndex + case 13: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field ClosedTimestamp", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowState + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= (int(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return ErrInvalidLengthState + } + postIndex := iNdEx + msglen + if postIndex > l { + return io.ErrUnexpectedEOF + } + if err := m.ClosedTimestamp.Unmarshal(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex default: iNdEx = preIndex skippy, err := skipState(dAtA[iNdEx:]) @@ -1952,76 +2002,76 @@ var ( ) func init() { - proto.RegisterFile("kv/kvserver/kvserverpb/state.proto", fileDescriptor_state_acd314f0f91777f8) + proto.RegisterFile("kv/kvserver/kvserverpb/state.proto", fileDescriptor_state_884b07f3590284d4) } -var 
fileDescriptor_state_acd314f0f91777f8 = []byte{ - // 1061 bytes of a gzipped FileDescriptorProto - 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x8c, 0x55, 0xcd, 0x6e, 0x1b, 0x37, - 0x10, 0xf6, 0x5a, 0x2b, 0x5b, 0xa2, 0xec, 0x58, 0x61, 0x9d, 0x78, 0xe3, 0xc4, 0x92, 0x21, 0xa0, - 0x85, 0x0b, 0xa4, 0x2b, 0xd4, 0xe9, 0x0f, 0xfa, 0x03, 0x14, 0x96, 0x5c, 0x14, 0x72, 0x6d, 0x23, - 0x66, 0x84, 0x1c, 0xd2, 0xc3, 0x82, 0xda, 0x1d, 0xaf, 0x16, 0x5a, 0x2d, 0x37, 0x24, 0xa5, 0xd8, - 0x79, 0x8a, 0x3e, 0x42, 0x5f, 0xa3, 0x6f, 0x60, 0xf4, 0xe4, 0x63, 0x4e, 0x42, 0x2b, 0x5f, 0x7a, - 0xeb, 0xbd, 0xa7, 0x80, 0xa4, 0x56, 0x96, 0x6c, 0x03, 0xc9, 0x8d, 0xfb, 0x7d, 0xdf, 0x0c, 0xc9, - 0xf9, 0x86, 0xb3, 0xa8, 0xd6, 0x1b, 0xd6, 0x7b, 0x43, 0x01, 0x7c, 0x08, 0x7c, 0xba, 0x48, 0x3b, - 0x75, 0x21, 0xa9, 0x04, 0x37, 0xe5, 0x4c, 0x32, 0x5c, 0xf5, 0x99, 0xdf, 0xe3, 0x8c, 0xfa, 0x5d, - 0xb7, 0x37, 0x74, 0x33, 0x91, 0x2b, 0x24, 0xe3, 0x34, 0x84, 0xb4, 0xb3, 0xf9, 0x78, 0xb2, 0xac, - 0x43, 0x12, 0x46, 0x09, 0xa4, 0x9d, 0x7a, 0x7f, 0xe8, 0xfb, 0x26, 0x7a, 0xf3, 0xb1, 0x8e, 0x4c, - 0x3b, 0xf5, 0x28, 0x91, 0xc0, 0x13, 0x1a, 0x7b, 0x9c, 0x9e, 0xca, 0x09, 0xf9, 0x30, 0x23, 0xfb, - 0x20, 0x69, 0x40, 0x25, 0x9d, 0xe0, 0x38, 0xc3, 0x67, 0x30, 0x67, 0x20, 0xa3, 0xb8, 0xde, 0x8d, - 0xfd, 0xba, 0x8c, 0xfa, 0x20, 0x24, 0xed, 0xa7, 0x13, 0x66, 0x3d, 0x64, 0x21, 0xd3, 0xcb, 0xba, - 0x5a, 0x19, 0xb4, 0xf6, 0xa7, 0x8d, 0x56, 0x08, 0xa4, 0x71, 0xe4, 0xd3, 0x17, 0xea, 0x36, 0xf8, - 0x29, 0xc2, 0x6a, 0x6b, 0x8f, 0xa6, 0x69, 0x1c, 0x41, 0xe0, 0x45, 0x49, 0x00, 0x67, 0x8e, 0xb5, - 0x6d, 0xed, 0xd8, 0xa4, 0xac, 0x98, 0x3d, 0x43, 0xb4, 0x14, 0x8e, 0x5d, 0xf4, 0x49, 0x0c, 0x54, - 0xc0, 0x0d, 0xf9, 0xa2, 0x96, 0xdf, 0xd7, 0xd4, 0x9c, 0xfe, 0x1b, 0x64, 0x07, 0x20, 0x7c, 0x27, - 0xb7, 0x6d, 0xed, 0x94, 0x76, 0x6b, 0xee, 0x75, 0xd1, 0x26, 0x77, 0x71, 0x09, 0x4d, 0x42, 0xd8, - 0x07, 0xe1, 0xf3, 0x28, 0x95, 0x8c, 0x13, 0xad, 0xc7, 0x2e, 0xca, 0xeb, 0x64, 0x8e, 0xad, 0x03, - 0x9d, 0x3b, 0x02, 0x0f, 0x15, 0x4f, 
0x8c, 0x0c, 0x1f, 0xa3, 0x35, 0xc9, 0x07, 0x89, 0x4f, 0x25, - 0x04, 0x9e, 0xb6, 0xc9, 0xc9, 0xeb, 0xc8, 0x4f, 0xef, 0xdc, 0xf2, 0x54, 0xb6, 0x33, 0xb5, 0xae, - 0x02, 0xb9, 0x27, 0xe7, 0xbe, 0xf1, 0x09, 0x5a, 0x09, 0x7d, 0x4f, 0x76, 0x39, 0x88, 0x2e, 0x8b, - 0x03, 0x67, 0x49, 0x27, 0xdb, 0x9a, 0x49, 0xa6, 0xea, 0xee, 0x76, 0x63, 0xdf, 0x6d, 0x67, 0x75, - 0x6f, 0xac, 0x8d, 0x47, 0xd5, 0xd2, 0x2f, 0xcd, 0x76, 0x16, 0x45, 0x4a, 0xa1, 0x3f, 0xfd, 0xc0, - 0x3f, 0xa0, 0xbc, 0x3a, 0x98, 0x70, 0x96, 0x6f, 0x1d, 0x6c, 0xd2, 0x29, 0x6e, 0xd6, 0x29, 0xee, - 0xd1, 0xcb, 0x66, 0x53, 0x1d, 0x44, 0x10, 0x13, 0x83, 0xbf, 0x46, 0x1b, 0x03, 0x11, 0x25, 0xe1, - 0xb4, 0xee, 0xfa, 0x8e, 0x5e, 0x0f, 0xce, 0x9d, 0xd2, 0xb6, 0xb5, 0x53, 0x20, 0xeb, 0x9a, 0x9e, - 0xd4, 0x5e, 0xdf, 0xe1, 0x57, 0x38, 0xc7, 0x5f, 0xa1, 0xe5, 0x21, 0x70, 0x11, 0xb1, 0xc4, 0x59, - 0xd1, 0xbb, 0x6e, 0xde, 0x51, 0x8e, 0x97, 0x46, 0x41, 0x32, 0xe9, 0xf7, 0xf6, 0xbf, 0x7f, 0x54, - 0xad, 0x03, 0xbb, 0x50, 0x28, 0x17, 0x0f, 0xec, 0x42, 0xb1, 0x8c, 0x0e, 0xec, 0x02, 0x2a, 0x97, - 0x6a, 0x7f, 0x15, 0x50, 0x51, 0xdb, 0xd5, 0x4a, 0x4e, 0x19, 0x3e, 0x32, 0xf7, 0x01, 0xdd, 0x2b, - 0xa5, 0xdd, 0x2f, 0xdc, 0x0f, 0x3c, 0x08, 0x77, 0xb6, 0xed, 0x1a, 0x85, 0x8b, 0x51, 0x75, 0xe1, - 0x72, 0x54, 0xb5, 0xcc, 0x0d, 0x01, 0x6f, 0x21, 0x14, 0x53, 0x21, 0xe7, 0x1a, 0xaa, 0xa8, 0x10, - 0xd3, 0x48, 0x55, 0x54, 0x4a, 0x06, 0x7d, 0x2f, 0x85, 0x24, 0x88, 0x92, 0x50, 0xf7, 0x93, 0x4d, - 0x50, 0x32, 0xe8, 0x3f, 0x37, 0x48, 0x26, 0x08, 0x38, 0x4b, 0x53, 0x08, 0xb4, 0xfb, 0x46, 0xb0, - 0x6f, 0x10, 0x5c, 0x43, 0xab, 0xba, 0xd1, 0x63, 0x16, 0x7a, 0x22, 0x7a, 0x0b, 0xda, 0xd3, 0x1c, - 0x29, 0x29, 0xf0, 0x90, 0x85, 0x2f, 0xa2, 0xb7, 0x80, 0x7f, 0x44, 0x9b, 0x34, 0x4d, 0x39, 0x3b, - 0x8b, 0xfa, 0xaa, 0xbc, 0x29, 0x67, 0x29, 0x13, 0x34, 0xf6, 0x5e, 0x0f, 0x98, 0xa4, 0xda, 0xb8, - 0x1c, 0x71, 0x66, 0x14, 0xcf, 0x27, 0x82, 0x13, 0xc5, 0xe3, 0xcf, 0xd0, 0x1a, 0x57, 0xe5, 0xf1, - 0xfa, 0xf4, 0xcc, 0xeb, 0x9c, 0x4b, 0x10, 0x4e, 0x41, 0x87, 0xac, 0x6a, 
0xf8, 0x88, 0x9e, 0x35, - 0x14, 0x88, 0xbf, 0x44, 0x0f, 0xe6, 0x4e, 0xe2, 0x49, 0x3e, 0x10, 0x12, 0x02, 0x07, 0x69, 0x2b, - 0xf1, 0xcc, 0x89, 0xda, 0x86, 0xc1, 0x29, 0xda, 0x48, 0xe0, 0x0d, 0x08, 0xe9, 0xf9, 0x31, 0x13, - 0x10, 0x78, 0xd3, 0xd7, 0xae, 0xfd, 0x2f, 0xed, 0xee, 0x7e, 0xb8, 0xfc, 0x99, 0x73, 0x6e, 0xb3, - 0xfd, 0x73, 0x22, 0xf9, 0x79, 0xc3, 0x56, 0x1e, 0x90, 0x07, 0x26, 0x71, 0x53, 0xe7, 0x9d, 0x36, - 0x33, 0xfe, 0x0d, 0x6d, 0x50, 0x5f, 0x46, 0x43, 0xb8, 0xbd, 0xe3, 0xca, 0xc7, 0x3c, 0x86, 0x49, - 0x72, 0x93, 0xe3, 0x66, 0xf2, 0x6f, 0xd1, 0x86, 0x2e, 0xc9, 0x29, 0x40, 0xe0, 0x71, 0x08, 0x23, - 0x21, 0x39, 0x95, 0x11, 0x4b, 0x84, 0xb3, 0xaa, 0x2b, 0xf6, 0x70, 0x4a, 0x93, 0x59, 0x16, 0x7f, - 0x87, 0x1e, 0xcd, 0x9b, 0xe2, 0x75, 0xd4, 0x34, 0x32, 0x4d, 0x73, 0xcf, 0x84, 0xa6, 0xb3, 0xa6, - 0x34, 0xa8, 0x00, 0xd3, 0x41, 0x3f, 0xa1, 0x27, 0x37, 0x42, 0x39, 0x98, 0x59, 0xf6, 0x7a, 0x00, - 0x03, 0x70, 0xd6, 0xb6, 0x73, 0x3b, 0x39, 0xf2, 0x68, 0x2e, 0x9a, 0x18, 0xc5, 0x89, 0x12, 0xe0, - 0xcf, 0x51, 0x51, 0x42, 0x42, 0x13, 0xe9, 0x45, 0x81, 0x53, 0x56, 0xfd, 0xd5, 0x58, 0x19, 0x8f, - 0xaa, 0x85, 0xb6, 0x06, 0x5b, 0xfb, 0xa4, 0x60, 0xe8, 0x56, 0xa0, 0x9b, 0x99, 0xf9, 0x3d, 0x4f, - 0xd2, 0x4e, 0x0c, 0xce, 0xfd, 0x6d, 0x6b, 0xa7, 0x48, 0x8a, 0x0a, 0x69, 0x2b, 0x60, 0xf3, 0x3f, - 0x0b, 0x2d, 0x4f, 0x4c, 0xc0, 0xaf, 0xd0, 0x72, 0xc2, 0x02, 0x50, 0x39, 0xd5, 0x43, 0xca, 0x37, - 0xf6, 0xc6, 0xa3, 0xea, 0xd2, 0x31, 0x0b, 0xa0, 0xb5, 0xff, 0xff, 0xa8, 0xfa, 0x2c, 0x8c, 0x64, - 0x77, 0xd0, 0x71, 0x7d, 0xd6, 0xaf, 0x4f, 0xeb, 0x1d, 0x74, 0xae, 0xd7, 0xf5, 0xb4, 0x17, 0xd6, - 0xb3, 0xa7, 0x6c, 0xc2, 0xc8, 0x92, 0xca, 0xd8, 0x0a, 0xf0, 0x31, 0x2a, 0xdf, 0x32, 0x6f, 0xf1, - 0xe3, 0xcd, 0x5b, 0xf3, 0x6f, 0xd8, 0xf6, 0x04, 0xd9, 0xfd, 0x98, 0x46, 0xfa, 0xf5, 0xe5, 0x1a, - 0x85, 0xf1, 0xa8, 0x6a, 0x1f, 0x1d, 0xee, 0xb5, 0x88, 0x46, 0xf1, 0x3a, 0xca, 0x43, 0xca, 0xfc, - 0xae, 0x9e, 0xd9, 0x39, 0x62, 0x3e, 0xcc, 0x30, 0x99, 0x8e, 0x14, 0xbb, 0x9c, 0x37, 0x23, 0xa5, - 0x46, 0x50, 
0xf9, 0x90, 0x4a, 0xbf, 0x7b, 0x44, 0x13, 0x1a, 0x02, 0xd7, 0x23, 0x65, 0x0b, 0x21, - 0x0e, 0x34, 0xf0, 0x7c, 0x36, 0x48, 0xa4, 0x2e, 0x47, 0x8e, 0x14, 0x15, 0xd2, 0x54, 0x80, 0x7a, - 0xe2, 0x6f, 0x78, 0x24, 0x61, 0xc2, 0x2f, 0x6a, 0x1e, 0x69, 0x48, 0x0b, 0x1a, 0x4f, 0x2f, 0xfe, - 0xa9, 0x2c, 0x5c, 0x8c, 0x2b, 0xd6, 0xe5, 0xb8, 0x62, 0xbd, 0x1b, 0x57, 0xac, 0xbf, 0xc7, 0x15, - 0xeb, 0xf7, 0xab, 0xca, 0xc2, 0xe5, 0x55, 0x65, 0xe1, 0xdd, 0x55, 0x65, 0xe1, 0x15, 0xba, 0xfe, - 0x9b, 0x77, 0x96, 0xf4, 0x1f, 0xf1, 0xd9, 0xfb, 0x00, 0x00, 0x00, 0xff, 0xff, 0xd2, 0x8a, 0xae, - 0x49, 0xee, 0x07, 0x00, 0x00, +var fileDescriptor_state_884b07f3590284d4 = []byte{ + // 1068 bytes of a gzipped FileDescriptorProto + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x9c, 0x56, 0xcf, 0x4f, 0x1b, 0x47, + 0x14, 0x66, 0xe3, 0x35, 0xd8, 0x63, 0x08, 0xce, 0x14, 0xc2, 0x86, 0x04, 0x1b, 0x59, 0x6a, 0x45, + 0xa5, 0x74, 0xad, 0x92, 0xfe, 0x50, 0x7f, 0x48, 0x15, 0x36, 0x55, 0x65, 0x0a, 0x28, 0x4c, 0xac, + 0x1c, 0xd2, 0xc3, 0x6a, 0xbc, 0xfb, 0x58, 0xaf, 0xbc, 0xde, 0xd9, 0xcc, 0x8c, 0x1d, 0xc8, 0x7f, + 0xd0, 0x5b, 0xff, 0x84, 0xfe, 0x39, 0xa8, 0x27, 0x8e, 0x39, 0x59, 0xad, 0xb9, 0xf4, 0xd6, 0x7b, + 0x4f, 0xd5, 0xcc, 0x78, 0x8d, 0x0d, 0x48, 0x89, 0x7a, 0x9b, 0xfd, 0xde, 0xf7, 0xde, 0xcc, 0x7c, + 0xef, 0x9b, 0x67, 0xa3, 0x5a, 0x6f, 0x58, 0xef, 0x0d, 0x05, 0xf0, 0x21, 0xf0, 0xe9, 0x22, 0xed, + 0xd4, 0x85, 0xa4, 0x12, 0xdc, 0x94, 0x33, 0xc9, 0x70, 0xd5, 0x67, 0x7e, 0x8f, 0x33, 0xea, 0x77, + 0xdd, 0xde, 0xd0, 0xcd, 0x48, 0xae, 0x90, 0x8c, 0xd3, 0x10, 0xd2, 0xce, 0xe6, 0xe3, 0xc9, 0xb2, + 0x0e, 0x49, 0x18, 0x25, 0x90, 0x76, 0xea, 0xfd, 0xa1, 0xef, 0x9b, 0xec, 0xcd, 0xc7, 0x3a, 0x33, + 0xed, 0xd4, 0xa3, 0x44, 0x02, 0x4f, 0x68, 0xec, 0x71, 0x7a, 0x2a, 0x27, 0xc1, 0x87, 0x59, 0xb0, + 0x0f, 0x92, 0x06, 0x54, 0xd2, 0x09, 0x8e, 0x33, 0x7c, 0x06, 0x73, 0x06, 0x32, 0x8a, 0xeb, 0xdd, + 0xd8, 0xaf, 0xcb, 0xa8, 0x0f, 0x42, 0xd2, 0x7e, 0x3a, 0x89, 0xac, 0x85, 0x2c, 0x64, 0x7a, 0x59, + 0x57, 0x2b, 
0x83, 0xd6, 0x7e, 0xcd, 0xa3, 0x65, 0x02, 0x69, 0x1c, 0xf9, 0xf4, 0x85, 0xba, 0x0d, + 0x7e, 0x8a, 0xb0, 0xda, 0xda, 0xa3, 0x69, 0x1a, 0x47, 0x10, 0x78, 0x51, 0x12, 0xc0, 0x99, 0x63, + 0x6d, 0x5b, 0x3b, 0x36, 0x29, 0xab, 0xc8, 0x9e, 0x09, 0xb4, 0x14, 0x8e, 0x5d, 0xf4, 0x51, 0x0c, + 0x54, 0xc0, 0x0d, 0xfa, 0x3d, 0x4d, 0x7f, 0xa0, 0x43, 0x73, 0xfc, 0xaf, 0x90, 0x1d, 0x80, 0xf0, + 0x9d, 0xdc, 0xb6, 0xb5, 0x53, 0xda, 0xad, 0xb9, 0xd7, 0xa2, 0x4d, 0xee, 0xe2, 0x12, 0x9a, 0x84, + 0xb0, 0x0f, 0xc2, 0xe7, 0x51, 0x2a, 0x19, 0x27, 0x9a, 0x8f, 0x5d, 0x94, 0xd7, 0xc5, 0x1c, 0x5b, + 0x27, 0x3a, 0x77, 0x24, 0x1e, 0xaa, 0x38, 0x31, 0x34, 0x7c, 0x8c, 0x56, 0x25, 0x1f, 0x24, 0x3e, + 0x95, 0x10, 0x78, 0xba, 0x4d, 0x4e, 0x5e, 0x67, 0x7e, 0x7c, 0xe7, 0x96, 0xa7, 0xb2, 0x9d, 0xb1, + 0xb5, 0x0a, 0xe4, 0xbe, 0x9c, 0xfb, 0xc6, 0x27, 0x68, 0x39, 0xf4, 0x3d, 0xd9, 0xe5, 0x20, 0xba, + 0x2c, 0x0e, 0x9c, 0x45, 0x5d, 0x6c, 0x6b, 0xa6, 0x98, 0xd2, 0xdd, 0xed, 0xc6, 0xbe, 0xdb, 0xce, + 0x74, 0x6f, 0xac, 0x8e, 0x47, 0xd5, 0xd2, 0x4f, 0xcd, 0x76, 0x96, 0x45, 0x4a, 0xa1, 0x3f, 0xfd, + 0xc0, 0xdf, 0xa1, 0xbc, 0x3a, 0x98, 0x70, 0x96, 0x6e, 0x1d, 0x6c, 0xe2, 0x14, 0x37, 0x73, 0x8a, + 0x7b, 0xf4, 0xb2, 0xd9, 0x54, 0x07, 0x11, 0xc4, 0xe4, 0xe0, 0x2f, 0xd1, 0xc6, 0x40, 0x44, 0x49, + 0x38, 0xd5, 0x5d, 0xdf, 0xd1, 0xeb, 0xc1, 0xb9, 0x53, 0xda, 0xb6, 0x76, 0x0a, 0x64, 0x4d, 0x87, + 0x27, 0xda, 0xeb, 0x3b, 0xfc, 0x0c, 0xe7, 0xf8, 0x0b, 0xb4, 0x34, 0x04, 0x2e, 0x22, 0x96, 0x38, + 0xcb, 0x7a, 0xd7, 0xcd, 0x3b, 0xe4, 0x78, 0x69, 0x18, 0x24, 0xa3, 0xe2, 0x63, 0x54, 0xf6, 0x63, + 0x26, 0x20, 0xf0, 0xa6, 0x9e, 0x72, 0x56, 0x3e, 0x44, 0x00, 0xfb, 0x62, 0x54, 0x5d, 0x20, 0xab, + 0x26, 0x79, 0x0a, 0x7f, 0x6b, 0xff, 0xfd, 0x7b, 0xd5, 0x3a, 0xb0, 0x0b, 0x85, 0x72, 0xf1, 0xc0, + 0x2e, 0x14, 0xcb, 0xe8, 0xc0, 0x2e, 0xa0, 0x72, 0xa9, 0xf6, 0x47, 0x01, 0x15, 0x75, 0xfb, 0x5b, + 0xc9, 0x29, 0xc3, 0x47, 0x46, 0x1f, 0xd0, 0xde, 0x2b, 0xed, 0x7e, 0xe6, 0xbe, 0xe7, 0x81, 0xb9, + 0xb3, 0x36, 0x6e, 0x14, 0xd4, 0xd6, 0x97, 0xa3, 
0xaa, 0x65, 0x14, 0x03, 0xbc, 0x85, 0x50, 0x4c, + 0x85, 0x9c, 0x33, 0x68, 0x51, 0x21, 0xc6, 0x98, 0x55, 0x54, 0x4a, 0x06, 0x7d, 0x2f, 0x85, 0x24, + 0x88, 0x92, 0x50, 0xfb, 0xd3, 0x26, 0x28, 0x19, 0xf4, 0x9f, 0x1b, 0x24, 0x23, 0x04, 0x9c, 0xa5, + 0x29, 0x04, 0xda, 0x4d, 0x86, 0xb0, 0x6f, 0x10, 0x5c, 0x43, 0x2b, 0xfa, 0xe1, 0xc4, 0x2c, 0xf4, + 0x44, 0xf4, 0x16, 0xb4, 0x47, 0x72, 0xa4, 0xa4, 0xc0, 0x43, 0x16, 0xbe, 0x88, 0xde, 0x02, 0xfe, + 0x1e, 0x6d, 0xd2, 0x34, 0xe5, 0xec, 0x2c, 0xea, 0xab, 0x76, 0xa5, 0x9c, 0xa5, 0x4c, 0xd0, 0xd8, + 0x7b, 0x3d, 0x60, 0x92, 0x6a, 0x23, 0xe4, 0x88, 0x33, 0xc3, 0x78, 0x3e, 0x21, 0x9c, 0xa8, 0x38, + 0xfe, 0x04, 0xad, 0x72, 0x25, 0x8f, 0xd7, 0xa7, 0x67, 0x5e, 0xe7, 0x5c, 0x82, 0x70, 0x0a, 0x3a, + 0x65, 0x45, 0xc3, 0x47, 0xf4, 0xac, 0xa1, 0x40, 0xfc, 0x39, 0x5a, 0x9f, 0x3b, 0x89, 0x27, 0xf9, + 0x40, 0x48, 0x08, 0x1c, 0xa4, 0xad, 0x81, 0x67, 0x4e, 0xd4, 0x36, 0x11, 0x9c, 0xa2, 0x8d, 0x04, + 0xde, 0x80, 0x90, 0xde, 0xad, 0x4e, 0x97, 0xb4, 0xfc, 0xbb, 0xef, 0x97, 0x3f, 0xeb, 0x9c, 0xdb, + 0x6c, 0xff, 0x98, 0x48, 0x7e, 0x3e, 0x69, 0xff, 0xba, 0x29, 0xdc, 0x9c, 0x37, 0x01, 0xfe, 0x05, + 0x6d, 0x50, 0x5f, 0x46, 0x43, 0xb8, 0xbd, 0xe3, 0xf2, 0x87, 0x7b, 0x6b, 0xdd, 0xd4, 0xb8, 0x59, + 0xfc, 0x6b, 0xb4, 0xa1, 0x25, 0x39, 0x05, 0x08, 0x3c, 0x0e, 0x61, 0x24, 0x24, 0xa7, 0x32, 0x62, + 0x89, 0xd0, 0xc6, 0xcd, 0x91, 0x87, 0xd3, 0x30, 0x99, 0x8d, 0xe2, 0x6f, 0xd0, 0xa3, 0xf9, 0xa6, + 0x78, 0x1d, 0x35, 0xdd, 0x8c, 0x69, 0xee, 0x9b, 0xd4, 0x74, 0xb6, 0x29, 0x0d, 0x2a, 0xc0, 0x38, + 0xe8, 0x07, 0xf4, 0xe4, 0x46, 0x2a, 0x07, 0x33, 0x1b, 0x5f, 0x0f, 0x60, 0x00, 0xce, 0xea, 0x76, + 0x6e, 0x27, 0x47, 0x1e, 0xcd, 0x65, 0x13, 0xc3, 0x38, 0x51, 0x04, 0xfc, 0x29, 0x2a, 0x4a, 0x48, + 0x68, 0x22, 0xbd, 0x28, 0x70, 0xca, 0xca, 0x5f, 0x8d, 0xe5, 0xf1, 0xa8, 0x5a, 0x68, 0x6b, 0xb0, + 0xb5, 0x4f, 0x0a, 0x26, 0xdc, 0x0a, 0xb4, 0x99, 0x99, 0xdf, 0xf3, 0x24, 0xed, 0xc4, 0xe0, 0x3c, + 0xd8, 0xb6, 0x76, 0x8a, 0xa4, 0xa8, 0x90, 0xb6, 0x02, 0x36, 0xff, 0xb1, 0xd0, 0xd2, 
0xa4, 0x09, + 0xf8, 0x15, 0x5a, 0x4a, 0x58, 0x00, 0xaa, 0xa6, 0x7a, 0x48, 0xf9, 0xc6, 0xde, 0x78, 0x54, 0x5d, + 0x3c, 0x66, 0x01, 0xb4, 0xf6, 0xff, 0x1d, 0x55, 0x9f, 0x85, 0x91, 0xec, 0x0e, 0x3a, 0xae, 0xcf, + 0xfa, 0xf5, 0xa9, 0xde, 0x41, 0xe7, 0x7a, 0x5d, 0x4f, 0x7b, 0x61, 0x3d, 0x1b, 0x0d, 0x26, 0x8d, + 0x2c, 0xaa, 0x8a, 0xad, 0xe0, 0xce, 0xc1, 0x70, 0xef, 0xff, 0x0f, 0x06, 0xfc, 0x04, 0xd9, 0xfd, + 0x98, 0x46, 0xfa, 0xf5, 0xe5, 0x1a, 0x85, 0xf1, 0xa8, 0x6a, 0x1f, 0x1d, 0xee, 0xb5, 0x88, 0x46, + 0xf1, 0x1a, 0xca, 0x43, 0xca, 0xfc, 0xae, 0xfe, 0x0d, 0xc8, 0x11, 0xf3, 0x61, 0x86, 0xc9, 0x74, + 0xa4, 0xd8, 0xe5, 0xbc, 0x19, 0x29, 0x35, 0x82, 0xca, 0x87, 0x54, 0xfa, 0xdd, 0x23, 0x9a, 0xd0, + 0x10, 0xb8, 0x1e, 0x29, 0x5b, 0x08, 0x71, 0xa0, 0x81, 0xe7, 0xb3, 0x41, 0x22, 0xb5, 0x1c, 0x39, + 0x52, 0x54, 0x48, 0x53, 0x01, 0xea, 0x89, 0xbf, 0xe1, 0x91, 0x84, 0x49, 0xfc, 0x9e, 0x8e, 0x23, + 0x0d, 0x69, 0x42, 0xe3, 0xe9, 0xc5, 0x5f, 0x95, 0x85, 0x8b, 0x71, 0xc5, 0xba, 0x1c, 0x57, 0xac, + 0x77, 0xe3, 0x8a, 0xf5, 0xe7, 0xb8, 0x62, 0xfd, 0x76, 0x55, 0x59, 0xb8, 0xbc, 0xaa, 0x2c, 0xbc, + 0xbb, 0xaa, 0x2c, 0xbc, 0x42, 0xd7, 0xff, 0x0e, 0x3a, 0x8b, 0xfa, 0x17, 0xf6, 0xd9, 0x7f, 0x01, + 0x00, 0x00, 0xff, 0xff, 0x23, 0xeb, 0x99, 0xc3, 0x3e, 0x08, 0x00, 0x00, } diff --git a/pkg/kv/kvserver/kvserverpb/state.proto b/pkg/kv/kvserver/kvserverpb/state.proto index f8288300aeac..45e07446177e 100644 --- a/pkg/kv/kvserver/kvserverpb/state.proto +++ b/pkg/kv/kvserver/kvserverpb/state.proto @@ -100,6 +100,14 @@ message ReplicaState { // [2]: PurgeOutdatedReplicas roachpb.Version version = 12; + // closed_timestamp is the largest timestamp that is known to have been + // closed. This means that the current leaseholder (if any) and any future + // leaseholder will not evaluate writes at or below this timestamp, and also + // that any in-flight commands that can still apply are writing at higher + // timestamps. 
Non-leaseholder replicas are free to serve "follower reads" at + // or below this timestamp. + util.hlc.Timestamp closed_timestamp = 13 [(gogoproto.nullable) = false]; + reserved 8, 9, 10; } diff --git a/pkg/kv/kvserver/replica.go b/pkg/kv/kvserver/replica.go index e602af9193b0..e73af5140919 100644 --- a/pkg/kv/kvserver/replica.go +++ b/pkg/kv/kvserver/replica.go @@ -360,6 +360,10 @@ type Replica struct { // consumed, commands are proposed through Raft and moved to the // proposals map. // + // The propBuf is the one closing timestamps, so evaluating writes must be + // registered with the propBuf through TrackEvaluatingRequest before their + // write timestamp is decided. + // // Access to proposalBuf must occur *without* holding the mutex. // Instead, the buffer internally holds a reference to mu and will use // it appropriately. diff --git a/pkg/kv/kvserver/replica_application_result.go b/pkg/kv/kvserver/replica_application_result.go index a2761aa41bbd..9477b8cc0cec 100644 --- a/pkg/kv/kvserver/replica_application_result.go +++ b/pkg/kv/kvserver/replica_application_result.go @@ -205,6 +205,16 @@ func (r *Replica) tryReproposeWithNewLeaseIndex( minTS, untrack := r.store.cfg.ClosedTimestamp.Tracker.Track(ctx) defer untrack(ctx, 0, 0, 0) // covers all error paths below + + // We need to track the request again in order to protect its timestamp until + // it gets reproposed. + // TODO(andrei): Only track if the request consults the ts cache. Some + // requests (e.g. EndTxn) don't care about closed timestamps. + minTS2, tok := r.mu.proposalBuf.TrackEvaluatingRequest(ctx, p.Request.WriteTimestamp()) + defer tok.DoneIfNotMoved(ctx) + minTS.Forward(minTS2) + + // NB: p.Request.Timestamp reflects the action of ba.SetActiveTimestamp. // The IsIntentWrite condition matches the similar logic for caring // about the closed timestamp cache in applyTimestampCache(). 
if p.Request.IsIntentWrite() && p.Request.WriteTimestamp().LessEq(minTS) { @@ -222,7 +232,7 @@ func (r *Replica) tryReproposeWithNewLeaseIndex( // Some tests check for this log message in the trace. log.VEventf(ctx, 2, "retry: proposalIllegalLeaseIndex") - maxLeaseIndex, pErr := r.propose(ctx, p) + maxLeaseIndex, pErr := r.propose(ctx, p, tok.Move(ctx)) if pErr != nil { return pErr } diff --git a/pkg/kv/kvserver/replica_application_state_machine.go b/pkg/kv/kvserver/replica_application_state_machine.go index cfd7cef7a35b..7793ca520fdd 100644 --- a/pkg/kv/kvserver/replica_application_state_machine.go +++ b/pkg/kv/kvserver/replica_application_state_machine.go @@ -376,8 +376,10 @@ type replicaAppBatch struct { // replicaState other than Stats are overwritten completely rather than // updated in-place. stats enginepb.MVCCStats - // maxTS is the maximum clock timestamp that any command that was staged in - // this batch was evaluated at. + // maxTS is the maximum clock timestamp that this command carries. Timestamps + // come from the writes that are part of this command, and also from the + // closed timestamp carried by this command. Synthetic timestamps are not + // registered here. maxTS hlc.ClockTimestamp // migrateToAppliedStateKey tracks whether any command in the batch // triggered a migration to the replica applied state key. 
If so, this @@ -428,7 +430,8 @@ func (b *replicaAppBatch) Stage(cmdI apply.Command) (apply.CheckedCommand, error return nil, makeNonDeterministicFailure("applied index jumped from %d to %d", applied, idx) } if log.V(4) { - log.Infof(ctx, "processing command %x: maxLeaseIndex=%d", cmd.idKey, cmd.raftCmd.MaxLeaseIndex) + log.Infof(ctx, "processing command %x: raftIndex=%d maxLeaseIndex=%d closedts=%s", + cmd.idKey, cmd.ent.Index, cmd.raftCmd.MaxLeaseIndex, cmd.raftCmd.ClosedTimestamp) } // Determine whether the command should be applied to the replicated state @@ -442,7 +445,20 @@ func (b *replicaAppBatch) Stage(cmdI apply.Command) (apply.CheckedCommand, error cmd.raftCmd.ReplicatedEvalResult = kvserverpb.ReplicatedEvalResult{} cmd.raftCmd.WriteBatch = nil cmd.raftCmd.LogicalOpLog = nil + cmd.raftCmd.ClosedTimestamp.Reset() } else { + // Assert that we're not writing under the closed timestamp. We can only do + // these checks on IsIntentWrite requests, since others (for example, + // EndTxn) can operate below the closed timestamp. In turn, this means that + // we can only assert on the leaseholder, as only that replica has + // cmd.proposal.Request filled in. + if cmd.IsLocal() && cmd.proposal.Request.IsIntentWrite() { + wts := cmd.proposal.Request.WriteTimestamp() + if wts.LessEq(b.state.ClosedTimestamp) { + return nil, makeNonDeterministicFailure("writing at %s below closed ts: %s (%s)", + wts, b.state.ClosedTimestamp.String(), cmd.proposal.Request.String()) + } + } log.Event(ctx, "applying command") } @@ -623,7 +639,7 @@ func (b *replicaAppBatch) runPreApplyTriggersAfterStagingWriteBatch( // // Alternatively if we discover that the RHS has already been removed // from this store, clean up its data. 
- splitPreApply(ctx, b.batch, res.Split.SplitTrigger, b.r) + splitPreApply(ctx, b.batch, res.Split.SplitTrigger, b.r, cmd.raftCmd.ClosedTimestamp) // The rangefeed processor will no longer be provided logical ops for // its entire range, so it needs to be shut down and all registrations @@ -807,6 +823,18 @@ func (b *replicaAppBatch) stageTrivialReplicatedEvalResult( if leaseAppliedIndex := cmd.leaseIndex; leaseAppliedIndex != 0 { b.state.LeaseAppliedIndex = leaseAppliedIndex } + if cts := cmd.raftCmd.ClosedTimestamp; !cts.IsEmpty() { + if cts.Less(b.state.ClosedTimestamp) { + log.Fatalf(ctx, + "closed timestamp regressing from %s to %s when applying command %x", + b.state.ClosedTimestamp, cts, cmd.idKey) + } + b.state.ClosedTimestamp = cts + if clockTS, ok := cts.TryToClockTimestamp(); ok { + b.maxTS.Forward(clockTS) + } + } + res := cmd.replicatedResult() // Special-cased MVCC stats handling to exploit commutativity of stats delta @@ -860,10 +888,11 @@ func (b *replicaAppBatch) ApplyToStateMachine(ctx context.Context) error { b.batch.Close() b.batch = nil - // Update the replica's applied indexes and mvcc stats. + // Update the replica's applied indexes, mvcc stats and closed timestamp. r.mu.Lock() r.mu.state.RaftAppliedIndex = b.state.RaftAppliedIndex r.mu.state.LeaseAppliedIndex = b.state.LeaseAppliedIndex + closedTimestampUpdated := r.mu.state.ClosedTimestamp.Forward(b.state.ClosedTimestamp) prevStats := *r.mu.state.Stats *r.mu.state.Stats = *b.state.Stats @@ -879,6 +908,13 @@ func (b *replicaAppBatch) ApplyToStateMachine(ctx context.Context) error { needsTruncationByLogSize := r.needsRaftLogTruncationLocked() tenantID := r.mu.tenantID r.mu.Unlock() + if closedTimestampUpdated { + // TODO(andrei): Pass in the new closed timestamp to + // r.handleClosedTimestampUpdateRaftMuLocked directly after the old closed + // ts tracker goes away. Until then we can't do it; we have to let the + // method consult r.maxClosed(). 
+ r.handleClosedTimestampUpdateRaftMuLocked(ctx) + } // Record the stats delta in the StoreMetrics. deltaStats := *b.state.Stats @@ -926,7 +962,8 @@ func (b *replicaAppBatch) addAppliedStateKeyToBatch(ctx context.Context) error { // Set the range applied state, which includes the last applied raft and // lease index along with the mvcc stats, all in one key. if err := loader.SetRangeAppliedState( - ctx, b.batch, b.state.RaftAppliedIndex, b.state.LeaseAppliedIndex, b.state.Stats, + ctx, b.batch, b.state.RaftAppliedIndex, b.state.LeaseAppliedIndex, + b.state.Stats, &b.state.ClosedTimestamp, ); err != nil { return wrapWithNonDeterministicFailure(err, "unable to set range applied state") } diff --git a/pkg/kv/kvserver/replica_follower_read.go b/pkg/kv/kvserver/replica_follower_read.go index cdc35c0b3778..2ebd11c4fd63 100644 --- a/pkg/kv/kvserver/replica_follower_read.go +++ b/pkg/kv/kvserver/replica_follower_read.go @@ -141,13 +141,19 @@ func (r *Replica) maxClosedRLocked(ctx context.Context) (_ hlc.Timestamp, ok boo lai := r.mu.state.LeaseAppliedIndex lease := *r.mu.state.Lease initialMaxClosed := r.mu.initialMaxClosed + replicaStateClosed := r.mu.state.ClosedTimestamp if lease.Expiration != nil { return hlc.Timestamp{}, false } + // Look at the legacy closed timestamp propagation mechanism. maxClosed := r.store.cfg.ClosedTimestamp.Provider.MaxClosed( lease.Replica.NodeID, r.RangeID, ctpb.Epoch(lease.Epoch), ctpb.LAI(lai)) maxClosed.Forward(lease.Start.ToTimestamp()) maxClosed.Forward(initialMaxClosed) + + // Look at the "new" closed timestamp propagation mechanism. 
+ maxClosed.Forward(replicaStateClosed) + return maxClosed, true } diff --git a/pkg/kv/kvserver/replica_init.go b/pkg/kv/kvserver/replica_init.go index de73dd090f99..c64027597c26 100644 --- a/pkg/kv/kvserver/replica_init.go +++ b/pkg/kv/kvserver/replica_init.go @@ -18,6 +18,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/keys" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/abortspan" + "github.com/cockroachdb/cockroach/pkg/kv/kvserver/closedts/tracker" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/concurrency" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverbase" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/split" @@ -95,8 +96,9 @@ func newUnloadedReplica( }) r.mu.proposals = map[kvserverbase.CmdIDKey]*ProposalData{} r.mu.checksums = map[uuid.UUID]ReplicaChecksum{} - r.mu.proposalBuf.Init((*replicaProposer)(r)) + r.mu.proposalBuf.Init((*replicaProposer)(r), tracker.NewLockfreeTracker(), r.Clock(), r.ClusterSettings()) r.mu.proposalBuf.testing.allowLeaseProposalWhenNotLeader = store.cfg.TestingKnobs.AllowLeaseRequestProposalsWhenNotLeader + r.mu.proposalBuf.testing.dontCloseTimestamps = store.cfg.TestingKnobs.DontCloseTimestamps if leaseHistoryMaxEntries > 0 { r.leaseHistory = newLeaseHistory() diff --git a/pkg/kv/kvserver/replica_proposal.go b/pkg/kv/kvserver/replica_proposal.go index d4af67968b54..5ccc84b1025f 100644 --- a/pkg/kv/kvserver/replica_proposal.go +++ b/pkg/kv/kvserver/replica_proposal.go @@ -114,6 +114,15 @@ type ProposalData struct { // here; this could be replaced with isLease and isChangeReplicas // booleans. Request *roachpb.BatchRequest + + // leaseStatus represents the lease under which the Request was evaluated and + // under which this proposal is being made. For lease requests, this is the + // previous lease that the requester was aware of. + leaseStatus kvserverpb.LeaseStatus + + // tok identifies the request to the propBuf. Once the proposal is made, the + // token will be used to stop tracking this request. 
+ tok TrackedRequestToken } // finishApplication is called when a command application has finished. The @@ -327,6 +336,10 @@ const ( func (r *Replica) leasePostApplyLocked( ctx context.Context, prevLease *roachpb.Lease, newLease *roachpb.Lease, jumpOpt leaseJumpOption, ) { + // Note that we actually install the lease further down in this method. + // Everything we do before then doesn't need to worry about requests being + // evaluated under the new lease. + // Sanity check to make sure that the lease sequence is moving in the right // direction. if s1, s2 := prevLease.Sequence, newLease.Sequence; s1 != 0 { @@ -412,6 +425,10 @@ func (r *Replica) leasePostApplyLocked( // to not matter). r.concMgr.OnRangeLeaseUpdated(newLease.Sequence, iAmTheLeaseHolder) + // Inform the propBuf about the new lease so that it can initialize its closed + // timestamp tracking. + r.mu.proposalBuf.OnLeaseChangeLocked(iAmTheLeaseHolder, r.mu.state.ClosedTimestamp) + // Ordering is critical here. We only install the new lease after we've // checked for an in-progress merge and updated the timestamp cache. If the // ordering were reversed, it would be possible for requests to see the new @@ -833,6 +850,7 @@ func (r *Replica) requestToProposal( ctx context.Context, idKey kvserverbase.CmdIDKey, ba *roachpb.BatchRequest, + st kvserverpb.LeaseStatus, lul hlc.Timestamp, latchSpans *spanset.SpanSet, ) (*ProposalData, *roachpb.Error) { @@ -840,11 +858,12 @@ func (r *Replica) requestToProposal( // Fill out the results even if pErr != nil; we'll return the error below. 
proposal := &ProposalData{ - ctx: ctx, - idKey: idKey, - doneCh: make(chan proposalResult, 1), - Local: &res.Local, - Request: ba, + ctx: ctx, + idKey: idKey, + doneCh: make(chan proposalResult, 1), + Local: &res.Local, + Request: ba, + leaseStatus: st, } if needConsensus { diff --git a/pkg/kv/kvserver/replica_proposal_buf.go b/pkg/kv/kvserver/replica_proposal_buf.go index f5b31c1db79a..f991c35f41c9 100644 --- a/pkg/kv/kvserver/replica_proposal_buf.go +++ b/pkg/kv/kvserver/replica_proposal_buf.go @@ -15,7 +15,13 @@ import ( "sync" "sync/atomic" + "github.com/cockroachdb/cockroach/pkg/clusterversion" + "github.com/cockroachdb/cockroach/pkg/kv/kvserver/closedts" + "github.com/cockroachdb/cockroach/pkg/kv/kvserver/closedts/tracker" + "github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverpb" "github.com/cockroachdb/cockroach/pkg/roachpb" + "github.com/cockroachdb/cockroach/pkg/settings/cluster" + "github.com/cockroachdb/cockroach/pkg/util/hlc" "github.com/cockroachdb/cockroach/pkg/util/log" "github.com/cockroachdb/cockroach/pkg/util/protoutil" "github.com/cockroachdb/errors" @@ -105,8 +111,8 @@ func (c *propBufCnt) read() propBufCntRes { return propBufCntRes(atomic.LoadUint64((*uint64)(c))) } -// propBuf is a multi-producer, single-consumer buffer for Raft proposals. The -// buffer supports concurrent insertion of proposals. +// propBuf is a multi-producer, single-consumer buffer for Raft proposals on a +// range. The buffer supports concurrent insertion of proposals. // // The proposal buffer also handles the assignment of maximum lease indexes for // commands. Picking the maximum lease index for commands is done atomically @@ -116,6 +122,11 @@ func (c *propBufCnt) read() propBufCntRes { // get out of sync then some commands would necessarily be rejected beneath Raft // during application (see checkForcedErr). // +// The proposal buffer also is in charge of advancing the respective range's +// closed timestamp by assigning closed timestamp to proposals. 
For this +// purpose, new requests starting evaluation need to synchronize with the +// proposal buffer (see TrackEvaluatingRequest). +// // Proposals enter the buffer via Insert() or ReinsertLocked(). They are moved // into Raft via FlushLockedWithRaftGroup() when the buffer fills up, or during // the next handleRaftReady iteration, whichever happens earlier. This @@ -127,13 +138,35 @@ func (c *propBufCnt) read() propBufCntRes { // initialization. Methods called "...Locked" and "...RLocked" expect the // corresponding locker() and rlocker() to be held. type propBuf struct { - p proposer - full sync.Cond + p proposer + clock *hlc.Clock + settings *cluster.Settings + // evalTracker tracks currently-evaluating requests, making sure that + // proposals coming out of the propBuf don't carry closed timestamps below + // currently-evaluating requests. + evalTracker tracker.Tracker + full sync.Cond liBase uint64 cnt propBufCnt arr propBufArray + // assignedClosedTimestamp is the largest "closed timestamp" - i.e. the largest + // timestamp that was communicated to other replicas as closed, representing a + // promise that this leaseholder will not evaluate writes below this timestamp + // any more. + // + // Note that this field is not used by the local replica (or by anybody) + // directly to decide whether follower reads can be served. See + // ReplicaState.closed_timestamp. + // + // This field can be read under the proposer's read lock, and written to under + // the write lock. + assignedClosedTimestamp hlc.Timestamp + + // A buffer used to avoid allocations. + tmpClosedTimestampFooter kvserverpb.ClosedTimestampFooter + testing struct { // leaseIndexFilter can be used by tests to override the max lease index // assigned to a proposal by returning a non-zero lease index. @@ -151,6 +184,8 @@ type propBuf struct { // heartbeats and then expect other replicas to take the lease without // worrying about Raft).
allowLeaseProposalWhenNotLeader bool + // dontCloseTimestamps inhibits the closing of timestamps. + dontCloseTimestamps bool } } @@ -178,6 +213,12 @@ type proposer interface { destroyed() destroyStatus leaseAppliedIndex() uint64 enqueueUpdateCheck() + closeTimestampPolicy() roachpb.RangeClosedTimestampPolicy + // raftTransportClosedTimestampEnabled returns whether the range has switched + // to the Raft-based closed timestamp transport. + // TODO(andrei): This shouldn't be needed any more in 21.2, once the Raft + // transport is unconditionally enabled. + raftTransportClosedTimestampEnabled() bool // The following require the proposer to hold an exclusive lock. withGroupLocked(func(proposerRaft) error) error registerProposalLocked(*ProposalData) @@ -205,9 +246,14 @@ type proposerRaft interface { } // Init initializes the proposal buffer and binds it to the provided proposer. -func (b *propBuf) Init(p proposer) { +func (b *propBuf) Init( + p proposer, tracker tracker.Tracker, clock *hlc.Clock, settings *cluster.Settings, +) { b.p = p b.full.L = p.rlocker() + b.clock = clock + b.evalTracker = tracker + b.settings = settings b.liBase = p.leaseAppliedIndex() } @@ -225,12 +271,20 @@ func (b *propBuf) LastAssignedLeaseIndexRLocked() uint64 { // proposer's Raft group. The method accepts the Raft command as part of the // ProposalData struct, along with a partial encoding of the command in the // provided byte slice. It is expected that the byte slice contains marshaled -// information for all of the command's fields except for its max lease index, -// which is assigned by the method when the command is sequenced in the buffer. -// It is also expected that the byte slice has sufficient capacity to marshal -// the maximum lease index field into it. After adding the proposal to the +// information for all of the command's fields except for MaxLeaseIndex, and +// ClosedTimestamp. MaxLeaseIndex is assigned here, when the command is +// sequenced in the buffer. 
ClosedTimestamp will be assigned later, when the +// buffer is flushed. It is also expected that the byte slice has sufficient +// capacity to marshal these fields into it. After adding the proposal to the // buffer, the assigned max lease index is returned. -func (b *propBuf) Insert(ctx context.Context, p *ProposalData, data []byte) (uint64, error) { +// +// Insert takes ownership of the supplied token; the caller should tok.Move() it +// into this method. It will be used to untrack the request once it comes out of the +// proposal buffer. +func (b *propBuf) Insert( + ctx context.Context, p *ProposalData, data []byte, tok TrackedRequestToken, +) (uint64, error) { + defer tok.DoneIfNotMoved(ctx) // Request a new max lease applied index for any request that isn't itself // a lease request. Lease requests don't need unique max lease index values // because their max lease indexes are ignored. See checkForcedErr. @@ -252,6 +306,10 @@ func (b *propBuf) Insert(ctx context.Context, p *ProposalData, data []byte) (uin } // Assign the command's maximum lease index. + // TODO(andrei): Move this to Flush in 21.2, to mirror the assignment of the + // closed timestamp. For now it's needed here because Insert needs to return + // the MLAI for the benefit of the "old" closed timestamp tracker. When moving + // to flush, make sure to not reassign it on reproposals. p.command.MaxLeaseIndex = b.liBase + res.leaseIndexOffset() if filter := b.testing.leaseIndexFilter; filter != nil { if override, err := filter(p); err != nil { @@ -277,7 +335,9 @@ func (b *propBuf) Insert(ctx context.Context, p *ProposalData, data []byte) (uin return 0, err } - // Insert the proposal into the buffer's array. + // Insert the proposal into the buffer's array. The buffer now takes ownership + // of the token. + p.tok = tok.Move(ctx) b.insertIntoArray(p, res.arrayIndex()) // Return the maximum lease index that the proposal's command was given. 
@@ -458,6 +518,8 @@ func (b *propBuf) FlushLockedWithRaftGroup( } } + closedTSTarget := b.computeClosedTimestampTarget() + // Remember the first error that we see when proposing the batch. We don't // immediately return this error because we want to finish clearing out the // buffer and registering each of the proposals with the proposer, but we @@ -516,6 +578,11 @@ func (b *propBuf) FlushLockedWithRaftGroup( } } + // Exit the tracker. + reproposal := !p.tok.stillTracked() + if !reproposal { + p.tok.doneLocked(ctx) + } // Raft processing bookkeeping. b.p.registerProposalLocked(p) @@ -538,6 +605,20 @@ func (b *propBuf) FlushLockedWithRaftGroup( continue } + // Figure out what closed timestamp this command will carry. + // + // If this is a reproposal, we don't reassign the closed timestamp. We + // could, in principle, but we'd have to make a copy of the encoded command + // as to not modify the copy that's already stored in the local replica's + // raft entry cache. + if !reproposal { + err := b.assignClosedTimestampToProposalLocked(ctx, p, closedTSTarget) + if err != nil { + firstErr = err + continue + } + } + // Coordinate proposing the command to etcd/raft. if crt := p.command.ReplicatedEvalResult.ChangeReplicas; crt != nil { // Flush any previously batched (non-conf change) proposals to @@ -596,6 +677,121 @@ func (b *propBuf) FlushLockedWithRaftGroup( return used, proposeBatch(raftGroup, b.p.replicaID(), ents) } +// computeClosedTimestampTarget computes the timestamp we'd like to close for +// our range. Note that we might not be able to ultimately close this timestamp +// if there's requests in flight. 
+func (b *propBuf) computeClosedTimestampTarget() hlc.Timestamp { + now := b.clock.Now().WallTime + closedTSPolicy := b.p.closeTimestampPolicy() + var closedTSTarget hlc.Timestamp + switch closedTSPolicy { + case roachpb.LAG_BY_CLUSTER_SETTING, roachpb.LEAD_FOR_GLOBAL_READS: + targetDuration := closedts.TargetDuration.Get(&b.settings.SV) + closedTSTarget = hlc.Timestamp{WallTime: now - targetDuration.Nanoseconds()} + // TODO(andrei,nvanbenschoten): Resolve all the issues preventing us from closing + // timestamps in the future (which, in turn, forces future-time writes on + // global ranges), and enable the proper logic below. + //case roachpb.LEAD_FOR_GLOBAL_READS: + // closedTSTarget = hlc.Timestamp{ + // WallTime: now + 2*b.clock.MaxOffset().Nanoseconds(), + // Synthetic: true, + // } + } + return closedTSTarget +} + +// assignClosedTimestampToProposalLocked assigns a closed timestamp to be carried by +// an outgoing proposal. +// +// This shouldn't be called for reproposals. +func (b *propBuf) assignClosedTimestampToProposalLocked( + ctx context.Context, p *ProposalData, closedTSTarget hlc.Timestamp, +) error { + if b.testing.dontCloseTimestamps { + return nil + } + // If the Raft transport is not enabled yet, bail. If the range has already + // started publishing closed timestamps using Raft, then it doesn't matter + // whether this node found out about the version bump yet. + if !b.p.raftTransportClosedTimestampEnabled() && + !b.settings.Version.IsActive(ctx, clusterversion.ClosedTimestampsRaftTransport) { + return nil + } + + // For lease requests, we make a distinction between lease extensions and + // brand new leases. Brand new leases carry a closed timestamp equal to the + // lease start time. Lease extensions don't get a closed timestamp. This is + // because they're proposed without a MLAI, and so two lease extensions might + // commute and both apply which would result in a closed timestamp regression. 
+ // The command application side doesn't bother protecting against such + // regressions. + // Lease transfers behave like regular proposals. Note that transfers + // carry a summary of the timestamp cache, so the new leaseholder will be + // aware of all the reads performed by the previous leaseholder. + isBrandNewLeaseRequest := false + if p.Request.IsLeaseRequest() { + // We read the lease from the ReplicatedEvalResult, not from leaseReq, because the + // former is more up to date, having been modified by the evaluation. + newLease := p.command.ReplicatedEvalResult.State.Lease + oldLease := p.leaseStatus.Lease + leaseExtension := newLease.Sequence == oldLease.Sequence + if leaseExtension { + return nil + } + isBrandNewLeaseRequest = true + // For brand new leases, we close the lease start time. Since this proposing + // replica is not the leaseholder, the previous target is meaningless. + closedTSTarget = newLease.Start.ToTimestamp() + } + if !isBrandNewLeaseRequest { + lb := b.evalTracker.LowerBound(ctx) + if !lb.IsEmpty() { + // If the tracker told us that requests are currently evaluating at + // timestamps >= lb, then we can close up to lb.Prev(). We use FloorPrev() + // to get rid of the logical ticks; we try to not publish closed ts with + // logical ticks when there's no good reason for them. + closedTSTarget.Backward(lb.FloorPrev()) + } + // We can't close timestamps above the current lease's expiration(*). This is + // in order to keep the monotonic property of closed timestamps carried by + // commands, which makes for straight-forward closed timestamp management on + // the command application side: if we allowed requests to close timestamps + // above the lease's expiration, then a future LeaseRequest proposed by + // another node might carry a lower closed timestamp (i.e. the lease start + // time). 
+ // (*) If we've previously closed a higher timestamp under a previous lease + // with a higher expiration, then requests will keep carrying that closed + // timestamp; we won't regress the closed timestamp. + // + // HACK(andrei): We declare the lease expiration to be synthetic by fiat, + // because it frequently is synthetic even though currently it's not marked + // as such. See the TODO in Timestamp.Add() about the work remaining to + // properly mark these timestamps as synthetic. We need to make sure it's + // synthetic here so that the results of Backward() can be synthetic. + leaseExpiration := p.leaseStatus.Expiration().WithSynthetic(true) + closedTSTarget.Backward(leaseExpiration) + } + + // We're about to close closedTSTarget. The propBuf needs to remember that in + // order for incoming requests to be bumped above it (through + // TrackEvaluatingRequest). + b.forwardClosedTimestampLocked(closedTSTarget) + // Fill in the closed ts in the proposal. + f := &b.tmpClosedTimestampFooter + f.ClosedTimestamp = b.assignedClosedTimestamp + footerLen := f.Size() + if log.ExpensiveLogEnabled(ctx, 4) { + log.VEventf(ctx, 4, "attaching closed timestamp %s to proposal %x", b.assignedClosedTimestamp, p.idKey) + } + + preLen := len(p.encodedCommand) + // Here we rely on p.encodedCommand to have been allocated with enough + // capacity for this footer. + p.encodedCommand = p.encodedCommand[:preLen+footerLen] + _, err := protoutil.MarshalTo(f, p.encodedCommand[preLen:]) + return err +} + func (b *propBuf) forwardLeaseIndexBase(v uint64) { if b.liBase < v { b.liBase = v @@ -636,6 +832,109 @@ func (b *propBuf) FlushLockedWithoutProposing(ctx context.Context) { } } +// OnLeaseChangeLocked is called when a new lease is applied to this range. +// closedTS is the range's closed timestamp after the new lease was applied. The +// closed timestamp tracked by the propBuf is updated accordingly.
+func (b *propBuf) OnLeaseChangeLocked(leaseOwned bool, closedTS hlc.Timestamp) { + if leaseOwned { + b.forwardClosedTimestampLocked(closedTS) + } else { + // Zero out to avoid any confusion. + b.assignedClosedTimestamp = hlc.Timestamp{} + } +} + +// forwardClosedTimestampLocked forwards the closed timestamp tracked by the propBuf. +func (b *propBuf) forwardClosedTimestampLocked(closedTS hlc.Timestamp) { + b.assignedClosedTimestamp.Forward(closedTS) +} + +// EvaluatingRequestsCount returns the count of requests currently tracked by +// the propBuf. +func (b *propBuf) EvaluatingRequestsCount() int { + b.p.rlocker().Lock() + defer b.p.rlocker().Unlock() + return b.evalTracker.Count() +} + +// TrackedRequestToken represents the result of propBuf.TrackEvaluatingRequest: +// a token to be later used for untracking the respective request. +// +// This token tries to make it easy to pass responsibility for untracking. The +// intended pattern is: +// tok := propBuf.TrackEvaluatingRequest() +// defer tok.DoneIfNotMoved() +// fn(tok.Move()) +type TrackedRequestToken struct { + done bool + tok tracker.RemovalToken + b *propBuf +} + +// DoneIfNotMoved untracks the request if Move had not been called on the token +// previously. If Move had been called, this is a no-op. +// +// Note that if this ends up actually destroying the token (i.e. if Move() had +// not been called previously) this takes r.mu, so it's pretty expensive. On +// happy paths, the token is expected to have been Move()d, and a batch of +// tokens are expected to be destroyed at once by the propBuf (which calls +// doneLocked).
+func (t *TrackedRequestToken) DoneIfNotMoved(ctx context.Context) { + if t.done { + return + } + t.b.p.locker().Lock() + t.doneLocked(ctx) + t.b.p.locker().Unlock() +} + +func (t *TrackedRequestToken) doneLocked(ctx context.Context) { + if t.done { + log.Fatalf(ctx, "duplicate Done() call") + } + t.done = true + t.b.evalTracker.Untrack(ctx, t.tok) +} + +// stillTracked returns true if no Done* method has been called. +func (t *TrackedRequestToken) stillTracked() bool { + return !t.done +} + +// Move returns a new token which can untrack the request. The original token is +// neutered; calling DoneIfNotMoved on it becomes a no-op. +func (t *TrackedRequestToken) Move(ctx context.Context) TrackedRequestToken { + if t.done { + log.Fatalf(ctx, "attempting to Move() after Done() call") + } + cpy := *t + t.done = true + return cpy +} + +// TrackEvaluatingRequest atomically starts tracking an evaluating request and +// returns the minimum timestamp at which this request can write. The tracked +// request is identified by its tentative write timestamp. After calling this, +// the caller must bump the write timestamp to at least the returned minTS. +// +// The returned token must be used to eventually remove this request from the +// tracked set by calling tok.DoneIfNotMoved(); the removal will allow timestamps above +// its write timestamp to be closed. If the evaluation results in a proposal, +// the token will make it back to this propBuf through Insert; in this case it +// will be the propBuf itself that ultimately stops tracking the request once +// the proposal is flushed from the buffer.
+func (b *propBuf) TrackEvaluatingRequest( + ctx context.Context, wts hlc.Timestamp, +) (minTS hlc.Timestamp, _ TrackedRequestToken) { + b.p.rlocker().Lock() + defer b.p.rlocker().Unlock() + + minTS = b.assignedClosedTimestamp.Next() + wts.Forward(minTS) + tok := b.evalTracker.Track(ctx, wts) + return minTS, TrackedRequestToken{tok: tok, b: b} +} + const propBufArrayMinSize = 4 const propBufArrayMaxSize = 256 const propBufArrayShrinkDelay = 16 @@ -702,6 +1001,8 @@ func (a *propBufArray) adjustSize(used int) { // replicaProposer implements the proposer interface. type replicaProposer Replica +var _ proposer = &replicaProposer{} + func (rp *replicaProposer) locker() sync.Locker { return &rp.mu.RWMutex } @@ -726,6 +1027,14 @@ func (rp *replicaProposer) enqueueUpdateCheck() { rp.store.enqueueRaftUpdateCheck(rp.RangeID) } +func (rp *replicaProposer) closeTimestampPolicy() roachpb.RangeClosedTimestampPolicy { + return (*Replica)(rp).closedTimestampPolicyRLocked() +} + +func (rp *replicaProposer) raftTransportClosedTimestampEnabled() bool { + return !(*Replica)(rp).mu.state.ClosedTimestamp.IsEmpty() +} + func (rp *replicaProposer) withGroupLocked(fn func(raftGroup proposerRaft) error) error { // Pass true for mayCampaignOnWake because we're about to propose a command. 
return (*Replica)(rp).withRaftGroupLocked(true, func(raftGroup *raft.RawNode) (bool, error) { diff --git a/pkg/kv/kvserver/replica_proposal_buf_test.go b/pkg/kv/kvserver/replica_proposal_buf_test.go index 116009a2acb9..07b5b159bd15 100644 --- a/pkg/kv/kvserver/replica_proposal_buf_test.go +++ b/pkg/kv/kvserver/replica_proposal_buf_test.go @@ -17,10 +17,16 @@ import ( "testing" "time" + "github.com/cockroachdb/cockroach/pkg/kv/kvserver/closedts" + "github.com/cockroachdb/cockroach/pkg/kv/kvserver/closedts/tracker" + "github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverbase" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverpb" "github.com/cockroachdb/cockroach/pkg/roachpb" + "github.com/cockroachdb/cockroach/pkg/settings/cluster" + "github.com/cockroachdb/cockroach/pkg/util/hlc" "github.com/cockroachdb/cockroach/pkg/util/leaktest" "github.com/cockroachdb/cockroach/pkg/util/log" + "github.com/cockroachdb/cockroach/pkg/util/protoutil" "github.com/cockroachdb/cockroach/pkg/util/syncutil" "github.com/cockroachdb/errors" "github.com/stretchr/testify/assert" @@ -53,16 +59,31 @@ type testProposer struct { // If leaderReplicaInDescriptor is set, this specifies what type of replica it // is. Some types of replicas are not eligible to get a lease. leaderReplicaType roachpb.ReplicaType + rangePolicy roachpb.RangeClosedTimestampPolicy } +var _ proposer = &testProposer{} + type testProposerRaft struct { status raft.BasicStatus + // lastProps are the command that the propBuf flushed last. + lastProps []kvserverpb.RaftCommand } -var _ proposerRaft = testProposerRaft{} +var _ proposerRaft = &testProposerRaft{} -func (t testProposerRaft) Step(raftpb.Message) error { - // TODO(andrei, nvanbenschoten): Capture the message and test against it. +func (t *testProposerRaft) Step(msg raftpb.Message) error { + if msg.Type != raftpb.MsgProp { + return nil + } + // Decode and save all the commands. 
+ t.lastProps = make([]kvserverpb.RaftCommand, len(msg.Entries)) + for i, e := range msg.Entries { + _ /* idKey */, encodedCommand := DecodeRaftCommand(e.Data) + if err := protoutil.Unmarshal(encodedCommand, &t.lastProps[i]); err != nil { + return err + } + } return nil } @@ -99,6 +120,14 @@ func (t *testProposer) enqueueUpdateCheck() { t.enqueued++ } +func (t *testProposer) closeTimestampPolicy() roachpb.RangeClosedTimestampPolicy { + return t.rangePolicy +} + +func (t *testProposer) raftTransportClosedTimestampEnabled() bool { + return true +} + func (t *testProposer) withGroupLocked(fn func(proposerRaft) error) error { // Note that t.raftGroup can be nil, which FlushLockedWithRaftGroup supports. return fn(t.raftGroup) @@ -150,18 +179,55 @@ func (t *testProposer) rejectProposalWithRedirectLocked( t.onRejectProposalWithRedirectLocked(prop, redirectTo) } -func newPropData(leaseReq bool) (*ProposalData, []byte) { +// proposalCreator holds on to a lease and creates proposals using it. +type proposalCreator struct { + lease kvserverpb.LeaseStatus +} + +func (pc proposalCreator) newPutProposal() (*ProposalData, []byte) { var ba roachpb.BatchRequest - if leaseReq { - ba.Add(&roachpb.RequestLeaseRequest{}) - } else { - ba.Add(&roachpb.PutRequest{}) + ba.Add(&roachpb.PutRequest{}) + return pc.newProposal(ba) +} + +func (pc proposalCreator) newLeaseProposal(lease roachpb.Lease) (*ProposalData, []byte) { + var ba roachpb.BatchRequest + ba.Add(&roachpb.RequestLeaseRequest{Lease: lease}) + return pc.newProposal(ba) +} + +func (pc proposalCreator) newProposal(ba roachpb.BatchRequest) (*ProposalData, []byte) { + var lease *roachpb.Lease + r, ok := ba.GetArg(roachpb.RequestLease) + if ok { + lease = &r.(*roachpb.RequestLeaseRequest).Lease + } + p := &ProposalData{ + ctx: context.Background(), + idKey: kvserverbase.CmdIDKey("test-cmd"), + command: &kvserverpb.RaftCommand{ + ReplicatedEvalResult: kvserverpb.ReplicatedEvalResult{ + State: &kvserverpb.ReplicaState{Lease: lease}, + 
}, + }, + Request: &ba, + leaseStatus: pc.lease, + } + return p, pc.encodeProposal(p) +} + +func (pc proposalCreator) encodeProposal(p *ProposalData) []byte { + cmdLen := p.command.Size() + needed := raftCommandPrefixLen + cmdLen + + kvserverpb.MaxRaftCommandFooterSize() + + kvserverpb.MaxClosedTimestampFooterSize() + data := make([]byte, raftCommandPrefixLen, needed) + encodeRaftCommandPrefix(data, raftVersionStandard, p.idKey) + data = data[:raftCommandPrefixLen+p.command.Size()] + if _, err := protoutil.MarshalTo(p.command, data[raftCommandPrefixLen:]); err != nil { + panic(err) } - return &ProposalData{ - ctx: context.Background(), - command: &kvserverpb.RaftCommand{}, - Request: &ba, - }, make([]byte, 0, kvserverpb.MaxRaftCommandFooterSize()) + return data } // TestProposalBuffer tests the basic behavior of the Raft proposal buffer. @@ -172,14 +238,23 @@ func TestProposalBuffer(t *testing.T) { var p testProposer var b propBuf - b.Init(&p) + var pc proposalCreator + clock := hlc.NewClock(hlc.UnixNano, time.Nanosecond) + b.Init(&p, tracker.NewLockfreeTracker(), clock, cluster.MakeTestingClusterSettings()) // Insert propBufArrayMinSize proposals. The buffer should not be flushed. num := propBufArrayMinSize for i := 0; i < num; i++ { leaseReq := i == 3 - pd, data := newPropData(leaseReq) - mlai, err := b.Insert(ctx, pd, data) + var pd *ProposalData + var data []byte + if leaseReq { + pd, data = pc.newLeaseProposal(roachpb.Lease{}) + } else { + pd, data = pc.newPutProposal() + } + _, tok := b.TrackEvaluatingRequest(ctx, hlc.MinTimestamp) + mlai, err := b.Insert(ctx, pd, data, tok) require.Nil(t, err) if leaseReq { expMlai := uint64(i) @@ -196,12 +271,14 @@ func TestProposalBuffer(t *testing.T) { require.Equal(t, 1, p.enqueued) require.Equal(t, 0, p.registered) } + require.Equal(t, num, b.evalTracker.Count()) // Insert another proposal. This causes the buffer to flush. Doing so // results in a lease applied index being skipped, which is harmless. 
// Remember that the lease request above did not receive a lease index. - pd, data := newPropData(false) - mlai, err := b.Insert(ctx, pd, data) + pd, data := pc.newPutProposal() + _, tok := b.TrackEvaluatingRequest(ctx, hlc.MinTimestamp) + mlai, err := b.Insert(ctx, pd, data, tok) require.Nil(t, err) expMlai := uint64(num + 1) require.Equal(t, expMlai, mlai) @@ -212,6 +289,7 @@ func TestProposalBuffer(t *testing.T) { require.Equal(t, num, p.registered) require.Equal(t, uint64(num), b.liBase) require.Equal(t, 2*propBufArrayMinSize, b.arr.len()) + require.Equal(t, 1, b.evalTracker.Count()) // Increase the proposer's applied lease index and flush. The buffer's // lease index offset should jump up. @@ -224,7 +302,8 @@ func TestProposalBuffer(t *testing.T) { // Insert one more proposal. The lease applied index should adjust to // the increase accordingly. - mlai, err = b.Insert(ctx, pd, data) + _, tok = b.TrackEvaluatingRequest(ctx, hlc.MinTimestamp) + mlai, err = b.Insert(ctx, pd, data, tok) require.Nil(t, err) expMlai = p.lai + 1 require.Equal(t, expMlai, mlai) @@ -252,7 +331,9 @@ func TestProposalBufferConcurrentWithDestroy(t *testing.T) { var p testProposer var b propBuf - b.Init(&p) + var pc proposalCreator + clock := hlc.NewClock(hlc.UnixNano, time.Nanosecond) + b.Init(&p, tracker.NewLockfreeTracker(), clock, cluster.MakeTestingClusterSettings()) mlais := make(map[uint64]struct{}) dsErr := errors.New("destroyed") @@ -263,8 +344,9 @@ func TestProposalBufferConcurrentWithDestroy(t *testing.T) { for i := 0; i < concurrency; i++ { g.Go(func() error { for { - pd, data := newPropData(false) - mlai, err := b.Insert(ctx, pd, data) + pd, data := pc.newPutProposal() + _, tok := b.TrackEvaluatingRequest(ctx, hlc.MinTimestamp) + mlai, err := b.Insert(ctx, pd, data, tok) if err != nil { if errors.Is(err, dsErr) { return nil @@ -323,12 +405,16 @@ func TestProposalBufferRegistersAllOnProposalError(t *testing.T) { var p testProposer var b propBuf - b.Init(&p) + var pc 
proposalCreator + clock := hlc.NewClock(hlc.UnixNano, time.Nanosecond) + b.Init(&p, tracker.NewLockfreeTracker(), clock, cluster.MakeTestingClusterSettings()) num := propBufArrayMinSize + toks := make([]TrackedRequestToken, num) for i := 0; i < num; i++ { - pd, data := newPropData(false) - _, err := b.Insert(ctx, pd, data) + pd, data := pc.newPutProposal() + _, toks[i] = b.TrackEvaluatingRequest(ctx, hlc.MinTimestamp) + _, err := b.Insert(ctx, pd, data, toks[i]) require.Nil(t, err) } require.Equal(t, num, b.Len()) @@ -346,6 +432,7 @@ func TestProposalBufferRegistersAllOnProposalError(t *testing.T) { err := b.flushLocked(ctx) require.Equal(t, propErr, err) require.Equal(t, num, p.registered) + require.Zero(t, b.evalTracker.Count()) } // TestProposalBufferRegistrationWithInsertionErrors tests that if during @@ -358,12 +445,22 @@ func TestProposalBufferRegistrationWithInsertionErrors(t *testing.T) { var p testProposer var b propBuf - b.Init(&p) + var pc proposalCreator + clock := hlc.NewClock(hlc.UnixNano, time.Nanosecond) + b.Init(&p, tracker.NewLockfreeTracker(), clock, cluster.MakeTestingClusterSettings()) num := propBufArrayMinSize / 2 + toks1 := make([]TrackedRequestToken, num) for i := 0; i < num; i++ { - pd, data := newPropData(i%2 == 0) - _, err := b.Insert(ctx, pd, data) + var pd *ProposalData + var data []byte + if i%2 == 0 { + pd, data = pc.newLeaseProposal(roachpb.Lease{}) + } else { + pd, data = pc.newPutProposal() + } + _, toks1[i] = b.TrackEvaluatingRequest(ctx, hlc.MinTimestamp) + _, err := b.Insert(ctx, pd, data, toks1[i]) require.Nil(t, err) } @@ -372,9 +469,17 @@ func TestProposalBufferRegistrationWithInsertionErrors(t *testing.T) { return 0, insertErr } + toks2 := make([]TrackedRequestToken, num) for i := 0; i < num; i++ { - pd, data := newPropData(i%2 == 0) - _, err := b.Insert(ctx, pd, data) + var pd *ProposalData + var data []byte + if i%2 == 0 { + pd, data = pc.newLeaseProposal(roachpb.Lease{}) + } else { + pd, data = pc.newPutProposal() + } + 
_, toks2[i] = b.TrackEvaluatingRequest(ctx, hlc.MinTimestamp) + _, err := b.Insert(ctx, pd, data, toks2[i]) require.Equal(t, insertErr, err) } require.Equal(t, 2*num, b.Len()) @@ -383,6 +488,7 @@ func TestProposalBufferRegistrationWithInsertionErrors(t *testing.T) { require.Equal(t, 0, b.Len()) require.Equal(t, num, p.registered) + require.Zero(t, b.evalTracker.Count()) } // TestPropBufCnt tests the basic behavior of the counter maintained by the @@ -494,6 +600,7 @@ func TestProposalBufferRejectLeaseAcqOnFollower(t *testing.T) { } { t.Run(tc.name, func(t *testing.T) { var p testProposer + var pc proposalCreator // p.replicaID() is hardcoded; it'd better be hardcoded to what this test // expects. require.Equal(t, self, uint64(p.replicaID())) @@ -519,7 +626,7 @@ func TestProposalBufferRejectLeaseAcqOnFollower(t *testing.T) { Lead: tc.leader, }, } - r := testProposerRaft{ + r := &testProposerRaft{ status: raftStatus, } p.raftGroup = r @@ -527,10 +634,12 @@ func TestProposalBufferRejectLeaseAcqOnFollower(t *testing.T) { p.leaderReplicaType = tc.leaderRepType var b propBuf - b.Init(&p) + clock := hlc.NewClock(hlc.UnixNano, time.Nanosecond) + b.Init(&p, tracker.NewLockfreeTracker(), clock, cluster.MakeTestingClusterSettings()) - pd, data := newPropData(true /* leaseReq */) - _, err := b.Insert(ctx, pd, data) + pd, data := pc.newLeaseProposal(roachpb.Lease{}) + _, tok := b.TrackEvaluatingRequest(ctx, hlc.MinTimestamp) + _, err := b.Insert(ctx, pd, data, tok) require.NoError(t, err) require.NoError(t, b.flushLocked(ctx)) if tc.expRejection { @@ -541,3 +650,273 @@ func TestProposalBufferRejectLeaseAcqOnFollower(t *testing.T) { }) } } + +func TestProposalBufferComputeClosedTimestampTarget(t *testing.T) { + defer leaktest.AfterTest(t)() + defer log.Scope(t).Close(t) + + const nowNanos = 100 + const maxOffsetNanos = 20 + manualClock := hlc.NewManualClock(nowNanos) + clock := hlc.NewClock(manualClock.UnixNano, maxOffsetNanos) + + const lagTargetNanos = 10 + st := 
cluster.MakeTestingClusterSettings() + closedts.TargetDuration.Override(&st.SV, lagTargetNanos) + + for _, tc := range []struct { + rangePolicy roachpb.RangeClosedTimestampPolicy + expClosedTSTarget hlc.Timestamp + }{ + { + rangePolicy: roachpb.LAG_BY_CLUSTER_SETTING, + expClosedTSTarget: hlc.Timestamp{WallTime: nowNanos - lagTargetNanos}, + }, + { + rangePolicy: roachpb.LEAD_FOR_GLOBAL_READS, + expClosedTSTarget: hlc.Timestamp{WallTime: nowNanos - lagTargetNanos}, + // TODO(andrei, nvanbenschoten): What we should be expecting here is the following, once + // the propBuf starts properly implementing this timestamp closing policy: + // expClosedTSTarget: hlc.Timestamp{WallTime: nowNanos + 2*maxOffsetNanos, Synthetic: true}, + }, + } { + t.Run(tc.rangePolicy.String(), func(t *testing.T) { + var p testProposer + p.rangePolicy = tc.rangePolicy + var b propBuf + b.Init(&p, tracker.NewLockfreeTracker(), clock, st) + require.Equal(t, tc.expClosedTSTarget, b.computeClosedTimestampTarget()) + }) + } +} + +// Test that the propBuf properly assigns closed timestamps to proposals being +// flushed out of it. Each subtest proposes one command and checks for the +// expected closed timestamp being written to the proposal by the propBuf. 
+func TestProposalBufferClosedTimestamp(t *testing.T) { + defer leaktest.AfterTest(t)() + defer log.Scope(t).Close(t) + ctx := context.Background() + + mc := hlc.NewManualClock((1613588135 * time.Second).Nanoseconds()) + clock := hlc.NewClock(mc.UnixNano, time.Nanosecond) + st := cluster.MakeTestingClusterSettings() + closedts.TargetDuration.Override(&st.SV, time.Second) + now := clock.Now() + newLeaseStart := now.MustToClockTimestamp() + nowMinusClosedLag := hlc.Timestamp{ + WallTime: mc.UnixNano() - closedts.TargetDuration.Get(&st.SV).Nanoseconds(), + } + nowMinusTwiceClosedLag := hlc.Timestamp{ + WallTime: mc.UnixNano() - 2*closedts.TargetDuration.Get(&st.SV).Nanoseconds(), + } + expiredLeaseTimestamp := hlc.Timestamp{WallTime: mc.UnixNano() - 1000} + someClosedTS := hlc.Timestamp{WallTime: mc.UnixNano() - 2000} + + type reqType int + checkClosedTS := func(t *testing.T, r *testProposerRaft, exp hlc.Timestamp) { + require.Len(t, r.lastProps, 1) + require.Equal(t, exp, r.lastProps[0].ClosedTimestamp) + } + + // The lease that the proposals are made under. + curLease := roachpb.Lease{ + Epoch: 0, // Expiration-based lease. + Sequence: 1, + Start: hlc.ClockTimestamp{}, + // Expiration is filled by each test. + Expiration: nil, + } + + const ( + regularWrite reqType = iota + // newLease means that the request is a lease acquisition (new lease or + // lease extension). + newLease + leaseTransfer + ) + + for _, tc := range []struct { + name string + + reqType reqType + // The lower bound of all currently-evaluating requests. We can't close this + // or above. + trackerLowerBound hlc.Timestamp + // The expiration of the current lease. The closed timestamp of most + // proposals is upper-bounded by this, which matters for + // LEAD_FOR_GLOBAL_READS ranges (on other ranges the propBuf would never + // like to close a timestamp above the current lease expiration because it + // wouldn't be processing commands if the lease is expired).
+ leaseExp hlc.Timestamp + rangePolicy roachpb.RangeClosedTimestampPolicy + // The highest closed timestamp that the propBuf has previously attached to + // a proposal. The propBuf should never propose a new closedTS below this. + prevClosedTimestamp hlc.Timestamp + + // lease is used when reqType = newLease. This will be the lease being + // proposed. + lease roachpb.Lease + + expClosed hlc.Timestamp + }{ + { + name: "basic", + reqType: regularWrite, + trackerLowerBound: hlc.Timestamp{}, + leaseExp: hlc.MaxTimestamp, + rangePolicy: roachpb.LAG_BY_CLUSTER_SETTING, + prevClosedTimestamp: hlc.Timestamp{}, + expClosed: nowMinusClosedLag, + }, + { + // The request tracker will prevent us from closing below its lower bound. + name: "not closing below evaluating requests", + reqType: regularWrite, + trackerLowerBound: nowMinusTwiceClosedLag, + leaseExp: hlc.MaxTimestamp, + rangePolicy: roachpb.LAG_BY_CLUSTER_SETTING, + prevClosedTimestamp: hlc.Timestamp{}, + expClosed: nowMinusTwiceClosedLag.FloorPrev(), + }, + { + // Like the basic test, except that we can't close timestamp below what + // we've already closed previously. + name: "no regression", + reqType: regularWrite, + trackerLowerBound: hlc.Timestamp{}, + leaseExp: hlc.MaxTimestamp, + rangePolicy: roachpb.LAG_BY_CLUSTER_SETTING, + prevClosedTimestamp: someClosedTS, + expClosed: someClosedTS, + }, + { + name: "brand new lease", + reqType: newLease, + lease: roachpb.Lease{ + // Higher sequence => this is a brand new lease, not an extension. + Sequence: curLease.Sequence + 1, + Start: newLeaseStart, + }, + trackerLowerBound: hlc.Timestamp{}, + // The current lease can be expired; we won't backtrack the closed + // timestamp to this expiration. + leaseExp: expiredLeaseTimestamp, + rangePolicy: roachpb.LAG_BY_CLUSTER_SETTING, + expClosed: newLeaseStart.ToTimestamp(), + }, + { + name: "lease extension", + reqType: newLease, + lease: roachpb.Lease{ + // Same sequence => this is a lease extension. 
+ Sequence: curLease.Sequence, + Start: newLeaseStart, + }, + trackerLowerBound: hlc.Timestamp{}, + // The current lease can be expired; we won't backtrack the closed + // timestamp to this expiration. + leaseExp: expiredLeaseTimestamp, + rangePolicy: roachpb.LAG_BY_CLUSTER_SETTING, + // Lease extensions don't carry closed timestamps because they don't get + // MLAIs, and so they can be reordered. + expClosed: hlc.Timestamp{}, + }, + { + // Lease transfers behave just like regular writes. The lease start time + // doesn't matter. + name: "lease transfer", + reqType: leaseTransfer, + lease: roachpb.Lease{ + Sequence: curLease.Sequence + 1, + Start: newLeaseStart, + }, + trackerLowerBound: hlc.Timestamp{}, + leaseExp: hlc.MaxTimestamp, + rangePolicy: roachpb.LAG_BY_CLUSTER_SETTING, + expClosed: nowMinusClosedLag, + }, + { + // With the LEAD_FOR_GLOBAL_READS policy, we're expecting to close + // timestamps in the future. + // TODO(andrei,nvanbenschoten): The global policy is not actually hooked + // up at the moment, so this test expects a past timestamp to be closed. + // Once it is hooked up, we should also add another test that checks that + // timestamps above the current lease expiration are not closed. 
+ name: "global range", + reqType: regularWrite, + trackerLowerBound: hlc.Timestamp{}, + leaseExp: hlc.MaxTimestamp, + rangePolicy: roachpb.LEAD_FOR_GLOBAL_READS, + prevClosedTimestamp: hlc.Timestamp{}, + expClosed: nowMinusClosedLag, + }, + } { + t.Run(tc.name, func(t *testing.T) { + r := &testProposerRaft{} + p := testProposer{ + lai: 10, + raftGroup: r, + } + tracker := mockTracker{ + lowerBound: tc.trackerLowerBound, + } + pc := proposalCreator{lease: kvserverpb.LeaseStatus{Lease: curLease}} + pc.lease.Lease.Expiration = &tc.leaseExp + + var b propBuf + b.Init(&p, tracker, clock, st) + b.forwardClosedTimestampLocked(tc.prevClosedTimestamp) + + var pd *ProposalData + var data []byte + switch tc.reqType { + case regularWrite: + pd, data = pc.newPutProposal() + case newLease: + pd, data = pc.newLeaseProposal(tc.lease) + case leaseTransfer: + var ba roachpb.BatchRequest + ba.Add(&roachpb.TransferLeaseRequest{ + Lease: roachpb.Lease{ + Start: now.MustToClockTimestamp(), + Sequence: pc.lease.Lease.Sequence + 1, + }, + PrevLease: pc.lease.Lease, + }) + pd, data = pc.newProposal(ba) + default: + t.Fatalf("unknown req type %d", tc.reqType) + } + tok := TrackedRequestToken{ + done: false, + tok: nil, + b: &b, + } + _, err := b.Insert(ctx, pd, data, tok) + require.NoError(t, err) + require.NoError(t, b.flushLocked(ctx)) + checkClosedTS(t, r, tc.expClosed) + }) + } +} + +type mockTracker struct { + lowerBound hlc.Timestamp +} + +func (t mockTracker) Track(ctx context.Context, ts hlc.Timestamp) tracker.RemovalToken { + panic("unimplemented") +} + +func (t mockTracker) Untrack(context.Context, tracker.RemovalToken) {} + +func (t mockTracker) LowerBound(context.Context) hlc.Timestamp { + return t.lowerBound +} + +func (t mockTracker) Count() int { + panic("unimplemented") +} + +var _ tracker.Tracker = mockTracker{} diff --git a/pkg/kv/kvserver/replica_raft.go b/pkg/kv/kvserver/replica_raft.go index 686294c3ee75..2568c9b44158 100644 --- a/pkg/kv/kvserver/replica_raft.go +++ 
b/pkg/kv/kvserver/replica_raft.go @@ -58,6 +58,10 @@ func makeIDKey() kvserverbase.CmdIDKey { // caller should relinquish all ownership of it. If it does return an error, the // caller retains full ownership over the guard. // +// evalAndPropose takes ownership of the supplied token; the caller should +// tok.Move() it into this method. It will be used to untrack the request once +// it comes out of the proposal buffer. +// // Return values: // - a channel which receives a response or error upon application // - a closure used to attempt to abandon the command. When called, it unbinds @@ -73,9 +77,11 @@ func (r *Replica) evalAndPropose( g *concurrency.Guard, st kvserverpb.LeaseStatus, lul hlc.Timestamp, + tok TrackedRequestToken, ) (chan proposalResult, func(), int64, *roachpb.Error) { + defer tok.DoneIfNotMoved(ctx) idKey := makeIDKey() - proposal, pErr := r.requestToProposal(ctx, idKey, ba, lul, g.LatchSpans()) + proposal, pErr := r.requestToProposal(ctx, idKey, ba, st, lul, g.LatchSpans()) log.Event(proposal.ctx, "evaluated request") // If the request hit a server-side concurrency retry error, immediately @@ -200,7 +206,7 @@ func (r *Replica) evalAndPropose( } } - maxLeaseIndex, pErr := r.propose(ctx, proposal) + maxLeaseIndex, pErr := r.propose(ctx, proposal, tok.Move(ctx)) if pErr != nil { return nil, nil, 0, pErr } @@ -234,7 +240,14 @@ func (r *Replica) evalAndPropose( // the method returns, all access to the command must be performed while holding // Replica.mu and Replica.raftMu. If a non-nil error is returned the // MaxLeaseIndex is not updated. -func (r *Replica) propose(ctx context.Context, p *ProposalData) (index int64, pErr *roachpb.Error) { +// +// propose takes ownership of the supplied token; the caller should tok.Move() +// it into this method. It will be used to untrack the request once it comes out +// of the proposal buffer. 
+func (r *Replica) propose( + ctx context.Context, p *ProposalData, tok TrackedRequestToken, +) (index int64, pErr *roachpb.Error) { + defer tok.DoneIfNotMoved(ctx) // If an error occurs reset the command's MaxLeaseIndex to its initial value. // Failure to propose will propagate to the client. An invariant of this @@ -304,8 +317,12 @@ func (r *Replica) propose(ctx context.Context, p *ProposalData) (index int64, pE preLen = raftCommandPrefixLen } cmdLen := p.command.Size() - cap := preLen + cmdLen + kvserverpb.MaxRaftCommandFooterSize() - data := make([]byte, preLen, cap) + // Allocate the data slice with enough capacity to eventually hold the two + // "footers" that are filled later. + needed := preLen + cmdLen + + kvserverpb.MaxRaftCommandFooterSize() + + kvserverpb.MaxClosedTimestampFooterSize() + data := make([]byte, preLen, needed) // Encode prefix with command ID, if necessary. if prefix { encodeRaftCommandPrefix(data, version, p.idKey) @@ -345,7 +362,7 @@ func (r *Replica) propose(ctx context.Context, p *ProposalData) (index int64, pE // // NB: we must not hold r.mu while using the proposal buffer, see comment // on the field. - maxLeaseIndex, err := r.mu.proposalBuf.Insert(ctx, p, data) + maxLeaseIndex, err := r.mu.proposalBuf.Insert(ctx, p, data, tok.Move(ctx)) if err != nil { return 0, roachpb.NewError(err) } @@ -977,6 +994,9 @@ const ( // waiting on. // mu must be held. // +// Note that reproposals don't need to worry about checking the closed timestamp +// before reproposing, since they're reusing the original LAI. 
+// // refreshAtDelta only applies for reasonTicks and specifies how old (in ticks) // a command must be for it to be inspected; the usual value is the number of // ticks of an election timeout (affect only proposals that have had ample time diff --git a/pkg/kv/kvserver/replica_rangefeed_test.go b/pkg/kv/kvserver/replica_rangefeed_test.go index 4105f97d4374..de435bd8712b 100644 --- a/pkg/kv/kvserver/replica_rangefeed_test.go +++ b/pkg/kv/kvserver/replica_rangefeed_test.go @@ -167,6 +167,7 @@ func TestReplicaRangefeed(t *testing.T) { } events = stream.Events() + // Filter out checkpoints. Those are not deterministic; they can come at any time. var filteredEvents []*roachpb.RangeFeedEvent for _, e := range events { if e.Checkpoint != nil { diff --git a/pkg/kv/kvserver/replica_test.go b/pkg/kv/kvserver/replica_test.go index caa40fde4334..ac468760c49f 100644 --- a/pkg/kv/kvserver/replica_test.go +++ b/pkg/kv/kvserver/replica_test.go @@ -198,6 +198,9 @@ func (tc *testContext) Clock() *hlc.Clock { func (tc *testContext) Start(t testing.TB, stopper *stop.Stopper) { tc.manualClock = hlc.NewManualClock(123) cfg := TestStoreConfig(hlc.NewClock(tc.manualClock.UnixNano, time.Nanosecond)) + // testContext tests like to move the manual clock around and assume that they can write at past + // timestamps. + cfg.TestingKnobs.DontCloseTimestamps = true tc.StartWithStoreConfig(t, stopper, cfg) } @@ -617,7 +620,8 @@ func sendLeaseRequest(r *Replica, l *roachpb.Lease) error { ba.Timestamp = r.store.Clock().Now() ba.Add(&roachpb.RequestLeaseRequest{Lease: *l}) st := r.CurrentLeaseStatus(ctx) - ch, _, _, pErr := r.evalAndPropose(ctx, &ba, allSpansGuard(), st, hlc.Timestamp{}) + _, tok := r.mu.proposalBuf.TrackEvaluatingRequest(ctx, hlc.MinTimestamp) + ch, _, _, pErr := r.evalAndPropose(ctx, &ba, allSpansGuard(), st, hlc.Timestamp{}, tok.Move(ctx)) if pErr == nil { // Next if the command was committed, wait for the range to apply it. 
// TODO(bdarnell): refactor this to a more conventional error-handling pattern. @@ -1418,7 +1422,8 @@ func TestReplicaLeaseRejectUnknownRaftNodeID(t *testing.T) { ba := roachpb.BatchRequest{} ba.Timestamp = tc.repl.store.Clock().Now() ba.Add(&roachpb.RequestLeaseRequest{Lease: *lease}) - ch, _, _, pErr := tc.repl.evalAndPropose(ctx, &ba, allSpansGuard(), st, hlc.Timestamp{}) + _, tok := tc.repl.mu.proposalBuf.TrackEvaluatingRequest(ctx, hlc.MinTimestamp) + ch, _, _, pErr := tc.repl.evalAndPropose(ctx, &ba, allSpansGuard(), st, hlc.Timestamp{}, tok.Move(ctx)) if pErr == nil { // Next if the command was committed, wait for the range to apply it. // TODO(bdarnell): refactor to a more conventional error-handling pattern. @@ -7894,7 +7899,8 @@ func TestReplicaCancelRaftCommandProgress(t *testing.T) { }, }) st := repl.CurrentLeaseStatus(ctx) - ch, _, idx, err := repl.evalAndPropose(ctx, &ba, allSpansGuard(), st, hlc.Timestamp{}) + _, tok := repl.mu.proposalBuf.TrackEvaluatingRequest(ctx, hlc.MinTimestamp) + ch, _, idx, err := repl.evalAndPropose(ctx, &ba, allSpansGuard(), st, hlc.Timestamp{}, tok.Move(ctx)) if err != nil { t.Fatal(err) } @@ -7963,8 +7969,9 @@ func TestReplicaBurstPendingCommandsAndRepropose(t *testing.T) { Key: roachpb.Key(fmt.Sprintf("k%d", i)), }, }) + _, tok := tc.repl.mu.proposalBuf.TrackEvaluatingRequest(ctx, hlc.MinTimestamp) st := tc.repl.CurrentLeaseStatus(ctx) - ch, _, idx, err := tc.repl.evalAndPropose(ctx, &ba, allSpansGuard(), st, hlc.Timestamp{}) + ch, _, idx, err := tc.repl.evalAndPropose(ctx, &ba, allSpansGuard(), st, hlc.Timestamp{}, tok.Move(ctx)) if err != nil { t.Fatal(err) } @@ -8082,8 +8089,8 @@ func TestReplicaRefreshPendingCommandsTicks(t *testing.T) { var ba roachpb.BatchRequest ba.Timestamp = tc.Clock().Now() ba.Add(&roachpb.PutRequest{RequestHeader: roachpb.RequestHeader{Key: roachpb.Key(id)}}) - lease, _ := r.GetLease() - cmd, pErr := r.requestToProposal(ctx, kvserverbase.CmdIDKey(id), &ba, hlc.Timestamp{}, &allSpans) + st := 
r.CurrentLeaseStatus(ctx) + cmd, pErr := r.requestToProposal(ctx, kvserverbase.CmdIDKey(id), &ba, st, hlc.Timestamp{}, &allSpans) if pErr != nil { t.Fatal(pErr) } @@ -8092,8 +8099,9 @@ func TestReplicaRefreshPendingCommandsTicks(t *testing.T) { dropProposals.m[cmd] = struct{}{} // silently drop proposals dropProposals.Unlock() - cmd.command.ProposerLeaseSequence = lease.Sequence - if _, pErr := r.propose(ctx, cmd); pErr != nil { + cmd.command.ProposerLeaseSequence = st.Lease.Sequence + _, tok := r.mu.proposalBuf.TrackEvaluatingRequest(ctx, hlc.MinTimestamp) + if _, pErr := r.propose(ctx, cmd, tok); pErr != nil { t.Error(pErr) } r.mu.Lock() @@ -8204,7 +8212,7 @@ func TestReplicaRefreshMultiple(t *testing.T) { incCmdID = makeIDKey() atomic.StoreInt32(&filterActive, 1) - proposal, pErr := repl.requestToProposal(ctx, incCmdID, &ba, hlc.Timestamp{}, &allSpans) + proposal, pErr := repl.requestToProposal(ctx, incCmdID, &ba, repl.CurrentLeaseStatus(ctx), hlc.Timestamp{}, &allSpans) if pErr != nil { t.Fatal(pErr) } @@ -8234,7 +8242,8 @@ func TestReplicaRefreshMultiple(t *testing.T) { // that it will generate a retry when it fails. Then call refreshProposals // twice to repropose it and put it in the logs twice more. proposal.command.ProposerLeaseSequence = repl.mu.state.Lease.Sequence - if _, pErr := repl.propose(ctx, proposal); pErr != nil { + _, tok := repl.mu.proposalBuf.TrackEvaluatingRequest(ctx, hlc.MinTimestamp) + if _, pErr := repl.propose(ctx, proposal, tok); pErr != nil { t.Fatal(pErr) } repl.mu.Lock() @@ -8244,6 +8253,7 @@ func TestReplicaRefreshMultiple(t *testing.T) { repl.refreshProposalsLocked(ctx, 0 /* refreshAtDelta */, reasonNewLeader) repl.refreshProposalsLocked(ctx, 0 /* refreshAtDelta */, reasonNewLeader) repl.mu.Unlock() + require.Zero(t, tc.repl.mu.proposalBuf.EvaluatingRequestsCount()) // Wait for our proposal to apply. The two refreshed proposals above // will fail due to their illegal lease index. 
Then they'll generate @@ -9238,6 +9248,7 @@ func TestCommandTooLarge(t *testing.T) { func TestErrorInRaftApplicationClearsIntents(t *testing.T) { defer leaktest.AfterTest(t)() defer log.Scope(t).Close(t) + ctx := context.Background() var storeKnobs StoreTestingKnobs var filterActive int32 @@ -9257,7 +9268,7 @@ func TestErrorInRaftApplicationClearsIntents(t *testing.T) { defer s.Stopper().Stop(context.Background()) splitKey := roachpb.Key("b") - if err := kvDB.AdminSplit(context.Background(), splitKey, hlc.MaxTimestamp /* expirationTime */); err != nil { + if err := kvDB.AdminSplit(ctx, splitKey, hlc.MaxTimestamp /* expirationTime */); err != nil { t.Fatal(err) } @@ -9296,7 +9307,8 @@ func TestErrorInRaftApplicationClearsIntents(t *testing.T) { exLease, _ := repl.GetLease() st := kvserverpb.LeaseStatus{Lease: exLease, State: kvserverpb.LeaseState_VALID} - ch, _, _, pErr := repl.evalAndPropose(context.Background(), &ba, allSpansGuard(), st, hlc.Timestamp{}) + _, tok := repl.mu.proposalBuf.TrackEvaluatingRequest(ctx, hlc.MinTimestamp) + ch, _, _, pErr := repl.evalAndPropose(ctx, &ba, allSpansGuard(), st, hlc.Timestamp{}, tok.Move(ctx)) if pErr != nil { t.Fatal(pErr) } @@ -9343,7 +9355,8 @@ func TestProposeWithAsyncConsensus(t *testing.T) { atomic.StoreInt32(&filterActive, 1) st := tc.repl.CurrentLeaseStatus(ctx) - ch, _, _, pErr := repl.evalAndPropose(ctx, &ba, allSpansGuard(), st, hlc.Timestamp{}) + _, tok := repl.mu.proposalBuf.TrackEvaluatingRequest(ctx, hlc.MinTimestamp) + ch, _, _, pErr := repl.evalAndPropose(ctx, &ba, allSpansGuard(), st, hlc.Timestamp{}, tok.Move(ctx)) if pErr != nil { t.Fatal(pErr) } @@ -9407,7 +9420,8 @@ func TestApplyPaginatedCommittedEntries(t *testing.T) { atomic.StoreInt32(&filterActive, 1) st := repl.CurrentLeaseStatus(ctx) - _, _, _, pErr := repl.evalAndPropose(ctx, &ba, allSpansGuard(), st, hlc.Timestamp{}) + _, tok := repl.mu.proposalBuf.TrackEvaluatingRequest(ctx, hlc.MinTimestamp) + _, _, _, pErr := repl.evalAndPropose(ctx, &ba, 
allSpansGuard(), st, hlc.Timestamp{}, tok.Move(ctx)) if pErr != nil { t.Fatal(pErr) } @@ -9425,7 +9439,8 @@ func TestApplyPaginatedCommittedEntries(t *testing.T) { ba2.Timestamp = tc.Clock().Now() var pErr *roachpb.Error - ch, _, _, pErr = repl.evalAndPropose(ctx, &ba2, allSpansGuard(), st, hlc.Timestamp{}) + _, tok := repl.mu.proposalBuf.TrackEvaluatingRequest(ctx, hlc.MinTimestamp) + ch, _, _, pErr = repl.evalAndPropose(ctx, &ba2, allSpansGuard(), st, hlc.Timestamp{}, tok.Move(ctx)) if pErr != nil { t.Fatal(pErr) } @@ -12537,9 +12552,10 @@ func TestProposalNotAcknowledgedOrReproposedAfterApplication(t *testing.T) { // Hold the RaftLock to ensure that after evalAndPropose our proposal is in // the proposal map. Entries are only removed from that map underneath raft. tc.repl.RaftLock() + _, tok := tc.repl.mu.proposalBuf.TrackEvaluatingRequest(ctx, hlc.MinTimestamp) sp := cfg.AmbientCtx.Tracer.StartSpan("replica send", tracing.WithForceRealSpan()) tracedCtx := tracing.ContextWithSpan(ctx, sp) - ch, _, _, pErr := tc.repl.evalAndPropose(tracedCtx, &ba, allSpansGuard(), st, hlc.Timestamp{}) + ch, _, _, pErr := tc.repl.evalAndPropose(tracedCtx, &ba, allSpansGuard(), st, hlc.Timestamp{}, tok) if pErr != nil { t.Fatal(pErr) } @@ -12625,13 +12641,14 @@ func TestLaterReproposalsDoNotReuseContext(t *testing.T) { Value: roachpb.MakeValueFromBytes([]byte("val")), }) + _, tok := tc.repl.mu.proposalBuf.TrackEvaluatingRequest(ctx, hlc.MinTimestamp) // Hold the RaftLock to encourage the reproposals to occur in the same batch. tc.repl.RaftLock() tracedCtx, sp := tracer.StartSpanCtx(ctx, "replica send", tracing.WithForceRealSpan()) // Go out of our way to enable recording so that expensive logging is enabled // for this context. 
sp.SetVerbose(true) - ch, _, _, pErr := tc.repl.evalAndPropose(tracedCtx, &ba, allSpansGuard(), st, hlc.Timestamp{}) + ch, _, _, pErr := tc.repl.evalAndPropose(tracedCtx, &ba, allSpansGuard(), st, hlc.Timestamp{}, tok.Move(ctx)) if pErr != nil { t.Fatal(pErr) } @@ -12800,7 +12817,7 @@ func TestContainsEstimatesClampProposal(t *testing.T) { ba.Timestamp = tc.Clock().Now() req := putArgs(roachpb.Key("some-key"), []byte("some-value")) ba.Add(&req) - proposal, err := tc.repl.requestToProposal(ctx, cmdIDKey, &ba, hlc.Timestamp{}, &allSpans) + proposal, err := tc.repl.requestToProposal(ctx, cmdIDKey, &ba, tc.repl.CurrentLeaseStatus(ctx), hlc.Timestamp{}, &allSpans) if err != nil { t.Error(err) } diff --git a/pkg/kv/kvserver/replica_write.go b/pkg/kv/kvserver/replica_write.go index b02db0ae3af0..fab0d7372cde 100644 --- a/pkg/kv/kvserver/replica_write.go +++ b/pkg/kv/kvserver/replica_write.go @@ -88,6 +88,25 @@ func (r *Replica) executeWriteBatch( minTS, untrack := r.store.cfg.ClosedTimestamp.Tracker.Track(ctx) defer untrack(ctx, 0, 0, 0) // covers all error returns below + // Start tracking this request. The act of tracking also gives us a closed + // timestamp, above which we must ensure we evaluate. We're going to pass + // in minTS to applyTimestampCache(), which bumps us accordingly if necessary. + // We need to start tracking this request before we know the final write + // timestamp at which this request will evaluate because we need to atomically + // read the closed timestamp and start to be tracked. + // TODO(andrei): The timestamp cache might bump us above the timestamp at + // which we're registering with the proposalBuf. In that case, this request + // will be tracked at an unnecessarily low timestamp. We could invent an + // interface through which to communicate the updated timestamp to the + // proposalBuf.
+ minTS2, tok := r.mu.proposalBuf.TrackEvaluatingRequest(ctx, ba.WriteTimestamp()) + defer tok.DoneIfNotMoved(ctx) + minTS.Forward(minTS2) + + if !ba.IsSingleSkipLeaseCheckRequest() && st.Expiration().Less(minTS) { + log.Fatalf(ctx, "closed timestamp above lease expiration (%s vs %s): %s", minTS, st.Expiration(), ba) + } + // Examine the timestamp cache for preceding commands which require this // command to move its timestamp forward. Or, in the case of a transactional // write, the txn timestamp and possible write-too-old bool. @@ -120,7 +139,7 @@ func (r *Replica) executeWriteBatch( // If the command is proposed to Raft, ownership of and responsibility for // the concurrency guard will be assumed by Raft, so provide the guard to // evalAndPropose. - ch, abandon, maxLeaseIndex, pErr := r.evalAndPropose(ctx, ba, g, st, localUncertaintyLimit) + ch, abandon, maxLeaseIndex, pErr := r.evalAndPropose(ctx, ba, g, st, localUncertaintyLimit, tok.Move(ctx)) if pErr != nil { if maxLeaseIndex != 0 { log.Fatalf( diff --git a/pkg/kv/kvserver/stateloader/stateloader.go b/pkg/kv/kvserver/stateloader/stateloader.go index 9a1a4d5be558..a6610a807d39 100644 --- a/pkg/kv/kvserver/stateloader/stateloader.go +++ b/pkg/kv/kvserver/stateloader/stateloader.go @@ -83,6 +83,9 @@ func (rsl StateLoader) Load( ms := as.RangeStats.ToStats() s.Stats = &ms + if as.ClosedTimestamp != nil { + s.ClosedTimestamp = *as.ClosedTimestamp + } } else { if s.RaftAppliedIndex, s.LeaseAppliedIndex, err = rsl.LoadAppliedIndex(ctx, reader); err != nil { return kvserverpb.ReplicaState{}, err @@ -167,8 +170,8 @@ func (rsl StateLoader) Save( } } if state.UsingAppliedStateKey { - rai, lai := state.RaftAppliedIndex, state.LeaseAppliedIndex - if err := rsl.SetRangeAppliedState(ctx, readWriter, rai, lai, ms); err != nil { + rai, lai, ct := state.RaftAppliedIndex, state.LeaseAppliedIndex, &state.ClosedTimestamp + if err := rsl.SetRangeAppliedState(ctx, readWriter, rai, lai, ms, ct); err != nil { return 
enginepb.MVCCStats{}, err } } else { @@ -294,17 +297,26 @@ func (rsl StateLoader) LoadMVCCStats( // The applied indices and the stats used to be stored separately in different // keys. We now deem those keys to be "legacy" because they have been replaced // by the range applied state key. +// +// TODO(andrei): closedTimestamp is a pointer to avoid an allocation when +// putting it in RangeAppliedState. Once RangeAppliedState.ClosedTimestamp is +// made non-nullable (see comments on the field), this argument should be taken +// by value. func (rsl StateLoader) SetRangeAppliedState( ctx context.Context, readWriter storage.ReadWriter, appliedIndex, leaseAppliedIndex uint64, newMS *enginepb.MVCCStats, + closedTimestamp *hlc.Timestamp, ) error { as := enginepb.RangeAppliedState{ RaftAppliedIndex: appliedIndex, LeaseAppliedIndex: leaseAppliedIndex, RangeStats: newMS.ToPersistentStats(), } + if closedTimestamp != nil && !closedTimestamp.IsEmpty() { + as.ClosedTimestamp = closedTimestamp + } // The RangeAppliedStateKey is not included in stats. This is also reflected // in C.MVCCComputeStats and ComputeStatsForRange. ms := (*enginepb.MVCCStats)(nil) @@ -477,12 +489,26 @@ func (rsl StateLoader) SetMVCCStats( if as, err := rsl.LoadRangeAppliedState(ctx, readWriter); err != nil { return err } else if as != nil { - return rsl.SetRangeAppliedState(ctx, readWriter, as.RaftAppliedIndex, as.LeaseAppliedIndex, newMS) + return rsl.SetRangeAppliedState( + ctx, readWriter, as.RaftAppliedIndex, as.LeaseAppliedIndex, newMS, as.ClosedTimestamp) } return rsl.writeLegacyMVCCStatsInternal(ctx, readWriter, newMS) } +// SetClosedTimestamp overwrites the closed timestamp. 
+func (rsl StateLoader) SetClosedTimestamp( + ctx context.Context, readWriter storage.ReadWriter, closedTS hlc.Timestamp, +) error { + as, err := rsl.LoadRangeAppliedState(ctx, readWriter) + if err != nil { + return err + } + return rsl.SetRangeAppliedState( + ctx, readWriter, as.RaftAppliedIndex, as.LeaseAppliedIndex, + as.RangeStats.ToStatsPtr(), &closedTS) +} + // SetLegacyRaftTruncatedState overwrites the truncated state. func (rsl StateLoader) SetLegacyRaftTruncatedState( ctx context.Context, diff --git a/pkg/kv/kvserver/store_split.go b/pkg/kv/kvserver/store_split.go index 83692feba9f1..7f8dccbfa3c4 100644 --- a/pkg/kv/kvserver/store_split.go +++ b/pkg/kv/kvserver/store_split.go @@ -18,6 +18,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/roachpb" "github.com/cockroachdb/cockroach/pkg/storage" "github.com/cockroachdb/cockroach/pkg/storage/enginepb" + "github.com/cockroachdb/cockroach/pkg/util/hlc" "github.com/cockroachdb/cockroach/pkg/util/log" "github.com/cockroachdb/errors" "go.etcd.io/etcd/raft/v3" @@ -28,7 +29,12 @@ import ( // changes to the given ReadWriter will be written atomically with the // split commit. func splitPreApply( - ctx context.Context, readWriter storage.ReadWriter, split roachpb.SplitTrigger, r *Replica, + ctx context.Context, + readWriter storage.ReadWriter, + split roachpb.SplitTrigger, + r *Replica, + // The closed timestamp used to initialize the RHS. + closedTS hlc.Timestamp, ) { // Sanity check that the store is in the split. // @@ -116,6 +122,11 @@ func splitPreApply( log.Fatalf(ctx, "%v", err) } + // Persist the closed timestamp. + if err := rsl.SetClosedTimestamp(ctx, readWriter, closedTS); err != nil { + log.Fatalf(ctx, "%s", err) + } + // The initialMaxClosed is assigned to the RHS replica to ensure that // follower reads do not regress following the split. 
After the split occurs // there will be no information in the closedts subsystem about the newly diff --git a/pkg/kv/kvserver/testing_knobs.go b/pkg/kv/kvserver/testing_knobs.go index 96ad6bff8468..aaba2ea7c185 100644 --- a/pkg/kv/kvserver/testing_knobs.go +++ b/pkg/kv/kvserver/testing_knobs.go @@ -239,6 +239,9 @@ type StoreTestingKnobs struct { // heartbeats and then expect other replicas to take the lease without // worrying about Raft). AllowLeaseRequestProposalsWhenNotLeader bool + // DontCloseTimestamps inhibits the propBuf's closing of timestamps. All Raft + // commands will carry an empty closed timestamp. + DontCloseTimestamps bool // AllowDangerousReplicationChanges disables safeguards // in execChangeReplicasTxn that prevent moving // to a configuration that cannot make progress. diff --git a/pkg/roachpb/batch.go b/pkg/roachpb/batch.go index ed1bf4d6a188..37ce483aa908 100644 --- a/pkg/roachpb/batch.go +++ b/pkg/roachpb/batch.go @@ -56,7 +56,7 @@ func (ba *BatchRequest) SetActiveTimestamp(nowFn func() hlc.Timestamp) error { // provisional commit timestamp evolves. // // Note that writes will be performed at the provisional commit timestamp, - // txn.Timestamp, regardless of the batch timestamp. + // txn.WriteTimestamp, regardless of the batch timestamp. ba.Timestamp = txn.ReadTimestamp } else { // When not transactional, allow empty timestamp and use nowFn instead diff --git a/pkg/storage/enginepb/mvcc3.go b/pkg/storage/enginepb/mvcc3.go index b03c00ca9bce..1cb6924095d5 100644 --- a/pkg/storage/enginepb/mvcc3.go +++ b/pkg/storage/enginepb/mvcc3.go @@ -30,6 +30,11 @@ func (ms *MVCCPersistentStats) ToStats() MVCCStats { return MVCCStats(*ms) } +// ToStatsPtr converts the receiver to a *MVCCStats. +func (ms *MVCCPersistentStats) ToStatsPtr() *MVCCStats { + return (*MVCCStats)(ms) +} + // SafeValue implements the redact.SafeValue interface. 
func (ms *MVCCStats) SafeValue() {} diff --git a/pkg/storage/enginepb/mvcc3.pb.go b/pkg/storage/enginepb/mvcc3.pb.go index 8d61e5cafc58..def873b63b77 100644 --- a/pkg/storage/enginepb/mvcc3.pb.go +++ b/pkg/storage/enginepb/mvcc3.pb.go @@ -129,7 +129,7 @@ type TxnMeta struct { func (m *TxnMeta) Reset() { *m = TxnMeta{} } func (*TxnMeta) ProtoMessage() {} func (*TxnMeta) Descriptor() ([]byte, []int) { - return fileDescriptor_mvcc3_91ae80d2fc45e9ad, []int{0} + return fileDescriptor_mvcc3_ed774bd3b5c68109, []int{0} } func (m *TxnMeta) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -165,7 +165,7 @@ func (m *IgnoredSeqNumRange) Reset() { *m = IgnoredSeqNumRange{} } func (m *IgnoredSeqNumRange) String() string { return proto.CompactTextString(m) } func (*IgnoredSeqNumRange) ProtoMessage() {} func (*IgnoredSeqNumRange) Descriptor() ([]byte, []int) { - return fileDescriptor_mvcc3_91ae80d2fc45e9ad, []int{1} + return fileDescriptor_mvcc3_ed774bd3b5c68109, []int{1} } func (m *IgnoredSeqNumRange) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -216,7 +216,7 @@ func (m *MVCCStatsDelta) Reset() { *m = MVCCStatsDelta{} } func (m *MVCCStatsDelta) String() string { return proto.CompactTextString(m) } func (*MVCCStatsDelta) ProtoMessage() {} func (*MVCCStatsDelta) Descriptor() ([]byte, []int) { - return fileDescriptor_mvcc3_91ae80d2fc45e9ad, []int{2} + return fileDescriptor_mvcc3_ed774bd3b5c68109, []int{2} } func (m *MVCCStatsDelta) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -268,7 +268,7 @@ func (m *MVCCPersistentStats) Reset() { *m = MVCCPersistentStats{} } func (m *MVCCPersistentStats) String() string { return proto.CompactTextString(m) } func (*MVCCPersistentStats) ProtoMessage() {} func (*MVCCPersistentStats) Descriptor() ([]byte, []int) { - return fileDescriptor_mvcc3_91ae80d2fc45e9ad, []int{3} + return fileDescriptor_mvcc3_ed774bd3b5c68109, []int{3} } func (m *MVCCPersistentStats) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -307,13 
+307,25 @@ type RangeAppliedState struct { // range_stats is the set of mvcc stats that accounts for the current value // of the Raft state machine. RangeStats MVCCPersistentStats `protobuf:"bytes,3,opt,name=range_stats,json=rangeStats,proto3" json:"range_stats"` + // closed_timestamp is the largest timestamp that is known to have been closed + // as of this lease applied index. This means that the current leaseholder (if + // any) and any future leaseholder will not evaluate writes at or below this + // timestamp, and also that any in-flight commands that can still apply are + // writing at higher timestamps. Non-leaseholder replicas are free to serve + // "follower reads" at or below this timestamp. + // + // TODO(andrei): Make this field not-nullable in 21.2, once all the ranges + // have a closed timestamp applied to their state (this might need a + // migration). In 21.1 we cannot write empty timestamp to disk because that + // looks like an inconsistency to the consistency-checker. 
+ ClosedTimestamp *hlc.Timestamp `protobuf:"bytes,4,opt,name=closed_timestamp,json=closedTimestamp,proto3" json:"closed_timestamp,omitempty"` } func (m *RangeAppliedState) Reset() { *m = RangeAppliedState{} } func (m *RangeAppliedState) String() string { return proto.CompactTextString(m) } func (*RangeAppliedState) ProtoMessage() {} func (*RangeAppliedState) Descriptor() ([]byte, []int) { - return fileDescriptor_mvcc3_91ae80d2fc45e9ad, []int{4} + return fileDescriptor_mvcc3_ed774bd3b5c68109, []int{4} } func (m *RangeAppliedState) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -351,7 +363,7 @@ func (m *MVCCWriteValueOp) Reset() { *m = MVCCWriteValueOp{} } func (m *MVCCWriteValueOp) String() string { return proto.CompactTextString(m) } func (*MVCCWriteValueOp) ProtoMessage() {} func (*MVCCWriteValueOp) Descriptor() ([]byte, []int) { - return fileDescriptor_mvcc3_91ae80d2fc45e9ad, []int{5} + return fileDescriptor_mvcc3_ed774bd3b5c68109, []int{5} } func (m *MVCCWriteValueOp) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -389,7 +401,7 @@ func (m *MVCCWriteIntentOp) Reset() { *m = MVCCWriteIntentOp{} } func (m *MVCCWriteIntentOp) String() string { return proto.CompactTextString(m) } func (*MVCCWriteIntentOp) ProtoMessage() {} func (*MVCCWriteIntentOp) Descriptor() ([]byte, []int) { - return fileDescriptor_mvcc3_91ae80d2fc45e9ad, []int{6} + return fileDescriptor_mvcc3_ed774bd3b5c68109, []int{6} } func (m *MVCCWriteIntentOp) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -425,7 +437,7 @@ func (m *MVCCUpdateIntentOp) Reset() { *m = MVCCUpdateIntentOp{} } func (m *MVCCUpdateIntentOp) String() string { return proto.CompactTextString(m) } func (*MVCCUpdateIntentOp) ProtoMessage() {} func (*MVCCUpdateIntentOp) Descriptor() ([]byte, []int) { - return fileDescriptor_mvcc3_91ae80d2fc45e9ad, []int{7} + return fileDescriptor_mvcc3_ed774bd3b5c68109, []int{7} } func (m *MVCCUpdateIntentOp) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -464,7 
+476,7 @@ func (m *MVCCCommitIntentOp) Reset() { *m = MVCCCommitIntentOp{} } func (m *MVCCCommitIntentOp) String() string { return proto.CompactTextString(m) } func (*MVCCCommitIntentOp) ProtoMessage() {} func (*MVCCCommitIntentOp) Descriptor() ([]byte, []int) { - return fileDescriptor_mvcc3_91ae80d2fc45e9ad, []int{8} + return fileDescriptor_mvcc3_ed774bd3b5c68109, []int{8} } func (m *MVCCCommitIntentOp) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -504,7 +516,7 @@ func (m *MVCCAbortIntentOp) Reset() { *m = MVCCAbortIntentOp{} } func (m *MVCCAbortIntentOp) String() string { return proto.CompactTextString(m) } func (*MVCCAbortIntentOp) ProtoMessage() {} func (*MVCCAbortIntentOp) Descriptor() ([]byte, []int) { - return fileDescriptor_mvcc3_91ae80d2fc45e9ad, []int{9} + return fileDescriptor_mvcc3_ed774bd3b5c68109, []int{9} } func (m *MVCCAbortIntentOp) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -540,7 +552,7 @@ func (m *MVCCAbortTxnOp) Reset() { *m = MVCCAbortTxnOp{} } func (m *MVCCAbortTxnOp) String() string { return proto.CompactTextString(m) } func (*MVCCAbortTxnOp) ProtoMessage() {} func (*MVCCAbortTxnOp) Descriptor() ([]byte, []int) { - return fileDescriptor_mvcc3_91ae80d2fc45e9ad, []int{10} + return fileDescriptor_mvcc3_ed774bd3b5c68109, []int{10} } func (m *MVCCAbortTxnOp) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -579,7 +591,7 @@ func (m *MVCCLogicalOp) Reset() { *m = MVCCLogicalOp{} } func (m *MVCCLogicalOp) String() string { return proto.CompactTextString(m) } func (*MVCCLogicalOp) ProtoMessage() {} func (*MVCCLogicalOp) Descriptor() ([]byte, []int) { - return fileDescriptor_mvcc3_91ae80d2fc45e9ad, []int{11} + return fileDescriptor_mvcc3_ed774bd3b5c68109, []int{11} } func (m *MVCCLogicalOp) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -811,6 +823,9 @@ func (this *RangeAppliedState) Equal(that interface{}) bool { if !this.RangeStats.Equal(&that1.RangeStats) { return false } + if 
!this.ClosedTimestamp.Equal(that1.ClosedTimestamp) { + return false + } return true } func (m *TxnMeta) Marshal() (dAtA []byte, err error) { @@ -1143,6 +1158,16 @@ func (m *RangeAppliedState) MarshalTo(dAtA []byte) (int, error) { return 0, err } i += n4 + if m.ClosedTimestamp != nil { + dAtA[i] = 0x22 + i++ + i = encodeVarintMvcc3(dAtA, i, uint64(m.ClosedTimestamp.Size())) + n5, err := m.ClosedTimestamp.MarshalTo(dAtA[i:]) + if err != nil { + return 0, err + } + i += n5 + } return i, nil } @@ -1170,11 +1195,11 @@ func (m *MVCCWriteValueOp) MarshalTo(dAtA []byte) (int, error) { dAtA[i] = 0x12 i++ i = encodeVarintMvcc3(dAtA, i, uint64(m.Timestamp.Size())) - n5, err := m.Timestamp.MarshalTo(dAtA[i:]) + n6, err := m.Timestamp.MarshalTo(dAtA[i:]) if err != nil { return 0, err } - i += n5 + i += n6 if len(m.Value) > 0 { dAtA[i] = 0x1a i++ @@ -1208,11 +1233,11 @@ func (m *MVCCWriteIntentOp) MarshalTo(dAtA []byte) (int, error) { dAtA[i] = 0xa i++ i = encodeVarintMvcc3(dAtA, i, uint64(m.TxnID.Size())) - n6, err := m.TxnID.MarshalTo(dAtA[i:]) + n7, err := m.TxnID.MarshalTo(dAtA[i:]) if err != nil { return 0, err } - i += n6 + i += n7 if len(m.TxnKey) > 0 { dAtA[i] = 0x12 i++ @@ -1222,19 +1247,19 @@ func (m *MVCCWriteIntentOp) MarshalTo(dAtA []byte) (int, error) { dAtA[i] = 0x1a i++ i = encodeVarintMvcc3(dAtA, i, uint64(m.Timestamp.Size())) - n7, err := m.Timestamp.MarshalTo(dAtA[i:]) + n8, err := m.Timestamp.MarshalTo(dAtA[i:]) if err != nil { return 0, err } - i += n7 + i += n8 dAtA[i] = 0x22 i++ i = encodeVarintMvcc3(dAtA, i, uint64(m.TxnMinTimestamp.Size())) - n8, err := m.TxnMinTimestamp.MarshalTo(dAtA[i:]) + n9, err := m.TxnMinTimestamp.MarshalTo(dAtA[i:]) if err != nil { return 0, err } - i += n8 + i += n9 return i, nil } @@ -1256,19 +1281,19 @@ func (m *MVCCUpdateIntentOp) MarshalTo(dAtA []byte) (int, error) { dAtA[i] = 0xa i++ i = encodeVarintMvcc3(dAtA, i, uint64(m.TxnID.Size())) - n9, err := m.TxnID.MarshalTo(dAtA[i:]) + n10, err := m.TxnID.MarshalTo(dAtA[i:]) if 
err != nil { return 0, err } - i += n9 + i += n10 dAtA[i] = 0x12 i++ i = encodeVarintMvcc3(dAtA, i, uint64(m.Timestamp.Size())) - n10, err := m.Timestamp.MarshalTo(dAtA[i:]) + n11, err := m.Timestamp.MarshalTo(dAtA[i:]) if err != nil { return 0, err } - i += n10 + i += n11 return i, nil } @@ -1290,11 +1315,11 @@ func (m *MVCCCommitIntentOp) MarshalTo(dAtA []byte) (int, error) { dAtA[i] = 0xa i++ i = encodeVarintMvcc3(dAtA, i, uint64(m.TxnID.Size())) - n11, err := m.TxnID.MarshalTo(dAtA[i:]) + n12, err := m.TxnID.MarshalTo(dAtA[i:]) if err != nil { return 0, err } - i += n11 + i += n12 if len(m.Key) > 0 { dAtA[i] = 0x12 i++ @@ -1304,11 +1329,11 @@ func (m *MVCCCommitIntentOp) MarshalTo(dAtA []byte) (int, error) { dAtA[i] = 0x1a i++ i = encodeVarintMvcc3(dAtA, i, uint64(m.Timestamp.Size())) - n12, err := m.Timestamp.MarshalTo(dAtA[i:]) + n13, err := m.Timestamp.MarshalTo(dAtA[i:]) if err != nil { return 0, err } - i += n12 + i += n13 if len(m.Value) > 0 { dAtA[i] = 0x22 i++ @@ -1342,11 +1367,11 @@ func (m *MVCCAbortIntentOp) MarshalTo(dAtA []byte) (int, error) { dAtA[i] = 0xa i++ i = encodeVarintMvcc3(dAtA, i, uint64(m.TxnID.Size())) - n13, err := m.TxnID.MarshalTo(dAtA[i:]) + n14, err := m.TxnID.MarshalTo(dAtA[i:]) if err != nil { return 0, err } - i += n13 + i += n14 return i, nil } @@ -1368,11 +1393,11 @@ func (m *MVCCAbortTxnOp) MarshalTo(dAtA []byte) (int, error) { dAtA[i] = 0xa i++ i = encodeVarintMvcc3(dAtA, i, uint64(m.TxnID.Size())) - n14, err := m.TxnID.MarshalTo(dAtA[i:]) + n15, err := m.TxnID.MarshalTo(dAtA[i:]) if err != nil { return 0, err } - i += n14 + i += n15 return i, nil } @@ -1395,61 +1420,61 @@ func (m *MVCCLogicalOp) MarshalTo(dAtA []byte) (int, error) { dAtA[i] = 0xa i++ i = encodeVarintMvcc3(dAtA, i, uint64(m.WriteValue.Size())) - n15, err := m.WriteValue.MarshalTo(dAtA[i:]) + n16, err := m.WriteValue.MarshalTo(dAtA[i:]) if err != nil { return 0, err } - i += n15 + i += n16 } if m.WriteIntent != nil { dAtA[i] = 0x12 i++ i = 
encodeVarintMvcc3(dAtA, i, uint64(m.WriteIntent.Size())) - n16, err := m.WriteIntent.MarshalTo(dAtA[i:]) + n17, err := m.WriteIntent.MarshalTo(dAtA[i:]) if err != nil { return 0, err } - i += n16 + i += n17 } if m.UpdateIntent != nil { dAtA[i] = 0x1a i++ i = encodeVarintMvcc3(dAtA, i, uint64(m.UpdateIntent.Size())) - n17, err := m.UpdateIntent.MarshalTo(dAtA[i:]) + n18, err := m.UpdateIntent.MarshalTo(dAtA[i:]) if err != nil { return 0, err } - i += n17 + i += n18 } if m.CommitIntent != nil { dAtA[i] = 0x22 i++ i = encodeVarintMvcc3(dAtA, i, uint64(m.CommitIntent.Size())) - n18, err := m.CommitIntent.MarshalTo(dAtA[i:]) + n19, err := m.CommitIntent.MarshalTo(dAtA[i:]) if err != nil { return 0, err } - i += n18 + i += n19 } if m.AbortIntent != nil { dAtA[i] = 0x2a i++ i = encodeVarintMvcc3(dAtA, i, uint64(m.AbortIntent.Size())) - n19, err := m.AbortIntent.MarshalTo(dAtA[i:]) + n20, err := m.AbortIntent.MarshalTo(dAtA[i:]) if err != nil { return 0, err } - i += n19 + i += n20 } if m.AbortTxn != nil { dAtA[i] = 0x32 i++ i = encodeVarintMvcc3(dAtA, i, uint64(m.AbortTxn.Size())) - n20, err := m.AbortTxn.MarshalTo(dAtA[i:]) + n21, err := m.AbortTxn.MarshalTo(dAtA[i:]) if err != nil { return 0, err } - i += n20 + i += n21 } return i, nil } @@ -1585,6 +1610,9 @@ func NewPopulatedRangeAppliedState(r randyMvcc3, easy bool) *RangeAppliedState { this.LeaseAppliedIndex = uint64(uint64(r.Uint32())) v5 := NewPopulatedMVCCPersistentStats(r, easy) this.RangeStats = *v5 + if r.Intn(10) != 0 { + this.ClosedTimestamp = hlc.NewPopulatedTimestamp(r, easy) + } if !easy && r.Intn(10) != 0 { } return this @@ -1833,6 +1861,10 @@ func (m *RangeAppliedState) Size() (n int) { } l = m.RangeStats.Size() n += 1 + l + sovMvcc3(uint64(l)) + if m.ClosedTimestamp != nil { + l = m.ClosedTimestamp.Size() + n += 1 + l + sovMvcc3(uint64(l)) + } return n } @@ -3116,6 +3148,39 @@ func (m *RangeAppliedState) Unmarshal(dAtA []byte) error { return err } iNdEx = postIndex + case 4: + if wireType != 2 { + 
return fmt.Errorf("proto: wrong wireType = %d for field ClosedTimestamp", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMvcc3 + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= (int(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return ErrInvalidLengthMvcc3 + } + postIndex := iNdEx + msglen + if postIndex > l { + return io.ErrUnexpectedEOF + } + if m.ClosedTimestamp == nil { + m.ClosedTimestamp = &hlc.Timestamp{} + } + if err := m.ClosedTimestamp.Unmarshal(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex default: iNdEx = preIndex skippy, err := skipMvcc3(dAtA[iNdEx:]) @@ -4308,86 +4373,87 @@ var ( ) func init() { - proto.RegisterFile("storage/enginepb/mvcc3.proto", fileDescriptor_mvcc3_91ae80d2fc45e9ad) + proto.RegisterFile("storage/enginepb/mvcc3.proto", fileDescriptor_mvcc3_ed774bd3b5c68109) } -var fileDescriptor_mvcc3_91ae80d2fc45e9ad = []byte{ - // 1219 bytes of a gzipped FileDescriptorProto - 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xcc, 0x57, 0x41, 0x6f, 0x1b, 0x45, - 0x14, 0xf6, 0x7a, 0xd7, 0xc9, 0xfa, 0xd9, 0x49, 0xec, 0x69, 0x05, 0x56, 0x69, 0xed, 0xe0, 0x03, - 0x8a, 0x4a, 0xbb, 0x46, 0x2d, 0xa7, 0xdc, 0xec, 0xa4, 0x2a, 0x2e, 0x4d, 0x53, 0x36, 0x6e, 0x91, - 0x40, 0x62, 0x35, 0x5e, 0x0f, 0x9b, 0x55, 0xd6, 0xb3, 0xdb, 0xdd, 0xb1, 0x6b, 0xff, 0x0b, 0x2e, - 0x48, 0x1c, 0x40, 0xca, 0x8f, 0xe0, 0xc0, 0x4f, 0xc8, 0xb1, 0x12, 0x1c, 0x2a, 0x24, 0x2c, 0x70, - 0x2f, 0xfc, 0x86, 0xf6, 0x82, 0x66, 0x66, 0xbd, 0xb6, 0x03, 0x71, 0x0c, 0x81, 0x88, 0xdb, 0xcc, - 0xfb, 0xde, 0xfb, 0xde, 0x9b, 0xe7, 0x6f, 0xe7, 0x8d, 0xe1, 0x7a, 0xc4, 0xfc, 0x10, 0x3b, 0xa4, - 0x46, 0xa8, 0xe3, 0x52, 0x12, 0xb4, 0x6b, 0xdd, 0xbe, 0x6d, 0xdf, 0x35, 0x82, 0xd0, 0x67, 0x3e, - 0xba, 0x66, 0xfb, 0xf6, 0x51, 0xe8, 0x63, 0xfb, 0xd0, 0x88, 0xfd, 0x8c, 0x89, 0xdf, 0xb5, 0x52, - 0x8f, 0xb9, 0x5e, 0xed, 0xd0, 0xb3, 0x6b, 
0xcc, 0xed, 0x92, 0x88, 0xe1, 0x6e, 0x20, 0xa3, 0xae, - 0x5d, 0x75, 0x7c, 0xc7, 0x17, 0xcb, 0x1a, 0x5f, 0x49, 0x6b, 0xf5, 0x6b, 0x15, 0x56, 0x5b, 0x03, - 0xba, 0x47, 0x18, 0x46, 0x9f, 0x40, 0xda, 0xed, 0x94, 0x94, 0x4d, 0x65, 0x2b, 0xdf, 0xa8, 0x9f, - 0x8c, 0x2a, 0xa9, 0x9f, 0x47, 0x95, 0xbb, 0x8e, 0xcb, 0x0e, 0x7b, 0x6d, 0xc3, 0xf6, 0xbb, 0xb5, - 0x24, 0x6d, 0xa7, 0x3d, 0x5d, 0xd7, 0x82, 0x23, 0xa7, 0x26, 0x92, 0xf6, 0x7a, 0x6e, 0xc7, 0x78, - 0xf2, 0xa4, 0xb9, 0x3b, 0x1e, 0x55, 0xd2, 0xcd, 0x5d, 0x33, 0xed, 0x76, 0x50, 0x01, 0xd4, 0x23, - 0x32, 0x2c, 0xa9, 0x9c, 0xd3, 0xe4, 0x4b, 0x54, 0x85, 0x0c, 0x09, 0x7c, 0xfb, 0xb0, 0xa4, 0x6d, - 0x2a, 0x5b, 0x99, 0x46, 0xfe, 0xf5, 0xa8, 0xa2, 0xb7, 0x06, 0xf4, 0x1e, 0xb7, 0x99, 0x12, 0x42, - 0x0f, 0x61, 0xe3, 0x79, 0xe8, 0x32, 0x62, 0x25, 0x67, 0x28, 0x65, 0x36, 0x95, 0xad, 0xdc, 0x9d, - 0x1b, 0xc6, 0xf4, 0xe8, 0x3c, 0xa7, 0x71, 0xe8, 0xd9, 0x46, 0x6b, 0xe2, 0xd4, 0xd0, 0x78, 0xd1, - 0xe6, 0xba, 0x88, 0x4d, 0xac, 0xe8, 0x7d, 0xd0, 0x83, 0xd0, 0xf5, 0x43, 0x97, 0x0d, 0x4b, 0x2b, - 0x22, 0xe9, 0xc6, 0xeb, 0x51, 0x25, 0xd7, 0x1a, 0xd0, 0xc7, 0xb1, 0xd9, 0x4c, 0x1c, 0xd0, 0x7b, - 0xa0, 0x47, 0xe4, 0x59, 0x8f, 0x50, 0x9b, 0x94, 0x56, 0x85, 0x33, 0xbc, 0x1e, 0x55, 0x56, 0x5a, - 0x03, 0x7a, 0x40, 0x9e, 0x99, 0x09, 0x86, 0x3e, 0x82, 0xb5, 0xae, 0x4b, 0x67, 0x0a, 0xcc, 0x2e, - 0x5f, 0x60, 0xbe, 0xeb, 0xd2, 0xc4, 0xb6, 0xad, 0x7f, 0x73, 0x5c, 0x49, 0xfd, 0x70, 0x5c, 0x51, - 0x1e, 0x68, 0x7a, 0xba, 0xa0, 0x3e, 0xd0, 0x74, 0xbd, 0x90, 0xad, 0x7e, 0x01, 0xa8, 0xe9, 0x50, - 0x3f, 0x24, 0x9d, 0x03, 0xf2, 0xec, 0x51, 0xaf, 0x6b, 0x62, 0xea, 0x10, 0xb4, 0x09, 0x99, 0x88, - 0xe1, 0x90, 0x89, 0x1f, 0x69, 0xbe, 0x34, 0x09, 0xa0, 0xeb, 0xa0, 0x12, 0xda, 0x29, 0xa5, 0xff, - 0x84, 0x73, 0xf3, 0xb6, 0xce, 0xf3, 0xfc, 0x7e, 0x5c, 0x51, 0xaa, 0x3f, 0x69, 0xb0, 0xbe, 0xf7, - 0x74, 0x67, 0xe7, 0x80, 0x61, 0x16, 0xed, 0x12, 0x8f, 0x61, 0x74, 0x13, 0x8a, 0x1e, 0x8e, 0x98, - 0xd5, 0x0b, 0x3a, 0x98, 0x11, 0x8b, 0x62, 0xea, 0x47, 0x22, 0x51, 0xc1, 0xdc, 
0xe0, 0xc0, 0x13, - 0x61, 0x7f, 0xc4, 0xcd, 0xe8, 0x06, 0x80, 0x4b, 0x19, 0xa1, 0xcc, 0xc2, 0x0e, 0x11, 0xd9, 0x0a, - 0x66, 0x56, 0x5a, 0xea, 0x0e, 0x41, 0x1f, 0x40, 0xde, 0xb1, 0xad, 0xf6, 0x90, 0x91, 0x48, 0x38, - 0xf0, 0xdf, 0xbf, 0xd0, 0x58, 0x1f, 0x8f, 0x2a, 0x70, 0x7f, 0xa7, 0xc1, 0xcd, 0x75, 0x87, 0x98, - 0xe0, 0xd8, 0x93, 0x35, 0x27, 0xf4, 0xdc, 0x3e, 0x91, 0x31, 0x42, 0x1b, 0xc8, 0xcc, 0x72, 0x8b, - 0xf0, 0x48, 0x60, 0xdb, 0xef, 0x51, 0x26, 0xc4, 0x10, 0xc3, 0x3b, 0xdc, 0x80, 0xde, 0x81, 0xec, - 0x11, 0x19, 0xc6, 0xc1, 0x2b, 0x02, 0xd5, 0x8f, 0xc8, 0x50, 0xc6, 0xc6, 0xa0, 0x0c, 0x5d, 0x4d, - 0xc0, 0x24, 0xb2, 0x8f, 0xbd, 0x38, 0x52, 0x97, 0x60, 0x1f, 0x7b, 0x49, 0x24, 0x07, 0x65, 0x64, - 0x36, 0x01, 0x65, 0xe4, 0xbb, 0x90, 0x8f, 0x5b, 0x20, 0x83, 0x41, 0xe0, 0x39, 0x69, 0x93, 0xf1, - 0x53, 0x17, 0x49, 0x91, 0x9b, 0x75, 0x49, 0xf2, 0x47, 0xc3, 0x28, 0xa6, 0xc8, 0xcb, 0x14, 0xd1, - 0x30, 0x4a, 0xf2, 0x73, 0x50, 0x06, 0xaf, 0x25, 0xa0, 0x8c, 0xbc, 0x0d, 0xc8, 0xf6, 0x29, 0xc3, - 0x2e, 0x8d, 0x2c, 0x12, 0x31, 0xb7, 0x8b, 0x39, 0xc5, 0xfa, 0xa6, 0xb2, 0xa5, 0x9a, 0xc5, 0x09, - 0x72, 0x6f, 0x02, 0xa0, 0x2d, 0x28, 0xe0, 0xb6, 0x1f, 0x32, 0x2b, 0x0a, 0x30, 0x8d, 0xf3, 0x6d, - 0x08, 0xca, 0x75, 0x61, 0x3f, 0x08, 0x30, 0x95, 0x59, 0x3f, 0x84, 0xb7, 0x22, 0x12, 0xe0, 0x10, - 0x33, 0xd2, 0xb1, 0xe6, 0xea, 0x2f, 0x08, 0xff, 0xab, 0x09, 0xda, 0x9c, 0x1e, 0x64, 0x5b, 0x13, - 0xb2, 0xfa, 0x45, 0x83, 0x2b, 0x5c, 0x56, 0x8f, 0x49, 0x18, 0xb9, 0x11, 0x47, 0x85, 0xc0, 0xfe, - 0x6f, 0xda, 0x52, 0x17, 0x6b, 0x4b, 0x5d, 0xa8, 0x2d, 0x75, 0x91, 0xb6, 0xd4, 0x45, 0xda, 0x52, - 0x17, 0x69, 0x4b, 0x3d, 0x47, 0x5b, 0xea, 0xf9, 0xda, 0x52, 0xcf, 0xd1, 0x96, 0xba, 0x48, 0x5b, - 0xea, 0xbf, 0xaf, 0x2d, 0xf5, 0x6f, 0x6a, 0x4b, 0x3d, 0x43, 0x5b, 0xd3, 0x6b, 0xeb, 0x47, 0x05, - 0x8a, 0xe2, 0x2a, 0xac, 0x07, 0x81, 0xe7, 0x92, 0x0e, 0x57, 0x17, 0x41, 0xb7, 0x00, 0x85, 0xf8, - 0x4b, 0x66, 0x61, 0x69, 0xb4, 0x5c, 0xda, 0x21, 0x03, 0x21, 0x2f, 0xcd, 0x2c, 0x70, 0x24, 0xf6, - 0x6e, 0x72, 0x3b, 
0x32, 0xe0, 0x8a, 0x47, 0x70, 0x44, 0x4e, 0xb9, 0xa7, 0x85, 0x7b, 0x51, 0x40, - 0x73, 0xfe, 0x4f, 0x21, 0x17, 0xf2, 0x94, 0x56, 0xc4, 0xa5, 0x2c, 0xf4, 0x96, 0xbb, 0x53, 0x33, - 0xce, 0x1e, 0xc2, 0xc6, 0x5f, 0x7c, 0x01, 0xf1, 0xd5, 0x0f, 0x82, 0x49, 0x58, 0x66, 0x4e, 0xf5, - 0xad, 0x02, 0x05, 0x1e, 0xf3, 0x29, 0x1f, 0x5c, 0x4f, 0xb1, 0xd7, 0x23, 0xfb, 0xc1, 0x64, 0x74, - 0x2a, 0xd3, 0xd1, 0x59, 0x87, 0xec, 0x74, 0xde, 0xa4, 0x97, 0x9f, 0x37, 0xd3, 0x28, 0x74, 0x15, - 0x32, 0x7d, 0xce, 0x1f, 0x4f, 0x64, 0xb9, 0xe1, 0x5f, 0x40, 0x10, 0x92, 0xbe, 0x25, 0x21, 0x4d, - 0x40, 0x59, 0x6e, 0x11, 0xb5, 0x54, 0xbf, 0x4b, 0x43, 0x31, 0x29, 0x4f, 0xfe, 0x2e, 0xfb, 0x01, - 0xfa, 0x1c, 0x56, 0xd8, 0x80, 0x5a, 0xc9, 0x8b, 0x61, 0xf7, 0x62, 0x2f, 0x86, 0x4c, 0x6b, 0x40, - 0x9b, 0xbb, 0x66, 0x86, 0x0d, 0x68, 0xb3, 0x83, 0xde, 0x86, 0x55, 0x4e, 0xce, 0x1b, 0x90, 0x16, - 0xe5, 0xf0, 0x5c, 0x1f, 0x9f, 0xee, 0x81, 0xfa, 0x8f, 0x7a, 0xb0, 0x0f, 0x45, 0xce, 0x3d, 0x3f, - 0xbe, 0xb5, 0xe5, 0xa9, 0x36, 0xd8, 0x80, 0xee, 0xcd, 0x4c, 0xf0, 0xea, 0xf7, 0x0a, 0x20, 0xde, - 0x1f, 0x79, 0x89, 0x5d, 0x4e, 0x83, 0x2e, 0xae, 0x85, 0xea, 0x9b, 0xb8, 0xec, 0x1d, 0xbf, 0xdb, - 0x75, 0xd9, 0xe5, 0x94, 0x1d, 0x8b, 0x3a, 0x7d, 0x86, 0xa8, 0xd5, 0x8b, 0x89, 0x5a, 0x3b, 0x5b, - 0xd4, 0x99, 0xd3, 0xa2, 0x0e, 0xa4, 0xa6, 0xeb, 0xfc, 0x7e, 0xba, 0x94, 0xb3, 0x57, 0xbb, 0xf2, - 0xc5, 0x25, 0x32, 0xb6, 0x06, 0xf4, 0xbf, 0x4e, 0xf7, 0x46, 0x85, 0x35, 0x9e, 0xef, 0xa1, 0xef, - 0xb8, 0x36, 0xf6, 0xf6, 0x03, 0xb4, 0x07, 0x39, 0xf9, 0xac, 0x96, 0x2d, 0x51, 0x44, 0xb3, 0x6f, - 0x9d, 0x77, 0x91, 0xcd, 0x5e, 0x4a, 0x26, 0x3c, 0x4f, 0x76, 0xe8, 0x31, 0xe4, 0x25, 0x9d, 0xbc, - 0xc7, 0x63, 0x15, 0xde, 0x5e, 0x8a, 0x6f, 0xd2, 0x71, 0x53, 0x56, 0x24, 0xb7, 0xe8, 0x00, 0xd6, - 0xe2, 0x07, 0x42, 0x4c, 0x29, 0xf5, 0x60, 0x9c, 0x47, 0x39, 0xff, 0xe1, 0x99, 0xf9, 0xde, 0xcc, - 0x9e, 0x93, 0xda, 0x42, 0xe1, 0x13, 0x52, 0x6d, 0x39, 0xd2, 0xf9, 0xcf, 0xc2, 0xcc, 0xdb, 0x33, - 0x7b, 0x7e, 0x76, 0x39, 0xf1, 0x62, 0xce, 0xcc, 0x72, 
0x67, 0x9f, 0x53, 0x9b, 0x99, 0xc3, 0xd3, - 0x2d, 0xba, 0x0f, 0x59, 0xc9, 0xc8, 0x06, 0x54, 0x3c, 0x33, 0x72, 0x77, 0x6e, 0x2e, 0x45, 0x27, - 0xa4, 0x64, 0xea, 0x38, 0x5e, 0x6f, 0x6b, 0x27, 0xc7, 0x15, 0xa5, 0x71, 0xf3, 0xe4, 0xb7, 0x72, - 0xea, 0x64, 0x5c, 0x56, 0x5e, 0x8c, 0xcb, 0xca, 0xcb, 0x71, 0x59, 0xf9, 0x75, 0x5c, 0x56, 0xbe, - 0x7a, 0x55, 0x4e, 0xbd, 0x78, 0x55, 0x4e, 0xbd, 0x7c, 0x55, 0x4e, 0x7d, 0xa6, 0x4f, 0xa8, 0xda, - 0x2b, 0xe2, 0xaf, 0xe0, 0xdd, 0x3f, 0x02, 0x00, 0x00, 0xff, 0xff, 0x83, 0xbe, 0x76, 0x2a, 0x76, - 0x0e, 0x00, 0x00, +var fileDescriptor_mvcc3_ed774bd3b5c68109 = []byte{ + // 1236 bytes of a gzipped FileDescriptorProto + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xcc, 0x57, 0x41, 0x6f, 0xe3, 0x44, + 0x14, 0x8e, 0x63, 0xa7, 0x75, 0x26, 0x69, 0x93, 0xce, 0xae, 0x20, 0x5a, 0x76, 0x93, 0x92, 0x03, + 0xaa, 0x96, 0x5d, 0x07, 0xb5, 0x9c, 0x7a, 0x4b, 0xda, 0xd5, 0x6e, 0x96, 0xed, 0xb6, 0xb8, 0x69, + 0x91, 0x40, 0xc2, 0x9a, 0xd8, 0x83, 0x6b, 0xd5, 0x19, 0xbb, 0xf6, 0x24, 0xeb, 0xfc, 0x0b, 0x2e, + 0x48, 0x1c, 0x40, 0xea, 0x8f, 0xe0, 0xc0, 0x4f, 0xe8, 0x71, 0x0f, 0x1c, 0x56, 0x48, 0x44, 0x90, + 0x5e, 0xf8, 0x0d, 0xdd, 0x0b, 0x9a, 0x19, 0xc7, 0x49, 0x0a, 0x4d, 0x03, 0x85, 0x8a, 0xdb, 0xcc, + 0xfb, 0xde, 0xfb, 0xde, 0x9b, 0x97, 0xcf, 0xf3, 0x26, 0xe0, 0x7e, 0x48, 0xbd, 0x00, 0xd9, 0xb8, + 0x86, 0x89, 0xed, 0x10, 0xec, 0xb7, 0x6b, 0x9d, 0x9e, 0x69, 0x6e, 0x68, 0x7e, 0xe0, 0x51, 0x0f, + 0xde, 0x33, 0x3d, 0xf3, 0x38, 0xf0, 0x90, 0x79, 0xa4, 0xc5, 0x7e, 0xda, 0xc8, 0xef, 0x5e, 0xa9, + 0x4b, 0x1d, 0xb7, 0x76, 0xe4, 0x9a, 0x35, 0xea, 0x74, 0x70, 0x48, 0x51, 0xc7, 0x17, 0x51, 0xf7, + 0xee, 0xda, 0x9e, 0xed, 0xf1, 0x65, 0x8d, 0xad, 0x84, 0xb5, 0xfa, 0x8d, 0x0c, 0x16, 0x5b, 0x11, + 0xd9, 0xc1, 0x14, 0xc1, 0x4f, 0x41, 0xda, 0xb1, 0x4a, 0xd2, 0xaa, 0xb4, 0x96, 0x6f, 0xd4, 0xcf, + 0x06, 0x95, 0xd4, 0xcf, 0x83, 0xca, 0x86, 0xed, 0xd0, 0xa3, 0x6e, 0x5b, 0x33, 0xbd, 0x4e, 0x2d, + 0x49, 0x6b, 0xb5, 0xc7, 0xeb, 0x9a, 0x7f, 0x6c, 0xd7, 0x78, 0xd2, 
0x6e, 0xd7, 0xb1, 0xb4, 0x83, + 0x83, 0xe6, 0xf6, 0x70, 0x50, 0x49, 0x37, 0xb7, 0xf5, 0xb4, 0x63, 0xc1, 0x22, 0x90, 0x8f, 0x71, + 0xbf, 0x24, 0x33, 0x4e, 0x9d, 0x2d, 0x61, 0x15, 0x64, 0xb0, 0xef, 0x99, 0x47, 0x25, 0x65, 0x55, + 0x5a, 0xcb, 0x34, 0xf2, 0x17, 0x83, 0x8a, 0xda, 0x8a, 0xc8, 0x13, 0x66, 0xd3, 0x05, 0x04, 0x5f, + 0x80, 0xc2, 0xab, 0xc0, 0xa1, 0xd8, 0x48, 0xce, 0x50, 0xca, 0xac, 0x4a, 0x6b, 0xb9, 0xf5, 0x07, + 0xda, 0xf8, 0xe8, 0x2c, 0xa7, 0x76, 0xe4, 0x9a, 0x5a, 0x6b, 0xe4, 0xd4, 0x50, 0x58, 0xd1, 0xfa, + 0x32, 0x8f, 0x4d, 0xac, 0xf0, 0x43, 0xa0, 0xfa, 0x81, 0xe3, 0x05, 0x0e, 0xed, 0x97, 0x16, 0x78, + 0xd2, 0xc2, 0xc5, 0xa0, 0x92, 0x6b, 0x45, 0x64, 0x2f, 0x36, 0xeb, 0x89, 0x03, 0xfc, 0x00, 0xa8, + 0x21, 0x3e, 0xe9, 0x62, 0x62, 0xe2, 0xd2, 0x22, 0x77, 0x06, 0x17, 0x83, 0xca, 0x42, 0x2b, 0x22, + 0xfb, 0xf8, 0x44, 0x4f, 0x30, 0xf8, 0x0c, 0x2c, 0x75, 0x1c, 0x32, 0x51, 0x60, 0x76, 0xfe, 0x02, + 0xf3, 0x1d, 0x87, 0x24, 0xb6, 0x4d, 0xf5, 0xdb, 0xd3, 0x4a, 0xea, 0xc7, 0xd3, 0x8a, 0xf4, 0x5c, + 0x51, 0xd3, 0x45, 0xf9, 0xb9, 0xa2, 0xaa, 0xc5, 0x6c, 0xf5, 0x4b, 0x00, 0x9b, 0x36, 0xf1, 0x02, + 0x6c, 0xed, 0xe3, 0x93, 0x97, 0xdd, 0x8e, 0x8e, 0x88, 0x8d, 0xe1, 0x2a, 0xc8, 0x84, 0x14, 0x05, + 0x94, 0xff, 0x48, 0xd3, 0xa5, 0x09, 0x00, 0xde, 0x07, 0x32, 0x26, 0x56, 0x29, 0xfd, 0x27, 0x9c, + 0x99, 0x37, 0x55, 0x96, 0xe7, 0xf7, 0xd3, 0x8a, 0x54, 0xfd, 0x49, 0x01, 0xcb, 0x3b, 0x87, 0x5b, + 0x5b, 0xfb, 0x14, 0xd1, 0x70, 0x1b, 0xbb, 0x14, 0xc1, 0x87, 0x60, 0xc5, 0x45, 0x21, 0x35, 0xba, + 0xbe, 0x85, 0x28, 0x36, 0x08, 0x22, 0x5e, 0xc8, 0x13, 0x15, 0xf5, 0x02, 0x03, 0x0e, 0xb8, 0xfd, + 0x25, 0x33, 0xc3, 0x07, 0x00, 0x38, 0x84, 0x62, 0x42, 0x0d, 0x64, 0x63, 0x9e, 0xad, 0xa8, 0x67, + 0x85, 0xa5, 0x6e, 0x63, 0xf8, 0x11, 0xc8, 0xdb, 0xa6, 0xd1, 0xee, 0x53, 0x1c, 0x72, 0x07, 0xf6, + 0xfb, 0x17, 0x1b, 0xcb, 0xc3, 0x41, 0x05, 0x3c, 0xdd, 0x6a, 0x30, 0x73, 0xdd, 0xc6, 0x3a, 0xb0, + 0xcd, 0xd1, 0x9a, 0x11, 0xba, 0x4e, 0x0f, 0x8b, 0x18, 0xae, 0x0d, 0xa8, 0x67, 0x99, 0x85, 0x7b, + 0x24, 
0xb0, 0xe9, 0x75, 0x09, 0xe5, 0x62, 0x88, 0xe1, 0x2d, 0x66, 0x80, 0xef, 0x81, 0xec, 0x31, + 0xee, 0xc7, 0xc1, 0x0b, 0x1c, 0x55, 0x8f, 0x71, 0x5f, 0xc4, 0xc6, 0xa0, 0x08, 0x5d, 0x4c, 0xc0, + 0x24, 0xb2, 0x87, 0xdc, 0x38, 0x52, 0x15, 0x60, 0x0f, 0xb9, 0x49, 0x24, 0x03, 0x45, 0x64, 0x36, + 0x01, 0x45, 0xe4, 0xfb, 0x20, 0x1f, 0xb7, 0x40, 0x04, 0x03, 0x8e, 0xe7, 0x84, 0x4d, 0xc4, 0x8f, + 0x5d, 0x04, 0x45, 0x6e, 0xd2, 0x25, 0xc9, 0x1f, 0xf6, 0xc3, 0x98, 0x22, 0x2f, 0x52, 0x84, 0xfd, + 0x30, 0xc9, 0xcf, 0x40, 0x11, 0xbc, 0x94, 0x80, 0x22, 0xf2, 0x31, 0x80, 0xa6, 0x47, 0x28, 0x72, + 0x48, 0x68, 0xe0, 0x90, 0x3a, 0x1d, 0xc4, 0x28, 0x96, 0x57, 0xa5, 0x35, 0x59, 0x5f, 0x19, 0x21, + 0x4f, 0x46, 0x00, 0x5c, 0x03, 0x45, 0xd4, 0xf6, 0x02, 0x6a, 0x84, 0x3e, 0x22, 0x71, 0xbe, 0x02, + 0xa7, 0x5c, 0xe6, 0xf6, 0x7d, 0x1f, 0x11, 0x91, 0xf5, 0x63, 0xf0, 0x4e, 0x88, 0x7d, 0x14, 0x20, + 0x8a, 0x2d, 0x63, 0xaa, 0xfe, 0x22, 0xf7, 0xbf, 0x9b, 0xa0, 0xcd, 0xf1, 0x41, 0x36, 0x15, 0x2e, + 0xab, 0x5f, 0x14, 0x70, 0x87, 0xc9, 0x6a, 0x0f, 0x07, 0xa1, 0x13, 0x32, 0x94, 0x0b, 0xec, 0xff, + 0xa6, 0x2d, 0x79, 0xb6, 0xb6, 0xe4, 0x99, 0xda, 0x92, 0x67, 0x69, 0x4b, 0x9e, 0xa5, 0x2d, 0x79, + 0x96, 0xb6, 0xe4, 0x6b, 0xb4, 0x25, 0x5f, 0xaf, 0x2d, 0xf9, 0x1a, 0x6d, 0xc9, 0xb3, 0xb4, 0x25, + 0xff, 0xfb, 0xda, 0x92, 0xff, 0xa6, 0xb6, 0xe4, 0x2b, 0xb4, 0x35, 0xbe, 0xb6, 0xbe, 0x4f, 0x83, + 0x15, 0x7e, 0x15, 0xd6, 0x7d, 0xdf, 0x75, 0xb0, 0xc5, 0xd4, 0x85, 0xe1, 0x23, 0x00, 0x03, 0xf4, + 0x15, 0x35, 0x90, 0x30, 0x1a, 0x0e, 0xb1, 0x70, 0xc4, 0xe5, 0xa5, 0xe8, 0x45, 0x86, 0xc4, 0xde, + 0x4d, 0x66, 0x87, 0x1a, 0xb8, 0xe3, 0x62, 0x14, 0xe2, 0x4b, 0xee, 0x69, 0xee, 0xbe, 0xc2, 0xa1, + 0x29, 0xff, 0x43, 0x90, 0x0b, 0x58, 0x4a, 0x23, 0x64, 0x52, 0xe6, 0x7a, 0xcb, 0xad, 0xd7, 0xb4, + 0xab, 0x87, 0xb0, 0xf6, 0x17, 0x5f, 0x40, 0x7c, 0xf5, 0x03, 0xce, 0x24, 0xbe, 0x89, 0x67, 0xa0, + 0x68, 0xba, 0x5e, 0x88, 0xad, 0x89, 0x29, 0xa2, 0xcc, 0x31, 0x45, 0xf4, 0x82, 0x08, 0x9b, 0x18, + 0x21, 0x49, 0x7f, 0xbe, 0x93, 0x40, 0x91, 
0x65, 0xff, 0x8c, 0x8d, 0xc0, 0x43, 0xe4, 0x76, 0xf1, + 0xae, 0x3f, 0x1a, 0xc2, 0xd2, 0x78, 0x08, 0xd7, 0x41, 0x76, 0x9c, 0x33, 0x3d, 0xff, 0xe4, 0x1a, + 0x47, 0xc1, 0xbb, 0x20, 0xd3, 0x63, 0xfc, 0xf1, 0x6c, 0x17, 0x1b, 0xf6, 0x2d, 0xf9, 0x01, 0xee, + 0x19, 0x02, 0x52, 0x38, 0x94, 0x65, 0x16, 0x5e, 0x0b, 0xff, 0xf9, 0x92, 0xf2, 0xc4, 0x2f, 0xbc, + 0xeb, 0xc3, 0x2f, 0xc0, 0x02, 0x8d, 0x88, 0x91, 0xbc, 0x3d, 0xb6, 0x6f, 0xf6, 0xf6, 0xc8, 0xb4, + 0x22, 0xd2, 0xdc, 0xd6, 0x33, 0x34, 0x22, 0x4d, 0x0b, 0xbe, 0x0b, 0x16, 0x19, 0x39, 0x6b, 0x40, + 0x9a, 0x97, 0xc3, 0x72, 0x7d, 0x72, 0xb9, 0x07, 0xf2, 0x3f, 0xea, 0xc1, 0x2e, 0x58, 0x61, 0xdc, + 0xd3, 0x0f, 0x01, 0x65, 0x7e, 0xaa, 0x02, 0x8d, 0xc8, 0xce, 0xc4, 0x5b, 0xa0, 0xfa, 0x83, 0x04, + 0x20, 0xeb, 0x8f, 0xb8, 0x0e, 0x6f, 0xa7, 0x41, 0x37, 0xd7, 0x42, 0xf5, 0x6d, 0x5c, 0xf6, 0x96, + 0xd7, 0xe9, 0x38, 0xf4, 0x76, 0xca, 0x8e, 0x45, 0x9d, 0xbe, 0x42, 0xd4, 0xf2, 0xcd, 0x44, 0xad, + 0x5c, 0x2d, 0xea, 0xcc, 0x65, 0x51, 0xfb, 0x42, 0xd3, 0x75, 0x76, 0xd3, 0xdd, 0xca, 0xd9, 0xab, + 0x1d, 0xf1, 0x76, 0xe3, 0x19, 0x5b, 0x11, 0xf9, 0xaf, 0xd3, 0xbd, 0x95, 0xc1, 0x12, 0xcb, 0xf7, + 0xc2, 0xb3, 0x1d, 0x13, 0xb9, 0xbb, 0x3e, 0xdc, 0x01, 0x39, 0xf1, 0x40, 0x17, 0x2d, 0x91, 0x78, + 0xb3, 0x1f, 0x5d, 0x77, 0x25, 0x4e, 0x5e, 0x4a, 0x3a, 0x78, 0x95, 0xec, 0xe0, 0x1e, 0xc8, 0x0b, + 0x3a, 0x31, 0x11, 0x62, 0x15, 0x3e, 0x9e, 0x8b, 0x6f, 0xd4, 0x71, 0x5d, 0x54, 0x24, 0xb6, 0x70, + 0x1f, 0x2c, 0xc5, 0x4f, 0x8d, 0x98, 0x52, 0xe8, 0x41, 0xbb, 0x8e, 0x72, 0xfa, 0xc3, 0xd3, 0xf3, + 0xdd, 0x89, 0x3d, 0x23, 0x35, 0xb9, 0xc2, 0x47, 0xa4, 0xca, 0x7c, 0xa4, 0xd3, 0x9f, 0x85, 0x9e, + 0x37, 0x27, 0xf6, 0xec, 0xec, 0x62, 0x76, 0xc6, 0x9c, 0x99, 0xf9, 0xce, 0x3e, 0xa5, 0x36, 0x3d, + 0x87, 0xc6, 0x5b, 0xf8, 0x14, 0x64, 0x05, 0x23, 0x8d, 0x08, 0x7f, 0xb0, 0xe4, 0xd6, 0x1f, 0xce, + 0x45, 0xc7, 0xa5, 0xa4, 0xab, 0x28, 0x5e, 0x6f, 0x2a, 0x67, 0xa7, 0x15, 0xa9, 0xf1, 0xf0, 0xec, + 0xb7, 0x72, 0xea, 0x6c, 0x58, 0x96, 0x5e, 0x0f, 0xcb, 0xd2, 0x9b, 0x61, 0x59, 
0xfa, 0x75, 0x58, + 0x96, 0xbe, 0x3e, 0x2f, 0xa7, 0x5e, 0x9f, 0x97, 0x53, 0x6f, 0xce, 0xcb, 0xa9, 0xcf, 0xd5, 0x11, + 0x55, 0x7b, 0x81, 0xff, 0xa9, 0xdc, 0xf8, 0x23, 0x00, 0x00, 0xff, 0xff, 0x46, 0x22, 0xed, 0xb0, + 0xc0, 0x0e, 0x00, 0x00, } diff --git a/pkg/storage/enginepb/mvcc3.proto b/pkg/storage/enginepb/mvcc3.proto index a283b4088bed..66181e1e064f 100644 --- a/pkg/storage/enginepb/mvcc3.proto +++ b/pkg/storage/enginepb/mvcc3.proto @@ -203,6 +203,19 @@ message RangeAppliedState { // range_stats is the set of mvcc stats that accounts for the current value // of the Raft state machine. MVCCPersistentStats range_stats = 3 [(gogoproto.nullable) = false]; + + // closed_timestamp is the largest timestamp that is known to have been closed + // as of this lease applied index. This means that the current leaseholder (if + // any) and any future leaseholder will not evaluate writes at or below this + // timestamp, and also that any in-flight commands that can still apply are + // writing at higher timestamps. Non-leaseholder replicas are free to serve + // "follower reads" at or below this timestamp. + // + // TODO(andrei): Make this field not-nullable in 21.2, once all the ranges + // have a closed timestamp applied to their state (this might need a + // migration). In 21.1 we cannot write empty timestamp to disk because that + // looks like an inconsistency to the consistency-checker. + util.hlc.Timestamp closed_timestamp = 4; } // MVCCWriteValueOp corresponds to a value being written outside of a diff --git a/pkg/util/hlc/timestamp.go b/pkg/util/hlc/timestamp.go index 8a92f35190f2..af127a9c2457 100644 --- a/pkg/util/hlc/timestamp.go +++ b/pkg/util/hlc/timestamp.go @@ -196,6 +196,10 @@ func (t Timestamp) Add(wallTime int64, logical int32) Timestamp { // // Adding a positive value to a Timestamp adds the Synthetic flag. 
// s.Synthetic = true // } + // + // When addressing this TODO, remove the hack in + // propBuf.assignClosedTimestampToProposal that manually marks lease + // expirations as synthetic. return s } @@ -360,6 +364,15 @@ func (t Timestamp) UnsafeToClockTimestamp() ClockTimestamp { return ClockTimestamp(t) } +// MustToClockTimestamp casts a Timestamp to a ClockTimestamp. Panics if the +// timestamp is synthetic. See TryToClockTimestamp if you don't want to panic. +func (t Timestamp) MustToClockTimestamp() ClockTimestamp { + if t.Synthetic { + panic(fmt.Sprintf("can't convert synthetic timestamp to ClockTimestamp: %s", t)) + } + return ClockTimestamp(t) +} + // ToTimestamp upcasts a ClockTimestamp into a Timestamp. func (t ClockTimestamp) ToTimestamp() Timestamp { if t.Synthetic {