diff --git a/pkg/kv/kvnemesis/env.go b/pkg/kv/kvnemesis/env.go index 1f6112df1b2c..9aa39d77f935 100644 --- a/pkg/kv/kvnemesis/env.go +++ b/pkg/kv/kvnemesis/env.go @@ -14,7 +14,6 @@ import ( "context" gosql "database/sql" "fmt" - "regexp" "time" "github.com/cockroachdb/cockroach-go/v2/crdb" @@ -69,22 +68,6 @@ func (e *Env) CheckConsistency(ctx context.Context, span roachpb.Span) []error { if err := rows.Scan(&rangeID, &key, &status, &detail); err != nil { return []error{err} } - // TODO(erikgrinaker): There's a known issue that can result in a 10-byte - // discrepancy in SysBytes. This hasn't been investigated, but it's not - // critical so we ignore it for now. See: - // https://github.com/cockroachdb/cockroach/issues/93896 - if status == kvpb.CheckConsistencyResponse_RANGE_CONSISTENT_STATS_INCORRECT.String() { - m := regexp.MustCompile(`.*\ndelta \(stats-computed\): \{(.*)\}`).FindStringSubmatch(detail) - if len(m) > 1 { - delta := m[1] - // Strip out LastUpdateNanos and all zero-valued fields. - delta = regexp.MustCompile(`LastUpdateNanos:\d+`).ReplaceAllString(delta, "") - delta = regexp.MustCompile(`\S+:0\b`).ReplaceAllString(delta, "") - if regexp.MustCompile(`^\s*SysBytes:10\s*$`).MatchString(delta) { - continue - } - } - } switch status { case kvpb.CheckConsistencyResponse_RANGE_INDETERMINATE.String(): // Can't do anything, so let it slide. diff --git a/pkg/kv/kvpb/api.proto b/pkg/kv/kvpb/api.proto index 625c3fff8cee..51c2588d8794 100644 --- a/pkg/kv/kvpb/api.proto +++ b/pkg/kv/kvpb/api.proto @@ -2085,6 +2085,14 @@ message SubsumeResponse { (gogoproto.customname) = "MVCCStats" ]; + // RangeIDLocalMVCCStats are the MVCC statistics for the replicated range + // ID-local keys. During a merge, these must be subtracted from MVCCStats + // since they won't be present in the merged range. + storage.enginepb.MVCCStats range_id_local_mvcc_stats = 8 [ + (gogoproto.nullable) = false, + (gogoproto.customname) = "RangeIDLocalMVCCStats" + ]; + // LeaseAppliedIndex is the lease applied index of the last applied command // at the time that the Subsume request executed. This is NOT intended to be // the lease index of the SubsumeRequest itself. Instead, it is intended to diff --git a/pkg/kv/kvserver/batcheval/cmd_end_transaction.go b/pkg/kv/kvserver/batcheval/cmd_end_transaction.go index 09fbecaaf7ec..4cfb66529222 100644 --- a/pkg/kv/kvserver/batcheval/cmd_end_transaction.go +++ b/pkg/kv/kvserver/batcheval/cmd_end_transaction.go @@ -16,6 +16,7 @@ import ( "sync/atomic" "time" + "github.com/cockroachdb/cockroach/pkg/clusterversion" "github.com/cockroachdb/cockroach/pkg/keys" "github.com/cockroachdb/cockroach/pkg/kv/kvpb" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/abortspan" @@ -1272,9 +1273,7 @@ func mergeTrigger( // The stats for the merged range are the sum of the LHS and RHS stats // adjusted for range key merges (which is the inverse of the split - // adjustment). The RHS's replicated range ID stats are subtracted -- the only - // replicated range ID keys we copy from the RHS are the keys in the abort - // span, and we've already accounted for those stats above. + // adjustment). ms.Add(merge.RightMVCCStats) msRangeKeyDelta, err := computeSplitRangeKeyStatsDelta(batch, merge.LeftDesc, merge.RightDesc) if err != nil { @@ -1282,7 +1281,18 @@ func mergeTrigger( } ms.Subtract(msRangeKeyDelta) - { + // The RHS's replicated range ID stats are subtracted -- the only replicated + // range ID keys we copy from the RHS are the keys in the abort span, and + // we've already accounted for those stats above. + // + // NB: RangeIDLocalMVCCStats is introduced in 23.2 to mitigate a SysBytes race + // with lease requests (which ignore latches). For 23.1 compatibility, we fall + // back to computing it here when not set. We don't need a version gate since + // it's only used at evaluation time and doesn't affect below-Raft state. + if merge.RightRangeIDLocalMVCCStats != (enginepb.MVCCStats{}) { + ms.Subtract(merge.RightRangeIDLocalMVCCStats) + } else { + _ = clusterversion.V23_1 // remove this branch when 23.1 support is removed ridPrefix := keys.MakeRangeIDReplicatedPrefix(merge.RightDesc.RangeID) sysMS, err := storage.ComputeStats(batch, ridPrefix, ridPrefix.PrefixEnd(), 0 /* nowNanos */) if err != nil { diff --git a/pkg/kv/kvserver/batcheval/cmd_subsume.go b/pkg/kv/kvserver/batcheval/cmd_subsume.go index 05dcc8ddb02e..fbcc45106811 100644 --- a/pkg/kv/kvserver/batcheval/cmd_subsume.go +++ b/pkg/kv/kvserver/batcheval/cmd_subsume.go @@ -135,6 +135,21 @@ func Subsume( reply.LeaseAppliedIndex = cArgs.EvalCtx.GetLeaseAppliedIndex() reply.FreezeStart = cArgs.EvalCtx.Clock().NowAsClockTimestamp() + // We ship the range ID-local replicated stats as well, since these must be + // subtracted from MVCCStats for the merged range. + // + // NB: lease requests can race with this computation, since they ignore + // latches and write to the range ID-local keyspace. This can very rarely + // result in a minor SysBytes discrepancy when the GetMVCCStats() call above + // is not consistent with this readWriter snapshot. We accept this for now, + // rather than introducing additional synchronization complexity. + ridPrefix := keys.MakeRangeIDReplicatedPrefix(desc.RangeID) + reply.RangeIDLocalMVCCStats, err = storage.ComputeStats( + readWriter, ridPrefix, ridPrefix.PrefixEnd(), 0 /* nowNanos */) + if err != nil { + return result.Result{}, err + } + // Collect a read summary from the RHS leaseholder to ship to the LHS // leaseholder. This is used to instruct the LHS on how to update its // timestamp cache to ensure that no future writes are allowed to invalidate diff --git a/pkg/kv/kvserver/replica_command.go b/pkg/kv/kvserver/replica_command.go index 3b40763f7eab..f55ec1476017 100644 --- a/pkg/kv/kvserver/replica_command.go +++ b/pkg/kv/kvserver/replica_command.go @@ -764,12 +764,13 @@ func (r *Replica) AdminMerge( Commit: true, InternalCommitTrigger: &roachpb.InternalCommitTrigger{ MergeTrigger: &roachpb.MergeTrigger{ - LeftDesc: updatedLeftDesc, - RightDesc: rightDesc, - RightMVCCStats: rhsSnapshotRes.MVCCStats, - FreezeStart: rhsSnapshotRes.FreezeStart, - RightClosedTimestamp: rhsSnapshotRes.ClosedTimestamp, - RightReadSummary: rhsSnapshotRes.ReadSummary, + LeftDesc: updatedLeftDesc, + RightDesc: rightDesc, + RightMVCCStats: rhsSnapshotRes.MVCCStats, + RightRangeIDLocalMVCCStats: rhsSnapshotRes.RangeIDLocalMVCCStats, + FreezeStart: rhsSnapshotRes.FreezeStart, + RightClosedTimestamp: rhsSnapshotRes.ClosedTimestamp, + RightReadSummary: rhsSnapshotRes.ReadSummary, }, }, }) diff --git a/pkg/roachpb/data.proto b/pkg/roachpb/data.proto index 1d1b0ee1fd9a..179059e6fd86 100644 --- a/pkg/roachpb/data.proto +++ b/pkg/roachpb/data.proto @@ -153,6 +153,11 @@ message MergeTrigger { (gogoproto.nullable) = false ]; + storage.enginepb.MVCCStats right_range_id_local_mvcc_stats = 9 [ + (gogoproto.customname) = "RightRangeIDLocalMVCCStats", + (gogoproto.nullable) = false + ]; + // FreezeStart is a timestamp that is guaranteed to be greater than the // timestamps at which any requests were serviced by the right-hand side range // before it stopped responding to requests altogether (in anticipation of