diff --git a/pkg/kv/kvserver/metrics.go b/pkg/kv/kvserver/metrics.go index 3712fd528669..50c60fb5b43e 100644 --- a/pkg/kv/kvserver/metrics.go +++ b/pkg/kv/kvserver/metrics.go @@ -164,6 +164,12 @@ var ( Measurement: "Replicas", Unit: metric.Unit_COUNT, } + metaLeaseLivenessCount = metric.Metadata{ + Name: "leases.liveness", + Help: "Number of replica leaseholders for the liveness range(s)", + Measurement: "Replicas", + Unit: metric.Unit_COUNT, + } // Storage metrics. metaLiveBytes = metric.Metadata{ @@ -2022,6 +2028,7 @@ type StoreMetrics struct { LeaseTransferErrorCount *metric.Counter LeaseExpirationCount *metric.Gauge LeaseEpochCount *metric.Gauge + LeaseLivenessCount *metric.Gauge // Storage metrics. ResolveCommitCount *metric.Counter @@ -2653,6 +2660,7 @@ func newStoreMetrics(histogramWindow time.Duration) *StoreMetrics { LeaseTransferErrorCount: metric.NewCounter(metaLeaseTransferErrorCount), LeaseExpirationCount: metric.NewGauge(metaLeaseExpirationCount), LeaseEpochCount: metric.NewGauge(metaLeaseEpochCount), + LeaseLivenessCount: metric.NewGauge(metaLeaseLivenessCount), // Intent resolution metrics. ResolveCommitCount: metric.NewCounter(metaResolveCommit), diff --git a/pkg/kv/kvserver/replica_metrics.go b/pkg/kv/kvserver/replica_metrics.go index b846b55e7a7f..4d6ef8bfa071 100644 --- a/pkg/kv/kvserver/replica_metrics.go +++ b/pkg/kv/kvserver/replica_metrics.go @@ -15,6 +15,7 @@ import ( "math" "github.com/cockroachdb/cockroach/pkg/base" + "github.com/cockroachdb/cockroach/pkg/keys" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/allocator/allocatorimpl" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/concurrency" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverpb" @@ -27,11 +28,12 @@ import ( // ReplicaMetrics contains details on the current status of the replica. type ReplicaMetrics struct { - Leader bool - LeaseValid bool - Leaseholder bool - LeaseType roachpb.LeaseType - LeaseStatus kvserverpb.LeaseStatus + Leader bool + LeaseValid bool + Leaseholder bool + LeaseType roachpb.LeaseType + LeaseStatus kvserverpb.LeaseStatus + LivenessLease bool // Quiescent indicates whether the replica believes itself to be quiesced. Quiescent bool @@ -125,12 +127,14 @@ type calcReplicaMetricsInput struct { } func calcReplicaMetrics(d calcReplicaMetricsInput) ReplicaMetrics { - var validLease, validLeaseOwner bool + var validLease, validLeaseOwner, livenessLease bool var validLeaseType roachpb.LeaseType if d.leaseStatus.IsValid() { validLease = true validLeaseOwner = d.leaseStatus.Lease.OwnedBy(d.storeID) validLeaseType = d.leaseStatus.Lease.Type() + livenessLease = validLeaseOwner && + keys.NodeLivenessSpan.Overlaps(d.desc.RSpan().AsRawSpanWithNoLocals()) } rangeCounter, unavailable, underreplicated, overreplicated := calcRangeCounter( @@ -152,6 +156,7 @@ func calcReplicaMetrics(d calcReplicaMetricsInput) ReplicaMetrics { Leaseholder: validLeaseOwner, LeaseType: validLeaseType, LeaseStatus: d.leaseStatus, + LivenessLease: livenessLease, Quiescent: d.quiescent, Ticking: d.ticking, RangeCounter: rangeCounter, diff --git a/pkg/kv/kvserver/replica_proposal.go b/pkg/kv/kvserver/replica_proposal.go index 547df418854c..ea03b019b5d7 100644 --- a/pkg/kv/kvserver/replica_proposal.go +++ b/pkg/kv/kvserver/replica_proposal.go @@ -366,6 +366,17 @@ func (r *Replica) leasePostApplyLocked( r.gossipFirstRangeLocked(ctx) } + // Log acquisition of meta and liveness range leases. These are critical to + // cluster health, so it's useful to know their location over time. + if leaseChangingHands && iAmTheLeaseHolder && + r.descRLocked().StartKey.Less(roachpb.RKey(keys.NodeLivenessKeyMax)) { + if r.ownsValidLeaseRLocked(ctx, now) { + log.Health.Infof(ctx, "acquired system range lease: %s", newLease) + } else { + log.Health.Warningf(ctx, "applied system range lease after it expired: %s", newLease) + } + } + st := r.leaseStatusAtRLocked(ctx, now) if leaseChangingHands && newLease.Type() == roachpb.LeaseExpiration && r.ownsValidLeaseRLocked(ctx, now) && !r.shouldUseExpirationLeaseRLocked() { diff --git a/pkg/kv/kvserver/store.go b/pkg/kv/kvserver/store.go index 3ef3055f9425..da9ad9a3a504 100644 --- a/pkg/kv/kvserver/store.go +++ b/pkg/kv/kvserver/store.go @@ -2892,6 +2892,7 @@ func (s *Store) updateReplicationGauges(ctx context.Context) error { leaseHolderCount int64 leaseExpirationCount int64 leaseEpochCount int64 + leaseLivenessCount int64 raftLeaderNotLeaseHolderCount int64 raftLeaderInvalidLeaseCount int64 quiescentCount int64 @@ -2963,6 +2964,9 @@ func (s *Store) updateReplicationGauges(ctx context.Context) error { case roachpb.LeaseEpoch: leaseEpochCount++ } + if metrics.LivenessLease { + leaseLivenessCount++ + } } if metrics.Quiescent { quiescentCount++ @@ -3021,6 +3025,7 @@ func (s *Store) updateReplicationGauges(ctx context.Context) error { s.metrics.LeaseHolderCount.Update(leaseHolderCount) s.metrics.LeaseExpirationCount.Update(leaseExpirationCount) s.metrics.LeaseEpochCount.Update(leaseEpochCount) + s.metrics.LeaseLivenessCount.Update(leaseLivenessCount) s.metrics.QuiescentCount.Update(quiescentCount) s.metrics.UninitializedCount.Update(uninitializedCount) s.metrics.AverageQueriesPerSecond.Update(averageQueriesPerSecond)