Skip to content

Commit

Permalink
kvserver: gossip l0sublevels instead of read amp
Browse files Browse the repository at this point in the history
Previously, read amplification was gossiped among stores to enable
future allocation decisions that would avoid candidates with high read
amplification. L0 sublevels represents the number of levels within
level 0 and is normally the significant portion of read amplification.
When read amplification is high (>15) it is normally due to a similarly
high count of L0 sublevels. This patch changes the gossiped signal from
read amplification to L0 sublevels, as it is a better indicator of store health.

Release justification: low risk, replace deprecated gossip signal.

Release note: None
  • Loading branch information
kvoli committed Mar 29, 2022
1 parent 327f886 commit 36d49f3
Show file tree
Hide file tree
Showing 5 changed files with 30 additions and 35 deletions.
2 changes: 1 addition & 1 deletion pkg/kv/kvserver/store.go
Original file line number Diff line number Diff line change
Expand Up @@ -2966,7 +2966,7 @@ func (s *Store) Capacity(ctx context.Context, useCached bool) (roachpb.StoreCapa
capacity.LogicalBytes = logicalBytes
capacity.QueriesPerSecond = totalQueriesPerSecond
capacity.WritesPerSecond = totalWritesPerSecond
capacity.ReadAmplification = s.metrics.RdbReadAmplification.Value()
capacity.L0Sublevels = s.metrics.RdbL0Sublevels.Value()
capacity.BytesPerReplica = roachpb.PercentilesFromData(bytesPerReplica)
capacity.WritesPerReplica = roachpb.PercentilesFromData(writesPerReplica)
s.recordNewPerSecondStats(totalQueriesPerSecond, totalWritesPerSecond)
Expand Down
12 changes: 3 additions & 9 deletions pkg/kv/kvserver/store_pool.go
Original file line number Diff line number Diff line change
Expand Up @@ -755,10 +755,6 @@ type StoreList struct {
// candidateWritesPerSecond tracks writes-per-second stats for stores that are
// eligible to be rebalance targets.
candidateWritesPerSecond stat

// candidateReadAmplification tracks the read amplification stats for stores that are
// eligible to be rebalance targets.
candidateReadAmplification stat
}

// Generates a new store list based on the passed in descriptors. It will
Expand All @@ -773,32 +769,30 @@ func makeStoreList(descriptors []roachpb.StoreDescriptor) StoreList {
sl.candidateLogicalBytes.update(float64(desc.Capacity.LogicalBytes))
sl.candidateQueriesPerSecond.update(desc.Capacity.QueriesPerSecond)
sl.candidateWritesPerSecond.update(desc.Capacity.WritesPerSecond)
sl.candidateReadAmplification.update(float64(desc.Capacity.ReadAmplification))
}
return sl
}

func (sl StoreList) String() string {
var buf bytes.Buffer
fmt.Fprintf(&buf,
" candidate: avg-ranges=%v avg-leases=%v avg-disk-usage=%v avg-queries-per-second=%v read-amplification=%v",
" candidate: avg-ranges=%v avg-leases=%v avg-disk-usage=%v avg-queries-per-second=%v",
sl.candidateRanges.mean,
sl.candidateLeases.mean,
humanizeutil.IBytes(int64(sl.candidateLogicalBytes.mean)),
sl.candidateQueriesPerSecond.mean,
sl.candidateReadAmplification.mean,
)
if len(sl.stores) > 0 {
fmt.Fprintf(&buf, "\n")
} else {
fmt.Fprintf(&buf, " <no candidates>")
}
for _, desc := range sl.stores {
fmt.Fprintf(&buf, " %d: ranges=%d leases=%d disk-usage=%s queries-per-second=%.2f read-amplification=%d\n",
fmt.Fprintf(&buf, " %d: ranges=%d leases=%d disk-usage=%s queries-per-second=%.2f l0-sublevels=%d\n",
desc.StoreID, desc.Capacity.RangeCount,
desc.Capacity.LeaseCount, humanizeutil.IBytes(desc.Capacity.LogicalBytes),
desc.Capacity.QueriesPerSecond,
desc.Capacity.ReadAmplification,
desc.Capacity.L0Sublevels,
)
}
return buf.String()
Expand Down
40 changes: 20 additions & 20 deletions pkg/kv/kvserver/store_pool_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -513,28 +513,28 @@ func TestStorePoolUpdateLocalStore(t *testing.T) {
StoreID: 1,
Node: roachpb.NodeDescriptor{NodeID: 1},
Capacity: roachpb.StoreCapacity{
Capacity: 100,
Available: 50,
RangeCount: 5,
LeaseCount: 1,
LogicalBytes: 30,
QueriesPerSecond: 100,
WritesPerSecond: 30,
ReadAmplification: 5,
Capacity: 100,
Available: 50,
RangeCount: 5,
LeaseCount: 1,
LogicalBytes: 30,
QueriesPerSecond: 100,
WritesPerSecond: 30,
L0Sublevels: 4,
},
},
{
StoreID: 2,
Node: roachpb.NodeDescriptor{NodeID: 2},
Capacity: roachpb.StoreCapacity{
Capacity: 100,
Available: 55,
RangeCount: 4,
LeaseCount: 2,
LogicalBytes: 25,
QueriesPerSecond: 50,
WritesPerSecond: 25,
ReadAmplification: 10,
Capacity: 100,
Available: 55,
RangeCount: 4,
LeaseCount: 2,
LogicalBytes: 25,
QueriesPerSecond: 50,
WritesPerSecond: 25,
L0Sublevels: 8,
},
},
}
Expand Down Expand Up @@ -576,8 +576,8 @@ func TestStorePoolUpdateLocalStore(t *testing.T) {
if expectedWPS := 30 + WPS; desc.Capacity.WritesPerSecond != expectedWPS {
t.Errorf("expected WritesPerSecond %f, but got %f", expectedWPS, desc.Capacity.WritesPerSecond)
}
if expectedReadAmp := int64(5); desc.Capacity.ReadAmplification != expectedReadAmp {
t.Errorf("expected ReadAmplification %d, but got %d", expectedReadAmp, desc.Capacity.ReadAmplification)
if expectedL0Sublevels := int64(4); desc.Capacity.L0Sublevels != expectedL0Sublevels {
t.Errorf("expected L0 Sub-Levels %d, but got %d", expectedL0Sublevels, desc.Capacity.L0Sublevels)
}

sp.updateLocalStoreAfterRebalance(roachpb.StoreID(2), rangeUsageInfo, roachpb.REMOVE_VOTER)
Expand All @@ -597,8 +597,8 @@ func TestStorePoolUpdateLocalStore(t *testing.T) {
if expectedWPS := 25 - WPS; desc.Capacity.WritesPerSecond != expectedWPS {
t.Errorf("expected WritesPerSecond %f, but got %f", expectedWPS, desc.Capacity.WritesPerSecond)
}
if expectedReadAmp := int64(10); desc.Capacity.ReadAmplification != expectedReadAmp {
t.Errorf("expected ReadAmplification %d, but got %d", expectedReadAmp, desc.Capacity.ReadAmplification)
if expectedL0Sublevels := int64(8); desc.Capacity.L0Sublevels != expectedL0Sublevels {
t.Errorf("expected L0 Sub-Levels %d, but got %d", expectedL0Sublevels, desc.Capacity.L0Sublevels)
}

sp.updateLocalStoresAfterLeaseTransfer(roachpb.StoreID(1), roachpb.StoreID(2), rangeUsageInfo.QueriesPerSecond)
Expand Down
6 changes: 3 additions & 3 deletions pkg/roachpb/metadata.go
Original file line number Diff line number Diff line change
Expand Up @@ -570,12 +570,12 @@ func (sc StoreCapacity) String() string {
// SafeFormat implements the redact.SafeFormatter interface.
func (sc StoreCapacity) SafeFormat(w redact.SafePrinter, _ rune) {
w.Printf("disk (capacity=%s, available=%s, used=%s, logicalBytes=%s), "+
"ranges=%d, leases=%d, queries=%.2f, writes=%.2f, readAmplification=%d"+
"bytesPerReplica={%s}, writesPerReplica={%s}",
"ranges=%d, leases=%d, queries=%.2f, writes=%.2f, "+
"l0Sublevels=%d, bytesPerReplica={%s}, writesPerReplica={%s}",
humanizeutil.IBytes(sc.Capacity), humanizeutil.IBytes(sc.Available),
humanizeutil.IBytes(sc.Used), humanizeutil.IBytes(sc.LogicalBytes),
sc.RangeCount, sc.LeaseCount, sc.QueriesPerSecond, sc.WritesPerSecond,
sc.ReadAmplification, sc.BytesPerReplica, sc.WritesPerReplica)
sc.L0Sublevels, sc.BytesPerReplica, sc.WritesPerReplica)
}

// FractionUsed computes the fraction of storage capacity that is in use.
Expand Down
5 changes: 3 additions & 2 deletions pkg/roachpb/metadata.proto
Original file line number Diff line number Diff line change
Expand Up @@ -330,16 +330,17 @@ message StoreCapacity {
// by ranges in the store. The stat is tracked over the time period defined
// in storage/replica_stats.go, which as of July 2018 is 30 minutes.
optional double writes_per_second = 5 [(gogoproto.nullable) = false];
// read_amplification tracks the current read amplification in the store.
// l0_sublevels tracks the current number of l0 sublevels in the store.
// TODO(kvoli): Use of this field will need to be version-gated, to avoid
// instances where overlapping node-binary versions within a cluster result
// in this field missing.
optional int64 read_amplification = 11 [(gogoproto.nullable) = false];
optional int64 l0_sublevels = 12 [(gogoproto.nullable) = false];
// bytes_per_replica and writes_per_replica contain percentiles for the
// number of bytes and writes-per-second to each replica in the store.
// This information can be used for rebalancing decisions.
optional Percentiles bytes_per_replica = 6 [(gogoproto.nullable) = false];
optional Percentiles writes_per_replica = 7 [(gogoproto.nullable) = false];
reserved 11;
}

// StoreProperties contains configuration and OS-level details for a storage device.
Expand Down

0 comments on commit 36d49f3

Please sign in to comment.