Skip to content

Commit

Permalink
*: improve more metrics (tikv#1761)
Browse files (browse the repository at this point in the history)
Signed-off-by: Ryan Leung <[email protected]>
  • Loading branch information
rleungx authored and sre-bot committed Sep 23, 2019
1 parent 4ca2265 commit 5c648dc
Show file tree
Hide file tree
Showing 7 changed files with 23 additions and 14 deletions.
10 changes: 8 additions & 2 deletions server/checker/replica_checker.go
Original file line number Diff line number Diff line change
Expand Up @@ -257,7 +257,8 @@ func (r *ReplicaChecker) fixPeer(region *core.RegionInfo, peer *metapb.Peer, sta
if len(region.GetPeers()) > r.cluster.GetMaxReplicas() {
op, err := operator.CreateRemovePeerOperator(removeExtra, r.cluster, operator.OpReplica, region, peer.GetStoreId())
if err != nil {
checkerCounter.WithLabelValues("replica_checker", "create-operator-fail").Inc()
reason := fmt.Sprintf("%s-fail", removeExtra)
checkerCounter.WithLabelValues("replica_checker", reason).Inc()
return nil
}
return op
Expand All @@ -271,14 +272,17 @@ func (r *ReplicaChecker) fixPeer(region *core.RegionInfo, peer *metapb.Peer, sta
if region.GetPendingPeer(peer.GetId()) != nil {
op, err := operator.CreateRemovePeerOperator(removePending, r.cluster, operator.OpReplica, region, peer.GetStoreId())
if err != nil {
checkerCounter.WithLabelValues("replica_checker", "create-operator-fail").Inc()
reason := fmt.Sprintf("%s-fail", removePending)
checkerCounter.WithLabelValues("replica_checker", reason).Inc()
return nil
}
return op
}

storeID, _ := r.SelectBestReplacementStore(region, peer, filter.NewStorageThresholdFilter(r.name))
if storeID == 0 {
reason := fmt.Sprintf("no-store-%s", status)
checkerCounter.WithLabelValues("replica_checker", reason).Inc()
log.Debug("no best store to add replica", zap.Uint64("region-id", region.GetID()))
return nil
}
Expand All @@ -290,6 +294,8 @@ func (r *ReplicaChecker) fixPeer(region *core.RegionInfo, peer *metapb.Peer, sta
replace := fmt.Sprintf("replace-%s-replica", status)
op, err := operator.CreateMovePeerOperator(replace, r.cluster, region, operator.OpReplica, peer.GetStoreId(), newPeer.GetStoreId(), newPeer.GetId())
if err != nil {
reason := fmt.Sprintf("%s-fail", replace)
checkerCounter.WithLabelValues("replica_checker", reason).Inc()
return nil
}
return op
Expand Down
3 changes: 1 addition & 2 deletions server/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -928,7 +928,6 @@ func (c *RaftCluster) putStoreLocked(store *core.StoreInfo) error {
func (c *RaftCluster) checkStores() {
var offlineStores []*metapb.Store
var upStoreCount int

stores := c.GetStores()
for _, store := range stores {
// the store is already in the Tombstone state
Expand Down Expand Up @@ -963,7 +962,7 @@ func (c *RaftCluster) checkStores() {

if upStoreCount < c.GetMaxReplicas() {
for _, offlineStore := range offlineStores {
log.Warn("store may not turn into Tombstone, there are no extra up node has enough space to accommodate the extra replica", zap.Stringer("store", offlineStore))
log.Warn("store may not turn into Tombstone, there are no extra up store has enough space to accommodate the extra replica", zap.Stringer("store", offlineStore))
}
}
}
Expand Down
6 changes: 5 additions & 1 deletion server/schedulers/balance_leader.go
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,11 @@ func (l *balanceLeaderScheduler) Schedule(cluster schedule.Cluster) []*operator.

// No store can be selected as source or target.
if source == nil || target == nil {
schedulerCounter.WithLabelValues(l.GetName(), "no-store").Inc()
if source == nil {
schedulerCounter.WithLabelValues(l.GetName(), "no-source-store").Inc()
} else {
schedulerCounter.WithLabelValues(l.GetName(), "no-target-store").Inc()
}
// When the cluster is balanced, all stores will be added to the cache once
// all of them have been selected. This will cause the scheduler to not adapt
// to sudden change of a store's leader. Here we clear the taint cache and
Expand Down
2 changes: 1 addition & 1 deletion server/schedulers/balance_region.go
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ func (s *balanceRegionScheduler) Schedule(cluster schedule.Cluster) []*operator.
f := s.hitsCounter.buildSourceFilter(s.GetName(), cluster)
source := s.selector.SelectSource(cluster, stores, f)
if source == nil {
schedulerCounter.WithLabelValues(s.GetName(), "no-store").Inc()
schedulerCounter.WithLabelValues(s.GetName(), "no-source-store").Inc()
// Unlike the balanceLeaderScheduler, we don't need to clear the taintCache
// here. Because normally region score won't change rapidly, and the region
// balance requires lower sensitivity compared to leader balance.
Expand Down
4 changes: 2 additions & 2 deletions server/schedulers/random_merge.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ func (s *randomMergeScheduler) Schedule(cluster schedule.Cluster) []*operator.Op
stores := cluster.GetStores()
store := s.selector.SelectSource(cluster, stores)
if store == nil {
schedulerCounter.WithLabelValues(s.GetName(), "no-store").Inc()
schedulerCounter.WithLabelValues(s.GetName(), "no-source-store").Inc()
return nil
}
region := cluster.RandLeaderRegion(store.GetID(), core.HealthRegion())
Expand All @@ -83,7 +83,7 @@ func (s *randomMergeScheduler) Schedule(cluster schedule.Cluster) []*operator.Op
target = other
}
if target == nil {
schedulerCounter.WithLabelValues(s.GetName(), "no-adjacent").Inc()
schedulerCounter.WithLabelValues(s.GetName(), "no-target-store").Inc()
return nil
}

Expand Down
2 changes: 1 addition & 1 deletion server/schedulers/shuffle_region.go
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ func (s *shuffleRegionScheduler) scheduleRemovePeer(cluster schedule.Cluster) (*

source := s.selector.SelectSource(cluster, stores)
if source == nil {
schedulerCounter.WithLabelValues(s.GetName(), "no-store").Inc()
schedulerCounter.WithLabelValues(s.GetName(), "no-source-store").Inc()
return nil, nil
}

Expand Down
10 changes: 5 additions & 5 deletions server/statistics/store_collection.go
Original file line number Diff line number Diff line change
Expand Up @@ -198,11 +198,11 @@ func (s *storeStatistics) Collect() {
disableReplaceOfflineReplica = 1
}

configs["disable_makeup_replica"] = disableMakeUpReplica
configs["disable_learner"] = disableLearner
configs["disable_remove_down_replica"] = disableRemoveDownReplica
configs["disable_remove_extra_replica"] = disableRemoveExtraReplica
configs["disable_replace_offline_replica"] = disableReplaceOfflineReplica
configs["disable-makeup-replica"] = disableMakeUpReplica
configs["disable-learner"] = disableLearner
configs["disable-remove-down-replica"] = disableRemoveDownReplica
configs["disable-remove-extra-replica"] = disableRemoveExtraReplica
configs["disable-replace-offline-replica"] = disableReplaceOfflineReplica

for typ, value := range configs {
configStatusGauge.WithLabelValues(typ, s.namespace).Set(value)
Expand Down

0 comments on commit 5c648dc

Please sign in to comment.