-
Notifications
You must be signed in to change notification settings - Fork 726
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
*: add hot region schedule #611
Merged
Merged
Changes from all commits
Commits
Show all changes
34 commits
Select commit
Hold shift + click to select a range
e2982b7
*: add hot region schedule
nolouch 2a2ef7f
*: adjust state outqueue state
nolouch ca4c248
*:calculate avg bytes write
nolouch 7d8e7bf
*: clean up
nolouch 0f6c35c
address
nolouch 382986e
tiny clean
nolouch 465e6e7
some fix
nolouch 20e78b0
change to lruCache
nolouch 0d21c20
some clean
nolouch 5050c94
address comments
nolouch f5c9de4
address comments
nolouch e4f281d
*:tiny fix
nolouch 034e975
balance: add some test
nolouch 9228598
tiny clean
nolouch a787547
Merge branch 'master' into hot-region-schedule
nolouch 1097525
address comments
nolouch ef4fe14
Merge branch 'master' into shuning/hot-region-schedule
nolouch 3eae118
address comment
nolouch f18d1c4
address comments
nolouch d9ad9f7
fix
nolouch 95e3530
address comments
nolouch 5e8f5a8
tiny fix
nolouch 174782e
address comment
nolouch 1c3605a
tiny fix
nolouch 30bcb69
address comment
nolouch ce794ce
Merge branch 'master' into shuning/hot-region-schedule
disksing bad2c8a
fix ci
nolouch 18e60e9
Merge branch 'shuning/hot-region-schedule' of https://github.com/ping…
nolouch c2469d7
tiny fix
nolouch 7d80df0
Merge branch 'master' into shuning/hot-region-schedule
zhangjinpeng87 2760bc9
address comment
nolouch 79a39d8
Merge branch 'shuning/hot-region-schedule' of https://github.com/ping…
nolouch 79b9de3
address comment
nolouch 91e69c6
address comment
nolouch File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
// Copyright 2017 PingCAP, Inc. | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
package api | ||
|
||
import ( | ||
"net/http" | ||
|
||
"github.com/pingcap/pd/server" | ||
"github.com/unrolled/render" | ||
) | ||
|
||
// hotStatusHandler serves HTTP requests for hot write region/store
// statistics. It embeds *server.Handler to reach the scheduling status
// (GetHotWriteRegions / GetHotWriteStores) and renders responses as JSON.
type hotStatusHandler struct {
	*server.Handler
	rd *render.Render // JSON renderer shared by the API handlers
}
|
||
func newHotStatusHandler(handler *server.Handler, rd *render.Render) *hotStatusHandler { | ||
return &hotStatusHandler{ | ||
Handler: handler, | ||
rd: rd, | ||
} | ||
} | ||
|
||
func (h *hotStatusHandler) GetHotRegions(w http.ResponseWriter, r *http.Request) { | ||
h.rd.JSON(w, http.StatusOK, h.GetHotWriteRegions()) | ||
} | ||
|
||
func (h *hotStatusHandler) GetHotStores(w http.ResponseWriter, r *http.Request) { | ||
h.rd.JSON(w, http.StatusOK, h.GetHotWriteStores()) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,6 +15,9 @@ package server | |
|
||
import ( | ||
"math" | ||
"math/rand" | ||
"sort" | ||
"sync" | ||
"time" | ||
|
||
log "github.com/Sirupsen/logrus" | ||
|
@@ -370,3 +373,278 @@ func (r *replicaChecker) checkBestReplacement(region *RegionInfo) Operator { | |
} | ||
return newTransferPeer(region, oldPeer, newPeer) | ||
} | ||
|
||
// RegionStat records each hot region's statistics.
type RegionStat struct {
	RegionID     uint64 `json:"region_id"`
	WrittenBytes uint64 `json:"written_bytes"`
	// HotDegree records the hot region update times
	HotDegree int `json:"hot_degree"`
	// LastUpdateTime used to calculate average write
	LastUpdateTime time.Time `json:"last_update_time"`
	StoreID        uint64    `json:"-"`
	// antiCount used to eliminate some noise when remove region in cache
	antiCount int
	// version used to check the region split times
	version uint64
}

// RegionsStat is a list of hot region statistics; it implements
// sort.Interface ordered by WrittenBytes ascending.
type RegionsStat []RegionStat

// Len returns the number of entries.
func (rs RegionsStat) Len() int { return len(rs) }

// Swap exchanges the entries at i and j.
func (rs RegionsStat) Swap(i, j int) { rs[i], rs[j] = rs[j], rs[i] }

// Less orders entries by their written bytes.
func (rs RegionsStat) Less(i, j int) bool { return rs[i].WrittenBytes < rs[j].WrittenBytes }
|
||
// StoreHotRegions records all hot regions in one store with sequence.
type StoreHotRegions struct {
	TotalWrittenBytes uint64 `json:"total_written"` // sum of WrittenBytes over all hot regions on the store
	RegionCount       int    `json:"region_count"`  // number of hot regions tracked on the store
	// RegionsStat holds per-region stats; calculateScore sorts it by
	// WrittenBytes in descending order.
	RegionsStat RegionsStat `json:"stats"`
}
|
||
// balanceHotRegionScheduler moves hot write regions (or their leaders) away
// from heavily written stores. The embedded RWMutex guards scoreStatus,
// which is rebuilt from the cluster's write statistics on every Schedule.
type balanceHotRegionScheduler struct {
	sync.RWMutex
	opt *scheduleOption
	// limit is the current operator limit; adjustBalanceLimit recomputes it
	// each time a source store is chosen.
	limit       uint64
	scoreStatus map[uint64]*StoreHotRegions // store id -> regions status in this store
	r           *rand.Rand                  // randomness for picking a source region
}
|
||
func newBalanceHotRegionScheduler(opt *scheduleOption) *balanceHotRegionScheduler { | ||
return &balanceHotRegionScheduler{ | ||
opt: opt, | ||
limit: 1, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we modify the limit? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. use a const var |
||
scoreStatus: make(map[uint64]*StoreHotRegions), | ||
r: rand.New(rand.NewSource(time.Now().UnixNano())), | ||
} | ||
} | ||
|
||
func (h *balanceHotRegionScheduler) GetName() string { | ||
return "balance-hot-region-scheduler" | ||
} | ||
|
||
// GetResourceKind reports the resource kind this scheduler balances.
// It returns priorityKind — presumably so the coordinator treats its
// operators with higher priority than regular balancing; confirm in the
// coordinator's dispatch logic.
func (h *balanceHotRegionScheduler) GetResourceKind() ResourceKind {
	return priorityKind
}
|
||
// GetResourceLimit returns the current operator limit for this scheduler.
// NOTE(review): limit is written by adjustBalanceLimit without holding the
// mutex; this read is only safe if the coordinator calls both from the same
// goroutine — confirm.
func (h *balanceHotRegionScheduler) GetResourceLimit() uint64 {
	return h.limit
}
|
||
func (h *balanceHotRegionScheduler) Prepare(cluster *clusterInfo) error { return nil } | ||
|
||
func (h *balanceHotRegionScheduler) Cleanup(cluster *clusterInfo) {} | ||
|
||
func (h *balanceHotRegionScheduler) Schedule(cluster *clusterInfo) Operator { | ||
h.calculateScore(cluster) | ||
region := h.SelectSourceRegion(cluster) | ||
if region == nil { | ||
return nil | ||
} | ||
newLeader := h.selectTransferLeader(region) | ||
if newLeader != nil { | ||
return newPriorityTransferLeader(region, newLeader) | ||
} | ||
peer := h.selectTransferPeer(region, cluster) | ||
if peer != nil { | ||
return newPriorityTransferPeer(region, region.Leader, peer) | ||
} | ||
return nil | ||
} | ||
|
||
func (h *balanceHotRegionScheduler) calculateScore(cluster *clusterInfo) { | ||
h.Lock() | ||
defer h.Unlock() | ||
h.scoreStatus = make(map[uint64]*StoreHotRegions) | ||
items := cluster.writeStatistics.elems() | ||
for _, item := range items { | ||
r, ok := item.value.(*RegionStat) | ||
if !ok { | ||
continue | ||
} | ||
if r.HotDegree < hotRegionLowThreshold { | ||
continue | ||
} | ||
|
||
regionInfo := cluster.getRegion(r.RegionID) | ||
storeID := regionInfo.Leader.GetStoreId() | ||
status, ok := h.scoreStatus[storeID] | ||
if !ok { | ||
status = &StoreHotRegions{ | ||
RegionsStat: make(RegionsStat, 0, storeHotRegionsDefaultLen), | ||
} | ||
h.scoreStatus[storeID] = status | ||
} | ||
status.TotalWrittenBytes += r.WrittenBytes | ||
status.RegionsStat = append(status.RegionsStat, RegionStat{ | ||
RegionID: r.RegionID, | ||
WrittenBytes: r.WrittenBytes, | ||
HotDegree: r.HotDegree, | ||
LastUpdateTime: r.LastUpdateTime, | ||
StoreID: storeID, | ||
antiCount: r.antiCount, | ||
version: r.version, | ||
}) | ||
status.RegionCount++ | ||
} | ||
|
||
for _, rs := range h.scoreStatus { | ||
sort.Sort(sort.Reverse(rs.RegionsStat)) | ||
} | ||
} | ||
|
||
func (h *balanceHotRegionScheduler) SelectSourceRegion(cluster *clusterInfo) *RegionInfo { | ||
var ( | ||
maxWritten uint64 | ||
sourceStore uint64 | ||
maxHotStoreRegionCount int | ||
) | ||
// choose a hot store as transfer source | ||
// the numbers of the hot regions in that store has higher priority than TotalWrittenBytes | ||
for sid, s := range h.scoreStatus { | ||
if s.RegionsStat.Len() < 2 { | ||
continue | ||
} | ||
|
||
if maxHotStoreRegionCount < s.RegionsStat.Len() { | ||
maxHotStoreRegionCount = s.RegionsStat.Len() | ||
maxWritten = s.TotalWrittenBytes | ||
sourceStore = sid | ||
continue | ||
} | ||
|
||
if maxHotStoreRegionCount == s.RegionsStat.Len() && maxWritten < s.TotalWrittenBytes { | ||
maxWritten = s.TotalWrittenBytes | ||
sourceStore = sid | ||
} | ||
} | ||
|
||
if sourceStore == 0 { | ||
return nil | ||
} | ||
|
||
length := h.scoreStatus[sourceStore].RegionsStat.Len() | ||
// the hottest region in the store not move | ||
// randomly pick a region from 1 .. length-1 | ||
// TODO: consider hot degree when pick | ||
rr := h.r.Intn(length-1) + 1 | ||
pickedRegionStat := h.scoreStatus[sourceStore].RegionsStat[rr] | ||
if pickedRegionStat.antiCount < hotRegionAntiCount { | ||
return nil | ||
} | ||
sourceRegion := cluster.getRegion(pickedRegionStat.RegionID) | ||
if len(sourceRegion.DownPeers) != 0 || len(sourceRegion.PendingPeers) != 0 { | ||
return nil | ||
} | ||
// use written bytes per second | ||
sourceRegion.WrittenBytes = pickedRegionStat.WrittenBytes | ||
h.adjustBalanceLimit(sourceStore) | ||
return sourceRegion | ||
} | ||
|
||
func (h *balanceHotRegionScheduler) adjustBalanceLimit(storeID uint64) { | ||
s := h.scoreStatus[storeID] | ||
var hotRegionTotalCount float64 | ||
for _, m := range h.scoreStatus { | ||
hotRegionTotalCount += float64(m.RegionsStat.Len()) | ||
} | ||
|
||
avgRegionCount := hotRegionTotalCount / float64(len(h.scoreStatus)) | ||
// Multiplied by hotRegionLimitFactor to avoid transfer back and forth | ||
limit := uint64((float64(s.RegionsStat.Len()) - avgRegionCount) * hotRegionLimitFactor) | ||
h.limit = maxUint64(1, limit) | ||
} | ||
|
||
func (h *balanceHotRegionScheduler) GetStatus() map[uint64]*StoreHotRegions { | ||
h.RLock() | ||
defer h.RUnlock() | ||
status := make(map[uint64]*StoreHotRegions) | ||
for id, stat := range h.scoreStatus { | ||
clone := *stat | ||
status[id] = &clone | ||
} | ||
return status | ||
} | ||
|
||
func (h *balanceHotRegionScheduler) selectTransferLeader(sourceRegion *RegionInfo) *metapb.Peer { | ||
followPeers := sourceRegion.GetFollowers() | ||
storeIDs := make([]uint64, 0, len(followPeers)) | ||
for _, peer := range followPeers { | ||
storeIDs = append(storeIDs, peer.GetStoreId()) | ||
} | ||
|
||
targetStoreID := h.selectBestStore(storeIDs, sourceRegion) | ||
var targetPeer *metapb.Peer | ||
for _, peer := range followPeers { | ||
if peer.GetStoreId() == targetStoreID { | ||
targetPeer = peer | ||
} | ||
} | ||
return targetPeer | ||
} | ||
|
||
func (h *balanceHotRegionScheduler) selectTransferPeer(sourceRegion *RegionInfo, cluster *clusterInfo) *metapb.Peer { | ||
var filters []Filter | ||
stores := cluster.getStores() | ||
|
||
filters = append(filters, newExcludedFilter(sourceRegion.GetStoreIds(), sourceRegion.GetStoreIds())) | ||
filters = append(filters, newDistinctScoreFilter(h.opt.GetReplication(), stores, cluster.getLeaderStore(sourceRegion))) | ||
filters = append(filters, newStateFilter(h.opt)) | ||
filters = append(filters, newStorageThresholdFilter(h.opt)) | ||
|
||
storeIDs := make([]uint64, 0, len(stores)) | ||
for _, store := range stores { | ||
if filterTarget(store, filters) { | ||
continue | ||
} | ||
storeIDs = append(storeIDs, store.GetId()) | ||
} | ||
targetStoreID := h.selectBestStore(storeIDs, sourceRegion) | ||
targetStore := cluster.getStore(targetStoreID) | ||
if targetStore == nil { | ||
return nil | ||
} | ||
newPeer, err := cluster.allocPeer(targetStore.GetId()) | ||
if err != nil { | ||
log.Errorf("failed to allocate peer: %v", err) | ||
return nil | ||
} | ||
|
||
return newPeer | ||
} | ||
|
||
// select a store to transfer peer
// preferred to the store that with the least number of regions
// and then choose the least total written bytes store
func (h *balanceHotRegionScheduler) selectBestStore(stores []uint64, sourceRegion *RegionInfo) uint64 {
	sr := h.scoreStatus[sourceRegion.Leader.GetStoreId()]
	sourceStoreWrittenBytes := sr.TotalWrittenBytes
	sourceStoreHotRegionCount := sr.RegionsStat.Len()

	var (
		targetStore     uint64
		minWrittenBytes uint64 = math.MaxUint64
	)
	minRegionCount := int(math.MaxInt32)
	for _, store := range stores {
		if s, ok := h.scoreStatus[store]; ok {
			// Candidate has hot regions of its own: prefer it only when it
			// has at least 2 fewer hot regions than the source and fewer than
			// any candidate seen so far.
			if sourceStoreHotRegionCount-s.RegionsStat.Len() > 1 && minRegionCount > s.RegionsStat.Len() {
				targetStore = store
				minWrittenBytes = s.TotalWrittenBytes
				minRegionCount = s.RegionsStat.Len()
				continue
			}
			// Tie on hot-region count: break the tie by total written bytes,
			// but only when the move leaves the target comfortably below the
			// source's write load (hotRegionScheduleFactor headroom).
			// NOTE(review): this written-bytes cap applies only to the
			// tie-break branch, not to the count-based branch above —
			// confirm the asymmetry is intentional.
			if minRegionCount == s.RegionsStat.Len() && minWrittenBytes > s.TotalWrittenBytes &&
				uint64(float64(sourceStoreWrittenBytes)*hotRegionScheduleFactor) > s.TotalWrittenBytes+2*sourceRegion.WrittenBytes {
				minWrittenBytes = s.TotalWrittenBytes
				targetStore = store
			}

		} else {
			// A store with no hot regions at all is the best possible target;
			// stop searching immediately.
			targetStore = store
			break
		}
	}
	return targetStore
}
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please let pd-ctl support this later.