Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

safepoint: support to set safepoints for services #2348

Merged
merged 19 commits into from
Apr 21, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions client/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,12 @@ type Client interface {
// If the given safePoint is less than the current one, it will not be updated.
// Returns the new safePoint after updating.
UpdateGCSafePoint(ctx context.Context, safePoint uint64) (uint64, error)

// UpdateServiceGCSafePoint updates the safepoint for specific service and
// returns the minimum safepoint across all services, this value is used to
// determine the safepoint for multiple services, it does not tigger a GC
// job. Use UpdateGCSafePoint to trigger the GC job if needed.
UpdateServiceGCSafePoint(ctx context.Context, serviceID string, ttl int64, safePoint uint64) (uint64, error)
// ScatterRegion scatters the specified region. Should use it for a batch of regions,
// and the distribution of these regions will be dispersed.
ScatterRegion(ctx context.Context, regionID uint64) error
Expand Down Expand Up @@ -604,6 +610,36 @@ func (c *client) UpdateGCSafePoint(ctx context.Context, safePoint uint64) (uint6
return resp.GetNewSafePoint(), nil
}

// UpdateServiceGCSafePoint updates the safepoint for specific service and
// returns the minimum safepoint across all services, this value is used to
// determine the safepoint for multiple services, it does not tigger a GC
// job. Use UpdateGCSafePoint to trigger the GC job if needed.
func (c *client) UpdateServiceGCSafePoint(ctx context.Context, serviceID string, ttl int64, safePoint uint64) (uint64, error) {
if span := opentracing.SpanFromContext(ctx); span != nil {
span = opentracing.StartSpan("pdclient.UpdateServiceGCSafePoint", opentracing.ChildOf(span.Context()))
defer span.Finish()
}

start := time.Now()
defer func() { cmdDurationUpdateServiceGCSafePoint.Observe(time.Since(start).Seconds()) }()

ctx, cancel := context.WithTimeout(ctx, pdTimeout)
resp, err := c.leaderClient().UpdateServiceGCSafePoint(ctx, &pdpb.UpdateServiceGCSafePointRequest{
Header: c.requestHeader(),
ServiceId: []byte(serviceID),
TTL: ttl,
SafePoint: safePoint,
})
cancel()

if err != nil {
cmdFailedDurationUpdateServiceGCSafePoint.Observe(time.Since(start).Seconds())
c.ScheduleCheckLeader()
return 0, errors.WithStack(err)
}
return resp.GetMinSafePoint(), nil
}

func (c *client) ScatterRegion(ctx context.Context, regionID uint64) error {
if span := opentracing.SpanFromContext(ctx); span != nil {
span = opentracing.StartSpan("pdclient.ScatterRegion", opentracing.ChildOf(span.Context()))
Expand Down
44 changes: 23 additions & 21 deletions client/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,28 +73,30 @@ var (

var (
// WithLabelValues is a heavy operation, define variable to avoid call it every time.
cmdDurationWait = cmdDuration.WithLabelValues("wait")
cmdDurationTSO = cmdDuration.WithLabelValues("tso")
cmdDurationTSOAsyncWait = cmdDuration.WithLabelValues("tso_async_wait")
cmdDurationGetRegion = cmdDuration.WithLabelValues("get_region")
cmdDurationGetPrevRegion = cmdDuration.WithLabelValues("get_prev_region")
cmdDurationGetRegionByID = cmdDuration.WithLabelValues("get_region_byid")
cmdDurationScanRegions = cmdDuration.WithLabelValues("scan_regions")
cmdDurationGetStore = cmdDuration.WithLabelValues("get_store")
cmdDurationGetAllStores = cmdDuration.WithLabelValues("get_all_stores")
cmdDurationUpdateGCSafePoint = cmdDuration.WithLabelValues("update_gc_safe_point")
cmdDurationScatterRegion = cmdDuration.WithLabelValues("scatter_region")
cmdDurationGetOperator = cmdDuration.WithLabelValues("get_operator")
cmdDurationWait = cmdDuration.WithLabelValues("wait")
cmdDurationTSO = cmdDuration.WithLabelValues("tso")
cmdDurationTSOAsyncWait = cmdDuration.WithLabelValues("tso_async_wait")
cmdDurationGetRegion = cmdDuration.WithLabelValues("get_region")
cmdDurationGetPrevRegion = cmdDuration.WithLabelValues("get_prev_region")
cmdDurationGetRegionByID = cmdDuration.WithLabelValues("get_region_byid")
cmdDurationScanRegions = cmdDuration.WithLabelValues("scan_regions")
cmdDurationGetStore = cmdDuration.WithLabelValues("get_store")
cmdDurationGetAllStores = cmdDuration.WithLabelValues("get_all_stores")
cmdDurationUpdateGCSafePoint = cmdDuration.WithLabelValues("update_gc_safe_point")
cmdDurationUpdateServiceGCSafePoint = cmdDuration.WithLabelValues("update_service_gc_safe_point")
cmdDurationScatterRegion = cmdDuration.WithLabelValues("scatter_region")
cmdDurationGetOperator = cmdDuration.WithLabelValues("get_operator")

cmdFailDurationGetRegion = cmdFailedDuration.WithLabelValues("get_region")
cmdFailDurationTSO = cmdFailedDuration.WithLabelValues("tso")
cmdFailDurationGetPrevRegion = cmdFailedDuration.WithLabelValues("get_prev_region")
cmdFailedDurationGetRegionByID = cmdFailedDuration.WithLabelValues("get_region_byid")
cmdFailedDurationScanRegions = cmdFailedDuration.WithLabelValues("scan_regions")
cmdFailedDurationGetStore = cmdFailedDuration.WithLabelValues("get_store")
cmdFailedDurationGetAllStores = cmdFailedDuration.WithLabelValues("get_all_stores")
cmdFailedDurationUpdateGCSafePoint = cmdFailedDuration.WithLabelValues("update_gc_safe_point")
requestDurationTSO = requestDuration.WithLabelValues("tso")
cmdFailDurationGetRegion = cmdFailedDuration.WithLabelValues("get_region")
cmdFailDurationTSO = cmdFailedDuration.WithLabelValues("tso")
cmdFailDurationGetPrevRegion = cmdFailedDuration.WithLabelValues("get_prev_region")
cmdFailedDurationGetRegionByID = cmdFailedDuration.WithLabelValues("get_region_byid")
cmdFailedDurationScanRegions = cmdFailedDuration.WithLabelValues("scan_regions")
cmdFailedDurationGetStore = cmdFailedDuration.WithLabelValues("get_store")
cmdFailedDurationGetAllStores = cmdFailedDuration.WithLabelValues("get_all_stores")
cmdFailedDurationUpdateGCSafePoint = cmdFailedDuration.WithLabelValues("update_gc_safe_point")
cmdFailedDurationUpdateServiceGCSafePoint = cmdFailedDuration.WithLabelValues("update_service_gc_safe_point")
requestDurationTSO = requestDuration.WithLabelValues("tso")

// config
configCmdDurationCreate = configCmdDuration.WithLabelValues("create")
Expand Down
3 changes: 2 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ require (
github.com/gorilla/mux v1.7.3
github.com/gorilla/websocket v1.2.0 // indirect
github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0
github.com/grpc-ecosystem/grpc-gateway v1.14.3 // indirect
github.com/json-iterator/go v1.1.9 // indirect
github.com/juju/ratelimit v1.0.1
github.com/kevinburke/go-bindata v3.18.0+incompatible
Expand All @@ -42,7 +43,7 @@ require (
github.com/pingcap/check v0.0.0-20191216031241-8a5a85928f12
github.com/pingcap/errcode v0.0.0-20180921232412-a1a7271709d9
github.com/pingcap/failpoint v0.0.0-20191029060244-12f4ac2fd11d
github.com/pingcap/kvproto v0.0.0-20200410092417-d20cd6ac1321
github.com/pingcap/kvproto v0.0.0-20200417092353-efbe03bcffbd
github.com/pingcap/log v0.0.0-20200117041106-d28c14d3b1cd
github.com/pingcap/sysutil v0.0.0-20200408114249-ed3bd6f7fdb1
github.com/pkg/errors v0.9.1
Expand Down
7 changes: 4 additions & 3 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -166,8 +166,9 @@ github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 h1:Ovs26xHkKqVztRpIrF/92Bcuy
github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgfV/d3M/q6VIi02HzZEHgUlZvzk=
github.com/grpc-ecosystem/grpc-gateway v1.9.5 h1:UImYN5qQ8tuGpGE16ZmjvcTtTw24zw1QAp/SlnNrZhI=
github.com/grpc-ecosystem/grpc-gateway v1.9.5/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY=
github.com/grpc-ecosystem/grpc-gateway v1.12.1 h1:zCy2xE9ablevUOrUZc3Dl72Dt+ya2FNAvC2yLYMHzi4=
github.com/grpc-ecosystem/grpc-gateway v1.12.1/go.mod h1:8XEsbTttt/W+VvjtQhLACqCisSPWTxCZ7sBRjU6iH9c=
github.com/grpc-ecosystem/grpc-gateway v1.14.3 h1:OCJlWkOUoTnl0neNGlf4fUm3TmbEtguw7vR+nGtnDjY=
github.com/grpc-ecosystem/grpc-gateway v1.14.3/go.mod h1:6CwZWGDSPRJidgKAtJVvND6soZe6fT7iteq8wDPdhb0=
github.com/gtank/cryptopasta v0.0.0-20170601214702-1f550f6f2f69 h1:7xsUJsB2NrdcttQPa7JLEaGzvdbk7KvfrjgHZXOQRo0=
github.com/gtank/cryptopasta v0.0.0-20170601214702-1f550f6f2f69/go.mod h1:YLEMZOtU+AZ7dhN9T/IpGhXVGly2bvkJQ+zxj3WeVQo=
github.com/hpcloud/tail v1.0.0 h1:nfCOvKYfkgYP8hkirhJocXT2+zOD8yUNjXaWfTlyFKI=
Expand Down Expand Up @@ -290,8 +291,8 @@ github.com/pingcap/failpoint v0.0.0-20191029060244-12f4ac2fd11d h1:F8vp38kTAckN+
github.com/pingcap/failpoint v0.0.0-20191029060244-12f4ac2fd11d/go.mod h1:DNS3Qg7bEDhU6EXNHF+XSv/PGznQaMJ5FWvctpm6pQI=
github.com/pingcap/kvproto v0.0.0-20191211054548-3c6b38ea5107/go.mod h1:WWLmULLO7l8IOcQG+t+ItJ3fEcrL5FxF0Wu+HrMy26w=
github.com/pingcap/kvproto v0.0.0-20200214064158-62d31900d88e/go.mod h1:IOdRDPLyda8GX2hE/jO7gqaCV/PNFh8BZQCQZXfIOqI=
github.com/pingcap/kvproto v0.0.0-20200410092417-d20cd6ac1321 h1:OeDTuueTyWXo8wTy9W0wxDs3spJnpe+hu+F26R/dtro=
github.com/pingcap/kvproto v0.0.0-20200410092417-d20cd6ac1321/go.mod h1:IOdRDPLyda8GX2hE/jO7gqaCV/PNFh8BZQCQZXfIOqI=
github.com/pingcap/kvproto v0.0.0-20200417092353-efbe03bcffbd h1:BuTFEyCEm71vUU/3qmndWNWfySS0Jwek1iZ1rQ/4Jqc=
github.com/pingcap/kvproto v0.0.0-20200417092353-efbe03bcffbd/go.mod h1:IOdRDPLyda8GX2hE/jO7gqaCV/PNFh8BZQCQZXfIOqI=
github.com/pingcap/log v0.0.0-20191012051959-b742a5d432e9 h1:AJD9pZYm72vMgPcQDww9rkZ1DnWfl0pXV3BOWlkYIjA=
github.com/pingcap/log v0.0.0-20191012051959-b742a5d432e9/go.mod h1:4rbK1p9ILyIfb6hU7OG2CiWSqMXnp3JMbiaVJ6mvoY8=
github.com/pingcap/log v0.0.0-20200117041106-d28c14d3b1cd h1:CV3VsP3Z02MVtdpTMfEgRJ4T9NGgGTxdHpJerent7rM=
Expand Down
57 changes: 57 additions & 0 deletions server/core/storage.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
"strings"
"sync"
"sync/atomic"
"time"

"github.com/BurntSushi/toml"
"github.com/gogo/protobuf/proto"
Expand Down Expand Up @@ -385,6 +386,62 @@ func (s *Storage) LoadGCSafePoint() (uint64, error) {
return safePoint, nil
}

// ServiceSafePoint is the safepoint for a specific service
type ServiceSafePoint struct {
ServiceID string
ExpiredAt int64
SafePoint uint64
}

// SaveServiceGCSafePoint saves a GC safepoint for the service
func (s *Storage) SaveServiceGCSafePoint(ssp *ServiceSafePoint) error {
key := path.Join(gcPath, "safe_point", "service", ssp.ServiceID)
value, err := json.Marshal(ssp)
if err != nil {
return err
}

return s.Save(key, string(value))
}

// RemoveServiceGCSafePoint removes a GC safepoint for the service
func (s *Storage) RemoveServiceGCSafePoint(serviceID string) error {
key := path.Join(gcPath, "safe_point", "service", serviceID)
return s.Remove(key)
}

// LoadMinServiceGCSafePoint returns the minimum safepoint across all services
func (s *Storage) LoadMinServiceGCSafePoint() (*ServiceSafePoint, error) {
prefix := path.Join(gcPath, "safe_point", "service")
// the next of 'e' is 'f'
prefixEnd := path.Join(gcPath, "safe_point", "servicf")
keys, values, err := s.LoadRange(prefix, prefixEnd, 0)
if err != nil {
return nil, err
}
if len(keys) == 0 {
return &ServiceSafePoint{}, nil
}

min := &ServiceSafePoint{SafePoint: math.MaxUint64}
now := time.Now().Unix()
for i, key := range keys {
ssp := &ServiceSafePoint{}
if err := json.Unmarshal([]byte(values[i]), ssp); err != nil {
return nil, err
}
if ssp.ExpiredAt < now {
s.Remove(key)
shafreeck marked this conversation as resolved.
Show resolved Hide resolved
continue
}
if ssp.SafePoint < min.SafePoint {
min = ssp
}
}

return min, nil
}

// LoadAllScheduleConfig loads all schedulers' config.
func (s *Storage) LoadAllScheduleConfig() ([]string, []string, error) {
keys, values, err := s.LoadRange(customScheduleConfigPath, clientv3.GetPrefixRangeEnd(customScheduleConfigPath), 1000)
Expand Down
57 changes: 57 additions & 0 deletions server/core/storage_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,12 @@
package core

import (
"encoding/json"
"fmt"
"math"
"path"
"strings"
"time"

. "github.com/pingcap/check"
"github.com/pingcap/kvproto/pkg/metapb"
Expand Down Expand Up @@ -194,6 +198,59 @@ func (s *testKVSuite) TestLoadGCSafePoint(c *C) {
}
}

func (s *testKVSuite) TestSaveServiceGCSafePoint(c *C) {
mem := kv.NewMemoryKV()
storage := NewStorage(mem)
expireAt := time.Now().Add(100 * time.Second).Unix()
serviceSafePoints := []*ServiceSafePoint{
{"1", expireAt, 1},
{"2", expireAt, 2},
{"3", expireAt, 3},
}

for _, ssp := range serviceSafePoints {
c.Assert(storage.SaveServiceGCSafePoint(ssp), IsNil)
}

prefix := path.Join(gcPath, "safe_point", "service")
prefixEnd := path.Join(gcPath, "safe_point", "servicf")
keys, values, err := mem.LoadRange(prefix, prefixEnd, len(serviceSafePoints))
c.Assert(err, IsNil)
c.Assert(len(keys), Equals, 3)
c.Assert(len(values), Equals, 3)

ssp := &ServiceSafePoint{}
for i, key := range keys {
c.Assert(strings.HasSuffix(key, serviceSafePoints[i].ServiceID), IsTrue)

c.Assert(json.Unmarshal([]byte(values[i]), ssp), IsNil)
c.Assert(ssp.ServiceID, Equals, serviceSafePoints[i].ServiceID)
c.Assert(ssp.ExpiredAt, Equals, serviceSafePoints[i].ExpiredAt)
c.Assert(ssp.SafePoint, Equals, serviceSafePoints[i].SafePoint)
}
}

func (s *testKVSuite) TestLoadMinServiceGCSafePoint(c *C) {
mem := kv.NewMemoryKV()
storage := NewStorage(mem)
expireAt := time.Now().Add(1000 * time.Second).Unix()
serviceSafePoints := []*ServiceSafePoint{
{"1", 0, 1},
{"2", expireAt, 2},
{"3", expireAt, 3},
}

for _, ssp := range serviceSafePoints {
c.Assert(storage.SaveServiceGCSafePoint(ssp), IsNil)
}

ssp, err := storage.LoadMinServiceGCSafePoint()
c.Assert(err, IsNil)
c.Assert(ssp.ServiceID, Equals, "2")
c.Assert(ssp.ExpiredAt, Equals, expireAt)
c.Assert(ssp.SafePoint, Equals, uint64(2))
}

type KVWithMaxRangeLimit struct {
kv.Base
rangeLimit int
Expand Down
58 changes: 58 additions & 0 deletions server/grpc_service.go
Original file line number Diff line number Diff line change
Expand Up @@ -728,6 +728,64 @@ func (s *Server) UpdateGCSafePoint(ctx context.Context, request *pdpb.UpdateGCSa
}, nil
}

// UpdateServiceGCSafePoint update the safepoint for specific service
func (s *Server) UpdateServiceGCSafePoint(ctx context.Context, request *pdpb.UpdateServiceGCSafePointRequest) (*pdpb.UpdateServiceGCSafePointResponse, error) {
s.serviceSafePointLock.Lock()
defer s.serviceSafePointLock.Unlock()

if err := s.validateRequest(request.GetHeader()); err != nil {
return nil, err
}

rc := s.GetRaftCluster()
if rc == nil {
return &pdpb.UpdateServiceGCSafePointResponse{Header: s.notBootstrappedHeader()}, nil
}
if request.TTL <= 0 {
if err := s.storage.RemoveServiceGCSafePoint(string(request.ServiceId)); err != nil {
return nil, err
}
}

min, err := s.storage.LoadMinServiceGCSafePoint()
if err != nil {
return nil, err
}

if request.TTL > 0 && request.SafePoint > min.SafePoint {
ssp := &core.ServiceSafePoint{
ServiceID: string(request.ServiceId),
ExpiredAt: time.Now().Unix() + request.TTL,
SafePoint: request.SafePoint,
}
if err := s.storage.SaveServiceGCSafePoint(ssp); err != nil {
return nil, err
}
log.Info("update service GC safe point",
zap.String("service-id", string(ssp.ServiceID)),
zap.Int64("expire-at", ssp.ExpiredAt),
zap.Uint64("safepoint", ssp.SafePoint))
// If the min safepoint is updated, load the next one
if string(request.ServiceId) == min.ServiceID {
min, err = s.storage.LoadMinServiceGCSafePoint()
if err != nil {
return nil, err
}
}
// If ssp is the first safepoint, it is the min value now
if min.SafePoint == 0 {
min = ssp
}
}

return &pdpb.UpdateServiceGCSafePointResponse{
Header: s.header(),
ServiceId: []byte(min.ServiceID),
TTL: min.ExpiredAt - time.Now().Unix(),
MinSafePoint: min.SafePoint,
}, nil
}

// GetOperator gets information about the operator belonging to the speicfy region.
func (s *Server) GetOperator(ctx context.Context, request *pdpb.GetOperatorRequest) (*pdpb.GetOperatorResponse, error) {
if err := s.validateRequest(request.GetHeader()); err != nil {
Expand Down
2 changes: 1 addition & 1 deletion server/kv/levedb_kv.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ func (kv *LeveldbKV) LoadRange(startKey, endKey string, limit int) ([]string, []
values := make([]string, 0, limit)
count := 0
for iter.Next() {
if count >= limit {
if limit > 0 && count >= limit {
break
}
keys = append(keys, string(iter.Key()))
Expand Down
5 changes: 4 additions & 1 deletion server/kv/mem_kv.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,10 @@ func (kv *memoryKV) LoadRange(key, endKey string, limit int) ([]string, []string
kv.tree.AscendRange(memoryKVItem{key, ""}, memoryKVItem{endKey, ""}, func(item btree.Item) bool {
keys = append(keys, item.(memoryKVItem).key)
values = append(values, item.(memoryKVItem).value)
return len(keys) < limit
if limit > 0 {
return len(keys) < limit
}
return true
})
return keys, values, nil
}
Expand Down
3 changes: 3 additions & 0 deletions server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,9 @@ type Server struct {
// Add callback functions at different stages
startCallbacks []func()
closeCallbacks []func()

// serviceSafePointLock is a lock for UpdateServiceGCSafePoint
serviceSafePointLock sync.Mutex
}

// HandlerBuilder builds a server HTTP handler.
Expand Down
Loading