diff --git a/.github/workflows/check.yaml b/.github/workflows/check.yaml index c0bdb7ea492..e2bf99c026f 100644 --- a/.github/workflows/check.yaml +++ b/.github/workflows/check.yaml @@ -6,7 +6,7 @@ concurrency: jobs: statics: runs-on: ubuntu-latest - timeout-minutes: 10 + timeout-minutes: 20 steps: - uses: actions/setup-go@v3 with: diff --git a/.github/workflows/pd-tests.yaml b/.github/workflows/pd-tests.yaml index 73e31fd4ad1..517a2c480e7 100644 --- a/.github/workflows/pd-tests.yaml +++ b/.github/workflows/pd-tests.yaml @@ -45,7 +45,7 @@ jobs: env: WORKER_ID: ${{ matrix.worker_id }} WORKER_COUNT: 13 - JOB_COUNT: 10 # 11, 12 13 are for other integrations jobs + JOB_COUNT: 10 # 11, 12, 13 are for other integrations jobs run: | make ci-test-job JOB_COUNT=$(($JOB_COUNT)) JOB_INDEX=$WORKER_ID mv covprofile covprofile_$WORKER_ID diff --git a/Dockerfile b/Dockerfile index 550b1c1bb72..d3f0025023b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -25,7 +25,9 @@ RUN GO111MODULE=on go mod download COPY . . -RUN make +# Workaround sqlite3 and alpine 3.19 incompatibility +# https://github.com/mattn/go-sqlite3/issues/1164 +RUN CGO_CFLAGS="-D_LARGEFILE64_SOURCE" make FROM alpine:3.17 diff --git a/Makefile b/Makefile index 54ad331aea4..946493cd7ce 100644 --- a/Makefile +++ b/Makefile @@ -15,6 +15,8 @@ dev-basic: build check basic-test BUILD_FLAGS ?= BUILD_TAGS ?= BUILD_CGO_ENABLED := 0 +BUILD_TOOL_CGO_ENABLED := 0 +BUILD_GOEXPERIMENT ?= PD_EDITION ?= Community # Ensure PD_EDITION is set to Community or Enterprise before running build process. ifneq "$(PD_EDITION)" "Community" @@ -46,6 +48,13 @@ ifeq ($(PLUGIN), 1) BUILD_TAGS += with_plugin endif +ifeq ($(ENABLE_FIPS), 1) + BUILD_TAGS+=boringcrypto + BUILD_GOEXPERIMENT=boringcrypto + BUILD_CGO_ENABLED := 1 + BUILD_TOOL_CGO_ENABLED := 1 +endif + LDFLAGS += -X "$(PD_PKG)/pkg/versioninfo.PDReleaseVersion=$(shell git describe --tags --dirty --always)" LDFLAGS += -X "$(PD_PKG)/pkg/versioninfo.PDBuildTS=$(shell date -u '+%Y-%m-%d %I:%M:%S')" LDFLAGS += -X "$(PD_PKG)/pkg/versioninfo.PDGitHash=$(shell git rev-parse HEAD)" @@ -79,7 +88,7 @@ endif PD_SERVER_DEP += dashboard-ui pd-server: ${PD_SERVER_DEP} - CGO_ENABLED=$(BUILD_CGO_ENABLED) go build $(BUILD_FLAGS) -gcflags '$(GCFLAGS)' -ldflags '$(LDFLAGS)' -tags "$(BUILD_TAGS)" -o $(BUILD_BIN_PATH)/pd-server cmd/pd-server/main.go + GOEXPERIMENT=$(BUILD_GOEXPERIMENT) CGO_ENABLED=$(BUILD_CGO_ENABLED) go build $(BUILD_FLAGS) -gcflags '$(GCFLAGS)' -ldflags '$(LDFLAGS)' -tags "$(BUILD_TAGS)" -o $(BUILD_BIN_PATH)/pd-server cmd/pd-server/main.go pd-server-failpoint: @$(FAILPOINT_ENABLE) @@ -94,13 +103,13 @@ pd-server-basic: # Tools pd-ctl: - CGO_ENABLED=0 go build -gcflags '$(GCFLAGS)' -ldflags '$(LDFLAGS)' -o $(BUILD_BIN_PATH)/pd-ctl tools/pd-ctl/main.go + GOEXPERIMENT=$(BUILD_GOEXPERIMENT) CGO_ENABLED=$(BUILD_TOOL_CGO_ENABLED) go build -gcflags '$(GCFLAGS)' -ldflags '$(LDFLAGS)' -o $(BUILD_BIN_PATH)/pd-ctl tools/pd-ctl/main.go pd-tso-bench: cd tools/pd-tso-bench && CGO_ENABLED=0 go build -o $(BUILD_BIN_PATH)/pd-tso-bench main.go pd-api-bench: cd tools/pd-api-bench && CGO_ENABLED=0 go build -o $(BUILD_BIN_PATH)/pd-api-bench main.go pd-recover: - CGO_ENABLED=0 go build -gcflags '$(GCFLAGS)' -ldflags '$(LDFLAGS)' -o $(BUILD_BIN_PATH)/pd-recover tools/pd-recover/main.go + GOEXPERIMENT=$(BUILD_GOEXPERIMENT) CGO_ENABLED=$(BUILD_TOOL_CGO_ENABLED) go build -gcflags '$(GCFLAGS)' -ldflags '$(LDFLAGS)' -o $(BUILD_BIN_PATH)/pd-recover tools/pd-recover/main.go pd-analysis: CGO_ENABLED=0 go build -gcflags '$(GCFLAGS)' -ldflags '$(LDFLAGS)' 
-o $(BUILD_BIN_PATH)/pd-analysis tools/pd-analysis/main.go pd-heartbeat-bench: @@ -235,6 +244,7 @@ ci-test-job: install-tools dashboard-ui else \ for mod in $(shell ./scripts/ci-subtask.sh $(JOB_COUNT) $(JOB_INDEX)); do cd $$mod && $(MAKE) ci-test-job && cd $(ROOT_PATH) > /dev/null && cat $$mod/covprofile >> covprofile; done; \ fi + @$(FAILPOINT_DISABLE) TSO_INTEGRATION_TEST_PKGS := $(PD_PKG)/tests/server/tso diff --git a/client/client.go b/client/client.go index 067872d2d39..7053ed2be96 100644 --- a/client/client.go +++ b/client/client.go @@ -74,7 +74,7 @@ type GlobalConfigItem struct { PayLoad []byte } -// Client is a PD (Placement Driver) client. +// Client is a PD (Placement Driver) RPC client. // It should not be used after calling Close(). type Client interface { // GetClusterID gets the cluster ID from PD. @@ -91,7 +91,7 @@ type Client interface { // client should retry later. GetRegion(ctx context.Context, key []byte, opts ...GetRegionOption) (*Region, error) // GetRegionFromMember gets a region from certain members. - GetRegionFromMember(ctx context.Context, key []byte, memberURLs []string) (*Region, error) + GetRegionFromMember(ctx context.Context, key []byte, memberURLs []string, opts ...GetRegionOption) (*Region, error) // GetPrevRegion gets the previous region and its leader Peer of the region where the key is located. GetPrevRegion(ctx context.Context, key []byte, opts ...GetRegionOption) (*Region, error) // GetRegionByID gets a region and its leader Peer from PD by id. @@ -100,7 +100,7 @@ type Client interface { // Limit limits the maximum number of regions returned. // If a region has no leader, corresponding leader will be placed by a peer // with empty value (PeerID is 0). - ScanRegions(ctx context.Context, key, endKey []byte, limit int) ([]*Region, error) + ScanRegions(ctx context.Context, key, endKey []byte, limit int, opts ...GetRegionOption) ([]*Region, error) // GetStore gets a store from PD by store id. // The store may expire later. Caller is responsible for caching and taking care // of store change. @@ -136,7 +136,7 @@ type Client interface { LoadGlobalConfig(ctx context.Context, names []string, configPath string) ([]GlobalConfigItem, int64, error) // StoreGlobalConfig set the config from etcd StoreGlobalConfig(ctx context.Context, configPath string, items []GlobalConfigItem) error - // WatchGlobalConfig returns an stream with all global config and updates + // WatchGlobalConfig returns a stream with all global config and updates WatchGlobalConfig(ctx context.Context, configPath string, revision int64) (chan []GlobalConfigItem, error) // UpdateOption updates the client option. UpdateOption(option DynamicOption, value interface{}) error @@ -200,7 +200,8 @@ func WithSkipStoreLimit() RegionsOption { // GetRegionOp represents available options when getting regions. type GetRegionOp struct { - needBuckets bool + needBuckets bool + allowFollowerHandle bool } // GetRegionOption configures GetRegionOp. @@ -211,6 +212,11 @@ func WithBuckets() GetRegionOption { return func(op *GetRegionOp) { op.needBuckets = true } } +// WithAllowFollowerHandle means that client can send request to follower and let it handle this request. +func WithAllowFollowerHandle() GetRegionOption { + return func(op *GetRegionOp) { op.allowFollowerHandle = true } +} + // LeaderHealthCheckInterval might be changed in the unit to shorten the testing time. 
var LeaderHealthCheckInterval = time.Second @@ -701,6 +707,12 @@ func (c *client) UpdateOption(option DynamicOption, value interface{}) error { return errors.New("[pd] invalid value type for EnableTSOFollowerProxy option, it should be bool") } c.option.setEnableTSOFollowerProxy(enable) + case EnableFollowerHandle: + enable, ok := value.(bool) + if !ok { + return errors.New("[pd] invalid value type for EnableFollowerHandle option, it should be bool") + } + c.option.setEnableFollowerHandle(enable) default: return errors.New("[pd] unsupported client option") } @@ -732,16 +744,18 @@ func (c *client) checkLeaderHealth(ctx context.Context) { if client := c.pdSvcDiscovery.GetServingEndpointClientConn(); client != nil { healthCli := healthpb.NewHealthClient(client) resp, err := healthCli.Check(ctx, &healthpb.HealthCheckRequest{Service: ""}) - rpcErr, ok := status.FromError(err) failpoint.Inject("unreachableNetwork1", func() { resp = nil err = status.New(codes.Unavailable, "unavailable").Err() }) + rpcErr, ok := status.FromError(err) if (ok && isNetworkError(rpcErr.Code())) || resp.GetStatus() != healthpb.HealthCheckResponse_SERVING { atomic.StoreInt32(&(c.leaderNetworkFailure), int32(1)) } else { atomic.StoreInt32(&(c.leaderNetworkFailure), int32(0)) } + } else { + atomic.StoreInt32(&(c.leaderNetworkFailure), int32(1)) } } @@ -751,8 +765,7 @@ func (c *client) GetAllMembers(ctx context.Context) ([]*pdpb.Member, error) { ctx, cancel := context.WithTimeout(ctx, c.option.timeout) req := &pdpb.GetMembersRequest{Header: c.requestHeader()} - ctx = grpcutil.BuildForwardContext(ctx, c.GetLeaderAddr()) - protoClient := c.getClient() + protoClient, ctx := c.getClientAndContext(ctx) if protoClient == nil { cancel() return nil, errs.ErrClientGetProtoClient @@ -800,15 +813,15 @@ func (c *client) backupClientConn() (*grpc.ClientConn, string) { return nil, "" } -func (c *client) getClient() pdpb.PDClient { +func (c *client) getClientAndContext(ctx context.Context) (pdpb.PDClient, context.Context) { if c.option.enableForwarding && atomic.LoadInt32(&c.leaderNetworkFailure) == 1 { backupClientConn, addr := c.backupClientConn() if backupClientConn != nil { log.Debug("[pd] use follower client", zap.String("addr", addr)) - return pdpb.NewPDClient(backupClientConn) + return pdpb.NewPDClient(backupClientConn), grpcutil.BuildForwardContext(ctx, c.GetLeaderAddr()) } } - return c.leaderClient() + return c.leaderClient(), ctx } func (c *client) GetTSAsync(ctx context.Context) TSFuture { @@ -868,16 +881,18 @@ func (c *client) GetMinTS(ctx context.Context) (physical int64, logical int64, e default: return 0, 0, errs.ErrClientGetMinTSO.FastGenByArgs("undefined service mode") } - + ctx, cancel := context.WithTimeout(ctx, c.option.timeout) // Call GetMinTS API to get the minimal TS from the API leader. - protoClient := c.getClient() + protoClient, ctx := c.getClientAndContext(ctx) if protoClient == nil { + cancel() return 0, 0, errs.ErrClientGetProtoClient } resp, err := protoClient.GetMinTS(ctx, &pdpb.GetMinTSRequest{ Header: c.requestHeader(), }) + cancel() if err != nil { if strings.Contains(err.Error(), "Unimplemented") { // If the method is not supported, we fallback to GetTS. 
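The client.go changes above add two cooperating knobs for follower reads: a client-level dynamic option (handled by the new EnableFollowerHandle case in UpdateOption) and a per-request WithAllowFollowerHandle option. A minimal sketch of how a caller might combine them, assuming an already constructed pd.Client; the helper name and key are illustrative, and EnableFollowerHandle is assumed to be the DynamicOption defined alongside this patch in option.go (only its UpdateOption case is visible here):

```go
package example

import (
	"context"

	pd "github.com/tikv/pd/client"
)

// followerRead flips the client-level EnableFollowerHandle option and then
// issues a region read that is allowed to be served by a follower.
func followerRead(ctx context.Context, cli pd.Client) (*pd.Region, error) {
	// Client-level switch handled by the new UpdateOption case.
	if err := cli.UpdateOption(pd.EnableFollowerHandle, true); err != nil {
		return nil, err
	}
	// Per-request opt-in added by WithAllowFollowerHandle.
	return cli.GetRegion(ctx, []byte("example-key"), pd.WithAllowFollowerHandle())
}
```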
@@ -915,44 +930,11 @@ func handleRegionResponse(res *pdpb.GetRegionResponse) *Region { return r } -func (c *client) GetRegion(ctx context.Context, key []byte, opts ...GetRegionOption) (*Region, error) { - if span := opentracing.SpanFromContext(ctx); span != nil { - span = opentracing.StartSpan("pdclient.GetRegion", opentracing.ChildOf(span.Context())) - defer span.Finish() - } - start := time.Now() - defer func() { cmdDurationGetRegion.Observe(time.Since(start).Seconds()) }() - ctx, cancel := context.WithTimeout(ctx, c.option.timeout) - - options := &GetRegionOp{} - for _, opt := range opts { - opt(options) - } - req := &pdpb.GetRegionRequest{ - Header: c.requestHeader(), - RegionKey: key, - NeedBuckets: options.needBuckets, - } - ctx = grpcutil.BuildForwardContext(ctx, c.GetLeaderAddr()) - protoClient := c.getClient() - if protoClient == nil { - cancel() - return nil, errs.ErrClientGetProtoClient - } - resp, err := protoClient.GetRegion(ctx, req) - cancel() - - if err = c.respForErr(cmdFailDurationGetRegion, start, err, resp.GetHeader()); err != nil { - return nil, err - } - return handleRegionResponse(resp), nil -} - func isNetworkError(code codes.Code) bool { return code == codes.Unavailable || code == codes.DeadlineExceeded } -func (c *client) GetRegionFromMember(ctx context.Context, key []byte, memberURLs []string) (*Region, error) { +func (c *client) GetRegionFromMember(ctx context.Context, key []byte, memberURLs []string, opts ...GetRegionOption) (*Region, error) { if span := opentracing.SpanFromContext(ctx); span != nil { span = opentracing.StartSpan("pdclient.GetRegionFromMember", opentracing.ChildOf(span.Context())) defer span.Finish() @@ -990,6 +972,38 @@ func (c *client) GetRegionFromMember(ctx context.Context, key []byte, memberURLs return handleRegionResponse(resp), nil } +func (c *client) GetRegion(ctx context.Context, key []byte, opts ...GetRegionOption) (*Region, error) { + if span := opentracing.SpanFromContext(ctx); span != nil { + span = opentracing.StartSpan("pdclient.GetRegion", opentracing.ChildOf(span.Context())) + defer span.Finish() + } + start := time.Now() + defer func() { cmdDurationGetRegion.Observe(time.Since(start).Seconds()) }() + ctx, cancel := context.WithTimeout(ctx, c.option.timeout) + + options := &GetRegionOp{} + for _, opt := range opts { + opt(options) + } + req := &pdpb.GetRegionRequest{ + Header: c.requestHeader(), + RegionKey: key, + NeedBuckets: options.needBuckets, + } + protoClient, ctx := c.getClientAndContext(ctx) + if protoClient == nil { + cancel() + return nil, errs.ErrClientGetProtoClient + } + resp, err := protoClient.GetRegion(ctx, req) + cancel() + + if err = c.respForErr(cmdFailDurationGetRegion, start, err, resp.GetHeader()); err != nil { + return nil, err + } + return handleRegionResponse(resp), nil +} + func (c *client) GetPrevRegion(ctx context.Context, key []byte, opts ...GetRegionOption) (*Region, error) { if span := opentracing.SpanFromContext(ctx); span != nil { span = opentracing.StartSpan("pdclient.GetPrevRegion", opentracing.ChildOf(span.Context())) @@ -1008,8 +1022,7 @@ func (c *client) GetPrevRegion(ctx context.Context, key []byte, opts ...GetRegio RegionKey: key, NeedBuckets: options.needBuckets, } - ctx = grpcutil.BuildForwardContext(ctx, c.GetLeaderAddr()) - protoClient := c.getClient() + protoClient, ctx := c.getClientAndContext(ctx) if protoClient == nil { cancel() return nil, errs.ErrClientGetProtoClient @@ -1041,8 +1054,7 @@ func (c *client) GetRegionByID(ctx context.Context, regionID uint64, opts ...Get RegionId: 
regionID, NeedBuckets: options.needBuckets, } - ctx = grpcutil.BuildForwardContext(ctx, c.GetLeaderAddr()) - protoClient := c.getClient() + protoClient, ctx := c.getClientAndContext(ctx) if protoClient == nil { cancel() return nil, errs.ErrClientGetProtoClient @@ -1056,13 +1068,13 @@ func (c *client) GetRegionByID(ctx context.Context, regionID uint64, opts ...Get return handleRegionResponse(resp), nil } -func (c *client) ScanRegions(ctx context.Context, key, endKey []byte, limit int) ([]*Region, error) { +func (c *client) ScanRegions(ctx context.Context, key, endKey []byte, limit int, opts ...GetRegionOption) ([]*Region, error) { if span := opentracing.SpanFromContext(ctx); span != nil { span = opentracing.StartSpan("pdclient.ScanRegions", opentracing.ChildOf(span.Context())) defer span.Finish() } start := time.Now() - defer cmdDurationScanRegions.Observe(time.Since(start).Seconds()) + defer func() { cmdDurationScanRegions.Observe(time.Since(start).Seconds()) }() var cancel context.CancelFunc scanCtx := ctx @@ -1076,8 +1088,7 @@ func (c *client) ScanRegions(ctx context.Context, key, endKey []byte, limit int) EndKey: endKey, Limit: int32(limit), } - scanCtx = grpcutil.BuildForwardContext(scanCtx, c.GetLeaderAddr()) - protoClient := c.getClient() + protoClient, scanCtx := c.getClientAndContext(scanCtx) if protoClient == nil { cancel() return nil, errs.ErrClientGetProtoClient @@ -1132,8 +1143,7 @@ func (c *client) GetStore(ctx context.Context, storeID uint64) (*metapb.Store, e Header: c.requestHeader(), StoreId: storeID, } - ctx = grpcutil.BuildForwardContext(ctx, c.GetLeaderAddr()) - protoClient := c.getClient() + protoClient, ctx := c.getClientAndContext(ctx) if protoClient == nil { cancel() return nil, errs.ErrClientGetProtoClient @@ -1177,8 +1187,7 @@ func (c *client) GetAllStores(ctx context.Context, opts ...GetStoreOption) ([]*m Header: c.requestHeader(), ExcludeTombstoneStores: options.excludeTombstone, } - ctx = grpcutil.BuildForwardContext(ctx, c.GetLeaderAddr()) - protoClient := c.getClient() + protoClient, ctx := c.getClientAndContext(ctx) if protoClient == nil { cancel() return nil, errs.ErrClientGetProtoClient @@ -1205,8 +1214,7 @@ func (c *client) UpdateGCSafePoint(ctx context.Context, safePoint uint64) (uint6 Header: c.requestHeader(), SafePoint: safePoint, } - ctx = grpcutil.BuildForwardContext(ctx, c.GetLeaderAddr()) - protoClient := c.getClient() + protoClient, ctx := c.getClientAndContext(ctx) if protoClient == nil { cancel() return 0, errs.ErrClientGetProtoClient @@ -1240,8 +1248,7 @@ func (c *client) UpdateServiceGCSafePoint(ctx context.Context, serviceID string, TTL: ttl, SafePoint: safePoint, } - ctx = grpcutil.BuildForwardContext(ctx, c.GetLeaderAddr()) - protoClient := c.getClient() + protoClient, ctx := c.getClientAndContext(ctx) if protoClient == nil { cancel() return 0, errs.ErrClientGetProtoClient @@ -1273,8 +1280,7 @@ func (c *client) scatterRegionsWithGroup(ctx context.Context, regionID uint64, g RegionId: regionID, Group: group, } - ctx = grpcutil.BuildForwardContext(ctx, c.GetLeaderAddr()) - protoClient := c.getClient() + protoClient, ctx := c.getClientAndContext(ctx) if protoClient == nil { cancel() return errs.ErrClientGetProtoClient @@ -1318,8 +1324,7 @@ func (c *client) SplitAndScatterRegions(ctx context.Context, splitKeys [][]byte, RetryLimit: options.retryLimit, } - ctx = grpcutil.BuildForwardContext(ctx, c.GetLeaderAddr()) - protoClient := c.getClient() + protoClient, ctx := c.getClientAndContext(ctx) if protoClient == nil { cancel() return nil, 
errs.ErrClientGetProtoClient @@ -1341,8 +1346,7 @@ func (c *client) GetOperator(ctx context.Context, regionID uint64) (*pdpb.GetOpe Header: c.requestHeader(), RegionId: regionID, } - ctx = grpcutil.BuildForwardContext(ctx, c.GetLeaderAddr()) - protoClient := c.getClient() + protoClient, ctx := c.getClientAndContext(ctx) if protoClient == nil { cancel() return nil, errs.ErrClientGetProtoClient @@ -1369,8 +1373,7 @@ func (c *client) SplitRegions(ctx context.Context, splitKeys [][]byte, opts ...R SplitKeys: splitKeys, RetryLimit: options.retryLimit, } - ctx = grpcutil.BuildForwardContext(ctx, c.GetLeaderAddr()) - protoClient := c.getClient() + protoClient, ctx := c.getClientAndContext(ctx) if protoClient == nil { cancel() return nil, errs.ErrClientGetProtoClient @@ -1400,8 +1403,7 @@ func (c *client) scatterRegionsWithOptions(ctx context.Context, regionsID []uint SkipStoreLimit: options.skipStoreLimit, } - ctx = grpcutil.BuildForwardContext(ctx, c.GetLeaderAddr()) - protoClient := c.getClient() + protoClient, ctx := c.getClientAndContext(ctx) if protoClient == nil { cancel() return nil, errs.ErrClientGetProtoClient @@ -1451,8 +1453,7 @@ func trimHTTPPrefix(str string) string { func (c *client) LoadGlobalConfig(ctx context.Context, names []string, configPath string) ([]GlobalConfigItem, int64, error) { ctx, cancel := context.WithTimeout(ctx, c.option.timeout) defer cancel() - ctx = grpcutil.BuildForwardContext(ctx, c.GetLeaderAddr()) - protoClient := c.getClient() + protoClient, ctx := c.getClientAndContext(ctx) if protoClient == nil { return nil, 0, errs.ErrClientGetProtoClient } @@ -1483,8 +1484,7 @@ func (c *client) StoreGlobalConfig(ctx context.Context, configPath string, items } ctx, cancel := context.WithTimeout(ctx, c.option.timeout) defer cancel() - ctx = grpcutil.BuildForwardContext(ctx, c.GetLeaderAddr()) - protoClient := c.getClient() + protoClient, ctx := c.getClientAndContext(ctx) if protoClient == nil { return errs.ErrClientGetProtoClient } @@ -1501,8 +1501,7 @@ func (c *client) WatchGlobalConfig(ctx context.Context, configPath string, revis globalConfigWatcherCh := make(chan []GlobalConfigItem, 16) ctx, cancel := context.WithTimeout(ctx, c.option.timeout) defer cancel() - ctx = grpcutil.BuildForwardContext(ctx, c.GetLeaderAddr()) - protoClient := c.getClient() + protoClient, ctx := c.getClientAndContext(ctx) if protoClient == nil { return nil, errs.ErrClientGetProtoClient } @@ -1550,8 +1549,7 @@ func (c *client) WatchGlobalConfig(ctx context.Context, configPath string, revis func (c *client) GetExternalTimestamp(ctx context.Context) (uint64, error) { ctx, cancel := context.WithTimeout(ctx, c.option.timeout) defer cancel() - ctx = grpcutil.BuildForwardContext(ctx, c.GetLeaderAddr()) - protoClient := c.getClient() + protoClient, ctx := c.getClientAndContext(ctx) if protoClient == nil { return 0, errs.ErrClientGetProtoClient } @@ -1571,8 +1569,7 @@ func (c *client) GetExternalTimestamp(ctx context.Context) (uint64, error) { func (c *client) SetExternalTimestamp(ctx context.Context, timestamp uint64) error { ctx, cancel := context.WithTimeout(ctx, c.option.timeout) defer cancel() - ctx = grpcutil.BuildForwardContext(ctx, c.GetLeaderAddr()) - protoClient := c.getClient() + protoClient, ctx := c.getClientAndContext(ctx) if protoClient == nil { return errs.ErrClientGetProtoClient } diff --git a/client/gc_client.go b/client/gc_client.go index b5d64e25129..fff292405c2 100644 --- a/client/gc_client.go +++ b/client/gc_client.go @@ -22,7 +22,6 @@ import ( 
"github.com/pingcap/kvproto/pkg/pdpb" "github.com/pingcap/log" "github.com/tikv/pd/client/errs" - "github.com/tikv/pd/client/grpcutil" "go.uber.org/zap" ) @@ -48,8 +47,7 @@ func (c *client) UpdateGCSafePointV2(ctx context.Context, keyspaceID uint32, saf KeyspaceId: keyspaceID, SafePoint: safePoint, } - ctx = grpcutil.BuildForwardContext(ctx, c.GetLeaderAddr()) - protoClient := c.getClient() + protoClient, ctx := c.getClientAndContext(ctx) if protoClient == nil { cancel() return 0, errs.ErrClientGetProtoClient @@ -80,8 +78,7 @@ func (c *client) UpdateServiceSafePointV2(ctx context.Context, keyspaceID uint32 SafePoint: safePoint, Ttl: ttl, } - ctx = grpcutil.BuildForwardContext(ctx, c.GetLeaderAddr()) - protoClient := c.getClient() + protoClient, ctx := c.getClientAndContext(ctx) if protoClient == nil { cancel() return 0, errs.ErrClientGetProtoClient @@ -104,8 +101,7 @@ func (c *client) WatchGCSafePointV2(ctx context.Context, revision int64) (chan [ ctx, cancel := context.WithTimeout(ctx, c.option.timeout) defer cancel() - ctx = grpcutil.BuildForwardContext(ctx, c.GetLeaderAddr()) - protoClient := c.getClient() + protoClient, ctx := c.getClientAndContext(ctx) if protoClient == nil { return nil, errs.ErrClientGetProtoClient } diff --git a/client/go.mod b/client/go.mod index 948a5c22b14..54be0c96765 100644 --- a/client/go.mod +++ b/client/go.mod @@ -13,6 +13,7 @@ require ( github.com/pingcap/log v1.1.1-0.20221110025148-ca232912c9f3 github.com/prometheus/client_golang v1.11.1 github.com/stretchr/testify v1.8.2 + go.uber.org/atomic v1.10.0 go.uber.org/goleak v1.1.11 go.uber.org/zap v1.24.0 golang.org/x/exp v0.0.0-20230711005742-c3f37128e5a4 @@ -31,7 +32,6 @@ require ( github.com/prometheus/client_model v0.2.0 // indirect github.com/prometheus/common v0.26.0 // indirect github.com/prometheus/procfs v0.6.0 // indirect - go.uber.org/atomic v1.10.0 // indirect go.uber.org/multierr v1.11.0 // indirect golang.org/x/net v0.17.0 // indirect golang.org/x/sys v0.13.0 // indirect diff --git a/client/grpcutil/grpcutil.go b/client/grpcutil/grpcutil.go index 125f1125721..fe149e76ecc 100644 --- a/client/grpcutil/grpcutil.go +++ b/client/grpcutil/grpcutil.go @@ -21,6 +21,8 @@ import ( "sync" "time" + "github.com/pingcap/errors" + "github.com/pingcap/failpoint" "github.com/pingcap/log" "github.com/tikv/pd/client/errs" "github.com/tikv/pd/client/tlsutil" @@ -88,6 +90,12 @@ func GetOrCreateGRPCConn(ctx context.Context, clientConns *sync.Map, addr string dCtx, cancel := context.WithTimeout(ctx, dialTimeout) defer cancel() cc, err := GetClientConn(dCtx, addr, tlsConfig, opt...) + failpoint.Inject("unreachableNetwork2", func(val failpoint.Value) { + if val, ok := val.(string); ok && val == addr { + cc = nil + err = errors.Errorf("unreachable network") + } + }) if err != nil { return nil, err } diff --git a/client/http/api.go b/client/http/api.go new file mode 100644 index 00000000000..2153cd286e8 --- /dev/null +++ b/client/http/api.go @@ -0,0 +1,188 @@ +// Copyright 2023 TiKV Project Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package http + +import ( + "fmt" + "net/url" + "time" +) + +// The following constants are the paths of PD HTTP APIs. +const ( + // Metadata + HotRead = "/pd/api/v1/hotspot/regions/read" + HotWrite = "/pd/api/v1/hotspot/regions/write" + HotHistory = "/pd/api/v1/hotspot/regions/history" + RegionByIDPrefix = "/pd/api/v1/region/id" + regionByKey = "/pd/api/v1/region/key" + Regions = "/pd/api/v1/regions" + regionsByKey = "/pd/api/v1/regions/key" + RegionsByStoreIDPrefix = "/pd/api/v1/regions/store" + regionsReplicated = "/pd/api/v1/regions/replicated" + EmptyRegions = "/pd/api/v1/regions/check/empty-region" + AccelerateSchedule = "/pd/api/v1/regions/accelerate-schedule" + AccelerateScheduleInBatch = "/pd/api/v1/regions/accelerate-schedule/batch" + store = "/pd/api/v1/store" + Stores = "/pd/api/v1/stores" + StatsRegion = "/pd/api/v1/stats/region" + membersPrefix = "/pd/api/v1/members" + leaderPrefix = "/pd/api/v1/leader" + transferLeader = "/pd/api/v1/leader/transfer" + // Config + Config = "/pd/api/v1/config" + ClusterVersion = "/pd/api/v1/config/cluster-version" + ScheduleConfig = "/pd/api/v1/config/schedule" + ReplicateConfig = "/pd/api/v1/config/replicate" + // Rule + PlacementRule = "/pd/api/v1/config/rule" + PlacementRules = "/pd/api/v1/config/rules" + PlacementRulesInBatch = "/pd/api/v1/config/rules/batch" + placementRulesByGroup = "/pd/api/v1/config/rules/group" + PlacementRuleBundle = "/pd/api/v1/config/placement-rule" + placementRuleGroup = "/pd/api/v1/config/rule_group" + placementRuleGroups = "/pd/api/v1/config/rule_groups" + RegionLabelRule = "/pd/api/v1/config/region-label/rule" + RegionLabelRules = "/pd/api/v1/config/region-label/rules" + RegionLabelRulesByIDs = "/pd/api/v1/config/region-label/rules/ids" + // Scheduler + Schedulers = "/pd/api/v1/schedulers" + scatterRangeScheduler = "/pd/api/v1/schedulers/scatter-range-" + // Admin + ResetTS = "/pd/api/v1/admin/reset-ts" + BaseAllocID = "/pd/api/v1/admin/base-alloc-id" + SnapshotRecoveringMark = "/pd/api/v1/admin/cluster/markers/snapshot-recovering" + // Debug + PProfProfile = "/pd/api/v1/debug/pprof/profile" + PProfHeap = "/pd/api/v1/debug/pprof/heap" + PProfMutex = "/pd/api/v1/debug/pprof/mutex" + PProfAllocs = "/pd/api/v1/debug/pprof/allocs" + PProfBlock = "/pd/api/v1/debug/pprof/block" + PProfGoroutine = "/pd/api/v1/debug/pprof/goroutine" + // Others + MinResolvedTSPrefix = "/pd/api/v1/min-resolved-ts" + Status = "/pd/api/v1/status" + Version = "/pd/api/v1/version" +) + +// RegionByID returns the path of PD HTTP API to get region by ID. +func RegionByID(regionID uint64) string { + return fmt.Sprintf("%s/%d", RegionByIDPrefix, regionID) +} + +// RegionByKey returns the path of PD HTTP API to get region by key. +func RegionByKey(key []byte) string { + return fmt.Sprintf("%s/%s", regionByKey, url.QueryEscape(string(key))) +} + +// RegionsByKeyRange returns the path of PD HTTP API to scan regions with given start key, end key and limit parameters. +func RegionsByKeyRange(keyRange *KeyRange, limit int) string { + startKeyStr, endKeyStr := keyRange.EscapeAsUTF8Str() + return fmt.Sprintf("%s?start_key=%s&end_key=%s&limit=%d", + regionsByKey, startKeyStr, endKeyStr, limit) +} + +// RegionsByStoreID returns the path of PD HTTP API to get regions by store ID. 
+func RegionsByStoreID(storeID uint64) string { + return fmt.Sprintf("%s/%d", RegionsByStoreIDPrefix, storeID) +} + +// RegionsReplicatedByKeyRange returns the path of PD HTTP API to get replicated regions with given start key and end key. +func RegionsReplicatedByKeyRange(keyRange *KeyRange) string { + startKeyStr, endKeyStr := keyRange.EscapeAsHexStr() + return fmt.Sprintf("%s?startKey=%s&endKey=%s", + regionsReplicated, startKeyStr, endKeyStr) +} + +// RegionStatsByKeyRange returns the path of PD HTTP API to get region stats by start key and end key. +func RegionStatsByKeyRange(keyRange *KeyRange, onlyCount bool) string { + startKeyStr, endKeyStr := keyRange.EscapeAsUTF8Str() + if onlyCount { + return fmt.Sprintf("%s?start_key=%s&end_key=%s&count", + StatsRegion, startKeyStr, endKeyStr) + } + return fmt.Sprintf("%s?start_key=%s&end_key=%s", + StatsRegion, startKeyStr, endKeyStr) +} + +// StoreByID returns the store API with store ID parameter. +func StoreByID(id uint64) string { + return fmt.Sprintf("%s/%d", store, id) +} + +// StoreLabelByID returns the store label API with store ID parameter. +func StoreLabelByID(id uint64) string { + return fmt.Sprintf("%s/%d/label", store, id) +} + +// LabelByStoreID returns the path of PD HTTP API to set store label. +func LabelByStoreID(storeID int64) string { + return fmt.Sprintf("%s/%d/label", store, storeID) +} + +// TransferLeaderByID returns the path of PD HTTP API to transfer leader by ID. +func TransferLeaderByID(leaderID string) string { + return fmt.Sprintf("%s/%s", transferLeader, leaderID) +} + +// ConfigWithTTLSeconds returns the config API with the TTL seconds parameter. +func ConfigWithTTLSeconds(ttlSeconds float64) string { + return fmt.Sprintf("%s?ttlSecond=%.0f", Config, ttlSeconds) +} + +// PlacementRulesByGroup returns the path of PD HTTP API to get placement rules by group. +func PlacementRulesByGroup(group string) string { + return fmt.Sprintf("%s/%s", placementRulesByGroup, group) +} + +// PlacementRuleByGroupAndID returns the path of PD HTTP API to get placement rule by group and ID. +func PlacementRuleByGroupAndID(group, id string) string { + return fmt.Sprintf("%s/%s/%s", PlacementRule, group, id) +} + +// PlacementRuleBundleByGroup returns the path of PD HTTP API to get placement rule bundle by group. +func PlacementRuleBundleByGroup(group string) string { + return fmt.Sprintf("%s/%s", PlacementRuleBundle, group) +} + +// PlacementRuleBundleWithPartialParameter returns the path of PD HTTP API to get placement rule bundle with partial parameter. +func PlacementRuleBundleWithPartialParameter(partial bool) string { + return fmt.Sprintf("%s?partial=%t", PlacementRuleBundle, partial) +} + +// PlacementRuleGroupByID returns the path of PD HTTP API to get placement rule group by ID. +func PlacementRuleGroupByID(id string) string { + return fmt.Sprintf("%s/%s", placementRuleGroup, id) +} + +// SchedulerByName returns the scheduler API with the given scheduler name. +func SchedulerByName(name string) string { + return fmt.Sprintf("%s/%s", Schedulers, name) +} + +// ScatterRangeSchedulerWithName returns the scatter range scheduler API with name parameter. +func ScatterRangeSchedulerWithName(name string) string { + return fmt.Sprintf("%s%s", scatterRangeScheduler, name) +} + +// PProfProfileAPIWithInterval returns the pprof profile API with interval parameter. 
+func PProfProfileAPIWithInterval(interval time.Duration) string { + return fmt.Sprintf("%s?seconds=%d", PProfProfile, interval/time.Second) +} + +// PProfGoroutineWithDebugLevel returns the pprof goroutine API with debug level parameter. +func PProfGoroutineWithDebugLevel(level int) string { + return fmt.Sprintf("%s?debug=%d", PProfGoroutine, level) +} diff --git a/client/http/client.go b/client/http/client.go new file mode 100644 index 00000000000..958c52489fb --- /dev/null +++ b/client/http/client.go @@ -0,0 +1,802 @@ +// Copyright 2023 TiKV Project Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package http + +import ( + "bytes" + "context" + "crypto/tls" + "encoding/json" + "fmt" + "io" + "net/http" + "strings" + "time" + + "github.com/pingcap/errors" + "github.com/pingcap/kvproto/pkg/pdpb" + "github.com/pingcap/log" + "github.com/prometheus/client_golang/prometheus" + "go.uber.org/zap" +) + +const ( + defaultCallerID = "pd-http-client" + httpScheme = "http" + httpsScheme = "https" + networkErrorStatus = "network error" + + defaultTimeout = 30 * time.Second +) + +// Client is a PD (Placement Driver) HTTP client. +type Client interface { + /* Meta-related interfaces */ + GetRegionByID(context.Context, uint64) (*RegionInfo, error) + GetRegionByKey(context.Context, []byte) (*RegionInfo, error) + GetRegions(context.Context) (*RegionsInfo, error) + GetRegionsByKeyRange(context.Context, *KeyRange, int) (*RegionsInfo, error) + GetRegionsByStoreID(context.Context, uint64) (*RegionsInfo, error) + GetRegionsReplicatedStateByKeyRange(context.Context, *KeyRange) (string, error) + GetHotReadRegions(context.Context) (*StoreHotPeersInfos, error) + GetHotWriteRegions(context.Context) (*StoreHotPeersInfos, error) + GetHistoryHotRegions(context.Context, *HistoryHotRegionsRequest) (*HistoryHotRegions, error) + GetRegionStatusByKeyRange(context.Context, *KeyRange, bool) (*RegionStats, error) + GetStores(context.Context) (*StoresInfo, error) + SetStoreLabels(context.Context, int64, map[string]string) error + GetMembers(context.Context) (*MembersInfo, error) + GetLeader(context.Context) (*pdpb.Member, error) + TransferLeader(context.Context, string) error + /* Config-related interfaces */ + GetScheduleConfig(context.Context) (map[string]interface{}, error) + SetScheduleConfig(context.Context, map[string]interface{}) error + /* Scheduler-related interfaces */ + GetSchedulers(context.Context) ([]string, error) + CreateScheduler(ctx context.Context, name string, storeID uint64) error + /* Rule-related interfaces */ + GetAllPlacementRuleBundles(context.Context) ([]*GroupBundle, error) + GetPlacementRuleBundleByGroup(context.Context, string) (*GroupBundle, error) + GetPlacementRulesByGroup(context.Context, string) ([]*Rule, error) + SetPlacementRule(context.Context, *Rule) error + SetPlacementRuleInBatch(context.Context, []*RuleOp) error + SetPlacementRuleBundles(context.Context, []*GroupBundle, bool) error + DeletePlacementRule(context.Context, string, string) error + 
GetAllPlacementRuleGroups(context.Context) ([]*RuleGroup, error) + GetPlacementRuleGroupByID(context.Context, string) (*RuleGroup, error) + SetPlacementRuleGroup(context.Context, *RuleGroup) error + DeletePlacementRuleGroupByID(context.Context, string) error + GetAllRegionLabelRules(context.Context) ([]*LabelRule, error) + GetRegionLabelRulesByIDs(context.Context, []string) ([]*LabelRule, error) + SetRegionLabelRule(context.Context, *LabelRule) error + PatchRegionLabelRules(context.Context, *LabelRulePatch) error + /* Scheduling-related interfaces */ + AccelerateSchedule(context.Context, *KeyRange) error + AccelerateScheduleInBatch(context.Context, []*KeyRange) error + /* Other interfaces */ + GetMinResolvedTSByStoresIDs(context.Context, []uint64) (uint64, map[uint64]uint64, error) + + /* Client-related methods */ + // WithCallerID sets and returns a new client with the given caller ID. + WithCallerID(string) Client + // WithRespHandler sets and returns a new client with the given HTTP response handler. + // This allows the caller to customize how the response is handled, including error handling logic. + // Additionally, it is important for the caller to handle the content of the response body properly + // in order to ensure that it can be read and marshaled correctly into `res`. + WithRespHandler(func(resp *http.Response, res interface{}) error) Client + Close() +} + +var _ Client = (*client)(nil) + +// clientInner is the inner implementation of the PD HTTP client, which will +// implement some internal logic, such as the HTTP client, service discovery, etc. +type clientInner struct { + pdAddrs []string + tlsConf *tls.Config + cli *http.Client +} + +type client struct { + // Wrapping this struct makes sure the inner implementation + // won't be exposed and stays consistent when the client is copied. + inner *clientInner + + callerID string + respHandler func(resp *http.Response, res interface{}) error + + requestCounter *prometheus.CounterVec + executionDuration *prometheus.HistogramVec +} + +// ClientOption configures the HTTP client. +type ClientOption func(c *client) + +// WithHTTPClient configures the client with the given initialized HTTP client. +func WithHTTPClient(cli *http.Client) ClientOption { + return func(c *client) { + c.inner.cli = cli + } +} + +// WithTLSConfig configures the client with the given TLS config. +// This option won't work if the client is configured with WithHTTPClient. +func WithTLSConfig(tlsConf *tls.Config) ClientOption { + return func(c *client) { + c.inner.tlsConf = tlsConf + } +} + +// WithMetrics configures the client with metrics. +func WithMetrics( + requestCounter *prometheus.CounterVec, + executionDuration *prometheus.HistogramVec, +) ClientOption { + return func(c *client) { + c.requestCounter = requestCounter + c.executionDuration = executionDuration + } +} + +// NewClient creates a PD HTTP client with the given PD addresses and TLS config. +func NewClient( + pdAddrs []string, + opts ...ClientOption, +) Client { + c := &client{inner: &clientInner{}, callerID: defaultCallerID} + // Apply the options first. + for _, opt := range opts { + opt(c) + } + // Normalize the addresses with correct scheme prefix. + for i, addr := range pdAddrs { + if !strings.HasPrefix(addr, httpScheme) { + var scheme string + if c.inner.tlsConf != nil { + scheme = httpsScheme + } else { + scheme = httpScheme + } + pdAddrs[i] = fmt.Sprintf("%s://%s", scheme, addr) + } + } + c.inner.pdAddrs = pdAddrs + // Init the HTTP client if it's not configured.
+ if c.inner.cli == nil { + c.inner.cli = &http.Client{Timeout: defaultTimeout} + if c.inner.tlsConf != nil { + transport := http.DefaultTransport.(*http.Transport).Clone() + transport.TLSClientConfig = c.inner.tlsConf + c.inner.cli.Transport = transport + } + } + + return c +} + +// Close closes the HTTP client. +func (c *client) Close() { + if c.inner == nil { + return + } + if c.inner.cli != nil { + c.inner.cli.CloseIdleConnections() + } + log.Info("[pd] http client closed") +} + +// WithCallerID sets and returns a new client with the given caller ID. +func (c *client) WithCallerID(callerID string) Client { + newClient := *c + newClient.callerID = callerID + return &newClient +} + +// WithRespHandler sets and returns a new client with the given HTTP response handler. +func (c *client) WithRespHandler( + handler func(resp *http.Response, res interface{}) error, +) Client { + newClient := *c + newClient.respHandler = handler + return &newClient +} + +func (c *client) reqCounter(name, status string) { + if c.requestCounter == nil { + return + } + c.requestCounter.WithLabelValues(name, status).Inc() +} + +func (c *client) execDuration(name string, duration time.Duration) { + if c.executionDuration == nil { + return + } + c.executionDuration.WithLabelValues(name).Observe(duration.Seconds()) +} + +// Header key definition constants. +const ( + pdAllowFollowerHandleKey = "PD-Allow-Follower-Handle" + xCallerIDKey = "X-Caller-ID" +) + +// HeaderOption configures the HTTP header. +type HeaderOption func(header http.Header) + +// WithAllowFollowerHandle sets the header field to allow a PD follower to handle this request. +func WithAllowFollowerHandle() HeaderOption { + return func(header http.Header) { + header.Set(pdAllowFollowerHandleKey, "true") + } +} + +// At present, we will use the retry strategy of polling by default to keep +// it consistent with the current implementation of some clients (e.g. TiDB). +func (c *client) requestWithRetry( + ctx context.Context, + name, uri, method string, + body io.Reader, res interface{}, + headerOpts ...HeaderOption, +) error { + var ( + err error + addr string + ) + for idx := 0; idx < len(c.inner.pdAddrs); idx++ { + addr = c.inner.pdAddrs[idx] + err = c.request(ctx, name, fmt.Sprintf("%s%s", addr, uri), method, body, res, headerOpts...) + if err == nil { + break + } + log.Debug("[pd] request one addr failed", + zap.Int("idx", idx), zap.String("addr", addr), zap.Error(err)) + } + return err +} + +func (c *client) request( + ctx context.Context, + name, url, method string, + body io.Reader, res interface{}, + headerOpts ...HeaderOption, +) error { + logFields := []zap.Field{ + zap.String("name", name), + zap.String("url", url), + zap.String("method", method), + zap.String("caller-id", c.callerID), + } + log.Debug("[pd] request the http url", logFields...) + req, err := http.NewRequestWithContext(ctx, method, url, body) + if err != nil { + log.Error("[pd] create http request failed", append(logFields, zap.Error(err))...) + return errors.Trace(err) + } + for _, opt := range headerOpts { + opt(req.Header) + } + req.Header.Set(xCallerIDKey, c.callerID) + + start := time.Now() + resp, err := c.inner.cli.Do(req) + if err != nil { + c.reqCounter(name, networkErrorStatus) + log.Error("[pd] do http request failed", append(logFields, zap.Error(err))...) + return errors.Trace(err) + } + c.execDuration(name, time.Since(start)) + c.reqCounter(name, resp.Status) + + // Give away the response handling to the caller if the handler is set. 
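+	// For illustration only (not part of this patch's defaults): a caller that wants
+	// full control over decoding could install a handler like
+	//
+	//	cli = cli.WithRespHandler(func(resp *http.Response, res interface{}) error {
+	//		defer resp.Body.Close()
+	//		body, err := io.ReadAll(resp.Body)
+	//		if err != nil {
+	//			return err
+	//		}
+	//		return json.Unmarshal(body, res)
+	//	})
+	//
+	// in which case the default status and JSON handling below is skipped entirely.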
+ if c.respHandler != nil { + return c.respHandler(resp, res) + } + + defer func() { + err = resp.Body.Close() + if err != nil { + log.Warn("[pd] close http response body failed", append(logFields, zap.Error(err))...) + } + }() + + if resp.StatusCode != http.StatusOK { + logFields = append(logFields, zap.String("status", resp.Status)) + + bs, readErr := io.ReadAll(resp.Body) + if readErr != nil { + logFields = append(logFields, zap.NamedError("read-body-error", readErr)) + } else { + logFields = append(logFields, zap.ByteString("body", bs)) + } + + log.Error("[pd] request failed with a non-200 status", logFields...) + return errors.Errorf("request pd http api failed with status: '%s'", resp.Status) + } + + if res == nil { + return nil + } + + err = json.NewDecoder(resp.Body).Decode(res) + if err != nil { + return errors.Trace(err) + } + return nil +} + +// GetRegionByID gets the region info by ID. +func (c *client) GetRegionByID(ctx context.Context, regionID uint64) (*RegionInfo, error) { + var region RegionInfo + err := c.requestWithRetry(ctx, + "GetRegionByID", RegionByID(regionID), + http.MethodGet, http.NoBody, &region) + if err != nil { + return nil, err + } + return &region, nil +} + +// GetRegionByKey gets the region info by key. +func (c *client) GetRegionByKey(ctx context.Context, key []byte) (*RegionInfo, error) { + var region RegionInfo + err := c.requestWithRetry(ctx, + "GetRegionByKey", RegionByKey(key), + http.MethodGet, http.NoBody, &region) + if err != nil { + return nil, err + } + return &region, nil +} + +// GetRegions gets the regions info. +func (c *client) GetRegions(ctx context.Context) (*RegionsInfo, error) { + var regions RegionsInfo + err := c.requestWithRetry(ctx, + "GetRegions", Regions, + http.MethodGet, http.NoBody, &regions) + if err != nil { + return nil, err + } + return &regions, nil +} + +// GetRegionsByKeyRange gets the regions info by key range. If the limit is -1, it will return all regions within the range. +// The keys in the key range should be encoded in the UTF-8 bytes format. +func (c *client) GetRegionsByKeyRange(ctx context.Context, keyRange *KeyRange, limit int) (*RegionsInfo, error) { + var regions RegionsInfo + err := c.requestWithRetry(ctx, + "GetRegionsByKeyRange", RegionsByKeyRange(keyRange, limit), + http.MethodGet, http.NoBody, &regions) + if err != nil { + return nil, err + } + return &regions, nil +} + +// GetRegionsByStoreID gets the regions info by store ID. +func (c *client) GetRegionsByStoreID(ctx context.Context, storeID uint64) (*RegionsInfo, error) { + var regions RegionsInfo + err := c.requestWithRetry(ctx, + "GetRegionsByStoreID", RegionsByStoreID(storeID), + http.MethodGet, http.NoBody, &regions) + if err != nil { + return nil, err + } + return &regions, nil +} + +// GetRegionsReplicatedStateByKeyRange gets the regions replicated state info by key range. +// The keys in the key range should be encoded in the hex bytes format (without encoding to the UTF-8 bytes). +func (c *client) GetRegionsReplicatedStateByKeyRange(ctx context.Context, keyRange *KeyRange) (string, error) { + var state string + err := c.requestWithRetry(ctx, + "GetRegionsReplicatedStateByKeyRange", RegionsReplicatedByKeyRange(keyRange), + http.MethodGet, http.NoBody, &state) + if err != nil { + return "", err + } + return state, nil +} + +// GetHotReadRegions gets the hot read region statistics info.
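+// Example (illustrative; the address is a placeholder for a real PD endpoint):
+//
+//	cli := NewClient([]string{"127.0.0.1:2379"})
+//	defer cli.Close()
+//	hotRead, err := cli.GetHotReadRegions(ctx)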
+func (c *client) GetHotReadRegions(ctx context.Context) (*StoreHotPeersInfos, error) { + var hotReadRegions StoreHotPeersInfos + err := c.requestWithRetry(ctx, + "GetHotReadRegions", HotRead, + http.MethodGet, http.NoBody, &hotReadRegions) + if err != nil { + return nil, err + } + return &hotReadRegions, nil +} + +// GetHotWriteRegions gets the hot write region statistics info. +func (c *client) GetHotWriteRegions(ctx context.Context) (*StoreHotPeersInfos, error) { + var hotWriteRegions StoreHotPeersInfos + err := c.requestWithRetry(ctx, + "GetHotWriteRegions", HotWrite, + http.MethodGet, http.NoBody, &hotWriteRegions) + if err != nil { + return nil, err + } + return &hotWriteRegions, nil +} + +// GetHistoryHotRegions gets the history hot region statistics info. +func (c *client) GetHistoryHotRegions(ctx context.Context, req *HistoryHotRegionsRequest) (*HistoryHotRegions, error) { + reqJSON, err := json.Marshal(req) + if err != nil { + return nil, errors.Trace(err) + } + var historyHotRegions HistoryHotRegions + err = c.requestWithRetry(ctx, + "GetHistoryHotRegions", HotHistory, + http.MethodGet, bytes.NewBuffer(reqJSON), &historyHotRegions, + WithAllowFollowerHandle()) + if err != nil { + return nil, err + } + return &historyHotRegions, nil +} + +// GetRegionStatusByKeyRange gets the region status by key range. +// If the `onlyCount` flag is true, the result will only include the count of regions. +// The keys in the key range should be encoded in the UTF-8 bytes format. +func (c *client) GetRegionStatusByKeyRange(ctx context.Context, keyRange *KeyRange, onlyCount bool) (*RegionStats, error) { + var regionStats RegionStats + err := c.requestWithRetry(ctx, + "GetRegionStatusByKeyRange", RegionStatsByKeyRange(keyRange, onlyCount), + http.MethodGet, http.NoBody, &regionStats, + ) + if err != nil { + return nil, err + } + return &regionStats, nil +} + +// SetStoreLabels sets the labels of a store. +func (c *client) SetStoreLabels(ctx context.Context, storeID int64, storeLabels map[string]string) error { + jsonInput, err := json.Marshal(storeLabels) + if err != nil { + return errors.Trace(err) + } + return c.requestWithRetry(ctx, "SetStoreLabel", LabelByStoreID(storeID), + http.MethodPost, bytes.NewBuffer(jsonInput), nil) +} + +func (c *client) GetMembers(ctx context.Context) (*MembersInfo, error) { + var members MembersInfo + err := c.requestWithRetry(ctx, + "GetMembers", membersPrefix, + http.MethodGet, http.NoBody, &members) + if err != nil { + return nil, err + } + return &members, nil +} + +// GetLeader gets the leader of PD cluster. +func (c *client) GetLeader(ctx context.Context) (*pdpb.Member, error) { + var leader pdpb.Member + err := c.requestWithRetry(ctx, "GetLeader", leaderPrefix, + http.MethodGet, http.NoBody, &leader) + if err != nil { + return nil, err + } + return &leader, nil +} + +// TransferLeader transfers the PD leader. +func (c *client) TransferLeader(ctx context.Context, newLeader string) error { + return c.requestWithRetry(ctx, "TransferLeader", TransferLeaderByID(newLeader), + http.MethodPost, http.NoBody, nil) +} + +// GetScheduleConfig gets the schedule configurations. +func (c *client) GetScheduleConfig(ctx context.Context) (map[string]interface{}, error) { + var config map[string]interface{} + err := c.requestWithRetry(ctx, + "GetScheduleConfig", ScheduleConfig, + http.MethodGet, http.NoBody, &config) + if err != nil { + return nil, err + } + return config, nil +} + +// SetScheduleConfig sets the schedule configurations.
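+// Example (illustrative; "leader-schedule-limit" is assumed to be an accepted schedule
+// option key — any key/value understood by the schedule config endpoint works the same way):
+//
+//	cfg, err := cli.GetScheduleConfig(ctx)
+//	if err == nil {
+//		cfg["leader-schedule-limit"] = float64(8)
+//		err = cli.SetScheduleConfig(ctx, cfg)
+//	}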
+func (c *client) SetScheduleConfig(ctx context.Context, config map[string]interface{}) error { + configJSON, err := json.Marshal(config) + if err != nil { + return errors.Trace(err) + } + return c.requestWithRetry(ctx, + "SetScheduleConfig", ScheduleConfig, + http.MethodPost, bytes.NewBuffer(configJSON), nil) +} + +// GetStores gets the stores info. +func (c *client) GetStores(ctx context.Context) (*StoresInfo, error) { + var stores StoresInfo + err := c.requestWithRetry(ctx, + "GetStores", Stores, + http.MethodGet, http.NoBody, &stores) + if err != nil { + return nil, err + } + return &stores, nil +} + +// GetAllPlacementRuleBundles gets all placement rules bundles. +func (c *client) GetAllPlacementRuleBundles(ctx context.Context) ([]*GroupBundle, error) { + var bundles []*GroupBundle + err := c.requestWithRetry(ctx, + "GetPlacementRuleBundle", PlacementRuleBundle, + http.MethodGet, http.NoBody, &bundles) + if err != nil { + return nil, err + } + return bundles, nil +} + +// GetPlacementRuleBundleByGroup gets the placement rules bundle by group. +func (c *client) GetPlacementRuleBundleByGroup(ctx context.Context, group string) (*GroupBundle, error) { + var bundle GroupBundle + err := c.requestWithRetry(ctx, + "GetPlacementRuleBundleByGroup", PlacementRuleBundleByGroup(group), + http.MethodGet, http.NoBody, &bundle) + if err != nil { + return nil, err + } + return &bundle, nil +} + +// GetPlacementRulesByGroup gets the placement rules by group. +func (c *client) GetPlacementRulesByGroup(ctx context.Context, group string) ([]*Rule, error) { + var rules []*Rule + err := c.requestWithRetry(ctx, + "GetPlacementRulesByGroup", PlacementRulesByGroup(group), + http.MethodGet, http.NoBody, &rules) + if err != nil { + return nil, err + } + return rules, nil +} + +// SetPlacementRule sets the placement rule. +func (c *client) SetPlacementRule(ctx context.Context, rule *Rule) error { + ruleJSON, err := json.Marshal(rule) + if err != nil { + return errors.Trace(err) + } + return c.requestWithRetry(ctx, + "SetPlacementRule", PlacementRule, + http.MethodPost, bytes.NewBuffer(ruleJSON), nil) +} + +// SetPlacementRuleInBatch sets the placement rules in batch. +func (c *client) SetPlacementRuleInBatch(ctx context.Context, ruleOps []*RuleOp) error { + ruleOpsJSON, err := json.Marshal(ruleOps) + if err != nil { + return errors.Trace(err) + } + return c.requestWithRetry(ctx, + "SetPlacementRuleInBatch", PlacementRulesInBatch, + http.MethodPost, bytes.NewBuffer(ruleOpsJSON), nil) +} + +// SetPlacementRuleBundles sets the placement rule bundles. +// If `partial` is false, all old configurations will be over-written and dropped. +func (c *client) SetPlacementRuleBundles(ctx context.Context, bundles []*GroupBundle, partial bool) error { + bundlesJSON, err := json.Marshal(bundles) + if err != nil { + return errors.Trace(err) + } + return c.requestWithRetry(ctx, + "SetPlacementRuleBundles", PlacementRuleBundleWithPartialParameter(partial), + http.MethodPost, bytes.NewBuffer(bundlesJSON), nil) +} + +// DeletePlacementRule deletes the placement rule. +func (c *client) DeletePlacementRule(ctx context.Context, group, id string) error { + return c.requestWithRetry(ctx, + "DeletePlacementRule", PlacementRuleByGroupAndID(group, id), + http.MethodDelete, http.NoBody, nil) +} + +// GetAllPlacementRuleGroups gets all placement rule groups. 
+func (c *client) GetAllPlacementRuleGroups(ctx context.Context) ([]*RuleGroup, error) { + var ruleGroups []*RuleGroup + err := c.requestWithRetry(ctx, + "GetAllPlacementRuleGroups", placementRuleGroups, + http.MethodGet, http.NoBody, &ruleGroups) + if err != nil { + return nil, err + } + return ruleGroups, nil +} + +// GetPlacementRuleGroupByID gets the placement rule group by ID. +func (c *client) GetPlacementRuleGroupByID(ctx context.Context, id string) (*RuleGroup, error) { + var ruleGroup RuleGroup + err := c.requestWithRetry(ctx, + "GetPlacementRuleGroupByID", PlacementRuleGroupByID(id), + http.MethodGet, http.NoBody, &ruleGroup) + if err != nil { + return nil, err + } + return &ruleGroup, nil +} + +// SetPlacementRuleGroup sets the placement rule group. +func (c *client) SetPlacementRuleGroup(ctx context.Context, ruleGroup *RuleGroup) error { + ruleGroupJSON, err := json.Marshal(ruleGroup) + if err != nil { + return errors.Trace(err) + } + return c.requestWithRetry(ctx, + "SetPlacementRuleGroup", placementRuleGroup, + http.MethodPost, bytes.NewBuffer(ruleGroupJSON), nil) +} + +// DeletePlacementRuleGroupByID deletes the placement rule group by ID. +func (c *client) DeletePlacementRuleGroupByID(ctx context.Context, id string) error { + return c.requestWithRetry(ctx, + "DeletePlacementRuleGroupByID", PlacementRuleGroupByID(id), + http.MethodDelete, http.NoBody, nil) +} + +// GetAllRegionLabelRules gets all region label rules. +func (c *client) GetAllRegionLabelRules(ctx context.Context) ([]*LabelRule, error) { + var labelRules []*LabelRule + err := c.requestWithRetry(ctx, + "GetAllRegionLabelRules", RegionLabelRules, + http.MethodGet, http.NoBody, &labelRules) + if err != nil { + return nil, err + } + return labelRules, nil +} + +// GetRegionLabelRulesByIDs gets the region label rules by IDs. +func (c *client) GetRegionLabelRulesByIDs(ctx context.Context, ruleIDs []string) ([]*LabelRule, error) { + idsJSON, err := json.Marshal(ruleIDs) + if err != nil { + return nil, errors.Trace(err) + } + var labelRules []*LabelRule + err = c.requestWithRetry(ctx, + "GetRegionLabelRulesByIDs", RegionLabelRulesByIDs, + http.MethodGet, bytes.NewBuffer(idsJSON), &labelRules) + if err != nil { + return nil, err + } + return labelRules, nil +} + +// SetRegionLabelRule sets the region label rule. +func (c *client) SetRegionLabelRule(ctx context.Context, labelRule *LabelRule) error { + labelRuleJSON, err := json.Marshal(labelRule) + if err != nil { + return errors.Trace(err) + } + return c.requestWithRetry(ctx, + "SetRegionLabelRule", RegionLabelRule, + http.MethodPost, bytes.NewBuffer(labelRuleJSON), nil) +} + +// PatchRegionLabelRules patches the region label rules. +func (c *client) PatchRegionLabelRules(ctx context.Context, labelRulePatch *LabelRulePatch) error { + labelRulePatchJSON, err := json.Marshal(labelRulePatch) + if err != nil { + return errors.Trace(err) + } + return c.requestWithRetry(ctx, + "PatchRegionLabelRules", RegionLabelRules, + http.MethodPatch, bytes.NewBuffer(labelRulePatchJSON), nil) +} + +// GetSchedulers gets the schedulers from PD cluster. +func (c *client) GetSchedulers(ctx context.Context) ([]string, error) { + var schedulers []string + err := c.requestWithRetry(ctx, "GetSchedulers", Schedulers, + http.MethodGet, http.NoBody, &schedulers) + if err != nil { + return nil, err + } + return schedulers, nil +} + +// CreateScheduler creates a scheduler to PD cluster. 
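+// Example (illustrative; the scheduler name and store ID are placeholders — PD decides
+// which scheduler names and arguments are valid):
+//
+//	err := cli.CreateScheduler(ctx, "evict-leader-scheduler", 1)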
+func (c *client) CreateScheduler(ctx context.Context, name string, storeID uint64) error { + inputJSON, err := json.Marshal(map[string]interface{}{ + "name": name, + "store_id": storeID, + }) + if err != nil { + return errors.Trace(err) + } + return c.requestWithRetry(ctx, + "CreateScheduler", Schedulers, + http.MethodPost, bytes.NewBuffer(inputJSON), nil) +} + +// AccelerateSchedule accelerates the scheduling of the regions within the given key range. +// The keys in the key range should be encoded in the hex bytes format (without encoding to the UTF-8 bytes). +func (c *client) AccelerateSchedule(ctx context.Context, keyRange *KeyRange) error { + startKey, endKey := keyRange.EscapeAsHexStr() + inputJSON, err := json.Marshal(map[string]string{ + "start_key": startKey, + "end_key": endKey, + }) + if err != nil { + return errors.Trace(err) + } + return c.requestWithRetry(ctx, + "AccelerateSchedule", AccelerateSchedule, + http.MethodPost, bytes.NewBuffer(inputJSON), nil) +} + +// AccelerateScheduleInBatch accelerates the scheduling of the regions within the given key ranges in batch. +// The keys in the key ranges should be encoded in the hex bytes format (without encoding to the UTF-8 bytes). +func (c *client) AccelerateScheduleInBatch(ctx context.Context, keyRanges []*KeyRange) error { + input := make([]map[string]string, 0, len(keyRanges)) + for _, keyRange := range keyRanges { + startKey, endKey := keyRange.EscapeAsHexStr() + input = append(input, map[string]string{ + "start_key": startKey, + "end_key": endKey, + }) + } + inputJSON, err := json.Marshal(input) + if err != nil { + return errors.Trace(err) + } + return c.requestWithRetry(ctx, + "AccelerateScheduleInBatch", AccelerateScheduleInBatch, + http.MethodPost, bytes.NewBuffer(inputJSON), nil) +} + +// GetMinResolvedTSByStoresIDs get min-resolved-ts by stores IDs. +func (c *client) GetMinResolvedTSByStoresIDs(ctx context.Context, storeIDs []uint64) (uint64, map[uint64]uint64, error) { + uri := MinResolvedTSPrefix + // scope is an optional parameter, it can be `cluster` or specified store IDs. + // - When no scope is given, cluster-level's min_resolved_ts will be returned and storesMinResolvedTS will be nil. + // - When scope is `cluster`, cluster-level's min_resolved_ts will be returned and storesMinResolvedTS will be filled. + // - When scope given a list of stores, min_resolved_ts will be provided for each store + // and the scope-specific min_resolved_ts will be returned. + if len(storeIDs) != 0 { + storeIDStrs := make([]string, len(storeIDs)) + for idx, id := range storeIDs { + storeIDStrs[idx] = fmt.Sprintf("%d", id) + } + uri = fmt.Sprintf("%s?scope=%s", uri, strings.Join(storeIDStrs, ",")) + } + resp := struct { + MinResolvedTS uint64 `json:"min_resolved_ts"` + IsRealTime bool `json:"is_real_time,omitempty"` + StoresMinResolvedTS map[uint64]uint64 `json:"stores_min_resolved_ts"` + }{} + err := c.requestWithRetry(ctx, + "GetMinResolvedTSByStoresIDs", uri, + http.MethodGet, http.NoBody, &resp) + if err != nil { + return 0, nil, err + } + if !resp.IsRealTime { + return 0, nil, errors.Trace(errors.New("min resolved ts is not enabled")) + } + return resp.MinResolvedTS, resp.StoresMinResolvedTS, nil +} diff --git a/client/http/client_test.go b/client/http/client_test.go new file mode 100644 index 00000000000..70c2ddee08b --- /dev/null +++ b/client/http/client_test.go @@ -0,0 +1,73 @@ +// Copyright 2023 TiKV Project Authors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package http + +import ( + "context" + "net/http" + "testing" + + "github.com/stretchr/testify/require" +) + +// requestChecker is used to check the HTTP request sent by the client. +type requestChecker struct { + checker func(req *http.Request) error +} + +// RoundTrip implements the `http.RoundTripper` interface. +func (rc *requestChecker) RoundTrip(req *http.Request) (resp *http.Response, err error) { + return &http.Response{StatusCode: http.StatusOK}, rc.checker(req) +} + +func newHTTPClientWithRequestChecker(checker func(req *http.Request) error) *http.Client { + return &http.Client{ + Transport: &requestChecker{checker: checker}, + } +} + +func TestPDAllowFollowerHandleHeader(t *testing.T) { + re := require.New(t) + var expectedVal string + httpClient := newHTTPClientWithRequestChecker(func(req *http.Request) error { + val := req.Header.Get(pdAllowFollowerHandleKey) + if val != expectedVal { + re.Failf("PD allow follower handler header check failed", + "should be %s, but got %s", expectedVal, val) + } + return nil + }) + c := NewClient([]string{"http://127.0.0.1"}, WithHTTPClient(httpClient)) + c.GetRegions(context.Background()) + expectedVal = "true" + c.GetHistoryHotRegions(context.Background(), &HistoryHotRegionsRequest{}) +} + +func TestCallerID(t *testing.T) { + re := require.New(t) + expectedVal := defaultCallerID + httpClient := newHTTPClientWithRequestChecker(func(req *http.Request) error { + val := req.Header.Get(xCallerIDKey) + if val != expectedVal { + re.Failf("Caller ID header check failed", + "should be %s, but got %s", expectedVal, val) + } + return nil + }) + c := NewClient([]string{"http://127.0.0.1"}, WithHTTPClient(httpClient)) + c.GetRegions(context.Background()) + expectedVal = "test" + c.WithCallerID(expectedVal).GetRegions(context.Background()) +} diff --git a/client/http/codec.go b/client/http/codec.go new file mode 100644 index 00000000000..26be64b4f28 --- /dev/null +++ b/client/http/codec.go @@ -0,0 +1,121 @@ +// Copyright 2023 TiKV Project Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
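Editor's note: the client_test.go hunk above already shows the NewClient/WithHTTPClient wiring; for context, here is a minimal usage sketch of the methods added to client.go in this diff. The import path, PD address, and HTTP timeout are assumptions for illustration; only the method names and signatures are taken from the diff itself.

```go
package main

import (
	"context"
	"fmt"
	"log"
	"net/http"
	"time"

	pdhttp "github.com/tikv/pd/client/http" // assumed import path for the new package
)

func main() {
	ctx := context.Background()
	// Same construction pattern as in client_test.go; address and timeout are placeholders.
	c := pdhttp.NewClient(
		[]string{"http://127.0.0.1:2379"},
		pdhttp.WithHTTPClient(&http.Client{Timeout: 10 * time.Second}),
	)

	// Placement rule groups.
	groups, err := c.GetAllPlacementRuleGroups(ctx)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println("placement rule groups:", groups)

	// Region label rules and schedulers.
	labelRules, err := c.GetAllRegionLabelRules(ctx)
	if err != nil {
		log.Fatal(err)
	}
	schedulers, err := c.GetSchedulers(ctx)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println("label rules:", labelRules, "schedulers:", schedulers)

	// Min resolved TS scoped to stores 1 and 2; the client builds "?scope=1,2" internally.
	minTS, storeTS, err := c.GetMinResolvedTSByStoresIDs(ctx, []uint64{1, 2})
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println("min resolved ts:", minTS, "per store:", storeTS)
}
```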
+ +package http + +import ( + "encoding/hex" + + "github.com/pingcap/errors" +) + +const ( + encGroupSize = 8 + encMarker = byte(0xFF) + encPad = byte(0x0) +) + +var pads = make([]byte, encGroupSize) + +// encodeBytes guarantees the encoded value is in ascending order for comparison, +// encoding with the following rule: +// +// [group1][marker1]...[groupN][markerN] +// group is 8 bytes slice which is padding with 0. +// marker is `0xFF - padding 0 count` +// +// For example: +// +// [] -> [0, 0, 0, 0, 0, 0, 0, 0, 247] +// [1, 2, 3] -> [1, 2, 3, 0, 0, 0, 0, 0, 250] +// [1, 2, 3, 0] -> [1, 2, 3, 0, 0, 0, 0, 0, 251] +// [1, 2, 3, 4, 5, 6, 7, 8] -> [1, 2, 3, 4, 5, 6, 7, 8, 255, 0, 0, 0, 0, 0, 0, 0, 0, 247] +// +// Refer: https://github.com/facebook/mysql-5.6/wiki/MyRocks-record-format#memcomparable-format +func encodeBytes(data []byte) []byte { + // Allocate more space to avoid unnecessary slice growing. + // Assume that the byte slice size is about `(len(data) / encGroupSize + 1) * (encGroupSize + 1)` bytes, + // that is `(len(data) / 8 + 1) * 9` in our implement. + dLen := len(data) + result := make([]byte, 0, (dLen/encGroupSize+1)*(encGroupSize+1)) + for idx := 0; idx <= dLen; idx += encGroupSize { + remain := dLen - idx + padCount := 0 + if remain >= encGroupSize { + result = append(result, data[idx:idx+encGroupSize]...) + } else { + padCount = encGroupSize - remain + result = append(result, data[idx:]...) + result = append(result, pads[:padCount]...) + } + + marker := encMarker - byte(padCount) + result = append(result, marker) + } + return result +} + +func decodeBytes(b []byte) ([]byte, error) { + buf := make([]byte, 0, len(b)) + for { + if len(b) < encGroupSize+1 { + return nil, errors.New("insufficient bytes to decode value") + } + + groupBytes := b[:encGroupSize+1] + + group := groupBytes[:encGroupSize] + marker := groupBytes[encGroupSize] + + padCount := encMarker - marker + if padCount > encGroupSize { + return nil, errors.Errorf("invalid marker byte, group bytes %q", groupBytes) + } + + realGroupSize := encGroupSize - padCount + buf = append(buf, group[:realGroupSize]...) + b = b[encGroupSize+1:] + + if padCount != 0 { + // Check validity of padding bytes. + for _, v := range group[realGroupSize:] { + if v != encPad { + return nil, errors.Errorf("invalid padding byte, group bytes %q", groupBytes) + } + } + break + } + } + return buf, nil +} + +// rawKeyToKeyHexStr converts a raw key to a hex string after encoding. +func rawKeyToKeyHexStr(key []byte) string { + if len(key) == 0 { + return "" + } + return hex.EncodeToString(encodeBytes(key)) +} + +// keyHexStrToRawKey converts a hex string to a raw key after decoding. +func keyHexStrToRawKey(hexKey string) ([]byte, error) { + if len(hexKey) == 0 { + return make([]byte, 0), nil + } + key, err := hex.DecodeString(hexKey) + if err != nil { + return nil, err + } + return decodeBytes(key) +} diff --git a/client/http/codec_test.go b/client/http/codec_test.go new file mode 100644 index 00000000000..fa8d413a0d1 --- /dev/null +++ b/client/http/codec_test.go @@ -0,0 +1,64 @@ +// Copyright 2023 TiKV Project Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
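Editor's note: since encodeBytes, decodeBytes, and the hex helpers are unexported, a worked example of the memcomparable grouping described in the codec.go comment has to live in the same package, e.g. next to codec_test.go. A sketch under that assumption (the test name is hypothetical):

```go
package http

import (
	"bytes"
	"testing"
)

// TestEncodeBytesWalkthrough spells out one case of the memcomparable format:
// a key shorter than 8 bytes becomes a single zero-padded group followed by
// the marker 0xFF - padCount.
func TestEncodeBytesWalkthrough(t *testing.T) {
	key := []byte{1, 2, 3}
	// One group: [1 2 3] + 5 pad bytes, marker = 0xFF - 5 = 250.
	want := []byte{1, 2, 3, 0, 0, 0, 0, 0, 250}
	if got := encodeBytes(key); !bytes.Equal(got, want) {
		t.Fatalf("encodeBytes() = %v, want %v", got, want)
	}

	// rawKeyToKeyHexStr is hex(encodeBytes(key)) and keyHexStrToRawKey reverses it.
	hexKey := rawKeyToKeyHexStr(key)
	if hexKey != "0102030000000000fa" {
		t.Fatalf("unexpected hex form %q", hexKey)
	}
	raw, err := keyHexStrToRawKey(hexKey)
	if err != nil || !bytes.Equal(raw, key) {
		t.Fatalf("round trip failed: %v %v", raw, err)
	}
}
```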
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package http + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +func TestBytesCodec(t *testing.T) { + inputs := []struct { + enc []byte + dec []byte + }{ + {[]byte{}, []byte{0, 0, 0, 0, 0, 0, 0, 0, 247}}, + {[]byte{0}, []byte{0, 0, 0, 0, 0, 0, 0, 0, 248}}, + {[]byte{1, 2, 3}, []byte{1, 2, 3, 0, 0, 0, 0, 0, 250}}, + {[]byte{1, 2, 3, 0}, []byte{1, 2, 3, 0, 0, 0, 0, 0, 251}}, + {[]byte{1, 2, 3, 4, 5, 6, 7}, []byte{1, 2, 3, 4, 5, 6, 7, 0, 254}}, + {[]byte{0, 0, 0, 0, 0, 0, 0, 0}, []byte{0, 0, 0, 0, 0, 0, 0, 0, 255, 0, 0, 0, 0, 0, 0, 0, 0, 247}}, + {[]byte{1, 2, 3, 4, 5, 6, 7, 8}, []byte{1, 2, 3, 4, 5, 6, 7, 8, 255, 0, 0, 0, 0, 0, 0, 0, 0, 247}}, + {[]byte{1, 2, 3, 4, 5, 6, 7, 8, 9}, []byte{1, 2, 3, 4, 5, 6, 7, 8, 255, 9, 0, 0, 0, 0, 0, 0, 0, 248}}, + } + + for _, input := range inputs { + b := encodeBytes(input.enc) + require.Equal(t, input.dec, b) + + d, err := decodeBytes(b) + require.NoError(t, err) + require.Equal(t, input.enc, d) + } + + // Test error decode. + errInputs := [][]byte{ + {1, 2, 3, 4}, + {0, 0, 0, 0, 0, 0, 0, 247}, + {0, 0, 0, 0, 0, 0, 0, 0, 246}, + {0, 0, 0, 0, 0, 0, 0, 1, 247}, + {1, 2, 3, 4, 5, 6, 7, 8, 0}, + {1, 2, 3, 4, 5, 6, 7, 8, 255, 1}, + {1, 2, 3, 4, 5, 6, 7, 8, 255, 1, 2, 3, 4, 5, 6, 7, 8}, + {1, 2, 3, 4, 5, 6, 7, 8, 255, 1, 2, 3, 4, 5, 6, 7, 8, 255}, + {1, 2, 3, 4, 5, 6, 7, 8, 255, 1, 2, 3, 4, 5, 6, 7, 8, 0}, + } + + for _, input := range errInputs { + _, err := decodeBytes(input) + require.Error(t, err) + } +} diff --git a/client/http/types.go b/client/http/types.go new file mode 100644 index 00000000000..b05e8e0efba --- /dev/null +++ b/client/http/types.go @@ -0,0 +1,586 @@ +// Copyright 2023 TiKV Project Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package http + +import ( + "encoding/hex" + "encoding/json" + "net/url" + "time" + + "github.com/pingcap/kvproto/pkg/encryptionpb" + "github.com/pingcap/kvproto/pkg/pdpb" +) + +// KeyRange defines a range of keys in bytes. +type KeyRange struct { + startKey []byte + endKey []byte +} + +// NewKeyRange creates a new key range structure with the given start key and end key bytes. +// Notice: the actual encoding of the key range is not specified here. It should be either UTF-8 or hex. +// - UTF-8 means the key has already been encoded into a string with UTF-8 encoding, like: +// []byte{52 56 54 53 54 99 54 99 54 102 50 48 53 55 54 102 55 50 54 99 54 52}, which will later be converted to "48656c6c6f20576f726c64" +// by using `string()` method. 
+// - Hex means the key is just a raw hex bytes without encoding to a UTF-8 string, like: +// []byte{72, 101, 108, 108, 111, 32, 87, 111, 114, 108, 100}, which will later be converted to "48656c6c6f20576f726c64" +// by using `hex.EncodeToString()` method. +func NewKeyRange(startKey, endKey []byte) *KeyRange { + return &KeyRange{startKey, endKey} +} + +// EscapeAsUTF8Str returns the URL escaped key strings as they are UTF-8 encoded. +func (r *KeyRange) EscapeAsUTF8Str() (startKeyStr, endKeyStr string) { + startKeyStr = url.QueryEscape(string(r.startKey)) + endKeyStr = url.QueryEscape(string(r.endKey)) + return +} + +// EscapeAsHexStr returns the URL escaped key strings as they are hex encoded. +func (r *KeyRange) EscapeAsHexStr() (startKeyStr, endKeyStr string) { + startKeyStr = url.QueryEscape(hex.EncodeToString(r.startKey)) + endKeyStr = url.QueryEscape(hex.EncodeToString(r.endKey)) + return +} + +// NOTICE: the structures below are copied from the PD API definitions. +// Please make sure the consistency if any change happens to the PD API. + +// RegionInfo stores the information of one region. +type RegionInfo struct { + ID int64 `json:"id"` + StartKey string `json:"start_key"` + EndKey string `json:"end_key"` + Epoch RegionEpoch `json:"epoch"` + Peers []RegionPeer `json:"peers"` + Leader RegionPeer `json:"leader"` + DownPeers []RegionPeerStat `json:"down_peers"` + PendingPeers []RegionPeer `json:"pending_peers"` + WrittenBytes uint64 `json:"written_bytes"` + ReadBytes uint64 `json:"read_bytes"` + ApproximateSize int64 `json:"approximate_size"` + ApproximateKeys int64 `json:"approximate_keys"` + + ReplicationStatus *ReplicationStatus `json:"replication_status,omitempty"` +} + +// GetStartKey gets the start key of the region. +func (r *RegionInfo) GetStartKey() string { return r.StartKey } + +// GetEndKey gets the end key of the region. +func (r *RegionInfo) GetEndKey() string { return r.EndKey } + +// RegionEpoch stores the information about its epoch. +type RegionEpoch struct { + ConfVer int64 `json:"conf_ver"` + Version int64 `json:"version"` +} + +// RegionPeer stores information of one peer. +type RegionPeer struct { + ID int64 `json:"id"` + StoreID int64 `json:"store_id"` + IsLearner bool `json:"is_learner"` +} + +// RegionPeerStat stores one field `DownSec` which indicates how long it's down than `RegionPeer`. +type RegionPeerStat struct { + Peer RegionPeer `json:"peer"` + DownSec int64 `json:"down_seconds"` +} + +// ReplicationStatus represents the replication mode status of the region. +type ReplicationStatus struct { + State string `json:"state"` + StateID int64 `json:"state_id"` +} + +// RegionsInfo stores the information of regions. +type RegionsInfo struct { + Count int64 `json:"count"` + Regions []RegionInfo `json:"regions"` +} + +// Merge merges two RegionsInfo together and returns a new one. +func (ri *RegionsInfo) Merge(other *RegionsInfo) *RegionsInfo { + newRegionsInfo := &RegionsInfo{ + Regions: make([]RegionInfo, 0, ri.Count+other.Count), + } + m := make(map[int64]RegionInfo, ri.Count+other.Count) + for _, region := range ri.Regions { + m[region.ID] = region + } + for _, region := range other.Regions { + m[region.ID] = region + } + for _, region := range m { + newRegionsInfo.Regions = append(newRegionsInfo.Regions, region) + } + newRegionsInfo.Count = int64(len(newRegionsInfo.Regions)) + return newRegionsInfo +} + +// StoreHotPeersInfos is used to get human-readable description for hot regions. 
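Editor's note: a quick illustration of the two KeyRange escaping modes defined above; AccelerateSchedule in client.go sends the hex form. The import path is assumed and the key values are arbitrary.

```go
package main

import (
	"fmt"

	pdhttp "github.com/tikv/pd/client/http" // assumed import path
)

func main() {
	r := pdhttp.NewKeyRange([]byte("a"), []byte("z"))

	// Hex form (what AccelerateSchedule sends): "61", "7a".
	startHex, endHex := r.EscapeAsHexStr()
	fmt.Println(startHex, endHex)

	// UTF-8 form, URL-escaped as-is: "a", "z".
	startUTF8, endUTF8 := r.EscapeAsUTF8Str()
	fmt.Println(startUTF8, endUTF8)
}
```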
+type StoreHotPeersInfos struct { + AsPeer StoreHotPeersStat `json:"as_peer"` + AsLeader StoreHotPeersStat `json:"as_leader"` +} + +// StoreHotPeersStat is used to record the hot region statistics group by store. +type StoreHotPeersStat map[uint64]*HotPeersStat + +// HotPeersStat records all hot regions statistics +type HotPeersStat struct { + StoreByteRate float64 `json:"store_bytes"` + StoreKeyRate float64 `json:"store_keys"` + StoreQueryRate float64 `json:"store_query"` + TotalBytesRate float64 `json:"total_flow_bytes"` + TotalKeysRate float64 `json:"total_flow_keys"` + TotalQueryRate float64 `json:"total_flow_query"` + Count int `json:"regions_count"` + Stats []HotPeerStatShow `json:"statistics"` +} + +// HotPeerStatShow records the hot region statistics for output +type HotPeerStatShow struct { + StoreID uint64 `json:"store_id"` + Stores []uint64 `json:"stores"` + IsLeader bool `json:"is_leader"` + IsLearner bool `json:"is_learner"` + RegionID uint64 `json:"region_id"` + HotDegree int `json:"hot_degree"` + ByteRate float64 `json:"flow_bytes"` + KeyRate float64 `json:"flow_keys"` + QueryRate float64 `json:"flow_query"` + AntiCount int `json:"anti_count"` + LastUpdateTime time.Time `json:"last_update_time,omitempty"` +} + +// HistoryHotRegionsRequest wrap the request conditions. +type HistoryHotRegionsRequest struct { + StartTime int64 `json:"start_time,omitempty"` + EndTime int64 `json:"end_time,omitempty"` + RegionIDs []uint64 `json:"region_ids,omitempty"` + StoreIDs []uint64 `json:"store_ids,omitempty"` + PeerIDs []uint64 `json:"peer_ids,omitempty"` + IsLearners []bool `json:"is_learners,omitempty"` + IsLeaders []bool `json:"is_leaders,omitempty"` + HotRegionTypes []string `json:"hot_region_type,omitempty"` +} + +// HistoryHotRegions wraps historyHotRegion +type HistoryHotRegions struct { + HistoryHotRegion []*HistoryHotRegion `json:"history_hot_region"` +} + +// HistoryHotRegion wraps hot region info +// it is storage format of hot_region_storage +type HistoryHotRegion struct { + UpdateTime int64 `json:"update_time"` + RegionID uint64 `json:"region_id"` + PeerID uint64 `json:"peer_id"` + StoreID uint64 `json:"store_id"` + IsLeader bool `json:"is_leader"` + IsLearner bool `json:"is_learner"` + HotRegionType string `json:"hot_region_type"` + HotDegree int64 `json:"hot_degree"` + FlowBytes float64 `json:"flow_bytes"` + KeyRate float64 `json:"key_rate"` + QueryRate float64 `json:"query_rate"` + StartKey string `json:"start_key"` + EndKey string `json:"end_key"` + // Encryption metadata for start_key and end_key. encryption_meta.iv is IV for start_key. + // IV for end_key is calculated from (encryption_meta.iv + len(start_key)). + // The field is only used by PD and should be ignored otherwise. + // If encryption_meta is empty (i.e. nil), it means start_key and end_key are unencrypted. + EncryptionMeta *encryptionpb.EncryptionMeta `json:"encryption_meta,omitempty"` +} + +// StoresInfo represents the information of all TiKV/TiFlash stores. +type StoresInfo struct { + Count int `json:"count"` + Stores []StoreInfo `json:"stores"` +} + +// StoreInfo represents the information of one TiKV/TiFlash store. +type StoreInfo struct { + Store MetaStore `json:"store"` + Status StoreStatus `json:"status"` +} + +// MetaStore represents the meta information of one store. 
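Editor's note: a hedged sketch of querying the persisted hot-region history with the request type above. GetHistoryHotRegions is only shown being called in client_test.go, so its return types here are an assumption made explicit through a local interface; the store IDs and the "write" type are illustrative.

```go
package example

import (
	"context"
	"fmt"

	pdhttp "github.com/tikv/pd/client/http" // assumed import path
)

// historyHotRegionGetter captures only the call this sketch needs; the
// (*HistoryHotRegions, error) return is assumed, not taken from this diff.
type historyHotRegionGetter interface {
	GetHistoryHotRegions(ctx context.Context, req *pdhttp.HistoryHotRegionsRequest) (*pdhttp.HistoryHotRegions, error)
}

// listWriteHotRegions lists historical write hotspots on stores 1 and 2;
// time-range filters are omitted to keep the sketch small.
func listWriteHotRegions(ctx context.Context, c historyHotRegionGetter) error {
	req := &pdhttp.HistoryHotRegionsRequest{
		StoreIDs:       []uint64{1, 2},
		HotRegionTypes: []string{"write"},
	}
	resp, err := c.GetHistoryHotRegions(ctx, req)
	if err != nil {
		return err
	}
	for _, r := range resp.HistoryHotRegion {
		fmt.Println(r.RegionID, r.StoreID, r.HotDegree, r.FlowBytes)
	}
	return nil
}
```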
+type MetaStore struct { + ID int64 `json:"id"` + Address string `json:"address"` + State int64 `json:"state"` + StateName string `json:"state_name"` + Version string `json:"version"` + Labels []StoreLabel `json:"labels"` + StatusAddress string `json:"status_address"` + GitHash string `json:"git_hash"` + StartTimestamp int64 `json:"start_timestamp"` +} + +// StoreLabel stores the information of one store label. +type StoreLabel struct { + Key string `json:"key"` + Value string `json:"value"` +} + +// StoreStatus stores the detail information of one store. +type StoreStatus struct { + Capacity string `json:"capacity"` + Available string `json:"available"` + LeaderCount int64 `json:"leader_count"` + LeaderWeight float64 `json:"leader_weight"` + LeaderScore float64 `json:"leader_score"` + LeaderSize int64 `json:"leader_size"` + RegionCount int64 `json:"region_count"` + RegionWeight float64 `json:"region_weight"` + RegionScore float64 `json:"region_score"` + RegionSize int64 `json:"region_size"` + StartTS time.Time `json:"start_ts"` + LastHeartbeatTS time.Time `json:"last_heartbeat_ts"` + Uptime string `json:"uptime"` +} + +// RegionStats stores the statistics of regions. +type RegionStats struct { + Count int `json:"count"` + EmptyCount int `json:"empty_count"` + StorageSize int64 `json:"storage_size"` + StorageKeys int64 `json:"storage_keys"` + StoreLeaderCount map[uint64]int `json:"store_leader_count"` + StorePeerCount map[uint64]int `json:"store_peer_count"` +} + +// PeerRoleType is the expected peer type of the placement rule. +type PeerRoleType string + +const ( + // Voter can either match a leader peer or follower peer + Voter PeerRoleType = "voter" + // Leader matches a leader. + Leader PeerRoleType = "leader" + // Follower matches a follower. + Follower PeerRoleType = "follower" + // Learner matches a learner. + Learner PeerRoleType = "learner" +) + +// LabelConstraint is used to filter store when trying to place peer of a region. +type LabelConstraint struct { + Key string `json:"key,omitempty"` + Op LabelConstraintOp `json:"op,omitempty"` + Values []string `json:"values,omitempty"` +} + +// LabelConstraintOp defines how a LabelConstraint matches a store. It can be one of +// 'in', 'notIn', 'exists', or 'notExists'. +type LabelConstraintOp string + +const ( + // In restricts the store label value should in the value list. + // If label does not exist, `in` is always false. + In LabelConstraintOp = "in" + // NotIn restricts the store label value should not in the value list. + // If label does not exist, `notIn` is always true. + NotIn LabelConstraintOp = "notIn" + // Exists restricts the store should have the label. + Exists LabelConstraintOp = "exists" + // NotExists restricts the store should not have the label. + NotExists LabelConstraintOp = "notExists" +) + +// Rule is the placement rule that can be checked against a region. When +// applying rules (apply means schedule regions to match selected rules), the +// apply order is defined by the tuple [GroupIndex, GroupID, Index, ID]. 
+type Rule struct { + GroupID string `json:"group_id"` // mark the source that add the rule + ID string `json:"id"` // unique ID within a group + Index int `json:"index,omitempty"` // rule apply order in a group, rule with less ID is applied first when indexes are equal + Override bool `json:"override,omitempty"` // when it is true, all rules with less indexes are disabled + StartKey []byte `json:"-"` // range start key + StartKeyHex string `json:"start_key"` // hex format start key, for marshal/unmarshal + EndKey []byte `json:"-"` // range end key + EndKeyHex string `json:"end_key"` // hex format end key, for marshal/unmarshal + Role PeerRoleType `json:"role"` // expected role of the peers + IsWitness bool `json:"is_witness"` // when it is true, it means the role is also a witness + Count int `json:"count"` // expected count of the peers + LabelConstraints []LabelConstraint `json:"label_constraints,omitempty"` // used to select stores to place peers + LocationLabels []string `json:"location_labels,omitempty"` // used to make peers isolated physically + IsolationLevel string `json:"isolation_level,omitempty"` // used to isolate replicas explicitly and forcibly + Version uint64 `json:"version,omitempty"` // only set at runtime, add 1 each time rules updated, begin from 0. + CreateTimestamp uint64 `json:"create_timestamp,omitempty"` // only set at runtime, recorded rule create timestamp +} + +// String returns the string representation of this rule. +func (r *Rule) String() string { + b, _ := json.Marshal(r) + return string(b) +} + +// Clone returns a copy of Rule. +func (r *Rule) Clone() *Rule { + var clone Rule + json.Unmarshal([]byte(r.String()), &clone) + clone.StartKey = append(r.StartKey[:0:0], r.StartKey...) + clone.EndKey = append(r.EndKey[:0:0], r.EndKey...) + return &clone +} + +var ( + _ json.Marshaler = (*Rule)(nil) + _ json.Unmarshaler = (*Rule)(nil) +) + +// This is a helper struct used to customizing the JSON marshal/unmarshal methods of `Rule`. +type rule struct { + GroupID string `json:"group_id"` + ID string `json:"id"` + Index int `json:"index,omitempty"` + Override bool `json:"override,omitempty"` + StartKeyHex string `json:"start_key"` + EndKeyHex string `json:"end_key"` + Role PeerRoleType `json:"role"` + IsWitness bool `json:"is_witness"` + Count int `json:"count"` + LabelConstraints []LabelConstraint `json:"label_constraints,omitempty"` + LocationLabels []string `json:"location_labels,omitempty"` + IsolationLevel string `json:"isolation_level,omitempty"` +} + +// MarshalJSON implements `json.Marshaler` interface to make sure we could set the correct start/end key. +func (r *Rule) MarshalJSON() ([]byte, error) { + tempRule := &rule{ + GroupID: r.GroupID, + ID: r.ID, + Index: r.Index, + Override: r.Override, + StartKeyHex: r.StartKeyHex, + EndKeyHex: r.EndKeyHex, + Role: r.Role, + IsWitness: r.IsWitness, + Count: r.Count, + LabelConstraints: r.LabelConstraints, + LocationLabels: r.LocationLabels, + IsolationLevel: r.IsolationLevel, + } + // Converts the start/end key to hex format if the corresponding hex field is empty. + if len(r.StartKey) > 0 && len(r.StartKeyHex) == 0 { + tempRule.StartKeyHex = rawKeyToKeyHexStr(r.StartKey) + } + if len(r.EndKey) > 0 && len(r.EndKeyHex) == 0 { + tempRule.EndKeyHex = rawKeyToKeyHexStr(r.EndKey) + } + return json.Marshal(tempRule) +} + +// UnmarshalJSON implements `json.Unmarshaler` interface to make sure we could get the correct start/end key. 
+func (r *Rule) UnmarshalJSON(bytes []byte) error { + var tempRule rule + err := json.Unmarshal(bytes, &tempRule) + if err != nil { + return err + } + newRule := Rule{ + GroupID: tempRule.GroupID, + ID: tempRule.ID, + Index: tempRule.Index, + Override: tempRule.Override, + StartKeyHex: tempRule.StartKeyHex, + EndKeyHex: tempRule.EndKeyHex, + Role: tempRule.Role, + IsWitness: tempRule.IsWitness, + Count: tempRule.Count, + LabelConstraints: tempRule.LabelConstraints, + LocationLabels: tempRule.LocationLabels, + IsolationLevel: tempRule.IsolationLevel, + } + newRule.StartKey, err = keyHexStrToRawKey(newRule.StartKeyHex) + if err != nil { + return err + } + newRule.EndKey, err = keyHexStrToRawKey(newRule.EndKeyHex) + if err != nil { + return err + } + *r = newRule + return nil +} + +// RuleOpType indicates the operation type +type RuleOpType string + +const ( + // RuleOpAdd a placement rule, only need to specify the field *Rule + RuleOpAdd RuleOpType = "add" + // RuleOpDel a placement rule, only need to specify the field `GroupID`, `ID`, `MatchID` + RuleOpDel RuleOpType = "del" +) + +// RuleOp is for batching placement rule actions. +// The action type is distinguished by the field `Action`. +type RuleOp struct { + *Rule // information of the placement rule to add/delete the operation type + Action RuleOpType `json:"action"` + DeleteByIDPrefix bool `json:"delete_by_id_prefix"` // if action == delete, delete by the prefix of id +} + +func (r RuleOp) String() string { + b, _ := json.Marshal(r) + return string(b) +} + +var ( + _ json.Marshaler = (*RuleOp)(nil) + _ json.Unmarshaler = (*RuleOp)(nil) +) + +// This is a helper struct used to customizing the JSON marshal/unmarshal methods of `RuleOp`. +type ruleOp struct { + GroupID string `json:"group_id"` + ID string `json:"id"` + Index int `json:"index,omitempty"` + Override bool `json:"override,omitempty"` + StartKeyHex string `json:"start_key"` + EndKeyHex string `json:"end_key"` + Role PeerRoleType `json:"role"` + IsWitness bool `json:"is_witness"` + Count int `json:"count"` + LabelConstraints []LabelConstraint `json:"label_constraints,omitempty"` + LocationLabels []string `json:"location_labels,omitempty"` + IsolationLevel string `json:"isolation_level,omitempty"` + Action RuleOpType `json:"action"` + DeleteByIDPrefix bool `json:"delete_by_id_prefix"` +} + +// MarshalJSON implements `json.Marshaler` interface to make sure we could set the correct start/end key. +func (r *RuleOp) MarshalJSON() ([]byte, error) { + tempRuleOp := &ruleOp{ + GroupID: r.GroupID, + ID: r.ID, + Index: r.Index, + Override: r.Override, + StartKeyHex: r.StartKeyHex, + EndKeyHex: r.EndKeyHex, + Role: r.Role, + IsWitness: r.IsWitness, + Count: r.Count, + LabelConstraints: r.LabelConstraints, + LocationLabels: r.LocationLabels, + IsolationLevel: r.IsolationLevel, + Action: r.Action, + DeleteByIDPrefix: r.DeleteByIDPrefix, + } + // Converts the start/end key to hex format if the corresponding hex field is empty. + if len(r.StartKey) > 0 && len(r.StartKeyHex) == 0 { + tempRuleOp.StartKeyHex = rawKeyToKeyHexStr(r.StartKey) + } + if len(r.EndKey) > 0 && len(r.EndKeyHex) == 0 { + tempRuleOp.EndKeyHex = rawKeyToKeyHexStr(r.EndKey) + } + return json.Marshal(tempRuleOp) +} + +// UnmarshalJSON implements `json.Unmarshaler` interface to make sure we could get the correct start/end key. 
+func (r *RuleOp) UnmarshalJSON(bytes []byte) error { + var tempRuleOp ruleOp + err := json.Unmarshal(bytes, &tempRuleOp) + if err != nil { + return err + } + newRuleOp := RuleOp{ + Rule: &Rule{ + GroupID: tempRuleOp.GroupID, + ID: tempRuleOp.ID, + Index: tempRuleOp.Index, + Override: tempRuleOp.Override, + StartKeyHex: tempRuleOp.StartKeyHex, + EndKeyHex: tempRuleOp.EndKeyHex, + Role: tempRuleOp.Role, + IsWitness: tempRuleOp.IsWitness, + Count: tempRuleOp.Count, + LabelConstraints: tempRuleOp.LabelConstraints, + LocationLabels: tempRuleOp.LocationLabels, + IsolationLevel: tempRuleOp.IsolationLevel, + }, + Action: tempRuleOp.Action, + DeleteByIDPrefix: tempRuleOp.DeleteByIDPrefix, + } + newRuleOp.StartKey, err = keyHexStrToRawKey(newRuleOp.StartKeyHex) + if err != nil { + return err + } + newRuleOp.EndKey, err = keyHexStrToRawKey(newRuleOp.EndKeyHex) + if err != nil { + return err + } + *r = newRuleOp + return nil +} + +// RuleGroup defines properties of a rule group. +type RuleGroup struct { + ID string `json:"id,omitempty"` + Index int `json:"index,omitempty"` + Override bool `json:"override,omitempty"` +} + +func (g *RuleGroup) String() string { + b, _ := json.Marshal(g) + return string(b) +} + +// GroupBundle represents a rule group and all rules belong to the group. +type GroupBundle struct { + ID string `json:"group_id"` + Index int `json:"group_index"` + Override bool `json:"group_override"` + Rules []*Rule `json:"rules"` +} + +// RegionLabel is the label of a region. +type RegionLabel struct { + Key string `json:"key"` + Value string `json:"value"` + TTL string `json:"ttl,omitempty"` + StartAt string `json:"start_at,omitempty"` +} + +// LabelRule is the rule to assign labels to a region. +type LabelRule struct { + ID string `json:"id"` + Index int `json:"index"` + Labels []RegionLabel `json:"labels"` + RuleType string `json:"rule_type"` + Data interface{} `json:"data"` +} + +// LabelRulePatch is the patch to update the label rules. +type LabelRulePatch struct { + SetRules []*LabelRule `json:"sets"` + DeleteRules []string `json:"deletes"` +} + +// MembersInfo is PD members info returned from PD RESTful interface +// type Members map[string][]*pdpb.Member +type MembersInfo struct { + Header *pdpb.ResponseHeader `json:"header,omitempty"` + Members []*pdpb.Member `json:"members,omitempty"` + Leader *pdpb.Member `json:"leader,omitempty"` + EtcdLeader *pdpb.Member `json:"etcd_leader,omitempty"` +} diff --git a/client/http/types_test.go b/client/http/types_test.go new file mode 100644 index 00000000000..1dedbdc7d3b --- /dev/null +++ b/client/http/types_test.go @@ -0,0 +1,200 @@ +// Copyright 2023 TiKV Project Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
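Editor's note: to make the custom Rule marshaling above concrete, here is a small round-trip sketch: raw StartKey/EndKey bytes are converted to the memcomparable-hex start_key/end_key on marshal and decoded back on unmarshal. The import path and the field values are assumptions; the behavior mirrors the MarshalJSON/UnmarshalJSON code and the types_test.go cases in this diff.

```go
package main

import (
	"encoding/json"
	"fmt"
	"log"

	pdhttp "github.com/tikv/pd/client/http" // assumed import path
)

func main() {
	r := &pdhttp.Rule{
		GroupID:  "pd",
		ID:       "default",
		Role:     pdhttp.Voter,
		Count:    3,
		StartKey: []byte("a"), // raw keys; the hex fields are left empty on purpose
		EndKey:   []byte("b"),
	}
	data, err := json.Marshal(r)
	if err != nil {
		log.Fatal(err)
	}
	// MarshalJSON fills start_key/end_key from the raw keys:
	// "start_key":"6100000000000000f8","end_key":"6200000000000000f8"
	fmt.Println(string(data))

	var decoded pdhttp.Rule
	if err := json.Unmarshal(data, &decoded); err != nil {
		log.Fatal(err)
	}
	// UnmarshalJSON recovers the raw bytes from the hex form.
	fmt.Printf("%q %q\n", decoded.StartKey, decoded.EndKey) // "a" "b"
}
```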
+ +package http + +import ( + "encoding/json" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestMergeRegionsInfo(t *testing.T) { + re := require.New(t) + regionsInfo1 := &RegionsInfo{ + Count: 1, + Regions: []RegionInfo{ + { + ID: 1, + StartKey: "", + EndKey: "a", + }, + }, + } + regionsInfo2 := &RegionsInfo{ + Count: 1, + Regions: []RegionInfo{ + { + ID: 2, + StartKey: "a", + EndKey: "", + }, + }, + } + regionsInfo := regionsInfo1.Merge(regionsInfo2) + re.Equal(int64(2), regionsInfo.Count) + re.Equal(2, len(regionsInfo.Regions)) + re.Subset(regionsInfo.Regions, append(regionsInfo1.Regions, regionsInfo2.Regions...)) +} + +func TestRuleStartEndKey(t *testing.T) { + re := require.New(t) + // Empty start/end key and key hex. + ruleToMarshal := &Rule{} + rule := mustMarshalAndUnmarshal(re, ruleToMarshal) + re.Equal("", rule.StartKeyHex) + re.Equal("", rule.EndKeyHex) + re.Equal([]byte(""), rule.StartKey) + re.Equal([]byte(""), rule.EndKey) + // Empty start/end key and non-empty key hex. + ruleToMarshal = &Rule{ + StartKeyHex: rawKeyToKeyHexStr([]byte("a")), + EndKeyHex: rawKeyToKeyHexStr([]byte("b")), + } + rule = mustMarshalAndUnmarshal(re, ruleToMarshal) + re.Equal([]byte("a"), rule.StartKey) + re.Equal([]byte("b"), rule.EndKey) + re.Equal(ruleToMarshal.StartKeyHex, rule.StartKeyHex) + re.Equal(ruleToMarshal.EndKeyHex, rule.EndKeyHex) + // Non-empty start/end key and empty key hex. + ruleToMarshal = &Rule{ + StartKey: []byte("a"), + EndKey: []byte("b"), + } + rule = mustMarshalAndUnmarshal(re, ruleToMarshal) + re.Equal(ruleToMarshal.StartKey, rule.StartKey) + re.Equal(ruleToMarshal.EndKey, rule.EndKey) + re.Equal(rawKeyToKeyHexStr(ruleToMarshal.StartKey), rule.StartKeyHex) + re.Equal(rawKeyToKeyHexStr(ruleToMarshal.EndKey), rule.EndKeyHex) + // Non-empty start/end key and non-empty key hex. + ruleToMarshal = &Rule{ + StartKey: []byte("a"), + EndKey: []byte("b"), + StartKeyHex: rawKeyToKeyHexStr([]byte("c")), + EndKeyHex: rawKeyToKeyHexStr([]byte("d")), + } + rule = mustMarshalAndUnmarshal(re, ruleToMarshal) + re.Equal([]byte("c"), rule.StartKey) + re.Equal([]byte("d"), rule.EndKey) + re.Equal(ruleToMarshal.StartKeyHex, rule.StartKeyHex) + re.Equal(ruleToMarshal.EndKeyHex, rule.EndKeyHex) + // Half of each pair of keys is empty. + ruleToMarshal = &Rule{ + StartKey: []byte("a"), + EndKeyHex: rawKeyToKeyHexStr([]byte("d")), + } + rule = mustMarshalAndUnmarshal(re, ruleToMarshal) + re.Equal(ruleToMarshal.StartKey, rule.StartKey) + re.Equal([]byte("d"), rule.EndKey) + re.Equal(rawKeyToKeyHexStr(ruleToMarshal.StartKey), rule.StartKeyHex) + re.Equal(ruleToMarshal.EndKeyHex, rule.EndKeyHex) +} + +func mustMarshalAndUnmarshal(re *require.Assertions, rule *Rule) *Rule { + ruleJSON, err := json.Marshal(rule) + re.NoError(err) + var newRule *Rule + err = json.Unmarshal(ruleJSON, &newRule) + re.NoError(err) + return newRule +} + +func TestRuleOpStartEndKey(t *testing.T) { + re := require.New(t) + // Empty start/end key and key hex. + ruleOpToMarshal := &RuleOp{ + Rule: &Rule{}, + } + ruleOp := mustMarshalAndUnmarshalRuleOp(re, ruleOpToMarshal) + re.Equal("", ruleOp.StartKeyHex) + re.Equal("", ruleOp.EndKeyHex) + re.Equal([]byte(""), ruleOp.StartKey) + re.Equal([]byte(""), ruleOp.EndKey) + // Empty start/end key and non-empty key hex. 
+ ruleOpToMarshal = &RuleOp{ + Rule: &Rule{ + StartKeyHex: rawKeyToKeyHexStr([]byte("a")), + EndKeyHex: rawKeyToKeyHexStr([]byte("b")), + }, + Action: RuleOpAdd, + DeleteByIDPrefix: true, + } + ruleOp = mustMarshalAndUnmarshalRuleOp(re, ruleOpToMarshal) + re.Equal([]byte("a"), ruleOp.StartKey) + re.Equal([]byte("b"), ruleOp.EndKey) + re.Equal(ruleOpToMarshal.StartKeyHex, ruleOp.StartKeyHex) + re.Equal(ruleOpToMarshal.EndKeyHex, ruleOp.EndKeyHex) + re.Equal(ruleOpToMarshal.Action, ruleOp.Action) + re.Equal(ruleOpToMarshal.DeleteByIDPrefix, ruleOp.DeleteByIDPrefix) + // Non-empty start/end key and empty key hex. + ruleOpToMarshal = &RuleOp{ + Rule: &Rule{ + StartKey: []byte("a"), + EndKey: []byte("b"), + }, + Action: RuleOpAdd, + DeleteByIDPrefix: true, + } + ruleOp = mustMarshalAndUnmarshalRuleOp(re, ruleOpToMarshal) + re.Equal(ruleOpToMarshal.StartKey, ruleOp.StartKey) + re.Equal(ruleOpToMarshal.EndKey, ruleOp.EndKey) + re.Equal(rawKeyToKeyHexStr(ruleOpToMarshal.StartKey), ruleOp.StartKeyHex) + re.Equal(rawKeyToKeyHexStr(ruleOpToMarshal.EndKey), ruleOp.EndKeyHex) + re.Equal(ruleOpToMarshal.Action, ruleOp.Action) + re.Equal(ruleOpToMarshal.DeleteByIDPrefix, ruleOp.DeleteByIDPrefix) + // Non-empty start/end key and non-empty key hex. + ruleOpToMarshal = &RuleOp{ + Rule: &Rule{ + StartKey: []byte("a"), + EndKey: []byte("b"), + StartKeyHex: rawKeyToKeyHexStr([]byte("c")), + EndKeyHex: rawKeyToKeyHexStr([]byte("d")), + }, + Action: RuleOpAdd, + DeleteByIDPrefix: true, + } + ruleOp = mustMarshalAndUnmarshalRuleOp(re, ruleOpToMarshal) + re.Equal([]byte("c"), ruleOp.StartKey) + re.Equal([]byte("d"), ruleOp.EndKey) + re.Equal(ruleOpToMarshal.StartKeyHex, ruleOp.StartKeyHex) + re.Equal(ruleOpToMarshal.EndKeyHex, ruleOp.EndKeyHex) + re.Equal(ruleOpToMarshal.Action, ruleOp.Action) + re.Equal(ruleOpToMarshal.DeleteByIDPrefix, ruleOp.DeleteByIDPrefix) + // Half of each pair of keys is empty. + ruleOpToMarshal = &RuleOp{ + Rule: &Rule{ + StartKey: []byte("a"), + EndKeyHex: rawKeyToKeyHexStr([]byte("d")), + }, + Action: RuleOpDel, + DeleteByIDPrefix: false, + } + ruleOp = mustMarshalAndUnmarshalRuleOp(re, ruleOpToMarshal) + re.Equal(ruleOpToMarshal.StartKey, ruleOp.StartKey) + re.Equal([]byte("d"), ruleOp.EndKey) + re.Equal(rawKeyToKeyHexStr(ruleOpToMarshal.StartKey), ruleOp.StartKeyHex) + re.Equal(ruleOpToMarshal.EndKeyHex, ruleOp.EndKeyHex) + re.Equal(ruleOpToMarshal.Action, ruleOp.Action) + re.Equal(ruleOpToMarshal.DeleteByIDPrefix, ruleOp.DeleteByIDPrefix) +} + +func mustMarshalAndUnmarshalRuleOp(re *require.Assertions, ruleOp *RuleOp) *RuleOp { + ruleOpJSON, err := json.Marshal(ruleOp) + re.NoError(err) + var newRuleOp *RuleOp + err = json.Unmarshal(ruleOpJSON, &newRuleOp) + re.NoError(err) + return newRuleOp +} diff --git a/client/keyspace_client.go b/client/keyspace_client.go index d9b9172dd69..fedb7452412 100644 --- a/client/keyspace_client.go +++ b/client/keyspace_client.go @@ -21,7 +21,6 @@ import ( "github.com/opentracing/opentracing-go" "github.com/pingcap/errors" "github.com/pingcap/kvproto/pkg/keyspacepb" - "github.com/tikv/pd/client/grpcutil" ) // KeyspaceClient manages keyspace metadata. 
@@ -57,7 +56,6 @@ func (c *client) LoadKeyspace(ctx context.Context, name string) (*keyspacepb.Key Header: c.requestHeader(), Name: name, } - ctx = grpcutil.BuildForwardContext(ctx, c.GetLeaderAddr()) resp, err := c.keyspaceClient().LoadKeyspace(ctx, req) cancel() @@ -98,7 +96,6 @@ func (c *client) UpdateKeyspaceState(ctx context.Context, id uint32, state keysp Id: id, State: state, } - ctx = grpcutil.BuildForwardContext(ctx, c.GetLeaderAddr()) resp, err := c.keyspaceClient().UpdateKeyspaceState(ctx, req) cancel() @@ -138,7 +135,6 @@ func (c *client) GetAllKeyspaces(ctx context.Context, startID uint32, limit uint StartId: startID, Limit: limit, } - ctx = grpcutil.BuildForwardContext(ctx, c.GetLeaderAddr()) resp, err := c.keyspaceClient().GetAllKeyspaces(ctx, req) cancel() diff --git a/client/option.go b/client/option.go index d6a6d61d2f9..2a6c285cfb7 100644 --- a/client/option.go +++ b/client/option.go @@ -28,6 +28,7 @@ const ( maxInitClusterRetries = 100 defaultMaxTSOBatchWaitInterval time.Duration = 0 defaultEnableTSOFollowerProxy = false + defaultEnableFollowerHandle = false ) // DynamicOption is used to distinguish the dynamic option type. @@ -40,6 +41,8 @@ const ( // EnableTSOFollowerProxy is the TSO Follower Proxy option. // It is stored as bool. EnableTSOFollowerProxy + // EnableFollowerHandle is the follower handle option. + EnableFollowerHandle dynamicOptionCount ) @@ -72,6 +75,7 @@ func newOption() *option { co.dynamicOptions[MaxTSOBatchWaitInterval].Store(defaultMaxTSOBatchWaitInterval) co.dynamicOptions[EnableTSOFollowerProxy].Store(defaultEnableTSOFollowerProxy) + co.dynamicOptions[EnableFollowerHandle].Store(defaultEnableFollowerHandle) return co } @@ -88,6 +92,19 @@ func (o *option) setMaxTSOBatchWaitInterval(interval time.Duration) error { return nil } +// setEnableFollowerHandle set the Follower Handle option. +func (o *option) setEnableFollowerHandle(enable bool) { + old := o.getEnableFollowerHandle() + if enable != old { + o.dynamicOptions[EnableFollowerHandle].Store(enable) + } +} + +// getMaxTSOBatchWaitInterval gets the Follower Handle enable option. +func (o *option) getEnableFollowerHandle() bool { + return o.dynamicOptions[EnableFollowerHandle].Load().(bool) +} + // getMaxTSOBatchWaitInterval gets the max TSO batch wait interval option. func (o *option) getMaxTSOBatchWaitInterval() time.Duration { return o.dynamicOptions[MaxTSOBatchWaitInterval].Load().(time.Duration) diff --git a/client/option_test.go b/client/option_test.go index 1b5604f4d19..1a8faf8fcd9 100644 --- a/client/option_test.go +++ b/client/option_test.go @@ -28,6 +28,7 @@ func TestDynamicOptionChange(t *testing.T) { // Check the default value setting. re.Equal(defaultMaxTSOBatchWaitInterval, o.getMaxTSOBatchWaitInterval()) re.Equal(defaultEnableTSOFollowerProxy, o.getEnableTSOFollowerProxy()) + re.Equal(defaultEnableFollowerHandle, o.getEnableFollowerHandle()) // Check the invalid value setting. re.NotNil(o.setMaxTSOBatchWaitInterval(time.Second)) @@ -55,4 +56,11 @@ func TestDynamicOptionChange(t *testing.T) { close(o.enableTSOFollowerProxyCh) // Setting the same value should not notify the channel. 
o.setEnableTSOFollowerProxy(expectBool) + + expectBool = true + o.setEnableFollowerHandle(expectBool) + re.Equal(expectBool, o.getEnableFollowerHandle()) + expectBool = false + o.setEnableFollowerHandle(expectBool) + re.Equal(expectBool, o.getEnableFollowerHandle()) } diff --git a/client/pd_service_discovery.go b/client/pd_service_discovery.go index e96093f598d..b75276adbe9 100644 --- a/client/pd_service_discovery.go +++ b/client/pd_service_discovery.go @@ -63,8 +63,6 @@ type ServiceDiscovery interface { GetKeyspaceID() uint32 // GetKeyspaceGroupID returns the ID of the keyspace group GetKeyspaceGroupID() uint32 - // DiscoverServiceURLs discovers the microservice with the specified type and returns the server urls. - DiscoverMicroservice(svcType serviceType) ([]string, error) // GetServiceURLs returns the URLs of the servers providing the service GetServiceURLs() []string // GetServingEndpointClientConn returns the grpc client connection of the serving endpoint @@ -324,7 +322,7 @@ func (c *pdServiceDiscovery) GetKeyspaceGroupID() uint32 { } // DiscoverMicroservice discovers the microservice with the specified type and returns the server urls. -func (c *pdServiceDiscovery) DiscoverMicroservice(svcType serviceType) (urls []string, err error) { +func (c *pdServiceDiscovery) discoverMicroservice(svcType serviceType) (urls []string, err error) { switch svcType { case apiService: urls = c.GetServiceURLs() @@ -616,7 +614,6 @@ func (c *pdServiceDiscovery) switchLeader(addrs []string) error { if _, err := c.GetOrCreateGRPCConn(addr); err != nil { log.Warn("[pd] failed to connect leader", zap.String("leader", addr), errs.ZapError(err)) - return err } // Set PD leader and Global TSO Allocator (which is also the PD leader) c.leader.Store(addr) diff --git a/client/resource_group/controller/config.go b/client/resource_group/controller/config.go index 16a2525cd0d..ffc360c385c 100644 --- a/client/resource_group/controller/config.go +++ b/client/resource_group/controller/config.go @@ -88,6 +88,9 @@ type Config struct { // RequestUnit is the configuration determines the coefficients of the RRU and WRU cost. // This configuration should be modified carefully. RequestUnit RequestUnitConfig `toml:"request-unit" json:"request-unit"` + + // EnableControllerTraceLog is to control whether resource control client enable trace. + EnableControllerTraceLog bool `toml:"enable-controller-trace-log" json:"enable-controller-trace-log,string"` } // DefaultConfig returns the default resource manager controller configuration. @@ -96,6 +99,7 @@ func DefaultConfig() *Config { DegradedModeWaitDuration: NewDuration(defaultDegradedModeWaitDuration), LTBMaxWaitDuration: NewDuration(defaultMaxWaitDuration), RequestUnit: DefaultRequestUnitConfig(), + EnableControllerTraceLog: false, } } diff --git a/client/resource_group/controller/controller.go b/client/resource_group/controller/controller.go index e3495a21ff1..a07622e5f3b 100755 --- a/client/resource_group/controller/controller.go +++ b/client/resource_group/controller/controller.go @@ -32,6 +32,7 @@ import ( "github.com/prometheus/client_golang/prometheus" pd "github.com/tikv/pd/client" "github.com/tikv/pd/client/errs" + atomicutil "go.uber.org/atomic" "go.uber.org/zap" "golang.org/x/exp/slices" ) @@ -54,10 +55,18 @@ const ( lowToken selectType = 1 ) +var enableControllerTraceLog = atomicutil.NewBool(false) + +func logControllerTrace(msg string, fields ...zap.Field) { + if enableControllerTraceLog.Load() { + log.Info(msg, fields...) 
+ } +} + // ResourceGroupKVInterceptor is used as quota limit controller for resource group using kv store. type ResourceGroupKVInterceptor interface { // OnRequestWait is used to check whether resource group has enough tokens. It maybe needs to wait some time. - OnRequestWait(ctx context.Context, resourceGroupName string, info RequestInfo) (*rmpb.Consumption, *rmpb.Consumption, error) + OnRequestWait(ctx context.Context, resourceGroupName string, info RequestInfo) (*rmpb.Consumption, *rmpb.Consumption, time.Duration, uint32, error) // OnResponse is used to consume tokens after receiving response. OnResponse(resourceGroupName string, req RequestInfo, resp ResponseInfo) (*rmpb.Consumption, error) // IsBackgroundRequest If the resource group has background jobs, we should not record consumption and wait for it. @@ -171,12 +180,13 @@ func loadServerConfig(ctx context.Context, provider ResourceGroupProvider) (*Con if err != nil { return nil, err } - if len(resp.Kvs) == 0 { + kvs := resp.GetKvs() + if len(kvs) == 0 { log.Warn("[resource group controller] server does not save config, load config failed") return DefaultConfig(), nil } config := &Config{} - err = json.Unmarshal(resp.Kvs[0].GetValue(), config) + err = json.Unmarshal(kvs[0].GetValue(), config) if err != nil { return nil, err } @@ -368,6 +378,9 @@ func (c *ResourceGroupsController) Start(ctx context.Context) { } copyCfg := *c.ruConfig c.safeRuConfig.Store(©Cfg) + if enableControllerTraceLog.Load() != config.EnableControllerTraceLog { + enableControllerTraceLog.Store(config.EnableControllerTraceLog) + } log.Info("load resource controller config after config changed", zap.Reflect("config", config), zap.Reflect("ruConfig", c.ruConfig)) } @@ -504,7 +517,7 @@ func (c *ResourceGroupsController) sendTokenBucketRequests(ctx context.Context, c.responseDeadlineCh = c.run.responseDeadline.C } go func() { - log.Debug("[resource group controller] send token bucket request", zap.Time("now", now), zap.Any("req", req.Requests), zap.String("source", source)) + logControllerTrace("[resource group controller] send token bucket request", zap.Time("now", now), zap.Any("req", req.Requests), zap.String("source", source)) resp, err := c.provider.AcquireTokenBuckets(ctx, req) latency := time.Since(now) if err != nil { @@ -517,7 +530,7 @@ func (c *ResourceGroupsController) sendTokenBucketRequests(ctx context.Context, } else { successfulTokenRequestDuration.Observe(latency.Seconds()) } - log.Debug("[resource group controller] token bucket response", zap.Time("now", time.Now()), zap.Any("resp", resp), zap.String("source", source), zap.Duration("latency", latency)) + logControllerTrace("[resource group controller] token bucket response", zap.Time("now", time.Now()), zap.Any("resp", resp), zap.String("source", source), zap.Duration("latency", latency)) c.tokenResponseChan <- resp }() } @@ -525,10 +538,10 @@ func (c *ResourceGroupsController) sendTokenBucketRequests(ctx context.Context, // OnRequestWait is used to check whether resource group has enough tokens. It maybe needs to wait some time. 
func (c *ResourceGroupsController) OnRequestWait( ctx context.Context, resourceGroupName string, info RequestInfo, -) (*rmpb.Consumption, *rmpb.Consumption, error) { +) (*rmpb.Consumption, *rmpb.Consumption, time.Duration, uint32, error) { gc, err := c.tryGetResourceGroup(ctx, resourceGroupName) if err != nil { - return nil, nil, err + return nil, nil, time.Duration(0), 0, err } return gc.onRequestWait(ctx, info) } @@ -602,10 +615,11 @@ type groupCostController struct { calculators []ResourceCalculator handleRespFunc func(*rmpb.TokenBucketResponse) - successfulRequestDuration prometheus.Observer - requestRetryCounter prometheus.Counter - failedRequestCounter prometheus.Counter - tokenRequestCounter prometheus.Counter + successfulRequestDuration prometheus.Observer + failedLimitReserveDuration prometheus.Observer + requestRetryCounter prometheus.Counter + failedRequestCounter prometheus.Counter + tokenRequestCounter prometheus.Counter mu struct { sync.Mutex @@ -695,14 +709,15 @@ func newGroupCostController( return nil, errs.ErrClientResourceGroupConfigUnavailable.FastGenByArgs("not supports the resource type") } gc := &groupCostController{ - meta: group, - name: group.Name, - mainCfg: mainCfg, - mode: group.GetMode(), - successfulRequestDuration: successfulRequestDuration.WithLabelValues(group.Name), - failedRequestCounter: failedRequestCounter.WithLabelValues(group.Name), - requestRetryCounter: requestRetryCounter.WithLabelValues(group.Name), - tokenRequestCounter: resourceGroupTokenRequestCounter.WithLabelValues(group.Name), + meta: group, + name: group.Name, + mainCfg: mainCfg, + mode: group.GetMode(), + successfulRequestDuration: successfulRequestDuration.WithLabelValues(group.Name), + failedLimitReserveDuration: failedLimitReserveDuration.WithLabelValues(group.Name), + failedRequestCounter: failedRequestCounter.WithLabelValues(group.Name), + requestRetryCounter: requestRetryCounter.WithLabelValues(group.Name), + tokenRequestCounter: resourceGroupTokenRequestCounter.WithLabelValues(group.Name), calculators: []ResourceCalculator{ newKVCalculator(mainCfg), newSQLCalculator(mainCfg), @@ -804,7 +819,7 @@ func (gc *groupCostController) updateRunState() { } *gc.run.consumption = *gc.mu.consumption gc.mu.Unlock() - log.Debug("[resource group controller] update run state", zap.Any("request-unit-consumption", gc.run.consumption)) + logControllerTrace("[resource group controller] update run state", zap.Any("request-unit-consumption", gc.run.consumption)) gc.run.now = newTime } @@ -885,7 +900,7 @@ func (gc *groupCostController) updateAvgRaWResourcePerSec() { if !gc.calcAvg(counter, getRawResourceValueFromConsumption(gc.run.consumption, typ)) { continue } - log.Debug("[resource group controller] update avg raw resource per sec", zap.String("name", gc.name), zap.String("type", rmpb.RawResourceType_name[int32(typ)]), zap.Float64("avg-ru-per-sec", counter.avgRUPerSec)) + logControllerTrace("[resource group controller] update avg raw resource per sec", zap.String("name", gc.name), zap.String("type", rmpb.RawResourceType_name[int32(typ)]), zap.Float64("avg-ru-per-sec", counter.avgRUPerSec)) } gc.burstable.Store(isBurstable) } @@ -899,7 +914,7 @@ func (gc *groupCostController) updateAvgRUPerSec() { if !gc.calcAvg(counter, getRUValueFromConsumption(gc.run.consumption, typ)) { continue } - log.Debug("[resource group controller] update avg ru per sec", zap.String("name", gc.name), zap.String("type", rmpb.RequestUnitType_name[int32(typ)]), zap.Float64("avg-ru-per-sec", counter.avgRUPerSec)) + 
logControllerTrace("[resource group controller] update avg ru per sec", zap.String("name", gc.name), zap.String("type", rmpb.RequestUnitType_name[int32(typ)]), zap.Float64("avg-ru-per-sec", counter.avgRUPerSec)) } gc.burstable.Store(isBurstable) } @@ -1175,7 +1190,7 @@ func (gc *groupCostController) calcRequest(counter *tokenCounter) float64 { func (gc *groupCostController) onRequestWait( ctx context.Context, info RequestInfo, -) (*rmpb.Consumption, *rmpb.Consumption, error) { +) (*rmpb.Consumption, *rmpb.Consumption, time.Duration, uint32, error) { delta := &rmpb.Consumption{} for _, calc := range gc.calculators { calc.BeforeKVRequest(delta, info) @@ -1184,6 +1199,7 @@ func (gc *groupCostController) onRequestWait( gc.mu.Lock() add(gc.mu.consumption, delta) gc.mu.Unlock() + var waitDuration time.Duration if !gc.burstable.Load() { var err error @@ -1216,18 +1232,23 @@ func (gc *groupCostController) onRequestWait( } gc.requestRetryCounter.Inc() time.Sleep(retryInterval) + waitDuration += retryInterval } if err != nil { gc.failedRequestCounter.Inc() + if d.Seconds() > 0 { + gc.failedLimitReserveDuration.Observe(d.Seconds()) + } gc.mu.Lock() sub(gc.mu.consumption, delta) gc.mu.Unlock() failpoint.Inject("triggerUpdate", func() { gc.lowRUNotifyChan <- struct{}{} }) - return nil, nil, err + return nil, nil, waitDuration, 0, err } gc.successfulRequestDuration.Observe(d.Seconds()) + waitDuration += d } gc.mu.Lock() @@ -1244,7 +1265,7 @@ func (gc *groupCostController) onRequestWait( *gc.mu.storeCounter[info.StoreID()] = *gc.mu.globalCounter gc.mu.Unlock() - return delta, penalty, nil + return delta, penalty, waitDuration, gc.getMeta().GetPriority(), nil } func (gc *groupCostController) onResponse( diff --git a/client/resource_group/controller/controller_test.go b/client/resource_group/controller/controller_test.go index 6877f8206f3..4d09e338222 100644 --- a/client/resource_group/controller/controller_test.go +++ b/client/resource_group/controller/controller_test.go @@ -30,8 +30,9 @@ import ( func createTestGroupCostController(re *require.Assertions) *groupCostController { group := &rmpb.ResourceGroup{ - Name: "test", - Mode: rmpb.GroupMode_RUMode, + Name: "test", + Mode: rmpb.GroupMode_RUMode, + Priority: 1, RUSettings: &rmpb.GroupRequestUnitSettings{ RU: &rmpb.TokenBucket{ Settings: &rmpb.TokenLimitSettings{ @@ -100,8 +101,9 @@ func TestRequestAndResponseConsumption(t *testing.T) { kvCalculator := gc.getKVCalculator() for idx, testCase := range testCases { caseNum := fmt.Sprintf("case %d", idx) - consumption, _, err := gc.onRequestWait(context.TODO(), testCase.req) + consumption, _, _, priority, err := gc.onRequestWait(context.TODO(), testCase.req) re.NoError(err, caseNum) + re.Equal(priority, gc.meta.Priority) expectedConsumption := &rmpb.Consumption{} if testCase.req.IsWrite() { kvCalculator.calculateWriteCost(expectedConsumption, testCase.req) diff --git a/client/resource_group/controller/limiter.go b/client/resource_group/controller/limiter.go index f89ab17514c..63c94a9782b 100644 --- a/client/resource_group/controller/limiter.go +++ b/client/resource_group/controller/limiter.go @@ -122,10 +122,11 @@ func NewLimiterWithCfg(now time.Time, cfg tokenBucketReconfigureArgs, lowTokensN // A Reservation holds information about events that are permitted by a Limiter to happen after a delay. // A Reservation may be canceled, which may enable the Limiter to permit additional events. 
type Reservation struct { - ok bool - lim *Limiter - tokens float64 - timeToAct time.Time + ok bool + lim *Limiter + tokens float64 + timeToAct time.Time + needWaitDurtion time.Duration // This is the Limit at reservation time, it can change later. limit Limit } @@ -301,7 +302,7 @@ func (lim *Limiter) Reconfigure(now time.Time, ) { lim.mu.Lock() defer lim.mu.Unlock() - log.Debug("[resource group controller] before reconfigure", zap.Float64("old-tokens", lim.tokens), zap.Float64("old-rate", float64(lim.limit)), zap.Float64("old-notify-threshold", args.NotifyThreshold), zap.Int64("old-burst", lim.burst)) + logControllerTrace("[resource group controller] before reconfigure", zap.Float64("old-tokens", lim.tokens), zap.Float64("old-rate", float64(lim.limit)), zap.Float64("old-notify-threshold", args.NotifyThreshold), zap.Int64("old-burst", lim.burst)) if args.NewBurst < 0 { lim.last = now lim.tokens = args.NewTokens @@ -317,7 +318,7 @@ func (lim *Limiter) Reconfigure(now time.Time, opt(lim) } lim.maybeNotify() - log.Debug("[resource group controller] after reconfigure", zap.Float64("tokens", lim.tokens), zap.Float64("rate", float64(lim.limit)), zap.Float64("notify-threshold", args.NotifyThreshold), zap.Int64("burst", lim.burst)) + logControllerTrace("[resource group controller] after reconfigure", zap.Float64("tokens", lim.tokens), zap.Float64("rate", float64(lim.limit)), zap.Float64("notify-threshold", args.NotifyThreshold), zap.Int64("burst", lim.burst)) } // AvailableTokens decreases the amount of tokens currently available. @@ -358,9 +359,10 @@ func (lim *Limiter) reserveN(now time.Time, n float64, maxFutureReserve time.Dur // Prepare reservation r := Reservation{ - ok: ok, - lim: lim, - limit: lim.limit, + ok: ok, + lim: lim, + limit: lim.limit, + needWaitDurtion: waitDuration, } if ok { r.tokens = n @@ -372,7 +374,14 @@ func (lim *Limiter) reserveN(now time.Time, n float64, maxFutureReserve time.Dur lim.tokens = tokens lim.maybeNotify() } else { - log.Debug("[resource group controller]", zap.Float64("current-tokens", lim.tokens), zap.Float64("current-rate", float64(lim.limit)), zap.Float64("request-tokens", n), zap.Int64("burst", lim.burst), zap.Int("remaining-notify-times", lim.remainingNotifyTimes)) + log.Warn("[resource group controller] cannot reserve enough tokens", + zap.Duration("need-wait-duration", waitDuration), + zap.Duration("max-wait-duration", maxFutureReserve), + zap.Float64("current-ltb-tokens", lim.tokens), + zap.Float64("current-ltb-rate", float64(lim.limit)), + zap.Float64("request-tokens", n), + zap.Int64("burst", lim.burst), + zap.Int("remaining-notify-times", lim.remainingNotifyTimes)) lim.last = last if lim.limit == 0 { lim.notify() @@ -452,7 +461,7 @@ func WaitReservations(ctx context.Context, now time.Time, reservations []*Reserv for _, res := range reservations { if !res.ok { cancel() - return 0, errs.ErrClientResourceGroupThrottled + return res.needWaitDurtion, errs.ErrClientResourceGroupThrottled } delay := res.DelayFrom(now) if delay > longestDelayDuration { diff --git a/client/resource_group/controller/limiter_test.go b/client/resource_group/controller/limiter_test.go index b8b96ae13d6..d963f830551 100644 --- a/client/resource_group/controller/limiter_test.go +++ b/client/resource_group/controller/limiter_test.go @@ -161,7 +161,7 @@ func TestCancel(t *testing.T) { checkTokens(re, lim1, t2, 7) checkTokens(re, lim2, t2, 2) d, err := WaitReservations(ctx, t2, []*Reservation{r1, r2}) - re.Equal(d, time.Duration(0)) + re.Equal(d, 4*time.Second) re.Error(err) 
checkTokens(re, lim1, t3, 13) checkTokens(re, lim2, t3, 3) diff --git a/client/resource_group/controller/metrics.go b/client/resource_group/controller/metrics.go index 68eb26d0312..7e6a559265b 100644 --- a/client/resource_group/controller/metrics.go +++ b/client/resource_group/controller/metrics.go @@ -42,6 +42,15 @@ var ( Help: "Bucketed histogram of wait duration of successful request.", }, []string{resourceGroupNameLabel}) + failedLimitReserveDuration = prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Namespace: namespace, + Subsystem: requestSubsystem, + Name: "limit_reserve_time_failed", + Buckets: []float64{.005, .01, .05, .1, .5, 1, 5, 10, 20, 25, 30}, // 0.005 ~ 30 + Help: "Bucketed histogram of wait duration of failed request.", + }, []string{resourceGroupNameLabel}) + failedRequestCounter = prometheus.NewCounterVec( prometheus.CounterOpts{ Namespace: namespace, @@ -85,6 +94,7 @@ func init() { prometheus.MustRegister(resourceGroupStatusGauge) prometheus.MustRegister(successfulRequestDuration) prometheus.MustRegister(failedRequestCounter) + prometheus.MustRegister(failedLimitReserveDuration) prometheus.MustRegister(requestRetryCounter) prometheus.MustRegister(tokenRequestDuration) prometheus.MustRegister(resourceGroupTokenRequestCounter) diff --git a/client/retry/backoff.go b/client/retry/backoff.go index e2ca9ab3972..b47a39d8eaa 100644 --- a/client/retry/backoff.go +++ b/client/retry/backoff.go @@ -34,9 +34,11 @@ func (bo *BackOffer) Exec( fn func() error, ) error { if err := fn(); err != nil { + after := time.NewTimer(bo.nextInterval()) + defer after.Stop() select { case <-ctx.Done(): - case <-time.After(bo.nextInterval()): + case <-after.C: failpoint.Inject("backOffExecute", func() { testBackOffExecuteFlag = true }) diff --git a/client/tso_client.go b/client/tso_client.go index 35d9388c72b..fc38ee8e5ba 100644 --- a/client/tso_client.go +++ b/client/tso_client.go @@ -171,9 +171,10 @@ func (c *tsoClient) GetTSOAllocatorClientConnByDCLocation(dcLocation string) (*g if !ok { panic(fmt.Sprintf("the allocator leader in %s should exist", dcLocation)) } + // todo: if we support local tso forward, we should get or create client conns. 
cc, ok := c.svcDiscovery.GetClientConns().Load(url) if !ok { - panic(fmt.Sprintf("the client connection of %s in %s should exist", url, dcLocation)) + return nil, url.(string) } return cc.(*grpc.ClientConn), url.(string) } diff --git a/client/tso_dispatcher.go b/client/tso_dispatcher.go index e4c5bf3c77a..0de4dc3a49e 100644 --- a/client/tso_dispatcher.go +++ b/client/tso_dispatcher.go @@ -254,7 +254,7 @@ func (c *tsoClient) checkAllocator( requestForwarded.WithLabelValues(forwardedHostTrim, addrTrim).Set(0) }() cc, u := c.GetTSOAllocatorClientConnByDCLocation(dc) - healthCli := healthpb.NewHealthClient(cc) + var healthCli healthpb.HealthClient ticker := time.NewTicker(time.Second) defer ticker.Stop() for { @@ -263,20 +263,25 @@ func (c *tsoClient) checkAllocator( log.Info("[tso] the leader of the allocator leader is changed", zap.String("dc", dc), zap.String("origin", url), zap.String("new", u)) return } - healthCtx, healthCancel := context.WithTimeout(dispatcherCtx, c.option.timeout) - resp, err := healthCli.Check(healthCtx, &healthpb.HealthCheckRequest{Service: ""}) - failpoint.Inject("unreachableNetwork", func() { - resp.Status = healthpb.HealthCheckResponse_UNKNOWN - }) - healthCancel() - if err == nil && resp.GetStatus() == healthpb.HealthCheckResponse_SERVING { - // create a stream of the original allocator - cctx, cancel := context.WithCancel(dispatcherCtx) - stream, err := c.tsoStreamBuilderFactory.makeBuilder(cc).build(cctx, cancel, c.option.timeout) - if err == nil && stream != nil { - log.Info("[tso] recover the original tso stream since the network has become normal", zap.String("dc", dc), zap.String("url", url)) - updateAndClear(url, &tsoConnectionContext{url, stream, cctx, cancel}) - return + if healthCli == nil && cc != nil { + healthCli = healthpb.NewHealthClient(cc) + } + if healthCli != nil { + healthCtx, healthCancel := context.WithTimeout(dispatcherCtx, c.option.timeout) + resp, err := healthCli.Check(healthCtx, &healthpb.HealthCheckRequest{Service: ""}) + failpoint.Inject("unreachableNetwork", func() { + resp.Status = healthpb.HealthCheckResponse_UNKNOWN + }) + healthCancel() + if err == nil && resp.GetStatus() == healthpb.HealthCheckResponse_SERVING { + // create a stream of the original allocator + cctx, cancel := context.WithCancel(dispatcherCtx) + stream, err := c.tsoStreamBuilderFactory.makeBuilder(cc).build(cctx, cancel, c.option.timeout) + if err == nil && stream != nil { + log.Info("[tso] recover the original tso stream since the network has become normal", zap.String("dc", dc), zap.String("url", url)) + updateAndClear(url, &tsoConnectionContext{url, stream, cctx, cancel}) + return + } } } select { @@ -285,7 +290,7 @@ func (c *tsoClient) checkAllocator( case <-ticker.C: // To ensure we can get the latest allocator leader // and once the leader is changed, we can exit this function. 
- _, u = c.GetTSOAllocatorClientConnByDCLocation(dc) + cc, u = c.GetTSOAllocatorClientConnByDCLocation(dc) } } } @@ -597,29 +602,32 @@ func (c *tsoClient) tryConnectToTSO( for i := 0; i < maxRetryTimes; i++ { c.svcDiscovery.ScheduleCheckMemberChanged() cc, url = c.GetTSOAllocatorClientConnByDCLocation(dc) - cctx, cancel := context.WithCancel(dispatcherCtx) - stream, err = c.tsoStreamBuilderFactory.makeBuilder(cc).build(cctx, cancel, c.option.timeout) - failpoint.Inject("unreachableNetwork", func() { - stream = nil - err = status.New(codes.Unavailable, "unavailable").Err() - }) - if stream != nil && err == nil { - updateAndClear(url, &tsoConnectionContext{url, stream, cctx, cancel}) - return nil - } - - if err != nil && c.option.enableForwarding { - // The reason we need to judge if the error code is equal to "Canceled" here is that - // when we create a stream we use a goroutine to manually control the timeout of the connection. - // There is no need to wait for the transport layer timeout which can reduce the time of unavailability. - // But it conflicts with the retry mechanism since we use the error code to decide if it is caused by network error. - // And actually the `Canceled` error can be regarded as a kind of network error in some way. - if rpcErr, ok := status.FromError(err); ok && (isNetworkError(rpcErr.Code()) || rpcErr.Code() == codes.Canceled) { - networkErrNum++ + if cc != nil { + cctx, cancel := context.WithCancel(dispatcherCtx) + stream, err = c.tsoStreamBuilderFactory.makeBuilder(cc).build(cctx, cancel, c.option.timeout) + failpoint.Inject("unreachableNetwork", func() { + stream = nil + err = status.New(codes.Unavailable, "unavailable").Err() + }) + if stream != nil && err == nil { + updateAndClear(url, &tsoConnectionContext{url, stream, cctx, cancel}) + return nil } - } - cancel() + if err != nil && c.option.enableForwarding { + // The reason we need to judge if the error code is equal to "Canceled" here is that + // when we create a stream we use a goroutine to manually control the timeout of the connection. + // There is no need to wait for the transport layer timeout which can reduce the time of unavailability. + // But it conflicts with the retry mechanism since we use the error code to decide if it is caused by network error. + // And actually the `Canceled` error can be regarded as a kind of network error in some way. + if rpcErr, ok := status.FromError(err); ok && (isNetworkError(rpcErr.Code()) || rpcErr.Code() == codes.Canceled) { + networkErrNum++ + } + } + cancel() + } else { + networkErrNum++ + } select { case <-dispatcherCtx.Done(): return err diff --git a/client/tso_service_discovery.go b/client/tso_service_discovery.go index 92f95129951..5f14c406797 100644 --- a/client/tso_service_discovery.go +++ b/client/tso_service_discovery.go @@ -288,21 +288,6 @@ func (c *tsoServiceDiscovery) GetKeyspaceGroupID() uint32 { return c.keyspaceGroupSD.group.Id } -// DiscoverServiceURLs discovers the microservice with the specified type and returns the server urls. -func (c *tsoServiceDiscovery) DiscoverMicroservice(svcType serviceType) ([]string, error) { - var urls []string - - switch svcType { - case apiService: - case tsoService: - return c.apiSvcDiscovery.DiscoverMicroservice(tsoService) - default: - panic("invalid service type") - } - - return urls, nil -} - // GetServiceURLs returns the URLs of the tso primary/secondary addresses of this keyspace group. // For testing use. It should only be called when the client is closed. 
func (c *tsoServiceDiscovery) GetServiceURLs() []string { @@ -582,7 +567,7 @@ func (c *tsoServiceDiscovery) getTSOServer(sd ServiceDiscovery) (string, error) ) t := c.tsoServerDiscovery if len(t.addrs) == 0 || t.failureCount == len(t.addrs) { - addrs, err = sd.DiscoverMicroservice(tsoService) + addrs, err = sd.(*pdServiceDiscovery).discoverMicroservice(tsoService) if err != nil { return "", err } diff --git a/errors.toml b/errors.toml index 1b96de8a209..a318fc32492 100644 --- a/errors.toml +++ b/errors.toml @@ -16,6 +16,11 @@ error = ''' redirect failed ''' +["PD:apiutil:ErrRedirectToNotLeader"] +error = ''' +redirect to not leader +''' + ["PD:autoscaling:ErrEmptyMetricsResponse"] error = ''' metrics response from Prometheus is empty @@ -496,6 +501,16 @@ error = ''' init file log error, %s ''' +["PD:mcs:ErrNotFoundSchedulingAddr"] +error = ''' +cannot find scheduling address +''' + +["PD:mcs:ErrSchedulingServer"] +error = ''' +scheduling server meets %v +''' + ["PD:member:ErrCheckCampaign"] error = ''' check campaign failed @@ -541,6 +556,11 @@ error = ''' build rule list failed, %s ''' +["PD:placement:ErrKeyFormat"] +error = ''' +key should be in hex format, %s +''' + ["PD:placement:ErrLoadRule"] error = ''' load rule failed @@ -551,11 +571,21 @@ error = ''' load rule group failed ''' +["PD:placement:ErrPlacementDisabled"] +error = ''' +placement rules feature is disabled +''' + ["PD:placement:ErrRuleContent"] error = ''' invalid rule content, %s ''' +["PD:placement:ErrRuleNotFound"] +error = ''' +rule not found +''' + ["PD:plugin:ErrLoadPlugin"] error = ''' failed to load plugin @@ -606,6 +636,11 @@ error = ''' region %v has abnormal peer ''' +["PD:region:ErrRegionInvalidID"] +error = ''' +invalid region id +''' + ["PD:region:ErrRegionNotAdjacent"] error = ''' two regions are not adjacent diff --git a/go.mod b/go.mod index 86f56089347..676d350d22d 100644 --- a/go.mod +++ b/go.mod @@ -18,7 +18,7 @@ require ( github.com/gin-contrib/cors v1.4.0 github.com/gin-contrib/gzip v0.0.1 github.com/gin-contrib/pprof v1.4.0 - github.com/gin-gonic/gin v1.8.1 + github.com/gin-gonic/gin v1.9.1 github.com/go-echarts/go-echarts v1.0.0 github.com/gogo/protobuf v1.3.2 github.com/google/btree v1.1.2 @@ -36,7 +36,7 @@ require ( github.com/pingcap/kvproto v0.0.0-20231018065736-c0689aded40c github.com/pingcap/log v1.1.1-0.20221110025148-ca232912c9f3 github.com/pingcap/sysutil v1.0.1-0.20230407040306-fb007c5aff21 - github.com/pingcap/tidb-dashboard v0.0.0-20230911054332-22add1e00511 + github.com/pingcap/tidb-dashboard v0.0.0-20231127105651-ce4097837c5e github.com/prometheus/client_golang v1.11.1 github.com/prometheus/common v0.26.0 github.com/sasha-s/go-deadlock v0.2.0 @@ -45,7 +45,7 @@ require ( github.com/soheilhy/cmux v0.1.4 github.com/spf13/cobra v1.0.0 github.com/spf13/pflag v1.0.5 - github.com/stretchr/testify v1.8.2 + github.com/stretchr/testify v1.8.3 github.com/swaggo/http-swagger v1.2.6 github.com/swaggo/swag v1.8.3 github.com/syndtr/goleveldb v1.0.1-0.20190318030020-c3a204f8e965 @@ -84,8 +84,10 @@ require ( github.com/beorn7/perks v1.0.1 // indirect github.com/bitly/go-simplejson v0.5.0 // indirect github.com/breeswish/gin-jwt/v2 v2.6.4-jwt-patch // indirect + github.com/bytedance/sonic v1.9.1 // indirect github.com/cenkalti/backoff/v4 v4.0.2 // indirect github.com/cespare/xxhash/v2 v2.2.0 // indirect + github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311 // indirect github.com/coreos/go-systemd v0.0.0-20190719114852-fd7a80b32e1f // indirect github.com/coreos/pkg 
v0.0.0-20180928190104-399ea9e2e55f // indirect github.com/cpuguy83/go-md2man/v2 v2.0.0 // indirect @@ -96,6 +98,7 @@ require ( github.com/fatih/structtag v1.2.0 // indirect github.com/fogleman/gg v1.3.0 // indirect github.com/fsnotify/fsnotify v1.4.9 // indirect + github.com/gabriel-vasile/mimetype v1.4.2 // indirect github.com/ghodss/yaml v1.0.0 // indirect github.com/gin-contrib/sse v0.1.0 // indirect github.com/go-ole/go-ole v1.2.6 // indirect @@ -103,13 +106,13 @@ require ( github.com/go-openapi/jsonreference v0.19.6 // indirect github.com/go-openapi/spec v0.20.4 // indirect github.com/go-openapi/swag v0.19.15 // indirect - github.com/go-playground/locales v0.14.0 // indirect - github.com/go-playground/universal-translator v0.18.0 // indirect - github.com/go-playground/validator/v10 v10.10.0 // indirect + github.com/go-playground/locales v0.14.1 // indirect + github.com/go-playground/universal-translator v0.18.1 // indirect + github.com/go-playground/validator/v10 v10.14.0 // indirect github.com/go-resty/resty/v2 v2.6.0 // indirect github.com/go-sql-driver/mysql v1.7.0 // indirect github.com/goccy/go-graphviz v0.0.9 // indirect - github.com/goccy/go-json v0.9.7 // indirect + github.com/goccy/go-json v0.10.2 // indirect github.com/golang-jwt/jwt v3.2.1+incompatible // indirect github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0 // indirect github.com/golang/protobuf v1.5.3 // indirect @@ -128,11 +131,12 @@ require ( github.com/joomcode/errorx v1.0.1 // indirect github.com/josharian/intern v1.0.0 // indirect github.com/json-iterator/go v1.1.12 // indirect + github.com/klauspost/cpuid/v2 v2.2.4 // indirect github.com/konsorten/go-windows-terminal-sequences v1.0.3 // indirect - github.com/leodido/go-urn v1.2.1 // indirect + github.com/leodido/go-urn v1.2.4 // indirect github.com/lufia/plan9stats v0.0.0-20230326075908-cb1d2100619a // indirect github.com/mattn/go-colorable v0.1.8 // indirect - github.com/mattn/go-isatty v0.0.14 // indirect + github.com/mattn/go-isatty v0.0.19 // indirect github.com/mattn/go-runewidth v0.0.8 // indirect github.com/mattn/go-sqlite3 v1.14.15 // indirect github.com/matttproud/golang_protobuf_extensions v1.0.1 // indirect @@ -144,7 +148,7 @@ require ( github.com/oleiade/reflections v1.0.1 // indirect github.com/olekukonko/tablewriter v0.0.4 // indirect github.com/onsi/gomega v1.20.1 // indirect - github.com/pelletier/go-toml/v2 v2.0.1 // indirect + github.com/pelletier/go-toml/v2 v2.0.8 // indirect github.com/petermattis/goid v0.0.0-20211229010228-4d14c490ee36 // indirect github.com/pingcap/tipb v0.0.0-20220718022156-3e2483c20a9e // indirect github.com/pkg/errors v0.9.1 // indirect @@ -166,7 +170,8 @@ require ( github.com/tklauser/go-sysconf v0.3.11 // indirect github.com/tklauser/numcpus v0.6.0 // indirect github.com/tmc/grpc-websocket-proxy v0.0.0-20200427203606-3cfed13b9966 // indirect - github.com/ugorji/go/codec v1.2.7 // indirect + github.com/twitchyliquid64/golang-asm v0.15.1 // indirect + github.com/ugorji/go/codec v1.2.11 // indirect github.com/urfave/cli/v2 v2.3.0 // indirect github.com/vmihailenco/msgpack/v5 v5.3.5 // indirect github.com/vmihailenco/tagparser/v2 v2.0.0 // indirect @@ -177,6 +182,7 @@ require ( go.uber.org/dig v1.9.0 // indirect go.uber.org/fx v1.12.0 // indirect go.uber.org/multierr v1.11.0 // indirect + golang.org/x/arch v0.3.0 // indirect golang.org/x/crypto v0.14.0 // indirect golang.org/x/image v0.5.0 // indirect golang.org/x/mod v0.8.0 // indirect diff --git a/go.sum b/go.sum index 9392644f181..c7ceeee028c 100644 --- 
a/go.sum +++ b/go.sum @@ -72,6 +72,9 @@ github.com/bmizerany/assert v0.0.0-20160611221934-b7ed37b82869 h1:DDGfHa7BWjL4Yn github.com/bmizerany/assert v0.0.0-20160611221934-b7ed37b82869/go.mod h1:Ekp36dRnpXw/yCqJaO+ZrUyxD+3VXMFFr56k5XYrpB4= github.com/breeswish/gin-jwt/v2 v2.6.4-jwt-patch h1:KLE/YeX+9FNaGVW5MtImRVPhjDpfpgJhvkuYWBmOYbo= github.com/breeswish/gin-jwt/v2 v2.6.4-jwt-patch/go.mod h1:KjBLriHXe7L6fGceqWzTod8HUB/TP1WWDtfuSYtYXaI= +github.com/bytedance/sonic v1.5.0/go.mod h1:ED5hyg4y6t3/9Ku1R6dU/4KyJ48DZ4jPhfY1O2AihPM= +github.com/bytedance/sonic v1.9.1 h1:6iJ6NqdoxCDr6mbY8h18oSO+cShGSMRGCEo7F2h0x8s= +github.com/bytedance/sonic v1.9.1/go.mod h1:i736AoUSYt75HyZLoJW9ERYxcy6eaN6h4BZXU064P/U= github.com/cakturk/go-netstat v0.0.0-20200220111822-e5b49efee7a5 h1:BjkPE3785EwPhhyuFkbINB+2a1xATwk8SNDWnJiD41g= github.com/cakturk/go-netstat v0.0.0-20200220111822-e5b49efee7a5/go.mod h1:jtAfVaU/2cu1+wdSRPWE2c1N2qeAA3K4RH9pYgqwets= github.com/cenkalti/backoff/v4 v4.0.2 h1:JIufpQLbh4DkbQoii76ItQIUFzevQSqOLZca4eamEDs= @@ -81,6 +84,9 @@ github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghf github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/chenzhuoyu/base64x v0.0.0-20211019084208-fb5309c8db06/go.mod h1:DH46F32mSOjUmXrMHnKwZdA8wcEefY7UVqBKYGjpdQY= +github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311 h1:qSGYFH7+jGhDF8vLC+iwCD4WpbV1EBDSzWkJODFLams= +github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311/go.mod h1:b583jCggY9gE99b6G5LEC39OIiVsWj+R97kbl5odCEk= github.com/chzyer/logex v1.1.10 h1:Swpa1K6QvQznwJRcfTfQJmTE72DqScAa40E+fbHEXEE= github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e h1:fY5BOSpyZCqRo5OhCuC+XN+r/bBCmeuuJtjz+bCNIf8= @@ -140,6 +146,8 @@ github.com/fogleman/gg v1.3.0/go.mod h1:R/bRT+9gY/C5z7JzPU0zXsXHKM4/ayA+zqcVNZzP github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= github.com/fsnotify/fsnotify v1.4.9 h1:hsms1Qyu0jgnwNXIxa+/V/PDsU6CfLf6CNO8H7IWoS4= github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ= +github.com/gabriel-vasile/mimetype v1.4.2 h1:w5qFW6JKBz9Y393Y4q372O9A7cUSequkh1Q7OhCmWKU= +github.com/gabriel-vasile/mimetype v1.4.2/go.mod h1:zApsH/mKG4w07erKIaJPFiX0Tsq9BFQgN3qGY5GnNgA= github.com/ghodss/yaml v1.0.0 h1:wQHKEahhL6wmXdzwWG11gIVCkOv05bNOh+Rxn0yngAk= github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= github.com/gin-contrib/cors v1.4.0 h1:oJ6gwtUl3lqV0WEIwM/LxPF1QZ5qe2lGWdY2+bz7y0g= @@ -153,8 +161,9 @@ github.com/gin-contrib/sse v0.1.0 h1:Y/yl/+YNO8GZSjAhjMsSuLt29uWRFHdHYUb5lYOV9qE github.com/gin-contrib/sse v0.1.0/go.mod h1:RHrZQHXnP2xjPF+u1gW/2HnVO7nvIa9PG3Gm+fLHvGI= github.com/gin-gonic/gin v1.3.0/go.mod h1:7cKuhb5qV2ggCFctp2fJQ+ErvciLZrIeoOSOm6mUr7Y= github.com/gin-gonic/gin v1.6.3/go.mod h1:75u5sXoLsGZoRN5Sgbi1eraJ4GU3++wFwWzhwvtwp4M= -github.com/gin-gonic/gin v1.8.1 h1:4+fr/el88TOO3ewCmQr8cx/CtZ/umlIRIs5M4NTNjf8= github.com/gin-gonic/gin v1.8.1/go.mod h1:ji8BvRH1azfM+SYow9zQ6SZMvR8qOMZHmsCuWR9tTTk= +github.com/gin-gonic/gin v1.9.1 h1:4idEAncQnU5cB7BeOkPtxjfCSye0AAm1R0RVIqJ+Jmg= +github.com/gin-gonic/gin v1.9.1/go.mod h1:hPrL7YrpYKXt5YId3A/Tnip5kqbEAP+KLuI3SUcPTeU= 
github.com/go-echarts/go-echarts v1.0.0 h1:n181E4iXwj4zrU9VYmdM2m8dyhERt2w9k9YhHqdp6A8= github.com/go-echarts/go-echarts v1.0.0/go.mod h1:qbmyAb/Rl1f2w7wKba1D4LoNq4U164yO4/wedFbcWyo= github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= @@ -176,17 +185,21 @@ github.com/go-openapi/spec v0.20.4/go.mod h1:faYFR1CvsJZ0mNsmsphTMSoRrNV3TEDoAM7 github.com/go-openapi/swag v0.19.5/go.mod h1:POnQmlKehdgb5mhVOsnJFsivZCEZ/vjK9gh66Z9tfKk= github.com/go-openapi/swag v0.19.15 h1:D2NRCBzS9/pEY3gP9Nl8aDqGUcPFrwG2p+CNFrLyrCM= github.com/go-openapi/swag v0.19.15/go.mod h1:QYRuS/SOXUCsnplDa677K7+DxSOj6IPNl/eQntq43wQ= -github.com/go-playground/assert/v2 v2.0.1 h1:MsBgLAaY856+nPRTKrp3/OZK38U/wa0CcBYNjji3q3A= github.com/go-playground/assert/v2 v2.0.1/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4= +github.com/go-playground/assert/v2 v2.2.0 h1:JvknZsQTYeFEAhQwI4qEt9cyV5ONwRHC+lYKSsYSR8s= +github.com/go-playground/assert/v2 v2.2.0/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4= github.com/go-playground/locales v0.13.0/go.mod h1:taPMhCMXrRLJO55olJkUXHZBHCxTMfnGwq/HNwmWNS8= -github.com/go-playground/locales v0.14.0 h1:u50s323jtVGugKlcYeyzC0etD1HifMjqmJqb8WugfUU= github.com/go-playground/locales v0.14.0/go.mod h1:sawfccIbzZTqEDETgFXqTho0QybSa7l++s0DH+LDiLs= +github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/oXslEjJA= +github.com/go-playground/locales v0.14.1/go.mod h1:hxrqLVvrK65+Rwrd5Fc6F2O76J/NuW9t0sjnWqG1slY= github.com/go-playground/universal-translator v0.17.0/go.mod h1:UkSxE5sNxxRwHyU+Scu5vgOQjsIJAF8j9muTVoKLVtA= -github.com/go-playground/universal-translator v0.18.0 h1:82dyy6p4OuJq4/CByFNOn/jYrnRPArHwAcmLoJZxyho= github.com/go-playground/universal-translator v0.18.0/go.mod h1:UvRDBj+xPUEGrFYl+lu/H90nyDXpg0fqeB/AQUGNTVA= +github.com/go-playground/universal-translator v0.18.1 h1:Bcnm0ZwsGyWbCzImXv+pAJnYK9S473LQFuzCbDbfSFY= +github.com/go-playground/universal-translator v0.18.1/go.mod h1:xekY+UJKNuX9WP91TpwSH2VMlDf28Uj24BCp08ZFTUY= github.com/go-playground/validator/v10 v10.2.0/go.mod h1:uOYAAleCW8F/7oMFd6aG0GOhaH6EGOAJShg8Id5JGkI= -github.com/go-playground/validator/v10 v10.10.0 h1:I7mrTYv78z8k8VXa/qJlOlEXn/nBh+BF8dHX5nt/dr0= github.com/go-playground/validator/v10 v10.10.0/go.mod h1:74x4gJWsvQexRdW8Pn3dXSGrTK4nAUsbPlLADvpJkos= +github.com/go-playground/validator/v10 v10.14.0 h1:vgvQWe3XCz3gIeFDm/HnTIbj6UGmg/+t63MyGU2n5js= +github.com/go-playground/validator/v10 v10.14.0/go.mod h1:9iXMNT7sEkjXb0I+enO7QXmzG6QCsPWY4zveKFVRSyU= github.com/go-resty/resty/v2 v2.6.0 h1:joIR5PNLM2EFqqESUjCMGXrWmXNHEU9CEiK813oKYS4= github.com/go-resty/resty/v2 v2.6.0/go.mod h1:PwvJS6hvaPkjtjNg9ph+VrSD92bi5Zq73w/BIH7cC3Q= github.com/go-sql-driver/mysql v1.7.0 h1:ueSltNNllEqE3qcWBTD0iQd3IpL/6U+mJxLkazJ7YPc= @@ -194,8 +207,9 @@ github.com/go-sql-driver/mysql v1.7.0/go.mod h1:OXbVy3sEdcQ2Doequ6Z5BW6fXNQTmx+9 github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= github.com/goccy/go-graphviz v0.0.9 h1:s/FMMJ1Joj6La3S5ApO3Jk2cwM4LpXECC2muFx3IPQQ= github.com/goccy/go-graphviz v0.0.9/go.mod h1:wXVsXxmyMQU6TN3zGRttjNn3h+iCAS7xQFC6TlNvLhk= -github.com/goccy/go-json v0.9.7 h1:IcB+Aqpx/iMHu5Yooh7jEzJk1JZ7Pjtmys2ukPr7EeM= github.com/goccy/go-json v0.9.7/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I= +github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU= +github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I= github.com/gogo/protobuf v0.0.0-20171007142547-342cbe0a0415/go.mod 
h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= github.com/gogo/protobuf v0.0.0-20180717141946-636bf0302bc9/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= @@ -335,6 +349,9 @@ github.com/julienschmidt/httprouter v1.3.0/go.mod h1:JR6WtHb+2LUe8TCKY3cZOxFyyO8 github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvWXihfKN4Q= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= +github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= +github.com/klauspost/cpuid/v2 v2.2.4 h1:acbojRNwl3o09bUq+yDCtZFc1aiwaAAxtcn8YkZXnvk= +github.com/klauspost/cpuid/v2 v2.2.4/go.mod h1:RVVoqg1df56z8g3pUjL/3lE5UfnlrJX8tyFgg4nqhuY= github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/konsorten/go-windows-terminal-sequences v1.0.3 h1:CE8S1cTafDpPvMhIxNJKvHsGVBgn1xWYf1NbHQhywc8= github.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= @@ -348,8 +365,9 @@ github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/leodido/go-urn v1.2.0/go.mod h1:+8+nEpDfqqsY+g338gtMEUOtuK+4dEMhiQEgxpxOKII= -github.com/leodido/go-urn v1.2.1 h1:BqpAaACuzVSgi/VLzGZIobT2z4v53pjosyNd9Yv6n/w= github.com/leodido/go-urn v1.2.1/go.mod h1:zt4jvISO2HfUBqxjfIshjdMTYS56ZS/qv49ictyFfxY= +github.com/leodido/go-urn v1.2.4 h1:XlAE/cm/ms7TE/VMVoduSpNBoyc2dOxHs5MZSwAN63Q= +github.com/leodido/go-urn v1.2.4/go.mod h1:7ZrI8mTSeBSHl/UaRyKQW1qZeMgak41ANeCNaVckg+4= github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0/go.mod h1:zJYVVT2jmtg6P3p1VtQj7WsuWi/y4VnjVBn7F8KPB3I= github.com/lufia/plan9stats v0.0.0-20230326075908-cb1d2100619a h1:N9zuLhTvBSRt0gWSiJswwQ2HqDmtX/ZCDJURnKUt1Ik= github.com/lufia/plan9stats v0.0.0-20230326075908-cb1d2100619a/go.mod h1:JKx41uQRwqlTZabZc+kILPrO/3jlKnQ2Z8b7YiVw5cE= @@ -366,8 +384,9 @@ github.com/mattn/go-isatty v0.0.4/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNx github.com/mattn/go-isatty v0.0.8/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s= github.com/mattn/go-isatty v0.0.11/go.mod h1:PhnuNfih5lzO57/f3n+odYbM4JtupLOxQOAqxQCu2WE= github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU= -github.com/mattn/go-isatty v0.0.14 h1:yVuAays6BHfxijgZPzw+3Zlu5yQgKGP2/hcQbHb7S9Y= github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94= +github.com/mattn/go-isatty v0.0.19 h1:JITubQf0MOLdlGRuRq+jtsDlekdYPia9ZFsB8h/APPA= +github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= github.com/mattn/go-runewidth v0.0.2/go.mod h1:LwmH8dsx7+W8Uxz3IHJYH5QSwggIsqBzpuz5H//U1FU= github.com/mattn/go-runewidth v0.0.7/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI= github.com/mattn/go-runewidth v0.0.8 h1:3tS41NlGYSmhhe/8fhGRzc+z3AYCw1Fe1WAyLuujKs0= @@ -421,8 +440,9 @@ github.com/otiai10/mint v1.3.0/go.mod h1:F5AjcsTsWUqX+Na9fpHb52P8pcRX2CI6A3ctIT9 github.com/otiai10/mint v1.3.3/go.mod h1:/yxELlJQ0ufhjUwhshSj+wFjZ78CnZ48/1wtmBH1OTc= github.com/pascaldekloe/name v0.0.0-20180628100202-0fd16699aae1/go.mod h1:eD5JxqMiuNYyFNmyY9rkJ/slN8y59oEu4Ei7F8OoKWQ= 
github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic= -github.com/pelletier/go-toml/v2 v2.0.1 h1:8e3L2cCQzLFi2CR4g7vGFuFxX7Jl1kKX8gW+iV0GUKU= github.com/pelletier/go-toml/v2 v2.0.1/go.mod h1:r9LEWfGN8R5k0VXJ+0BkIe7MYkRdwZOjgMj2KwnJFUo= +github.com/pelletier/go-toml/v2 v2.0.8 h1:0ctb6s9mE31h0/lhu+J6OPmVeDxJn+kYnJc2jZR9tGQ= +github.com/pelletier/go-toml/v2 v2.0.8/go.mod h1:vuYfssBdrU2XDZ9bYydBu6t+6a6PYNcZljzZR9VXg+4= github.com/petermattis/goid v0.0.0-20211229010228-4d14c490ee36 h1:64bxqeTEN0/xoEqhKGowgihNuzISS9rEG6YUMU4bzJo= github.com/petermattis/goid v0.0.0-20211229010228-4d14c490ee36/go.mod h1:pxMtw7cyUw6B2bRH0ZBANSPg+AoSud1I1iyJHI69jH4= github.com/phf/go-queue v0.0.0-20170504031614-9abe38d0371d h1:U+PMnTlV2tu7RuMK5etusZG3Cf+rpow5hqQByeCzJ2g= @@ -446,8 +466,8 @@ github.com/pingcap/log v1.1.1-0.20221110025148-ca232912c9f3 h1:HR/ylkkLmGdSSDaD8 github.com/pingcap/log v1.1.1-0.20221110025148-ca232912c9f3/go.mod h1:DWQW5jICDR7UJh4HtxXSM20Churx4CQL0fwL/SoOSA4= github.com/pingcap/sysutil v1.0.1-0.20230407040306-fb007c5aff21 h1:QV6jqlfOkh8hqvEAgwBZa+4bSgO0EeKC7s5c6Luam2I= github.com/pingcap/sysutil v1.0.1-0.20230407040306-fb007c5aff21/go.mod h1:QYnjfA95ZaMefyl1NO8oPtKeb8pYUdnDVhQgf+qdpjM= -github.com/pingcap/tidb-dashboard v0.0.0-20230911054332-22add1e00511 h1:oyrCfNlAWmLlUfEr+7YTSBo29SP/J1N8hnxBt5yUABo= -github.com/pingcap/tidb-dashboard v0.0.0-20230911054332-22add1e00511/go.mod h1:EZ90+V5S4TttbYag6oKZ3jcNKRwZe1Mc9vXwOt9JBYw= +github.com/pingcap/tidb-dashboard v0.0.0-20231127105651-ce4097837c5e h1:SJUSDejvKtj9vSh5ptRHh4iMrvPV3oKO8yp6/SYE8vc= +github.com/pingcap/tidb-dashboard v0.0.0-20231127105651-ce4097837c5e/go.mod h1:ucZBRz52icb23T/5Z4CsuUHmarYiin7p2MeiVBe+o8c= github.com/pingcap/tipb v0.0.0-20220718022156-3e2483c20a9e h1:FBaTXU8C3xgt/drM58VHxojHo/QoG1oPsgWTGvaSpO4= github.com/pingcap/tipb v0.0.0-20220718022156-3e2483c20a9e/go.mod h1:A7mrd7WHBl1o63LE2bIBGEJMTNWXqhgmYiOvMLxozfs= github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA= @@ -548,8 +568,10 @@ github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/ github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= -github.com/stretchr/testify v1.8.2 h1:+h33VjcLVPDHtOdpUCuF+7gSuG3yGIftsP1YvFihtJ8= +github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= github.com/stretchr/testify v1.8.2/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= +github.com/stretchr/testify v1.8.3 h1:RP3t2pwF7cMEbC1dqtB6poj3niw/9gnV4Cjg5oW5gtY= +github.com/stretchr/testify v1.8.3/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= github.com/swaggo/files v0.0.0-20210815190702-a29dd2bc99b2 h1:+iNTcqQJy0OZ5jk6a5NLib47eqXK8uYcPX+O4+cBpEM= github.com/swaggo/files v0.0.0-20210815190702-a29dd2bc99b2/go.mod h1:lKJPbtWzJ9JhsTN1k1gZgleJWY/cqq0psdoMmaThG3w= github.com/swaggo/http-swagger v1.2.6 h1:ihTjChUoSRMpFMjWw+0AkL1Ti4r6v8pCgVYLmQVRlRw= @@ -577,13 +599,16 @@ github.com/tklauser/numcpus v0.6.0/go.mod h1:FEZLMke0lhOUG6w2JadTzp0a+Nl8PF/GFkQ github.com/tmc/grpc-websocket-proxy v0.0.0-20190109142713-0ad062ec5ee5/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= github.com/tmc/grpc-websocket-proxy v0.0.0-20200427203606-3cfed13b9966 h1:j6JEOq5QWFker+d7mFQYOhjTZonQ7YkLTHm56dbn+yM= 
github.com/tmc/grpc-websocket-proxy v0.0.0-20200427203606-3cfed13b9966/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= +github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI= +github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08= github.com/ugorji/go v1.1.4/go.mod h1:uQMGLiO92mf5W77hV/PUCpI3pbzQx3CRekS0kk+RGrc= github.com/ugorji/go v1.1.7/go.mod h1:kZn38zHttfInRq0xu/PH0az30d+z6vm202qpg1oXVMw= github.com/ugorji/go v1.2.7/go.mod h1:nF9osbDWLy6bDVv/Rtoh6QgnvNDpmCalQV5urGCCS6M= github.com/ugorji/go/codec v0.0.0-20181022190402-e5e69e061d4f/go.mod h1:VFNgLljTbGfSG7qAOspJ7OScBnGdDN/yBr0sguwnwf0= github.com/ugorji/go/codec v1.1.7/go.mod h1:Ax+UKWsSmolVDwsd+7N3ZtXu+yMGCf907BLYF3GoBXY= -github.com/ugorji/go/codec v1.2.7 h1:YPXUKf7fYbp/y8xloBqZOw2qaVggbfwMlI8WM3wZUJ0= github.com/ugorji/go/codec v1.2.7/go.mod h1:WGN1fab3R1fzQlVQTkfxVtIBhWDRqOviHU95kRgeqEY= +github.com/ugorji/go/codec v1.2.11 h1:BMaWp1Bb6fHwEtbplGBGJ498wD+LKlNSl25MjdZY4dU= +github.com/ugorji/go/codec v1.2.11/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg= github.com/unrolled/render v1.0.1 h1:VDDnQQVfBMsOsp3VaCJszSO0nkBIVEYoPWeRThk9spY= github.com/unrolled/render v1.0.1/go.mod h1:gN9T0NhL4Bfbwu8ann7Ry/TGHYfosul+J0obPf6NBdM= github.com/urfave/cli v1.20.0/go.mod h1:70zkFmudgCuE/ngEzBv17Jvp/497gISqfk5gWijbERA= @@ -642,6 +667,9 @@ go.uber.org/zap v1.16.0/go.mod h1:MA8QOfq0BHJwdXa996Y4dYkAqRKB8/1K1QMMZVaNZjQ= go.uber.org/zap v1.19.0/go.mod h1:xg/QME4nWcxGxrpdeYfq7UvYrLh66cuVKdrbD1XF/NI= go.uber.org/zap v1.24.0 h1:FiJd5l1UOLj0wCgbSE0rwwXHzEdAZS6hiiSnxJN/D60= go.uber.org/zap v1.24.0/go.mod h1:2kMP+WWQ8aoFoedH3T2sq6iJ2yDWpHbP0f6MQbS9Gkg= +golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8= +golang.org/x/arch v0.3.0 h1:02VY4/ZcO/gBOH6PUaoiptASxtXU10jazRCP865E97k= +golang.org/x/arch v0.3.0/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8= golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= @@ -756,6 +784,7 @@ golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20211013075003-97ac67df715c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211019181941-9d821ace8654/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220704084225-05e143d24a9e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= @@ -900,6 +929,7 @@ honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt k8s.io/klog v1.0.0/go.mod h1:4Bi6QPql/J/LkTDqv7R/cd3hPo4k2DG6Ptcz060Ez5I= moul.io/zapgorm2 v1.1.0 h1:qwAlMBYf+qJkJ7PAzJl4oCe6eS6QGiKAXUPeis0+RBE= moul.io/zapgorm2 v1.1.0/go.mod h1:emRfKjNqSzVj5lcgasBdovIXY1jSOwFz2GQZn1Rddks= +rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4= sigs.k8s.io/yaml v1.1.0/go.mod 
h1:UJmg0vDUVViEyp3mgSv9WPwZCDxu4rQW1olrI1uml+o= sigs.k8s.io/yaml v1.2.0 h1:kr/MCeFWJWTwyaHoR9c8EjH9OumOmoF9YGiZd7lFm/Q= sigs.k8s.io/yaml v1.2.0/go.mod h1:yfXDCHCao9+ENCvLSE62v9VSji2MKu5jeNfTrofGhJc= diff --git a/pkg/audit/audit.go b/pkg/audit/audit.go index 3553e5f8377..b971b09ed7e 100644 --- a/pkg/audit/audit.go +++ b/pkg/audit/audit.go @@ -98,7 +98,7 @@ func (b *PrometheusHistogramBackend) ProcessHTTPRequest(req *http.Request) bool if !ok { return false } - b.histogramVec.WithLabelValues(requestInfo.ServiceLabel, "HTTP", requestInfo.Component, requestInfo.IP).Observe(float64(endTime - requestInfo.StartTimeStamp)) + b.histogramVec.WithLabelValues(requestInfo.ServiceLabel, "HTTP", requestInfo.CallerID, requestInfo.IP).Observe(float64(endTime - requestInfo.StartTimeStamp)) return true } diff --git a/pkg/audit/audit_test.go b/pkg/audit/audit_test.go index 20f8c9344f7..8098b36975e 100644 --- a/pkg/audit/audit_test.go +++ b/pkg/audit/audit_test.go @@ -51,7 +51,7 @@ func TestPrometheusHistogramBackend(t *testing.T) { Name: "audit_handling_seconds_test", Help: "PD server service handling audit", Buckets: prometheus.DefBuckets, - }, []string{"service", "method", "component", "ip"}) + }, []string{"service", "method", "caller_id", "ip"}) prometheus.MustRegister(serviceAuditHistogramTest) @@ -59,10 +59,10 @@ func TestPrometheusHistogramBackend(t *testing.T) { defer ts.Close() backend := NewPrometheusHistogramBackend(serviceAuditHistogramTest, true) - req, _ := http.NewRequest(http.MethodGet, "http://127.0.0.1:2379/test?test=test", nil) + req, _ := http.NewRequest(http.MethodGet, "http://127.0.0.1:2379/test?test=test", http.NoBody) info := requestutil.GetRequestInfo(req) info.ServiceLabel = "test" - info.Component = "user1" + info.CallerID = "user1" info.IP = "localhost" req = req.WithContext(requestutil.WithRequestInfo(req.Context(), info)) re.False(backend.ProcessHTTPRequest(req)) @@ -73,20 +73,20 @@ func TestPrometheusHistogramBackend(t *testing.T) { re.True(backend.ProcessHTTPRequest(req)) re.True(backend.ProcessHTTPRequest(req)) - info.Component = "user2" + info.CallerID = "user2" req = req.WithContext(requestutil.WithRequestInfo(req.Context(), info)) re.True(backend.ProcessHTTPRequest(req)) // For test, sleep time needs longer than the push interval time.Sleep(time.Second) - req, _ = http.NewRequest(http.MethodGet, ts.URL, nil) + req, _ = http.NewRequest(http.MethodGet, ts.URL, http.NoBody) resp, err := http.DefaultClient.Do(req) re.NoError(err) defer resp.Body.Close() content, _ := io.ReadAll(resp.Body) output := string(content) - re.Contains(output, "pd_service_audit_handling_seconds_test_count{component=\"user1\",ip=\"localhost\",method=\"HTTP\",service=\"test\"} 2") - re.Contains(output, "pd_service_audit_handling_seconds_test_count{component=\"user2\",ip=\"localhost\",method=\"HTTP\",service=\"test\"} 1") + re.Contains(output, "pd_service_audit_handling_seconds_test_count{caller_id=\"user1\",ip=\"localhost\",method=\"HTTP\",service=\"test\"} 2") + re.Contains(output, "pd_service_audit_handling_seconds_test_count{caller_id=\"user2\",ip=\"localhost\",method=\"HTTP\",service=\"test\"} 1") } func TestLocalLogBackendUsingFile(t *testing.T) { @@ -103,7 +103,7 @@ func TestLocalLogBackendUsingFile(t *testing.T) { b, _ := os.ReadFile(fname) output := strings.SplitN(string(b), "]", 4) re.Equal( - fmt.Sprintf(" [\"audit log\"] [service-info=\"{ServiceLabel:, Method:HTTP/1.1/GET:/test, Component:anonymous, IP:, Port:, "+ + fmt.Sprintf(" [\"audit log\"] [service-info=\"{ServiceLabel:, 
Method:HTTP/1.1/GET:/test, CallerID:anonymous, IP:, Port:, "+ "StartTime:%s, URLParam:{\\\"test\\\":[\\\"test\\\"]}, BodyParam:testBody}\"]\n", time.Unix(info.StartTimeStamp, 0).String()), output[3], diff --git a/pkg/cluster/cluster.go b/pkg/cluster/cluster.go index 0b3e0351b16..8809a706936 100644 --- a/pkg/cluster/cluster.go +++ b/pkg/cluster/cluster.go @@ -59,7 +59,4 @@ func Collect(c Cluster, region *core.RegionInfo, stores []*core.StoreInfo, hasRe if hasRegionStats { c.GetRegionStats().Observe(region, stores) } - if !isPrepared && isNew { - c.GetCoordinator().GetPrepareChecker().Collect(region) - } } diff --git a/pkg/core/region.go b/pkg/core/region.go index 6875844b7a6..b141e8478da 100644 --- a/pkg/core/region.go +++ b/pkg/core/region.go @@ -41,7 +41,10 @@ import ( "go.uber.org/zap" ) -const randomRegionMaxRetry = 10 +const ( + randomRegionMaxRetry = 10 + scanRegionLimit = 1000 +) // errRegionIsStale is error info for region is stale. func errRegionIsStale(region *metapb.Region, origin *metapb.Region) error { @@ -1337,11 +1340,23 @@ func (r *RegionsInfo) GetStoreWriteRate(storeID uint64) (bytesRate, keysRate flo return } -// GetClusterNotFromStorageRegionsCnt gets the total count of regions that not loaded from storage anymore +// GetClusterNotFromStorageRegionsCnt gets the `NotFromStorageRegionsCnt` count of regions that not loaded from storage anymore. func (r *RegionsInfo) GetClusterNotFromStorageRegionsCnt() int { + r.t.RLock() + defer r.t.RUnlock() + return r.tree.notFromStorageRegionsCount() +} + +// GetNotFromStorageRegionsCntByStore gets the `NotFromStorageRegionsCnt` count of a store's leader, follower and learner by storeID. +func (r *RegionsInfo) GetNotFromStorageRegionsCntByStore(storeID uint64) int { r.st.RLock() defer r.st.RUnlock() - return r.tree.notFromStorageRegionsCnt + return r.getNotFromStorageRegionsCntByStoreLocked(storeID) +} + +// getNotFromStorageRegionsCntByStoreLocked gets the `NotFromStorageRegionsCnt` count of a store's leader, follower and learner by storeID. +func (r *RegionsInfo) getNotFromStorageRegionsCntByStoreLocked(storeID uint64) int { + return r.leaders[storeID].notFromStorageRegionsCount() + r.followers[storeID].notFromStorageRegionsCount() + r.learners[storeID].notFromStorageRegionsCount() } // GetMetaRegions gets a set of metapb.Region from regionMap @@ -1377,7 +1392,7 @@ func (r *RegionsInfo) GetStoreRegionCount(storeID uint64) int { return r.getStoreRegionCountLocked(storeID) } -// GetStoreRegionCount gets the total count of a store's leader, follower and learner RegionInfo by storeID +// getStoreRegionCountLocked gets the total count of a store's leader, follower and learner RegionInfo by storeID func (r *RegionsInfo) getStoreRegionCountLocked(storeID uint64) int { return r.leaders[storeID].length() + r.followers[storeID].length() + r.learners[storeID].length() } @@ -1610,16 +1625,31 @@ func (r *RegionsInfo) ScanRegionWithIterator(startKey []byte, iterator func(regi // GetRegionSizeByRange scans regions intersecting [start key, end key), returns the total region size of this range. 
func (r *RegionsInfo) GetRegionSizeByRange(startKey, endKey []byte) int64 { - r.t.RLock() - defer r.t.RUnlock() var size int64 - r.tree.scanRange(startKey, func(region *RegionInfo) bool { - if len(endKey) > 0 && bytes.Compare(region.GetStartKey(), endKey) >= 0 { - return false + for { + r.t.RLock() + var cnt int + r.tree.scanRange(startKey, func(region *RegionInfo) bool { + if len(endKey) > 0 && bytes.Compare(region.GetStartKey(), endKey) >= 0 { + return false + } + if cnt >= scanRegionLimit { + return false + } + cnt++ + startKey = region.GetEndKey() + size += region.GetApproximateSize() + return true + }) + r.t.RUnlock() + if cnt == 0 { + break } - size += region.GetApproximateSize() - return true - }) + if len(startKey) == 0 { + break + } + } + return size } diff --git a/pkg/core/region_test.go b/pkg/core/region_test.go index 50302de920e..508e7aa59aa 100644 --- a/pkg/core/region_test.go +++ b/pkg/core/region_test.go @@ -18,8 +18,10 @@ import ( "crypto/rand" "fmt" "math" + mrand "math/rand" "strconv" "testing" + "time" "github.com/pingcap/failpoint" "github.com/pingcap/kvproto/pkg/metapb" @@ -658,6 +660,124 @@ func BenchmarkRandomRegion(b *testing.B) { } } +func BenchmarkRandomSetRegion(b *testing.B) { + regions := NewRegionsInfo() + var items []*RegionInfo + for i := 0; i < 1000000; i++ { + peer := &metapb.Peer{StoreId: 1, Id: uint64(i + 1)} + region := NewRegionInfo(&metapb.Region{ + Id: uint64(i + 1), + Peers: []*metapb.Peer{peer}, + StartKey: []byte(fmt.Sprintf("%20d", i)), + EndKey: []byte(fmt.Sprintf("%20d", i+1)), + }, peer) + origin, overlaps, rangeChanged := regions.SetRegion(region) + regions.UpdateSubTree(region, origin, overlaps, rangeChanged) + items = append(items, region) + } + b.ResetTimer() + for i := 0; i < b.N; i++ { + item := items[i%len(items)] + item.approximateKeys = int64(200000) + item.approximateSize = int64(20) + origin, overlaps, rangeChanged := regions.SetRegion(item) + regions.UpdateSubTree(item, origin, overlaps, rangeChanged) + } +} + +func TestGetRegionSizeByRange(t *testing.T) { + regions := NewRegionsInfo() + nums := 1000010 + for i := 0; i < nums; i++ { + peer := &metapb.Peer{StoreId: 1, Id: uint64(i + 1)} + endKey := []byte(fmt.Sprintf("%20d", i+1)) + if i == nums-1 { + endKey = []byte("") + } + region := NewRegionInfo(&metapb.Region{ + Id: uint64(i + 1), + Peers: []*metapb.Peer{peer}, + StartKey: []byte(fmt.Sprintf("%20d", i)), + EndKey: endKey, + }, peer, SetApproximateSize(10)) + origin, overlaps, rangeChanged := regions.SetRegion(region) + regions.UpdateSubTree(region, origin, overlaps, rangeChanged) + } + totalSize := regions.GetRegionSizeByRange([]byte(""), []byte("")) + require.Equal(t, int64(nums*10), totalSize) + for i := 1; i < 10; i++ { + verifyNum := nums / i + endKey := fmt.Sprintf("%20d", verifyNum) + totalSize := regions.GetRegionSizeByRange([]byte(""), []byte(endKey)) + require.Equal(t, int64(verifyNum*10), totalSize) + } +} + +func BenchmarkRandomSetRegionWithGetRegionSizeByRange(b *testing.B) { + regions := NewRegionsInfo() + var items []*RegionInfo + for i := 0; i < 1000000; i++ { + peer := &metapb.Peer{StoreId: 1, Id: uint64(i + 1)} + region := NewRegionInfo(&metapb.Region{ + Id: uint64(i + 1), + Peers: []*metapb.Peer{peer}, + StartKey: []byte(fmt.Sprintf("%20d", i)), + EndKey: []byte(fmt.Sprintf("%20d", i+1)), + }, peer, SetApproximateSize(10)) + origin, overlaps, rangeChanged := regions.SetRegion(region) + regions.UpdateSubTree(region, origin, overlaps, rangeChanged) + items = append(items, region) + } + b.ResetTimer() + go 
func() { + for { + regions.GetRegionSizeByRange([]byte(""), []byte("")) + time.Sleep(time.Millisecond) + } + }() + for i := 0; i < b.N; i++ { + item := items[i%len(items)] + item.approximateKeys = int64(200000) + origin, overlaps, rangeChanged := regions.SetRegion(item) + regions.UpdateSubTree(item, origin, overlaps, rangeChanged) + } +} + +func BenchmarkRandomSetRegionWithGetRegionSizeByRangeParallel(b *testing.B) { + regions := NewRegionsInfo() + var items []*RegionInfo + for i := 0; i < 1000000; i++ { + peer := &metapb.Peer{StoreId: 1, Id: uint64(i + 1)} + region := NewRegionInfo(&metapb.Region{ + Id: uint64(i + 1), + Peers: []*metapb.Peer{peer}, + StartKey: []byte(fmt.Sprintf("%20d", i)), + EndKey: []byte(fmt.Sprintf("%20d", i+1)), + }, peer) + origin, overlaps, rangeChanged := regions.SetRegion(region) + regions.UpdateSubTree(region, origin, overlaps, rangeChanged) + items = append(items, region) + } + b.ResetTimer() + go func() { + for { + regions.GetRegionSizeByRange([]byte(""), []byte("")) + time.Sleep(time.Millisecond) + } + }() + + b.RunParallel( + func(pb *testing.PB) { + for pb.Next() { + item := items[mrand.Intn(len(items))] + n := item.Clone(SetApproximateSize(20)) + origin, overlaps, rangeChanged := regions.SetRegion(n) + regions.UpdateSubTree(item, origin, overlaps, rangeChanged) + } + }, + ) +} + const keyLength = 100 func randomBytes(n int) []byte { diff --git a/pkg/core/region_tree.go b/pkg/core/region_tree.go index ed3445de6b6..333e1730ec8 100644 --- a/pkg/core/region_tree.go +++ b/pkg/core/region_tree.go @@ -1,4 +1,5 @@ // Copyright 2016 TiKV Project Authors. +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -82,6 +83,13 @@ func (t *regionTree) length() int { return t.tree.Len() } +func (t *regionTree) notFromStorageRegionsCount() int { + if t == nil { + return 0 + } + return t.notFromStorageRegionsCnt +} + // GetOverlaps returns the range items that has some intersections with the given items. func (t *regionTree) overlaps(item *regionItem) []*regionItem { // note that Find() gets the last item that is less or equal than the item. diff --git a/pkg/core/store_option.go b/pkg/core/store_option.go index 8a2aa1ef089..93b25562731 100644 --- a/pkg/core/store_option.go +++ b/pkg/core/store_option.go @@ -274,3 +274,23 @@ func SetLastAwakenTime(lastAwaken time.Time) StoreCreateOption { store.lastAwakenTime = lastAwaken } } + +// SetStoreMeta sets the meta for the store. +// NOTICE: LastHeartbeat is not persisted each time, so it is not set by this function. Please use SetLastHeartbeatTS instead. 
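On the GetRegionSizeByRange rewrite and the benchmarks above: the scan now takes the tree read lock per batch of scanRegionLimit regions and resumes from the last end key, so a very large range no longer holds the lock for the entire walk. A much-simplified sketch of that chunked-scan idea over a plain sorted slice; store, sumSizesChunked, and the sizes below are illustrative, not the RegionsInfo API:

package main

import (
	"fmt"
	"sync"
)

type store struct {
	mu    sync.RWMutex
	sizes []int64 // stand-in for regions ordered by start key
}

// sumSizesChunked walks the slice in fixed-size batches, holding the read
// lock per batch instead of for the whole scan, so writers are not starved.
func (s *store) sumSizesChunked(batch int) int64 {
	var total int64
	for start := 0; ; start += batch {
		s.mu.RLock()
		end := start + batch
		if end > len(s.sizes) {
			end = len(s.sizes)
		}
		for _, sz := range s.sizes[start:end] {
			total += sz
		}
		done := end == len(s.sizes)
		s.mu.RUnlock()
		if done {
			return total
		}
	}
}

func main() {
	s := &store{sizes: make([]int64, 2500)}
	for i := range s.sizes {
		s.sizes[i] = 10
	}
	fmt.Println(s.sumSizesChunked(1000)) // 25000
}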
+func SetStoreMeta(newMeta *metapb.Store) StoreCreateOption { + return func(store *StoreInfo) { + meta := typeutil.DeepClone(store.meta, StoreFactory) + meta.Version = newMeta.GetVersion() + meta.GitHash = newMeta.GetGitHash() + meta.Address = newMeta.GetAddress() + meta.StatusAddress = newMeta.GetStatusAddress() + meta.PeerAddress = newMeta.GetPeerAddress() + meta.StartTimestamp = newMeta.GetStartTimestamp() + meta.DeployPath = newMeta.GetDeployPath() + meta.State = newMeta.GetState() + meta.Labels = newMeta.GetLabels() + meta.NodeState = newMeta.GetNodeState() + meta.PhysicallyDestroyed = newMeta.GetPhysicallyDestroyed() + store.meta = meta + } +} diff --git a/pkg/dashboard/adapter/manager.go b/pkg/dashboard/adapter/manager.go index a3691242c8f..293d8ad6549 100644 --- a/pkg/dashboard/adapter/manager.go +++ b/pkg/dashboard/adapter/manager.go @@ -19,6 +19,7 @@ import ( "sync" "time" + "github.com/pingcap/failpoint" "github.com/pingcap/kvproto/pkg/pdpb" "github.com/pingcap/log" "github.com/pingcap/tidb-dashboard/pkg/apiserver" @@ -75,6 +76,10 @@ func (m *Manager) Stop() { func (m *Manager) serviceLoop() { defer logutil.LogPanic() defer m.wg.Done() + // TODO: After we fix the atomic problem of config, we can remove this failpoint. + failpoint.Inject("skipDashboardLoop", func() { + failpoint.Return() + }) ticker := time.NewTicker(CheckInterval) defer ticker.Stop() diff --git a/pkg/dashboard/adapter/redirector_test.go b/pkg/dashboard/adapter/redirector_test.go index f192fbeb1f2..c5d837507fc 100644 --- a/pkg/dashboard/adapter/redirector_test.go +++ b/pkg/dashboard/adapter/redirector_test.go @@ -70,17 +70,17 @@ func (suite *redirectorTestSuite) TestReverseProxy() { suite.redirector.SetAddress(suite.tempServer.URL) // Test normal forwarding - req, err := http.NewRequest(http.MethodGet, redirectorServer.URL, nil) + req, err := http.NewRequest(http.MethodGet, redirectorServer.URL, http.NoBody) suite.NoError(err) checkHTTPRequest(suite.Require(), suite.noRedirectHTTPClient, req, http.StatusOK, suite.tempText) // Test the requests that are forwarded by others - req, err = http.NewRequest(http.MethodGet, redirectorServer.URL, nil) + req, err = http.NewRequest(http.MethodGet, redirectorServer.URL, http.NoBody) suite.NoError(err) req.Header.Set(proxyHeader, "other") checkHTTPRequest(suite.Require(), suite.noRedirectHTTPClient, req, http.StatusOK, suite.tempText) // Test LoopDetected suite.redirector.SetAddress(redirectorServer.URL) - req, err = http.NewRequest(http.MethodGet, redirectorServer.URL, nil) + req, err = http.NewRequest(http.MethodGet, redirectorServer.URL, http.NoBody) suite.NoError(err) checkHTTPRequest(suite.Require(), suite.noRedirectHTTPClient, req, http.StatusLoopDetected, "") } @@ -90,11 +90,11 @@ func (suite *redirectorTestSuite) TestTemporaryRedirect() { defer redirectorServer.Close() suite.redirector.SetAddress(suite.tempServer.URL) // Test TemporaryRedirect - req, err := http.NewRequest(http.MethodGet, redirectorServer.URL, nil) + req, err := http.NewRequest(http.MethodGet, redirectorServer.URL, http.NoBody) suite.NoError(err) checkHTTPRequest(suite.Require(), suite.noRedirectHTTPClient, req, http.StatusTemporaryRedirect, "") // Test Response - req, err = http.NewRequest(http.MethodGet, redirectorServer.URL, nil) + req, err = http.NewRequest(http.MethodGet, redirectorServer.URL, http.NoBody) suite.NoError(err) checkHTTPRequest(suite.Require(), http.DefaultClient, req, http.StatusOK, suite.tempText) } diff --git a/pkg/election/leadership.go b/pkg/election/leadership.go index 
d5d73e90b58..132b3a8aad9 100644 --- a/pkg/election/leadership.go +++ b/pkg/election/leadership.go @@ -32,7 +32,11 @@ import ( "go.uber.org/zap" ) -const watchLoopUnhealthyTimeout = 60 * time.Second +const ( + defaultCampaignTimesSlot = 10 + watchLoopUnhealthyTimeout = 60 * time.Second + campaignTimesRecordTimeout = 5 * time.Minute +) // GetLeader gets the corresponding leader from etcd by given leaderPath (as the key). func GetLeader(c *clientv3.Client, leaderPath string) (*pdpb.Member, int64, error) { @@ -62,20 +66,24 @@ type Leadership struct { keepAliveCtx context.Context keepAliveCancelFunc context.CancelFunc keepAliveCancelFuncLock syncutil.Mutex + // campaignTimes is used to record the campaign times of the leader within `campaignTimesRecordTimeout`. + // It is ordered by time to prevent the leader from campaigning too frequently. + campaignTimes []time.Time } // NewLeadership creates a new Leadership. func NewLeadership(client *clientv3.Client, leaderKey, purpose string) *Leadership { leadership := &Leadership{ - purpose: purpose, - client: client, - leaderKey: leaderKey, + purpose: purpose, + client: client, + leaderKey: leaderKey, + campaignTimes: make([]time.Time, 0, defaultCampaignTimesSlot), } return leadership } // getLease gets the lease of leadership, only if leadership is valid, -// i.e the owner is a true leader, the lease is not nil. +// i.e. the owner is a true leader, the lease is not nil. func (ls *Leadership) getLease() *lease { l := ls.lease.Load() if l == nil { @@ -104,8 +112,42 @@ func (ls *Leadership) GetLeaderKey() string { return ls.leaderKey } +// GetCampaignTimesNum is used to get the campaign times of the leader within `campaignTimesRecordTimeout`. +func (ls *Leadership) GetCampaignTimesNum() int { + if ls == nil { + return 0 + } + return len(ls.campaignTimes) +} + +// ResetCampaignTimes is used to reset the campaign times of the leader. +func (ls *Leadership) ResetCampaignTimes() { + if ls == nil { + return + } + ls.campaignTimes = make([]time.Time, 0, defaultCampaignTimesSlot) +} + +// addCampaignTimes is used to add the campaign times of the leader. +func (ls *Leadership) addCampaignTimes() { + if ls == nil { + return + } + for i := len(ls.campaignTimes) - 1; i >= 0; i-- { + if time.Since(ls.campaignTimes[i]) > campaignTimesRecordTimeout { + // remove the time which is more than `campaignTimesRecordTimeout` + // array is sorted by time + ls.campaignTimes = ls.campaignTimes[i:] + break + } + } + + ls.campaignTimes = append(ls.campaignTimes, time.Now()) +} + // Campaign is used to campaign the leader with given lease and returns a leadership func (ls *Leadership) Campaign(leaseTimeout int64, leaderData string, cmps ...clientv3.Cmp) error { + ls.addCampaignTimes() ls.leaderValue = leaderData // Create a new lease to campaign newLease := &lease{ @@ -260,6 +302,7 @@ func (ls *Leadership) Watch(serverCtx context.Context, revision int64) { continue } } + lastReceivedResponseTime = time.Now() log.Info("watch channel is created", zap.Int64("revision", revision), zap.String("leader-key", ls.leaderKey), zap.String("purpose", ls.purpose)) watchChanLoop: diff --git a/pkg/errs/errno.go b/pkg/errs/errno.go index 181dfc9b393..a4320238374 100644 --- a/pkg/errs/errno.go +++ b/pkg/errs/errno.go @@ -102,6 +102,8 @@ var ( // region errors var ( + // ErrRegionInvalidID is error info for region id invalid. + ErrRegionInvalidID = errors.Normalize("invalid region id", errors.RFCCodeText("PD:region:ErrRegionInvalidID")) // ErrRegionNotAdjacent is error info for region not adjacent. 
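On the pkg/election/leadership.go hunk above: campaign timestamps are kept in a time-ordered slice, entries older than campaignTimesRecordTimeout are pruned before each append, and the slice length then answers how often the leader has campaigned recently. A minimal sketch of that sliding-window bookkeeping; recordCampaign and the five-minute window below are illustrative stand-ins:

package main

import (
	"fmt"
	"time"
)

// recordCampaign drops entries older than `window` and appends the new
// timestamp, keeping the slice ordered by time.
func recordCampaign(times []time.Time, now time.Time, window time.Duration) []time.Time {
	cut := 0
	for cut < len(times) && now.Sub(times[cut]) > window {
		cut++
	}
	return append(times[cut:], now)
}

func main() {
	window := 5 * time.Minute
	base := time.Now()
	var times []time.Time
	times = recordCampaign(times, base.Add(-10*time.Minute), window) // will expire
	times = recordCampaign(times, base.Add(-1*time.Minute), window)
	times = recordCampaign(times, base, window)
	fmt.Println(len(times)) // 2: the ten-minute-old entry was dropped
}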
ErrRegionNotAdjacent = errors.Normalize("two regions are not adjacent", errors.RFCCodeText("PD:region:ErrRegionNotAdjacent")) // ErrRegionNotFound is error info for region not found. @@ -153,10 +155,13 @@ var ( // placement errors var ( - ErrRuleContent = errors.Normalize("invalid rule content, %s", errors.RFCCodeText("PD:placement:ErrRuleContent")) - ErrLoadRule = errors.Normalize("load rule failed", errors.RFCCodeText("PD:placement:ErrLoadRule")) - ErrLoadRuleGroup = errors.Normalize("load rule group failed", errors.RFCCodeText("PD:placement:ErrLoadRuleGroup")) - ErrBuildRuleList = errors.Normalize("build rule list failed, %s", errors.RFCCodeText("PD:placement:ErrBuildRuleList")) + ErrRuleContent = errors.Normalize("invalid rule content, %s", errors.RFCCodeText("PD:placement:ErrRuleContent")) + ErrLoadRule = errors.Normalize("load rule failed", errors.RFCCodeText("PD:placement:ErrLoadRule")) + ErrLoadRuleGroup = errors.Normalize("load rule group failed", errors.RFCCodeText("PD:placement:ErrLoadRuleGroup")) + ErrBuildRuleList = errors.Normalize("build rule list failed, %s", errors.RFCCodeText("PD:placement:ErrBuildRuleList")) + ErrPlacementDisabled = errors.Normalize("placement rules feature is disabled", errors.RFCCodeText("PD:placement:ErrPlacementDisabled")) + ErrKeyFormat = errors.Normalize("key should be in hex format, %s", errors.RFCCodeText("PD:placement:ErrKeyFormat")) + ErrRuleNotFound = errors.Normalize("rule not found", errors.RFCCodeText("PD:placement:ErrRuleNotFound")) ) // region label errors @@ -192,6 +197,8 @@ var ( var ( ErrRedirect = errors.Normalize("redirect failed", errors.RFCCodeText("PD:apiutil:ErrRedirect")) ErrOptionNotExist = errors.Normalize("the option %s does not exist", errors.RFCCodeText("PD:apiutil:ErrOptionNotExist")) + // ErrRedirectToNotLeader is the error message for redirect to not leader. 
+ ErrRedirectToNotLeader = errors.Normalize("redirect to not leader", errors.RFCCodeText("PD:apiutil:ErrRedirectToNotLeader")) ) // grpcutil errors @@ -403,3 +410,9 @@ var ( ErrDeleteReservedGroup = errors.Normalize("cannot delete reserved group", errors.RFCCodeText("PD:resourcemanager:ErrDeleteReservedGroup")) ErrInvalidGroup = errors.Normalize("invalid group settings, please check the group name, priority and the number of resources", errors.RFCCodeText("PD:resourcemanager:ErrInvalidGroup")) ) + +// Micro service errors +var ( + ErrNotFoundSchedulingAddr = errors.Normalize("cannot find scheduling address", errors.RFCCodeText("PD:mcs:ErrNotFoundSchedulingAddr")) + ErrSchedulingServer = errors.Normalize("scheduling server meets %v", errors.RFCCodeText("PD:mcs:ErrSchedulingServer")) +) diff --git a/pkg/mcs/resourcemanager/server/apis/v1/api.go b/pkg/mcs/resourcemanager/server/apis/v1/api.go index ffcb9318590..7b5f2903484 100644 --- a/pkg/mcs/resourcemanager/server/apis/v1/api.go +++ b/pkg/mcs/resourcemanager/server/apis/v1/api.go @@ -81,10 +81,10 @@ func NewService(srv *rmserver.Service) *Service { c.Set(multiservicesapi.ServiceContextKey, manager.GetBasicServer()) c.Next() }) - apiHandlerEngine.Use(multiservicesapi.ServiceRedirector()) apiHandlerEngine.GET("metrics", utils.PromHandler()) pprof.Register(apiHandlerEngine) endpoint := apiHandlerEngine.Group(APIPathPrefix) + endpoint.Use(multiservicesapi.ServiceRedirector()) s := &Service{ manager: manager, apiHandlerEngine: apiHandlerEngine, diff --git a/pkg/mcs/resourcemanager/server/config.go b/pkg/mcs/resourcemanager/server/config.go index 3f64b2987fd..bcd5a853dfc 100644 --- a/pkg/mcs/resourcemanager/server/config.go +++ b/pkg/mcs/resourcemanager/server/config.go @@ -102,6 +102,9 @@ type ControllerConfig struct { // RequestUnit is the configuration determines the coefficients of the RRU and WRU cost. // This configuration should be modified carefully. RequestUnit RequestUnitConfig `toml:"request-unit" json:"request-unit"` + + // EnableControllerTraceLog is to control whether resource control client enable trace. + EnableControllerTraceLog bool `toml:"enable-controller-trace-log" json:"enable-controller-trace-log,string"` } // Adjust adjusts the configuration and initializes it with the default value if necessary. @@ -250,6 +253,26 @@ func (c *Config) adjustLog(meta *configutil.ConfigMetaData) { } } +// GetName returns the Name +func (c *Config) GetName() string { + return c.Name +} + +// GeBackendEndpoints returns the BackendEndpoints +func (c *Config) GeBackendEndpoints() string { + return c.BackendEndpoints +} + +// GetListenAddr returns the ListenAddr +func (c *Config) GetListenAddr() string { + return c.ListenAddr +} + +// GetAdvertiseListenAddr returns the AdvertiseListenAddr +func (c *Config) GetAdvertiseListenAddr() string { + return c.AdvertiseListenAddr +} + // GetTLSConfig returns the TLS config. func (c *Config) GetTLSConfig() *grpcutil.TLSConfig { return &c.Security.TLSConfig diff --git a/pkg/mcs/resourcemanager/server/resource_group.go b/pkg/mcs/resourcemanager/server/resource_group.go index 863cfd19026..fc3a58cab51 100644 --- a/pkg/mcs/resourcemanager/server/resource_group.go +++ b/pkg/mcs/resourcemanager/server/resource_group.go @@ -138,7 +138,7 @@ func FromProtoResourceGroup(group *rmpb.ResourceGroup) *ResourceGroup { // RequestRU requests the RU of the resource group. 
func (rg *ResourceGroup) RequestRU( now time.Time, - neededTokens float64, + requiredToken float64, targetPeriodMs, clientUniqueID uint64, ) *rmpb.GrantedRUTokenBucket { rg.Lock() @@ -147,7 +147,7 @@ func (rg *ResourceGroup) RequestRU( if rg.RUSettings == nil || rg.RUSettings.RU.Settings == nil { return nil } - tb, trickleTimeMs := rg.RUSettings.RU.request(now, neededTokens, targetPeriodMs, clientUniqueID) + tb, trickleTimeMs := rg.RUSettings.RU.request(now, requiredToken, targetPeriodMs, clientUniqueID) return &rmpb.GrantedRUTokenBucket{GrantedTokens: tb, TrickleTimeMs: trickleTimeMs} } diff --git a/pkg/mcs/resourcemanager/server/server.go b/pkg/mcs/resourcemanager/server/server.go index 47248208c8a..2a1be3e0ca5 100644 --- a/pkg/mcs/resourcemanager/server/server.go +++ b/pkg/mcs/resourcemanager/server/server.go @@ -152,7 +152,7 @@ func (s *Server) primaryElectionLoop() { func (s *Server) campaignLeader() { log.Info("start to campaign the primary/leader", zap.String("campaign-resource-manager-primary-name", s.participant.Name())) - if err := s.participant.CampaignLeader(s.cfg.LeaderLease); err != nil { + if err := s.participant.CampaignLeader(s.Context(), s.cfg.LeaderLease); err != nil { if err.Error() == errs.ErrEtcdTxnConflict.Error() { log.Info("campaign resource manager primary meets error due to txn conflict, another server may campaign successfully", zap.String("campaign-resource-manager-primary-name", s.participant.Name())) @@ -296,14 +296,14 @@ func (s *Server) startServer() (err error) { // different service modes provided by the same pd-server binary serverInfo.WithLabelValues(versioninfo.PDReleaseVersion, versioninfo.PDGitHash).Set(float64(time.Now().Unix())) - uniqueName := s.cfg.ListenAddr + uniqueName := s.cfg.GetAdvertiseListenAddr() uniqueID := memberutil.GenerateUniqueID(uniqueName) log.Info("joining primary election", zap.String("participant-name", uniqueName), zap.Uint64("participant-id", uniqueID)) s.participant = member.NewParticipant(s.GetClient(), utils.ResourceManagerServiceName) p := &resource_manager.Participant{ Name: uniqueName, Id: uniqueID, // id is unique among all participants - ListenUrls: []string{s.cfg.AdvertiseListenAddr}, + ListenUrls: []string{s.cfg.GetAdvertiseListenAddr()}, } s.participant.InitInfo(p, endpoint.ResourceManagerSvcRootPath(s.clusterID), utils.PrimaryKey, "primary election") @@ -312,7 +312,7 @@ func (s *Server) startServer() (err error) { manager: NewManager[*Server](s), } - if err := s.InitListener(s.GetTLSConfig(), s.cfg.ListenAddr); err != nil { + if err := s.InitListener(s.GetTLSConfig(), s.cfg.GetListenAddr()); err != nil { return err } diff --git a/pkg/mcs/resourcemanager/server/token_buckets.go b/pkg/mcs/resourcemanager/server/token_buckets.go index 5efab52fe68..05a93c32673 100644 --- a/pkg/mcs/resourcemanager/server/token_buckets.go +++ b/pkg/mcs/resourcemanager/server/token_buckets.go @@ -20,6 +20,8 @@ import ( "github.com/gogo/protobuf/proto" rmpb "github.com/pingcap/kvproto/pkg/resource_manager" + "github.com/pingcap/log" + "go.uber.org/zap" ) const ( @@ -31,6 +33,7 @@ const ( defaultReserveRatio = 0.5 defaultLoanCoefficient = 2 maxAssignTokens = math.MaxFloat64 / 1024 // assume max client connect is 1024 + slotExpireTimeout = 10 * time.Minute ) // GroupTokenBucket is a token bucket for a resource group. @@ -62,6 +65,7 @@ type TokenSlot struct { // tokenCapacity is the number of tokens in the slot. 
tokenCapacity float64 lastTokenCapacity float64 + lastReqTime time.Time } // GroupTokenBucketState is the running state of TokenBucket. @@ -75,7 +79,8 @@ type GroupTokenBucketState struct { LastUpdate *time.Time `json:"last_update,omitempty"` Initialized bool `json:"initialized"` // settingChanged is used to avoid that the number of tokens returned is jitter because of changing fill rate. - settingChanged bool + settingChanged bool + lastCheckExpireSlot time.Time } // Clone returns the copy of GroupTokenBucketState @@ -95,6 +100,7 @@ func (gts *GroupTokenBucketState) Clone() *GroupTokenBucketState { Initialized: gts.Initialized, tokenSlots: tokenSlots, clientConsumptionTokensSum: gts.clientConsumptionTokensSum, + lastCheckExpireSlot: gts.lastCheckExpireSlot, } } @@ -119,16 +125,18 @@ func (gts *GroupTokenBucketState) balanceSlotTokens( clientUniqueID uint64, settings *rmpb.TokenLimitSettings, requiredToken, elapseTokens float64) { + now := time.Now() slot, exist := gts.tokenSlots[clientUniqueID] if !exist { // Only slots that require a positive number will be considered alive, // but still need to allocate the elapsed tokens as well. if requiredToken != 0 { - slot = &TokenSlot{} + slot = &TokenSlot{lastReqTime: now} gts.tokenSlots[clientUniqueID] = slot gts.clientConsumptionTokensSum = 0 } } else { + slot.lastReqTime = now if gts.clientConsumptionTokensSum >= maxAssignTokens { gts.clientConsumptionTokensSum = 0 } @@ -139,6 +147,16 @@ func (gts *GroupTokenBucketState) balanceSlotTokens( } } + if time.Since(gts.lastCheckExpireSlot) >= slotExpireTimeout { + gts.lastCheckExpireSlot = now + for clientUniqueID, slot := range gts.tokenSlots { + if time.Since(slot.lastReqTime) >= slotExpireTimeout { + delete(gts.tokenSlots, clientUniqueID) + log.Info("delete expired resource group slot", zap.Time("last-req-time", slot.lastReqTime), + zap.Duration("expire-timeout", slotExpireTimeout), zap.Uint64("client-id", clientUniqueID), zap.Int("remaining-slots", len(gts.tokenSlots))) + } + } + } if len(gts.tokenSlots) == 0 { return } @@ -264,11 +282,12 @@ func (gtb *GroupTokenBucket) init(now time.Time, clientID uint64) { lastTokenCapacity: gtb.Tokens, } gtb.LastUpdate = &now + gtb.lastCheckExpireSlot = now gtb.Initialized = true } // updateTokens updates the tokens and settings. -func (gtb *GroupTokenBucket) updateTokens(now time.Time, burstLimit int64, clientUniqueID uint64, consumptionToken float64) { +func (gtb *GroupTokenBucket) updateTokens(now time.Time, burstLimit int64, clientUniqueID uint64, requiredToken float64) { var elapseTokens float64 if !gtb.Initialized { gtb.init(now, clientUniqueID) @@ -288,21 +307,21 @@ func (gtb *GroupTokenBucket) updateTokens(now time.Time, burstLimit int64, clien gtb.Tokens = burst } // Balance each slots. - gtb.balanceSlotTokens(clientUniqueID, gtb.Settings, consumptionToken, elapseTokens) + gtb.balanceSlotTokens(clientUniqueID, gtb.Settings, requiredToken, elapseTokens) } // request requests tokens from the corresponding slot.
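The balanceSlotTokens change above stamps each client slot with the time of its last request and, at most once per slotExpireTimeout, sweeps away slots that have gone quiet. A stripped-down sketch of that expiry pass over a plain map (locking, metrics, and the rate limiting of the sweep itself are omitted):

package main

import (
	"fmt"
	"time"
)

// slot mirrors the idea of TokenSlot.lastReqTime in the patch: remember when a
// client last asked for tokens so idle clients can be dropped.
type slot struct {
	lastReqTime time.Time
}

// sweepExpired deletes slots that have not issued a request within timeout.
// The caller is expected to hold whatever lock protects the map.
func sweepExpired(slots map[uint64]*slot, timeout time.Duration, now time.Time) {
	for id, s := range slots {
		if now.Sub(s.lastReqTime) >= timeout {
			delete(slots, id) // deleting during range is safe in Go
			fmt.Printf("dropped idle client %d, %d slots remain\n", id, len(slots))
		}
	}
}

func main() {
	const timeout = 10 * time.Minute // same spirit as slotExpireTimeout
	now := time.Now()
	slots := map[uint64]*slot{
		1: {lastReqTime: now.Add(-time.Minute)}, // active, kept
		2: {lastReqTime: now.Add(-time.Hour)},   // idle, removed
	}
	sweepExpired(slots, timeout, now)
}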
func (gtb *GroupTokenBucket) request(now time.Time, - neededTokens float64, + requiredToken float64, targetPeriodMs, clientUniqueID uint64, ) (*rmpb.TokenBucket, int64) { burstLimit := gtb.Settings.GetBurstLimit() - gtb.updateTokens(now, burstLimit, clientUniqueID, neededTokens) + gtb.updateTokens(now, burstLimit, clientUniqueID, requiredToken) slot, ok := gtb.tokenSlots[clientUniqueID] if !ok { return &rmpb.TokenBucket{Settings: &rmpb.TokenLimitSettings{BurstLimit: burstLimit}}, 0 } - res, trickleDuration := slot.assignSlotTokens(neededTokens, targetPeriodMs) + res, trickleDuration := slot.assignSlotTokens(requiredToken, targetPeriodMs) // Update bucket to record all tokens. gtb.Tokens -= slot.lastTokenCapacity - slot.tokenCapacity slot.lastTokenCapacity = slot.tokenCapacity @@ -310,24 +329,24 @@ func (gtb *GroupTokenBucket) request(now time.Time, return res, trickleDuration } -func (ts *TokenSlot) assignSlotTokens(neededTokens float64, targetPeriodMs uint64) (*rmpb.TokenBucket, int64) { +func (ts *TokenSlot) assignSlotTokens(requiredToken float64, targetPeriodMs uint64) (*rmpb.TokenBucket, int64) { var res rmpb.TokenBucket burstLimit := ts.settings.GetBurstLimit() res.Settings = &rmpb.TokenLimitSettings{BurstLimit: burstLimit} // If BurstLimit < 0, just return. if burstLimit < 0 { - res.Tokens = neededTokens + res.Tokens = requiredToken return &res, 0 } // FillRate is used for the token server unavailable in abnormal situation. - if neededTokens <= 0 { + if requiredToken <= 0 { return &res, 0 } // If the current tokens can directly meet the requirement, returns the need token. - if ts.tokenCapacity >= neededTokens { - ts.tokenCapacity -= neededTokens + if ts.tokenCapacity >= requiredToken { + ts.tokenCapacity -= requiredToken // granted the total request tokens - res.Tokens = neededTokens + res.Tokens = requiredToken return &res, 0 } @@ -336,7 +355,7 @@ func (ts *TokenSlot) assignSlotTokens(neededTokens float64, targetPeriodMs uint6 hasRemaining := false if ts.tokenCapacity > 0 { grantedTokens = ts.tokenCapacity - neededTokens -= grantedTokens + requiredToken -= grantedTokens ts.tokenCapacity = 0 hasRemaining = true } @@ -373,36 +392,36 @@ func (ts *TokenSlot) assignSlotTokens(neededTokens float64, targetPeriodMs uint6 for i := 1; i < loanCoefficient; i++ { p[i] = float64(loanCoefficient-i)*float64(fillRate)*targetPeriodTimeSec + p[i-1] } - for i := 0; i < loanCoefficient && neededTokens > 0 && trickleTime < targetPeriodTimeSec; i++ { + for i := 0; i < loanCoefficient && requiredToken > 0 && trickleTime < targetPeriodTimeSec; i++ { loan := -ts.tokenCapacity if loan >= p[i] { continue } roundReserveTokens := p[i] - loan fillRate := float64(loanCoefficient-i) * float64(fillRate) - if roundReserveTokens > neededTokens { - ts.tokenCapacity -= neededTokens - grantedTokens += neededTokens + if roundReserveTokens > requiredToken { + ts.tokenCapacity -= requiredToken + grantedTokens += requiredToken trickleTime += grantedTokens / fillRate - neededTokens = 0 + requiredToken = 0 } else { roundReserveTime := roundReserveTokens / fillRate if roundReserveTime+trickleTime >= targetPeriodTimeSec { roundTokens := (targetPeriodTimeSec - trickleTime) * fillRate - neededTokens -= roundTokens + requiredToken -= roundTokens ts.tokenCapacity -= roundTokens grantedTokens += roundTokens trickleTime = targetPeriodTimeSec } else { grantedTokens += roundReserveTokens - neededTokens -= roundReserveTokens + requiredToken -= roundReserveTokens ts.tokenCapacity -= roundReserveTokens trickleTime += roundReserveTime } } 
} - if neededTokens > 0 && grantedTokens < defaultReserveRatio*float64(fillRate)*targetPeriodTimeSec { - reservedTokens := math.Min(neededTokens+grantedTokens, defaultReserveRatio*float64(fillRate)*targetPeriodTimeSec) + if requiredToken > 0 && grantedTokens < defaultReserveRatio*float64(fillRate)*targetPeriodTimeSec { + reservedTokens := math.Min(requiredToken+grantedTokens, defaultReserveRatio*float64(fillRate)*targetPeriodTimeSec) ts.tokenCapacity -= reservedTokens - grantedTokens grantedTokens = reservedTokens } diff --git a/pkg/mcs/scheduling/server/apis/v1/api.go b/pkg/mcs/scheduling/server/apis/v1/api.go index 39be00ef9a0..b59780b7a61 100644 --- a/pkg/mcs/scheduling/server/apis/v1/api.go +++ b/pkg/mcs/scheduling/server/apis/v1/api.go @@ -15,9 +15,13 @@ package apis import ( + "encoding/hex" + "errors" "fmt" "net/http" + "net/url" "strconv" + "strings" "sync" "github.com/gin-contrib/cors" @@ -26,16 +30,19 @@ import ( "github.com/gin-gonic/gin" "github.com/joho/godotenv" "github.com/pingcap/log" + "github.com/tikv/pd/pkg/errs" scheserver "github.com/tikv/pd/pkg/mcs/scheduling/server" mcsutils "github.com/tikv/pd/pkg/mcs/utils" sche "github.com/tikv/pd/pkg/schedule/core" "github.com/tikv/pd/pkg/schedule/handler" "github.com/tikv/pd/pkg/schedule/operator" + "github.com/tikv/pd/pkg/schedule/schedulers" "github.com/tikv/pd/pkg/statistics/utils" "github.com/tikv/pd/pkg/storage" "github.com/tikv/pd/pkg/utils/apiutil" "github.com/tikv/pd/pkg/utils/apiutil/multiservicesapi" "github.com/tikv/pd/pkg/utils/logutil" + "github.com/tikv/pd/pkg/utils/typeutil" "github.com/unrolled/render" ) @@ -99,10 +106,10 @@ func NewService(srv *scheserver.Service) *Service { c.Set(handlerKey, handler.NewHandler(&server{srv.Server})) c.Next() }) - apiHandlerEngine.Use(multiservicesapi.ServiceRedirector()) apiHandlerEngine.GET("metrics", mcsutils.PromHandler()) pprof.Register(apiHandlerEngine) root := apiHandlerEngine.Group(APIPathPrefix) + root.Use(multiservicesapi.ServiceRedirector()) s := &Service{ srv: srv, apiHandlerEngine: apiHandlerEngine, @@ -110,10 +117,12 @@ func NewService(srv *scheserver.Service) *Service { rd: createIndentRender(), } s.RegisterAdminRouter() + s.RegisterConfigRouter() s.RegisterOperatorsRouter() s.RegisterSchedulersRouter() s.RegisterCheckersRouter() s.RegisterHotspotRouter() + s.RegisterRegionsRouter() return s } @@ -121,6 +130,8 @@ func NewService(srv *scheserver.Service) *Service { func (s *Service) RegisterAdminRouter() { router := s.root.Group("admin") router.PUT("/log", changeLogLevel) + router.DELETE("cache/regions", deleteAllRegionCache) + router.DELETE("cache/regions/:id", deleteRegionCacheByID) } // RegisterSchedulersRouter registers the router of the schedulers handler. @@ -128,6 +139,8 @@ func (s *Service) RegisterSchedulersRouter() { router := s.root.Group("schedulers") router.GET("", getSchedulers) router.GET("/diagnostic/:name", getDiagnosticResult) + router.GET("/config", getSchedulerConfig) + router.GET("/config/:name/list", getSchedulerConfigByName) // TODO: in the future, we should split pauseOrResumeScheduler to two different APIs. // And we need to do one-to-two forwarding in the API middleware. router.POST("/:name", pauseOrResumeScheduler) @@ -160,6 +173,57 @@ func (s *Service) RegisterOperatorsRouter() { router.GET("/records", getOperatorRecords) } +// RegisterRegionsRouter registers the router of the regions handler. 
+func (s *Service) RegisterRegionsRouter() { + router := s.root.Group("regions") + router.POST("/accelerate-schedule", accelerateRegionsScheduleInRange) + router.POST("/accelerate-schedule/batch", accelerateRegionsScheduleInRanges) + router.POST("/scatter", scatterRegions) + router.POST("/split", splitRegions) + router.GET("/replicated", checkRegionsReplicated) +} + +// RegisterConfigRouter registers the router of the config handler. +func (s *Service) RegisterConfigRouter() { + router := s.root.Group("config") + router.GET("", getConfig) + + rules := router.Group("rules") + rules.GET("", getAllRules) + rules.GET("/group/:group", getRuleByGroup) + rules.GET("/region/:region", getRulesByRegion) + rules.GET("/region/:region/detail", checkRegionPlacementRule) + rules.GET("/key/:key", getRulesByKey) + + // We cannot merge `/rule` and `/rules`, because we allow `group_id` to be "group", + // which is the same as the prefix of `/rules/group/:group`. + rule := router.Group("rule") + rule.GET("/:group/:id", getRuleByGroupAndID) + + groups := router.Group("rule_groups") + groups.GET("", getAllGroupConfigs) + groups.GET("/:id", getRuleGroupConfig) + + placementRule := router.Group("placement-rule") + placementRule.GET("", getPlacementRules) + placementRule.GET("/:group", getPlacementRuleByGroup) + + regionLabel := router.Group("region-label") + regionLabel.GET("/rules", getAllRegionLabelRules) + regionLabel.GET("/rules/ids", getRegionLabelRulesByIDs) + regionLabel.GET("/rules/:id", getRegionLabelRuleByID) + + regions := router.Group("regions") + regions.GET("/:id/label/:key", getRegionLabelByKey) + regions.GET("/:id/labels", getRegionLabels) +} + +// @Tags admin +// @Summary Change the log level. +// @Produce json +// @Success 200 {string} string "The log level is updated." +// @Failure 400 {string} string "The input is invalid." +// @Router /admin/log [put] func changeLogLevel(c *gin.Context) { svr := c.MustGet(multiservicesapi.ServiceContextKey).(*scheserver.Server) var level string @@ -176,6 +240,60 @@ func changeLogLevel(c *gin.Context) { c.String(http.StatusOK, "The log level is updated.") } +// @Tags config +// @Summary Get full config. +// @Produce json +// @Success 200 {object} config.Config +// @Router /config [get] +func getConfig(c *gin.Context) { + svr := c.MustGet(multiservicesapi.ServiceContextKey).(*scheserver.Server) + cfg := svr.GetConfig() + cfg.Schedule.MaxMergeRegionKeys = cfg.Schedule.GetMaxMergeRegionKeys() + c.IndentedJSON(http.StatusOK, cfg) +} + +// @Tags admin +// @Summary Drop all regions from cache. +// @Produce json +// @Success 200 {string} string "All regions are removed from server cache." +// @Failure 500 {string} string "PD server failed to proceed the request." +// @Router /admin/cache/regions [delete] +func deleteAllRegionCache(c *gin.Context) { + svr := c.MustGet(multiservicesapi.ServiceContextKey).(*scheserver.Server) + cluster := svr.GetCluster() + if cluster == nil { + c.String(http.StatusInternalServerError, errs.ErrNotBootstrapped.GenWithStackByArgs().Error()) + return + } + cluster.DropCacheAllRegion() + c.String(http.StatusOK, "All regions are removed from server cache.") +} + +// @Tags admin +// @Summary Drop a specific region from cache. +// @Param id path integer true "Region Id" +// @Produce json +// @Success 200 {string} string "The region is removed from server cache." +// @Failure 400 {string} string "The input is invalid." +// @Failure 500 {string} string "PD server failed to proceed the request." 
+// @Router /admin/cache/regions/{id} [delete] +func deleteRegionCacheByID(c *gin.Context) { + svr := c.MustGet(multiservicesapi.ServiceContextKey).(*scheserver.Server) + cluster := svr.GetCluster() + if cluster == nil { + c.String(http.StatusInternalServerError, errs.ErrNotBootstrapped.GenWithStackByArgs().Error()) + return + } + regionIDStr := c.Param("id") + regionID, err := strconv.ParseUint(regionIDStr, 10, 64) + if err != nil { + c.String(http.StatusBadRequest, err.Error()) + return + } + cluster.DropCacheRegion(regionID) + c.String(http.StatusOK, "The region is removed from server cache.") +} + // @Tags operators // @Summary Get an operator by ID. // @Param region_id path int true "A Region's Id" @@ -385,6 +503,60 @@ func getSchedulers(c *gin.Context) { c.IndentedJSON(http.StatusOK, output) } +// @Tags schedulers +// @Summary List all scheduler configs. +// @Produce json +// @Success 200 {object} map[string]interface{} +// @Failure 500 {string} string "PD server failed to proceed the request." +// @Router /schedulers/config/ [get] +func getSchedulerConfig(c *gin.Context) { + handler := c.MustGet(handlerKey).(*handler.Handler) + sc, err := handler.GetSchedulersController() + if err != nil { + c.String(http.StatusInternalServerError, err.Error()) + return + } + sches, configs, err := sc.GetAllSchedulerConfigs() + if err != nil { + c.String(http.StatusInternalServerError, err.Error()) + return + } + c.IndentedJSON(http.StatusOK, schedulers.ToPayload(sches, configs)) +} + +// @Tags schedulers +// @Summary List scheduler config by name. +// @Produce json +// @Success 200 {object} map[string]interface{} +// @Failure 404 {string} string scheduler not found +// @Failure 500 {string} string "PD server failed to proceed the request." +// @Router /schedulers/config/{name}/list [get] +func getSchedulerConfigByName(c *gin.Context) { + handler := c.MustGet(handlerKey).(*handler.Handler) + sc, err := handler.GetSchedulersController() + if err != nil { + c.String(http.StatusInternalServerError, err.Error()) + return + } + handlers := sc.GetSchedulerHandlers() + name := c.Param("name") + if _, ok := handlers[name]; !ok { + c.String(http.StatusNotFound, errs.ErrSchedulerNotFound.GenWithStackByArgs().Error()) + return + } + isDisabled, err := sc.IsSchedulerDisabled(name) + if err != nil { + c.String(http.StatusInternalServerError, err.Error()) + return + } + if isDisabled { + c.String(http.StatusNotFound, errs.ErrSchedulerNotFound.GenWithStackByArgs().Error()) + return + } + c.Request.URL.Path = "/list" + handlers[name].ServeHTTP(c.Writer, c.Request) +} + // @Tags schedulers // @Summary List schedulers diagnostic result. // @Produce json @@ -475,7 +647,7 @@ func getHotRegions(typ utils.RWType, c *gin.Context) { for _, storeID := range storeIDs { id, err := strconv.ParseUint(storeID, 10, 64) if err != nil { - c.String(http.StatusBadRequest, fmt.Sprintf("invalid store id: %s", storeID)) + c.String(http.StatusBadRequest, errs.ErrInvalidStoreID.FastGenByArgs(storeID).Error()) return } _, err = handler.GetStore(id) @@ -539,8 +711,6 @@ func getHotBuckets(c *gin.Context) { // @Accept json // @Produce json // @Success 200 {object} storage.HistoryHotRegions -// @Failure 400 {string} string "The input is invalid." -// @Failure 500 {string} string "PD server failed to proceed the request." // @Router /hotspot/regions/history [get] func getHistoryHotRegions(c *gin.Context) { // TODO: support history hotspot in scheduling server with stateless in the future. 
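getSchedulerConfigByName above looks up the scheduler's own HTTP handler and hands the request to it after rewriting the URL path to the route that handler expects. A small sketch of the same delegation, with a hypothetical handler registry standing in for GetSchedulerHandlers():

package main

import (
	"fmt"
	"net/http"
	"net/http/httptest"
)

// listHandler stands in for a per-scheduler config handler that serves "/list".
func listHandler(w http.ResponseWriter, r *http.Request) {
	fmt.Fprintf(w, "list served at %s", r.URL.Path)
}

func main() {
	// Illustrative registry keyed by scheduler name.
	handlers := map[string]http.Handler{
		"balance-leader-scheduler": http.HandlerFunc(listHandler),
	}

	// The incoming request targets the public route; before delegation the path
	// is rewritten to the route the inner handler expects, just like
	// c.Request.URL.Path = "/list" in the patch.
	req := httptest.NewRequest(http.MethodGet, "/schedulers/config/balance-leader-scheduler/list", nil)
	rec := httptest.NewRecorder()

	if h, ok := handlers["balance-leader-scheduler"]; ok {
		req.URL.Path = "/list"
		h.ServeHTTP(rec, req)
	}
	fmt.Println(rec.Body.String()) // -> list served at /list
}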
@@ -548,3 +718,617 @@ func getHistoryHotRegions(c *gin.Context) { var res storage.HistoryHotRegions c.IndentedJSON(http.StatusOK, res) } + +// @Tags rule +// @Summary List all rules of cluster. +// @Produce json +// @Success 200 {array} placement.Rule +// @Failure 412 {string} string "Placement rules feature is disabled." +// @Failure 500 {string} string "PD server failed to proceed the request." +// @Router /config/rules [get] +func getAllRules(c *gin.Context) { + handler := c.MustGet(handlerKey).(*handler.Handler) + manager, err := handler.GetRuleManager() + if err == errs.ErrPlacementDisabled { + c.String(http.StatusPreconditionFailed, err.Error()) + return + } + if err != nil { + c.String(http.StatusInternalServerError, err.Error()) + return + } + rules := manager.GetAllRules() + c.IndentedJSON(http.StatusOK, rules) +} + +// @Tags rule +// @Summary List all rules of cluster by group. +// @Param group path string true "The name of group" +// @Produce json +// @Success 200 {array} placement.Rule +// @Failure 412 {string} string "Placement rules feature is disabled." +// @Failure 500 {string} string "PD server failed to proceed the request." +// @Router /config/rules/group/{group} [get] +func getRuleByGroup(c *gin.Context) { + handler := c.MustGet(handlerKey).(*handler.Handler) + manager, err := handler.GetRuleManager() + if err == errs.ErrPlacementDisabled { + c.String(http.StatusPreconditionFailed, err.Error()) + return + } + if err != nil { + c.String(http.StatusInternalServerError, err.Error()) + return + } + group := c.Param("group") + rules := manager.GetRulesByGroup(group) + c.IndentedJSON(http.StatusOK, rules) +} + +// @Tags rule +// @Summary List all rules of cluster by region. +// @Param id path integer true "Region Id" +// @Produce json +// @Success 200 {array} placement.Rule +// @Failure 400 {string} string "The input is invalid." +// @Failure 404 {string} string "The region does not exist." +// @Failure 412 {string} string "Placement rules feature is disabled." +// @Failure 500 {string} string "PD server failed to proceed the request." +// @Router /config/rules/region/{region} [get] +func getRulesByRegion(c *gin.Context) { + handler := c.MustGet(handlerKey).(*handler.Handler) + manager, err := handler.GetRuleManager() + if err == errs.ErrPlacementDisabled { + c.String(http.StatusPreconditionFailed, err.Error()) + return + } + if err != nil { + c.String(http.StatusInternalServerError, err.Error()) + return + } + regionStr := c.Param("region") + region, code, err := handler.PreCheckForRegion(regionStr) + if err != nil { + c.String(code, err.Error()) + return + } + rules := manager.GetRulesForApplyRegion(region) + c.IndentedJSON(http.StatusOK, rules) +} + +// @Tags rule +// @Summary List rules and matched peers related to the given region. +// @Param id path integer true "Region Id" +// @Produce json +// @Success 200 {object} placement.RegionFit +// @Failure 400 {string} string "The input is invalid." +// @Failure 404 {string} string "The region does not exist." +// @Failure 412 {string} string "Placement rules feature is disabled." +// @Failure 500 {string} string "PD server failed to proceed the request." 
+// @Router /config/rules/region/{region}/detail [get] +func checkRegionPlacementRule(c *gin.Context) { + handler := c.MustGet(handlerKey).(*handler.Handler) + regionStr := c.Param("region") + region, code, err := handler.PreCheckForRegion(regionStr) + if err != nil { + c.String(code, err.Error()) + return + } + regionFit, err := handler.CheckRegionPlacementRule(region) + if err == errs.ErrPlacementDisabled { + c.String(http.StatusPreconditionFailed, err.Error()) + return + } + if err != nil { + c.String(http.StatusInternalServerError, err.Error()) + return + } + c.IndentedJSON(http.StatusOK, regionFit) +} + +// @Tags rule +// @Summary List all rules of cluster by key. +// @Param key path string true "The name of key" +// @Produce json +// @Success 200 {array} placement.Rule +// @Failure 400 {string} string "The input is invalid." +// @Failure 412 {string} string "Placement rules feature is disabled." +// @Failure 500 {string} string "PD server failed to proceed the request." +// @Router /config/rules/key/{key} [get] +func getRulesByKey(c *gin.Context) { + handler := c.MustGet(handlerKey).(*handler.Handler) + manager, err := handler.GetRuleManager() + if err == errs.ErrPlacementDisabled { + c.String(http.StatusPreconditionFailed, err.Error()) + return + } + if err != nil { + c.String(http.StatusInternalServerError, err.Error()) + return + } + keyHex := c.Param("key") + key, err := hex.DecodeString(keyHex) + if err != nil { + c.String(http.StatusBadRequest, errs.ErrKeyFormat.Error()) + return + } + rules := manager.GetRulesByKey(key) + c.IndentedJSON(http.StatusOK, rules) +} + +// @Tags rule +// @Summary Get rule of cluster by group and id. +// @Param group path string true "The name of group" +// @Param id path string true "Rule Id" +// @Produce json +// @Success 200 {object} placement.Rule +// @Failure 404 {string} string "The rule does not exist." +// @Failure 412 {string} string "Placement rules feature is disabled." +// @Router /config/rule/{group}/{id} [get] +func getRuleByGroupAndID(c *gin.Context) { + handler := c.MustGet(handlerKey).(*handler.Handler) + manager, err := handler.GetRuleManager() + if err == errs.ErrPlacementDisabled { + c.String(http.StatusPreconditionFailed, err.Error()) + return + } + if err != nil { + c.String(http.StatusInternalServerError, err.Error()) + return + } + group, id := c.Param("group"), c.Param("id") + rule := manager.GetRule(group, id) + if rule == nil { + c.String(http.StatusNotFound, errs.ErrRuleNotFound.Error()) + return + } + c.IndentedJSON(http.StatusOK, rule) +} + +// @Tags rule +// @Summary List all rule group configs. +// @Produce json +// @Success 200 {array} placement.RuleGroup +// @Failure 412 {string} string "Placement rules feature is disabled." +// @Failure 500 {string} string "PD server failed to proceed the request." +// @Router /config/rule_groups [get] +func getAllGroupConfigs(c *gin.Context) { + handler := c.MustGet(handlerKey).(*handler.Handler) + manager, err := handler.GetRuleManager() + if err == errs.ErrPlacementDisabled { + c.String(http.StatusPreconditionFailed, err.Error()) + return + } + if err != nil { + c.String(http.StatusInternalServerError, err.Error()) + return + } + ruleGroups := manager.GetRuleGroups() + c.IndentedJSON(http.StatusOK, ruleGroups) +} + +// @Tags rule +// @Summary Get rule group config by group id. +// @Param id path string true "Group Id" +// @Produce json +// @Success 200 {object} placement.RuleGroup +// @Failure 404 {string} string "The RuleGroup does not exist." 
+// @Failure 412 {string} string "Placement rules feature is disabled." +// @Failure 500 {string} string "PD server failed to proceed the request." +// @Router /config/rule_groups/{id} [get] +func getRuleGroupConfig(c *gin.Context) { + handler := c.MustGet(handlerKey).(*handler.Handler) + manager, err := handler.GetRuleManager() + if err == errs.ErrPlacementDisabled { + c.String(http.StatusPreconditionFailed, err.Error()) + return + } + if err != nil { + c.String(http.StatusInternalServerError, err.Error()) + return + } + id := c.Param("id") + group := manager.GetRuleGroup(id) + if group == nil { + c.String(http.StatusNotFound, errs.ErrRuleNotFound.Error()) + return + } + c.IndentedJSON(http.StatusOK, group) +} + +// @Tags rule +// @Summary List all rules and groups configuration. +// @Produce json +// @Success 200 {array} placement.GroupBundle +// @Failure 412 {string} string "Placement rules feature is disabled." +// @Failure 500 {string} string "PD server failed to proceed the request." +// @Router /config/placement-rules [get] +func getPlacementRules(c *gin.Context) { + handler := c.MustGet(handlerKey).(*handler.Handler) + manager, err := handler.GetRuleManager() + if err == errs.ErrPlacementDisabled { + c.String(http.StatusPreconditionFailed, err.Error()) + return + } + if err != nil { + c.String(http.StatusInternalServerError, err.Error()) + return + } + bundles := manager.GetAllGroupBundles() + c.IndentedJSON(http.StatusOK, bundles) +} + +// @Tags rule +// @Summary Get group config and all rules belong to the group. +// @Param group path string true "The name of group" +// @Produce json +// @Success 200 {object} placement.GroupBundle +// @Failure 412 {string} string "Placement rules feature is disabled." +// @Failure 500 {string} string "PD server failed to proceed the request." +// @Router /config/placement-rules/{group} [get] +func getPlacementRuleByGroup(c *gin.Context) { + handler := c.MustGet(handlerKey).(*handler.Handler) + manager, err := handler.GetRuleManager() + if err == errs.ErrPlacementDisabled { + c.String(http.StatusPreconditionFailed, err.Error()) + return + } + if err != nil { + c.String(http.StatusInternalServerError, err.Error()) + return + } + g := c.Param("group") + group := manager.GetGroupBundle(g) + c.IndentedJSON(http.StatusOK, group) +} + +// @Tags region_label +// @Summary Get label of a region. +// @Param id path integer true "Region Id" +// @Param key path string true "Label key" +// @Produce json +// @Success 200 {string} string +// @Failure 400 {string} string "The input is invalid." +// @Failure 404 {string} string "The region does not exist." +// @Failure 500 {string} string "PD server failed to proceed the request." 
+// @Router /config/regions/{id}/label/{key} [get] +func getRegionLabelByKey(c *gin.Context) { + handler := c.MustGet(handlerKey).(*handler.Handler) + + idStr := c.Param("id") + labelKey := c.Param("key") // TODO: test https://github.com/tikv/pd/pull/4004 + + id, err := strconv.ParseUint(idStr, 10, 64) + if err != nil { + c.String(http.StatusBadRequest, err.Error()) + return + } + + region, err := handler.GetRegion(id) + if err != nil { + c.String(http.StatusInternalServerError, err.Error()) + return + } + if region == nil { + c.String(http.StatusNotFound, errs.ErrRegionNotFound.FastGenByArgs().Error()) + return + } + + l, err := handler.GetRegionLabeler() + if err != nil { + c.String(http.StatusInternalServerError, err.Error()) + return + } + labelValue := l.GetRegionLabel(region, labelKey) + c.IndentedJSON(http.StatusOK, labelValue) +} + +// @Tags region_label +// @Summary Get labels of a region. +// @Param id path integer true "Region Id" +// @Produce json +// @Success 200 {string} string +// @Failure 400 {string} string "The input is invalid." +// @Failure 404 {string} string "The region does not exist." +// @Router /config/regions/{id}/labels [get] +func getRegionLabels(c *gin.Context) { + handler := c.MustGet(handlerKey).(*handler.Handler) + + idStr := c.Param("id") + id, err := strconv.ParseUint(idStr, 10, 64) + if err != nil { + c.String(http.StatusBadRequest, err.Error()) + return + } + + region, err := handler.GetRegion(id) + if err != nil { + c.String(http.StatusInternalServerError, err.Error()) + return + } + if region == nil { + c.String(http.StatusNotFound, errs.ErrRegionNotFound.FastGenByArgs().Error()) + return + } + l, err := handler.GetRegionLabeler() + if err != nil { + c.String(http.StatusInternalServerError, err.Error()) + return + } + labels := l.GetRegionLabels(region) + c.IndentedJSON(http.StatusOK, labels) +} + +// @Tags region_label +// @Summary List all label rules of cluster. +// @Produce json +// @Success 200 {array} labeler.LabelRule +// @Failure 500 {string} string "PD server failed to proceed the request." +// @Router /config/region-label/rules [get] +func getAllRegionLabelRules(c *gin.Context) { + handler := c.MustGet(handlerKey).(*handler.Handler) + l, err := handler.GetRegionLabeler() + if err != nil { + c.String(http.StatusInternalServerError, err.Error()) + return + } + rules := l.GetAllLabelRules() + c.IndentedJSON(http.StatusOK, rules) +} + +// @Tags region_label +// @Summary Get label rules of cluster by ids. +// @Param body body []string true "IDs of query rules" +// @Produce json +// @Success 200 {array} labeler.LabelRule +// @Failure 400 {string} string "The input is invalid." +// @Failure 500 {string} string "PD server failed to proceed the request." +// @Router /config/region-label/rules/ids [get] +func getRegionLabelRulesByIDs(c *gin.Context) { + handler := c.MustGet(handlerKey).(*handler.Handler) + l, err := handler.GetRegionLabeler() + if err != nil { + c.String(http.StatusInternalServerError, err.Error()) + return + } + var ids []string + if err := c.BindJSON(&ids); err != nil { + c.String(http.StatusBadRequest, err.Error()) + return + } + rules, err := l.GetLabelRules(ids) + if err != nil { + c.String(http.StatusInternalServerError, err.Error()) + return + } + c.IndentedJSON(http.StatusOK, rules) +} + +// @Tags region_label +// @Summary Get label rule of cluster by id. +// @Param id path string true "Rule Id" +// @Produce json +// @Success 200 {object} labeler.LabelRule +// @Failure 404 {string} string "The rule does not exist." 
+// @Failure 500 {string} string "PD server failed to proceed the request." +// @Router /config/region-label/rules/{id} [get] +func getRegionLabelRuleByID(c *gin.Context) { + handler := c.MustGet(handlerKey).(*handler.Handler) + + id, err := url.PathUnescape(c.Param("id")) + if err != nil { + c.String(http.StatusBadRequest, err.Error()) + return + } + + l, err := handler.GetRegionLabeler() + if err != nil { + c.String(http.StatusInternalServerError, err.Error()) + return + } + rule := l.GetLabelRule(id) + if rule == nil { + c.String(http.StatusNotFound, errs.ErrRegionRuleNotFound.FastGenByArgs().Error()) + return + } + c.IndentedJSON(http.StatusOK, rule) +} + +// @Tags region +// @Summary Accelerate regions scheduling in a given range; only hex-encoded keys are accepted +// @Accept json +// @Param body body object true "json params" +// @Param limit query integer false "Limit count" default(256) +// @Produce json +// @Success 200 {string} string "Accelerate regions scheduling in a given range [startKey, endKey)" +// @Failure 400 {string} string "The input is invalid." +// @Failure 500 {string} string "PD server failed to proceed the request." +// @Router /regions/accelerate-schedule [post] +func accelerateRegionsScheduleInRange(c *gin.Context) { + handler := c.MustGet(handlerKey).(*handler.Handler) + + var input map[string]interface{} + if err := c.BindJSON(&input); err != nil { + c.String(http.StatusBadRequest, err.Error()) + return + } + rawStartKey, ok1 := input["start_key"].(string) + rawEndKey, ok2 := input["end_key"].(string) + if !ok1 || !ok2 { + c.String(http.StatusBadRequest, "start_key or end_key is not a string") + return + } + + limitStr, _ := c.GetQuery("limit") + limit, err := handler.AdjustLimit(limitStr, 256 /*default limit*/) + if err != nil { + c.String(http.StatusBadRequest, err.Error()) + return + } + + err = handler.AccelerateRegionsScheduleInRange(rawStartKey, rawEndKey, limit) + if err != nil { + c.String(http.StatusInternalServerError, err.Error()) + return + } + c.String(http.StatusOK, fmt.Sprintf("Accelerate regions scheduling in a given range [%s,%s)", rawStartKey, rawEndKey)) +} + +// @Tags region +// @Summary Accelerate regions scheduling in the given ranges; only hex-encoded keys are accepted +// @Accept json +// @Param body body object true "json params" +// @Param limit query integer false "Limit count" default(256) +// @Produce json +// @Success 200 {string} string "Accelerate regions scheduling in given ranges [startKey1, endKey1), [startKey2, endKey2), ..." +// @Failure 400 {string} string "The input is invalid." +// @Failure 500 {string} string "PD server failed to proceed the request."
+// @Router /regions/accelerate-schedule/batch [post] +func accelerateRegionsScheduleInRanges(c *gin.Context) { + handler := c.MustGet(handlerKey).(*handler.Handler) + + var input []map[string]interface{} + if err := c.BindJSON(&input); err != nil { + c.String(http.StatusBadRequest, err.Error()) + return + } + limitStr, _ := c.GetQuery("limit") + limit, err := handler.AdjustLimit(limitStr, 256 /*default limit*/) + if err != nil { + c.String(http.StatusBadRequest, err.Error()) + return + } + + var msgBuilder strings.Builder + msgBuilder.Grow(128) + msgBuilder.WriteString("Accelerate regions scheduling in given ranges: ") + var startKeys, endKeys [][]byte + for _, rg := range input { + startKey, rawStartKey, err := apiutil.ParseKey("start_key", rg) + if err != nil { + c.String(http.StatusBadRequest, err.Error()) + return + } + endKey, rawEndKey, err := apiutil.ParseKey("end_key", rg) + if err != nil { + c.String(http.StatusBadRequest, err.Error()) + return + } + startKeys = append(startKeys, startKey) + endKeys = append(endKeys, endKey) + msgBuilder.WriteString(fmt.Sprintf("[%s,%s), ", rawStartKey, rawEndKey)) + } + err = handler.AccelerateRegionsScheduleInRanges(startKeys, endKeys, limit) + if err != nil { + c.String(http.StatusInternalServerError, err.Error()) + return + } + c.String(http.StatusOK, msgBuilder.String()) +} + +// @Tags region +// @Summary Scatter regions by given key ranges or regions id distributed by given group with given retry limit +// @Accept json +// @Param body body object true "json params" +// @Produce json +// @Success 200 {string} string "Scatter regions by given key ranges or regions id distributed by given group with given retry limit" +// @Failure 400 {string} string "The input is invalid." +// @Failure 500 {string} string "PD server failed to proceed the request." +// @Router /regions/scatter [post] +func scatterRegions(c *gin.Context) { + handler := c.MustGet(handlerKey).(*handler.Handler) + + var input map[string]interface{} + if err := c.BindJSON(&input); err != nil { + c.String(http.StatusBadRequest, err.Error()) + return + } + rawStartKey, ok1 := input["start_key"].(string) + rawEndKey, ok2 := input["end_key"].(string) + group, _ := input["group"].(string) + retryLimit := 5 + if rl, ok := input["retry_limit"].(float64); ok { + retryLimit = int(rl) + } + + opsCount, failures, err := func() (int, map[uint64]error, error) { + if ok1 && ok2 { + return handler.ScatterRegionsByRange(rawStartKey, rawEndKey, group, retryLimit) + } + ids, ok := typeutil.JSONToUint64Slice(input["regions_id"]) + if !ok { + return 0, nil, errors.New("regions_id is invalid") + } + return handler.ScatterRegionsByID(ids, group, retryLimit, false) + }() + if err != nil { + c.String(http.StatusInternalServerError, err.Error()) + return + } + s := handler.BuildScatterRegionsResp(opsCount, failures) + c.IndentedJSON(http.StatusOK, &s) +} + +// @Tags region +// @Summary Split regions with given split keys +// @Accept json +// @Param body body object true "json params" +// @Produce json +// @Success 200 {string} string "Split regions with given split keys" +// @Failure 400 {string} string "The input is invalid." +// @Failure 500 {string} string "PD server failed to proceed the request." 
+// @Router /regions/split [post] +func splitRegions(c *gin.Context) { + handler := c.MustGet(handlerKey).(*handler.Handler) + + var input map[string]interface{} + if err := c.BindJSON(&input); err != nil { + c.String(http.StatusBadRequest, err.Error()) + return + } + s, ok := input["split_keys"] + if !ok { + c.String(http.StatusBadRequest, "split_keys should be provided.") + return + } + rawSplitKeys := s.([]interface{}) + if len(rawSplitKeys) < 1 { + c.String(http.StatusBadRequest, "empty split keys.") + return + } + retryLimit := 5 + if rl, ok := input["retry_limit"].(float64); ok { + retryLimit = int(rl) + } + s, err := handler.SplitRegions(c.Request.Context(), rawSplitKeys, retryLimit) + if err != nil { + c.String(http.StatusInternalServerError, err.Error()) + return + } + c.IndentedJSON(http.StatusOK, &s) +} + +// @Tags region +// @Summary Check if regions in the given key ranges are replicated. Returns 'REPLICATED', 'INPROGRESS', or 'PENDING'. 'PENDING' means that there is at least one region pending for scheduling. Similarly, 'INPROGRESS' means there is at least one region in scheduling. +// @Param startKey query string true "Regions start key, hex encoded" +// @Param endKey query string true "Regions end key, hex encoded" +// @Produce plain +// @Success 200 {string} string "INPROGRESS" +// @Failure 400 {string} string "The input is invalid." +// @Router /regions/replicated [get] +func checkRegionsReplicated(c *gin.Context) { + handler := c.MustGet(handlerKey).(*handler.Handler) + rawStartKey, ok1 := c.GetQuery("startKey") + rawEndKey, ok2 := c.GetQuery("endKey") + if !ok1 || !ok2 { + c.String(http.StatusBadRequest, "there is no start_key or end_key") + return + } + + state, err := handler.CheckRegionsReplicated(rawStartKey, rawEndKey) + if err != nil { + c.String(http.StatusBadRequest, err.Error()) + return + } + c.String(http.StatusOK, state) +} diff --git a/pkg/mcs/scheduling/server/cluster.go b/pkg/mcs/scheduling/server/cluster.go index e4983eca7ea..b24db7ac805 100644 --- a/pkg/mcs/scheduling/server/cluster.go +++ b/pkg/mcs/scheduling/server/cluster.go @@ -52,7 +52,10 @@ type Cluster struct { running atomic.Bool } -const regionLabelGCInterval = time.Hour +const ( + regionLabelGCInterval = time.Hour + requestTimeout = 3 * time.Second +) // NewCluster creates a new cluster. 
func NewCluster(parentCtx context.Context, persistConfig *config.PersistConfig, storage storage.Storage, basicCluster *core.BasicCluster, hbStreams *hbstream.HeartbeatStreams, clusterID uint64, checkMembershipCh chan struct{}) (*Cluster, error) { @@ -199,9 +202,11 @@ func (c *Cluster) AllocID() (uint64, error) { if err != nil { return 0, err } - resp, err := client.AllocID(c.ctx, &pdpb.AllocIDRequest{Header: &pdpb.RequestHeader{ClusterId: c.clusterID}}) + ctx, cancel := context.WithTimeout(c.ctx, requestTimeout) + defer cancel() + resp, err := client.AllocID(ctx, &pdpb.AllocIDRequest{Header: &pdpb.RequestHeader{ClusterId: c.clusterID}}) if err != nil { - c.checkMembershipCh <- struct{}{} + c.triggerMembershipCheck() return 0, err } return resp.GetId(), nil @@ -210,12 +215,19 @@ func (c *Cluster) AllocID() (uint64, error) { func (c *Cluster) getAPIServerLeaderClient() (pdpb.PDClient, error) { cli := c.apiServerLeader.Load() if cli == nil { - c.checkMembershipCh <- struct{}{} + c.triggerMembershipCheck() return nil, errors.New("API server leader is not found") } return cli.(pdpb.PDClient), nil } +func (c *Cluster) triggerMembershipCheck() { + select { + case c.checkMembershipCh <- struct{}{}: + default: // avoid blocking + } +} + // SwitchAPIServerLeader switches the API server leader. func (c *Cluster) SwitchAPIServerLeader(new pdpb.PDClient) bool { old := c.apiServerLeader.Load() @@ -474,10 +486,6 @@ func (c *Cluster) collectMetrics() { c.coordinator.GetSchedulersController().CollectSchedulerMetrics() c.coordinator.CollectHotSpotMetrics() - c.collectClusterMetrics() -} - -func (c *Cluster) collectClusterMetrics() { if c.regionStats == nil { return } @@ -489,20 +497,8 @@ func (c *Cluster) collectClusterMetrics() { func (c *Cluster) resetMetrics() { statistics.Reset() - - c.coordinator.GetSchedulersController().ResetSchedulerMetrics() - c.coordinator.ResetHotSpotMetrics() - c.resetClusterMetrics() -} - -func (c *Cluster) resetClusterMetrics() { - if c.regionStats == nil { - return - } - c.regionStats.Reset() - c.labelStats.Reset() - // reset hot cache metrics - c.hotStat.ResetMetrics() + schedulers.ResetSchedulerMetrics() + schedule.ResetHotSpotMetrics() } // StartBackgroundJobs starts background jobs. @@ -526,6 +522,11 @@ func (c *Cluster) StopBackgroundJobs() { c.wg.Wait() } +// IsBackgroundJobsRunning returns whether the background jobs are running. Only for test purpose. +func (c *Cluster) IsBackgroundJobsRunning() bool { + return c.running.Load() +} + // HandleRegionHeartbeat processes RegionInfo reports from client. func (c *Cluster) HandleRegionHeartbeat(region *core.RegionInfo) error { if err := c.processRegionHeartbeat(region); err != nil { @@ -581,3 +582,13 @@ func (c *Cluster) processRegionHeartbeat(region *core.RegionInfo) error { func (c *Cluster) IsPrepared() bool { return c.coordinator.GetPrepareChecker().IsPrepared() } + +// DropCacheAllRegion removes all cached regions. +func (c *Cluster) DropCacheAllRegion() { + c.ResetRegionCache() +} + +// DropCacheRegion removes a region from the cache. 
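Two small patterns from the cluster changes above: the AllocID call is now bounded by context.WithTimeout so a stuck API-server leader cannot hang the caller, and the membership check is triggered with a select/default send so it never blocks when a check is already pending. A sketch under illustrative names (callWithTimeout, triggerCheck), not the PD code itself:

package main

import (
	"context"
	"errors"
	"fmt"
	"time"
)

// callWithTimeout bounds an outbound call the way AllocID now does: the child
// context is cancelled after the timeout, so the caller always returns.
func callWithTimeout(parent context.Context, timeout time.Duration, call func(context.Context) error) error {
	ctx, cancel := context.WithTimeout(parent, timeout)
	defer cancel()
	return call(ctx)
}

// triggerCheck mirrors triggerMembershipCheck: notify the membership loop if it
// is ready to receive, otherwise drop the signal instead of blocking.
func triggerCheck(checkCh chan struct{}) {
	select {
	case checkCh <- struct{}{}:
	default: // a check is already pending; skip
	}
}

func main() {
	checkCh := make(chan struct{}, 1)

	err := callWithTimeout(context.Background(), 50*time.Millisecond, func(ctx context.Context) error {
		select {
		case <-time.After(time.Second): // simulate a leader that never answers
			return nil
		case <-ctx.Done():
			return ctx.Err()
		}
	})
	if errors.Is(err, context.DeadlineExceeded) {
		triggerCheck(checkCh) // ask the loop to re-resolve the leader, without blocking
	}
	fmt.Println("call result:", err, "pending checks:", len(checkCh))
}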
+func (c *Cluster) DropCacheRegion(id uint64) { + c.RemoveRegionIfExist(id) +} diff --git a/pkg/mcs/scheduling/server/config/config.go b/pkg/mcs/scheduling/server/config/config.go index 4f9caca41e6..3e347afc12e 100644 --- a/pkg/mcs/scheduling/server/config/config.go +++ b/pkg/mcs/scheduling/server/config/config.go @@ -19,6 +19,7 @@ import ( "os" "path/filepath" "reflect" + "strconv" "strings" "sync/atomic" "time" @@ -30,6 +31,7 @@ import ( "github.com/pingcap/kvproto/pkg/metapb" "github.com/pingcap/log" "github.com/spf13/pflag" + "github.com/tikv/pd/pkg/cache" "github.com/tikv/pd/pkg/core/constant" "github.com/tikv/pd/pkg/core/storelimit" "github.com/tikv/pd/pkg/mcs/utils" @@ -61,9 +63,9 @@ type Config struct { Metric metricutil.MetricConfig `toml:"metric" json:"metric"` // Log related config. - Log log.Config `toml:"log" json:"log"` - Logger *zap.Logger - LogProps *log.ZapProperties + Log log.Config `toml:"log" json:"log"` + Logger *zap.Logger `json:"-"` + LogProps *log.ZapProperties `json:"-"` Security configutil.SecurityConfig `toml:"security" json:"security"` @@ -164,11 +166,26 @@ func (c *Config) adjustLog(meta *configutil.ConfigMetaData) { } } +// GetName returns the Name +func (c *Config) GetName() string { + return c.Name +} + +// GeBackendEndpoints returns the BackendEndpoints +func (c *Config) GeBackendEndpoints() string { + return c.BackendEndpoints +} + // GetListenAddr returns the ListenAddr func (c *Config) GetListenAddr() string { return c.ListenAddr } +// GetAdvertiseListenAddr returns the AdvertiseListenAddr +func (c *Config) GetAdvertiseListenAddr() string { + return c.AdvertiseListenAddr +} + // GetTLSConfig returns the TLS config. func (c *Config) GetTLSConfig() *grpcutil.TLSConfig { return &c.Security.TLSConfig @@ -195,9 +212,17 @@ func (c *Config) validate() error { return nil } +// Clone creates a copy of current config. +func (c *Config) Clone() *Config { + cfg := &Config{} + *cfg = *c + return cfg +} + // PersistConfig wraps all configurations that need to persist to storage and // allows to access them safely. type PersistConfig struct { + ttl *cache.TTLString // Store the global configuration that is related to the scheduling. clusterVersion unsafe.Pointer schedule atomic.Value @@ -209,7 +234,7 @@ type PersistConfig struct { } // NewPersistConfig creates a new PersistConfig instance. -func NewPersistConfig(cfg *Config) *PersistConfig { +func NewPersistConfig(cfg *Config, ttl *cache.TTLString) *PersistConfig { o := &PersistConfig{} o.SetClusterVersion(&cfg.ClusterVersion) o.schedule.Store(&cfg.Schedule) @@ -217,6 +242,7 @@ func NewPersistConfig(cfg *Config) *PersistConfig { // storeConfig will be fetched from TiKV by PD API server, // so we just set an empty value here first. o.storeConfig.Store(&sc.StoreConfig{}) + o.ttl = ttl return o } @@ -307,16 +333,6 @@ func (o *PersistConfig) GetMaxReplicas() int { return int(o.GetReplicationConfig().MaxReplicas) } -// GetMaxSnapshotCount returns the max snapshot count. -func (o *PersistConfig) GetMaxSnapshotCount() uint64 { - return o.GetScheduleConfig().MaxSnapshotCount -} - -// GetMaxPendingPeerCount returns the max pending peer count. -func (o *PersistConfig) GetMaxPendingPeerCount() uint64 { - return o.GetScheduleConfig().MaxPendingPeerCount -} - // IsPlacementRulesEnabled returns if the placement rules is enabled. 
func (o *PersistConfig) IsPlacementRulesEnabled() bool { return o.GetReplicationConfig().EnablePlacementRules @@ -332,31 +348,6 @@ func (o *PersistConfig) GetHighSpaceRatio() float64 { return o.GetScheduleConfig().HighSpaceRatio } -// GetHotRegionScheduleLimit returns the limit for hot region schedule. -func (o *PersistConfig) GetHotRegionScheduleLimit() uint64 { - return o.GetScheduleConfig().HotRegionScheduleLimit -} - -// GetRegionScheduleLimit returns the limit for region schedule. -func (o *PersistConfig) GetRegionScheduleLimit() uint64 { - return o.GetScheduleConfig().RegionScheduleLimit -} - -// GetLeaderScheduleLimit returns the limit for leader schedule. -func (o *PersistConfig) GetLeaderScheduleLimit() uint64 { - return o.GetScheduleConfig().LeaderScheduleLimit -} - -// GetReplicaScheduleLimit returns the limit for replica schedule. -func (o *PersistConfig) GetReplicaScheduleLimit() uint64 { - return o.GetScheduleConfig().ReplicaScheduleLimit -} - -// GetMergeScheduleLimit returns the limit for merge schedule. -func (o *PersistConfig) GetMergeScheduleLimit() uint64 { - return o.GetScheduleConfig().MergeScheduleLimit -} - // GetLeaderSchedulePolicy is to get leader schedule policy. func (o *PersistConfig) GetLeaderSchedulePolicy() constant.SchedulePolicy { return constant.StringToSchedulePolicy(o.GetScheduleConfig().LeaderSchedulePolicy) @@ -397,26 +388,11 @@ func (o *PersistConfig) IsOneWayMergeEnabled() bool { return o.GetScheduleConfig().EnableOneWayMerge } -// GetMaxMergeRegionSize returns the max region size. -func (o *PersistConfig) GetMaxMergeRegionSize() uint64 { - return o.GetScheduleConfig().MaxMergeRegionSize -} - -// GetMaxMergeRegionKeys returns the max region keys. -func (o *PersistConfig) GetMaxMergeRegionKeys() uint64 { - return o.GetScheduleConfig().MaxMergeRegionKeys -} - // GetRegionScoreFormulaVersion returns the region score formula version. func (o *PersistConfig) GetRegionScoreFormulaVersion() string { return o.GetScheduleConfig().RegionScoreFormulaVersion } -// GetSchedulerMaxWaitingOperator returns the scheduler max waiting operator. -func (o *PersistConfig) GetSchedulerMaxWaitingOperator() uint64 { - return o.GetScheduleConfig().SchedulerMaxWaitingOperator -} - // GetHotRegionCacheHitsThreshold returns the hot region cache hits threshold. func (o *PersistConfig) GetHotRegionCacheHitsThreshold() int { return int(o.GetScheduleConfig().HotRegionCacheHitsThreshold) @@ -452,11 +428,6 @@ func (o *PersistConfig) GetTolerantSizeRatio() float64 { return o.GetScheduleConfig().TolerantSizeRatio } -// GetWitnessScheduleLimit returns the limit for region schedule. -func (o *PersistConfig) GetWitnessScheduleLimit() uint64 { - return o.GetScheduleConfig().WitnessScheduleLimit -} - // IsDebugMetricsEnabled returns if debug metrics is enabled. func (o *PersistConfig) IsDebugMetricsEnabled() bool { return o.GetScheduleConfig().EnableDebugMetrics @@ -487,11 +458,6 @@ func (o *PersistConfig) IsRemoveExtraReplicaEnabled() bool { return o.GetScheduleConfig().EnableRemoveExtraReplica } -// IsLocationReplacementEnabled returns if location replace is enabled. -func (o *PersistConfig) IsLocationReplacementEnabled() bool { - return o.GetScheduleConfig().EnableLocationReplacement -} - // IsWitnessAllowed returns if the witness is allowed. 
func (o *PersistConfig) IsWitnessAllowed() bool { return o.GetScheduleConfig().EnableWitness @@ -512,8 +478,87 @@ func (o *PersistConfig) GetStoresLimit() map[uint64]sc.StoreLimitConfig { return o.GetScheduleConfig().StoreLimit } +// TTL related methods. + +// GetLeaderScheduleLimit returns the limit for leader schedule. +func (o *PersistConfig) GetLeaderScheduleLimit() uint64 { + return o.getTTLUintOr(sc.LeaderScheduleLimitKey, o.GetScheduleConfig().LeaderScheduleLimit) +} + +// GetRegionScheduleLimit returns the limit for region schedule. +func (o *PersistConfig) GetRegionScheduleLimit() uint64 { + return o.getTTLUintOr(sc.RegionScheduleLimitKey, o.GetScheduleConfig().RegionScheduleLimit) +} + +// GetWitnessScheduleLimit returns the limit for region schedule. +func (o *PersistConfig) GetWitnessScheduleLimit() uint64 { + return o.getTTLUintOr(sc.WitnessScheduleLimitKey, o.GetScheduleConfig().WitnessScheduleLimit) +} + +// GetReplicaScheduleLimit returns the limit for replica schedule. +func (o *PersistConfig) GetReplicaScheduleLimit() uint64 { + return o.getTTLUintOr(sc.ReplicaRescheduleLimitKey, o.GetScheduleConfig().ReplicaScheduleLimit) +} + +// GetMergeScheduleLimit returns the limit for merge schedule. +func (o *PersistConfig) GetMergeScheduleLimit() uint64 { + return o.getTTLUintOr(sc.MergeScheduleLimitKey, o.GetScheduleConfig().MergeScheduleLimit) +} + +// GetHotRegionScheduleLimit returns the limit for hot region schedule. +func (o *PersistConfig) GetHotRegionScheduleLimit() uint64 { + return o.getTTLUintOr(sc.HotRegionScheduleLimitKey, o.GetScheduleConfig().HotRegionScheduleLimit) +} + +// GetStoreLimit returns the limit of a store. +func (o *PersistConfig) GetStoreLimit(storeID uint64) (returnSC sc.StoreLimitConfig) { + defer func() { + returnSC.RemovePeer = o.getTTLFloatOr(fmt.Sprintf("remove-peer-%v", storeID), returnSC.RemovePeer) + returnSC.AddPeer = o.getTTLFloatOr(fmt.Sprintf("add-peer-%v", storeID), returnSC.AddPeer) + }() + if limit, ok := o.GetScheduleConfig().StoreLimit[storeID]; ok { + return limit + } + cfg := o.GetScheduleConfig().Clone() + sc := sc.StoreLimitConfig{ + AddPeer: sc.DefaultStoreLimit.GetDefaultStoreLimit(storelimit.AddPeer), + RemovePeer: sc.DefaultStoreLimit.GetDefaultStoreLimit(storelimit.RemovePeer), + } + v, ok1, err := o.getTTLFloat("default-add-peer") + if err != nil { + log.Warn("failed to parse default-add-peer from PersistOptions's ttl storage", zap.Error(err)) + } + canSetAddPeer := ok1 && err == nil + if canSetAddPeer { + returnSC.AddPeer = v + } + + v, ok2, err := o.getTTLFloat("default-remove-peer") + if err != nil { + log.Warn("failed to parse default-remove-peer from PersistOptions's ttl storage", zap.Error(err)) + } + canSetRemovePeer := ok2 && err == nil + if canSetRemovePeer { + returnSC.RemovePeer = v + } + + if canSetAddPeer || canSetRemovePeer { + return returnSC + } + cfg.StoreLimit[storeID] = sc + o.SetScheduleConfig(cfg) + return o.GetScheduleConfig().StoreLimit[storeID] +} + // GetStoreLimitByType returns the limit of a store with a given type. 
func (o *PersistConfig) GetStoreLimitByType(storeID uint64, typ storelimit.Type) (returned float64) { + defer func() { + if typ == storelimit.RemovePeer { + returned = o.getTTLFloatOr(fmt.Sprintf("remove-peer-%v", storeID), returned) + } else if typ == storelimit.AddPeer { + returned = o.getTTLFloatOr(fmt.Sprintf("add-peer-%v", storeID), returned) + } + }() limit := o.GetStoreLimit(storeID) switch typ { case storelimit.AddPeer: @@ -528,20 +573,48 @@ func (o *PersistConfig) GetStoreLimitByType(storeID uint64, typ storelimit.Type) } } -// GetStoreLimit returns the limit of a store. -func (o *PersistConfig) GetStoreLimit(storeID uint64) (returnSC sc.StoreLimitConfig) { - if limit, ok := o.GetScheduleConfig().StoreLimit[storeID]; ok { - return limit +// GetMaxSnapshotCount returns the number of the max snapshot which is allowed to send. +func (o *PersistConfig) GetMaxSnapshotCount() uint64 { + return o.getTTLUintOr(sc.MaxSnapshotCountKey, o.GetScheduleConfig().MaxSnapshotCount) +} + +// GetMaxPendingPeerCount returns the number of the max pending peers. +func (o *PersistConfig) GetMaxPendingPeerCount() uint64 { + return o.getTTLUintOr(sc.MaxPendingPeerCountKey, o.GetScheduleConfig().MaxPendingPeerCount) +} + +// GetMaxMergeRegionSize returns the max region size. +func (o *PersistConfig) GetMaxMergeRegionSize() uint64 { + return o.getTTLUintOr(sc.MaxMergeRegionSizeKey, o.GetScheduleConfig().MaxMergeRegionSize) +} + +// GetMaxMergeRegionKeys returns the max number of keys. +// It returns size * 10000 if the key of max-merge-region-Keys doesn't exist. +func (o *PersistConfig) GetMaxMergeRegionKeys() uint64 { + keys, exist, err := o.getTTLUint(sc.MaxMergeRegionKeysKey) + if exist && err == nil { + return keys } - cfg := o.GetScheduleConfig().Clone() - sc := sc.StoreLimitConfig{ - AddPeer: sc.DefaultStoreLimit.GetDefaultStoreLimit(storelimit.AddPeer), - RemovePeer: sc.DefaultStoreLimit.GetDefaultStoreLimit(storelimit.RemovePeer), + size, exist, err := o.getTTLUint(sc.MaxMergeRegionSizeKey) + if exist && err == nil { + return size * 10000 } + return o.GetScheduleConfig().GetMaxMergeRegionKeys() +} - cfg.StoreLimit[storeID] = sc - o.SetScheduleConfig(cfg) - return o.GetScheduleConfig().StoreLimit[storeID] +// GetSchedulerMaxWaitingOperator returns the number of the max waiting operators. +func (o *PersistConfig) GetSchedulerMaxWaitingOperator() uint64 { + return o.getTTLUintOr(sc.SchedulerMaxWaitingOperatorKey, o.GetScheduleConfig().SchedulerMaxWaitingOperator) +} + +// IsLocationReplacementEnabled returns if location replace is enabled. +func (o *PersistConfig) IsLocationReplacementEnabled() bool { + return o.getTTLBoolOr(sc.EnableLocationReplacement, o.GetScheduleConfig().EnableLocationReplacement) +} + +// IsTikvRegionSplitEnabled returns whether tikv split region is enabled. +func (o *PersistConfig) IsTikvRegionSplitEnabled() bool { + return o.getTTLBoolOr(sc.EnableTiKVSplitRegion, o.GetScheduleConfig().EnableTiKVSplitRegion) } // SetAllStoresLimit sets all store limit for a given type and rate. @@ -658,11 +731,6 @@ func (o *PersistConfig) IsRaftKV2() bool { return o.GetStoreConfig().IsRaftKV2() } -// IsTikvRegionSplitEnabled returns whether tikv split region is disabled. -func (o *PersistConfig) IsTikvRegionSplitEnabled() bool { - return o.GetScheduleConfig().EnableTiKVSplitRegion -} - // TODO: implement the following methods // AddSchedulerCfg adds the scheduler configurations. 
@@ -688,3 +756,72 @@ func (o *PersistConfig) IsTraceRegionFlow() bool { func (o *PersistConfig) Persist(storage endpoint.ConfigStorage) error { return nil } + +func (o *PersistConfig) getTTLUint(key string) (uint64, bool, error) { + stringForm, ok := o.GetTTLData(key) + if !ok { + return 0, false, nil + } + r, err := strconv.ParseUint(stringForm, 10, 64) + return r, true, err +} + +func (o *PersistConfig) getTTLUintOr(key string, defaultValue uint64) uint64 { + if v, ok, err := o.getTTLUint(key); ok { + if err == nil { + return v + } + log.Warn("failed to parse "+key+" from PersistOptions's ttl storage", zap.Error(err)) + } + return defaultValue +} + +func (o *PersistConfig) getTTLBool(key string) (result bool, contains bool, err error) { + stringForm, ok := o.GetTTLData(key) + if !ok { + return + } + result, err = strconv.ParseBool(stringForm) + contains = true + return +} + +func (o *PersistConfig) getTTLBoolOr(key string, defaultValue bool) bool { + if v, ok, err := o.getTTLBool(key); ok { + if err == nil { + return v + } + log.Warn("failed to parse "+key+" from PersistOptions's ttl storage", zap.Error(err)) + } + return defaultValue +} + +func (o *PersistConfig) getTTLFloat(key string) (float64, bool, error) { + stringForm, ok := o.GetTTLData(key) + if !ok { + return 0, false, nil + } + r, err := strconv.ParseFloat(stringForm, 64) + return r, true, err +} + +func (o *PersistConfig) getTTLFloatOr(key string, defaultValue float64) float64 { + if v, ok, err := o.getTTLFloat(key); ok { + if err == nil { + return v + } + log.Warn("failed to parse "+key+" from PersistOptions's ttl storage", zap.Error(err)) + } + return defaultValue +} + +// GetTTLData returns if there is a TTL data for a given key. +func (o *PersistConfig) GetTTLData(key string) (string, bool) { + if o.ttl == nil { + return "", false + } + if result, ok := o.ttl.Get(key); ok { + return result.(string), ok + } + return "", false +} diff --git a/pkg/mcs/scheduling/server/config/watcher.go b/pkg/mcs/scheduling/server/config/watcher.go index 6ad37045000..4ded93ceb1b 100644 --- a/pkg/mcs/scheduling/server/config/watcher.go +++ b/pkg/mcs/scheduling/server/config/watcher.go @@ -20,6 +20,7 @@ import ( "strings" "sync" "sync/atomic" + "time" "github.com/coreos/go-semver/semver" "github.com/pingcap/log" @@ -48,8 +49,11 @@ type Watcher struct { // - Value: configuration JSON. schedulerConfigPathPrefix string + ttlConfigPrefix string + etcdClient *clientv3.Client configWatcher *etcdutil.LoopWatcher + ttlConfigWatcher *etcdutil.LoopWatcher schedulerConfigWatcher *etcdutil.LoopWatcher // Some data, like the global schedule config, should be loaded into `PersistConfig`. 
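The getTTL*Or helpers above layer short-lived overrides from a TTL cache over the persisted schedule config: if the key is present and parses, the override wins; otherwise the persisted value is used. A minimal sketch with a plain map standing in for the TTL cache; the key names below are hypothetical, the real ones come from pkg/schedule/config:

package main

import (
	"fmt"
	"strconv"
)

// ttlStore is a stand-in for the TTL cache held by PersistConfig; in the real
// code entries disappear automatically when their lease expires.
type ttlStore map[string]string

func (s ttlStore) Get(key string) (string, bool) { v, ok := s[key]; return v, ok }

// uintOr returns the TTL override for key if it exists and parses, otherwise
// the persisted default (the patch additionally logs parse errors).
func uintOr(s ttlStore, key string, def uint64) uint64 {
	raw, ok := s.Get(key)
	if !ok {
		return def
	}
	v, err := strconv.ParseUint(raw, 10, 64)
	if err != nil {
		return def
	}
	return v
}

func main() {
	overrides := ttlStore{"schedule.leader-schedule-limit": "16"}                 // hypothetical key name
	fmt.Println(uintOr(overrides, "schedule.leader-schedule-limit", 4))           // 16: override wins
	fmt.Println(uintOr(overrides, "schedule.region-schedule-limit", 2048))        // 2048: no override, fall back
}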
@@ -82,6 +86,7 @@ func NewWatcher( ctx: ctx, cancel: cancel, configPath: endpoint.ConfigPath(clusterID), + ttlConfigPrefix: sc.TTLConfigPrefix, schedulerConfigPathPrefix: endpoint.SchedulerConfigPathPrefix(clusterID), etcdClient: etcdClient, PersistConfig: persistConfig, @@ -91,6 +96,10 @@ func NewWatcher( if err != nil { return nil, err } + err = cw.initializeTTLConfigWatcher() + if err != nil { + return nil, err + } err = cw.initializeSchedulerConfigWatcher() if err != nil { return nil, err @@ -143,11 +152,43 @@ func (cw *Watcher) initializeConfigWatcher() error { return cw.configWatcher.WaitLoad() } +func (cw *Watcher) initializeTTLConfigWatcher() error { + putFn := func(kv *mvccpb.KeyValue) error { + key := string(kv.Key)[len(sc.TTLConfigPrefix)+1:] + value := string(kv.Value) + leaseID := kv.Lease + resp, err := cw.etcdClient.TimeToLive(cw.ctx, clientv3.LeaseID(leaseID)) + if err != nil { + return err + } + log.Info("update scheduling ttl config", zap.String("key", key), zap.String("value", value)) + cw.ttl.PutWithTTL(key, value, time.Duration(resp.TTL)*time.Second) + return nil + } + deleteFn := func(kv *mvccpb.KeyValue) error { + key := string(kv.Key)[len(sc.TTLConfigPrefix)+1:] + cw.ttl.PutWithTTL(key, nil, 0) + return nil + } + postEventFn := func() error { + return nil + } + cw.ttlConfigWatcher = etcdutil.NewLoopWatcher( + cw.ctx, &cw.wg, + cw.etcdClient, + "scheduling-ttl-config-watcher", cw.ttlConfigPrefix, + putFn, deleteFn, postEventFn, clientv3.WithPrefix(), + ) + cw.ttlConfigWatcher.StartWatchLoop() + return cw.ttlConfigWatcher.WaitLoad() +} + func (cw *Watcher) initializeSchedulerConfigWatcher() error { prefixToTrim := cw.schedulerConfigPathPrefix + "/" putFn := func(kv *mvccpb.KeyValue) error { name := strings.TrimPrefix(string(kv.Key), prefixToTrim) - log.Info("update scheduler config", zap.String("name", string(kv.Value))) + log.Info("update scheduler config", zap.String("name", name), + zap.String("value", string(kv.Value))) err := cw.storage.SaveSchedulerConfig(name, kv.Value) if err != nil { log.Warn("failed to save scheduler config", diff --git a/pkg/mcs/scheduling/server/grpc_service.go b/pkg/mcs/scheduling/server/grpc_service.go index 8e47e7380f9..b865e917d75 100644 --- a/pkg/mcs/scheduling/server/grpc_service.go +++ b/pkg/mcs/scheduling/server/grpc_service.go @@ -65,7 +65,7 @@ type Service struct { *Server } -// NewService creates a new TSO service. +// NewService creates a new scheduling service. func NewService[T ConfigProvider](svr bs.Server) registry.RegistrableService { server, ok := svr.(*Server) if !ok { @@ -118,7 +118,7 @@ func (s *heartbeatServer) Recv() (*schedulingpb.RegionHeartbeatRequest, error) { return req, nil } -// RegionHeartbeat implements gRPC PDServer. +// RegionHeartbeat implements gRPC SchedulingServer. func (s *Service) RegionHeartbeat(stream schedulingpb.Scheduling_RegionHeartbeatServer) error { var ( server = &heartbeatServer{stream: stream} @@ -168,7 +168,7 @@ func (s *Service) RegionHeartbeat(stream schedulingpb.Scheduling_RegionHeartbeat } } -// StoreHeartbeat implements gRPC PDServer. +// StoreHeartbeat implements gRPC SchedulingServer. func (s *Service) StoreHeartbeat(ctx context.Context, request *schedulingpb.StoreHeartbeatRequest) (*schedulingpb.StoreHeartbeatResponse, error) { c := s.GetCluster() if c == nil { @@ -202,7 +202,7 @@ func (s *Service) SplitRegions(ctx context.Context, request *schedulingpb.SplitR }, nil } -// ScatterRegions implements gRPC PDServer. +// ScatterRegions implements gRPC SchedulingServer. 
+// ScatterRegions implements gRPC SchedulingServer.
func (s *Service) ScatterRegions(ctx context.Context, request *schedulingpb.ScatterRegionsRequest) (*schedulingpb.ScatterRegionsResponse, error) { c := s.GetCluster() if c == nil { @@ -211,7 +211,11 @@ func (s *Service) ScatterRegions(ctx context.Context, request *schedulingpb.Scat opsCount, failures, err := c.GetRegionScatterer().ScatterRegionsByID(request.GetRegionsId(), request.GetGroup(), int(request.GetRetryLimit()), request.GetSkipStoreLimit()) if err != nil { - return nil, err + header := s.errorHeader(&schedulingpb.Error{ + Type: schedulingpb.ErrorType_UNKNOWN, + Message: err.Error(), + }) + return &schedulingpb.ScatterRegionsResponse{Header: header}, nil } percentage := 100 if len(failures) > 0 { @@ -243,7 +247,7 @@ func (s *Service) GetOperator(ctx context.Context, request *schedulingpb.GetOper if r == nil { header := s.errorHeader(&schedulingpb.Error{ Type: schedulingpb.ErrorType_UNKNOWN, - Message: "Not Found", + Message: "region not found", }) return &schedulingpb.GetOperatorResponse{Header: header}, nil } @@ -257,7 +261,7 @@ func (s *Service) GetOperator(ctx context.Context, request *schedulingpb.GetOper }, nil } -// AskBatchSplit implements gRPC PDServer. +// AskBatchSplit implements gRPC SchedulingServer. func (s *Service) AskBatchSplit(ctx context.Context, request *schedulingpb.AskBatchSplitRequest) (*schedulingpb.AskBatchSplitResponse, error) { c := s.GetCluster() if c == nil { diff --git a/pkg/mcs/scheduling/server/meta/watcher.go b/pkg/mcs/scheduling/server/meta/watcher.go index 3dbd0fc8c92..6fae537eab9 100644 --- a/pkg/mcs/scheduling/server/meta/watcher.go +++ b/pkg/mcs/scheduling/server/meta/watcher.go @@ -16,12 +16,14 @@ package meta import ( "context" + "strconv" "sync" "github.com/gogo/protobuf/proto" "github.com/pingcap/kvproto/pkg/metapb" "github.com/pingcap/log" "github.com/tikv/pd/pkg/core" + "github.com/tikv/pd/pkg/statistics" "github.com/tikv/pd/pkg/storage/endpoint" "github.com/tikv/pd/pkg/utils/etcdutil" "go.etcd.io/etcd/clientv3" @@ -79,9 +81,15 @@ func (w *Watcher) initializeStoreWatcher() error { origin := w.basicCluster.GetStore(store.GetId()) if origin == nil { w.basicCluster.PutStore(core.NewStoreInfo(store)) - return nil + } else { + w.basicCluster.PutStore(origin.Clone(core.SetStoreMeta(store))) } - w.basicCluster.PutStore(origin.Clone(core.SetStoreState(store.GetState(), store.GetPhysicallyDestroyed()))) + + if store.GetNodeState() == metapb.NodeState_Removed { + statistics.ResetStoreStatistics(store.GetAddress(), strconv.FormatUint(store.GetId(), 10)) + // TODO: remove hot stats + } + return nil } deleteFn := func(kv *mvccpb.KeyValue) error { diff --git a/pkg/mcs/scheduling/server/server.go b/pkg/mcs/scheduling/server/server.go index 9caae932037..8ee8b81ae47 100644 --- a/pkg/mcs/scheduling/server/server.go +++ b/pkg/mcs/scheduling/server/server.go @@ -36,6 +36,7 @@ import ( "github.com/pingcap/sysutil" "github.com/spf13/cobra" bs "github.com/tikv/pd/pkg/basicserver" + "github.com/tikv/pd/pkg/cache" "github.com/tikv/pd/pkg/core" "github.com/tikv/pd/pkg/errs" "github.com/tikv/pd/pkg/mcs/discovery" @@ -46,6 +47,7 @@ import ( "github.com/tikv/pd/pkg/mcs/utils" "github.com/tikv/pd/pkg/member" "github.com/tikv/pd/pkg/schedule" + sc "github.com/tikv/pd/pkg/schedule/config" "github.com/tikv/pd/pkg/schedule/hbstream" "github.com/tikv/pd/pkg/schedule/schedulers" "github.com/tikv/pd/pkg/storage/endpoint" @@ -190,6 +192,10 @@ func (s *Server) updateAPIServerMemberLoop() { continue } for _, ep := range members.Members { + if len(ep.GetClientURLs()) == 0 { // 
This member is not started yet. + log.Info("member is not started yet", zap.String("member-id", fmt.Sprintf("%x", ep.GetID())), errs.ZapError(err)) + continue + } status, err := s.GetClient().Status(ctx, ep.ClientURLs[0]) if err != nil { log.Info("failed to get status of member", zap.String("member-id", fmt.Sprintf("%x", ep.ID)), zap.String("endpoint", ep.ClientURLs[0]), errs.ZapError(err)) @@ -241,7 +247,7 @@ func (s *Server) primaryElectionLoop() { func (s *Server) campaignLeader() { log.Info("start to campaign the primary/leader", zap.String("campaign-scheduling-primary-name", s.participant.Name())) - if err := s.participant.CampaignLeader(s.cfg.LeaderLease); err != nil { + if err := s.participant.CampaignLeader(s.Context(), s.cfg.LeaderLease); err != nil { if err.Error() == errs.ErrEtcdTxnConflict.Error() { log.Info("campaign scheduling primary meets error due to txn conflict, another server may campaign successfully", zap.String("campaign-scheduling-primary-name", s.participant.Name())) @@ -405,21 +411,22 @@ func (s *Server) startServer() (err error) { // different service modes provided by the same pd-server binary serverInfo.WithLabelValues(versioninfo.PDReleaseVersion, versioninfo.PDGitHash).Set(float64(time.Now().Unix())) - uniqueName := s.cfg.ListenAddr + s.serviceID = &discovery.ServiceRegistryEntry{ServiceAddr: s.cfg.AdvertiseListenAddr} + uniqueName := s.cfg.GetAdvertiseListenAddr() uniqueID := memberutil.GenerateUniqueID(uniqueName) log.Info("joining primary election", zap.String("participant-name", uniqueName), zap.Uint64("participant-id", uniqueID)) s.participant = member.NewParticipant(s.GetClient(), utils.SchedulingServiceName) p := &schedulingpb.Participant{ Name: uniqueName, Id: uniqueID, // id is unique among all participants - ListenUrls: []string{s.cfg.AdvertiseListenAddr}, + ListenUrls: []string{s.cfg.GetAdvertiseListenAddr()}, } s.participant.InitInfo(p, endpoint.SchedulingSvcRootPath(s.clusterID), utils.PrimaryKey, "primary election") s.service = &Service{Server: s} s.AddServiceReadyCallback(s.startCluster) s.AddServiceExitCallback(s.stopCluster) - if err := s.InitListener(s.GetTLSConfig(), s.cfg.ListenAddr); err != nil { + if err := s.InitListener(s.GetTLSConfig(), s.cfg.GetListenAddr()); err != nil { return err } @@ -443,7 +450,7 @@ func (s *Server) startServer() (err error) { return err } s.serviceRegister = discovery.NewServiceRegister(s.Context(), s.GetClient(), strconv.FormatUint(s.clusterID, 10), - utils.SchedulingServiceName, s.cfg.AdvertiseListenAddr, serializedEntry, discovery.DefaultLeaseInSeconds) + utils.SchedulingServiceName, s.cfg.GetAdvertiseListenAddr(), serializedEntry, discovery.DefaultLeaseInSeconds) if err := s.serviceRegister.Register(); err != nil { log.Error("failed to register the service", zap.String("service-name", utils.SchedulingServiceName), errs.ZapError(err)) return err @@ -510,13 +517,30 @@ func (s *Server) GetPersistConfig() *config.PersistConfig { return s.persistConfig } +// GetConfig gets the config. 
+func (s *Server) GetConfig() *config.Config { + cfg := s.cfg.Clone() + cfg.Schedule = *s.persistConfig.GetScheduleConfig().Clone() + cfg.Replication = *s.persistConfig.GetReplicationConfig().Clone() + cfg.ClusterVersion = *s.persistConfig.GetClusterVersion() + if s.storage == nil { + return cfg + } + sches, configs, err := s.storage.LoadAllSchedulerConfigs() + if err != nil { + return cfg + } + cfg.Schedule.SchedulersPayload = schedulers.ToPayload(sches, configs) + return cfg +} + // CreateServer creates the Server func CreateServer(ctx context.Context, cfg *config.Config) *Server { svr := &Server{ BaseServer: server.NewBaseServer(ctx), DiagnosticsServer: sysutil.NewDiagnosticsServer(cfg.Log.File.Filename), cfg: cfg, - persistConfig: config.NewPersistConfig(cfg), + persistConfig: config.NewPersistConfig(cfg, cache.NewStringTTL(ctx, sc.DefaultGCInterval, sc.DefaultTTL)), checkMembershipCh: make(chan struct{}, 1), } return svr diff --git a/pkg/mcs/tso/server/apis/v1/api.go b/pkg/mcs/tso/server/apis/v1/api.go index f1853bf5483..33e1e0801aa 100644 --- a/pkg/mcs/tso/server/apis/v1/api.go +++ b/pkg/mcs/tso/server/apis/v1/api.go @@ -15,6 +15,7 @@ package apis import ( + "fmt" "net/http" "strconv" "sync" @@ -89,10 +90,10 @@ func NewService(srv *tsoserver.Service) *Service { c.Set(multiservicesapi.ServiceContextKey, srv) c.Next() }) - apiHandlerEngine.Use(multiservicesapi.ServiceRedirector()) apiHandlerEngine.GET("metrics", utils.PromHandler()) pprof.Register(apiHandlerEngine) root := apiHandlerEngine.Group(APIPathPrefix) + root.Use(multiservicesapi.ServiceRedirector()) s := &Service{ srv: srv, apiHandlerEngine: apiHandlerEngine, @@ -150,6 +151,7 @@ type ResetTSParams struct { // @Failure 400 {string} string "The input is invalid." // @Failure 403 {string} string "Reset ts is forbidden." // @Failure 500 {string} string "TSO server failed to proceed the request." +// @Failure 503 {string} string "It's a temporary failure, please retry." // @Router /admin/reset-ts [post] // if force-use-larger=true: // @@ -185,6 +187,12 @@ func ResetTS(c *gin.Context) { if err = svr.ResetTS(ts, ignoreSmaller, skipUpperBoundCheck, 0); err != nil { if err == errs.ErrServerNotStarted { c.String(http.StatusInternalServerError, err.Error()) + } else if err == errs.ErrEtcdTxnConflict { + // If the error is ErrEtcdTxnConflict, it means there is a temporary failure. + // Return 503 to let the client retry. 
+ // Ref: https://datatracker.ietf.org/doc/html/rfc7231#section-6.6.4 + c.String(http.StatusServiceUnavailable, + fmt.Sprintf("It's a temporary failure with error %s, please retry.", err.Error())) } else { c.String(http.StatusForbidden, err.Error()) } diff --git a/pkg/mcs/tso/server/grpc_service.go b/pkg/mcs/tso/server/grpc_service.go index 40a308c72f8..9006faf49da 100644 --- a/pkg/mcs/tso/server/grpc_service.go +++ b/pkg/mcs/tso/server/grpc_service.go @@ -28,8 +28,6 @@ import ( bs "github.com/tikv/pd/pkg/basicserver" "github.com/tikv/pd/pkg/mcs/registry" "github.com/tikv/pd/pkg/utils/apiutil" - "github.com/tikv/pd/pkg/utils/grpcutil" - "github.com/tikv/pd/pkg/utils/tsoutil" "google.golang.org/grpc" "google.golang.org/grpc/codes" "google.golang.org/grpc/status" @@ -88,21 +86,9 @@ func (s *Service) RegisterRESTHandler(userDefineHandlers map[string]http.Handler // Tso returns a stream of timestamps func (s *Service) Tso(stream tsopb.TSO_TsoServer) error { - var ( - doneCh chan struct{} - errCh chan error - ) ctx, cancel := context.WithCancel(stream.Context()) defer cancel() for { - // Prevent unnecessary performance overhead of the channel. - if errCh != nil { - select { - case err := <-errCh: - return errors.WithStack(err) - default: - } - } request, err := stream.Recv() if err == io.EOF { return nil @@ -111,26 +97,6 @@ func (s *Service) Tso(stream tsopb.TSO_TsoServer) error { return errors.WithStack(err) } - streamCtx := stream.Context() - forwardedHost := grpcutil.GetForwardedHost(streamCtx) - if !s.IsLocalRequest(forwardedHost) { - clientConn, err := s.GetDelegateClient(s.Context(), s.GetTLSConfig(), forwardedHost) - if err != nil { - return errors.WithStack(err) - } - - if errCh == nil { - doneCh = make(chan struct{}) - defer close(doneCh) - errCh = make(chan error) - } - - tsoProtoFactory := s.tsoProtoFactory - tsoRequest := tsoutil.NewTSOProtoRequest(forwardedHost, clientConn, request, stream) - s.tsoDispatcher.DispatchRequest(ctx, tsoRequest, tsoProtoFactory, doneCh, errCh) - continue - } - start := time.Now() // TSO uses leader lease to determine validity. No need to check leader here. if s.IsClosed() { diff --git a/pkg/mcs/tso/server/install/install.go b/pkg/mcs/tso/server/install/install.go index 27db0c51d75..a821505474f 100644 --- a/pkg/mcs/tso/server/install/install.go +++ b/pkg/mcs/tso/server/install/install.go @@ -28,5 +28,5 @@ func init() { // Install registers the API group and grpc service. func Install(register *registry.ServiceRegistry) { - register.RegisterService("Scheduling", server.NewService[*server.Server]) + register.RegisterService("TSO", server.NewService[*server.Server]) } diff --git a/pkg/mcs/tso/server/server.go b/pkg/mcs/tso/server/server.go index 16ef3216c62..1a2430477d8 100644 --- a/pkg/mcs/tso/server/server.go +++ b/pkg/mcs/tso/server/server.go @@ -78,9 +78,6 @@ type Server struct { service *Service keyspaceGroupManager *tso.KeyspaceGroupManager - // tsoDispatcher is used to dispatch the TSO requests to - // the corresponding forwarding TSO channels. 
- tsoDispatcher *tsoutil.TSODispatcher // tsoProtoFactory is the abstract factory for creating tso // related data structures defined in the tso grpc protocol tsoProtoFactory *tsoutil.TSOProtoFactory diff --git a/pkg/member/member.go b/pkg/member/member.go index 80332a65f94..b411d0c957b 100644 --- a/pkg/member/member.go +++ b/pkg/member/member.go @@ -27,6 +27,7 @@ import ( "time" "github.com/pingcap/errors" + "github.com/pingcap/failpoint" "github.com/pingcap/kvproto/pkg/pdpb" "github.com/pingcap/log" "github.com/tikv/pd/pkg/election" @@ -42,6 +43,8 @@ const ( // The timeout to wait transfer etcd leader to complete. moveLeaderTimeout = 5 * time.Second dcLocationConfigEtcdPrefix = "dc-location" + // If the campaign times is more than this value in `campaignTimesRecordTimeout`, the PD will resign and campaign again. + campaignLeaderFrequencyTimes = 3 ) // EmbeddedEtcdMember is used for the election related logic. It implements Member interface. @@ -177,7 +180,17 @@ func (m *EmbeddedEtcdMember) GetLastLeaderUpdatedTime() time.Time { // CampaignLeader is used to campaign a PD member's leadership // and make it become a PD leader. -func (m *EmbeddedEtcdMember) CampaignLeader(leaseTimeout int64) error { +// leader should be changed when campaign leader frequently. +func (m *EmbeddedEtcdMember) CampaignLeader(ctx context.Context, leaseTimeout int64) error { + failpoint.Inject("skipCampaignLeaderCheck", func() { + failpoint.Return(m.leadership.Campaign(leaseTimeout, m.MemberValue())) + }) + if m.leadership.GetCampaignTimesNum() >= campaignLeaderFrequencyTimes { + log.Warn("campaign times is too frequent, resign and campaign again", + zap.String("leader-name", m.Name()), zap.String("leader-key", m.GetLeaderPath())) + m.leadership.ResetCampaignTimes() + return m.ResignEtcdLeader(ctx, m.Name(), "") + } return m.leadership.Campaign(leaseTimeout, m.MemberValue()) } diff --git a/pkg/member/participant.go b/pkg/member/participant.go index b3034a86807..189da7b96c9 100644 --- a/pkg/member/participant.go +++ b/pkg/member/participant.go @@ -121,22 +121,16 @@ func (m *Participant) Client() *clientv3.Client { // IsLeader returns whether the participant is the leader or not by checking its leadership's // lease and leader info. func (m *Participant) IsLeader() bool { - if m.GetLeader() == nil { - return false - } return m.leadership.Check() && m.GetLeader().GetId() == m.member.GetId() && m.campaignCheck() } // IsLeaderElected returns true if the leader exists; otherwise false func (m *Participant) IsLeaderElected() bool { - return m.GetLeader() != nil + return m.GetLeader().GetId() != 0 } // GetLeaderListenUrls returns current leader's listen urls func (m *Participant) GetLeaderListenUrls() []string { - if m.GetLeader() == nil { - return nil - } return m.GetLeader().GetListenUrls() } @@ -149,13 +143,9 @@ func (m *Participant) GetLeaderID() uint64 { func (m *Participant) GetLeader() participant { leader := m.leader.Load() if leader == nil { - return nil - } - member := leader.(participant) - if member.GetId() == 0 { - return nil + return NewParticipantByService(m.serviceName) } - return member + return leader.(participant) } // setLeader sets the member's leader. @@ -196,7 +186,7 @@ func (m *Participant) GetLeadership() *election.Leadership { } // CampaignLeader is used to campaign the leadership and make it become a leader. 
-func (m *Participant) CampaignLeader(leaseTimeout int64) error { +func (m *Participant) CampaignLeader(_ context.Context, leaseTimeout int64) error { if !m.campaignCheck() { return errs.ErrCheckCampaign } diff --git a/pkg/mock/mockcluster/config.go b/pkg/mock/mockcluster/config.go index 6febba026e8..a2e11b43deb 100644 --- a/pkg/mock/mockcluster/config.go +++ b/pkg/mock/mockcluster/config.go @@ -154,8 +154,8 @@ func (mc *Cluster) SetMaxReplicasWithLabel(enablePlacementRules bool, num int, l } if enablePlacementRules { rule := &placement.Rule{ - GroupID: "pd", - ID: "default", + GroupID: placement.DefaultGroupID, + ID: placement.DefaultRuleID, Index: 1, StartKey: []byte(""), EndKey: []byte(""), diff --git a/pkg/replication/replication_mode.go b/pkg/replication/replication_mode.go index 30b34e4596a..9776a36a8f3 100644 --- a/pkg/replication/replication_mode.go +++ b/pkg/replication/replication_mode.go @@ -212,6 +212,7 @@ const ( type drAutoSyncStatus struct { State string `json:"state,omitempty"` StateID uint64 `json:"state_id,omitempty"` + AsyncStartTime *time.Time `json:"async_start,omitempty"` RecoverStartTime *time.Time `json:"recover_start,omitempty"` TotalRegions int `json:"total_regions,omitempty"` SyncedRegions int `json:"synced_regions,omitempty"` @@ -262,7 +263,8 @@ func (m *ModeManager) drSwitchToAsyncWithLock(availableStores []uint64) error { log.Warn("failed to switch to async state", zap.String("replicate-mode", modeDRAutoSync), errs.ZapError(err)) return err } - dr := drAutoSyncStatus{State: drStateAsync, StateID: id, AvailableStores: availableStores} + now := time.Now() + dr := drAutoSyncStatus{State: drStateAsync, StateID: id, AvailableStores: availableStores, AsyncStartTime: &now} if err := m.storage.SaveReplicationStatus(modeDRAutoSync, dr); err != nil { log.Warn("failed to switch to async state", zap.String("replicate-mode", modeDRAutoSync), errs.ZapError(err)) return err @@ -272,6 +274,15 @@ func (m *ModeManager) drSwitchToAsyncWithLock(availableStores []uint64) error { return nil } +func (m *ModeManager) drDurationSinceAsyncStart() time.Duration { + m.RLock() + defer m.RUnlock() + if m.drAutoSync.AsyncStartTime == nil { + return 0 + } + return time.Since(*m.drAutoSync.AsyncStartTime) +} + func (m *ModeManager) drSwitchToSyncRecover() error { m.Lock() defer m.Unlock() @@ -431,15 +442,6 @@ func (m *ModeManager) tickUpdateState() { canSync := primaryHasVoter && drHasVoter hasMajority := totalUpVoter*2 > totalVoter - log.Debug("replication store status", - zap.Uint64s("up-primary", storeIDs[primaryUp]), - zap.Uint64s("up-dr", storeIDs[drUp]), - zap.Uint64s("down-primary", storeIDs[primaryDown]), - zap.Uint64s("down-dr", storeIDs[drDown]), - zap.Bool("can-sync", canSync), - zap.Bool("has-majority", hasMajority), - ) - /* +----+ all region sync +------------+ @@ -458,7 +460,8 @@ func (m *ModeManager) tickUpdateState() { */ - switch m.drGetState() { + state := m.drGetState() + switch state { case drStateSync: // If hasMajority is false, the cluster is always unavailable. Switch to async won't help. 
if !canSync && hasMajority { @@ -477,7 +480,7 @@ func (m *ModeManager) tickUpdateState() { m.drSwitchToAsync(storeIDs[primaryUp]) } case drStateAsync: - if canSync { + if canSync && m.drDurationSinceAsyncStart() > m.config.DRAutoSync.WaitRecoverTimeout.Duration { m.drSwitchToSyncRecover() break } @@ -500,6 +503,19 @@ func (m *ModeManager) tickUpdateState() { } } } + + logFunc := log.Debug + if state != m.drGetState() { + logFunc = log.Info + } + logFunc("replication store status", + zap.Uint64s("up-primary", storeIDs[primaryUp]), + zap.Uint64s("up-dr", storeIDs[drUp]), + zap.Uint64s("down-primary", storeIDs[primaryDown]), + zap.Uint64s("down-dr", storeIDs[drDown]), + zap.Bool("can-sync", canSync), + zap.Bool("has-majority", hasMajority), + ) } func (m *ModeManager) tickReplicateStatus() { diff --git a/pkg/replication/replication_mode_test.go b/pkg/replication/replication_mode_test.go index e01fb7a0b9a..5cf9f1a1450 100644 --- a/pkg/replication/replication_mode_test.go +++ b/pkg/replication/replication_mode_test.go @@ -16,6 +16,7 @@ package replication import ( "context" + "encoding/json" "errors" "fmt" "testing" @@ -159,6 +160,20 @@ func newMockReplicator(ids []uint64) *mockFileReplicator { } } +func assertLastData(t *testing.T, data string, state string, stateID uint64, availableStores []uint64) { + type status struct { + State string `json:"state"` + StateID uint64 `json:"state_id"` + AvailableStores []uint64 `json:"available_stores"` + } + var s status + err := json.Unmarshal([]byte(data), &s) + require.NoError(t, err) + require.Equal(t, state, s.State) + require.Equal(t, stateID, s.StateID) + require.Equal(t, availableStores, s.AvailableStores) +} + func TestStateSwitch(t *testing.T) { re := require.New(t) ctx, cancel := context.WithCancel(context.Background()) @@ -190,7 +205,7 @@ func TestStateSwitch(t *testing.T) { stateID := rep.drAutoSync.StateID re.NotEqual(uint64(0), stateID) rep.tickReplicateStatus() - re.Equal(fmt.Sprintf(`{"state":"sync","state_id":%d}`, stateID), replicator.lastData[1]) + assertLastData(t, replicator.lastData[1], "sync", stateID, nil) assertStateIDUpdate := func() { re.NotEqual(stateID, rep.drAutoSync.StateID) stateID = rep.drAutoSync.StateID @@ -207,7 +222,7 @@ func TestStateSwitch(t *testing.T) { re.Equal(drStateAsyncWait, rep.drGetState()) assertStateIDUpdate() rep.tickReplicateStatus() - re.Equal(fmt.Sprintf(`{"state":"async_wait","state_id":%d,"available_stores":[1,2,3,4]}`, stateID), replicator.lastData[1]) + assertLastData(t, replicator.lastData[1], "async_wait", stateID, []uint64{1, 2, 3, 4}) re.False(rep.GetReplicationStatus().GetDrAutoSync().GetPauseRegionSplit()) conf.DRAutoSync.PauseRegionSplit = true @@ -218,7 +233,7 @@ func TestStateSwitch(t *testing.T) { rep.tickUpdateState() assertStateIDUpdate() rep.tickReplicateStatus() - re.Equal(fmt.Sprintf(`{"state":"async","state_id":%d,"available_stores":[1,2,3,4]}`, stateID), replicator.lastData[1]) + assertLastData(t, replicator.lastData[1], "async", stateID, []uint64{1, 2, 3, 4}) // add new store in dr zone. 
cluster.AddLabelsStore(5, 1, map[string]string{"zone": "zone2"}) @@ -268,18 +283,19 @@ func TestStateSwitch(t *testing.T) { rep.tickUpdateState() re.Equal(drStateAsyncWait, rep.drGetState()) assertStateIDUpdate() + rep.tickReplicateStatus() - re.Equal(fmt.Sprintf(`{"state":"async_wait","state_id":%d,"available_stores":[1,2,3,4]}`, stateID), replicator.lastData[1]) + assertLastData(t, replicator.lastData[1], "async_wait", stateID, []uint64{1, 2, 3, 4}) setStoreState(cluster, "down", "up", "up", "up", "down", "down") rep.tickUpdateState() assertStateIDUpdate() rep.tickReplicateStatus() - re.Equal(fmt.Sprintf(`{"state":"async_wait","state_id":%d,"available_stores":[2,3,4]}`, stateID), replicator.lastData[1]) + assertLastData(t, replicator.lastData[1], "async_wait", stateID, []uint64{2, 3, 4}) setStoreState(cluster, "up", "down", "up", "up", "down", "down") rep.tickUpdateState() assertStateIDUpdate() rep.tickReplicateStatus() - re.Equal(fmt.Sprintf(`{"state":"async_wait","state_id":%d,"available_stores":[1,3,4]}`, stateID), replicator.lastData[1]) + assertLastData(t, replicator.lastData[1], "async_wait", stateID, []uint64{1, 3, 4}) // async_wait -> async rep.tickUpdateState() @@ -291,26 +307,32 @@ func TestStateSwitch(t *testing.T) { rep.tickUpdateState() assertStateIDUpdate() rep.tickReplicateStatus() - re.Equal(fmt.Sprintf(`{"state":"async","state_id":%d,"available_stores":[1,3,4]}`, stateID), replicator.lastData[1]) + assertLastData(t, replicator.lastData[1], "async", stateID, []uint64{1, 3, 4}) // async -> async setStoreState(cluster, "up", "up", "up", "up", "down", "down") rep.tickUpdateState() // store 2 won't be available before it syncs status. rep.tickReplicateStatus() - re.Equal(fmt.Sprintf(`{"state":"async","state_id":%d,"available_stores":[1,3,4]}`, stateID), replicator.lastData[1]) + assertLastData(t, replicator.lastData[1], "async", stateID, []uint64{1, 3, 4}) syncStoreStatus(1, 2, 3, 4) rep.tickUpdateState() assertStateIDUpdate() rep.tickReplicateStatus() - re.Equal(fmt.Sprintf(`{"state":"async","state_id":%d,"available_stores":[1,2,3,4]}`, stateID), replicator.lastData[1]) + assertLastData(t, replicator.lastData[1], "async", stateID, []uint64{1, 2, 3, 4}) // async -> sync_recover setStoreState(cluster, "up", "up", "up", "up", "up", "up") rep.tickUpdateState() re.Equal(drStateSyncRecover, rep.drGetState()) assertStateIDUpdate() + rep.drSwitchToAsync([]uint64{1, 2, 3, 4, 5}) + rep.config.DRAutoSync.WaitRecoverTimeout = typeutil.NewDuration(time.Hour) + rep.tickUpdateState() + re.Equal(drStateAsync, rep.drGetState()) // wait recover timeout + + rep.config.DRAutoSync.WaitRecoverTimeout = typeutil.NewDuration(0) setStoreState(cluster, "down", "up", "up", "up", "up", "up") rep.tickUpdateState() re.Equal(drStateSyncRecover, rep.drGetState()) @@ -387,27 +409,27 @@ func TestReplicateState(t *testing.T) { stateID := rep.drAutoSync.StateID // replicate after initialized rep.tickReplicateStatus() - re.Equal(fmt.Sprintf(`{"state":"sync","state_id":%d}`, stateID), replicator.lastData[1]) + assertLastData(t, replicator.lastData[1], "sync", stateID, nil) // repliate state to new member replicator.memberIDs = append(replicator.memberIDs, 2, 3) rep.tickReplicateStatus() - re.Equal(fmt.Sprintf(`{"state":"sync","state_id":%d}`, stateID), replicator.lastData[2]) - re.Equal(fmt.Sprintf(`{"state":"sync","state_id":%d}`, stateID), replicator.lastData[3]) + assertLastData(t, replicator.lastData[2], "sync", stateID, nil) + assertLastData(t, replicator.lastData[3], "sync", stateID, nil) // inject error 
replicator.errors[2] = errors.New("failed to persist") rep.tickUpdateState() // switch async_wait since there is only one zone newStateID := rep.drAutoSync.StateID rep.tickReplicateStatus() - re.Equal(fmt.Sprintf(`{"state":"async_wait","state_id":%d,"available_stores":[1,2]}`, newStateID), replicator.lastData[1]) - re.Equal(fmt.Sprintf(`{"state":"sync","state_id":%d}`, stateID), replicator.lastData[2]) - re.Equal(fmt.Sprintf(`{"state":"async_wait","state_id":%d,"available_stores":[1,2]}`, newStateID), replicator.lastData[3]) + assertLastData(t, replicator.lastData[1], "async_wait", newStateID, []uint64{1, 2}) + assertLastData(t, replicator.lastData[2], "sync", stateID, nil) + assertLastData(t, replicator.lastData[3], "async_wait", newStateID, []uint64{1, 2}) // clear error, replicate to node 2 next time delete(replicator.errors, 2) rep.tickReplicateStatus() - re.Equal(fmt.Sprintf(`{"state":"async_wait","state_id":%d,"available_stores":[1,2]}`, newStateID), replicator.lastData[2]) + assertLastData(t, replicator.lastData[2], "async_wait", newStateID, []uint64{1, 2}) } func TestAsynctimeout(t *testing.T) { @@ -637,7 +659,7 @@ func TestComplexPlacementRules(t *testing.T) { rep.tickUpdateState() re.Equal(drStateAsyncWait, rep.drGetState()) rep.tickReplicateStatus() - re.Equal(fmt.Sprintf(`{"state":"async_wait","state_id":%d,"available_stores":[1,2,3,4,5,6]}`, rep.drAutoSync.StateID), replicator.lastData[1]) + assertLastData(t, replicator.lastData[1], "async_wait", rep.drAutoSync.StateID, []uint64{1, 2, 3, 4, 5, 6}) // reset to sync setStoreState(cluster, "up", "up", "up", "up", "up", "up", "up", "up", "up", "up") @@ -698,7 +720,7 @@ func TestComplexPlacementRules2(t *testing.T) { rep.tickUpdateState() re.Equal(drStateAsyncWait, rep.drGetState()) rep.tickReplicateStatus() - re.Equal(fmt.Sprintf(`{"state":"async_wait","state_id":%d,"available_stores":[1,2,3,4]}`, rep.drAutoSync.StateID), replicator.lastData[1]) + assertLastData(t, replicator.lastData[1], "async_wait", rep.drAutoSync.StateID, []uint64{1, 2, 3, 4}) } func TestComplexPlacementRules3(t *testing.T) { @@ -737,7 +759,7 @@ func TestComplexPlacementRules3(t *testing.T) { rep.tickUpdateState() re.Equal(drStateAsyncWait, rep.drGetState()) rep.tickReplicateStatus() - re.Equal(fmt.Sprintf(`{"state":"async_wait","state_id":%d,"available_stores":[1,2,3,4]}`, rep.drAutoSync.StateID), replicator.lastData[1]) + assertLastData(t, replicator.lastData[1], "async_wait", rep.drAutoSync.StateID, []uint64{1, 2, 3, 4}) } func genRegions(cluster *mockcluster.Cluster, stateID uint64, n int) []*core.RegionInfo { diff --git a/pkg/schedule/checker/checker_controller.go b/pkg/schedule/checker/checker_controller.go index 68b794f417a..355226cd2d8 100644 --- a/pkg/schedule/checker/checker_controller.go +++ b/pkg/schedule/checker/checker_controller.go @@ -221,6 +221,11 @@ func (c *Controller) ClearSuspectKeyRanges() { c.suspectKeyRanges.Clear() } +// ClearSuspectRegions clears the suspect regions, only for unit test +func (c *Controller) ClearSuspectRegions() { + c.suspectRegions.Clear() +} + // IsPendingRegion returns true if the given region is in the pending list. 
func (c *Controller) IsPendingRegion(regionID uint64) bool { _, exist := c.ruleChecker.pendingList.Get(regionID) diff --git a/pkg/schedule/checker/merge_checker_test.go b/pkg/schedule/checker/merge_checker_test.go index 6478eb0b2c4..5e9311c76cd 100644 --- a/pkg/schedule/checker/merge_checker_test.go +++ b/pkg/schedule/checker/merge_checker_test.go @@ -188,7 +188,7 @@ func (suite *mergeCheckerTestSuite) TestBasic() { // merge cannot across rule key. suite.cluster.SetEnablePlacementRules(true) suite.cluster.RuleManager.SetRule(&placement.Rule{ - GroupID: "pd", + GroupID: placement.DefaultGroupID, ID: "test", Index: 1, Override: true, @@ -202,7 +202,7 @@ func (suite *mergeCheckerTestSuite) TestBasic() { suite.NotNil(ops) suite.Equal(suite.regions[2].GetID(), ops[0].RegionID()) suite.Equal(suite.regions[1].GetID(), ops[1].RegionID()) - suite.cluster.RuleManager.DeleteRule("pd", "test") + suite.cluster.RuleManager.DeleteRule(placement.DefaultGroupID, "test") // check 'merge_option' label suite.cluster.GetRegionLabeler().SetLabelRule(&labeler.LabelRule{ diff --git a/pkg/schedule/checker/rule_checker.go b/pkg/schedule/checker/rule_checker.go index b0537bf9ce4..553ece09e65 100644 --- a/pkg/schedule/checker/rule_checker.go +++ b/pkg/schedule/checker/rule_checker.go @@ -78,6 +78,7 @@ var ( ruleCheckerSkipRemoveOrphanPeerCounter = checkerCounter.WithLabelValues(ruleChecker, "skip-remove-orphan-peer") ruleCheckerRemoveOrphanPeerCounter = checkerCounter.WithLabelValues(ruleChecker, "remove-orphan-peer") ruleCheckerReplaceOrphanPeerCounter = checkerCounter.WithLabelValues(ruleChecker, "replace-orphan-peer") + ruleCheckerReplaceOrphanPeerNoFitCounter = checkerCounter.WithLabelValues(ruleChecker, "replace-orphan-peer-no-fit") ) // RuleChecker fix/improve region by placement rules. @@ -131,6 +132,11 @@ func (c *RuleChecker) CheckWithFit(region *core.RegionInfo, fit *placement.Regio return } + // the placement rule is disabled + if fit == nil { + return + } + // If the fit is calculated by FitRegion, which means we get a new fit result, thus we should // invalid the cache if it exists c.ruleManager.InvalidCache(region.GetID()) @@ -447,38 +453,63 @@ func (c *RuleChecker) fixOrphanPeers(region *core.RegionInfo, fit *placement.Reg if len(fit.OrphanPeers) == 0 { return nil, nil } - var pinDownPeer *metapb.Peer - isUnhealthyPeer := func(id uint64) bool { - for _, downPeer := range region.GetDownPeers() { - if downPeer.Peer.GetId() == id { + + isPendingPeer := func(id uint64) bool { + for _, pendingPeer := range region.GetPendingPeers() { + if pendingPeer.GetId() == id { return true } } - for _, pendingPeer := range region.GetPendingPeers() { - if pendingPeer.GetId() == id { + return false + } + + isDownPeer := func(id uint64) bool { + for _, downPeer := range region.GetDownPeers() { + if downPeer.Peer.GetId() == id { return true } } return false } + + isUnhealthyPeer := func(id uint64) bool { + return isPendingPeer(id) || isDownPeer(id) + } + + isInDisconnectedStore := func(p *metapb.Peer) bool { + // avoid to meet down store when fix orphan peers, + // isInDisconnectedStore is usually more strictly than IsUnhealthy. 
+ store := c.cluster.GetStore(p.GetStoreId()) + if store == nil { + return true + } + return store.IsDisconnected() + } + + checkDownPeer := func(peers []*metapb.Peer) (*metapb.Peer, bool) { + for _, p := range peers { + if isInDisconnectedStore(p) || isDownPeer(p.GetId()) { + return p, true + } + if isPendingPeer(p.GetId()) { + return nil, true + } + } + return nil, false + } + // remove orphan peers only when all rules are satisfied (count+role) and all peers selected // by RuleFits is not pending or down. + var pinDownPeer *metapb.Peer hasUnhealthyFit := false -loopFits: for _, rf := range fit.RuleFits { if !rf.IsSatisfied() { hasUnhealthyFit = true break } - for _, p := range rf.Peers { - if isUnhealthyPeer(p.GetId()) { - // make sure is down peer. - if region.GetDownPeer(p.GetId()) != nil { - pinDownPeer = p - } - hasUnhealthyFit = true - break loopFits - } + pinDownPeer, hasUnhealthyFit = checkDownPeer(rf.Peers) + if hasUnhealthyFit { + break } } @@ -491,16 +522,19 @@ loopFits: // try to use orphan peers to replace unhealthy down peers. for _, orphanPeer := range fit.OrphanPeers { if pinDownPeer != nil { + if pinDownPeer.GetId() == orphanPeer.GetId() { + continue + } // make sure the orphan peer is healthy. - if isUnhealthyPeer(orphanPeer.GetId()) { + if isUnhealthyPeer(orphanPeer.GetId()) || isInDisconnectedStore(orphanPeer) { continue } // no consider witness in this path. if pinDownPeer.GetIsWitness() || orphanPeer.GetIsWitness() { continue } - // down peer's store should be down. - if !c.isStoreDownTimeHitMaxDownTime(pinDownPeer.GetStoreId()) { + // pinDownPeer's store should be disconnected, because we use more strict judge before. + if !isInDisconnectedStore(pinDownPeer) { continue } // check if down peer can replace with orphan peer. @@ -514,10 +548,14 @@ loopFits: return operator.CreatePromoteLearnerOperatorAndRemovePeer("replace-down-peer-with-orphan-peer", c.cluster, region, orphanPeer, pinDownPeer) case orphanPeerRole == metapb.PeerRole_Voter && destRole == metapb.PeerRole_Learner: return operator.CreateDemoteLearnerOperatorAndRemovePeer("replace-down-peer-with-orphan-peer", c.cluster, region, orphanPeer, pinDownPeer) + case orphanPeerRole == destRole && isInDisconnectedStore(pinDownPeer) && !dstStore.IsDisconnected(): + return operator.CreateRemovePeerOperator("remove-replaced-orphan-peer", c.cluster, 0, region, pinDownPeer.GetStoreId()) default: // destRole should not same with orphanPeerRole. if role is same, it fit with orphanPeer should be better than now. // destRole never be leader, so we not consider it. } + } else { + ruleCheckerReplaceOrphanPeerNoFitCounter.Inc() } } } @@ -526,14 +564,25 @@ loopFits: // Ref https://github.com/tikv/pd/issues/4045 if len(fit.OrphanPeers) >= 2 { hasHealthPeer := false + var disconnectedPeer *metapb.Peer + for _, orphanPeer := range fit.OrphanPeers { + if isInDisconnectedStore(orphanPeer) { + disconnectedPeer = orphanPeer + break + } + } for _, orphanPeer := range fit.OrphanPeers { if isUnhealthyPeer(orphanPeer.GetId()) { ruleCheckerRemoveOrphanPeerCounter.Inc() - return operator.CreateRemovePeerOperator("remove-orphan-peer", c.cluster, 0, region, orphanPeer.StoreId) + return operator.CreateRemovePeerOperator("remove-unhealthy-orphan-peer", c.cluster, 0, region, orphanPeer.StoreId) } if hasHealthPeer { // there already exists a healthy orphan peer, so we can remove other orphan Peers. ruleCheckerRemoveOrphanPeerCounter.Inc() + // if there exists a disconnected orphan peer, we will pick it to remove firstly. 
+ if disconnectedPeer != nil { + return operator.CreateRemovePeerOperator("remove-orphan-peer", c.cluster, 0, region, disconnectedPeer.StoreId) + } return operator.CreateRemovePeerOperator("remove-orphan-peer", c.cluster, 0, region, orphanPeer.StoreId) } hasHealthPeer = true diff --git a/pkg/schedule/checker/rule_checker_test.go b/pkg/schedule/checker/rule_checker_test.go index 8ee3b1eccfa..e77830fac49 100644 --- a/pkg/schedule/checker/rule_checker_test.go +++ b/pkg/schedule/checker/rule_checker_test.go @@ -17,6 +17,7 @@ package checker import ( "context" "fmt" + "strconv" "strings" "testing" @@ -87,7 +88,7 @@ func (suite *ruleCheckerTestSuite) TestAddRulePeerWithIsolationLevel() { suite.cluster.AddLabelsStore(4, 1, map[string]string{"zone": "z1", "rack": "r3", "host": "h1"}) suite.cluster.AddLeaderRegionWithRange(1, "", "", 1, 2) suite.ruleManager.SetRule(&placement.Rule{ - GroupID: "pd", + GroupID: placement.DefaultGroupID, ID: "test", Index: 100, Override: true, @@ -100,7 +101,7 @@ func (suite *ruleCheckerTestSuite) TestAddRulePeerWithIsolationLevel() { suite.Nil(op) suite.cluster.AddLeaderRegionWithRange(1, "", "", 1, 3) suite.ruleManager.SetRule(&placement.Rule{ - GroupID: "pd", + GroupID: placement.DefaultGroupID, ID: "test", Index: 100, Override: true, @@ -124,9 +125,9 @@ func (suite *ruleCheckerTestSuite) TestReplaceDownPeerWithIsolationLevel() { suite.cluster.AddLabelsStore(5, 1, map[string]string{"zone": "z3", "host": "h5"}) suite.cluster.AddLabelsStore(6, 1, map[string]string{"zone": "z3", "host": "h6"}) suite.cluster.AddLeaderRegionWithRange(1, "", "", 1, 3, 5) - suite.ruleManager.DeleteRule("pd", "default") + suite.ruleManager.DeleteRule(placement.DefaultGroupID, placement.DefaultRuleID) suite.ruleManager.SetRule(&placement.Rule{ - GroupID: "pd", + GroupID: placement.DefaultGroupID, ID: "test", Index: 100, Override: true, @@ -225,7 +226,6 @@ func (suite *ruleCheckerTestSuite) TestFixToManyOrphanPeers() { suite.NotNil(op) suite.Equal("remove-orphan-peer", op.Desc()) suite.Equal(uint64(5), op.Step(0).(operator.RemovePeer).FromStore) - // Case2: // store 4, 5, 6 are orphan peers, and peer on store 3 is down peer. and peer on store 4, 5 are pending. region = suite.cluster.GetRegion(1) @@ -235,6 +235,91 @@ func (suite *ruleCheckerTestSuite) TestFixToManyOrphanPeers() { suite.cluster.PutRegion(region) op = suite.rc.Check(suite.cluster.GetRegion(1)) suite.NotNil(op) + suite.Equal("remove-unhealthy-orphan-peer", op.Desc()) + suite.Equal(uint64(4), op.Step(0).(operator.RemovePeer).FromStore) + // Case3: + // store 4, 5, 6 are orphan peers, and peer on one of stores is disconnect peer + // we should remove disconnect peer first. + for i := uint64(4); i <= 6; i++ { + region = suite.cluster.GetRegion(1) + suite.cluster.SetStoreDisconnect(i) + region = region.Clone( + core.WithDownPeers([]*pdpb.PeerStats{{Peer: region.GetStorePeer(3), DownSeconds: 60000}}), + core.WithPendingPeers([]*metapb.Peer{region.GetStorePeer(3)})) + suite.cluster.PutRegion(region) + op = suite.rc.Check(suite.cluster.GetRegion(1)) + suite.NotNil(op) + suite.Equal("remove-orphan-peer", op.Desc()) + suite.Equal(i, op.Step(0).(operator.RemovePeer).FromStore) + suite.cluster.SetStoreUp(i) + } + // Case4: + // store 4, 5, 6 are orphan peers, and peer on two of stores is disconnect peer + // we should remove disconnect peer first. 
+ for i := uint64(4); i <= 6; i++ { + region = suite.cluster.GetRegion(1) + suite.cluster.SetStoreDisconnect(4) + suite.cluster.SetStoreDisconnect(5) + suite.cluster.SetStoreDisconnect(6) + suite.cluster.SetStoreUp(i) + region = region.Clone( + core.WithDownPeers([]*pdpb.PeerStats{{Peer: region.GetStorePeer(3), DownSeconds: 60000}}), + core.WithPendingPeers([]*metapb.Peer{region.GetStorePeer(3)})) + suite.cluster.PutRegion(region) + op = suite.rc.Check(suite.cluster.GetRegion(1)) + suite.NotNil(op) + suite.Equal("remove-orphan-peer", op.Desc()) + removedPeerStoreID := op.Step(0).(operator.RemovePeer).FromStore + suite.NotEqual(i, removedPeerStoreID) + region = suite.cluster.GetRegion(1) + newRegion := region.Clone(core.WithRemoveStorePeer(removedPeerStoreID)) + suite.cluster.PutRegion(newRegion) + op = suite.rc.Check(suite.cluster.GetRegion(1)) + suite.NotNil(op) + suite.Equal("remove-orphan-peer", op.Desc()) + removedPeerStoreID = op.Step(0).(operator.RemovePeer).FromStore + suite.NotEqual(i, removedPeerStoreID) + suite.cluster.PutRegion(region) + } +} + +func (suite *ruleCheckerTestSuite) TestFixToManyOrphanPeers2() { + suite.cluster.AddLeaderStore(1, 1) + suite.cluster.AddLeaderStore(2, 1) + suite.cluster.AddLeaderStore(3, 1) + suite.cluster.AddLeaderStore(4, 1) + suite.cluster.AddLeaderStore(5, 1) + suite.cluster.AddRegionWithLearner(1, 1, []uint64{2, 3}, []uint64{4, 5}) + + // Case1: + // store 4, 5 are orphan peers, and peer on one of stores is disconnect peer + // we should remove disconnect peer first. + for i := uint64(4); i <= 5; i++ { + region := suite.cluster.GetRegion(1) + suite.cluster.SetStoreDisconnect(i) + region = region.Clone( + core.WithDownPeers([]*pdpb.PeerStats{{Peer: region.GetStorePeer(3), DownSeconds: 60000}}), + core.WithPendingPeers([]*metapb.Peer{region.GetStorePeer(3)})) + suite.cluster.PutRegion(region) + op := suite.rc.Check(suite.cluster.GetRegion(1)) + suite.NotNil(op) + suite.Equal("remove-orphan-peer", op.Desc()) + suite.Equal(i, op.Step(0).(operator.RemovePeer).FromStore) + suite.cluster.SetStoreUp(i) + } + + // Case2: + // store 4, 5 are orphan peers, and they are disconnect peers + // we should remove the peer on disconnect stores at least. 
+ region := suite.cluster.GetRegion(1) + suite.cluster.SetStoreDisconnect(4) + suite.cluster.SetStoreDisconnect(5) + region = region.Clone( + core.WithDownPeers([]*pdpb.PeerStats{{Peer: region.GetStorePeer(3), DownSeconds: 60000}}), + core.WithPendingPeers([]*metapb.Peer{region.GetStorePeer(3)})) + suite.cluster.PutRegion(region) + op := suite.rc.Check(suite.cluster.GetRegion(1)) + suite.NotNil(op) suite.Equal("remove-orphan-peer", op.Desc()) suite.Equal(uint64(4), op.Step(0).(operator.RemovePeer).FromStore) } @@ -246,7 +331,7 @@ func (suite *ruleCheckerTestSuite) TestFixOrphanPeers2() { suite.cluster.AddLabelsStore(3, 1, map[string]string{"foo": "baz"}) suite.cluster.AddLeaderRegionWithRange(1, "", "", 1, 3) suite.ruleManager.SetRule(&placement.Rule{ - GroupID: "pd", + GroupID: placement.DefaultGroupID, ID: "r1", Index: 100, Override: true, @@ -282,7 +367,7 @@ func (suite *ruleCheckerTestSuite) TestFixRoleLeader() { suite.cluster.AddLabelsStore(3, 1, map[string]string{"role": "voter"}) suite.cluster.AddLeaderRegionWithRange(1, "", "", 1, 2, 3) suite.ruleManager.SetRule(&placement.Rule{ - GroupID: "pd", + GroupID: placement.DefaultGroupID, ID: "r1", Index: 100, Override: true, @@ -293,7 +378,7 @@ func (suite *ruleCheckerTestSuite) TestFixRoleLeader() { }, }) suite.ruleManager.SetRule(&placement.Rule{ - GroupID: "pd", + GroupID: placement.DefaultGroupID, ID: "r2", Index: 101, Role: placement.Follower, @@ -313,7 +398,7 @@ func (suite *ruleCheckerTestSuite) TestFixRoleLeaderIssue3130() { suite.cluster.AddLabelsStore(2, 1, map[string]string{"role": "leader"}) suite.cluster.AddLeaderRegion(1, 1, 2) suite.ruleManager.SetRule(&placement.Rule{ - GroupID: "pd", + GroupID: placement.DefaultGroupID, ID: "r1", Index: 100, Override: true, @@ -386,7 +471,7 @@ func (suite *ruleCheckerTestSuite) TestFixRuleWitness() { suite.cluster.AddLeaderRegion(1, 1) suite.ruleManager.SetRule(&placement.Rule{ - GroupID: "pd", + GroupID: placement.DefaultGroupID, ID: "r1", Index: 100, Override: true, @@ -412,7 +497,7 @@ func (suite *ruleCheckerTestSuite) TestFixRuleWitness2() { suite.cluster.AddLeaderRegion(1, 1, 2, 3, 4) suite.ruleManager.SetRule(&placement.Rule{ - GroupID: "pd", + GroupID: placement.DefaultGroupID, ID: "r1", Index: 100, Override: false, @@ -459,8 +544,8 @@ func (suite *ruleCheckerTestSuite) TestFixRuleWitness4() { err := suite.ruleManager.SetRules([]*placement.Rule{ { - GroupID: "pd", - ID: "default", + GroupID: placement.DefaultGroupID, + ID: placement.DefaultRuleID, Index: 100, Override: true, Role: placement.Voter, @@ -468,7 +553,7 @@ func (suite *ruleCheckerTestSuite) TestFixRuleWitness4() { IsWitness: false, }, { - GroupID: "pd", + GroupID: placement.DefaultGroupID, ID: "r1", Index: 100, Override: false, @@ -495,7 +580,7 @@ func (suite *ruleCheckerTestSuite) TestFixRuleWitness5() { suite.cluster.AddLeaderRegion(1, 1, 2, 3) err := suite.ruleManager.SetRule(&placement.Rule{ - GroupID: "pd", + GroupID: placement.DefaultGroupID, ID: "r1", Index: 100, Override: true, @@ -518,15 +603,15 @@ func (suite *ruleCheckerTestSuite) TestFixRuleWitness6() { err := suite.ruleManager.SetRules([]*placement.Rule{ { - GroupID: "pd", - ID: "default", + GroupID: placement.DefaultGroupID, + ID: placement.DefaultRuleID, Index: 100, Role: placement.Voter, IsWitness: false, Count: 2, }, { - GroupID: "pd", + GroupID: placement.DefaultGroupID, ID: "r1", Index: 100, Role: placement.Voter, @@ -556,15 +641,15 @@ func (suite *ruleCheckerTestSuite) TestDisableWitness() { err := suite.ruleManager.SetRules([]*placement.Rule{ { - 
GroupID: "pd", - ID: "default", + GroupID: placement.DefaultGroupID, + ID: placement.DefaultRuleID, Index: 100, Role: placement.Voter, IsWitness: false, Count: 2, }, { - GroupID: "pd", + GroupID: placement.DefaultGroupID, ID: "r1", Index: 100, Role: placement.Voter, @@ -595,7 +680,7 @@ func (suite *ruleCheckerTestSuite) TestBetterReplacement() { suite.cluster.AddLabelsStore(4, 1, map[string]string{"host": "host3"}) suite.cluster.AddLeaderRegionWithRange(1, "", "", 1, 2, 3) suite.ruleManager.SetRule(&placement.Rule{ - GroupID: "pd", + GroupID: placement.DefaultGroupID, ID: "test", Index: 100, Override: true, @@ -619,7 +704,7 @@ func (suite *ruleCheckerTestSuite) TestBetterReplacement2() { suite.cluster.AddLabelsStore(4, 1, map[string]string{"zone": "z2", "host": "host1"}) suite.cluster.AddLeaderRegionWithRange(1, "", "", 1, 2, 3) suite.ruleManager.SetRule(&placement.Rule{ - GroupID: "pd", + GroupID: placement.DefaultGroupID, ID: "test", Index: 100, Override: true, @@ -642,7 +727,7 @@ func (suite *ruleCheckerTestSuite) TestNoBetterReplacement() { suite.cluster.AddLabelsStore(3, 1, map[string]string{"host": "host2"}) suite.cluster.AddLeaderRegionWithRange(1, "", "", 1, 2, 3) suite.ruleManager.SetRule(&placement.Rule{ - GroupID: "pd", + GroupID: placement.DefaultGroupID, ID: "test", Index: 100, Override: true, @@ -702,7 +787,7 @@ func (suite *ruleCheckerTestSuite) TestPriorityFixOrphanPeer() { suite.cluster.PutRegion(testRegion) op = suite.rc.Check(suite.cluster.GetRegion(1)) suite.NotNil(op) - suite.Equal("remove-orphan-peer", op.Desc()) + suite.Equal("remove-unhealthy-orphan-peer", op.Desc()) suite.IsType(remove, op.Step(0)) // Ref #3521 suite.cluster.SetStoreOffline(2) @@ -723,6 +808,222 @@ func (suite *ruleCheckerTestSuite) TestPriorityFixOrphanPeer() { suite.Equal("remove-orphan-peer", op.Desc()) } +// Ref https://github.com/tikv/pd/issues/7249 https://github.com/tikv/tikv/issues/15799 +func (suite *ruleCheckerTestSuite) TestFixOrphanPeerWithDisconnectedStoreAndRuleChanged() { + // disconnect any two stores and change rule to 3 replicas + stores := []uint64{1, 2, 3, 4, 5} + testCases := [][]uint64{} + for i := 0; i < len(stores); i++ { + for j := i + 1; j < len(stores); j++ { + testCases = append(testCases, []uint64{stores[i], stores[j]}) + } + } + for _, leader := range stores { + var followers []uint64 + for i := 0; i < len(stores); i++ { + if stores[i] != leader { + followers = append(followers, stores[i]) + } + } + + for _, testCase := range testCases { + // init cluster with 5 replicas + suite.cluster.AddLabelsStore(1, 1, map[string]string{"host": "host1"}) + suite.cluster.AddLabelsStore(2, 1, map[string]string{"host": "host2"}) + suite.cluster.AddLabelsStore(3, 1, map[string]string{"host": "host3"}) + suite.cluster.AddLabelsStore(4, 1, map[string]string{"host": "host4"}) + suite.cluster.AddLabelsStore(5, 1, map[string]string{"host": "host5"}) + suite.cluster.AddLeaderRegionWithRange(1, "", "", leader, followers...) 
+ rule := &placement.Rule{ + GroupID: placement.DefaultGroupID, + ID: placement.DefaultRuleID, + Role: placement.Voter, + Count: 5, + StartKey: []byte{}, + EndKey: []byte{}, + } + err := suite.ruleManager.SetRule(rule) + suite.NoError(err) + op := suite.rc.Check(suite.cluster.GetRegion(1)) + suite.Nil(op) + + // set two stores to disconnected + suite.cluster.SetStoreDisconnect(testCase[0]) + suite.cluster.SetStoreDisconnect(testCase[1]) + + // change rule to 3 replicas + rule = &placement.Rule{ + GroupID: placement.DefaultGroupID, + ID: placement.DefaultRuleID, + Role: placement.Voter, + Count: 3, + StartKey: []byte{}, + EndKey: []byte{}, + Override: true, + } + suite.ruleManager.SetRule(rule) + + // remove peer from region 1 + for j := 1; j <= 2; j++ { + r1 := suite.cluster.GetRegion(1) + op = suite.rc.Check(suite.cluster.GetRegion(1)) + suite.NotNil(op) + suite.Contains(op.Desc(), "orphan") + var removedPeerStoreID uint64 + newLeaderStoreID := r1.GetLeader().GetStoreId() + for i := 0; i < op.Len(); i++ { + if s, ok := op.Step(i).(operator.RemovePeer); ok { + removedPeerStoreID = s.FromStore + } + if s, ok := op.Step(i).(operator.TransferLeader); ok { + newLeaderStoreID = s.ToStore + } + } + suite.NotZero(removedPeerStoreID) + r1 = r1.Clone( + core.WithLeader(r1.GetStorePeer(newLeaderStoreID)), + core.WithRemoveStorePeer(removedPeerStoreID)) + suite.cluster.PutRegion(r1) + r1 = suite.cluster.GetRegion(1) + suite.Len(r1.GetPeers(), 5-j) + } + + r1 := suite.cluster.GetRegion(1) + for _, p := range r1.GetPeers() { + suite.NotEqual(p.GetStoreId(), testCase[0]) + suite.NotEqual(p.GetStoreId(), testCase[1]) + } + suite.TearDownTest() + suite.SetupTest() + } + } +} + +// Ref https://github.com/tikv/pd/issues/7249 https://github.com/tikv/tikv/issues/15799 +func (suite *ruleCheckerTestSuite) TestFixOrphanPeerWithDisconnectedStoreAndRuleChangedWithLearner() { + // disconnect any three stores and change rule to 3 replicas + // and there is a learner in the disconnected store. + stores := []uint64{1, 2, 3, 4, 5, 6} + testCases := [][]uint64{} + for i := 0; i < len(stores); i++ { + for j := i + 1; j < len(stores); j++ { + for k := j + 1; k < len(stores); k++ { + testCases = append(testCases, []uint64{stores[i], stores[j], stores[k]}) + } + } + } + for _, leader := range stores { + var followers []uint64 + for i := 0; i < len(stores); i++ { + if stores[i] != leader { + followers = append(followers, stores[i]) + } + } + + for _, testCase := range testCases { + for _, learnerStore := range testCase { + if learnerStore == leader { + continue + } + voterFollowers := []uint64{} + for _, follower := range followers { + if follower != learnerStore { + voterFollowers = append(voterFollowers, follower) + } + } + // init cluster with 5 voters and 1 learner + suite.cluster.AddLabelsStore(1, 1, map[string]string{"host": "host1"}) + suite.cluster.AddLabelsStore(2, 1, map[string]string{"host": "host2"}) + suite.cluster.AddLabelsStore(3, 1, map[string]string{"host": "host3"}) + suite.cluster.AddLabelsStore(4, 1, map[string]string{"host": "host4"}) + suite.cluster.AddLabelsStore(5, 1, map[string]string{"host": "host5"}) + suite.cluster.AddLabelsStore(6, 1, map[string]string{"host": "host6"}) + suite.cluster.AddLeaderRegionWithRange(1, "", "", leader, voterFollowers...) 
+ err := suite.ruleManager.SetRules([]*placement.Rule{ + { + GroupID: placement.DefaultGroupID, + ID: placement.DefaultRuleID, + Index: 100, + Override: true, + Role: placement.Voter, + Count: 5, + IsWitness: false, + }, + { + GroupID: placement.DefaultGroupID, + ID: "r1", + Index: 100, + Override: false, + Role: placement.Learner, + Count: 1, + IsWitness: false, + LabelConstraints: []placement.LabelConstraint{ + {Key: "host", Op: "in", Values: []string{"host" + strconv.FormatUint(learnerStore, 10)}}, + }, + }, + }) + suite.NoError(err) + r1 := suite.cluster.GetRegion(1) + r1 = r1.Clone(core.WithAddPeer(&metapb.Peer{Id: 12, StoreId: learnerStore, Role: metapb.PeerRole_Learner})) + suite.cluster.PutRegion(r1) + op := suite.rc.Check(suite.cluster.GetRegion(1)) + suite.Nil(op) + + // set three stores to disconnected + suite.cluster.SetStoreDisconnect(testCase[0]) + suite.cluster.SetStoreDisconnect(testCase[1]) + suite.cluster.SetStoreDisconnect(testCase[2]) + + // change rule to 3 replicas + suite.ruleManager.DeleteRule(placement.DefaultGroupID, "r1") + suite.ruleManager.SetRule(&placement.Rule{ + GroupID: placement.DefaultGroupID, + ID: placement.DefaultRuleID, + Role: placement.Voter, + Count: 3, + StartKey: []byte{}, + EndKey: []byte{}, + Override: true, + }) + + // remove peer from region 1 + for j := 1; j <= 3; j++ { + r1 := suite.cluster.GetRegion(1) + op = suite.rc.Check(suite.cluster.GetRegion(1)) + suite.NotNil(op) + suite.Contains(op.Desc(), "orphan") + var removedPeerStroeID uint64 + newLeaderStoreID := r1.GetLeader().GetStoreId() + for i := 0; i < op.Len(); i++ { + if s, ok := op.Step(i).(operator.RemovePeer); ok { + removedPeerStroeID = s.FromStore + } + if s, ok := op.Step(i).(operator.TransferLeader); ok { + newLeaderStoreID = s.ToStore + } + } + suite.NotZero(removedPeerStroeID) + r1 = r1.Clone( + core.WithLeader(r1.GetStorePeer(newLeaderStoreID)), + core.WithRemoveStorePeer(removedPeerStroeID)) + suite.cluster.PutRegion(r1) + r1 = suite.cluster.GetRegion(1) + suite.Len(r1.GetPeers(), 6-j) + } + + r1 = suite.cluster.GetRegion(1) + for _, p := range r1.GetPeers() { + suite.NotEqual(p.GetStoreId(), testCase[0]) + suite.NotEqual(p.GetStoreId(), testCase[1]) + suite.NotEqual(p.GetStoreId(), testCase[2]) + } + suite.TearDownTest() + suite.SetupTest() + } + } + } +} + func (suite *ruleCheckerTestSuite) TestPriorityFitHealthWithDifferentRole1() { suite.cluster.SetEnableUseJointConsensus(true) suite.cluster.AddLabelsStore(1, 1, map[string]string{"host": "host1"}) @@ -751,6 +1052,7 @@ func (suite *ruleCheckerTestSuite) TestPriorityFitHealthWithDifferentRole1() { suite.Equal("replace-down-peer-with-orphan-peer", op.Desc()) // set peer3 only pending + suite.cluster.GetStore(3).GetMeta().LastHeartbeat = time.Now().UnixNano() r1 = r1.Clone(core.WithDownPeers(nil)) suite.cluster.PutRegion(r1) op = suite.rc.Check(suite.cluster.GetRegion(1)) @@ -804,13 +1106,13 @@ func (suite *ruleCheckerTestSuite) TestPriorityFitHealthPeersAndTiFlash() { suite.cluster.AddLabelsStore(4, 1, map[string]string{"host": "host4", "engine": "tiflash"}) suite.cluster.AddRegionWithLearner(1, 1, []uint64{2, 3}, []uint64{4}) rule := &placement.Rule{ - GroupID: "pd", + GroupID: placement.DefaultGroupID, ID: "test", Role: placement.Voter, Count: 3, } rule2 := &placement.Rule{ - GroupID: "pd", + GroupID: placement.DefaultGroupID, ID: "test2", Role: placement.Learner, Count: 1, @@ -824,7 +1126,7 @@ func (suite *ruleCheckerTestSuite) TestPriorityFitHealthPeersAndTiFlash() { } suite.ruleManager.SetRule(rule) 
suite.ruleManager.SetRule(rule2) - suite.ruleManager.DeleteRule("pd", "default") + suite.ruleManager.DeleteRule(placement.DefaultGroupID, placement.DefaultRuleID) r1 := suite.cluster.GetRegion(1) // set peer3 to pending and down @@ -875,12 +1177,12 @@ func (suite *ruleCheckerTestSuite) TestIssue3293() { suite.cluster.DeleteStore(suite.cluster.GetStore(5)) err = suite.ruleManager.SetRule(&placement.Rule{ GroupID: "TiDB_DDL_51", - ID: "default", + ID: placement.DefaultRuleID, Role: placement.Voter, Count: 3, }) suite.NoError(err) - err = suite.ruleManager.DeleteRule("pd", "default") + err = suite.ruleManager.DeleteRule(placement.DefaultGroupID, placement.DefaultRuleID) suite.NoError(err) op := suite.rc.Check(suite.cluster.GetRegion(1)) suite.NotNil(op) @@ -988,7 +1290,7 @@ func (suite *ruleCheckerTestSuite) TestFixDownPeer() { suite.cluster.AddLabelsStore(5, 1, map[string]string{"zone": "z3"}) suite.cluster.AddLeaderRegion(1, 1, 3, 4) rule := &placement.Rule{ - GroupID: "pd", + GroupID: placement.DefaultGroupID, ID: "test", Index: 100, Override: true, @@ -1044,13 +1346,13 @@ func (suite *ruleCheckerTestSuite) TestFixDownWitnessPeer() { r = r.Clone(core.WithWitnesses([]*metapb.Peer{r.GetPeer(2)})) suite.ruleManager.SetRule(&placement.Rule{ - GroupID: "pd", - ID: "default", + GroupID: placement.DefaultGroupID, + ID: placement.DefaultRuleID, Role: placement.Voter, Count: 2, }) suite.ruleManager.SetRule(&placement.Rule{ - GroupID: "pd", + GroupID: placement.DefaultGroupID, ID: "r1", Role: placement.Voter, Count: 1, @@ -1077,13 +1379,13 @@ func (suite *ruleCheckerTestSuite) TestFixDownPeerWithAvailableWitness() { r = r.Clone(core.WithWitnesses([]*metapb.Peer{r.GetPeer(3)})) suite.ruleManager.SetRule(&placement.Rule{ - GroupID: "pd", - ID: "default", + GroupID: placement.DefaultGroupID, + ID: placement.DefaultRuleID, Role: placement.Voter, Count: 2, }) suite.ruleManager.SetRule(&placement.Rule{ - GroupID: "pd", + GroupID: placement.DefaultGroupID, ID: "r1", Role: placement.Voter, Count: 1, @@ -1115,13 +1417,13 @@ func (suite *ruleCheckerTestSuite) TestFixDownPeerWithAvailableWitness2() { r = r.Clone(core.WithWitnesses([]*metapb.Peer{r.GetPeer(3)})) suite.ruleManager.SetRule(&placement.Rule{ - GroupID: "pd", - ID: "default", + GroupID: placement.DefaultGroupID, + ID: placement.DefaultRuleID, Role: placement.Voter, Count: 2, }) suite.ruleManager.SetRule(&placement.Rule{ - GroupID: "pd", + GroupID: placement.DefaultGroupID, ID: "r1", Role: placement.Voter, Count: 1, @@ -1149,13 +1451,13 @@ func (suite *ruleCheckerTestSuite) TestFixDownPeerWithAvailableWitness3() { r = r.Clone(core.WithWitnesses([]*metapb.Peer{r.GetPeer(3)})) suite.ruleManager.SetRule(&placement.Rule{ - GroupID: "pd", - ID: "default", + GroupID: placement.DefaultGroupID, + ID: placement.DefaultRuleID, Role: placement.Voter, Count: 2, }) suite.ruleManager.SetRule(&placement.Rule{ - GroupID: "pd", + GroupID: placement.DefaultGroupID, ID: "r1", Role: placement.Voter, Count: 1, @@ -1206,7 +1508,7 @@ func (suite *ruleCheckerTestSuite) TestFixOfflinePeer() { suite.cluster.AddLabelsStore(5, 1, map[string]string{"zone": "z3"}) suite.cluster.AddLeaderRegion(1, 1, 3, 4) rule := &placement.Rule{ - GroupID: "pd", + GroupID: placement.DefaultGroupID, ID: "test", Index: 100, Override: true, @@ -1241,13 +1543,13 @@ func (suite *ruleCheckerTestSuite) TestFixOfflinePeerWithAvaliableWitness() { r := suite.cluster.GetRegion(1) r = r.Clone(core.WithWitnesses([]*metapb.Peer{r.GetPeer(2)})) suite.ruleManager.SetRule(&placement.Rule{ - GroupID: "pd", - ID: 
"default", + GroupID: placement.DefaultGroupID, + ID: placement.DefaultRuleID, Role: placement.Voter, Count: 2, }) suite.ruleManager.SetRule(&placement.Rule{ - GroupID: "pd", + GroupID: placement.DefaultGroupID, ID: "r1", Role: placement.Voter, Count: 1, @@ -1271,7 +1573,7 @@ func (suite *ruleCheckerTestSuite) TestRuleCache() { suite.cluster.AddRegionStore(999, 1) suite.cluster.AddLeaderRegion(1, 1, 3, 4) rule := &placement.Rule{ - GroupID: "pd", + GroupID: placement.DefaultGroupID, ID: "test", Index: 100, Override: true, @@ -1290,7 +1592,7 @@ func (suite *ruleCheckerTestSuite) TestRuleCache() { stillCached bool }{ { - name: "default", + name: placement.DefaultRuleID, region: region, stillCached: true, }, @@ -1416,7 +1718,7 @@ func (suite *ruleCheckerTestSuite) TestDemoteVoter() { suite.cluster.AddLabelsStore(4, 1, map[string]string{"zone": "z4"}) region := suite.cluster.AddLeaderRegion(1, 1, 4) rule := &placement.Rule{ - GroupID: "pd", + GroupID: placement.DefaultGroupID, ID: "test", Role: placement.Voter, Count: 1, @@ -1429,7 +1731,7 @@ func (suite *ruleCheckerTestSuite) TestDemoteVoter() { }, } rule2 := &placement.Rule{ - GroupID: "pd", + GroupID: placement.DefaultGroupID, ID: "test2", Role: placement.Learner, Count: 1, @@ -1443,7 +1745,7 @@ func (suite *ruleCheckerTestSuite) TestDemoteVoter() { } suite.ruleManager.SetRule(rule) suite.ruleManager.SetRule(rule2) - suite.ruleManager.DeleteRule("pd", "default") + suite.ruleManager.DeleteRule(placement.DefaultGroupID, placement.DefaultRuleID) op := suite.rc.Check(region) suite.NotNil(op) suite.Equal("fix-demote-voter", op.Desc()) @@ -1505,7 +1807,7 @@ func (suite *ruleCheckerTestSuite) TestLocationLabels() { suite.cluster.AddLabelsStore(6, 1, map[string]string{"zone": "z2", "rack": "r3", "host": "h2"}) suite.cluster.AddLeaderRegionWithRange(1, "", "", 1, 2, 5) rule1 := &placement.Rule{ - GroupID: "pd", + GroupID: placement.DefaultGroupID, ID: "test1", Role: placement.Leader, Count: 1, @@ -1519,7 +1821,7 @@ func (suite *ruleCheckerTestSuite) TestLocationLabels() { LocationLabels: []string{"rack"}, } rule2 := &placement.Rule{ - GroupID: "pd", + GroupID: placement.DefaultGroupID, ID: "test2", Role: placement.Voter, Count: 1, @@ -1533,7 +1835,7 @@ func (suite *ruleCheckerTestSuite) TestLocationLabels() { LocationLabels: []string{"rack"}, } rule3 := &placement.Rule{ - GroupID: "pd", + GroupID: placement.DefaultGroupID, ID: "test3", Role: placement.Voter, Count: 1, @@ -1549,7 +1851,7 @@ func (suite *ruleCheckerTestSuite) TestLocationLabels() { suite.ruleManager.SetRule(rule1) suite.ruleManager.SetRule(rule2) suite.ruleManager.SetRule(rule3) - suite.ruleManager.DeleteRule("pd", "default") + suite.ruleManager.DeleteRule(placement.DefaultGroupID, placement.DefaultRuleID) op := suite.rc.Check(suite.cluster.GetRegion(1)) suite.NotNil(op) suite.Equal("move-to-better-location", op.Desc()) @@ -1580,7 +1882,7 @@ func (suite *ruleCheckerTestSuite) TestTiFlashLocationLabels() { }, } suite.ruleManager.SetRule(rule1) - rule := suite.ruleManager.GetRule("pd", "default") + rule := suite.ruleManager.GetRule(placement.DefaultGroupID, placement.DefaultRuleID) rule.LocationLabels = []string{"zone", "rack", "host"} suite.ruleManager.SetRule(rule) op := suite.rc.Check(suite.cluster.GetRegion(1)) diff --git a/pkg/schedule/config/config.go b/pkg/schedule/config/config.go index c8fa62b8aff..27b8917d1bf 100644 --- a/pkg/schedule/config/config.go +++ b/pkg/schedule/config/config.go @@ -79,6 +79,34 @@ var ( DefaultTiFlashStoreLimit = StoreLimit{AddPeer: 30, RemovePeer: 30} 
) +// The following consts are used to identify the config item that needs to set TTL. +const ( + // TTLConfigPrefix is the prefix of the config item that needs to set TTL. + TTLConfigPrefix = "/config/ttl" + + MaxSnapshotCountKey = "schedule.max-snapshot-count" + MaxMergeRegionSizeKey = "schedule.max-merge-region-size" + MaxPendingPeerCountKey = "schedule.max-pending-peer-count" + MaxMergeRegionKeysKey = "schedule.max-merge-region-keys" + LeaderScheduleLimitKey = "schedule.leader-schedule-limit" + RegionScheduleLimitKey = "schedule.region-schedule-limit" + WitnessScheduleLimitKey = "schedule.witness-schedule-limit" + ReplicaRescheduleLimitKey = "schedule.replica-schedule-limit" + MergeScheduleLimitKey = "schedule.merge-schedule-limit" + HotRegionScheduleLimitKey = "schedule.hot-region-schedule-limit" + SchedulerMaxWaitingOperatorKey = "schedule.scheduler-max-waiting-operator" + EnableLocationReplacement = "schedule.enable-location-replacement" + DefaultAddPeer = "default-add-peer" + DefaultRemovePeer = "default-remove-peer" + + // EnableTiKVSplitRegion is the option to enable tikv split region. + // it's related to schedule, but it's not an explicit config + EnableTiKVSplitRegion = "schedule.enable-tikv-split-region" + + DefaultGCInterval = 5 * time.Second + DefaultTTL = 5 * time.Minute +) + // StoreLimit is the default limit of adding peer and removing peer when putting stores. type StoreLimit struct { mu syncutil.RWMutex diff --git a/pkg/schedule/coordinator.go b/pkg/schedule/coordinator.go index 8fb9ec8b286..e276589a4e9 100644 --- a/pkg/schedule/coordinator.go +++ b/pkg/schedule/coordinator.go @@ -88,8 +88,8 @@ type Coordinator struct { } // NewCoordinator creates a new Coordinator. -func NewCoordinator(ctx context.Context, cluster sche.ClusterInformer, hbStreams *hbstream.HeartbeatStreams) *Coordinator { - ctx, cancel := context.WithCancel(ctx) +func NewCoordinator(parentCtx context.Context, cluster sche.ClusterInformer, hbStreams *hbstream.HeartbeatStreams) *Coordinator { + ctx, cancel := context.WithCancel(parentCtx) opController := operator.NewController(ctx, cluster.GetBasicCluster(), cluster.GetSharedConfig(), hbStreams) schedulers := schedulers.NewController(ctx, cluster, cluster.GetStorage(), opController) checkers := checker.NewController(ctx, cluster, cluster.GetCheckerConfig(), cluster.GetRuleManager(), cluster.GetRegionLabeler(), opController) @@ -513,6 +513,7 @@ func (c *Coordinator) InitSchedulers(needRun bool) { if err := c.cluster.GetSchedulerConfig().Persist(c.cluster.GetStorage()); err != nil { log.Error("cannot persist schedule config", errs.ZapError(err)) } + log.Info("scheduler config is updated", zap.Reflect("scheduler-config", scheduleCfg.Schedulers)) c.markSchedulersInitialized() } @@ -714,7 +715,7 @@ func collectHotMetrics(cluster sche.ClusterInformer, stores []*core.StoreInfo, t } // ResetHotSpotMetrics resets hot spot metrics. 
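Editor's note on the TTL keys added to pkg/schedule/config above: they are plain strings meant to be combined with TTLConfigPrefix when a temporary override is stored, with DefaultTTL and DefaultGCInterval governing how long an override lives and how often expired ones are swept. A minimal sketch, assuming the tikv/pd module path and a simple prefix-plus-key layout (the exact storage layout is not defined by this patch):

```go
package main

import (
	"fmt"
	"path"

	sc "github.com/tikv/pd/pkg/schedule/config"
)

func main() {
	// Compose an etcd-style key for a TTL override of max-snapshot-count.
	fmt.Println(path.Join(sc.TTLConfigPrefix, sc.MaxSnapshotCountKey)) // /config/ttl/schedule.max-snapshot-count
	fmt.Println(sc.DefaultTTL)        // 5m0s: default lifetime of a TTL override
	fmt.Println(sc.DefaultGCInterval) // 5s: presumed cleanup cadence for expired overrides
}
```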
-func (c *Coordinator) ResetHotSpotMetrics() { +func ResetHotSpotMetrics() { hotSpotStatusGauge.Reset() schedulers.HotPendingSum.Reset() } diff --git a/pkg/schedule/filter/filters_test.go b/pkg/schedule/filter/filters_test.go index fa085890694..f030dff81a4 100644 --- a/pkg/schedule/filter/filters_test.go +++ b/pkg/schedule/filter/filters_test.go @@ -159,7 +159,7 @@ func TestRuleFitFilterWithPlacementRule(t *testing.T) { testCluster := mockcluster.NewCluster(ctx, opt) testCluster.SetEnablePlacementRules(true) ruleManager := testCluster.RuleManager - ruleManager.DeleteRule("pd", "default") + ruleManager.DeleteRule(placement.DefaultGroupID, placement.DefaultRuleID) err := ruleManager.SetRules([]*placement.Rule{ { GroupID: "test", diff --git a/pkg/schedule/handler/handler.go b/pkg/schedule/handler/handler.go index fca43f3eeeb..346a7254284 100644 --- a/pkg/schedule/handler/handler.go +++ b/pkg/schedule/handler/handler.go @@ -16,12 +16,15 @@ package handler import ( "bytes" + "context" "encoding/hex" "net/http" + "strconv" "strings" "time" "github.com/pingcap/errors" + "github.com/pingcap/failpoint" "github.com/pingcap/kvproto/pkg/metapb" "github.com/pingcap/kvproto/pkg/pdpb" "github.com/pingcap/log" @@ -30,6 +33,7 @@ import ( "github.com/tikv/pd/pkg/schedule" sche "github.com/tikv/pd/pkg/schedule/core" "github.com/tikv/pd/pkg/schedule/filter" + "github.com/tikv/pd/pkg/schedule/labeler" "github.com/tikv/pd/pkg/schedule/operator" "github.com/tikv/pd/pkg/schedule/placement" "github.com/tikv/pd/pkg/schedule/scatter" @@ -41,6 +45,11 @@ import ( "go.uber.org/zap" ) +const ( + defaultRegionLimit = 16 + maxRegionLimit = 10240 +) + // Server is the interface for handler about schedule. // TODO: remove it after GetCluster is unified between PD server and Scheduling server. type Server interface { @@ -765,13 +774,22 @@ func (h *Handler) GetCheckerStatus(name string) (map[string]bool, error) { }, nil } -// GetSchedulerNames returns all names of schedulers. -func (h *Handler) GetSchedulerNames() ([]string, error) { +// GetSchedulersController returns controller of schedulers. +func (h *Handler) GetSchedulersController() (*schedulers.Controller, error) { co := h.GetCoordinator() if co == nil { return nil, errs.ErrNotBootstrapped.GenWithStackByArgs() } - return co.GetSchedulersController().GetSchedulerNames(), nil + return co.GetSchedulersController(), nil +} + +// GetSchedulerNames returns all names of schedulers. +func (h *Handler) GetSchedulerNames() ([]string, error) { + sc, err := h.GetSchedulersController() + if err != nil { + return nil, err + } + return sc.GetSchedulerNames(), nil } type schedulerPausedPeriod struct { @@ -782,15 +800,14 @@ type schedulerPausedPeriod struct { // GetSchedulerByStatus returns all names of schedulers by status. 
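Editor's note: earlier in this hunk ResetHotSpotMetrics drops its *Coordinator receiver, so clearing the hot-spot gauges no longer requires a coordinator instance. A call site changes roughly as sketched below (illustrative; the import path assumes the tikv/pd module layout):

```go
package example

import "github.com/tikv/pd/pkg/schedule"

func resetHotMetrics(c *schedule.Coordinator) {
	// Before: the reset hung off the coordinator instance.
	//   c.ResetHotSpotMetrics()
	// After this patch it is a package-level function, so callers that only
	// need to clear the gauges no longer have to thread a *Coordinator through.
	_ = c
	schedule.ResetHotSpotMetrics()
}
```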
func (h *Handler) GetSchedulerByStatus(status string, needTS bool) (interface{}, error) { - co := h.GetCoordinator() - if co == nil { - return nil, errs.ErrNotBootstrapped.GenWithStackByArgs() + sc, err := h.GetSchedulersController() + if err != nil { + return nil, err } - sc := co.GetSchedulersController() schedulers := sc.GetSchedulerNames() switch status { case "paused": - var pausedSchedulers []string + pausedSchedulers := make([]string, 0, len(schedulers)) pausedPeriods := []schedulerPausedPeriod{} for _, scheduler := range schedulers { paused, err := sc.IsSchedulerPaused(scheduler) @@ -825,7 +842,7 @@ func (h *Handler) GetSchedulerByStatus(status string, needTS bool) (interface{}, } return pausedSchedulers, nil case "disabled": - var disabledSchedulers []string + disabledSchedulers := make([]string, 0, len(schedulers)) for _, scheduler := range schedulers { disabled, err := sc.IsSchedulerDisabled(scheduler) if err != nil { @@ -837,7 +854,20 @@ func (h *Handler) GetSchedulerByStatus(status string, needTS bool) (interface{}, } return disabledSchedulers, nil default: - return schedulers, nil + // The default scheduler could not be deleted in scheduling server, + // so schedulers could only be disabled. + // We should not return the disabled schedulers here. + enabledSchedulers := make([]string, 0, len(schedulers)) + for _, scheduler := range schedulers { + disabled, err := sc.IsSchedulerDisabled(scheduler) + if err != nil { + return nil, err + } + if !disabled { + enabledSchedulers = append(enabledSchedulers, scheduler) + } + } + return enabledSchedulers, nil } } @@ -861,11 +891,11 @@ func (h *Handler) GetDiagnosticResult(name string) (*schedulers.DiagnosticResult // t == 0 : resume scheduler. // t > 0 : scheduler delays t seconds. func (h *Handler) PauseOrResumeScheduler(name string, t int64) (err error) { - co := h.GetCoordinator() - if co == nil { - return errs.ErrNotBootstrapped.GenWithStackByArgs() + sc, err := h.GetSchedulersController() + if err != nil { + return err } - if err = co.GetSchedulersController().PauseOrResumeScheduler(name, t); err != nil { + if err = sc.PauseOrResumeScheduler(name, t); err != nil { if t == 0 { log.Error("can not resume scheduler", zap.String("scheduler-name", name), errs.ZapError(err)) } else { @@ -1040,3 +1070,262 @@ func (h *Handler) GetHotBuckets(regionIDs ...uint64) (HotBucketsResponse, error) } return ret, nil } + +// GetRegion returns the region labeler. +func (h *Handler) GetRegion(id uint64) (*core.RegionInfo, error) { + c := h.GetCluster() + if c == nil { + return nil, errs.ErrNotBootstrapped.GenWithStackByArgs() + } + return c.GetRegion(id), nil +} + +// GetRegionLabeler returns the region labeler. +func (h *Handler) GetRegionLabeler() (*labeler.RegionLabeler, error) { + c := h.GetCluster() + if c == nil || c.GetRegionLabeler() == nil { + return nil, errs.ErrNotBootstrapped + } + return c.GetRegionLabeler(), nil +} + +// AccelerateRegionsScheduleInRange accelerates regions scheduling in a given range. 
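Editor's note: the default branch of GetSchedulerByStatus above now hides disabled schedulers instead of echoing every registered name. The filtering itself is simple; here is a standalone sketch with the schedulers controller replaced by a callback, purely for illustration:

```go
package main

import "fmt"

// filterEnabled mirrors the new default branch: given all scheduler names and
// a predicate reporting whether each one is disabled, keep only the enabled ones.
func filterEnabled(names []string, isDisabled func(string) (bool, error)) ([]string, error) {
	enabled := make([]string, 0, len(names))
	for _, name := range names {
		disabled, err := isDisabled(name)
		if err != nil {
			return nil, err
		}
		if !disabled {
			enabled = append(enabled, name)
		}
	}
	return enabled, nil
}

func main() {
	names := []string{"balance-leader-scheduler", "balance-region-scheduler"}
	out, _ := filterEnabled(names, func(name string) (bool, error) {
		return name == "balance-region-scheduler", nil // pretend this one is disabled
	})
	fmt.Println(out) // [balance-leader-scheduler]
}
```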
+func (h *Handler) AccelerateRegionsScheduleInRange(rawStartKey, rawEndKey string, limit int) error { + startKey, err := hex.DecodeString(rawStartKey) + if err != nil { + return err + } + endKey, err := hex.DecodeString(rawEndKey) + if err != nil { + return err + } + c := h.GetCluster() + if c == nil { + return errs.ErrNotBootstrapped.GenWithStackByArgs() + } + co := h.GetCoordinator() + if co == nil { + return errs.ErrNotBootstrapped.GenWithStackByArgs() + } + regions := c.ScanRegions(startKey, endKey, limit) + if len(regions) > 0 { + regionsIDList := make([]uint64, 0, len(regions)) + for _, region := range regions { + regionsIDList = append(regionsIDList, region.GetID()) + } + co.GetCheckerController().AddSuspectRegions(regionsIDList...) + } + return nil +} + +// AccelerateRegionsScheduleInRanges accelerates regions scheduling in given ranges. +func (h *Handler) AccelerateRegionsScheduleInRanges(startKeys [][]byte, endKeys [][]byte, limit int) error { + c := h.GetCluster() + if c == nil { + return errs.ErrNotBootstrapped.GenWithStackByArgs() + } + co := h.GetCoordinator() + if co == nil { + return errs.ErrNotBootstrapped.GenWithStackByArgs() + } + if len(startKeys) != len(endKeys) { + return errors.New("startKeys and endKeys should have the same length") + } + var regions []*core.RegionInfo + for i := range startKeys { + regions = append(regions, c.ScanRegions(startKeys[i], endKeys[i], limit)...) + } + if len(regions) > 0 { + regionsIDList := make([]uint64, 0, len(regions)) + for _, region := range regions { + regionsIDList = append(regionsIDList, region.GetID()) + } + co.GetCheckerController().AddSuspectRegions(regionsIDList...) + } + return nil +} + +// AdjustLimit adjusts the limit of regions to schedule. +func (h *Handler) AdjustLimit(limitStr string, defaultLimits ...int) (int, error) { + limit := defaultRegionLimit + if len(defaultLimits) > 0 { + limit = defaultLimits[0] + } + if limitStr != "" { + var err error + limit, err = strconv.Atoi(limitStr) + if err != nil { + return 0, err + } + } + if limit > maxRegionLimit { + limit = maxRegionLimit + } + return limit, nil +} + +// ScatterRegionsResponse is the response for scatter regions. +type ScatterRegionsResponse struct { + ProcessedPercentage int `json:"processed-percentage"` +} + +// BuildScatterRegionsResp builds ScatterRegionsResponse. +func (h *Handler) BuildScatterRegionsResp(opsCount int, failures map[uint64]error) *ScatterRegionsResponse { + // If there existed any operator failed to be added into Operator Controller, add its regions into unProcessedRegions + percentage := 100 + if len(failures) > 0 { + percentage = 100 - 100*len(failures)/(opsCount+len(failures)) + log.Debug("scatter regions", zap.Errors("failures", func() []error { + r := make([]error, 0, len(failures)) + for _, err := range failures { + r = append(r, err) + } + return r + }())) + } + return &ScatterRegionsResponse{ + ProcessedPercentage: percentage, + } +} + +// ScatterRegionsByRange scatters regions by range. 
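Editor's note: AdjustLimit above is a small parsing helper, typically fed from a request's limit parameter: an empty string keeps the default (16), non-numeric input is an error, and the result is capped at 10240. A self-contained mirror of that logic, with the variadic default override omitted for brevity:

```go
package main

import (
	"fmt"
	"strconv"
)

const (
	defaultRegionLimit = 16    // same default as the handler above
	maxRegionLimit     = 10240 // upper bound applied after parsing
)

// adjustLimit mirrors Handler.AdjustLimit.
func adjustLimit(limitStr string) (int, error) {
	limit := defaultRegionLimit
	if limitStr != "" {
		var err error
		limit, err = strconv.Atoi(limitStr)
		if err != nil {
			return 0, err
		}
	}
	if limit > maxRegionLimit {
		limit = maxRegionLimit
	}
	return limit, nil
}

func main() {
	for _, in := range []string{"", "256", "999999"} {
		n, err := adjustLimit(in)
		fmt.Println(in, n, err) // "" -> 16, "256" -> 256, "999999" -> 10240
	}
}
```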
+func (h *Handler) ScatterRegionsByRange(rawStartKey, rawEndKey string, group string, retryLimit int) (int, map[uint64]error, error) { + startKey, err := hex.DecodeString(rawStartKey) + if err != nil { + return 0, nil, err + } + endKey, err := hex.DecodeString(rawEndKey) + if err != nil { + return 0, nil, err + } + co := h.GetCoordinator() + if co == nil { + return 0, nil, errs.ErrNotBootstrapped.GenWithStackByArgs() + } + return co.GetRegionScatterer().ScatterRegionsByRange(startKey, endKey, group, retryLimit) +} + +// ScatterRegionsByID scatters regions by id. +func (h *Handler) ScatterRegionsByID(ids []uint64, group string, retryLimit int, skipStoreLimit bool) (int, map[uint64]error, error) { + co := h.GetCoordinator() + if co == nil { + return 0, nil, errs.ErrNotBootstrapped.GenWithStackByArgs() + } + return co.GetRegionScatterer().ScatterRegionsByID(ids, group, retryLimit, false) +} + +// SplitRegionsResponse is the response for split regions. +type SplitRegionsResponse struct { + ProcessedPercentage int `json:"processed-percentage"` + NewRegionsID []uint64 `json:"regions-id"` +} + +// SplitRegions splits regions by split keys. +func (h *Handler) SplitRegions(ctx context.Context, rawSplitKeys []interface{}, retryLimit int) (*SplitRegionsResponse, error) { + co := h.GetCoordinator() + if co == nil { + return nil, errs.ErrNotBootstrapped.GenWithStackByArgs() + } + splitKeys := make([][]byte, 0, len(rawSplitKeys)) + for _, rawKey := range rawSplitKeys { + key, err := hex.DecodeString(rawKey.(string)) + if err != nil { + return nil, err + } + splitKeys = append(splitKeys, key) + } + + percentage, newRegionsID := co.GetRegionSplitter().SplitRegions(ctx, splitKeys, retryLimit) + s := &SplitRegionsResponse{ + ProcessedPercentage: percentage, + NewRegionsID: newRegionsID, + } + failpoint.Inject("splitResponses", func(val failpoint.Value) { + rawID, ok := val.(int) + if ok { + s.ProcessedPercentage = 100 + s.NewRegionsID = []uint64{uint64(rawID)} + } + }) + return s, nil +} + +// CheckRegionsReplicated checks if regions are replicated. +func (h *Handler) CheckRegionsReplicated(rawStartKey, rawEndKey string) (string, error) { + startKey, err := hex.DecodeString(rawStartKey) + if err != nil { + return "", err + } + endKey, err := hex.DecodeString(rawEndKey) + if err != nil { + return "", err + } + c := h.GetCluster() + if c == nil { + return "", errs.ErrNotBootstrapped.GenWithStackByArgs() + } + co := h.GetCoordinator() + if co == nil { + return "", errs.ErrNotBootstrapped.GenWithStackByArgs() + } + regions := c.ScanRegions(startKey, endKey, -1) + state := "REPLICATED" + for _, region := range regions { + if !filter.IsRegionReplicated(c, region) { + state = "INPROGRESS" + if co.IsPendingRegion(region.GetID()) { + state = "PENDING" + break + } + } + } + failpoint.Inject("mockPending", func(val failpoint.Value) { + aok, ok := val.(bool) + if ok && aok { + state = "PENDING" + } + }) + return state, nil +} + +// GetRuleManager returns the rule manager. +func (h *Handler) GetRuleManager() (*placement.RuleManager, error) { + c := h.GetCluster() + if c == nil { + return nil, errs.ErrNotBootstrapped + } + if !c.GetSharedConfig().IsPlacementRulesEnabled() { + return nil, errs.ErrPlacementDisabled + } + return c.GetRuleManager(), nil +} + +// PreCheckForRegion checks if the region is valid. 
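Editor's note: CheckRegionsReplicated above reports one of three states for a key range: REPLICATED unless some region is under-replicated (INPROGRESS), and PENDING as soon as one under-replicated region already has a pending operator. A simplified standalone sketch of that walk, over plain booleans instead of *core.RegionInfo:

```go
package main

import "fmt"

// replicationState mirrors the state machine in CheckRegionsReplicated.
func replicationState(replicated, pending []bool) string {
	state := "REPLICATED"
	for i := range replicated {
		if !replicated[i] {
			state = "INPROGRESS"
			if pending[i] {
				state = "PENDING"
				break
			}
		}
	}
	return state
}

func main() {
	fmt.Println(replicationState([]bool{true, true}, []bool{false, false}))  // REPLICATED
	fmt.Println(replicationState([]bool{true, false}, []bool{false, false})) // INPROGRESS
	fmt.Println(replicationState([]bool{false, true}, []bool{true, false}))  // PENDING
}
```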
+func (h *Handler) PreCheckForRegion(regionStr string) (*core.RegionInfo, int, error) { + c := h.GetCluster() + if c == nil { + return nil, http.StatusInternalServerError, errs.ErrNotBootstrapped.GenWithStackByArgs() + } + regionID, err := strconv.ParseUint(regionStr, 10, 64) + if err != nil { + return nil, http.StatusBadRequest, errs.ErrRegionInvalidID.FastGenByArgs() + } + region := c.GetRegion(regionID) + if region == nil { + return nil, http.StatusNotFound, errs.ErrRegionNotFound.FastGenByArgs(regionID) + } + return region, http.StatusOK, nil +} + +// CheckRegionPlacementRule checks if the region matches the placement rules. +func (h *Handler) CheckRegionPlacementRule(region *core.RegionInfo) (*placement.RegionFit, error) { + c := h.GetCluster() + if c == nil { + return nil, errs.ErrNotBootstrapped.GenWithStackByArgs() + } + manager, err := h.GetRuleManager() + if err != nil { + return nil, err + } + return manager.FitRegion(c, region), nil +} diff --git a/pkg/schedule/operator/create_operator_test.go b/pkg/schedule/operator/create_operator_test.go index 08a30680303..2fcd45d11f2 100644 --- a/pkg/schedule/operator/create_operator_test.go +++ b/pkg/schedule/operator/create_operator_test.go @@ -1145,8 +1145,8 @@ func TestCreateLeaveJointStateOperatorWithoutFitRules(t *testing.T) { cluster := mockcluster.NewCluster(ctx, opts) re.NoError(cluster.SetRules([]*placement.Rule{ { - GroupID: "pd", - ID: "default", + GroupID: placement.DefaultGroupID, + ID: placement.DefaultRuleID, StartKeyHex: hex.EncodeToString([]byte("")), EndKeyHex: hex.EncodeToString([]byte("")), Role: placement.Voter, diff --git a/pkg/schedule/operator/operator_controller.go b/pkg/schedule/operator/operator_controller.go index 3e9d2f3abcb..e3bead3ffca 100644 --- a/pkg/schedule/operator/operator_controller.go +++ b/pkg/schedule/operator/operator_controller.go @@ -799,6 +799,12 @@ func (oc *Controller) GetFastOpInfluence(cluster *core.BasicCluster, influence O } } +// CleanAllOpRecords removes all operators' records. +// It is used in tests only. 
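Editor's note: PreCheckForRegion and CheckRegionPlacementRule above are meant to compose: the first resolves and validates the region ID and hands back the HTTP status to use on failure, the second fits the region against the placement rules. An illustrative endpoint (not part of this patch; the *handler.Handler wiring and the query-parameter source are assumptions):

```go
package example

import (
	"encoding/json"
	"net/http"

	"github.com/tikv/pd/pkg/schedule/handler"
)

// checkRegionFit sketches how a caller would chain the two helpers.
func checkRegionFit(h *handler.Handler) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		region, status, err := h.PreCheckForRegion(r.URL.Query().Get("id"))
		if err != nil {
			http.Error(w, err.Error(), status) // 400 bad ID, 404 unknown region, 500 not bootstrapped
			return
		}
		fit, err := h.CheckRegionPlacementRule(region)
		if err != nil {
			http.Error(w, err.Error(), http.StatusInternalServerError)
			return
		}
		json.NewEncoder(w).Encode(fit)
	}
}
```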
+func (oc *Controller) CleanAllOpRecords() { + oc.records.ttl.Clear() +} + // AddOpInfluence add operator influence for cluster func AddOpInfluence(op *Operator, influence OpInfluence, cluster *core.BasicCluster) { region := cluster.GetRegion(op.RegionID()) diff --git a/pkg/schedule/operator/operator_test.go b/pkg/schedule/operator/operator_test.go index 9d924738543..c16d929f379 100644 --- a/pkg/schedule/operator/operator_test.go +++ b/pkg/schedule/operator/operator_test.go @@ -17,7 +17,6 @@ package operator import ( "context" "encoding/json" - "fmt" "sync/atomic" "testing" "time" @@ -514,7 +513,7 @@ func (suite *operatorTestSuite) TestOpStepTimeout() { }, } for i, v := range testData { - fmt.Printf("case:%d\n", i) + suite.T().Logf("case: %d", i) for _, step := range v.step { suite.Equal(v.expect, step.Timeout(v.regionSize)) } diff --git a/pkg/schedule/placement/fit_region_test.go b/pkg/schedule/placement/fit_region_test.go index 0ec67b2a2aa..5bc62d9cc12 100644 --- a/pkg/schedule/placement/fit_region_test.go +++ b/pkg/schedule/placement/fit_region_test.go @@ -55,8 +55,8 @@ func (ms mockStoresSet) GetStore(id uint64) *core.StoreInfo { func addExtraRules(extraRules int) []*Rule { rules := make([]*Rule, 0) rules = append(rules, &Rule{ - GroupID: "pd", - ID: "default", + GroupID: DefaultGroupID, + ID: DefaultRuleID, Role: Voter, Count: 3, LocationLabels: []string{}, @@ -110,8 +110,8 @@ func BenchmarkFitRegion(b *testing.B) { region := mockRegion(3, 0) rules := []*Rule{ { - GroupID: "pd", - ID: "default", + GroupID: DefaultGroupID, + ID: DefaultRuleID, Role: Voter, Count: 3, LocationLabels: []string{}, @@ -129,8 +129,8 @@ func BenchmarkFitRegionMoreStores(b *testing.B) { region := mockRegion(3, 0) rules := []*Rule{ { - GroupID: "pd", - ID: "default", + GroupID: DefaultGroupID, + ID: DefaultRuleID, Role: Voter, Count: 3, LocationLabels: []string{}, @@ -148,8 +148,8 @@ func BenchmarkFitRegionMorePeers(b *testing.B) { region := mockRegion(5, 0) rules := []*Rule{ { - GroupID: "pd", - ID: "default", + GroupID: DefaultGroupID, + ID: DefaultRuleID, Role: Voter, Count: 5, LocationLabels: []string{}, @@ -167,14 +167,14 @@ func BenchmarkFitRegionMorePeersEquals(b *testing.B) { region := mockRegion(3, 0) rules := []*Rule{ { - GroupID: "pd", - ID: "default", + GroupID: DefaultGroupID, + ID: DefaultRuleID, Role: Leader, Count: 1, LocationLabels: []string{}, }, { - GroupID: "pd", + GroupID: DefaultGroupID, ID: "default-2", Role: Follower, Count: 4, @@ -193,8 +193,8 @@ func BenchmarkFitRegionMorePeersSplitRules(b *testing.B) { region := mockRegion(3, 0) rules := []*Rule{ { - GroupID: "pd", - ID: "default", + GroupID: DefaultGroupID, + ID: DefaultRuleID, Role: Leader, Count: 1, LocationLabels: []string{}, @@ -202,7 +202,7 @@ func BenchmarkFitRegionMorePeersSplitRules(b *testing.B) { } for i := 0; i < 4; i++ { rules = append(rules, &Rule{ - GroupID: "pd", + GroupID: DefaultGroupID, ID: fmt.Sprintf("%v", i), Role: Follower, Count: 1, @@ -221,8 +221,8 @@ func BenchmarkFitRegionMoreVotersSplitRules(b *testing.B) { region := mockRegion(5, 0) rules := []*Rule{ { - GroupID: "pd", - ID: "default", + GroupID: DefaultGroupID, + ID: DefaultRuleID, Role: Voter, Count: 1, LocationLabels: []string{}, @@ -230,7 +230,7 @@ func BenchmarkFitRegionMoreVotersSplitRules(b *testing.B) { } for i := 0; i < 4; i++ { rules = append(rules, &Rule{ - GroupID: "pd", + GroupID: DefaultGroupID, ID: fmt.Sprintf("%v", i), Role: Voter, Count: 1, @@ -260,7 +260,7 @@ func BenchmarkFitRegionCrossRegion(b *testing.B) { region := mockRegion(5, 0) 
rules := make([]*Rule, 0) rules = append(rules, &Rule{ - GroupID: "pd", + GroupID: DefaultGroupID, ID: "1", Role: Leader, Count: 1, @@ -268,7 +268,7 @@ func BenchmarkFitRegionCrossRegion(b *testing.B) { }) for i := 0; i < 2; i++ { rules = append(rules, &Rule{ - GroupID: "pd", + GroupID: DefaultGroupID, ID: fmt.Sprintf("%v", i), Role: Follower, Count: 1, @@ -289,7 +289,7 @@ func BenchmarkFitRegionWithMoreRulesAndStoreLabels(b *testing.B) { // create 100 rules, with each rule has 101 LabelConstraints. for i := 0; i < 100; i++ { rule := &Rule{ - GroupID: "pd", + GroupID: DefaultGroupID, ID: fmt.Sprintf("%v", i), Role: Follower, Count: 3, @@ -351,7 +351,7 @@ func BenchmarkFitRegionWithLocationLabels(b *testing.B) { region := mockRegion(5, 5) rules := []*Rule{} rule := &Rule{ - GroupID: "pd", + GroupID: DefaultGroupID, ID: "followers", Role: Follower, Count: 3, @@ -360,7 +360,7 @@ func BenchmarkFitRegionWithLocationLabels(b *testing.B) { } rules = append(rules, rule) rule = &Rule{ - GroupID: "pd", + GroupID: DefaultGroupID, ID: "learner", Role: Learner, Count: 3, @@ -369,7 +369,7 @@ func BenchmarkFitRegionWithLocationLabels(b *testing.B) { } rules = append(rules, rule) rule = &Rule{ - GroupID: "pd", + GroupID: DefaultGroupID, ID: "voters", Role: Voter, Count: 4, diff --git a/pkg/schedule/placement/region_rule_cache_test.go b/pkg/schedule/placement/region_rule_cache_test.go index b4164e85530..835203bed26 100644 --- a/pkg/schedule/placement/region_rule_cache_test.go +++ b/pkg/schedule/placement/region_rule_cache_test.go @@ -99,8 +99,8 @@ func TestRegionRuleFitCache(t *testing.T) { region: mockRegion(3, 0), rules: []*Rule{ { - GroupID: "pd", - ID: "default", + GroupID: DefaultGroupID, + ID: DefaultRuleID, Role: Voter, Count: 4, Version: 1, @@ -114,8 +114,8 @@ func TestRegionRuleFitCache(t *testing.T) { region: mockRegion(3, 0), rules: []*Rule{ { - GroupID: "pd", - ID: "default", + GroupID: DefaultGroupID, + ID: DefaultRuleID, Role: Voter, Count: 3, CreateTimestamp: 1, @@ -141,7 +141,7 @@ func TestRegionRuleFitCache(t *testing.T) { region: mockRegion(3, 0), rules: []*Rule{ { - GroupID: "pd", + GroupID: DefaultGroupID, ID: "default-2", Role: Voter, Count: 3, @@ -155,7 +155,7 @@ func TestRegionRuleFitCache(t *testing.T) { region: nil, rules: []*Rule{ { - GroupID: "pd", + GroupID: DefaultGroupID, ID: "default-2", Role: Voter, Count: 3, diff --git a/pkg/schedule/placement/rule_manager.go b/pkg/schedule/placement/rule_manager.go index a7e169b74aa..e25b8802b45 100644 --- a/pkg/schedule/placement/rule_manager.go +++ b/pkg/schedule/placement/rule_manager.go @@ -37,6 +37,15 @@ import ( "golang.org/x/exp/slices" ) +const ( + // DefaultGroupID is the default rule group ID. + DefaultGroupID = "pd" + // DefaultRuleID is the default rule ID. + DefaultRuleID = "default" + // defaultWitnessRuleID is the default witness rule ID. + defaultWitnessRuleID = "witness" +) + // RuleManager is responsible for the lifecycle of all placement Rules. // It is thread safe. 
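Editor's note: the constants introduced in rule_manager.go above let callers spell the default rule without the bare "pd"/"default" literals the tests carried everywhere (the witness rule ID stays unexported). A minimal usage sketch, assuming the tikv/pd module path:

```go
package main

import (
	"fmt"

	"github.com/tikv/pd/pkg/schedule/placement"
)

func main() {
	// The default voter rule, expressed with the new named constants.
	rule := &placement.Rule{
		GroupID: placement.DefaultGroupID, // "pd"
		ID:      placement.DefaultRuleID,  // "default"
		Role:    placement.Voter,
		Count:   3,
	}
	fmt.Println(rule.GroupID, rule.ID, rule.Role, rule.Count)
}
```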
type RuleManager struct { @@ -88,16 +97,16 @@ func (m *RuleManager) Initialize(maxReplica int, locationLabels []string, isolat defaultRules = append(defaultRules, []*Rule{ { - GroupID: "pd", - ID: "default", + GroupID: DefaultGroupID, + ID: DefaultRuleID, Role: Voter, Count: maxReplica - witnessCount, LocationLabels: locationLabels, IsolationLevel: isolationLevel, }, { - GroupID: "pd", - ID: "witness", + GroupID: DefaultGroupID, + ID: defaultWitnessRuleID, Role: Voter, Count: witnessCount, IsWitness: true, @@ -108,8 +117,8 @@ func (m *RuleManager) Initialize(maxReplica int, locationLabels []string, isolat ) } else { defaultRules = append(defaultRules, &Rule{ - GroupID: "pd", - ID: "default", + GroupID: DefaultGroupID, + ID: DefaultRuleID, Role: Voter, Count: maxReplica, LocationLabels: locationLabels, diff --git a/pkg/schedule/placement/rule_manager_test.go b/pkg/schedule/placement/rule_manager_test.go index dad50a2d881..68a18b538d4 100644 --- a/pkg/schedule/placement/rule_manager_test.go +++ b/pkg/schedule/placement/rule_manager_test.go @@ -44,8 +44,8 @@ func TestDefault(t *testing.T) { _, manager := newTestManager(t, false) rules := manager.GetAllRules() re.Len(rules, 1) - re.Equal("pd", rules[0].GroupID) - re.Equal("default", rules[0].ID) + re.Equal(DefaultGroupID, rules[0].GroupID) + re.Equal(DefaultRuleID, rules[0].ID) re.Equal(0, rules[0].Index) re.Empty(rules[0].StartKey) re.Empty(rules[0].EndKey) @@ -58,15 +58,15 @@ func TestDefault2(t *testing.T) { _, manager := newTestManager(t, true) rules := manager.GetAllRules() re.Len(rules, 2) - re.Equal("pd", rules[0].GroupID) - re.Equal("default", rules[0].ID) + re.Equal(DefaultGroupID, rules[0].GroupID) + re.Equal(DefaultRuleID, rules[0].ID) re.Equal(0, rules[0].Index) re.Empty(rules[0].StartKey) re.Empty(rules[0].EndKey) re.Equal(Voter, rules[0].Role) re.Equal([]string{"zone", "rack", "host"}, rules[0].LocationLabels) - re.Equal("pd", rules[1].GroupID) - re.Equal("witness", rules[1].ID) + re.Equal(DefaultGroupID, rules[1].GroupID) + re.Equal(defaultWitnessRuleID, rules[1].ID) re.Equal(0, rules[1].Index) re.Empty(rules[1].StartKey) re.Empty(rules[1].EndKey) @@ -79,16 +79,16 @@ func TestAdjustRule(t *testing.T) { re := require.New(t) _, manager := newTestManager(t, false) rules := []Rule{ - {GroupID: "group", ID: "id", StartKeyHex: "123abc", EndKeyHex: "123abf", Role: "voter", Count: 3}, - {GroupID: "", ID: "id", StartKeyHex: "123abc", EndKeyHex: "123abf", Role: "voter", Count: 3}, - {GroupID: "group", ID: "", StartKeyHex: "123abc", EndKeyHex: "123abf", Role: "voter", Count: 3}, - {GroupID: "group", ID: "id", StartKeyHex: "123ab", EndKeyHex: "123abf", Role: "voter", Count: 3}, - {GroupID: "group", ID: "id", StartKeyHex: "123abc", EndKeyHex: "1123abf", Role: "voter", Count: 3}, - {GroupID: "group", ID: "id", StartKeyHex: "123abc", EndKeyHex: "123aaa", Role: "voter", Count: 3}, + {GroupID: "group", ID: "id", StartKeyHex: "123abc", EndKeyHex: "123abf", Role: Voter, Count: 3}, + {GroupID: "", ID: "id", StartKeyHex: "123abc", EndKeyHex: "123abf", Role: Voter, Count: 3}, + {GroupID: "group", ID: "", StartKeyHex: "123abc", EndKeyHex: "123abf", Role: Voter, Count: 3}, + {GroupID: "group", ID: "id", StartKeyHex: "123ab", EndKeyHex: "123abf", Role: Voter, Count: 3}, + {GroupID: "group", ID: "id", StartKeyHex: "123abc", EndKeyHex: "1123abf", Role: Voter, Count: 3}, + {GroupID: "group", ID: "id", StartKeyHex: "123abc", EndKeyHex: "123aaa", Role: Voter, Count: 3}, {GroupID: "group", ID: "id", StartKeyHex: "123abc", EndKeyHex: "123abf", Role: "master", 
Count: 3}, - {GroupID: "group", ID: "id", StartKeyHex: "123abc", EndKeyHex: "123abf", Role: "voter", Count: 0}, - {GroupID: "group", ID: "id", StartKeyHex: "123abc", EndKeyHex: "123abf", Role: "voter", Count: -1}, - {GroupID: "group", ID: "id", StartKeyHex: "123abc", EndKeyHex: "123abf", Role: "voter", Count: 3, LabelConstraints: []LabelConstraint{{Op: "foo"}}}, + {GroupID: "group", ID: "id", StartKeyHex: "123abc", EndKeyHex: "123abf", Role: Voter, Count: 0}, + {GroupID: "group", ID: "id", StartKeyHex: "123abc", EndKeyHex: "123abf", Role: Voter, Count: -1}, + {GroupID: "group", ID: "id", StartKeyHex: "123abc", EndKeyHex: "123abf", Role: Voter, Count: 3, LabelConstraints: []LabelConstraint{{Op: "foo"}}}, } re.NoError(manager.adjustRule(&rules[0], "group")) @@ -101,17 +101,17 @@ func TestAdjustRule(t *testing.T) { } manager.SetKeyType(constant.Table.String()) - re.Error(manager.adjustRule(&Rule{GroupID: "group", ID: "id", StartKeyHex: "123abc", EndKeyHex: "123abf", Role: "voter", Count: 3}, "group")) + re.Error(manager.adjustRule(&Rule{GroupID: "group", ID: "id", StartKeyHex: "123abc", EndKeyHex: "123abf", Role: Voter, Count: 3}, "group")) manager.SetKeyType(constant.Txn.String()) - re.Error(manager.adjustRule(&Rule{GroupID: "group", ID: "id", StartKeyHex: "123abc", EndKeyHex: "123abf", Role: "voter", Count: 3}, "group")) + re.Error(manager.adjustRule(&Rule{GroupID: "group", ID: "id", StartKeyHex: "123abc", EndKeyHex: "123abf", Role: Voter, Count: 3}, "group")) re.Error(manager.adjustRule(&Rule{ GroupID: "group", ID: "id", StartKeyHex: hex.EncodeToString(codec.EncodeBytes([]byte{0})), EndKeyHex: "123abf", - Role: "voter", + Role: Voter, Count: 3, }, "group")) @@ -120,7 +120,7 @@ func TestAdjustRule(t *testing.T) { ID: "id", StartKeyHex: hex.EncodeToString(codec.EncodeBytes([]byte{0})), EndKeyHex: hex.EncodeToString(codec.EncodeBytes([]byte{1})), - Role: "learner", + Role: Learner, Count: 1, IsWitness: true, LabelConstraints: []LabelConstraint{{Key: "engine", Op: "in", Values: []string{"tiflash"}}}, @@ -130,15 +130,15 @@ func TestAdjustRule(t *testing.T) { func TestLeaderCheck(t *testing.T) { re := require.New(t) _, manager := newTestManager(t, false) - re.Regexp(".*needs at least one leader or voter.*", manager.SetRule(&Rule{GroupID: "pd", ID: "default", Role: "learner", Count: 3}).Error()) - re.Regexp(".*define multiple leaders by count 2.*", manager.SetRule(&Rule{GroupID: "g2", ID: "33", Role: "leader", Count: 2}).Error()) + re.Regexp(".*needs at least one leader or voter.*", manager.SetRule(&Rule{GroupID: DefaultGroupID, ID: DefaultRuleID, Role: Learner, Count: 3}).Error()) + re.Regexp(".*define multiple leaders by count 2.*", manager.SetRule(&Rule{GroupID: "g2", ID: "33", Role: Leader, Count: 2}).Error()) re.Regexp(".*multiple leader replicas.*", manager.Batch([]RuleOp{ { - Rule: &Rule{GroupID: "g2", ID: "foo1", Role: "leader", Count: 1}, + Rule: &Rule{GroupID: "g2", ID: "foo1", Role: Leader, Count: 1}, Action: RuleOpAdd, }, { - Rule: &Rule{GroupID: "g2", ID: "foo2", Role: "leader", Count: 1}, + Rule: &Rule{GroupID: "g2", ID: "foo2", Role: Leader, Count: 1}, Action: RuleOpAdd, }, }).Error()) @@ -148,9 +148,9 @@ func TestSaveLoad(t *testing.T) { re := require.New(t) store, manager := newTestManager(t, false) rules := []*Rule{ - {GroupID: "pd", ID: "default", Role: "voter", Count: 5}, - {GroupID: "foo", ID: "baz", StartKeyHex: "", EndKeyHex: "abcd", Role: "voter", Count: 1}, - {GroupID: "foo", ID: "bar", Role: "learner", Count: 1}, + {GroupID: DefaultGroupID, ID: DefaultRuleID, Role: 
Voter, Count: 5}, + {GroupID: "foo", ID: "baz", StartKeyHex: "", EndKeyHex: "abcd", Role: Voter, Count: 1}, + {GroupID: "foo", ID: "bar", Role: Learner, Count: 1}, } for _, r := range rules { re.NoError(manager.SetRule(r.Clone())) @@ -160,7 +160,7 @@ func TestSaveLoad(t *testing.T) { err := m2.Initialize(3, []string{"no", "labels"}, "") re.NoError(err) re.Len(m2.GetAllRules(), 3) - re.Equal(rules[0].String(), m2.GetRule("pd", "default").String()) + re.Equal(rules[0].String(), m2.GetRule(DefaultGroupID, DefaultRuleID).String()) re.Equal(rules[1].String(), m2.GetRule("foo", "baz").String()) re.Equal(rules[2].String(), m2.GetRule("foo", "bar").String()) re.Equal(manager.GetRulesCount(), 3) @@ -170,14 +170,14 @@ func TestSaveLoad(t *testing.T) { func TestSetAfterGet(t *testing.T) { re := require.New(t) store, manager := newTestManager(t, false) - rule := manager.GetRule("pd", "default") + rule := manager.GetRule(DefaultGroupID, DefaultRuleID) rule.Count = 1 manager.SetRule(rule) m2 := NewRuleManager(store, nil, nil) err := m2.Initialize(100, []string{}, "") re.NoError(err) - rule = m2.GetRule("pd", "default") + rule = m2.GetRule(DefaultGroupID, DefaultRuleID) re.Equal(1, rule.Count) } @@ -193,9 +193,9 @@ func TestKeys(t *testing.T) { re := require.New(t) _, manager := newTestManager(t, false) rules := []*Rule{ - {GroupID: "1", ID: "1", Role: "voter", Count: 1, StartKeyHex: "", EndKeyHex: ""}, - {GroupID: "2", ID: "2", Role: "voter", Count: 1, StartKeyHex: "11", EndKeyHex: "ff"}, - {GroupID: "2", ID: "3", Role: "voter", Count: 1, StartKeyHex: "22", EndKeyHex: "dd"}, + {GroupID: "1", ID: "1", Role: Voter, Count: 1, StartKeyHex: "", EndKeyHex: ""}, + {GroupID: "2", ID: "2", Role: Voter, Count: 1, StartKeyHex: "11", EndKeyHex: "ff"}, + {GroupID: "2", ID: "3", Role: Voter, Count: 1, StartKeyHex: "22", EndKeyHex: "dd"}, } toDelete := []RuleOp{} @@ -207,16 +207,16 @@ func TestKeys(t *testing.T) { DeleteByIDPrefix: false, }) } - checkRules(t, manager.GetAllRules(), [][2]string{{"1", "1"}, {"2", "2"}, {"2", "3"}, {"pd", "default"}}) + checkRules(t, manager.GetAllRules(), [][2]string{{"1", "1"}, {"2", "2"}, {"2", "3"}, {DefaultGroupID, DefaultRuleID}}) manager.Batch(toDelete) - checkRules(t, manager.GetAllRules(), [][2]string{{"pd", "default"}}) + checkRules(t, manager.GetAllRules(), [][2]string{{DefaultGroupID, DefaultRuleID}}) - rules = append(rules, &Rule{GroupID: "3", ID: "4", Role: "voter", Count: 1, StartKeyHex: "44", EndKeyHex: "ee"}, - &Rule{GroupID: "3", ID: "5", Role: "voter", Count: 1, StartKeyHex: "44", EndKeyHex: "dd"}) + rules = append(rules, &Rule{GroupID: "3", ID: "4", Role: Voter, Count: 1, StartKeyHex: "44", EndKeyHex: "ee"}, + &Rule{GroupID: "3", ID: "5", Role: Voter, Count: 1, StartKeyHex: "44", EndKeyHex: "dd"}) manager.SetRules(rules) - checkRules(t, manager.GetAllRules(), [][2]string{{"1", "1"}, {"2", "2"}, {"2", "3"}, {"3", "4"}, {"3", "5"}, {"pd", "default"}}) + checkRules(t, manager.GetAllRules(), [][2]string{{"1", "1"}, {"2", "2"}, {"2", "3"}, {"3", "4"}, {"3", "5"}, {DefaultGroupID, DefaultRuleID}}) - manager.DeleteRule("pd", "default") + manager.DeleteRule(DefaultGroupID, DefaultRuleID) checkRules(t, manager.GetAllRules(), [][2]string{{"1", "1"}, {"2", "2"}, {"2", "3"}, {"3", "4"}, {"3", "5"}}) splitKeys := [][]string{ @@ -282,12 +282,12 @@ func TestKeys(t *testing.T) { func TestDeleteByIDPrefix(t *testing.T) { _, manager := newTestManager(t, false) manager.SetRules([]*Rule{ - {GroupID: "g1", ID: "foo1", Role: "voter", Count: 1}, - {GroupID: "g2", ID: "foo1", Role: "voter", 
Count: 1}, - {GroupID: "g2", ID: "foobar", Role: "voter", Count: 1}, - {GroupID: "g2", ID: "baz2", Role: "voter", Count: 1}, + {GroupID: "g1", ID: "foo1", Role: Voter, Count: 1}, + {GroupID: "g2", ID: "foo1", Role: Voter, Count: 1}, + {GroupID: "g2", ID: "foobar", Role: Voter, Count: 1}, + {GroupID: "g2", ID: "baz2", Role: Voter, Count: 1}, }) - manager.DeleteRule("pd", "default") + manager.DeleteRule(DefaultGroupID, DefaultRuleID) checkRules(t, manager.GetAllRules(), [][2]string{{"g1", "foo1"}, {"g2", "baz2"}, {"g2", "foo1"}, {"g2", "foobar"}}) manager.Batch([]RuleOp{{ @@ -301,40 +301,40 @@ func TestDeleteByIDPrefix(t *testing.T) { func TestRangeGap(t *testing.T) { re := require.New(t) _, manager := newTestManager(t, false) - err := manager.DeleteRule("pd", "default") + err := manager.DeleteRule(DefaultGroupID, DefaultRuleID) re.Error(err) - err = manager.SetRule(&Rule{GroupID: "pd", ID: "foo", StartKeyHex: "", EndKeyHex: "abcd", Role: "voter", Count: 1}) + err = manager.SetRule(&Rule{GroupID: DefaultGroupID, ID: "foo", StartKeyHex: "", EndKeyHex: "abcd", Role: Voter, Count: 1}) re.NoError(err) // |-- default --| // |-- foo --| // still cannot delete default since it will cause ("abcd", "") has no rules inside. - err = manager.DeleteRule("pd", "default") + err = manager.DeleteRule(DefaultGroupID, DefaultRuleID) re.Error(err) - err = manager.SetRule(&Rule{GroupID: "pd", ID: "bar", StartKeyHex: "abcd", EndKeyHex: "", Role: "voter", Count: 1}) + err = manager.SetRule(&Rule{GroupID: DefaultGroupID, ID: "bar", StartKeyHex: "abcd", EndKeyHex: "", Role: Voter, Count: 1}) re.NoError(err) // now default can be deleted. - err = manager.DeleteRule("pd", "default") + err = manager.DeleteRule(DefaultGroupID, DefaultRuleID) re.NoError(err) // cannot change range since it will cause ("abaa", "abcd") has no rules inside. 
- err = manager.SetRule(&Rule{GroupID: "pd", ID: "foo", StartKeyHex: "", EndKeyHex: "abaa", Role: "voter", Count: 1}) + err = manager.SetRule(&Rule{GroupID: DefaultGroupID, ID: "foo", StartKeyHex: "", EndKeyHex: "abaa", Role: Voter, Count: 1}) re.Error(err) } func TestGroupConfig(t *testing.T) { re := require.New(t) _, manager := newTestManager(t, false) - pd1 := &RuleGroup{ID: "pd"} - re.Equal(pd1, manager.GetRuleGroup("pd")) + pd1 := &RuleGroup{ID: DefaultGroupID} + re.Equal(pd1, manager.GetRuleGroup(DefaultGroupID)) // update group pd - pd2 := &RuleGroup{ID: "pd", Index: 100, Override: true} + pd2 := &RuleGroup{ID: DefaultGroupID, Index: 100, Override: true} err := manager.SetRuleGroup(pd2) re.NoError(err) - re.Equal(pd2, manager.GetRuleGroup("pd")) + re.Equal(pd2, manager.GetRuleGroup(DefaultGroupID)) // new group g without config - err = manager.SetRule(&Rule{GroupID: "g", ID: "1", Role: "voter", Count: 1}) + err = manager.SetRule(&Rule{GroupID: "g", ID: "1", Role: Voter, Count: 1}) re.NoError(err) g1 := &RuleGroup{ID: "g"} re.Equal(g1, manager.GetRuleGroup("g")) @@ -347,12 +347,12 @@ func TestGroupConfig(t *testing.T) { re.Equal([]*RuleGroup{g2, pd2}, manager.GetRuleGroups()) // delete pd group, restore to default config - err = manager.DeleteRuleGroup("pd") + err = manager.DeleteRuleGroup(DefaultGroupID) re.NoError(err) re.Equal([]*RuleGroup{pd1, g2}, manager.GetRuleGroups()) // delete rule, the group is removed too - err = manager.DeleteRule("pd", "default") + err = manager.DeleteRule(DefaultGroupID, DefaultRuleID) re.NoError(err) re.Equal([]*RuleGroup{g2}, manager.GetRuleGroups()) } @@ -360,16 +360,16 @@ func TestGroupConfig(t *testing.T) { func TestRuleVersion(t *testing.T) { re := require.New(t) _, manager := newTestManager(t, false) - rule1 := manager.GetRule("pd", "default") + rule1 := manager.GetRule(DefaultGroupID, DefaultRuleID) re.Equal(uint64(0), rule1.Version) // create new rule - newRule := &Rule{GroupID: "g1", ID: "id", StartKeyHex: "123abc", EndKeyHex: "123abf", Role: "voter", Count: 3} + newRule := &Rule{GroupID: "g1", ID: "id", StartKeyHex: "123abc", EndKeyHex: "123abf", Role: Voter, Count: 3} err := manager.SetRule(newRule) re.NoError(err) newRule = manager.GetRule("g1", "id") re.Equal(uint64(0), newRule.Version) // update rule - newRule = &Rule{GroupID: "g1", ID: "id", StartKeyHex: "123abc", EndKeyHex: "123abf", Role: "voter", Count: 2} + newRule = &Rule{GroupID: "g1", ID: "id", StartKeyHex: "123abc", EndKeyHex: "123abf", Role: Voter, Count: 2} err = manager.SetRule(newRule) re.NoError(err) newRule = manager.GetRule("g1", "id") diff --git a/pkg/schedule/placement/rule_test.go b/pkg/schedule/placement/rule_test.go index b91a1f22d65..75d7bab23c9 100644 --- a/pkg/schedule/placement/rule_test.go +++ b/pkg/schedule/placement/rule_test.go @@ -110,9 +110,9 @@ func TestGroupProperties(t *testing.T) { func TestBuildRuleList(t *testing.T) { re := require.New(t) defaultRule := &Rule{ - GroupID: "pd", - ID: "default", - Role: "voter", + GroupID: DefaultGroupID, + ID: DefaultRuleID, + Role: Voter, StartKey: []byte{}, EndKey: []byte{}, Count: 3, @@ -122,13 +122,13 @@ func TestBuildRuleList(t *testing.T) { byteEnd, err := hex.DecodeString("a2") re.NoError(err) ruleMeta := &Rule{ - GroupID: "pd", + GroupID: DefaultGroupID, ID: "meta", Index: 1, Override: true, StartKey: byteStart, EndKey: byteEnd, - Role: "voter", + Role: Voter, Count: 5, } @@ -140,7 +140,7 @@ func TestBuildRuleList(t *testing.T) { { name: "default rule", rules: map[[2]string]*Rule{ - {"pd", "default"}: 
defaultRule, + {DefaultGroupID, DefaultRuleID}: defaultRule, }, expect: ruleList{ ranges: []rangeRules{ @@ -155,8 +155,8 @@ func TestBuildRuleList(t *testing.T) { { name: "metadata case", rules: map[[2]string]*Rule{ - {"pd", "default"}: defaultRule, - {"pd", "meta"}: ruleMeta, + {DefaultGroupID, DefaultRuleID}: defaultRule, + {DefaultGroupID, "meta"}: ruleMeta, }, expect: ruleList{ranges: []rangeRules{ { diff --git a/pkg/schedule/prepare_checker.go b/pkg/schedule/prepare_checker.go index c7faa57af81..34618427930 100644 --- a/pkg/schedule/prepare_checker.go +++ b/pkg/schedule/prepare_checker.go @@ -25,16 +25,13 @@ import ( type prepareChecker struct { syncutil.RWMutex - reactiveRegions map[uint64]int - start time.Time - sum int - prepared bool + start time.Time + prepared bool } func newPrepareChecker() *prepareChecker { return &prepareChecker{ - start: time.Now(), - reactiveRegions: make(map[uint64]int), + start: time.Now(), } } @@ -51,13 +48,8 @@ func (checker *prepareChecker) check(c *core.BasicCluster) bool { } notLoadedFromRegionsCnt := c.GetClusterNotFromStorageRegionsCnt() totalRegionsCnt := c.GetTotalRegionCount() - if float64(notLoadedFromRegionsCnt) > float64(totalRegionsCnt)*collectFactor { - log.Info("meta not loaded from region number is satisfied, finish prepare checker", zap.Int("not-from-storage-region", notLoadedFromRegionsCnt), zap.Int("total-region", totalRegionsCnt)) - checker.prepared = true - return true - } // The number of active regions should be more than total region of all stores * collectFactor - if float64(totalRegionsCnt)*collectFactor > float64(checker.sum) { + if float64(totalRegionsCnt)*collectFactor > float64(notLoadedFromRegionsCnt) { return false } for _, store := range c.GetStores() { @@ -66,23 +58,15 @@ func (checker *prepareChecker) check(c *core.BasicCluster) bool { } storeID := store.GetID() // For each store, the number of active regions should be more than total region of the store * collectFactor - if float64(c.GetStoreRegionCount(storeID))*collectFactor > float64(checker.reactiveRegions[storeID]) { + if float64(c.GetStoreRegionCount(storeID))*collectFactor > float64(c.GetNotFromStorageRegionsCntByStore(storeID)) { return false } } + log.Info("not loaded from storage region number is satisfied, finish prepare checker", zap.Int("not-from-storage-region", notLoadedFromRegionsCnt), zap.Int("total-region", totalRegionsCnt)) checker.prepared = true return true } -func (checker *prepareChecker) Collect(region *core.RegionInfo) { - checker.Lock() - defer checker.Unlock() - for _, p := range region.GetPeers() { - checker.reactiveRegions[p.GetStoreId()]++ - } - checker.sum++ -} - func (checker *prepareChecker) IsPrepared() bool { checker.RLock() defer checker.RUnlock() @@ -95,10 +79,3 @@ func (checker *prepareChecker) SetPrepared() { defer checker.Unlock() checker.prepared = true } - -// for test purpose -func (checker *prepareChecker) GetSum() int { - checker.RLock() - defer checker.RUnlock() - return checker.sum -} diff --git a/pkg/schedule/scatter/region_scatterer.go b/pkg/schedule/scatter/region_scatterer.go index 68d868750e8..898c4d052a7 100644 --- a/pkg/schedule/scatter/region_scatterer.go +++ b/pkg/schedule/scatter/region_scatterer.go @@ -53,6 +53,8 @@ var ( scatterUnnecessaryCounter = scatterCounter.WithLabelValues("unnecessary", "") scatterFailCounter = scatterCounter.WithLabelValues("fail", "") scatterSuccessCounter = scatterCounter.WithLabelValues("success", "") + errRegionNotFound = errors.New("region not found") + errEmptyRegion = 
errors.New("empty region") ) const ( @@ -165,7 +167,7 @@ func (r *RegionScatterer) ScatterRegionsByRange(startKey, endKey []byte, group s regions := r.cluster.ScanRegions(startKey, endKey, -1) if len(regions) < 1 { scatterSkipEmptyRegionCounter.Inc() - return 0, nil, errors.New("empty region") + return 0, nil, errEmptyRegion } failures := make(map[uint64]error, len(regions)) regionMap := make(map[uint64]*core.RegionInfo, len(regions)) @@ -184,7 +186,14 @@ func (r *RegionScatterer) ScatterRegionsByRange(startKey, endKey []byte, group s func (r *RegionScatterer) ScatterRegionsByID(regionsID []uint64, group string, retryLimit int, skipStoreLimit bool) (int, map[uint64]error, error) { if len(regionsID) < 1 { scatterSkipEmptyRegionCounter.Inc() - return 0, nil, errors.New("empty region") + return 0, nil, errEmptyRegion + } + if len(regionsID) == 1 { + region := r.cluster.GetRegion(regionsID[0]) + if region == nil { + scatterSkipNoRegionCounter.Inc() + return 0, nil, errRegionNotFound + } } failures := make(map[uint64]error, len(regionsID)) regions := make([]*core.RegionInfo, 0, len(regionsID)) @@ -219,7 +228,7 @@ func (r *RegionScatterer) ScatterRegionsByID(regionsID []uint64, group string, r func (r *RegionScatterer) scatterRegions(regions map[uint64]*core.RegionInfo, failures map[uint64]error, group string, retryLimit int, skipStoreLimit bool) (int, error) { if len(regions) < 1 { scatterSkipEmptyRegionCounter.Inc() - return 0, errors.New("empty region") + return 0, errEmptyRegion } if retryLimit > maxRetryLimit { retryLimit = maxRetryLimit diff --git a/pkg/schedule/scatter/region_scatterer_test.go b/pkg/schedule/scatter/region_scatterer_test.go index 681b863aea6..70517d23fee 100644 --- a/pkg/schedule/scatter/region_scatterer_test.go +++ b/pkg/schedule/scatter/region_scatterer_test.go @@ -185,7 +185,7 @@ func scatterSpecial(re *require.Assertions, numOrdinaryStores, numSpecialStores, } tc.SetEnablePlacementRules(true) re.NoError(tc.RuleManager.SetRule(&placement.Rule{ - GroupID: "pd", ID: "learner", Role: placement.Learner, Count: 3, + GroupID: placement.DefaultGroupID, ID: "learner", Role: placement.Learner, Count: 3, LabelConstraints: []placement.LabelConstraint{{Key: "engine", Op: placement.In, Values: []string{"tiflash"}}}})) // Region 1 has the same distribution with the Region 2, which is used to test selectPeerToReplace. 
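Editor's note: the reworked prepareChecker.check (a few hunks above) drops the per-heartbeat Collect counter and instead compares the regions reported by heartbeats, rather than loaded from storage, against collectFactor thresholds both cluster-wide and per store. A simplified standalone sketch of that gate; the 0.9 factor is an assumption for illustration, and the minimum-elapsed-time and store-state checks of the real code are omitted:

```go
package main

import "fmt"

const collectFactor = 0.9 // assumed here; the real constant lives in the scheduler package

// prepared mirrors the reworked check: enough regions must have arrived via
// heartbeat (not from storage) overall and for every store.
func prepared(totalRegions, notFromStorage int, perStoreTotal, perStoreNotFromStorage map[uint64]int) bool {
	if float64(totalRegions)*collectFactor > float64(notFromStorage) {
		return false
	}
	for storeID, total := range perStoreTotal {
		if float64(total)*collectFactor > float64(perStoreNotFromStorage[storeID]) {
			return false
		}
	}
	return true
}

func main() {
	fmt.Println(prepared(100, 95, map[uint64]int{1: 50, 2: 50}, map[uint64]int{1: 48, 2: 47})) // true
	fmt.Println(prepared(100, 95, map[uint64]int{1: 50, 2: 50}, map[uint64]int{1: 48, 2: 40})) // false: store 2 lags
}
```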
@@ -575,8 +575,8 @@ func TestRegionHasLearner(t *testing.T) { tc.AddLabelsStore(i, 0, map[string]string{"zone": "z2"}) } tc.RuleManager.SetRule(&placement.Rule{ - GroupID: "pd", - ID: "default", + GroupID: placement.DefaultGroupID, + ID: placement.DefaultRuleID, Role: placement.Voter, Count: 3, LabelConstraints: []placement.LabelConstraint{ @@ -588,7 +588,7 @@ func TestRegionHasLearner(t *testing.T) { }, }) tc.RuleManager.SetRule(&placement.Rule{ - GroupID: "pd", + GroupID: placement.DefaultGroupID, ID: "learner", Role: placement.Learner, Count: 1, diff --git a/pkg/schedule/schedulers/balance_leader.go b/pkg/schedule/schedulers/balance_leader.go index e5516317f46..eb94752944b 100644 --- a/pkg/schedule/schedulers/balance_leader.go +++ b/pkg/schedule/schedulers/balance_leader.go @@ -67,7 +67,7 @@ var ( ) type balanceLeaderSchedulerConfig struct { - mu syncutil.RWMutex + syncutil.RWMutex storage endpoint.ConfigStorage Ranges []core.KeyRange `json:"ranges"` // Batch is used to generate multiple operators by one scheduling @@ -75,22 +75,22 @@ type balanceLeaderSchedulerConfig struct { } func (conf *balanceLeaderSchedulerConfig) Update(data []byte) (int, interface{}) { - conf.mu.Lock() - defer conf.mu.Unlock() + conf.Lock() + defer conf.Unlock() - oldc, _ := json.Marshal(conf) + oldConfig, _ := json.Marshal(conf) if err := json.Unmarshal(data, conf); err != nil { return http.StatusInternalServerError, err.Error() } - newc, _ := json.Marshal(conf) - if !bytes.Equal(oldc, newc) { - if !conf.validate() { - json.Unmarshal(oldc, conf) + newConfig, _ := json.Marshal(conf) + if !bytes.Equal(oldConfig, newConfig) { + if !conf.validateLocked() { + json.Unmarshal(oldConfig, conf) return http.StatusBadRequest, "invalid batch size which should be an integer between 1 and 10" } conf.persistLocked() - log.Info("balance-leader-scheduler config is updated", zap.ByteString("old", oldc), zap.ByteString("new", newc)) + log.Info("balance-leader-scheduler config is updated", zap.ByteString("old", oldConfig), zap.ByteString("new", newConfig)) return http.StatusOK, "Config is updated." } m := make(map[string]interface{}) @@ -104,13 +104,13 @@ func (conf *balanceLeaderSchedulerConfig) Update(data []byte) (int, interface{}) return http.StatusBadRequest, "Config item is not found." 
} -func (conf *balanceLeaderSchedulerConfig) validate() bool { +func (conf *balanceLeaderSchedulerConfig) validateLocked() bool { return conf.Batch >= 1 && conf.Batch <= 10 } func (conf *balanceLeaderSchedulerConfig) Clone() *balanceLeaderSchedulerConfig { - conf.mu.RLock() - defer conf.mu.RUnlock() + conf.RLock() + defer conf.RUnlock() ranges := make([]core.KeyRange, len(conf.Ranges)) copy(ranges, conf.Ranges) return &balanceLeaderSchedulerConfig{ @@ -127,6 +127,20 @@ func (conf *balanceLeaderSchedulerConfig) persistLocked() error { return conf.storage.SaveSchedulerConfig(BalanceLeaderName, data) } +func (conf *balanceLeaderSchedulerConfig) getBatch() int { + conf.RLock() + defer conf.RUnlock() + return conf.Batch +} + +func (conf *balanceLeaderSchedulerConfig) getRanges() []core.KeyRange { + conf.RLock() + defer conf.RUnlock() + ranges := make([]core.KeyRange, len(conf.Ranges)) + copy(ranges, conf.Ranges) + return ranges +} + type balanceLeaderHandler struct { rd *render.Render config *balanceLeaderSchedulerConfig @@ -210,14 +224,14 @@ func (l *balanceLeaderScheduler) GetType() string { } func (l *balanceLeaderScheduler) EncodeConfig() ([]byte, error) { - l.conf.mu.RLock() - defer l.conf.mu.RUnlock() + l.conf.RLock() + defer l.conf.RUnlock() return EncodeConfig(l.conf) } func (l *balanceLeaderScheduler) ReloadConfig() error { - l.conf.mu.Lock() - defer l.conf.mu.Unlock() + l.conf.Lock() + defer l.conf.Unlock() cfgData, err := l.conf.storage.LoadSchedulerConfig(l.GetName()) if err != nil { return err @@ -335,14 +349,12 @@ func (cs *candidateStores) resortStoreWithPos(pos int) { } func (l *balanceLeaderScheduler) Schedule(cluster sche.SchedulerCluster, dryRun bool) ([]*operator.Operator, []plan.Plan) { - l.conf.mu.RLock() - defer l.conf.mu.RUnlock() basePlan := plan.NewBalanceSchedulerPlan() var collector *plan.Collector if dryRun { collector = plan.NewCollector(basePlan) } - batch := l.conf.Batch + batch := l.conf.getBatch() balanceLeaderScheduleCounter.Inc() leaderSchedulePolicy := cluster.GetSchedulerConfig().GetLeaderSchedulePolicy() @@ -441,7 +453,7 @@ func makeInfluence(op *operator.Operator, plan *solver, usedRegions map[uint64]s // It randomly selects a health region from the source store, then picks // the best follower peer and transfers the leader. func (l *balanceLeaderScheduler) transferLeaderOut(solver *solver, collector *plan.Collector) *operator.Operator { - solver.Region = filter.SelectOneRegion(solver.RandLeaderRegions(solver.SourceStoreID(), l.conf.Ranges), + solver.Region = filter.SelectOneRegion(solver.RandLeaderRegions(solver.SourceStoreID(), l.conf.getRanges()), collector, filter.NewRegionPendingFilter(), filter.NewRegionDownFilter()) if solver.Region == nil { log.Debug("store has no leader", zap.String("scheduler", l.GetName()), zap.Uint64("store-id", solver.SourceStoreID())) @@ -485,7 +497,7 @@ func (l *balanceLeaderScheduler) transferLeaderOut(solver *solver, collector *pl // It randomly selects a health region from the target store, then picks // the worst follower peer and transfers the leader. 
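Editor's note: balanceLeaderSchedulerConfig now embeds the RWMutex and exposes getBatch/getRanges, so Schedule copies out what it needs instead of holding the config lock for the whole scheduling pass. The same shape in miniature, using the standard library's sync.RWMutex in place of syncutil purely to keep the example self-contained:

```go
package main

import (
	"fmt"
	"sync"
)

type keyRange struct{ start, end string }

// schedulerConfig mirrors the pattern above: the mutex is embedded, writers
// take Lock, and small read accessors hand out snapshots so long-running
// scheduling code never holds the config lock.
type schedulerConfig struct {
	sync.RWMutex
	batch  int
	ranges []keyRange
}

func (c *schedulerConfig) getBatch() int {
	c.RLock()
	defer c.RUnlock()
	return c.batch
}

func (c *schedulerConfig) getRanges() []keyRange {
	c.RLock()
	defer c.RUnlock()
	out := make([]keyRange, len(c.ranges))
	copy(out, c.ranges)
	return out
}

func main() {
	cfg := &schedulerConfig{batch: 4, ranges: []keyRange{{"", "abcd"}}}
	fmt.Println(cfg.getBatch(), len(cfg.getRanges())) // 4 1
}
```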
func (l *balanceLeaderScheduler) transferLeaderIn(solver *solver, collector *plan.Collector) *operator.Operator { - solver.Region = filter.SelectOneRegion(solver.RandFollowerRegions(solver.TargetStoreID(), l.conf.Ranges), + solver.Region = filter.SelectOneRegion(solver.RandFollowerRegions(solver.TargetStoreID(), l.conf.getRanges()), nil, filter.NewRegionPendingFilter(), filter.NewRegionDownFilter()) if solver.Region == nil { log.Debug("store has no follower", zap.String("scheduler", l.GetName()), zap.Uint64("store-id", solver.TargetStoreID())) diff --git a/pkg/schedule/schedulers/balance_region.go b/pkg/schedule/schedulers/balance_region.go index 1343600af06..1cef3a4615b 100644 --- a/pkg/schedule/schedulers/balance_region.go +++ b/pkg/schedule/schedulers/balance_region.go @@ -51,6 +51,7 @@ var ( type balanceRegionSchedulerConfig struct { Name string `json:"name"` Ranges []core.KeyRange `json:"ranges"` + // TODO: When we prepare to use Ranges, we will need to implement the ReloadConfig function for this scheduler. } type balanceRegionScheduler struct { diff --git a/pkg/schedule/schedulers/balance_witness.go b/pkg/schedule/schedulers/balance_witness.go index e9bab6c1bc7..9994866ac50 100644 --- a/pkg/schedule/schedulers/balance_witness.go +++ b/pkg/schedule/schedulers/balance_witness.go @@ -53,7 +53,7 @@ const ( ) type balanceWitnessSchedulerConfig struct { - mu syncutil.RWMutex + syncutil.RWMutex storage endpoint.ConfigStorage Ranges []core.KeyRange `json:"ranges"` // Batch is used to generate multiple operators by one scheduling @@ -61,8 +61,8 @@ type balanceWitnessSchedulerConfig struct { } func (conf *balanceWitnessSchedulerConfig) Update(data []byte) (int, interface{}) { - conf.mu.Lock() - defer conf.mu.Unlock() + conf.Lock() + defer conf.Unlock() oldc, _ := json.Marshal(conf) @@ -71,7 +71,7 @@ func (conf *balanceWitnessSchedulerConfig) Update(data []byte) (int, interface{} } newc, _ := json.Marshal(conf) if !bytes.Equal(oldc, newc) { - if !conf.validate() { + if !conf.validateLocked() { json.Unmarshal(oldc, conf) return http.StatusBadRequest, "invalid batch size which should be an integer between 1 and 10" } @@ -90,13 +90,13 @@ func (conf *balanceWitnessSchedulerConfig) Update(data []byte) (int, interface{} return http.StatusBadRequest, "Config item is not found." 
} -func (conf *balanceWitnessSchedulerConfig) validate() bool { +func (conf *balanceWitnessSchedulerConfig) validateLocked() bool { return conf.Batch >= 1 && conf.Batch <= 10 } func (conf *balanceWitnessSchedulerConfig) Clone() *balanceWitnessSchedulerConfig { - conf.mu.RLock() - defer conf.mu.RUnlock() + conf.RLock() + defer conf.RUnlock() ranges := make([]core.KeyRange, len(conf.Ranges)) copy(ranges, conf.Ranges) return &balanceWitnessSchedulerConfig{ @@ -113,6 +113,20 @@ func (conf *balanceWitnessSchedulerConfig) persistLocked() error { return conf.storage.SaveSchedulerConfig(BalanceWitnessName, data) } +func (conf *balanceWitnessSchedulerConfig) getBatch() int { + conf.RLock() + defer conf.RUnlock() + return conf.Batch +} + +func (conf *balanceWitnessSchedulerConfig) getRanges() []core.KeyRange { + conf.RLock() + defer conf.RUnlock() + ranges := make([]core.KeyRange, len(conf.Ranges)) + copy(ranges, conf.Ranges) + return ranges +} + type balanceWitnessHandler struct { rd *render.Render config *balanceWitnessSchedulerConfig @@ -205,14 +219,14 @@ func (b *balanceWitnessScheduler) GetType() string { } func (b *balanceWitnessScheduler) EncodeConfig() ([]byte, error) { - b.conf.mu.RLock() - defer b.conf.mu.RUnlock() + b.conf.RLock() + defer b.conf.RUnlock() return EncodeConfig(b.conf) } func (b *balanceWitnessScheduler) ReloadConfig() error { - b.conf.mu.Lock() - defer b.conf.mu.Unlock() + b.conf.Lock() + defer b.conf.Unlock() cfgData, err := b.conf.storage.LoadSchedulerConfig(b.GetName()) if err != nil { return err @@ -238,14 +252,12 @@ func (b *balanceWitnessScheduler) IsScheduleAllowed(cluster sche.SchedulerCluste } func (b *balanceWitnessScheduler) Schedule(cluster sche.SchedulerCluster, dryRun bool) ([]*operator.Operator, []plan.Plan) { - b.conf.mu.RLock() - defer b.conf.mu.RUnlock() basePlan := plan.NewBalanceSchedulerPlan() var collector *plan.Collector if dryRun { collector = plan.NewCollector(basePlan) } - batch := b.conf.Batch + batch := b.conf.getBatch() schedulerCounter.WithLabelValues(b.GetName(), "schedule").Inc() opInfluence := b.OpController.GetOpInfluence(cluster.GetBasicCluster()) @@ -305,7 +317,7 @@ func createTransferWitnessOperator(cs *candidateStores, b *balanceWitnessSchedul // It randomly selects a health region from the source store, then picks // the best follower peer and transfers the witness. 
func (b *balanceWitnessScheduler) transferWitnessOut(solver *solver, collector *plan.Collector) *operator.Operator { - solver.Region = filter.SelectOneRegion(solver.RandWitnessRegions(solver.SourceStoreID(), b.conf.Ranges), + solver.Region = filter.SelectOneRegion(solver.RandWitnessRegions(solver.SourceStoreID(), b.conf.getRanges()), collector, filter.NewRegionPendingFilter(), filter.NewRegionDownFilter()) if solver.Region == nil { log.Debug("store has no witness", zap.String("scheduler", b.GetName()), zap.Uint64("store-id", solver.SourceStoreID())) diff --git a/pkg/schedule/schedulers/balance_witness_test.go b/pkg/schedule/schedulers/balance_witness_test.go index abd4a3b3bba..59bf04c2303 100644 --- a/pkg/schedule/schedulers/balance_witness_test.go +++ b/pkg/schedule/schedulers/balance_witness_test.go @@ -43,8 +43,8 @@ func (suite *balanceWitnessSchedulerTestSuite) SetupTest() { suite.cancel, suite.conf, suite.tc, suite.oc = prepareSchedulersTest() suite.tc.RuleManager.SetRules([]*placement.Rule{ { - GroupID: "pd", - ID: "default", + GroupID: placement.DefaultGroupID, + ID: placement.DefaultRuleID, Role: placement.Voter, Count: 4, }, diff --git a/pkg/schedule/schedulers/base_scheduler.go b/pkg/schedule/schedulers/base_scheduler.go index 6e712c18fe3..f4c8c577767 100644 --- a/pkg/schedule/schedulers/base_scheduler.go +++ b/pkg/schedule/schedulers/base_scheduler.go @@ -92,8 +92,8 @@ func (s *BaseScheduler) GetNextInterval(interval time.Duration) time.Duration { return intervalGrow(interval, MaxScheduleInterval, exponentialGrowth) } -// Prepare does some prepare work -func (s *BaseScheduler) Prepare(cluster sche.SchedulerCluster) error { return nil } +// PrepareConfig does some prepare work about config. +func (s *BaseScheduler) PrepareConfig(cluster sche.SchedulerCluster) error { return nil } -// Cleanup does some cleanup work -func (s *BaseScheduler) Cleanup(cluster sche.SchedulerCluster) {} +// CleanConfig does some cleanup work about config. +func (s *BaseScheduler) CleanConfig(cluster sche.SchedulerCluster) {} diff --git a/pkg/schedule/schedulers/evict_leader.go b/pkg/schedule/schedulers/evict_leader.go index a5c67856df8..879aa9869b3 100644 --- a/pkg/schedule/schedulers/evict_leader.go +++ b/pkg/schedule/schedulers/evict_leader.go @@ -56,7 +56,7 @@ var ( ) type evictLeaderSchedulerConfig struct { - mu syncutil.RWMutex + syncutil.RWMutex storage endpoint.ConfigStorage StoreIDWithRanges map[uint64][]core.KeyRange `json:"store-id-ranges"` cluster *core.BasicCluster @@ -64,8 +64,8 @@ type evictLeaderSchedulerConfig struct { } func (conf *evictLeaderSchedulerConfig) getStores() []uint64 { - conf.mu.RLock() - defer conf.mu.RUnlock() + conf.RLock() + defer conf.RUnlock() stores := make([]uint64, 0, len(conf.StoreIDWithRanges)) for storeID := range conf.StoreIDWithRanges { stores = append(stores, storeID) @@ -86,15 +86,15 @@ func (conf *evictLeaderSchedulerConfig) BuildWithArgs(args []string) error { if err != nil { return err } - conf.mu.Lock() - defer conf.mu.Unlock() + conf.Lock() + defer conf.Unlock() conf.StoreIDWithRanges[id] = ranges return nil } func (conf *evictLeaderSchedulerConfig) Clone() *evictLeaderSchedulerConfig { - conf.mu.RLock() - defer conf.mu.RUnlock() + conf.RLock() + defer conf.RUnlock() storeIDWithRanges := make(map[uint64][]core.KeyRange) for id, ranges := range conf.StoreIDWithRanges { storeIDWithRanges[id] = append(storeIDWithRanges[id], ranges...) 
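The Prepare and Cleanup hooks on BaseScheduler become PrepareConfig and CleanConfig, making it explicit that they set up and tear down config-derived state such as paused leader transfers; the Scheduler interface is renamed to match later in this patch. A hedged sketch of what an implementer sees after the rename, with sche.SchedulerCluster reduced to a one-method stand-in for illustration:

package main

import "fmt"

// schedulerCluster is a stand-in for sche.SchedulerCluster, reduced to the
// single method this sketch needs.
type schedulerCluster interface {
	PauseLeaderTransfer(storeID uint64) error
}

// baseScheduler mirrors BaseScheduler's no-op defaults after the rename.
type baseScheduler struct{}

func (*baseScheduler) PrepareConfig(schedulerCluster) error { return nil }
func (*baseScheduler) CleanConfig(schedulerCluster)         {}

// demoScheduler overrides only the hook it needs, in the spirit of the
// evict-leader and grant-leader schedulers.
type demoScheduler struct {
	baseScheduler
	storeIDs []uint64
}

func (s *demoScheduler) PrepareConfig(cluster schedulerCluster) error {
	var firstErr error
	for _, id := range s.storeIDs {
		if err := cluster.PauseLeaderTransfer(id); err != nil && firstErr == nil {
			firstErr = err
		}
	}
	return firstErr
}

type fakeCluster struct{}

func (fakeCluster) PauseLeaderTransfer(id uint64) error {
	fmt.Println("pause leader transfer on store", id)
	return nil
}

func main() {
	s := &demoScheduler{storeIDs: []uint64{1, 2}}
	_ = s.PrepareConfig(fakeCluster{})
}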
@@ -106,8 +106,8 @@ func (conf *evictLeaderSchedulerConfig) Clone() *evictLeaderSchedulerConfig { func (conf *evictLeaderSchedulerConfig) Persist() error { name := conf.getSchedulerName() - conf.mu.RLock() - defer conf.mu.RUnlock() + conf.RLock() + defer conf.RUnlock() data, err := EncodeConfig(conf) failpoint.Inject("persistFail", func() { err = errors.New("fail to persist") @@ -123,8 +123,8 @@ func (conf *evictLeaderSchedulerConfig) getSchedulerName() string { } func (conf *evictLeaderSchedulerConfig) getRanges(id uint64) []string { - conf.mu.RLock() - defer conf.mu.RUnlock() + conf.RLock() + defer conf.RUnlock() ranges := conf.StoreIDWithRanges[id] res := make([]string, 0, len(ranges)*2) for index := range ranges { @@ -134,8 +134,8 @@ func (conf *evictLeaderSchedulerConfig) getRanges(id uint64) []string { } func (conf *evictLeaderSchedulerConfig) removeStore(id uint64) (succ bool, last bool) { - conf.mu.Lock() - defer conf.mu.Unlock() + conf.Lock() + defer conf.Unlock() _, exists := conf.StoreIDWithRanges[id] succ, last = false, false if exists { @@ -148,15 +148,15 @@ func (conf *evictLeaderSchedulerConfig) removeStore(id uint64) (succ bool, last } func (conf *evictLeaderSchedulerConfig) resetStore(id uint64, keyRange []core.KeyRange) { - conf.mu.Lock() - defer conf.mu.Unlock() + conf.Lock() + defer conf.Unlock() conf.cluster.PauseLeaderTransfer(id) conf.StoreIDWithRanges[id] = keyRange } func (conf *evictLeaderSchedulerConfig) getKeyRangesByID(id uint64) []core.KeyRange { - conf.mu.RLock() - defer conf.mu.RUnlock() + conf.RLock() + defer conf.RUnlock() if ranges, exist := conf.StoreIDWithRanges[id]; exist { return ranges } @@ -199,14 +199,14 @@ func (s *evictLeaderScheduler) GetType() string { } func (s *evictLeaderScheduler) EncodeConfig() ([]byte, error) { - s.conf.mu.RLock() - defer s.conf.mu.RUnlock() + s.conf.RLock() + defer s.conf.RUnlock() return EncodeConfig(s.conf) } func (s *evictLeaderScheduler) ReloadConfig() error { - s.conf.mu.Lock() - defer s.conf.mu.Unlock() + s.conf.Lock() + defer s.conf.Unlock() cfgData, err := s.conf.storage.LoadSchedulerConfig(s.GetName()) if err != nil { return err @@ -223,25 +223,9 @@ func (s *evictLeaderScheduler) ReloadConfig() error { return nil } -// pauseAndResumeLeaderTransfer checks the old and new store IDs, and pause or resume the leader transfer. 
-func pauseAndResumeLeaderTransfer(cluster *core.BasicCluster, old, new map[uint64][]core.KeyRange) { - for id := range old { - if _, ok := new[id]; ok { - continue - } - cluster.ResumeLeaderTransfer(id) - } - for id := range new { - if _, ok := old[id]; ok { - continue - } - cluster.PauseLeaderTransfer(id) - } -} - -func (s *evictLeaderScheduler) Prepare(cluster sche.SchedulerCluster) error { - s.conf.mu.RLock() - defer s.conf.mu.RUnlock() +func (s *evictLeaderScheduler) PrepareConfig(cluster sche.SchedulerCluster) error { + s.conf.RLock() + defer s.conf.RUnlock() var res error for id := range s.conf.StoreIDWithRanges { if err := cluster.PauseLeaderTransfer(id); err != nil { @@ -251,9 +235,9 @@ func (s *evictLeaderScheduler) Prepare(cluster sche.SchedulerCluster) error { return res } -func (s *evictLeaderScheduler) Cleanup(cluster sche.SchedulerCluster) { - s.conf.mu.RLock() - defer s.conf.mu.RUnlock() +func (s *evictLeaderScheduler) CleanConfig(cluster sche.SchedulerCluster) { + s.conf.RLock() + defer s.conf.RUnlock() for id := range s.conf.StoreIDWithRanges { cluster.ResumeLeaderTransfer(id) } @@ -382,15 +366,15 @@ func (handler *evictLeaderHandler) UpdateConfig(w http.ResponseWriter, r *http.R idFloat, ok := input["store_id"].(float64) if ok { id = (uint64)(idFloat) - handler.config.mu.RLock() + handler.config.RLock() if _, exists = handler.config.StoreIDWithRanges[id]; !exists { if err := handler.config.cluster.PauseLeaderTransfer(id); err != nil { - handler.config.mu.RUnlock() + handler.config.RUnlock() handler.rd.JSON(w, http.StatusInternalServerError, err.Error()) return } } - handler.config.mu.RUnlock() + handler.config.RUnlock() args = append(args, strconv.FormatUint(id, 10)) } diff --git a/pkg/schedule/schedulers/evict_slow_store.go b/pkg/schedule/schedulers/evict_slow_store.go index cc1b16300c5..713920828cc 100644 --- a/pkg/schedule/schedulers/evict_slow_store.go +++ b/pkg/schedule/schedulers/evict_slow_store.go @@ -16,7 +16,6 @@ package schedulers import ( "net/http" - "sync/atomic" "time" "github.com/gorilla/mux" @@ -29,6 +28,7 @@ import ( "github.com/tikv/pd/pkg/schedule/plan" "github.com/tikv/pd/pkg/storage/endpoint" "github.com/tikv/pd/pkg/utils/apiutil" + "github.com/tikv/pd/pkg/utils/syncutil" "github.com/unrolled/render" "go.uber.org/zap" ) @@ -47,6 +47,8 @@ const ( var evictSlowStoreCounter = schedulerCounter.WithLabelValues(EvictSlowStoreName, "schedule") type evictSlowStoreSchedulerConfig struct { + syncutil.RWMutex + cluster *core.BasicCluster storage endpoint.ConfigStorage // Last timestamp of the chosen slow store for eviction. 
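pauseAndResumeLeaderTransfer is removed from evict_leader.go here, and the ReloadConfig implementations added later in this patch call it with old and new sets of store IDs, so the helper presumably now lives in a shared location this excerpt does not show. Its contract is plain set difference: resume stores that only appear in the old map, pause stores that only appear in the new map, and leave the intersection alone. A small worked example of that contract, with a fake cluster standing in for *core.BasicCluster:

package main

import "fmt"

// fakeCluster records the calls a *core.BasicCluster would receive.
type fakeCluster struct{}

func (fakeCluster) PauseLeaderTransfer(id uint64) error { fmt.Println("pause", id); return nil }
func (fakeCluster) ResumeLeaderTransfer(id uint64)      { fmt.Println("resume", id) }

// pauseAndResumeLeaderTransfer mirrors the removed helper's logic: resume
// stores that dropped out of the config, pause stores that were newly added,
// and leave stores present in both maps untouched.
func pauseAndResumeLeaderTransfer(cluster fakeCluster, old, new map[uint64]struct{}) {
	for id := range old {
		if _, ok := new[id]; ok {
			continue
		}
		cluster.ResumeLeaderTransfer(id)
	}
	for id := range new {
		if _, ok := old[id]; ok {
			continue
		}
		cluster.PauseLeaderTransfer(id)
	}
}

func main() {
	old := map[uint64]struct{}{1: {}, 2: {}}
	new := map[uint64]struct{}{2: {}, 3: {}}
	// Store 1 is resumed, store 3 is paused, store 2 is left as-is.
	pauseAndResumeLeaderTransfer(fakeCluster{}, old, new)
}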
lastSlowStoreCaptureTS time.Time @@ -65,13 +67,15 @@ func initEvictSlowStoreSchedulerConfig(storage endpoint.ConfigStorage) *evictSlo } func (conf *evictSlowStoreSchedulerConfig) Clone() *evictSlowStoreSchedulerConfig { + conf.RLock() + defer conf.RUnlock() return &evictSlowStoreSchedulerConfig{ - RecoveryDurationGap: atomic.LoadUint64(&conf.RecoveryDurationGap), + RecoveryDurationGap: conf.RecoveryDurationGap, } } -func (conf *evictSlowStoreSchedulerConfig) Persist() error { - name := conf.getSchedulerName() +func (conf *evictSlowStoreSchedulerConfig) persistLocked() error { + name := EvictSlowStoreName data, err := EncodeConfig(conf) failpoint.Inject("persistFail", func() { err = errors.New("fail to persist") @@ -82,11 +86,9 @@ func (conf *evictSlowStoreSchedulerConfig) Persist() error { return conf.storage.SaveSchedulerConfig(name, data) } -func (conf *evictSlowStoreSchedulerConfig) getSchedulerName() string { - return EvictSlowStoreName -} - func (conf *evictSlowStoreSchedulerConfig) getStores() []uint64 { + conf.RLock() + defer conf.RUnlock() return conf.EvictedStores } @@ -98,15 +100,17 @@ func (conf *evictSlowStoreSchedulerConfig) getKeyRangesByID(id uint64) []core.Ke } func (conf *evictSlowStoreSchedulerConfig) evictStore() uint64 { - if len(conf.EvictedStores) == 0 { + if len(conf.getStores()) == 0 { return 0 } - return conf.EvictedStores[0] + return conf.getStores()[0] } // readyForRecovery checks whether the last cpatured candidate is ready for recovery. func (conf *evictSlowStoreSchedulerConfig) readyForRecovery() bool { - recoveryDurationGap := atomic.LoadUint64(&conf.RecoveryDurationGap) + conf.RLock() + defer conf.RUnlock() + recoveryDurationGap := conf.RecoveryDurationGap failpoint.Inject("transientRecoveryGap", func() { recoveryDurationGap = 0 }) @@ -114,17 +118,21 @@ func (conf *evictSlowStoreSchedulerConfig) readyForRecovery() bool { } func (conf *evictSlowStoreSchedulerConfig) setStoreAndPersist(id uint64) error { + conf.Lock() + defer conf.Unlock() conf.EvictedStores = []uint64{id} conf.lastSlowStoreCaptureTS = time.Now() - return conf.Persist() + return conf.persistLocked() } func (conf *evictSlowStoreSchedulerConfig) clearAndPersist() (oldID uint64, err error) { oldID = conf.evictStore() + conf.Lock() + defer conf.Unlock() if oldID > 0 { conf.EvictedStores = []uint64{} conf.lastSlowStoreCaptureTS = time.Time{} - err = conf.Persist() + err = conf.persistLocked() } return } @@ -155,9 +163,16 @@ func (handler *evictSlowStoreHandler) UpdateConfig(w http.ResponseWriter, r *htt handler.rd.JSON(w, http.StatusInternalServerError, errors.New("invalid argument for 'recovery-duration'").Error()) return } - recoveryDurationGap := (uint64)(recoveryDurationGapFloat) - prevRecoveryDurationGap := atomic.LoadUint64(&handler.config.RecoveryDurationGap) - atomic.StoreUint64(&handler.config.RecoveryDurationGap, recoveryDurationGap) + handler.config.Lock() + defer handler.config.Unlock() + prevRecoveryDurationGap := handler.config.RecoveryDurationGap + recoveryDurationGap := uint64(recoveryDurationGapFloat) + handler.config.RecoveryDurationGap = recoveryDurationGap + if err := handler.config.persistLocked(); err != nil { + handler.rd.JSON(w, http.StatusInternalServerError, err.Error()) + handler.config.RecoveryDurationGap = prevRecoveryDurationGap + return + } log.Info("evict-slow-store-scheduler update 'recovery-duration' - unit: s", zap.Uint64("prev", prevRecoveryDurationGap), zap.Uint64("cur", recoveryDurationGap)) handler.rd.JSON(w, http.StatusOK, nil) } @@ -189,7 +204,35 @@ func (s 
*evictSlowStoreScheduler) EncodeConfig() ([]byte, error) { return EncodeConfig(s.conf) } -func (s *evictSlowStoreScheduler) Prepare(cluster sche.SchedulerCluster) error { +func (s *evictSlowStoreScheduler) ReloadConfig() error { + s.conf.Lock() + defer s.conf.Unlock() + cfgData, err := s.conf.storage.LoadSchedulerConfig(s.GetName()) + if err != nil { + return err + } + if len(cfgData) == 0 { + return nil + } + newCfg := &evictSlowStoreSchedulerConfig{} + if err = DecodeConfig([]byte(cfgData), newCfg); err != nil { + return err + } + old := make(map[uint64]struct{}) + for _, id := range s.conf.EvictedStores { + old[id] = struct{}{} + } + new := make(map[uint64]struct{}) + for _, id := range newCfg.EvictedStores { + new[id] = struct{}{} + } + pauseAndResumeLeaderTransfer(s.conf.cluster, old, new) + s.conf.RecoveryDurationGap = newCfg.RecoveryDurationGap + s.conf.EvictedStores = newCfg.EvictedStores + return nil +} + +func (s *evictSlowStoreScheduler) PrepareConfig(cluster sche.SchedulerCluster) error { evictStore := s.conf.evictStore() if evictStore != 0 { return cluster.SlowStoreEvicted(evictStore) @@ -197,7 +240,7 @@ func (s *evictSlowStoreScheduler) Prepare(cluster sche.SchedulerCluster) error { return nil } -func (s *evictSlowStoreScheduler) Cleanup(cluster sche.SchedulerCluster) { +func (s *evictSlowStoreScheduler) CleanConfig(cluster sche.SchedulerCluster) { s.cleanupEvictLeader(cluster) } diff --git a/pkg/schedule/schedulers/evict_slow_store_test.go b/pkg/schedule/schedulers/evict_slow_store_test.go index 813d17ae541..11cd69e60f7 100644 --- a/pkg/schedule/schedulers/evict_slow_store_test.go +++ b/pkg/schedule/schedulers/evict_slow_store_test.go @@ -123,13 +123,13 @@ func (suite *evictSlowStoreTestSuite) TestEvictSlowStorePrepare() { suite.True(ok) suite.Zero(es2.conf.evictStore()) // prepare with no evict store. - suite.es.Prepare(suite.tc) + suite.es.PrepareConfig(suite.tc) es2.conf.setStoreAndPersist(1) suite.Equal(uint64(1), es2.conf.evictStore()) suite.False(es2.conf.readyForRecovery()) // prepare with evict store. - suite.es.Prepare(suite.tc) + suite.es.PrepareConfig(suite.tc) } func (suite *evictSlowStoreTestSuite) TestEvictSlowStorePersistFail() { diff --git a/pkg/schedule/schedulers/evict_slow_trend.go b/pkg/schedule/schedulers/evict_slow_trend.go index 3983e9c345d..53e096baec7 100644 --- a/pkg/schedule/schedulers/evict_slow_trend.go +++ b/pkg/schedule/schedulers/evict_slow_trend.go @@ -17,7 +17,6 @@ package schedulers import ( "net/http" "strconv" - "sync/atomic" "time" "github.com/gorilla/mux" @@ -30,6 +29,7 @@ import ( "github.com/tikv/pd/pkg/schedule/plan" "github.com/tikv/pd/pkg/storage/endpoint" "github.com/tikv/pd/pkg/utils/apiutil" + "github.com/tikv/pd/pkg/utils/syncutil" "github.com/unrolled/render" "go.uber.org/zap" ) @@ -54,6 +54,8 @@ type slowCandidate struct { } type evictSlowTrendSchedulerConfig struct { + syncutil.RWMutex + cluster *core.BasicCluster storage endpoint.ConfigStorage // Candidate for eviction in current tick. 
evictCandidate slowCandidate @@ -76,13 +78,15 @@ func initEvictSlowTrendSchedulerConfig(storage endpoint.ConfigStorage) *evictSlo } func (conf *evictSlowTrendSchedulerConfig) Clone() *evictSlowTrendSchedulerConfig { + conf.RLock() + defer conf.RUnlock() return &evictSlowTrendSchedulerConfig{ - RecoveryDurationGap: atomic.LoadUint64(&conf.RecoveryDurationGap), + RecoveryDurationGap: conf.RecoveryDurationGap, } } -func (conf *evictSlowTrendSchedulerConfig) Persist() error { - name := conf.getSchedulerName() +func (conf *evictSlowTrendSchedulerConfig) persistLocked() error { + name := EvictSlowTrendName data, err := EncodeConfig(conf) failpoint.Inject("persistFail", func() { err = errors.New("fail to persist") @@ -93,11 +97,9 @@ func (conf *evictSlowTrendSchedulerConfig) Persist() error { return conf.storage.SaveSchedulerConfig(name, data) } -func (conf *evictSlowTrendSchedulerConfig) getSchedulerName() string { - return EvictSlowTrendName -} - func (conf *evictSlowTrendSchedulerConfig) getStores() []uint64 { + conf.RLock() + defer conf.RUnlock() return conf.EvictedStores } @@ -108,10 +110,18 @@ func (conf *evictSlowTrendSchedulerConfig) getKeyRangesByID(id uint64) []core.Ke return []core.KeyRange{core.NewKeyRange("", "")} } +func (conf *evictSlowTrendSchedulerConfig) hasEvictedStores() bool { + conf.RLock() + defer conf.RUnlock() + return len(conf.EvictedStores) > 0 +} + func (conf *evictSlowTrendSchedulerConfig) evictedStore() uint64 { - if len(conf.EvictedStores) == 0 { + if !conf.hasEvictedStores() { return 0 } + conf.RLock() + defer conf.RUnlock() // If a candidate passes all checks and proved to be slow, it will be // recorded in `conf.EvictStores`, and `conf.lastEvictCandidate` will record // the captured timestamp of this store. @@ -119,18 +129,26 @@ func (conf *evictSlowTrendSchedulerConfig) evictedStore() uint64 { } func (conf *evictSlowTrendSchedulerConfig) candidate() uint64 { + conf.RLock() + defer conf.RUnlock() return conf.evictCandidate.storeID } func (conf *evictSlowTrendSchedulerConfig) captureTS() time.Time { + conf.RLock() + defer conf.RUnlock() return conf.evictCandidate.captureTS } func (conf *evictSlowTrendSchedulerConfig) candidateCapturedSecs() uint64 { + conf.RLock() + defer conf.RUnlock() return DurationSinceAsSecs(conf.evictCandidate.captureTS) } func (conf *evictSlowTrendSchedulerConfig) lastCapturedCandidate() *slowCandidate { + conf.RLock() + defer conf.RUnlock() return &conf.lastEvictCandidate } @@ -140,7 +158,9 @@ func (conf *evictSlowTrendSchedulerConfig) lastCandidateCapturedSecs() uint64 { // readyForRecovery checks whether the last cpatured candidate is ready for recovery. 
func (conf *evictSlowTrendSchedulerConfig) readyForRecovery() bool { - recoveryDurationGap := atomic.LoadUint64(&conf.RecoveryDurationGap) + conf.RLock() + defer conf.RUnlock() + recoveryDurationGap := conf.RecoveryDurationGap failpoint.Inject("transientRecoveryGap", func() { recoveryDurationGap = 0 }) @@ -148,6 +168,8 @@ func (conf *evictSlowTrendSchedulerConfig) readyForRecovery() bool { } func (conf *evictSlowTrendSchedulerConfig) captureCandidate(id uint64) { + conf.Lock() + defer conf.Unlock() conf.evictCandidate = slowCandidate{ storeID: id, captureTS: time.Now(), @@ -159,6 +181,8 @@ func (conf *evictSlowTrendSchedulerConfig) captureCandidate(id uint64) { } func (conf *evictSlowTrendSchedulerConfig) popCandidate(updLast bool) uint64 { + conf.Lock() + defer conf.Unlock() id := conf.evictCandidate.storeID if updLast { conf.lastEvictCandidate = conf.evictCandidate @@ -168,14 +192,18 @@ func (conf *evictSlowTrendSchedulerConfig) popCandidate(updLast bool) uint64 { } func (conf *evictSlowTrendSchedulerConfig) markCandidateRecovered() { + conf.Lock() + defer conf.Unlock() if conf.lastEvictCandidate != (slowCandidate{}) { conf.lastEvictCandidate.recoverTS = time.Now() } } func (conf *evictSlowTrendSchedulerConfig) setStoreAndPersist(id uint64) error { + conf.Lock() + defer conf.Unlock() conf.EvictedStores = []uint64{id} - return conf.Persist() + return conf.persistLocked() } func (conf *evictSlowTrendSchedulerConfig) clearAndPersist(cluster sche.SchedulerCluster) (oldID uint64, err error) { @@ -189,8 +217,10 @@ func (conf *evictSlowTrendSchedulerConfig) clearAndPersist(cluster sche.Schedule address = store.GetAddress() } storeSlowTrendEvictedStatusGauge.WithLabelValues(address, strconv.FormatUint(oldID, 10)).Set(0) + conf.Lock() + defer conf.Unlock() conf.EvictedStores = []uint64{} - return oldID, conf.Persist() + return oldID, conf.persistLocked() } type evictSlowTrendHandler struct { @@ -219,9 +249,16 @@ func (handler *evictSlowTrendHandler) UpdateConfig(w http.ResponseWriter, r *htt handler.rd.JSON(w, http.StatusInternalServerError, errors.New("invalid argument for 'recovery-duration'").Error()) return } - recoveryDurationGap := (uint64)(recoveryDurationGapFloat) - prevRecoveryDurationGap := atomic.LoadUint64(&handler.config.RecoveryDurationGap) - atomic.StoreUint64(&handler.config.RecoveryDurationGap, recoveryDurationGap) + handler.config.Lock() + defer handler.config.Unlock() + prevRecoveryDurationGap := handler.config.RecoveryDurationGap + recoveryDurationGap := uint64(recoveryDurationGapFloat) + handler.config.RecoveryDurationGap = recoveryDurationGap + if err := handler.config.persistLocked(); err != nil { + handler.rd.JSON(w, http.StatusInternalServerError, err.Error()) + handler.config.RecoveryDurationGap = prevRecoveryDurationGap + return + } log.Info("evict-slow-trend-scheduler update 'recovery-duration' - unit: s", zap.Uint64("prev", prevRecoveryDurationGap), zap.Uint64("cur", recoveryDurationGap)) handler.rd.JSON(w, http.StatusOK, nil) } @@ -237,6 +274,19 @@ type evictSlowTrendScheduler struct { handler http.Handler } +func (s *evictSlowTrendScheduler) GetNextInterval(interval time.Duration) time.Duration { + var growthType intervalGrowthType + // If it already found a slow node as candidate, the next interval should be shorter + // to make the next scheduling as soon as possible. This adjustment will decrease the + // response time, as heartbeats from other nodes will be received and updated more quickly. 
+ if s.conf.hasEvictedStores() { + growthType = zeroGrowth + } else { + growthType = exponentialGrowth + } + return intervalGrow(s.GetMinInterval(), MaxScheduleInterval, growthType) +} + func (s *evictSlowTrendScheduler) ServeHTTP(w http.ResponseWriter, r *http.Request) { s.handler.ServeHTTP(w, r) } @@ -253,7 +303,35 @@ func (s *evictSlowTrendScheduler) EncodeConfig() ([]byte, error) { return EncodeConfig(s.conf) } -func (s *evictSlowTrendScheduler) Prepare(cluster sche.SchedulerCluster) error { +func (s *evictSlowTrendScheduler) ReloadConfig() error { + s.conf.Lock() + defer s.conf.Unlock() + cfgData, err := s.conf.storage.LoadSchedulerConfig(s.GetName()) + if err != nil { + return err + } + if len(cfgData) == 0 { + return nil + } + newCfg := &evictSlowTrendSchedulerConfig{} + if err = DecodeConfig([]byte(cfgData), newCfg); err != nil { + return err + } + old := make(map[uint64]struct{}) + for _, id := range s.conf.EvictedStores { + old[id] = struct{}{} + } + new := make(map[uint64]struct{}) + for _, id := range newCfg.EvictedStores { + new[id] = struct{}{} + } + pauseAndResumeLeaderTransfer(s.conf.cluster, old, new) + s.conf.RecoveryDurationGap = newCfg.RecoveryDurationGap + s.conf.EvictedStores = newCfg.EvictedStores + return nil +} + +func (s *evictSlowTrendScheduler) PrepareConfig(cluster sche.SchedulerCluster) error { evictedStoreID := s.conf.evictedStore() if evictedStoreID == 0 { return nil @@ -261,7 +339,7 @@ func (s *evictSlowTrendScheduler) Prepare(cluster sche.SchedulerCluster) error { return cluster.SlowTrendEvicted(evictedStoreID) } -func (s *evictSlowTrendScheduler) Cleanup(cluster sche.SchedulerCluster) { +func (s *evictSlowTrendScheduler) CleanConfig(cluster sche.SchedulerCluster) { s.cleanupEvictLeader(cluster) } diff --git a/pkg/schedule/schedulers/evict_slow_trend_test.go b/pkg/schedule/schedulers/evict_slow_trend_test.go index c6ad058455f..65a70962a20 100644 --- a/pkg/schedule/schedulers/evict_slow_trend_test.go +++ b/pkg/schedule/schedulers/evict_slow_trend_test.go @@ -1,3 +1,4 @@ +// Copyright 2023 TiKV Project Authors. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -255,10 +256,10 @@ func (suite *evictSlowTrendTestSuite) TestEvictSlowTrendPrepare() { suite.True(ok) suite.Zero(es2.conf.evictedStore()) // prepare with no evict store. - suite.es.Prepare(suite.tc) + suite.es.PrepareConfig(suite.tc) es2.conf.setStoreAndPersist(1) suite.Equal(uint64(1), es2.conf.evictedStore()) // prepare with evict store. 
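The new GetNextInterval override ties the scheduler's tick rate to whether a store is currently evicted: with an evicted store it uses zeroGrowth, staying at the minimum interval so recovery checks come around quickly, and otherwise exponentialGrowth to back off while there is nothing to watch. The sketch below imitates that shape with simplified interval math; the real intervalGrow, MaxScheduleInterval and growth factors live elsewhere in this package and may differ in detail:

package main

import (
	"fmt"
	"time"
)

type intervalGrowthType int

const (
	zeroGrowth intervalGrowthType = iota
	exponentialGrowth
)

// growInterval is a simplified stand-in for the package's intervalGrow helper:
// zeroGrowth keeps the base interval, exponentialGrowth scales it up to a cap.
func growInterval(base, max time.Duration, typ intervalGrowthType) time.Duration {
	switch typ {
	case exponentialGrowth:
		next := time.Duration(float64(base) * 1.3)
		if next > max {
			return max
		}
		return next
	default: // zeroGrowth
		return base
	}
}

// nextInterval mirrors the shape of evictSlowTrendScheduler.GetNextInterval:
// tick fast while a store is evicted, back off when there is nothing to watch.
func nextInterval(hasEvictedStores bool, minInterval, maxInterval time.Duration) time.Duration {
	if hasEvictedStores {
		return growInterval(minInterval, maxInterval, zeroGrowth)
	}
	return growInterval(minInterval, maxInterval, exponentialGrowth)
}

func main() {
	min, max := 10*time.Second, 5*time.Minute
	fmt.Println(nextInterval(true, min, max))  // 10s
	fmt.Println(nextInterval(false, min, max)) // 13s
}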
- suite.es.Prepare(suite.tc) + suite.es.PrepareConfig(suite.tc) } diff --git a/pkg/schedule/schedulers/grant_hot_region.go b/pkg/schedule/schedulers/grant_hot_region.go index 5a68da069b8..81399b58c58 100644 --- a/pkg/schedule/schedulers/grant_hot_region.go +++ b/pkg/schedule/schedulers/grant_hot_region.go @@ -54,7 +54,7 @@ var ( ) type grantHotRegionSchedulerConfig struct { - mu syncutil.RWMutex + syncutil.RWMutex storage endpoint.ConfigStorage cluster *core.BasicCluster StoreIDs []uint64 `json:"store-id"` @@ -62,8 +62,8 @@ type grantHotRegionSchedulerConfig struct { } func (conf *grantHotRegionSchedulerConfig) setStore(leaderID uint64, peers []uint64) bool { - conf.mu.Lock() - defer conf.mu.Unlock() + conf.Lock() + defer conf.Unlock() ret := slice.AnyOf(peers, func(i int) bool { return leaderID == peers[i] }) @@ -75,20 +75,20 @@ func (conf *grantHotRegionSchedulerConfig) setStore(leaderID uint64, peers []uin } func (conf *grantHotRegionSchedulerConfig) GetStoreLeaderID() uint64 { - conf.mu.RLock() - defer conf.mu.RUnlock() + conf.RLock() + defer conf.RUnlock() return conf.StoreLeaderID } func (conf *grantHotRegionSchedulerConfig) SetStoreLeaderID(id uint64) { - conf.mu.Lock() - defer conf.mu.Unlock() + conf.Lock() + defer conf.Unlock() conf.StoreLeaderID = id } func (conf *grantHotRegionSchedulerConfig) Clone() *grantHotRegionSchedulerConfig { - conf.mu.RLock() - defer conf.mu.RUnlock() + conf.RLock() + defer conf.RUnlock() newStoreIDs := make([]uint64, len(conf.StoreIDs)) copy(newStoreIDs, conf.StoreIDs) return &grantHotRegionSchedulerConfig{ @@ -99,8 +99,8 @@ func (conf *grantHotRegionSchedulerConfig) Clone() *grantHotRegionSchedulerConfi func (conf *grantHotRegionSchedulerConfig) Persist() error { name := conf.getSchedulerName() - conf.mu.RLock() - defer conf.mu.RUnlock() + conf.RLock() + defer conf.RUnlock() data, err := EncodeConfig(conf) if err != nil { return err @@ -113,13 +113,21 @@ func (conf *grantHotRegionSchedulerConfig) getSchedulerName() string { } func (conf *grantHotRegionSchedulerConfig) has(storeID uint64) bool { - conf.mu.RLock() - defer conf.mu.RUnlock() + conf.RLock() + defer conf.RUnlock() return slice.AnyOf(conf.StoreIDs, func(i int) bool { return storeID == conf.StoreIDs[i] }) } +func (conf *grantHotRegionSchedulerConfig) getStoreIDs() []uint64 { + conf.RLock() + defer conf.RUnlock() + storeIDs := make([]uint64, len(conf.StoreIDs)) + copy(storeIDs, conf.StoreIDs) + return storeIDs +} + // grantLeaderScheduler transfers all hot peers to peers and transfer leader to the fixed store type grantHotRegionScheduler struct { *baseHotScheduler @@ -151,6 +159,25 @@ func (s *grantHotRegionScheduler) EncodeConfig() ([]byte, error) { return EncodeConfig(s.conf) } +func (s *grantHotRegionScheduler) ReloadConfig() error { + s.conf.Lock() + defer s.conf.Unlock() + cfgData, err := s.conf.storage.LoadSchedulerConfig(s.GetName()) + if err != nil { + return err + } + if len(cfgData) == 0 { + return nil + } + newCfg := &grantHotRegionSchedulerConfig{} + if err := DecodeConfig([]byte(cfgData), newCfg); err != nil { + return err + } + s.conf.StoreIDs = newCfg.StoreIDs + s.conf.StoreLeaderID = newCfg.StoreLeaderID + return nil +} + // IsScheduleAllowed returns whether the scheduler is allowed to schedule. 
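The ReloadConfig implementations added throughout this patch share one shape: load the persisted JSON, decode it into a scratch config value, then copy the user-facing fields onto the live config under its write lock. Decoding into a scratch value rather than straight into the live config keeps runtime-only fields (the storage handle, the cluster pointer, the mutex itself) intact and never exposes a half-decoded config to readers. A condensed sketch of the pattern with hypothetical names and a trivial in-memory store:

package main

import (
	"encoding/json"
	"fmt"
	"sync"
)

// demoConfig is a hypothetical scheduler config: runtime-only state (the
// storage map, the mutex) sits next to the JSON-persisted fields.
type demoConfig struct {
	sync.RWMutex
	storage map[string]string // stand-in for endpoint.ConfigStorage

	Limit  uint64   `json:"limit"`
	Ranges []string `json:"ranges"`
}

func (c *demoConfig) reloadConfig(name string) error {
	c.Lock()
	defer c.Unlock()
	data := c.storage[name]
	if len(data) == 0 {
		return nil // nothing persisted yet, keep the current values
	}
	// Decode into a scratch value so the storage handle and mutex on the
	// live config are never overwritten.
	newCfg := &demoConfig{}
	if err := json.Unmarshal([]byte(data), newCfg); err != nil {
		return err
	}
	c.Limit = newCfg.Limit
	c.Ranges = newCfg.Ranges
	return nil
}

func main() {
	c := &demoConfig{
		Limit:   1,
		storage: map[string]string{"demo-scheduler": `{"limit":5,"ranges":["a","z"]}`},
	}
	if err := c.reloadConfig("demo-scheduler"); err != nil {
		panic(err)
	}
	fmt.Println(c.Limit, c.Ranges) // 5 [a z]
}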
// TODO it should check if there is any scheduler such as evict or hot region scheduler func (s *grantHotRegionScheduler) IsScheduleAllowed(cluster sche.SchedulerCluster) bool { @@ -294,7 +321,8 @@ func (s *grantHotRegionScheduler) transfer(cluster sche.SchedulerCluster, region filter.NewPlacementSafeguard(s.GetName(), cluster.GetSchedulerConfig(), cluster.GetBasicCluster(), cluster.GetRuleManager(), srcRegion, srcStore, nil), } - destStoreIDs := make([]uint64, 0, len(s.conf.StoreIDs)) + storeIDs := s.conf.getStoreIDs() + destStoreIDs := make([]uint64, 0, len(storeIDs)) var candidate []uint64 if isLeader { filters = append(filters, &filter.StoreStateFilter{ActionScope: s.GetName(), TransferLeader: true, OperatorLevel: constant.High}) @@ -302,7 +330,7 @@ func (s *grantHotRegionScheduler) transfer(cluster sche.SchedulerCluster, region } else { filters = append(filters, &filter.StoreStateFilter{ActionScope: s.GetName(), MoveRegion: true, OperatorLevel: constant.High}, filter.NewExcludedFilter(s.GetName(), srcRegion.GetStoreIDs(), srcRegion.GetStoreIDs())) - candidate = s.conf.StoreIDs + candidate = storeIDs } for _, storeID := range candidate { store := cluster.GetStore(storeID) diff --git a/pkg/schedule/schedulers/grant_leader.go b/pkg/schedule/schedulers/grant_leader.go index f244228a10f..885f81e2442 100644 --- a/pkg/schedule/schedulers/grant_leader.go +++ b/pkg/schedule/schedulers/grant_leader.go @@ -49,7 +49,7 @@ var ( ) type grantLeaderSchedulerConfig struct { - mu syncutil.RWMutex + syncutil.RWMutex storage endpoint.ConfigStorage StoreIDWithRanges map[uint64][]core.KeyRange `json:"store-id-ranges"` cluster *core.BasicCluster @@ -69,15 +69,15 @@ func (conf *grantLeaderSchedulerConfig) BuildWithArgs(args []string) error { if err != nil { return err } - conf.mu.Lock() - defer conf.mu.Unlock() + conf.Lock() + defer conf.Unlock() conf.StoreIDWithRanges[id] = ranges return nil } func (conf *grantLeaderSchedulerConfig) Clone() *grantLeaderSchedulerConfig { - conf.mu.RLock() - defer conf.mu.RUnlock() + conf.RLock() + defer conf.RUnlock() newStoreIDWithRanges := make(map[uint64][]core.KeyRange) for k, v := range conf.StoreIDWithRanges { newStoreIDWithRanges[k] = v @@ -89,8 +89,8 @@ func (conf *grantLeaderSchedulerConfig) Clone() *grantLeaderSchedulerConfig { func (conf *grantLeaderSchedulerConfig) Persist() error { name := conf.getSchedulerName() - conf.mu.RLock() - defer conf.mu.RUnlock() + conf.RLock() + defer conf.RUnlock() data, err := EncodeConfig(conf) if err != nil { return err @@ -103,8 +103,8 @@ func (conf *grantLeaderSchedulerConfig) getSchedulerName() string { } func (conf *grantLeaderSchedulerConfig) getRanges(id uint64) []string { - conf.mu.RLock() - defer conf.mu.RUnlock() + conf.RLock() + defer conf.RUnlock() ranges := conf.StoreIDWithRanges[id] res := make([]string, 0, len(ranges)*2) for index := range ranges { @@ -114,8 +114,8 @@ func (conf *grantLeaderSchedulerConfig) getRanges(id uint64) []string { } func (conf *grantLeaderSchedulerConfig) removeStore(id uint64) (succ bool, last bool) { - conf.mu.Lock() - defer conf.mu.Unlock() + conf.Lock() + defer conf.Unlock() _, exists := conf.StoreIDWithRanges[id] succ, last = false, false if exists { @@ -128,21 +128,31 @@ func (conf *grantLeaderSchedulerConfig) removeStore(id uint64) (succ bool, last } func (conf *grantLeaderSchedulerConfig) resetStore(id uint64, keyRange []core.KeyRange) { - conf.mu.Lock() - defer conf.mu.Unlock() + conf.Lock() + defer conf.Unlock() conf.cluster.PauseLeaderTransfer(id) conf.StoreIDWithRanges[id] = 
keyRange } func (conf *grantLeaderSchedulerConfig) getKeyRangesByID(id uint64) []core.KeyRange { - conf.mu.RLock() - defer conf.mu.RUnlock() + conf.RLock() + defer conf.RUnlock() if ranges, exist := conf.StoreIDWithRanges[id]; exist { return ranges } return nil } +func (conf *grantLeaderSchedulerConfig) getStoreIDWithRanges() map[uint64][]core.KeyRange { + conf.RLock() + defer conf.RUnlock() + storeIDWithRanges := make(map[uint64][]core.KeyRange) + for id, ranges := range conf.StoreIDWithRanges { + storeIDWithRanges[id] = ranges + } + return storeIDWithRanges +} + // grantLeaderScheduler transfers all leaders to peers in the store. type grantLeaderScheduler struct { *BaseScheduler @@ -179,8 +189,8 @@ func (s *grantLeaderScheduler) EncodeConfig() ([]byte, error) { } func (s *grantLeaderScheduler) ReloadConfig() error { - s.conf.mu.Lock() - defer s.conf.mu.Unlock() + s.conf.Lock() + defer s.conf.Unlock() cfgData, err := s.conf.storage.LoadSchedulerConfig(s.GetName()) if err != nil { return err @@ -197,9 +207,9 @@ func (s *grantLeaderScheduler) ReloadConfig() error { return nil } -func (s *grantLeaderScheduler) Prepare(cluster sche.SchedulerCluster) error { - s.conf.mu.RLock() - defer s.conf.mu.RUnlock() +func (s *grantLeaderScheduler) PrepareConfig(cluster sche.SchedulerCluster) error { + s.conf.RLock() + defer s.conf.RUnlock() var res error for id := range s.conf.StoreIDWithRanges { if err := cluster.PauseLeaderTransfer(id); err != nil { @@ -209,9 +219,9 @@ func (s *grantLeaderScheduler) Prepare(cluster sche.SchedulerCluster) error { return res } -func (s *grantLeaderScheduler) Cleanup(cluster sche.SchedulerCluster) { - s.conf.mu.RLock() - defer s.conf.mu.RUnlock() +func (s *grantLeaderScheduler) CleanConfig(cluster sche.SchedulerCluster) { + s.conf.RLock() + defer s.conf.RUnlock() for id := range s.conf.StoreIDWithRanges { cluster.ResumeLeaderTransfer(id) } @@ -227,12 +237,11 @@ func (s *grantLeaderScheduler) IsScheduleAllowed(cluster sche.SchedulerCluster) func (s *grantLeaderScheduler) Schedule(cluster sche.SchedulerCluster, dryRun bool) ([]*operator.Operator, []plan.Plan) { grantLeaderCounter.Inc() - s.conf.mu.RLock() - defer s.conf.mu.RUnlock() - ops := make([]*operator.Operator, 0, len(s.conf.StoreIDWithRanges)) + storeIDWithRanges := s.conf.getStoreIDWithRanges() + ops := make([]*operator.Operator, 0, len(storeIDWithRanges)) pendingFilter := filter.NewRegionPendingFilter() downFilter := filter.NewRegionDownFilter() - for id, ranges := range s.conf.StoreIDWithRanges { + for id, ranges := range storeIDWithRanges { region := filter.SelectOneRegion(cluster.RandFollowerRegions(id, ranges), nil, pendingFilter, downFilter) if region == nil { grantLeaderNoFollowerCounter.Inc() @@ -268,15 +277,15 @@ func (handler *grantLeaderHandler) UpdateConfig(w http.ResponseWriter, r *http.R idFloat, ok := input["store_id"].(float64) if ok { id = (uint64)(idFloat) - handler.config.mu.RLock() + handler.config.RLock() if _, exists = handler.config.StoreIDWithRanges[id]; !exists { if err := handler.config.cluster.PauseLeaderTransfer(id); err != nil { - handler.config.mu.RUnlock() + handler.config.RUnlock() handler.rd.JSON(w, http.StatusInternalServerError, err.Error()) return } } - handler.config.mu.RUnlock() + handler.config.RUnlock() args = append(args, strconv.FormatUint(id, 10)) } diff --git a/pkg/schedule/schedulers/hot_region.go b/pkg/schedule/schedulers/hot_region.go index c353621bb7f..fdd07e85145 100644 --- a/pkg/schedule/schedulers/hot_region.go +++ b/pkg/schedule/schedulers/hot_region.go @@ -257,6 
+257,44 @@ func (h *hotScheduler) EncodeConfig() ([]byte, error) { return h.conf.EncodeConfig() } +func (h *hotScheduler) ReloadConfig() error { + h.conf.Lock() + defer h.conf.Unlock() + cfgData, err := h.conf.storage.LoadSchedulerConfig(h.GetName()) + if err != nil { + return err + } + if len(cfgData) == 0 { + return nil + } + newCfg := &hotRegionSchedulerConfig{} + if err := DecodeConfig([]byte(cfgData), newCfg); err != nil { + return err + } + h.conf.MinHotByteRate = newCfg.MinHotByteRate + h.conf.MinHotKeyRate = newCfg.MinHotKeyRate + h.conf.MinHotQueryRate = newCfg.MinHotQueryRate + h.conf.MaxZombieRounds = newCfg.MaxZombieRounds + h.conf.MaxPeerNum = newCfg.MaxPeerNum + h.conf.ByteRateRankStepRatio = newCfg.ByteRateRankStepRatio + h.conf.KeyRateRankStepRatio = newCfg.KeyRateRankStepRatio + h.conf.QueryRateRankStepRatio = newCfg.QueryRateRankStepRatio + h.conf.CountRankStepRatio = newCfg.CountRankStepRatio + h.conf.GreatDecRatio = newCfg.GreatDecRatio + h.conf.MinorDecRatio = newCfg.MinorDecRatio + h.conf.SrcToleranceRatio = newCfg.SrcToleranceRatio + h.conf.DstToleranceRatio = newCfg.DstToleranceRatio + h.conf.WriteLeaderPriorities = newCfg.WriteLeaderPriorities + h.conf.WritePeerPriorities = newCfg.WritePeerPriorities + h.conf.ReadPriorities = newCfg.ReadPriorities + h.conf.StrictPickingStore = newCfg.StrictPickingStore + h.conf.EnableForTiFlash = newCfg.EnableForTiFlash + h.conf.RankFormulaVersion = newCfg.RankFormulaVersion + h.conf.ForbidRWType = newCfg.ForbidRWType + h.conf.SplitThresholds = newCfg.SplitThresholds + return nil +} + func (h *hotScheduler) ServeHTTP(w http.ResponseWriter, r *http.Request) { h.conf.ServeHTTP(w, r) } @@ -758,6 +796,9 @@ func (bs *balanceSolver) tryAddPendingInfluence() bool { dstStoreID := uint64(0) if isSplit { region := bs.GetRegion(bs.ops[0].RegionID()) + if region == nil { + return false + } for id := range region.GetStoreIDs() { srcStoreIDs = append(srcStoreIDs, id) } diff --git a/pkg/schedule/schedulers/hot_region_config.go b/pkg/schedule/schedulers/hot_region_config.go index 2ff78748f02..3f9f8b8c669 100644 --- a/pkg/schedule/schedulers/hot_region_config.go +++ b/pkg/schedule/schedulers/hot_region_config.go @@ -366,7 +366,7 @@ func isPriorityValid(priorities []string) (map[string]bool, error) { return priorityMap, nil } -func (conf *hotRegionSchedulerConfig) valid() error { +func (conf *hotRegionSchedulerConfig) validateLocked() error { if _, err := isPriorityValid(conf.ReadPriorities); err != nil { return err } @@ -409,7 +409,7 @@ func (conf *hotRegionSchedulerConfig) handleSetConfig(w http.ResponseWriter, r * rd.JSON(w, http.StatusInternalServerError, err.Error()) return } - if err := conf.valid(); err != nil { + if err := conf.validateLocked(); err != nil { // revert to old version if err2 := json.Unmarshal(oldc, conf); err2 != nil { rd.JSON(w, http.StatusInternalServerError, err2.Error()) diff --git a/pkg/schedule/schedulers/hot_region_test.go b/pkg/schedule/schedulers/hot_region_test.go index d8f9bbc532c..6e7208e4251 100644 --- a/pkg/schedule/schedulers/hot_region_test.go +++ b/pkg/schedule/schedulers/hot_region_test.go @@ -582,8 +582,8 @@ func TestHotWriteRegionScheduleByteRateOnlyWithTiFlash(t *testing.T) { tc.SetHotRegionCacheHitsThreshold(0) re.NoError(tc.RuleManager.SetRules([]*placement.Rule{ { - GroupID: "pd", - ID: "default", + GroupID: placement.DefaultGroupID, + ID: placement.DefaultRuleID, Role: placement.Voter, Count: 3, LocationLabels: []string{"zone", "host"}, @@ -1143,7 +1143,7 @@ func 
TestHotWriteRegionScheduleWithRuleEnabled(t *testing.T) { tc.AddRegionStore(3, 20) err = tc.SetRule(&placement.Rule{ - GroupID: "pd", + GroupID: placement.DefaultGroupID, ID: "leader", Index: 1, Override: true, @@ -1161,7 +1161,7 @@ func TestHotWriteRegionScheduleWithRuleEnabled(t *testing.T) { }) re.NoError(err) err = tc.SetRule(&placement.Rule{ - GroupID: "pd", + GroupID: placement.DefaultGroupID, ID: "voter", Index: 2, Override: false, @@ -2499,32 +2499,32 @@ func TestConfigValidation(t *testing.T) { re := require.New(t) hc := initHotRegionScheduleConfig() - err := hc.valid() + err := hc.validateLocked() re.NoError(err) // priorities is illegal hc.ReadPriorities = []string{"byte", "error"} - err = hc.valid() + err = hc.validateLocked() re.Error(err) // priorities should have at least 2 dimensions hc = initHotRegionScheduleConfig() hc.WriteLeaderPriorities = []string{"byte"} - err = hc.valid() + err = hc.validateLocked() re.Error(err) // query is not allowed to be set in priorities for write-peer-priorities hc = initHotRegionScheduleConfig() hc.WritePeerPriorities = []string{"query", "byte"} - err = hc.valid() + err = hc.validateLocked() re.Error(err) // priorities shouldn't be repeated hc.WritePeerPriorities = []string{"byte", "byte"} - err = hc.valid() + err = hc.validateLocked() re.Error(err) // no error hc.WritePeerPriorities = []string{"byte", "key"} - err = hc.valid() + err = hc.validateLocked() re.NoError(err) // rank-formula-version @@ -2533,17 +2533,17 @@ func TestConfigValidation(t *testing.T) { re.Equal("v2", hc.GetRankFormulaVersion()) // v1 hc.RankFormulaVersion = "v1" - err = hc.valid() + err = hc.validateLocked() re.NoError(err) re.Equal("v1", hc.GetRankFormulaVersion()) // v2 hc.RankFormulaVersion = "v2" - err = hc.valid() + err = hc.validateLocked() re.NoError(err) re.Equal("v2", hc.GetRankFormulaVersion()) // illegal hc.RankFormulaVersion = "v0" - err = hc.valid() + err = hc.validateLocked() re.Error(err) // forbid-rw-type @@ -2553,27 +2553,27 @@ func TestConfigValidation(t *testing.T) { re.False(hc.IsForbidRWType(utils.Write)) // read hc.ForbidRWType = "read" - err = hc.valid() + err = hc.validateLocked() re.NoError(err) re.True(hc.IsForbidRWType(utils.Read)) re.False(hc.IsForbidRWType(utils.Write)) // write hc.ForbidRWType = "write" - err = hc.valid() + err = hc.validateLocked() re.NoError(err) re.False(hc.IsForbidRWType(utils.Read)) re.True(hc.IsForbidRWType(utils.Write)) // illegal hc.ForbidRWType = "test" - err = hc.valid() + err = hc.validateLocked() re.Error(err) hc.SplitThresholds = 0 - err = hc.valid() + err = hc.validateLocked() re.Error(err) hc.SplitThresholds = 1.1 - err = hc.valid() + err = hc.validateLocked() re.Error(err) } diff --git a/pkg/schedule/schedulers/init.go b/pkg/schedule/schedulers/init.go index d45602b90e1..f60be1e5b06 100644 --- a/pkg/schedule/schedulers/init.go +++ b/pkg/schedule/schedulers/init.go @@ -163,6 +163,7 @@ func schedulersRegister() { if err := decoder(conf); err != nil { return nil, err } + conf.cluster = opController.GetCluster() return newEvictSlowStoreScheduler(opController, conf), nil }) @@ -378,6 +379,7 @@ func schedulersRegister() { if err := decoder(conf); err != nil { return nil, err } + conf.storage = storage return newShuffleHotRegionScheduler(opController, conf), nil }) diff --git a/pkg/schedule/schedulers/label.go b/pkg/schedule/schedulers/label.go index 62a1100d16b..90310bcf10e 100644 --- a/pkg/schedule/schedulers/label.go +++ b/pkg/schedule/schedulers/label.go @@ -46,6 +46,7 @@ var ( type labelSchedulerConfig 
struct { Name string `json:"name"` Ranges []core.KeyRange `json:"ranges"` + // TODO: When we prepare to use Ranges, we will need to implement the ReloadConfig function for this scheduler. } type labelScheduler struct { diff --git a/pkg/schedule/schedulers/random_merge.go b/pkg/schedule/schedulers/random_merge.go index a621b595198..44bb5081ef9 100644 --- a/pkg/schedule/schedulers/random_merge.go +++ b/pkg/schedule/schedulers/random_merge.go @@ -48,6 +48,7 @@ var ( type randomMergeSchedulerConfig struct { Name string `json:"name"` Ranges []core.KeyRange `json:"ranges"` + // TODO: When we prepare to use Ranges, we will need to implement the ReloadConfig function for this scheduler. } type randomMergeScheduler struct { diff --git a/pkg/schedule/schedulers/scatter_range.go b/pkg/schedule/schedulers/scatter_range.go index e301b4c6e76..977d8cff05c 100644 --- a/pkg/schedule/schedulers/scatter_range.go +++ b/pkg/schedule/schedulers/scatter_range.go @@ -49,7 +49,7 @@ var ( ) type scatterRangeSchedulerConfig struct { - mu syncutil.RWMutex + syncutil.RWMutex storage endpoint.ConfigStorage RangeName string `json:"range-name"` StartKey string `json:"start-key"` @@ -60,8 +60,8 @@ func (conf *scatterRangeSchedulerConfig) BuildWithArgs(args []string) error { if len(args) != 3 { return errs.ErrSchedulerConfig.FastGenByArgs("ranges and name") } - conf.mu.Lock() - defer conf.mu.Unlock() + conf.Lock() + defer conf.Unlock() conf.RangeName = args[0] conf.StartKey = args[1] @@ -70,8 +70,8 @@ func (conf *scatterRangeSchedulerConfig) BuildWithArgs(args []string) error { } func (conf *scatterRangeSchedulerConfig) Clone() *scatterRangeSchedulerConfig { - conf.mu.RLock() - defer conf.mu.RUnlock() + conf.RLock() + defer conf.RUnlock() return &scatterRangeSchedulerConfig{ StartKey: conf.StartKey, EndKey: conf.EndKey, @@ -81,8 +81,8 @@ func (conf *scatterRangeSchedulerConfig) Clone() *scatterRangeSchedulerConfig { func (conf *scatterRangeSchedulerConfig) Persist() error { name := conf.getSchedulerName() - conf.mu.RLock() - defer conf.mu.RUnlock() + conf.RLock() + defer conf.RUnlock() data, err := EncodeConfig(conf) if err != nil { return err @@ -91,26 +91,26 @@ func (conf *scatterRangeSchedulerConfig) Persist() error { } func (conf *scatterRangeSchedulerConfig) GetRangeName() string { - conf.mu.RLock() - defer conf.mu.RUnlock() + conf.RLock() + defer conf.RUnlock() return conf.RangeName } func (conf *scatterRangeSchedulerConfig) GetStartKey() []byte { - conf.mu.RLock() - defer conf.mu.RUnlock() + conf.RLock() + defer conf.RUnlock() return []byte(conf.StartKey) } func (conf *scatterRangeSchedulerConfig) GetEndKey() []byte { - conf.mu.RLock() - defer conf.mu.RUnlock() + conf.RLock() + defer conf.RUnlock() return []byte(conf.EndKey) } func (conf *scatterRangeSchedulerConfig) getSchedulerName() string { - conf.mu.RLock() - defer conf.mu.RUnlock() + conf.RLock() + defer conf.RUnlock() return fmt.Sprintf("scatter-range-%s", conf.RangeName) } @@ -161,14 +161,14 @@ func (l *scatterRangeScheduler) GetType() string { } func (l *scatterRangeScheduler) EncodeConfig() ([]byte, error) { - l.config.mu.RLock() - defer l.config.mu.RUnlock() + l.config.RLock() + defer l.config.RUnlock() return EncodeConfig(l.config) } func (l *scatterRangeScheduler) ReloadConfig() error { - l.config.mu.Lock() - defer l.config.mu.Unlock() + l.config.Lock() + defer l.config.Unlock() cfgData, err := l.config.storage.LoadSchedulerConfig(l.GetName()) if err != nil { return err @@ -176,7 +176,14 @@ func (l *scatterRangeScheduler) ReloadConfig() error { if 
len(cfgData) == 0 { return nil } - return DecodeConfig([]byte(cfgData), l.config) + newCfg := &scatterRangeSchedulerConfig{} + if err := DecodeConfig([]byte(cfgData), newCfg); err != nil { + return err + } + l.config.RangeName = newCfg.RangeName + l.config.StartKey = newCfg.StartKey + l.config.EndKey = newCfg.EndKey + return nil } func (l *scatterRangeScheduler) IsScheduleAllowed(cluster sche.SchedulerCluster) bool { @@ -207,7 +214,7 @@ func (l *scatterRangeScheduler) Schedule(cluster sche.SchedulerCluster, dryRun b if l.allowBalanceLeader(cluster) { ops, _ := l.balanceLeader.Schedule(c, false) if len(ops) > 0 { - ops[0].SetDesc(fmt.Sprintf("scatter-range-leader-%s", l.config.RangeName)) + ops[0].SetDesc(fmt.Sprintf("scatter-range-leader-%s", l.config.GetRangeName())) ops[0].AttachKind(operator.OpRange) ops[0].Counters = append(ops[0].Counters, scatterRangeNewOperatorCounter, @@ -219,7 +226,7 @@ func (l *scatterRangeScheduler) Schedule(cluster sche.SchedulerCluster, dryRun b if l.allowBalanceRegion(cluster) { ops, _ := l.balanceRegion.Schedule(c, false) if len(ops) > 0 { - ops[0].SetDesc(fmt.Sprintf("scatter-range-region-%s", l.config.RangeName)) + ops[0].SetDesc(fmt.Sprintf("scatter-range-region-%s", l.config.GetRangeName())) ops[0].AttachKind(operator.OpRange) ops[0].Counters = append(ops[0].Counters, scatterRangeNewOperatorCounter, diff --git a/pkg/schedule/schedulers/scheduler.go b/pkg/schedule/schedulers/scheduler.go index ba02c280d40..1c788989454 100644 --- a/pkg/schedule/schedulers/scheduler.go +++ b/pkg/schedule/schedulers/scheduler.go @@ -42,8 +42,8 @@ type Scheduler interface { ReloadConfig() error GetMinInterval() time.Duration GetNextInterval(interval time.Duration) time.Duration - Prepare(cluster sche.SchedulerCluster) error - Cleanup(cluster sche.SchedulerCluster) + PrepareConfig(cluster sche.SchedulerCluster) error + CleanConfig(cluster sche.SchedulerCluster) Schedule(cluster sche.SchedulerCluster, dryRun bool) ([]*operator.Operator, []plan.Plan) IsScheduleAllowed(cluster sche.SchedulerCluster) bool } @@ -66,6 +66,24 @@ func DecodeConfig(data []byte, v interface{}) error { return nil } +// ToPayload returns the payload of config. +func ToPayload(sches, configs []string) map[string]interface{} { + payload := make(map[string]interface{}) + for i, sche := range sches { + var config interface{} + err := DecodeConfig([]byte(configs[i]), &config) + if err != nil { + log.Error("failed to decode scheduler config", + zap.String("config", configs[i]), + zap.String("scheduler", sche), + errs.ZapError(err)) + continue + } + payload[sche] = config + } + return payload +} + // ConfigDecoder used to decode the config. type ConfigDecoder func(v interface{}) error diff --git a/pkg/schedule/schedulers/scheduler_controller.go b/pkg/schedule/schedulers/scheduler_controller.go index 0f2264392aa..818f02685ea 100644 --- a/pkg/schedule/schedulers/scheduler_controller.go +++ b/pkg/schedule/schedulers/scheduler_controller.go @@ -16,6 +16,7 @@ package schedulers import ( "context" + "fmt" "net/http" "sync" "sync/atomic" @@ -38,8 +39,6 @@ const maxScheduleRetries = 10 var ( denySchedulersByLabelerCounter = labeler.LabelerEventCounter.WithLabelValues("schedulers", "deny") - rulesCntStatusGauge = ruleStatusGauge.WithLabelValues("rule_count") - groupsCntStatusGauge = ruleStatusGauge.WithLabelValues("group_count") ) // Controller is used to manage all schedulers. 
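ToPayload turns two parallel slices, scheduler names and their persisted JSON configs, into a name-to-config map and skips entries that fail to decode; the controller's GetAllSchedulerConfigs helper added later in this patch is the natural producer of those slices. A hedged usage sketch with sample names and JSON blobs, using encoding/json directly where the real code goes through DecodeConfig:

package main

import (
	"encoding/json"
	"fmt"
)

// toPayload mirrors schedulers.ToPayload: pair up names and raw configs,
// decode each config into a generic value, and skip undecodable entries.
func toPayload(sches, configs []string) map[string]interface{} {
	payload := make(map[string]interface{})
	for i, sche := range sches {
		var config interface{}
		if err := json.Unmarshal([]byte(configs[i]), &config); err != nil {
			// The real implementation logs the error and continues.
			fmt.Println("skip", sche, "bad config:", err)
			continue
		}
		payload[sche] = config
	}
	return payload
}

func main() {
	sches := []string{"balance-leader-scheduler", "evict-slow-store-scheduler"}
	configs := []string{
		`{"batch":4}`,
		`{"recovery-duration":300}`,
	}
	payload := toPayload(sches, configs)
	out, _ := json.MarshalIndent(payload, "", "  ")
	fmt.Println(string(out))
}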
@@ -128,8 +127,8 @@ func (c *Controller) CollectSchedulerMetrics() { } ruleCnt := ruleMgr.GetRulesCount() groupCnt := ruleMgr.GetGroupsCount() - rulesCntStatusGauge.Set(float64(ruleCnt)) - groupsCntStatusGauge.Set(float64(groupCnt)) + ruleStatusGauge.WithLabelValues("rule_count").Set(float64(ruleCnt)) + ruleStatusGauge.WithLabelValues("group_count").Set(float64(groupCnt)) } func (c *Controller) isSchedulingHalted() bool { @@ -137,12 +136,9 @@ func (c *Controller) isSchedulingHalted() bool { } // ResetSchedulerMetrics resets metrics of all schedulers. -func (c *Controller) ResetSchedulerMetrics() { +func ResetSchedulerMetrics() { schedulerStatusGauge.Reset() ruleStatusGauge.Reset() - // create in map again - rulesCntStatusGauge = ruleStatusGauge.WithLabelValues("rule_count") - groupsCntStatusGauge = ruleStatusGauge.WithLabelValues("group_count") } // AddSchedulerHandler adds the HTTP handler for a scheduler. @@ -161,7 +157,8 @@ func (c *Controller) AddSchedulerHandler(scheduler Scheduler, args ...string) er return err } c.cluster.GetSchedulerConfig().AddSchedulerCfg(scheduler.GetType(), args) - return nil + err := scheduler.PrepareConfig(c.cluster) + return err } // RemoveSchedulerHandler removes the HTTP handler for a scheduler. @@ -188,6 +185,7 @@ func (c *Controller) RemoveSchedulerHandler(name string) error { return err } + s.(Scheduler).CleanConfig(c.cluster) delete(c.schedulerHandlers, name) return nil @@ -203,7 +201,7 @@ func (c *Controller) AddScheduler(scheduler Scheduler, args ...string) error { } s := NewScheduleController(c.ctx, c.cluster, c.opController, scheduler) - if err := s.Scheduler.Prepare(c.cluster); err != nil { + if err := s.Scheduler.PrepareConfig(c.cluster); err != nil { return err } @@ -283,7 +281,7 @@ func (c *Controller) PauseOrResumeScheduler(name string, t int64) error { // ReloadSchedulerConfig reloads a scheduler's config if it exists. func (c *Controller) ReloadSchedulerConfig(name string) error { if exist, _ := c.IsSchedulerExisted(name); !exist { - return nil + return fmt.Errorf("scheduler %s is not existed", name) } return c.GetScheduler(name).ReloadConfig() } @@ -348,7 +346,7 @@ func (c *Controller) IsSchedulerExisted(name string) (bool, error) { func (c *Controller) runScheduler(s *ScheduleController) { defer logutil.LogPanic() defer c.wg.Done() - defer s.Scheduler.Cleanup(c.cluster) + defer s.Scheduler.CleanConfig(c.cluster) ticker := time.NewTicker(s.GetInterval()) defer ticker.Stop() @@ -418,6 +416,11 @@ func (c *Controller) CheckTransferWitnessLeader(region *core.RegionInfo) { } } +// GetAllSchedulerConfigs returns all scheduler configs. +func (c *Controller) GetAllSchedulerConfigs() ([]string, []string, error) { + return c.storage.LoadAllSchedulerConfigs() +} + // ScheduleController is used to manage a scheduler. 
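Dropping the cached rulesCntStatusGauge and groupsCntStatusGauge children and calling WithLabelValues on every collection is what allows ResetSchedulerMetrics to become a plain package function: after a GaugeVec is Reset, a child obtained earlier is detached from the vector, so writes through the cached handle stop being exported, which is why the old code had to re-create the children after each reset. A small sketch of that behaviour, assuming the standard prometheus client:

package main

import (
	"fmt"

	"github.com/prometheus/client_golang/prometheus"
	dto "github.com/prometheus/client_model/go"
)

// collectValues drains a GaugeVec and returns the gauge values it exports.
func collectValues(vec *prometheus.GaugeVec) []float64 {
	ch := make(chan prometheus.Metric, 16)
	vec.Collect(ch)
	close(ch)
	var vals []float64
	for m := range ch {
		var out dto.Metric
		_ = m.Write(&out)
		vals = append(vals, out.GetGauge().GetValue())
	}
	return vals
}

func main() {
	vec := prometheus.NewGaugeVec(
		prometheus.GaugeOpts{Name: "rule_status", Help: "demo"},
		[]string{"type"},
	)

	cached := vec.WithLabelValues("rule_count")
	cached.Set(3)
	fmt.Println(collectValues(vec)) // [3]

	vec.Reset()
	cached.Set(5) // writes to a child that is no longer in the vector
	fmt.Println(collectValues(vec)) // [] because the cached child is not collected

	vec.WithLabelValues("rule_count").Set(7) // fetch-per-use keeps working after Reset
	fmt.Println(collectValues(vec))          // [7]
}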
type ScheduleController struct { Scheduler diff --git a/pkg/schedule/schedulers/scheduler_test.go b/pkg/schedule/schedulers/scheduler_test.go index 12ab9f8aa2f..57f1fcf1e3f 100644 --- a/pkg/schedule/schedulers/scheduler_test.go +++ b/pkg/schedule/schedulers/scheduler_test.go @@ -261,13 +261,13 @@ func TestShuffleRegionRole(t *testing.T) { // update rule to 1leader+1follower+1learner tc.SetEnablePlacementRules(true) tc.RuleManager.SetRule(&placement.Rule{ - GroupID: "pd", - ID: "default", + GroupID: placement.DefaultGroupID, + ID: placement.DefaultRuleID, Role: placement.Voter, Count: 2, }) tc.RuleManager.SetRule(&placement.Rule{ - GroupID: "pd", + GroupID: placement.DefaultGroupID, ID: "learner", Role: placement.Learner, Count: 1, @@ -428,8 +428,8 @@ func TestBalanceLeaderWithConflictRule(t *testing.T) { { name: "default Rule", rule: &placement.Rule{ - GroupID: "pd", - ID: "default", + GroupID: placement.DefaultGroupID, + ID: placement.DefaultRuleID, Index: 1, StartKey: []byte(""), EndKey: []byte(""), @@ -442,8 +442,8 @@ func TestBalanceLeaderWithConflictRule(t *testing.T) { { name: "single store allowed to be placed leader", rule: &placement.Rule{ - GroupID: "pd", - ID: "default", + GroupID: placement.DefaultGroupID, + ID: placement.DefaultRuleID, Index: 1, StartKey: []byte(""), EndKey: []byte(""), @@ -463,8 +463,8 @@ func TestBalanceLeaderWithConflictRule(t *testing.T) { { name: "2 store allowed to be placed leader", rule: &placement.Rule{ - GroupID: "pd", - ID: "default", + GroupID: placement.DefaultGroupID, + ID: placement.DefaultRuleID, Index: 1, StartKey: []byte(""), EndKey: []byte(""), diff --git a/pkg/schedule/schedulers/shuffle_hot_region.go b/pkg/schedule/schedulers/shuffle_hot_region.go index d5264b90428..cd5c40d4e07 100644 --- a/pkg/schedule/schedulers/shuffle_hot_region.go +++ b/pkg/schedule/schedulers/shuffle_hot_region.go @@ -15,6 +15,9 @@ package schedulers import ( + "net/http" + + "github.com/gorilla/mux" "github.com/pingcap/kvproto/pkg/metapb" "github.com/pingcap/log" "github.com/tikv/pd/pkg/core/constant" @@ -24,6 +27,10 @@ import ( "github.com/tikv/pd/pkg/schedule/operator" "github.com/tikv/pd/pkg/schedule/plan" "github.com/tikv/pd/pkg/statistics" + "github.com/tikv/pd/pkg/storage/endpoint" + "github.com/tikv/pd/pkg/utils/apiutil" + "github.com/tikv/pd/pkg/utils/syncutil" + "github.com/unrolled/render" "go.uber.org/zap" ) @@ -42,8 +49,38 @@ var ( ) type shuffleHotRegionSchedulerConfig struct { - Name string `json:"name"` - Limit uint64 `json:"limit"` + syncutil.RWMutex + storage endpoint.ConfigStorage + Name string `json:"name"` + Limit uint64 `json:"limit"` +} + +func (conf *shuffleHotRegionSchedulerConfig) getSchedulerName() string { + return conf.Name +} + +func (conf *shuffleHotRegionSchedulerConfig) Clone() *shuffleHotRegionSchedulerConfig { + conf.RLock() + defer conf.RUnlock() + return &shuffleHotRegionSchedulerConfig{ + Name: conf.Name, + Limit: conf.Limit, + } +} + +func (conf *shuffleHotRegionSchedulerConfig) persistLocked() error { + name := conf.getSchedulerName() + data, err := EncodeConfig(conf) + if err != nil { + return err + } + return conf.storage.SaveSchedulerConfig(name, data) +} + +func (conf *shuffleHotRegionSchedulerConfig) getLimit() uint64 { + conf.RLock() + defer conf.RUnlock() + return conf.Limit } // ShuffleHotRegionScheduler mainly used to test. @@ -52,19 +89,26 @@ type shuffleHotRegionSchedulerConfig struct { // the hot peer. 
type shuffleHotRegionScheduler struct { *baseHotScheduler - conf *shuffleHotRegionSchedulerConfig + conf *shuffleHotRegionSchedulerConfig + handler http.Handler } // newShuffleHotRegionScheduler creates an admin scheduler that random balance hot regions func newShuffleHotRegionScheduler(opController *operator.Controller, conf *shuffleHotRegionSchedulerConfig) Scheduler { base := newBaseHotScheduler(opController) + handler := newShuffleHotRegionHandler(conf) ret := &shuffleHotRegionScheduler{ baseHotScheduler: base, conf: conf, + handler: handler, } return ret } +func (s *shuffleHotRegionScheduler) ServeHTTP(w http.ResponseWriter, r *http.Request) { + s.handler.ServeHTTP(w, r) +} + func (s *shuffleHotRegionScheduler) GetName() string { return s.conf.Name } @@ -77,8 +121,26 @@ func (s *shuffleHotRegionScheduler) EncodeConfig() ([]byte, error) { return EncodeConfig(s.conf) } +func (s *shuffleHotRegionScheduler) ReloadConfig() error { + s.conf.Lock() + defer s.conf.Unlock() + cfgData, err := s.conf.storage.LoadSchedulerConfig(s.GetName()) + if err != nil { + return err + } + if len(cfgData) == 0 { + return nil + } + newCfg := &shuffleHotRegionSchedulerConfig{} + if err = DecodeConfig([]byte(cfgData), newCfg); err != nil { + return err + } + s.conf.Limit = newCfg.Limit + return nil +} + func (s *shuffleHotRegionScheduler) IsScheduleAllowed(cluster sche.SchedulerCluster) bool { - hotRegionAllowed := s.OpController.OperatorCount(operator.OpHotRegion) < s.conf.Limit + hotRegionAllowed := s.OpController.OperatorCount(operator.OpHotRegion) < s.conf.getLimit() conf := cluster.GetSchedulerConfig() regionAllowed := s.OpController.OperatorCount(operator.OpRegion) < conf.GetRegionScheduleLimit() leaderAllowed := s.OpController.OperatorCount(operator.OpLeader) < conf.GetLeaderScheduleLimit() @@ -158,3 +220,47 @@ func (s *shuffleHotRegionScheduler) randomSchedule(cluster sche.SchedulerCluster shuffleHotRegionSkipCounter.Inc() return nil } + +type shuffleHotRegionHandler struct { + rd *render.Render + config *shuffleHotRegionSchedulerConfig +} + +func (handler *shuffleHotRegionHandler) UpdateConfig(w http.ResponseWriter, r *http.Request) { + var input map[string]interface{} + if err := apiutil.ReadJSONRespondError(handler.rd, w, r.Body, &input); err != nil { + return + } + limit, ok := input["limit"].(float64) + if !ok { + handler.rd.JSON(w, http.StatusBadRequest, "invalid limit") + return + } + handler.config.Lock() + defer handler.config.Unlock() + previous := handler.config.Limit + handler.config.Limit = uint64(limit) + err := handler.config.persistLocked() + if err != nil { + handler.rd.JSON(w, http.StatusInternalServerError, err.Error()) + handler.config.Limit = previous + return + } + handler.rd.JSON(w, http.StatusOK, nil) +} + +func (handler *shuffleHotRegionHandler) ListConfig(w http.ResponseWriter, r *http.Request) { + conf := handler.config.Clone() + handler.rd.JSON(w, http.StatusOK, conf) +} + +func newShuffleHotRegionHandler(config *shuffleHotRegionSchedulerConfig) http.Handler { + h := &shuffleHotRegionHandler{ + config: config, + rd: render.New(render.Options{IndentJSON: true}), + } + router := mux.NewRouter() + router.HandleFunc("/config", h.UpdateConfig).Methods(http.MethodPost) + router.HandleFunc("/list", h.ListConfig).Methods(http.MethodGet) + return router +} diff --git a/pkg/schedule/schedulers/shuffle_leader.go b/pkg/schedule/schedulers/shuffle_leader.go index 0e33fa802db..a6ff4baf65b 100644 --- a/pkg/schedule/schedulers/shuffle_leader.go +++ b/pkg/schedule/schedulers/shuffle_leader.go 
@@ -43,6 +43,7 @@ var ( type shuffleLeaderSchedulerConfig struct { Name string `json:"name"` Ranges []core.KeyRange `json:"ranges"` + // TODO: When we prepare to use Ranges, we will need to implement the ReloadConfig function for this scheduler. } type shuffleLeaderScheduler struct { diff --git a/pkg/schedule/schedulers/shuffle_region_config.go b/pkg/schedule/schedulers/shuffle_region_config.go index f503a6f67c7..7d04879c992 100644 --- a/pkg/schedule/schedulers/shuffle_region_config.go +++ b/pkg/schedule/schedulers/shuffle_region_config.go @@ -69,6 +69,7 @@ func (conf *shuffleRegionSchedulerConfig) IsRoleAllow(role string) bool { func (conf *shuffleRegionSchedulerConfig) ServeHTTP(w http.ResponseWriter, r *http.Request) { router := mux.NewRouter() + router.HandleFunc("/list", conf.handleGetRoles).Methods(http.MethodGet) router.HandleFunc("/roles", conf.handleGetRoles).Methods(http.MethodGet) router.HandleFunc("/roles", conf.handleSetRoles).Methods(http.MethodPost) router.ServeHTTP(w, r) diff --git a/pkg/schedule/schedulers/split_bucket.go b/pkg/schedule/schedulers/split_bucket.go index 5e75bded9b4..5e31f58129c 100644 --- a/pkg/schedule/schedulers/split_bucket.go +++ b/pkg/schedule/schedulers/split_bucket.go @@ -53,7 +53,7 @@ var ( splitBucketOperatorExistCounter = schedulerCounter.WithLabelValues(SplitBucketName, "operator-exist") splitBucketKeyRangeNotMatchCounter = schedulerCounter.WithLabelValues(SplitBucketName, "key-range-not-match") splitBucketNoSplitKeysCounter = schedulerCounter.WithLabelValues(SplitBucketName, "no-split-keys") - splitBucketCreateOpeartorFailCounter = schedulerCounter.WithLabelValues(SplitBucketName, "create-operator-fail") + splitBucketCreateOperatorFailCounter = schedulerCounter.WithLabelValues(SplitBucketName, "create-operator-fail") splitBucketNewOperatorCounter = schedulerCounter.WithLabelValues(SplitBucketName, "new-operator") ) @@ -65,15 +65,15 @@ func initSplitBucketConfig() *splitBucketSchedulerConfig { } type splitBucketSchedulerConfig struct { - mu syncutil.RWMutex + syncutil.RWMutex storage endpoint.ConfigStorage Degree int `json:"degree"` SplitLimit uint64 `json:"split-limit"` } func (conf *splitBucketSchedulerConfig) Clone() *splitBucketSchedulerConfig { - conf.mu.RLock() - defer conf.mu.RUnlock() + conf.RLock() + defer conf.RUnlock() return &splitBucketSchedulerConfig{ Degree: conf.Degree, } @@ -87,6 +87,18 @@ func (conf *splitBucketSchedulerConfig) persistLocked() error { return conf.storage.SaveSchedulerConfig(SplitBucketName, data) } +func (conf *splitBucketSchedulerConfig) getDegree() int { + conf.RLock() + defer conf.RUnlock() + return conf.Degree +} + +func (conf *splitBucketSchedulerConfig) getSplitLimit() uint64 { + conf.RLock() + defer conf.RUnlock() + return conf.SplitLimit +} + type splitBucketScheduler struct { *BaseScheduler conf *splitBucketSchedulerConfig @@ -104,8 +116,8 @@ func (h *splitBucketHandler) ListConfig(w http.ResponseWriter, _ *http.Request) } func (h *splitBucketHandler) UpdateConfig(w http.ResponseWriter, r *http.Request) { - h.conf.mu.Lock() - defer h.conf.mu.Unlock() + h.conf.Lock() + defer h.conf.Unlock() rd := render.New(render.Options{IndentJSON: true}) oldc, _ := json.Marshal(h.conf) data, err := io.ReadAll(r.Body) @@ -173,8 +185,8 @@ func (s *splitBucketScheduler) GetType() string { } func (s *splitBucketScheduler) ReloadConfig() error { - s.conf.mu.Lock() - defer s.conf.mu.Unlock() + s.conf.Lock() + defer s.conf.Unlock() cfgData, err := s.conf.storage.LoadSchedulerConfig(s.GetName()) if err != nil { return err @@ 
-182,7 +194,13 @@ func (s *splitBucketScheduler) ReloadConfig() error { if len(cfgData) == 0 { return nil } - return DecodeConfig([]byte(cfgData), s.conf) + newCfg := &splitBucketSchedulerConfig{} + if err := DecodeConfig([]byte(cfgData), newCfg); err != nil { + return err + } + s.conf.SplitLimit = newCfg.SplitLimit + s.conf.Degree = newCfg.Degree + return nil } // ServerHTTP implement Http server. @@ -196,7 +214,7 @@ func (s *splitBucketScheduler) IsScheduleAllowed(cluster sche.SchedulerCluster) splitBucketDisableCounter.Inc() return false } - allowed := s.BaseScheduler.OpController.OperatorCount(operator.OpSplit) < s.conf.SplitLimit + allowed := s.BaseScheduler.OpController.OperatorCount(operator.OpSplit) < s.conf.getSplitLimit() if !allowed { splitBuckerSplitLimitCounter.Inc() operator.OperatorLimitCounter.WithLabelValues(s.GetType(), operator.OpSplit.String()).Inc() @@ -218,7 +236,7 @@ func (s *splitBucketScheduler) Schedule(cluster sche.SchedulerCluster, dryRun bo plan := &splitBucketPlan{ conf: conf, cluster: cluster, - hotBuckets: cluster.BucketsStats(conf.Degree), + hotBuckets: cluster.BucketsStats(conf.getDegree()), hotRegionSplitSize: cluster.GetSchedulerConfig().GetMaxMovableHotPeerSize(), } return s.splitBucket(plan), nil @@ -262,6 +280,9 @@ func (s *splitBucketScheduler) splitBucket(plan *splitBucketPlan) []*operator.Op } if splitBucket != nil { region := plan.cluster.GetRegion(splitBucket.RegionID) + if region == nil { + return nil + } splitKey := make([][]byte, 0) if bytes.Compare(region.GetStartKey(), splitBucket.StartKey) < 0 { splitKey = append(splitKey, splitBucket.StartKey) @@ -269,10 +290,10 @@ func (s *splitBucketScheduler) splitBucket(plan *splitBucketPlan) []*operator.Op if bytes.Compare(region.GetEndKey(), splitBucket.EndKey) > 0 { splitKey = append(splitKey, splitBucket.EndKey) } - op, err := operator.CreateSplitRegionOperator(SplitBucketType, plan.cluster.GetRegion(splitBucket.RegionID), operator.OpSplit, + op, err := operator.CreateSplitRegionOperator(SplitBucketType, region, operator.OpSplit, pdpb.CheckPolicy_USEKEY, splitKey) if err != nil { - splitBucketCreateOpeartorFailCounter.Inc() + splitBucketCreateOperatorFailCounter.Inc() return nil } splitBucketNewOperatorCounter.Inc() diff --git a/pkg/schedule/schedulers/transfer_witness_leader.go b/pkg/schedule/schedulers/transfer_witness_leader.go index 2586065ea80..c651a8ef872 100644 --- a/pkg/schedule/schedulers/transfer_witness_leader.go +++ b/pkg/schedule/schedulers/transfer_witness_leader.go @@ -34,6 +34,7 @@ const ( // TransferWitnessLeaderBatchSize is the number of operators to to transfer // leaders by one scheduling transferWitnessLeaderBatchSize = 3 + // TODO: When we prepare to use Ranges, we will need to implement the ReloadConfig function for this scheduler. // TransferWitnessLeaderRecvMaxRegionSize is the max number of region can receive // TODO: make it a reasonable value transferWitnessLeaderRecvMaxRegionSize = 10000 diff --git a/pkg/schedule/schedulers/utils.go b/pkg/schedule/schedulers/utils.go index c7cdf9191ca..fea51798d1c 100644 --- a/pkg/schedule/schedulers/utils.go +++ b/pkg/schedule/schedulers/utils.go @@ -390,3 +390,19 @@ func (q *retryQuota) GC(keepStores []*core.StoreInfo) { } } } + +// pauseAndResumeLeaderTransfer checks the old and new store IDs, and pause or resume the leader transfer. 
+func pauseAndResumeLeaderTransfer[T any](cluster *core.BasicCluster, old, new map[uint64]T) { + for id := range old { + if _, ok := new[id]; ok { + continue + } + cluster.ResumeLeaderTransfer(id) + } + for id := range new { + if _, ok := old[id]; ok { + continue + } + cluster.PauseLeaderTransfer(id) + } +} diff --git a/pkg/statistics/hot_cache.go b/pkg/statistics/hot_cache.go index de7189a1332..799fb240d10 100644 --- a/pkg/statistics/hot_cache.go +++ b/pkg/statistics/hot_cache.go @@ -125,8 +125,8 @@ func (w *HotCache) CollectMetrics() { w.CheckReadAsync(newCollectMetricsTask()) } -// ResetMetrics resets the hot cache metrics. -func (w *HotCache) ResetMetrics() { +// ResetHotCacheStatusMetrics resets the hot cache metrics. +func ResetHotCacheStatusMetrics() { hotCacheStatusGauge.Reset() } @@ -205,3 +205,10 @@ func (w *HotCache) GetThresholds(kind utils.RWType, storeID uint64) []float64 { } return nil } + +// CleanCache cleans the cache. +// This is used for test purpose. +func (w *HotCache) CleanCache() { + w.writeCache.removeAllItem() + w.readCache.removeAllItem() +} diff --git a/pkg/statistics/hot_peer_cache.go b/pkg/statistics/hot_peer_cache.go index 1ac07289a3c..0e35e0e23be 100644 --- a/pkg/statistics/hot_peer_cache.go +++ b/pkg/statistics/hot_peer_cache.go @@ -544,6 +544,18 @@ func (f *hotPeerCache) removeItem(item *HotPeerStat) { } } +// removeAllItem removes all items of the cache. +// It is used for test. +func (f *hotPeerCache) removeAllItem() { + for _, peers := range f.peersOfStore { + for _, peer := range peers.GetAll() { + item := peer.(*HotPeerStat) + item.actionType = utils.Remove + f.updateStat(item) + } + } +} + func (f *hotPeerCache) coldItem(newItem, oldItem *HotPeerStat) { newItem.HotDegree = oldItem.HotDegree - 1 newItem.AntiCount = oldItem.AntiCount - 1 diff --git a/pkg/statistics/region_collection.go b/pkg/statistics/region_collection.go index c79eb0a3132..21af8e152fd 100644 --- a/pkg/statistics/region_collection.go +++ b/pkg/statistics/region_collection.go @@ -119,7 +119,11 @@ func (r *RegionStatistics) GetRegionStatsByType(typ RegionStatisticType) []*core defer r.RUnlock() res := make([]*core.RegionInfo, 0, len(r.stats[typ])) for regionID := range r.stats[typ] { - res = append(res, r.rip.GetRegion(regionID).Clone()) + region := r.rip.GetRegion(regionID) + if region == nil { + continue + } + res = append(res, region.Clone()) } return res } @@ -268,8 +272,8 @@ func (r *RegionStatistics) Collect() { regionWitnessLeaderRegionCounter.Set(float64(len(r.stats[WitnessLeader]))) } -// Reset resets the metrics of the regions' status. -func (r *RegionStatistics) Reset() { +// ResetRegionStatsMetrics resets the metrics of the regions' status. +func ResetRegionStatsMetrics() { regionMissPeerRegionCounter.Set(0) regionExtraPeerRegionCounter.Set(0) regionDownPeerRegionCounter.Set(0) @@ -322,8 +326,8 @@ func (l *LabelStatistics) Collect() { } } -// Reset resets the metrics of the label status. -func (l *LabelStatistics) Reset() { +// ResetLabelStatsMetrics resets the metrics of the label status. 
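Editor's note: the generic `pauseAndResumeLeaderTransfer` helper added to utils.go above only acts on the symmetric difference of the two maps. A self-contained usage sketch (a recording stub replaces `*core.BasicCluster`, and the map values are irrelevant thanks to the type parameter):

```go
package main

import "fmt"

// storeSet is a stand-in for *core.BasicCluster; it only records which store
// IDs are currently paused for leader transfer.
type storeSet struct{ paused map[uint64]bool }

func (s *storeSet) PauseLeaderTransfer(id uint64)  { s.paused[id] = true }
func (s *storeSet) ResumeLeaderTransfer(id uint64) { delete(s.paused, id) }

// diffPauseResume mirrors the helper above: stores that drop out of the config
// are resumed, newly added stores are paused, unchanged stores are untouched.
func diffPauseResume[T any](c *storeSet, old, new map[uint64]T) {
	for id := range old {
		if _, ok := new[id]; !ok {
			c.ResumeLeaderTransfer(id)
		}
	}
	for id := range new {
		if _, ok := old[id]; !ok {
			c.PauseLeaderTransfer(id)
		}
	}
}

func main() {
	c := &storeSet{paused: map[uint64]bool{1: true, 2: true}}
	oldConf := map[uint64]struct{}{1: {}, 2: {}}
	newConf := map[uint64]struct{}{2: {}, 3: {}}
	diffPauseResume(c, oldConf, newConf)
	fmt.Println(c.paused) // map[2:true 3:true] — store 1 resumed, store 3 paused
}
```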
+func ResetLabelStatsMetrics() { regionLabelLevelGauge.Reset() } diff --git a/pkg/statistics/store_collection.go b/pkg/statistics/store_collection.go index dcdd77d9112..aacd45338d1 100644 --- a/pkg/statistics/store_collection.go +++ b/pkg/statistics/store_collection.go @@ -322,4 +322,7 @@ func Reset() { storeStatusGauge.Reset() clusterStatusGauge.Reset() placementStatusGauge.Reset() + ResetRegionStatsMetrics() + ResetLabelStatsMetrics() + ResetHotCacheStatusMetrics() } diff --git a/pkg/syncer/client.go b/pkg/syncer/client.go index f61ce320a74..00dd8c5107d 100644 --- a/pkg/syncer/client.go +++ b/pkg/syncer/client.go @@ -19,6 +19,8 @@ import ( "time" "github.com/docker/go-units" + "github.com/pingcap/errors" + "github.com/pingcap/failpoint" "github.com/pingcap/kvproto/pkg/metapb" "github.com/pingcap/kvproto/pkg/pdpb" "github.com/pingcap/log" @@ -77,6 +79,11 @@ func (s *RegionSyncer) syncRegion(ctx context.Context, conn *grpc.ClientConn) (C var regionGuide = core.GenerateRegionGuideFunc(false) +// IsRunning returns whether the region syncer client is running. +func (s *RegionSyncer) IsRunning() bool { + return s.streamingRunning.Load() +} + // StartSyncWithLeader starts to sync with leader. func (s *RegionSyncer) StartSyncWithLeader(addr string) { s.wg.Add(1) @@ -89,6 +96,7 @@ func (s *RegionSyncer) StartSyncWithLeader(addr string) { go func() { defer logutil.LogPanic() defer s.wg.Done() + defer s.streamingRunning.Store(false) // used to load region from kv storage to cache storage. bc := s.server.GetBasicCluster() regionStorage := s.server.GetStorage() @@ -132,6 +140,9 @@ func (s *RegionSyncer) StartSyncWithLeader(addr string) { } stream, err := s.syncRegion(ctx, conn) + failpoint.Inject("disableClientStreaming", func() { + err = errors.Errorf("no stream") + }) if err != nil { if ev, ok := status.FromError(err); ok { if ev.Code() == codes.Canceled { @@ -142,11 +153,11 @@ func (s *RegionSyncer) StartSyncWithLeader(addr string) { time.Sleep(time.Second) continue } - log.Info("server starts to synchronize with leader", zap.String("server", s.server.Name()), zap.String("leader", s.server.GetLeader().GetName()), zap.Uint64("request-index", s.history.GetNextIndex())) for { resp, err := stream.Recv() if err != nil { + s.streamingRunning.Store(false) log.Error("region sync with leader meet error", errs.ZapError(errs.ErrGRPCRecv, err)) if err = stream.CloseSend(); err != nil { log.Error("failed to terminate client stream", errs.ZapError(errs.ErrGRPCCloseSend, err)) @@ -212,6 +223,8 @@ func (s *RegionSyncer) StartSyncWithLeader(addr string) { _ = regionStorage.DeleteRegion(old.GetMeta()) } } + // mark the client as running status when it finished the first history region sync. + s.streamingRunning.Store(true) } } }() diff --git a/pkg/syncer/server.go b/pkg/syncer/server.go index 7d339e75dbe..4fb38614de0 100644 --- a/pkg/syncer/server.go +++ b/pkg/syncer/server.go @@ -18,6 +18,7 @@ import ( "context" "io" "sync" + "sync/atomic" "time" "github.com/docker/go-units" @@ -83,6 +84,8 @@ type RegionSyncer struct { history *historyBuffer limit *ratelimit.RateLimiter tlsConfig *grpcutil.TLSConfig + // status when as client + streamingRunning atomic.Bool } // NewRegionSyncer returns a region syncer. 
@@ -228,7 +231,16 @@ func (s *RegionSyncer) syncHistoryRegion(ctx context.Context, request *pdpb.Sync if s.history.GetNextIndex() == startIndex { log.Info("requested server has already in sync with server", zap.String("requested-server", name), zap.String("server", s.server.Name()), zap.Uint64("last-index", startIndex)) - return nil + // still send a response to follower to show the history region sync. + resp := &pdpb.SyncRegionResponse{ + Header: &pdpb.ResponseHeader{ClusterId: s.server.ClusterID()}, + Regions: nil, + StartIndex: startIndex, + RegionStats: nil, + RegionLeaders: nil, + Buckets: nil, + } + return stream.Send(resp) } // do full synchronization if startIndex == 0 { diff --git a/pkg/tso/admin.go b/pkg/tso/admin.go index 7d510cdef65..f19d8e71d05 100644 --- a/pkg/tso/admin.go +++ b/pkg/tso/admin.go @@ -15,6 +15,7 @@ package tso import ( + "fmt" "net/http" "strconv" @@ -53,6 +54,7 @@ func NewAdminHandler(handler Handler, rd *render.Render) *AdminHandler { // @Failure 400 {string} string "The input is invalid." // @Failure 403 {string} string "Reset ts is forbidden." // @Failure 500 {string} string "TSO server failed to proceed the request." +// @Failure 503 {string} string "It's a temporary failure, please retry." // @Router /admin/reset-ts [post] // if force-use-larger=true: // @@ -96,6 +98,12 @@ func (h *AdminHandler) ResetTS(w http.ResponseWriter, r *http.Request) { if err = handler.ResetTS(ts, ignoreSmaller, skipUpperBoundCheck, 0); err != nil { if err == errs.ErrServerNotStarted { h.rd.JSON(w, http.StatusInternalServerError, err.Error()) + } else if err == errs.ErrEtcdTxnConflict { + // If the error is ErrEtcdTxnConflict, it means there is a temporary failure. + // Return 503 to let the client retry. + // Ref: https://datatracker.ietf.org/doc/html/rfc7231#section-6.6.4 + h.rd.JSON(w, http.StatusServiceUnavailable, + fmt.Sprintf("It's a temporary failure with error %s, please retry.", err.Error())) } else { h.rd.JSON(w, http.StatusForbidden, err.Error()) } diff --git a/pkg/tso/allocator_manager.go b/pkg/tso/allocator_manager.go index df0ca0affc9..251a3aaf2e6 100644 --- a/pkg/tso/allocator_manager.go +++ b/pkg/tso/allocator_manager.go @@ -101,13 +101,13 @@ func (info *DCLocationInfo) clone() DCLocationInfo { type ElectionMember interface { // ID returns the unique ID in the election group. For example, it can be unique // server id of a cluster or the unique keyspace group replica id of the election - // group comprised of the replicas of a keyspace group. + // group composed of the replicas of a keyspace group. ID() uint64 - // ID returns the unique name in the election group. + // Name returns the unique name in the election group. Name() string // MemberValue returns the member value. MemberValue() string - // GetMember() returns the current member + // GetMember returns the current member GetMember() interface{} // Client returns the etcd client. Client() *clientv3.Client @@ -124,7 +124,7 @@ type ElectionMember interface { // KeepLeader is used to keep the leader's leadership. KeepLeader(ctx context.Context) // CampaignLeader is used to campaign the leadership and make it become a leader in an election group. - CampaignLeader(leaseTimeout int64) error + CampaignLeader(ctx context.Context, leaseTimeout int64) error // ResetLeader is used to reset the member's current leadership. // Basically it will reset the leader lease and unset leader info. 
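Editor's note: with the admin handler change above, an etcd transaction conflict during reset-ts now surfaces as 503 instead of 403, so callers can safely retry only that case. A hedged client-side sketch, assuming the PD-hosted route exercised by the test later in this patch; the address, payload, and retry policy are illustrative:

```go
package main

import (
	"bytes"
	"fmt"
	"net/http"
	"time"
)

// resetTS retries only on 503 (temporary etcd txn conflict) and fails fast on
// any other non-200 status.
func resetTS(apiPrefix string, body []byte) error {
	for i := 0; i < 3; i++ {
		resp, err := http.Post(apiPrefix+"/admin/reset-ts", "application/json", bytes.NewReader(body))
		if err != nil {
			return err
		}
		code := resp.StatusCode
		resp.Body.Close()
		switch code {
		case http.StatusOK:
			return nil
		case http.StatusServiceUnavailable:
			time.Sleep(time.Second) // transient conflict, retry
		default:
			return fmt.Errorf("reset-ts failed with status %d", code)
		}
	}
	return fmt.Errorf("reset-ts still unavailable after retries")
}

func main() {
	body := []byte(`{"tso": "399271743880708096"}`) // illustrative timestamp
	if err := resetTS("http://127.0.0.1:2379/pd/api/v1", body); err != nil {
		fmt.Println(err)
	}
}
```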
ResetLeader() diff --git a/pkg/tso/global_allocator.go b/pkg/tso/global_allocator.go index 613ceb3eafc..a37bcc73881 100644 --- a/pkg/tso/global_allocator.go +++ b/pkg/tso/global_allocator.go @@ -568,7 +568,7 @@ func (gta *GlobalTSOAllocator) campaignLeader() { log.Info("start to campaign the primary", logutil.CondUint32("keyspace-group-id", gta.getGroupID(), gta.getGroupID() > 0), zap.String("campaign-tso-primary-name", gta.member.Name())) - if err := gta.am.member.CampaignLeader(gta.am.leaderLease); err != nil { + if err := gta.am.member.CampaignLeader(gta.ctx, gta.am.leaderLease); err != nil { if errors.Is(err, errs.ErrEtcdTxnConflict) { log.Info("campaign tso primary meets error due to txn conflict, another tso server may campaign successfully", logutil.CondUint32("keyspace-group-id", gta.getGroupID(), gta.getGroupID() > 0), diff --git a/pkg/tso/keyspace_group_manager_test.go b/pkg/tso/keyspace_group_manager_test.go index c20abfc5f79..0c1b017d7aa 100644 --- a/pkg/tso/keyspace_group_manager_test.go +++ b/pkg/tso/keyspace_group_manager_test.go @@ -1224,14 +1224,12 @@ func waitForPrimariesServing( re *require.Assertions, mgrs []*KeyspaceGroupManager, ids []uint32, ) { testutil.Eventually(re, func() bool { - for i := 0; i < 100; i++ { - for j, id := range ids { - if member, err := mgrs[j].GetElectionMember(id, id); err != nil || !member.IsLeader() { - return false - } - if _, _, err := mgrs[j].HandleTSORequest(mgrs[j].ctx, id, id, GlobalDCLocation, 1); err != nil { - return false - } + for j, id := range ids { + if member, err := mgrs[j].GetElectionMember(id, id); err != nil || member == nil || !member.IsLeader() { + return false + } + if _, _, err := mgrs[j].HandleTSORequest(mgrs[j].ctx, id, id, GlobalDCLocation, 1); err != nil { + return false } } return true diff --git a/pkg/utils/apiutil/apiutil.go b/pkg/utils/apiutil/apiutil.go index 633dc8fa557..53fab682fcb 100644 --- a/pkg/utils/apiutil/apiutil.go +++ b/pkg/utils/apiutil/apiutil.go @@ -39,15 +39,14 @@ import ( "go.uber.org/zap" ) -var ( - // componentSignatureKey is used for http request header key - // to identify component signature +const ( + // componentSignatureKey is used for http request header key to identify component signature. + // Deprecated: please use `XCallerIDHeader` below to obtain a more granular source identification. + // This is kept for backward compatibility. componentSignatureKey = "component" - // componentAnonymousValue identifies anonymous request source - componentAnonymousValue = "anonymous" -) + // anonymousValue identifies anonymous request source + anonymousValue = "anonymous" -const ( // PDRedirectorHeader is used to mark which PD redirected this request. PDRedirectorHeader = "PD-Redirector" // PDAllowFollowerHandleHeader is used to mark whether this request is allowed to be handled by the follower PD. @@ -58,14 +57,11 @@ const ( XForwardedPortHeader = "X-Forwarded-Port" // XRealIPHeader is used to mark the real client IP. XRealIPHeader = "X-Real-Ip" + // XCallerIDHeader is used to mark the caller ID. + XCallerIDHeader = "X-Caller-ID" // ForwardToMicroServiceHeader is used to mark the request is forwarded to micro service. ForwardToMicroServiceHeader = "Forward-To-Micro-Service" - // ErrRedirectFailed is the error message for redirect failed. - ErrRedirectFailed = "redirect failed" - // ErrRedirectToNotLeader is the error message for redirect to not leader. 
- ErrRedirectToNotLeader = "redirect to not leader" - chunkSize = 4096 ) @@ -117,7 +113,7 @@ func ErrorResp(rd *render.Render, w http.ResponseWriter, err error) { // GetIPPortFromHTTPRequest returns http client host IP and port from context. // Because `X-Forwarded-For ` header has been written into RFC 7239(Forwarded HTTP Extension), -// so `X-Forwarded-For` has the higher priority than `X-Real-IP`. +// so `X-Forwarded-For` has the higher priority than `X-Real-Ip`. // And both of them have the higher priority than `RemoteAddr` func GetIPPortFromHTTPRequest(r *http.Request) (ip, port string) { forwardedIPs := strings.Split(r.Header.Get(XForwardedForHeader), ",") @@ -141,32 +137,42 @@ func GetIPPortFromHTTPRequest(r *http.Request) (ip, port string) { return splitIP, splitPort } -// GetComponentNameOnHTTP returns component name from Request Header -func GetComponentNameOnHTTP(r *http.Request) string { +// getComponentNameOnHTTP returns component name from the request header. +func getComponentNameOnHTTP(r *http.Request) string { componentName := r.Header.Get(componentSignatureKey) if len(componentName) == 0 { - componentName = componentAnonymousValue + componentName = anonymousValue } return componentName } -// ComponentSignatureRoundTripper is used to add component signature in HTTP header -type ComponentSignatureRoundTripper struct { - proxied http.RoundTripper - component string +// GetCallerIDOnHTTP returns caller ID from the request header. +func GetCallerIDOnHTTP(r *http.Request) string { + callerID := r.Header.Get(XCallerIDHeader) + if len(callerID) == 0 { + // Fall back to get the component name to keep backward compatibility. + callerID = getComponentNameOnHTTP(r) + } + return callerID +} + +// CallerIDRoundTripper is used to add caller ID in the HTTP header. +type CallerIDRoundTripper struct { + proxied http.RoundTripper + callerID string } -// NewComponentSignatureRoundTripper returns a new ComponentSignatureRoundTripper. -func NewComponentSignatureRoundTripper(roundTripper http.RoundTripper, componentName string) *ComponentSignatureRoundTripper { - return &ComponentSignatureRoundTripper{ - proxied: roundTripper, - component: componentName, +// NewCallerIDRoundTripper returns a new `CallerIDRoundTripper`. +func NewCallerIDRoundTripper(roundTripper http.RoundTripper, callerID string) *CallerIDRoundTripper { + return &CallerIDRoundTripper{ + proxied: roundTripper, + callerID: callerID, } } // RoundTrip is used to implement RoundTripper -func (rt *ComponentSignatureRoundTripper) RoundTrip(req *http.Request) (resp *http.Response, err error) { - req.Header.Add(componentSignatureKey, rt.component) +func (rt *CallerIDRoundTripper) RoundTrip(req *http.Request) (resp *http.Response, err error) { + req.Header.Add(XCallerIDHeader, rt.callerID) // Send the request, get the response and the error resp, err = rt.proxied.RoundTrip(req) return @@ -228,7 +234,7 @@ func PostJSONIgnoreResp(client *http.Client, url string, data []byte) error { // DoDelete is used to send delete request and return http response code. 
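Editor's note: a short usage sketch for the `CallerIDRoundTripper` introduced above. Every request sent through the wrapped client carries the `X-Caller-ID` header that `GetCallerIDOnHTTP` reads on the PD side (falling back to the legacy `component` header for old clients). The caller ID string and URL are illustrative.

```go
package main

import (
	"fmt"
	"net/http"

	"github.com/tikv/pd/pkg/utils/apiutil"
)

func main() {
	client := &http.Client{
		// Wrap the default transport so the header is added transparently.
		Transport: apiutil.NewCallerIDRoundTripper(http.DefaultTransport, "pd-ctl/cluster"),
	}
	resp, err := client.Get("http://127.0.0.1:2379/pd/api/v1/health")
	if err != nil {
		fmt.Println(err)
		return
	}
	defer resp.Body.Close()
	fmt.Println(resp.Status)
}
```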
func DoDelete(client *http.Client, url string) (*http.Response, error) { - req, err := http.NewRequest(http.MethodDelete, url, nil) + req, err := http.NewRequest(http.MethodDelete, url, http.NoBody) if err != nil { return nil, err } @@ -459,7 +465,7 @@ func (p *customReverseProxies) ServeHTTP(w http.ResponseWriter, r *http.Request) } return } - http.Error(w, ErrRedirectFailed, http.StatusInternalServerError) + http.Error(w, errs.ErrRedirect.FastGenByArgs().Error(), http.StatusInternalServerError) } // copyHeader duplicates the HTTP headers from the source `src` to the destination `dst`. diff --git a/pkg/utils/apiutil/apiutil_test.go b/pkg/utils/apiutil/apiutil_test.go index a4e7b97aa4d..106d3fb21cb 100644 --- a/pkg/utils/apiutil/apiutil_test.go +++ b/pkg/utils/apiutil/apiutil_test.go @@ -101,7 +101,7 @@ func TestGetIPPortFromHTTPRequest(t *testing.T) { ip: "127.0.0.1", port: "5299", }, - // IPv4 "X-Real-IP" with port + // IPv4 "X-Real-Ip" with port { r: &http.Request{ Header: map[string][]string{ @@ -111,7 +111,7 @@ func TestGetIPPortFromHTTPRequest(t *testing.T) { ip: "127.0.0.1", port: "5299", }, - // IPv4 "X-Real-IP" without port + // IPv4 "X-Real-Ip" without port { r: &http.Request{ Header: map[string][]string{ @@ -158,7 +158,7 @@ func TestGetIPPortFromHTTPRequest(t *testing.T) { ip: "::1", port: "", }, - // IPv6 "X-Real-IP" with port + // IPv6 "X-Real-Ip" with port { r: &http.Request{ Header: map[string][]string{ @@ -168,7 +168,7 @@ func TestGetIPPortFromHTTPRequest(t *testing.T) { ip: "::1", port: "5299", }, - // IPv6 "X-Real-IP" without port + // IPv6 "X-Real-Ip" without port { r: &http.Request{ Header: map[string][]string{ diff --git a/pkg/utils/apiutil/serverapi/middleware.go b/pkg/utils/apiutil/serverapi/middleware.go index 19438ad0f91..eb0f8a5f8eb 100644 --- a/pkg/utils/apiutil/serverapi/middleware.go +++ b/pkg/utils/apiutil/serverapi/middleware.go @@ -22,6 +22,7 @@ import ( "github.com/pingcap/failpoint" "github.com/pingcap/log" "github.com/tikv/pd/pkg/errs" + mcsutils "github.com/tikv/pd/pkg/mcs/utils" "github.com/tikv/pd/pkg/slice" "github.com/tikv/pd/pkg/utils/apiutil" "github.com/tikv/pd/server" @@ -79,6 +80,7 @@ type microserviceRedirectRule struct { targetPath string targetServiceName string matchMethods []string + filter func(*http.Request) bool } // NewRedirector redirects request to the leader if needs to be handled in the leader. 
@@ -94,14 +96,19 @@ func NewRedirector(s *server.Server, opts ...RedirectorOption) negroni.Handler { type RedirectorOption func(*redirector) // MicroserviceRedirectRule new a microservice redirect rule option -func MicroserviceRedirectRule(matchPath, targetPath, targetServiceName string, methods []string) RedirectorOption { +func MicroserviceRedirectRule(matchPath, targetPath, targetServiceName string, + methods []string, filters ...func(*http.Request) bool) RedirectorOption { return func(s *redirector) { - s.microserviceRedirectRules = append(s.microserviceRedirectRules, µserviceRedirectRule{ - matchPath, - targetPath, - targetServiceName, - methods, - }) + rule := µserviceRedirectRule{ + matchPath: matchPath, + targetPath: targetPath, + targetServiceName: targetServiceName, + matchMethods: methods, + } + if len(filters) > 0 { + rule.filter = filters[0] + } + s.microserviceRedirectRules = append(s.microserviceRedirectRules, rule) } } @@ -116,26 +123,45 @@ func (h *redirector) matchMicroServiceRedirectRules(r *http.Request) (bool, stri // It will be helpful when matching the redirect rules "schedulers" or "schedulers/{name}" r.URL.Path = strings.TrimRight(r.URL.Path, "/") for _, rule := range h.microserviceRedirectRules { - if strings.HasPrefix(r.URL.Path, rule.matchPath) && slice.Contains(rule.matchMethods, r.Method) { + // Now we only support checking the scheduling service whether it is independent + if rule.targetServiceName == mcsutils.SchedulingServiceName { + if !h.s.IsServiceIndependent(mcsutils.SchedulingServiceName) { + continue + } + } + if strings.HasPrefix(r.URL.Path, rule.matchPath) && + slice.Contains(rule.matchMethods, r.Method) { + if rule.filter != nil && !rule.filter(r) { + continue + } + // we check the service primary addr here, + // if the service is not available, we will return ErrRedirect by returning an empty addr. addr, ok := h.s.GetServicePrimaryAddr(r.Context(), rule.targetServiceName) if !ok || addr == "" { log.Warn("failed to get the service primary addr when trying to match redirect rules", zap.String("path", r.URL.Path)) + return true, "" + } + // If the URL contains escaped characters, use RawPath instead of Path + origin := r.URL.Path + path := r.URL.Path + if r.URL.RawPath != "" { + path = r.URL.RawPath } // Extract parameters from the URL path // e.g. 
r.URL.Path = /pd/api/v1/operators/1 (before redirect) // matchPath = /pd/api/v1/operators // targetPath = /scheduling/api/v1/operators // r.URL.Path = /scheduling/api/v1/operator/1 (after redirect) - pathParams := strings.TrimPrefix(r.URL.Path, rule.matchPath) + pathParams := strings.TrimPrefix(path, rule.matchPath) pathParams = strings.Trim(pathParams, "/") // Remove leading and trailing '/' if len(pathParams) > 0 { r.URL.Path = rule.targetPath + "/" + pathParams } else { r.URL.Path = rule.targetPath } - log.Debug("redirect to micro service", zap.String("path", r.URL.Path), zap.String("target", addr), - zap.String("method", r.Method)) + log.Debug("redirect to micro service", zap.String("path", r.URL.Path), zap.String("origin-path", origin), + zap.String("target", addr), zap.String("method", r.Method)) return true, addr } } @@ -145,8 +171,13 @@ func (h *redirector) matchMicroServiceRedirectRules(r *http.Request) (bool, stri func (h *redirector) ServeHTTP(w http.ResponseWriter, r *http.Request, next http.HandlerFunc) { redirectToMicroService, targetAddr := h.matchMicroServiceRedirectRules(r) allowFollowerHandle := len(r.Header.Get(apiutil.PDAllowFollowerHandleHeader)) > 0 - isLeader := h.s.GetMember().IsLeader() - if !h.s.IsClosed() && (allowFollowerHandle || isLeader) && !redirectToMicroService { + + if h.s.IsClosed() { + http.Error(w, errs.ErrServerNotStarted.FastGenByArgs().Error(), http.StatusInternalServerError) + return + } + + if (allowFollowerHandle || h.s.GetMember().IsLeader()) && !redirectToMicroService { next(w, r) return } @@ -154,7 +185,7 @@ func (h *redirector) ServeHTTP(w http.ResponseWriter, r *http.Request, next http // Prevent more than one redirection. if name := r.Header.Get(apiutil.PDRedirectorHeader); len(name) != 0 { log.Error("redirect but server is not leader", zap.String("from", name), zap.String("server", h.s.Name()), errs.ZapError(errs.ErrRedirect)) - http.Error(w, apiutil.ErrRedirectToNotLeader, http.StatusInternalServerError) + http.Error(w, errs.ErrRedirectToNotLeader.FastGenByArgs().Error(), http.StatusInternalServerError) return } @@ -173,7 +204,7 @@ func (h *redirector) ServeHTTP(w http.ResponseWriter, r *http.Request, next http var clientUrls []string if redirectToMicroService { if len(targetAddr) == 0 { - http.Error(w, apiutil.ErrRedirectFailed, http.StatusInternalServerError) + http.Error(w, errs.ErrRedirect.FastGenByArgs().Error(), http.StatusInternalServerError) return } clientUrls = append(clientUrls, targetAddr) diff --git a/pkg/utils/etcdutil/etcdutil.go b/pkg/utils/etcdutil/etcdutil.go index 1432b6e37c3..03c2374efc6 100644 --- a/pkg/utils/etcdutil/etcdutil.go +++ b/pkg/utils/etcdutil/etcdutil.go @@ -382,13 +382,18 @@ func (checker *healthyChecker) patrol(ctx context.Context) []string { } func (checker *healthyChecker) update(eps []string) { + epMap := make(map[string]struct{}) for _, ep := range eps { + epMap[ep] = struct{}{} + } + + for ep := range epMap { // check if client exists, if not, create one, if exists, check if it's offline or disconnected. 
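Editor's note: the `MicroserviceRedirectRule` option above now accepts an optional per-request filter. A hedged sketch of building such a rule; the match/target paths follow the scheduling-service forwarding used elsewhere in this patch, while the `local` query parameter is purely illustrative.

```go
package example

import (
	"net/http"

	mcsutils "github.com/tikv/pd/pkg/mcs/utils"
	"github.com/tikv/pd/pkg/utils/apiutil/serverapi"
)

// configRedirectRule forwards GET /pd/api/v1/config to the scheduling
// microservice only when the request does not carry the "local" query
// parameter. The returned option is passed to serverapi.NewRedirector.
func configRedirectRule() serverapi.RedirectorOption {
	return serverapi.MicroserviceRedirectRule(
		"/pd/api/v1/config",
		"/scheduling/api/v1/config",
		mcsutils.SchedulingServiceName,
		[]string{http.MethodGet},
		func(r *http.Request) bool {
			return r.URL.Query().Get("local") == ""
		},
	)
}
```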
if client, ok := checker.Load(ep); ok { lastHealthy := client.(*healthyClient).lastHealth if time.Since(lastHealthy) > etcdServerOfflineTimeout { log.Info("some etcd server maybe offline", zap.String("endpoint", ep)) - checker.Delete(ep) + checker.removeClient(ep) } if time.Since(lastHealthy) > etcdServerDisconnectedTimeout { // try to reset client endpoint to trigger reconnect @@ -399,6 +404,16 @@ func (checker *healthyChecker) update(eps []string) { } checker.addClient(ep, time.Now()) } + + // check if there are some stale clients, if exists, remove them. + checker.Range(func(key, value interface{}) bool { + ep := key.(string) + if _, ok := epMap[ep]; !ok { + log.Info("remove stale etcd client", zap.String("endpoint", ep)) + checker.removeClient(ep) + } + return true + }) } func (checker *healthyChecker) addClient(ep string, lastHealth time.Time) { @@ -413,6 +428,15 @@ func (checker *healthyChecker) addClient(ep string, lastHealth time.Time) { }) } +func (checker *healthyChecker) removeClient(ep string) { + if client, ok := checker.LoadAndDelete(ep); ok { + err := client.(*healthyClient).Close() + if err != nil { + log.Error("failed to close etcd healthy client", zap.Error(err)) + } + } +} + func syncUrls(client *clientv3.Client) []string { // See https://github.com/etcd-io/etcd/blob/85b640cee793e25f3837c47200089d14a8392dc7/clientv3/client.go#L170-L183 ctx, cancel := context.WithTimeout(clientv3.WithRequireLeader(client.Ctx()), DefaultRequestTimeout) @@ -707,7 +731,6 @@ func (lw *LoopWatcher) watch(ctx context.Context, revision int64) (nextRevision }() ticker := time.NewTicker(RequestProgressInterval) defer ticker.Stop() - lastReceivedResponseTime := time.Now() for { if watcherCancel != nil { @@ -736,8 +759,10 @@ func (lw *LoopWatcher) watch(ctx context.Context, revision int64) (nextRevision continue } } + lastReceivedResponseTime := time.Now() log.Info("watch channel is created in watch loop", zap.Int64("revision", revision), zap.String("name", lw.name), zap.String("key", lw.key)) + watchChanLoop: select { case <-ctx.Done(): diff --git a/pkg/utils/grpcutil/grpcutil.go b/pkg/utils/grpcutil/grpcutil.go index ee9d85a4ee1..a001ec4bd03 100644 --- a/pkg/utils/grpcutil/grpcutil.go +++ b/pkg/utils/grpcutil/grpcutil.go @@ -18,7 +18,9 @@ import ( "context" "crypto/tls" "crypto/x509" + "io" "net/url" + "strings" "time" "github.com/pingcap/errors" @@ -28,6 +30,7 @@ import ( "go.etcd.io/etcd/pkg/transport" "go.uber.org/zap" "google.golang.org/grpc" + "google.golang.org/grpc/codes" "google.golang.org/grpc/credentials" "google.golang.org/grpc/metadata" ) @@ -160,6 +163,7 @@ func GetForwardedHost(ctx context.Context) string { md, ok := metadata.FromIncomingContext(ctx) if !ok { log.Debug("failed to get forwarding metadata") + return "" } if t, ok := md[ForwardMetadataKey]; ok { return t[0] @@ -221,3 +225,14 @@ func CheckStream(ctx context.Context, cancel context.CancelFunc, done chan struc } <-done } + +// NeedRebuildConnection checks if the error is a connection error. +func NeedRebuildConnection(err error) bool { + return err == io.EOF || + strings.Contains(err.Error(), codes.Unavailable.String()) || // Unavailable indicates the service is currently unavailable. This is a most likely a transient condition. + strings.Contains(err.Error(), codes.DeadlineExceeded.String()) || // DeadlineExceeded means operation expired before completion. + strings.Contains(err.Error(), codes.Internal.String()) || // Internal errors. + strings.Contains(err.Error(), codes.Unknown.String()) || // Unknown error. 
+ strings.Contains(err.Error(), codes.ResourceExhausted.String()) // ResourceExhausted is returned when either the client or the server has exhausted their resources. + // Besides, we don't need to rebuild the connection if the code is Canceled, which means the client cancelled the request. +} diff --git a/pkg/utils/requestutil/context_test.go b/pkg/utils/requestutil/context_test.go index 475b109e410..298fc1ff8a3 100644 --- a/pkg/utils/requestutil/context_test.go +++ b/pkg/utils/requestutil/context_test.go @@ -34,7 +34,7 @@ func TestRequestInfo(t *testing.T) { RequestInfo{ ServiceLabel: "test label", Method: http.MethodPost, - Component: "pdctl", + CallerID: "pdctl", IP: "localhost", URLParam: "{\"id\"=1}", BodyParam: "{\"state\"=\"Up\"}", @@ -45,7 +45,7 @@ func TestRequestInfo(t *testing.T) { re.True(ok) re.Equal("test label", result.ServiceLabel) re.Equal(http.MethodPost, result.Method) - re.Equal("pdctl", result.Component) + re.Equal("pdctl", result.CallerID) re.Equal("localhost", result.IP) re.Equal("{\"id\"=1}", result.URLParam) re.Equal("{\"state\"=\"Up\"}", result.BodyParam) diff --git a/pkg/utils/requestutil/request_info.go b/pkg/utils/requestutil/request_info.go index 40724bb790f..cc5403f7232 100644 --- a/pkg/utils/requestutil/request_info.go +++ b/pkg/utils/requestutil/request_info.go @@ -27,9 +27,11 @@ import ( // RequestInfo holds service information from http.Request type RequestInfo struct { - ServiceLabel string - Method string - Component string + ServiceLabel string + Method string + // CallerID is used to identify the specific source of a HTTP request, it will be marked in + // the PD HTTP client, with granularity that can be refined to a specific functionality within a component. + CallerID string IP string Port string URLParam string @@ -38,8 +40,8 @@ type RequestInfo struct { } func (info *RequestInfo) String() string { - s := fmt.Sprintf("{ServiceLabel:%s, Method:%s, Component:%s, IP:%s, Port:%s, StartTime:%s, URLParam:%s, BodyParam:%s}", - info.ServiceLabel, info.Method, info.Component, info.IP, info.Port, time.Unix(info.StartTimeStamp, 0), info.URLParam, info.BodyParam) + s := fmt.Sprintf("{ServiceLabel:%s, Method:%s, CallerID:%s, IP:%s, Port:%s, StartTime:%s, URLParam:%s, BodyParam:%s}", + info.ServiceLabel, info.Method, info.CallerID, info.IP, info.Port, time.Unix(info.StartTimeStamp, 0), info.URLParam, info.BodyParam) return s } @@ -49,7 +51,7 @@ func GetRequestInfo(r *http.Request) RequestInfo { return RequestInfo{ ServiceLabel: apiutil.GetRouteName(r), Method: fmt.Sprintf("%s/%s:%s", r.Proto, r.Method, r.URL.Path), - Component: apiutil.GetComponentNameOnHTTP(r), + CallerID: apiutil.GetCallerIDOnHTTP(r), IP: ip, Port: port, URLParam: getURLParam(r), diff --git a/pkg/utils/testutil/api_check.go b/pkg/utils/testutil/api_check.go index 84af97f828d..58934bf08f6 100644 --- a/pkg/utils/testutil/api_check.go +++ b/pkg/utils/testutil/api_check.go @@ -37,29 +37,29 @@ func StatusOK(re *require.Assertions) func([]byte, int, http.Header) { // StatusNotOK is used to check whether http response code is not equal http.StatusOK. func StatusNotOK(re *require.Assertions) func([]byte, int, http.Header) { - return func(_ []byte, i int, _ http.Header) { - re.NotEqual(http.StatusOK, i) + return func(resp []byte, i int, _ http.Header) { + re.NotEqual(http.StatusOK, i, "resp: "+string(resp)) } } // ExtractJSON is used to check whether given data can be extracted successfully. 
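Editor's note: `NeedRebuildConnection` above classifies errors by their gRPC code string, treating `Canceled` as a deliberate caller-side stop. A small example of the resulting behavior:

```go
package main

import (
	"fmt"
	"io"

	"github.com/tikv/pd/pkg/utils/grpcutil"
	"google.golang.org/grpc/codes"
	"google.golang.org/grpc/status"
)

func main() {
	// io.EOF and Unavailable-style failures should trigger a connection rebuild;
	// Canceled means the caller gave up on purpose, so no rebuild is needed.
	fmt.Println(grpcutil.NeedRebuildConnection(io.EOF))                                       // true
	fmt.Println(grpcutil.NeedRebuildConnection(status.Error(codes.Unavailable, "try again"))) // true
	fmt.Println(grpcutil.NeedRebuildConnection(status.Error(codes.Canceled, "ctx canceled"))) // false
}
```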
func ExtractJSON(re *require.Assertions, data interface{}) func([]byte, int, http.Header) { - return func(res []byte, _ int, _ http.Header) { - re.NoError(json.Unmarshal(res, data)) + return func(resp []byte, _ int, _ http.Header) { + re.NoError(json.Unmarshal(resp, data), "resp: "+string(resp)) } } // StringContain is used to check whether response context contains given string. func StringContain(re *require.Assertions, sub string) func([]byte, int, http.Header) { - return func(res []byte, _ int, _ http.Header) { - re.Contains(string(res), sub) + return func(resp []byte, _ int, _ http.Header) { + re.Contains(string(resp), sub, "resp: "+string(resp)) } } // StringEqual is used to check whether response context equal given string. func StringEqual(re *require.Assertions, str string) func([]byte, int, http.Header) { - return func(res []byte, _ int, _ http.Header) { - re.Contains(string(res), str) + return func(resp []byte, _ int, _ http.Header) { + re.Contains(string(resp), str, "resp: "+string(resp)) } } @@ -88,7 +88,7 @@ func ReadGetJSON(re *require.Assertions, client *http.Client, url string, data i } // ReadGetJSONWithBody is used to do get request with input and check whether given data can be extracted successfully. -func ReadGetJSONWithBody(re *require.Assertions, client *http.Client, url string, input []byte, data interface{}) error { +func ReadGetJSONWithBody(re *require.Assertions, client *http.Client, url string, input []byte, data interface{}, checkOpts ...func([]byte, int, http.Header)) error { resp, err := apiutil.GetJSON(client, url, input) if err != nil { return err @@ -114,6 +114,21 @@ func CheckGetJSON(client *http.Client, url string, data []byte, checkOpts ...fun return checkResp(resp, checkOpts...) } +// CheckGetUntilStatusCode is used to do get request and do check options. +func CheckGetUntilStatusCode(re *require.Assertions, client *http.Client, url string, code int) error { + var err error + Eventually(re, func() bool { + resp, err2 := apiutil.GetJSON(client, url, nil) + if err2 != nil { + err = err2 + return true + } + defer resp.Body.Close() + return resp.StatusCode == code + }) + return err +} + // CheckPatchJSON is used to do patch request and do check options. func CheckPatchJSON(client *http.Client, url string, data []byte, checkOpts ...func([]byte, int, http.Header)) error { resp, err := apiutil.PatchJSON(client, url, data) diff --git a/pkg/utils/tsoutil/tsoutil.go b/pkg/utils/tsoutil/tsoutil.go index 796012ae031..43d8b09aa49 100644 --- a/pkg/utils/tsoutil/tsoutil.go +++ b/pkg/utils/tsoutil/tsoutil.go @@ -25,6 +25,11 @@ const ( logicalBits = (1 << physicalShiftBits) - 1 ) +// TimeToTS converts a `time.Time` to an `uint64` TS. +func TimeToTS(t time.Time) uint64 { + return ComposeTS(t.UnixNano()/int64(time.Millisecond), 0) +} + // ParseTS parses the ts to (physical,logical). func ParseTS(ts uint64) (time.Time, uint64) { physical, logical := ParseTSUint64(ts) diff --git a/pkg/versioninfo/fips.go b/pkg/versioninfo/fips.go new file mode 100644 index 00000000000..02478b103fa --- /dev/null +++ b/pkg/versioninfo/fips.go @@ -0,0 +1,26 @@ +// Copyright 2023 TiKV Project Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//go:build boringcrypto +// +build boringcrypto + +package versioninfo + +import ( + _ "crypto/tls/fipsonly" +) + +func init() { + PDReleaseVersion += "-fips" +} diff --git a/pkg/window/policy.go b/pkg/window/policy.go index d67a8aa6e59..fed4fedc32a 100644 --- a/pkg/window/policy.go +++ b/pkg/window/policy.go @@ -18,8 +18,9 @@ package window import ( - "sync" "time" + + "github.com/tikv/pd/pkg/utils/syncutil" ) // RollingPolicy is a policy for ring window based on time duration. @@ -27,7 +28,7 @@ import ( // e.g. If the last point is appended one bucket duration ago, // RollingPolicy will increment current offset. type RollingPolicy struct { - mu sync.RWMutex + mu syncutil.RWMutex size int window *Window offset int diff --git a/pkg/window/policy_test.go b/pkg/window/policy_test.go index 14b3b326192..489c8428c9a 100644 --- a/pkg/window/policy_test.go +++ b/pkg/window/policy_test.go @@ -26,9 +26,12 @@ import ( "github.com/stretchr/testify/require" ) -func GetRollingPolicy() *RollingPolicy { - w := NewWindow(Options{Size: 3}) - return NewRollingPolicy(w, RollingPolicyOpts{BucketDuration: 100 * time.Millisecond}) +const defaultBucketDuration = 100 * time.Millisecond +const defaultSize = 3 + +func getRollingPolicy() *RollingPolicy { + w := NewWindow(Options{Size: defaultSize}) + return NewRollingPolicy(w, RollingPolicyOpts{BucketDuration: defaultBucketDuration}) } func TestRollingPolicy_Add(t *testing.T) { @@ -45,6 +48,7 @@ func TestRollingPolicy_Add(t *testing.T) { points: []float64{1, 1}, }, { + // In CI, the actual sleep time may be more than 100 (timeSleep = 94). 
timeSleep: []int{94, 250}, offset: []int{0, 0}, points: []float64{1, 1}, @@ -60,14 +64,25 @@ func TestRollingPolicy_Add(t *testing.T) { t.Run("test policy add", func(t *testing.T) { var totalTS, lastOffset int timeSleep := test.timeSleep - policy := GetRollingPolicy() + beginTime := time.Now() + policy := getRollingPolicy() + points := make([]float64, defaultSize) + asExpected := true for i, n := range timeSleep { totalTS += n time.Sleep(time.Duration(n) * time.Millisecond) - offset, point := test.offset[i], test.points[i] + point := test.points[i] + offset := int(time.Since(beginTime)/defaultBucketDuration) % defaultSize + points[i] += point policy.Add(point) - - re.Less(math.Abs(point-policy.window.buckets[offset].Points[0]), 1e-6, + if offset != test.offset[i] { + asExpected = false + } + if asExpected { + re.Less(math.Abs(point-policy.window.buckets[offset].Points[0]), 1e-6, + fmt.Sprintf("error, time since last append: %vms, last offset: %v", totalTS, lastOffset)) + } + re.Less(math.Abs(points[i]-policy.window.buckets[offset].Points[0]), 1e-6, fmt.Sprintf("error, time since last append: %vms, last offset: %v", totalTS, lastOffset)) lastOffset = offset } @@ -78,7 +93,7 @@ func TestRollingPolicy_Add(t *testing.T) { func TestRollingPolicy_AddWithTimespan(t *testing.T) { re := require.New(t) t.Run("timespan < bucket number", func(t *testing.T) { - policy := GetRollingPolicy() + policy := getRollingPolicy() // bucket 0 policy.Add(0) // bucket 1 @@ -102,7 +117,7 @@ func TestRollingPolicy_AddWithTimespan(t *testing.T) { }) t.Run("timespan > bucket number", func(t *testing.T) { - policy := GetRollingPolicy() + policy := getRollingPolicy() // bucket 0 policy.Add(0) diff --git a/plugin/scheduler_example/evict_leader.go b/plugin/scheduler_example/evict_leader.go index 8919d1bdb4b..063ae9eb150 100644 --- a/plugin/scheduler_example/evict_leader.go +++ b/plugin/scheduler_example/evict_leader.go @@ -186,7 +186,7 @@ func (s *evictLeaderScheduler) EncodeConfig() ([]byte, error) { return schedulers.EncodeConfig(s.conf) } -func (s *evictLeaderScheduler) Prepare(cluster sche.SchedulerCluster) error { +func (s *evictLeaderScheduler) PrepareConfig(cluster sche.SchedulerCluster) error { s.conf.mu.RLock() defer s.conf.mu.RUnlock() var res error @@ -198,7 +198,7 @@ func (s *evictLeaderScheduler) Prepare(cluster sche.SchedulerCluster) error { return res } -func (s *evictLeaderScheduler) Cleanup(cluster sche.SchedulerCluster) { +func (s *evictLeaderScheduler) CleanConfig(cluster sche.SchedulerCluster) { s.conf.mu.RLock() defer s.conf.mu.RUnlock() for id := range s.conf.StoreIDWitRanges { diff --git a/scripts/ci-subtask.sh b/scripts/ci-subtask.sh index 389d7f43341..5d7392efe11 100755 --- a/scripts/ci-subtask.sh +++ b/scripts/ci-subtask.sh @@ -10,7 +10,9 @@ if [[ $2 -gt 10 ]]; then # Currently, we only have 3 integration tests, so we can hardcode the task index. for t in ${integration_tasks[@]}; do if [[ "$t" = "./tests/integrations/client" && "$2" = 11 ]]; then - printf "%s " "$t" + res=("./client") + res+=($t) + printf "%s " "${res[@]}" break elif [[ "$t" = "./tests/integrations/tso" && "$2" = 12 ]]; then printf "%s " "$t" @@ -29,10 +31,22 @@ else weight() { [[ $1 == "github.com/tikv/pd/server/api" ]] && return 30 [[ $1 == "github.com/tikv/pd/pkg/schedule" ]] && return 30 + [[ $1 == "github.com/tikv/pd/pkg/core" ]] && return 30 + [[ $1 == "github.com/tikv/pd/tests/server/api" ]] && return 30 [[ $1 =~ "pd/tests" ]] && return 5 return 1 } + # Create an associative array to store the weight of each task. 
+ declare -A task_weights + for t in ${tasks[@]}; do + weight $t + task_weights[$t]=$? + done + + # Sort tasks by weight in descending order. + tasks=($(printf "%s\n" "${tasks[@]}" | sort -rn)) + scores=($(seq "$1" | xargs -I{} echo 0)) res=() @@ -41,8 +55,7 @@ else for i in ${!scores[@]}; do [[ ${scores[i]} -lt ${scores[$min_i]} ]] && min_i=$i done - weight $t - scores[$min_i]=$((${scores[$min_i]} + $?)) + scores[$min_i]=$((${scores[$min_i]} + ${task_weights[$t]})) [[ $(($min_i + 1)) -eq $2 ]] && res+=($t) done printf "%s " "${res[@]}" diff --git a/server/api/admin.go b/server/api/admin.go index 7a1dfb0f1e8..49fe7cdc567 100644 --- a/server/api/admin.go +++ b/server/api/admin.go @@ -16,6 +16,7 @@ package api import ( "encoding/json" + "fmt" "io" "net/http" "strconv" @@ -24,6 +25,7 @@ import ( "github.com/pingcap/log" "github.com/tikv/pd/pkg/core" "github.com/tikv/pd/pkg/errs" + "github.com/tikv/pd/pkg/mcs/utils" "github.com/tikv/pd/pkg/utils/apiutil" "github.com/tikv/pd/server" "github.com/unrolled/render" @@ -59,7 +61,11 @@ func (h *adminHandler) DeleteRegionCache(w http.ResponseWriter, r *http.Request) return } rc.DropCacheRegion(regionID) - h.rd.JSON(w, http.StatusOK, "The region is removed from server cache.") + if h.svr.IsServiceIndependent(utils.SchedulingServiceName) { + err = h.DeleteRegionCacheInSchedulingServer(regionID) + } + msg := "The region is removed from server cache." + h.rd.JSON(w, http.StatusOK, h.buildMsg(msg, err)) } // @Tags admin @@ -95,8 +101,11 @@ func (h *adminHandler) DeleteRegionStorage(w http.ResponseWriter, r *http.Reques } // Remove region from cache. rc.DropCacheRegion(regionID) - - h.rd.JSON(w, http.StatusOK, "The region is removed from server cache and region meta storage.") + if h.svr.IsServiceIndependent(utils.SchedulingServiceName) { + err = h.DeleteRegionCacheInSchedulingServer(regionID) + } + msg := "The region is removed from server cache and region meta storage." + h.rd.JSON(w, http.StatusOK, h.buildMsg(msg, err)) } // @Tags admin @@ -105,9 +114,14 @@ func (h *adminHandler) DeleteRegionStorage(w http.ResponseWriter, r *http.Reques // @Success 200 {string} string "All regions are removed from server cache." // @Router /admin/cache/regions [delete] func (h *adminHandler) DeleteAllRegionCache(w http.ResponseWriter, r *http.Request) { + var err error rc := getCluster(r) rc.DropCacheAllRegion() - h.rd.JSON(w, http.StatusOK, "All regions are removed from server cache.") + if h.svr.IsServiceIndependent(utils.SchedulingServiceName) { + err = h.DeleteRegionCacheInSchedulingServer() + } + msg := "All regions are removed from server cache." 
+ h.rd.JSON(w, http.StatusOK, h.buildMsg(msg, err)) } // Intentionally no swagger mark as it is supposed to be only used in @@ -200,3 +214,35 @@ func (h *adminHandler) RecoverAllocID(w http.ResponseWriter, r *http.Request) { _ = h.rd.Text(w, http.StatusOK, "") } + +func (h *adminHandler) DeleteRegionCacheInSchedulingServer(id ...uint64) error { + addr, ok := h.svr.GetServicePrimaryAddr(h.svr.Context(), utils.SchedulingServiceName) + if !ok { + return errs.ErrNotFoundSchedulingAddr.FastGenByArgs() + } + var idStr string + if len(id) > 0 { + idStr = strconv.FormatUint(id[0], 10) + } + url := fmt.Sprintf("%s/scheduling/api/v1/admin/cache/regions/%s", addr, idStr) + req, err := http.NewRequest(http.MethodDelete, url, http.NoBody) + if err != nil { + return err + } + resp, err := h.svr.GetHTTPClient().Do(req) + if err != nil { + return err + } + defer resp.Body.Close() + if resp.StatusCode != http.StatusOK { + return errs.ErrSchedulingServer.FastGenByArgs(resp.StatusCode) + } + return nil +} + +func (h *adminHandler) buildMsg(msg string, err error) string { + if h.svr.IsServiceIndependent(utils.SchedulingServiceName) && err != nil { + return fmt.Sprintf("This operation was executed in API server but needs to be re-executed on scheduling server due to the following error: %s", err.Error()) + } + return msg +} diff --git a/server/api/admin_test.go b/server/api/admin_test.go index 09130fd8385..76c5e729eb0 100644 --- a/server/api/admin_test.go +++ b/server/api/admin_test.go @@ -18,6 +18,7 @@ import ( "context" "encoding/json" "fmt" + "io" "net/http" "testing" "time" @@ -27,6 +28,7 @@ import ( "github.com/stretchr/testify/suite" "github.com/tikv/pd/pkg/core" "github.com/tikv/pd/pkg/replication" + "github.com/tikv/pd/pkg/utils/apiutil" tu "github.com/tikv/pd/pkg/utils/testutil" "github.com/tikv/pd/server" ) @@ -83,7 +85,7 @@ func (suite *adminTestSuite) TestDropRegion() { // After drop region from cache, lower version is accepted. url := fmt.Sprintf("%s/admin/cache/region/%d", suite.urlPrefix, region.GetID()) - req, err := http.NewRequest(http.MethodDelete, url, nil) + req, err := http.NewRequest(http.MethodDelete, url, http.NoBody) suite.NoError(err) res, err := testDialClient.Do(req) suite.NoError(err) @@ -145,7 +147,7 @@ func (suite *adminTestSuite) TestDropRegions() { // After drop all regions from cache, lower version is accepted. 
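Editor's note: the tests and handlers in this patch consistently replace `nil` request bodies with `http.NoBody`, which is an explicit zero-length body rather than a nil `io.Reader`. A tiny sketch of the idiom against the region-cache admin route exercised above; the host address is illustrative.

```go
package main

import (
	"fmt"
	"net/http"
)

func main() {
	// DELETE with an explicit empty body instead of nil.
	req, err := http.NewRequest(http.MethodDelete,
		"http://127.0.0.1:2379/pd/api/v1/admin/cache/regions", http.NoBody)
	if err != nil {
		fmt.Println(err)
		return
	}
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		fmt.Println(err)
		return
	}
	defer resp.Body.Close()
	fmt.Println(resp.Status)
}
```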
url := fmt.Sprintf("%s/admin/cache/regions", suite.urlPrefix) - req, err := http.NewRequest(http.MethodDelete, url, nil) + req, err := http.NewRequest(http.MethodDelete, url, http.NoBody) suite.NoError(err) res, err := testDialClient.Do(req) suite.NoError(err) @@ -188,9 +190,24 @@ func (suite *adminTestSuite) TestResetTS() { values, err := json.Marshal(args) suite.NoError(err) re := suite.Require() - err = tu.CheckPostJSON(testDialClient, url, values, - tu.StatusOK(re), - tu.StringEqual(re, "\"Reset ts successfully.\"\n")) + tu.Eventually(re, func() bool { + resp, err := apiutil.PostJSON(testDialClient, url, values) + re.NoError(err) + defer resp.Body.Close() + b, err := io.ReadAll(resp.Body) + re.NoError(err) + switch resp.StatusCode { + case http.StatusOK: + re.Contains(string(b), "Reset ts successfully.") + return true + case http.StatusServiceUnavailable: + re.Contains(string(b), "[PD:etcd:ErrEtcdTxnConflict]etcd transaction failed, conflicted and rolled back") + return false + default: + re.FailNow("unexpected status code %d", resp.StatusCode) + return false + } + }) suite.NoError(err) t2 := makeTS(32 * time.Hour) args["tso"] = fmt.Sprintf("%d", t2) diff --git a/server/api/cluster_test.go b/server/api/cluster_test.go index 01aa6ba5f24..d6d8effa365 100644 --- a/server/api/cluster_test.go +++ b/server/api/cluster_test.go @@ -22,6 +22,7 @@ import ( "github.com/pingcap/kvproto/pkg/metapb" "github.com/stretchr/testify/suite" sc "github.com/tikv/pd/pkg/schedule/config" + "github.com/tikv/pd/pkg/schedule/placement" tu "github.com/tikv/pd/pkg/utils/testutil" "github.com/tikv/pd/server" "github.com/tikv/pd/server/cluster" @@ -57,7 +58,7 @@ func (suite *clusterTestSuite) TestCluster() { suite.svr.GetPersistOptions().SetPlacementRuleEnabled(true) suite.svr.GetPersistOptions().GetReplicationConfig().LocationLabels = []string{"host"} rm := suite.svr.GetRaftCluster().GetRuleManager() - rule := rm.GetRule("pd", "default") + rule := rm.GetRule(placement.DefaultGroupID, placement.DefaultRuleID) rule.LocationLabels = []string{"host"} rule.Count = 1 rm.SetRule(rule) @@ -81,7 +82,7 @@ func (suite *clusterTestSuite) TestCluster() { c1.MaxPeerCount = 6 suite.Equal(c2, c1) - suite.Equal(int(r.MaxReplicas), suite.svr.GetRaftCluster().GetRuleManager().GetRule("pd", "default").Count) + suite.Equal(int(r.MaxReplicas), suite.svr.GetRaftCluster().GetRuleManager().GetRule(placement.DefaultGroupID, placement.DefaultRuleID).Count) } func (suite *clusterTestSuite) testGetClusterStatus() { diff --git a/server/api/config.go b/server/api/config.go index c63bd953c37..e075095d7a9 100644 --- a/server/api/config.go +++ b/server/api/config.go @@ -27,6 +27,8 @@ import ( "github.com/pingcap/errcode" "github.com/pingcap/errors" "github.com/pingcap/log" + "github.com/tikv/pd/pkg/errs" + "github.com/tikv/pd/pkg/mcs/utils" sc "github.com/tikv/pd/pkg/schedule/config" "github.com/tikv/pd/pkg/utils/apiutil" "github.com/tikv/pd/pkg/utils/jsonutil" @@ -60,7 +62,17 @@ func newConfHandler(svr *server.Server, rd *render.Render) *confHandler { // @Router /config [get] func (h *confHandler) GetConfig(w http.ResponseWriter, r *http.Request) { cfg := h.svr.GetConfig() - cfg.Schedule.MaxMergeRegionKeys = cfg.Schedule.GetMaxMergeRegionKeys() + if h.svr.IsServiceIndependent(utils.SchedulingServiceName) { + schedulingServerConfig, err := h.GetSchedulingServerConfig() + if err != nil { + h.rd.JSON(w, http.StatusInternalServerError, err.Error()) + return + } + cfg.Schedule = schedulingServerConfig.Schedule + cfg.Replication = 
schedulingServerConfig.Replication + } else { + cfg.Schedule.MaxMergeRegionKeys = cfg.Schedule.GetMaxMergeRegionKeys() + } h.rd.JSON(w, http.StatusOK, cfg) } @@ -301,6 +313,16 @@ func getConfigMap(cfg map[string]interface{}, key []string, value interface{}) m // @Success 200 {object} sc.ScheduleConfig // @Router /config/schedule [get] func (h *confHandler) GetScheduleConfig(w http.ResponseWriter, r *http.Request) { + if h.svr.IsServiceIndependent(utils.SchedulingServiceName) { + cfg, err := h.GetSchedulingServerConfig() + if err != nil { + h.rd.JSON(w, http.StatusInternalServerError, err.Error()) + return + } + cfg.Schedule.SchedulersPayload = nil + h.rd.JSON(w, http.StatusOK, cfg.Schedule) + return + } cfg := h.svr.GetScheduleConfig() cfg.MaxMergeRegionKeys = cfg.GetMaxMergeRegionKeys() h.rd.JSON(w, http.StatusOK, cfg) @@ -364,6 +386,15 @@ func (h *confHandler) SetScheduleConfig(w http.ResponseWriter, r *http.Request) // @Success 200 {object} sc.ReplicationConfig // @Router /config/replicate [get] func (h *confHandler) GetReplicationConfig(w http.ResponseWriter, r *http.Request) { + if h.svr.IsServiceIndependent(utils.SchedulingServiceName) { + cfg, err := h.GetSchedulingServerConfig() + if err != nil { + h.rd.JSON(w, http.StatusInternalServerError, err.Error()) + return + } + h.rd.JSON(w, http.StatusOK, cfg.Replication) + return + } h.rd.JSON(w, http.StatusOK, h.svr.GetReplicationConfig()) } @@ -505,3 +536,33 @@ func (h *confHandler) SetReplicationModeConfig(w http.ResponseWriter, r *http.Re func (h *confHandler) GetPDServerConfig(w http.ResponseWriter, r *http.Request) { h.rd.JSON(w, http.StatusOK, h.svr.GetPDServerConfig()) } + +func (h *confHandler) GetSchedulingServerConfig() (*config.Config, error) { + addr, ok := h.svr.GetServicePrimaryAddr(h.svr.Context(), utils.SchedulingServiceName) + if !ok { + return nil, errs.ErrNotFoundSchedulingAddr.FastGenByArgs() + } + url := fmt.Sprintf("%s/scheduling/api/v1/config", addr) + req, err := http.NewRequest(http.MethodGet, url, http.NoBody) + if err != nil { + return nil, err + } + resp, err := h.svr.GetHTTPClient().Do(req) + if err != nil { + return nil, err + } + defer resp.Body.Close() + if resp.StatusCode != http.StatusOK { + return nil, errs.ErrSchedulingServer.FastGenByArgs(resp.StatusCode) + } + b, err := io.ReadAll(resp.Body) + if err != nil { + return nil, err + } + var schedulingServerConfig config.Config + err = json.Unmarshal(b, &schedulingServerConfig) + if err != nil { + return nil, err + } + return &schedulingServerConfig, nil +} diff --git a/server/api/config_test.go b/server/api/config_test.go deleted file mode 100644 index fbfb3f94518..00000000000 --- a/server/api/config_test.go +++ /dev/null @@ -1,440 +0,0 @@ -// Copyright 2016 TiKV Project Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
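Note: the new admin and config handlers added above (DeleteRegionCacheInSchedulingServer, GetSchedulingServerConfig) share one forwarding shape when the scheduling service runs independently: resolve the scheduling primary via GetServicePrimaryAddr, issue a plain HTTP request with http.NoBody, and treat any non-200 reply as errs.ErrSchedulingServer. A condensed sketch of that shape, not part of the patch itself (the package and function names here are illustrative only):

    package sketch

    import (
        "fmt"
        "io"
        "net/http"
    )

    // forwardToScheduling mirrors the pattern used by GetSchedulingServerConfig and
    // DeleteRegionCacheInSchedulingServer: build a request against the scheduling
    // primary, run it through the server's HTTP client, and only read the body on 200.
    func forwardToScheduling(client *http.Client, primaryAddr, method, path string) ([]byte, error) {
        req, err := http.NewRequest(method, primaryAddr+path, http.NoBody)
        if err != nil {
            return nil, err
        }
        resp, err := client.Do(req)
        if err != nil {
            return nil, err
        }
        defer resp.Body.Close()
        if resp.StatusCode != http.StatusOK {
            // The real handlers wrap this as errs.ErrSchedulingServer.FastGenByArgs(resp.StatusCode).
            return nil, fmt.Errorf("scheduling server returned status %d", resp.StatusCode)
        }
        return io.ReadAll(resp.Body)
    }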
- -package api - -import ( - "encoding/json" - "fmt" - "testing" - "time" - - "github.com/stretchr/testify/suite" - sc "github.com/tikv/pd/pkg/schedule/config" - tu "github.com/tikv/pd/pkg/utils/testutil" - "github.com/tikv/pd/pkg/utils/typeutil" - "github.com/tikv/pd/pkg/versioninfo" - "github.com/tikv/pd/server" - "github.com/tikv/pd/server/config" -) - -type configTestSuite struct { - suite.Suite - svr *server.Server - cleanup tu.CleanupFunc - urlPrefix string -} - -func TestConfigTestSuite(t *testing.T) { - suite.Run(t, new(configTestSuite)) -} - -func (suite *configTestSuite) SetupSuite() { - re := suite.Require() - suite.svr, suite.cleanup = mustNewServer(re, func(cfg *config.Config) { - cfg.Replication.EnablePlacementRules = false - }) - server.MustWaitLeader(re, []*server.Server{suite.svr}) - - addr := suite.svr.GetAddr() - suite.urlPrefix = fmt.Sprintf("%s%s/api/v1", addr, apiPrefix) -} - -func (suite *configTestSuite) TearDownSuite() { - suite.cleanup() -} - -func (suite *configTestSuite) TestConfigAll() { - re := suite.Require() - addr := fmt.Sprintf("%s/config", suite.urlPrefix) - cfg := &config.Config{} - err := tu.ReadGetJSON(re, testDialClient, addr, cfg) - suite.NoError(err) - - // the original way - r := map[string]int{"max-replicas": 5} - postData, err := json.Marshal(r) - suite.NoError(err) - err = tu.CheckPostJSON(testDialClient, addr, postData, tu.StatusOK(re)) - suite.NoError(err) - l := map[string]interface{}{ - "location-labels": "zone,rack", - "region-schedule-limit": 10, - } - postData, err = json.Marshal(l) - suite.NoError(err) - err = tu.CheckPostJSON(testDialClient, addr, postData, tu.StatusOK(re)) - suite.NoError(err) - - l = map[string]interface{}{ - "metric-storage": "http://127.0.0.1:9090", - } - postData, err = json.Marshal(l) - suite.NoError(err) - err = tu.CheckPostJSON(testDialClient, addr, postData, tu.StatusOK(re)) - suite.NoError(err) - - newCfg := &config.Config{} - err = tu.ReadGetJSON(re, testDialClient, addr, newCfg) - suite.NoError(err) - cfg.Replication.MaxReplicas = 5 - cfg.Replication.LocationLabels = []string{"zone", "rack"} - cfg.Schedule.RegionScheduleLimit = 10 - cfg.PDServerCfg.MetricStorage = "http://127.0.0.1:9090" - suite.Equal(newCfg, cfg) - - // the new way - l = map[string]interface{}{ - "schedule.tolerant-size-ratio": 2.5, - "schedule.enable-tikv-split-region": "false", - "replication.location-labels": "idc,host", - "pd-server.metric-storage": "http://127.0.0.1:1234", - "log.level": "warn", - "cluster-version": "v4.0.0-beta", - "replication-mode.replication-mode": "dr-auto-sync", - "replication-mode.dr-auto-sync.label-key": "foobar", - } - postData, err = json.Marshal(l) - suite.NoError(err) - err = tu.CheckPostJSON(testDialClient, addr, postData, tu.StatusOK(re)) - suite.NoError(err) - newCfg1 := &config.Config{} - err = tu.ReadGetJSON(re, testDialClient, addr, newCfg1) - suite.NoError(err) - cfg.Schedule.EnableTiKVSplitRegion = false - cfg.Schedule.TolerantSizeRatio = 2.5 - cfg.Replication.LocationLabels = []string{"idc", "host"} - cfg.PDServerCfg.MetricStorage = "http://127.0.0.1:1234" - cfg.Log.Level = "warn" - cfg.ReplicationMode.DRAutoSync.LabelKey = "foobar" - cfg.ReplicationMode.ReplicationMode = "dr-auto-sync" - v, err := versioninfo.ParseVersion("v4.0.0-beta") - suite.NoError(err) - cfg.ClusterVersion = *v - suite.Equal(cfg, newCfg1) - - // revert this to avoid it affects TestConfigTTL - l["schedule.enable-tikv-split-region"] = "true" - postData, err = json.Marshal(l) - suite.NoError(err) - err = 
tu.CheckPostJSON(testDialClient, addr, postData, tu.StatusOK(re)) - suite.NoError(err) - - // illegal prefix - l = map[string]interface{}{ - "replicate.max-replicas": 1, - } - postData, err = json.Marshal(l) - suite.NoError(err) - err = tu.CheckPostJSON(testDialClient, addr, postData, - tu.StatusNotOK(re), - tu.StringContain(re, "not found")) - suite.NoError(err) - - // update prefix directly - l = map[string]interface{}{ - "replication-mode": nil, - } - postData, err = json.Marshal(l) - suite.NoError(err) - err = tu.CheckPostJSON(testDialClient, addr, postData, - tu.StatusNotOK(re), - tu.StringContain(re, "cannot update config prefix")) - suite.NoError(err) - - // config item not found - l = map[string]interface{}{ - "schedule.region-limit": 10, - } - postData, err = json.Marshal(l) - suite.NoError(err) - err = tu.CheckPostJSON(testDialClient, addr, postData, tu.StatusNotOK(re), tu.StringContain(re, "not found")) - suite.NoError(err) -} - -func (suite *configTestSuite) TestConfigSchedule() { - re := suite.Require() - addr := fmt.Sprintf("%s/config/schedule", suite.urlPrefix) - scheduleConfig := &sc.ScheduleConfig{} - suite.NoError(tu.ReadGetJSON(re, testDialClient, addr, scheduleConfig)) - scheduleConfig.MaxStoreDownTime.Duration = time.Second - postData, err := json.Marshal(scheduleConfig) - suite.NoError(err) - err = tu.CheckPostJSON(testDialClient, addr, postData, tu.StatusOK(re)) - suite.NoError(err) - - scheduleConfig1 := &sc.ScheduleConfig{} - suite.NoError(tu.ReadGetJSON(re, testDialClient, addr, scheduleConfig1)) - suite.Equal(*scheduleConfig1, *scheduleConfig) -} - -func (suite *configTestSuite) TestConfigReplication() { - re := suite.Require() - addr := fmt.Sprintf("%s/config/replicate", suite.urlPrefix) - rc := &sc.ReplicationConfig{} - err := tu.ReadGetJSON(re, testDialClient, addr, rc) - suite.NoError(err) - - rc.MaxReplicas = 5 - rc1 := map[string]int{"max-replicas": 5} - postData, err := json.Marshal(rc1) - suite.NoError(err) - err = tu.CheckPostJSON(testDialClient, addr, postData, tu.StatusOK(re)) - suite.NoError(err) - - rc.LocationLabels = []string{"zone", "rack"} - rc2 := map[string]string{"location-labels": "zone,rack"} - postData, err = json.Marshal(rc2) - suite.NoError(err) - err = tu.CheckPostJSON(testDialClient, addr, postData, tu.StatusOK(re)) - suite.NoError(err) - - rc.IsolationLevel = "zone" - rc3 := map[string]string{"isolation-level": "zone"} - postData, err = json.Marshal(rc3) - suite.NoError(err) - err = tu.CheckPostJSON(testDialClient, addr, postData, tu.StatusOK(re)) - suite.NoError(err) - - rc4 := &sc.ReplicationConfig{} - err = tu.ReadGetJSON(re, testDialClient, addr, rc4) - suite.NoError(err) - - suite.Equal(*rc4, *rc) -} - -func (suite *configTestSuite) TestConfigLabelProperty() { - re := suite.Require() - addr := suite.svr.GetAddr() + apiPrefix + "/api/v1/config/label-property" - loadProperties := func() config.LabelPropertyConfig { - var cfg config.LabelPropertyConfig - err := tu.ReadGetJSON(re, testDialClient, addr, &cfg) - suite.NoError(err) - return cfg - } - - cfg := loadProperties() - suite.Empty(cfg) - - cmds := []string{ - `{"type": "foo", "action": "set", "label-key": "zone", "label-value": "cn1"}`, - `{"type": "foo", "action": "set", "label-key": "zone", "label-value": "cn2"}`, - `{"type": "bar", "action": "set", "label-key": "host", "label-value": "h1"}`, - } - for _, cmd := range cmds { - err := tu.CheckPostJSON(testDialClient, addr, []byte(cmd), tu.StatusOK(re)) - suite.NoError(err) - } - - cfg = loadProperties() - suite.Len(cfg, 2) - 
suite.Equal([]config.StoreLabel{ - {Key: "zone", Value: "cn1"}, - {Key: "zone", Value: "cn2"}, - }, cfg["foo"]) - suite.Equal([]config.StoreLabel{{Key: "host", Value: "h1"}}, cfg["bar"]) - - cmds = []string{ - `{"type": "foo", "action": "delete", "label-key": "zone", "label-value": "cn1"}`, - `{"type": "bar", "action": "delete", "label-key": "host", "label-value": "h1"}`, - } - for _, cmd := range cmds { - err := tu.CheckPostJSON(testDialClient, addr, []byte(cmd), tu.StatusOK(re)) - suite.NoError(err) - } - - cfg = loadProperties() - suite.Len(cfg, 1) - suite.Equal([]config.StoreLabel{{Key: "zone", Value: "cn2"}}, cfg["foo"]) -} - -func (suite *configTestSuite) TestConfigDefault() { - addr := fmt.Sprintf("%s/config", suite.urlPrefix) - - r := map[string]int{"max-replicas": 5} - postData, err := json.Marshal(r) - suite.NoError(err) - re := suite.Require() - err = tu.CheckPostJSON(testDialClient, addr, postData, tu.StatusOK(re)) - suite.NoError(err) - l := map[string]interface{}{ - "location-labels": "zone,rack", - "region-schedule-limit": 10, - } - postData, err = json.Marshal(l) - suite.NoError(err) - err = tu.CheckPostJSON(testDialClient, addr, postData, tu.StatusOK(re)) - suite.NoError(err) - - l = map[string]interface{}{ - "metric-storage": "http://127.0.0.1:9090", - } - postData, err = json.Marshal(l) - suite.NoError(err) - err = tu.CheckPostJSON(testDialClient, addr, postData, tu.StatusOK(re)) - suite.NoError(err) - - addr = fmt.Sprintf("%s/config/default", suite.urlPrefix) - defaultCfg := &config.Config{} - err = tu.ReadGetJSON(re, testDialClient, addr, defaultCfg) - suite.NoError(err) - - suite.Equal(uint64(3), defaultCfg.Replication.MaxReplicas) - suite.Equal(typeutil.StringSlice([]string{}), defaultCfg.Replication.LocationLabels) - suite.Equal(uint64(2048), defaultCfg.Schedule.RegionScheduleLimit) - suite.Equal("", defaultCfg.PDServerCfg.MetricStorage) -} - -func (suite *configTestSuite) TestConfigPDServer() { - re := suite.Require() - addrPost := fmt.Sprintf("%s/config", suite.urlPrefix) - ms := map[string]interface{}{ - "metric-storage": "", - } - postData, err := json.Marshal(ms) - suite.NoError(err) - suite.NoError(tu.CheckPostJSON(testDialClient, addrPost, postData, tu.StatusOK(re))) - addrGet := fmt.Sprintf("%s/config/pd-server", suite.urlPrefix) - sc := &config.PDServerConfig{} - suite.NoError(tu.ReadGetJSON(re, testDialClient, addrGet, sc)) - suite.Equal(bool(true), sc.UseRegionStorage) - suite.Equal("table", sc.KeyType) - suite.Equal(typeutil.StringSlice([]string{}), sc.RuntimeServices) - suite.Equal("", sc.MetricStorage) - suite.Equal("auto", sc.DashboardAddress) - suite.Equal(int(3), sc.FlowRoundByDigit) - suite.Equal(typeutil.NewDuration(time.Second), sc.MinResolvedTSPersistenceInterval) - suite.Equal(24*time.Hour, sc.MaxResetTSGap.Duration) -} - -var ttlConfig = map[string]interface{}{ - "schedule.max-snapshot-count": 999, - "schedule.enable-location-replacement": false, - "schedule.max-merge-region-size": 999, - "schedule.max-merge-region-keys": 999, - "schedule.scheduler-max-waiting-operator": 999, - "schedule.leader-schedule-limit": 999, - "schedule.region-schedule-limit": 999, - "schedule.hot-region-schedule-limit": 999, - "schedule.replica-schedule-limit": 999, - "schedule.merge-schedule-limit": 999, - "schedule.enable-tikv-split-region": false, -} - -var invalidTTLConfig = map[string]interface{}{ - "schedule.invalid-ttl-config": 0, -} - -func assertTTLConfig( - options *config.PersistOptions, - equality func(interface{}, interface{}, ...interface{}) bool, -) { - 
equality(uint64(999), options.GetMaxSnapshotCount()) - equality(false, options.IsLocationReplacementEnabled()) - equality(uint64(999), options.GetMaxMergeRegionSize()) - equality(uint64(999), options.GetMaxMergeRegionKeys()) - equality(uint64(999), options.GetSchedulerMaxWaitingOperator()) - equality(uint64(999), options.GetLeaderScheduleLimit()) - equality(uint64(999), options.GetRegionScheduleLimit()) - equality(uint64(999), options.GetHotRegionScheduleLimit()) - equality(uint64(999), options.GetReplicaScheduleLimit()) - equality(uint64(999), options.GetMergeScheduleLimit()) - equality(false, options.IsTikvRegionSplitEnabled()) -} - -func createTTLUrl(url string, ttl int) string { - return fmt.Sprintf("%s/config?ttlSecond=%d", url, ttl) -} - -func (suite *configTestSuite) TestConfigTTL() { - postData, err := json.Marshal(ttlConfig) - suite.NoError(err) - - // test no config and cleaning up - re := suite.Require() - err = tu.CheckPostJSON(testDialClient, createTTLUrl(suite.urlPrefix, 0), postData, tu.StatusOK(re)) - suite.NoError(err) - assertTTLConfig(suite.svr.GetPersistOptions(), suite.NotEqual) - - // test time goes by - err = tu.CheckPostJSON(testDialClient, createTTLUrl(suite.urlPrefix, 1), postData, tu.StatusOK(re)) - suite.NoError(err) - assertTTLConfig(suite.svr.GetPersistOptions(), suite.Equal) - time.Sleep(2 * time.Second) - assertTTLConfig(suite.svr.GetPersistOptions(), suite.NotEqual) - - // test cleaning up - err = tu.CheckPostJSON(testDialClient, createTTLUrl(suite.urlPrefix, 1), postData, tu.StatusOK(re)) - suite.NoError(err) - assertTTLConfig(suite.svr.GetPersistOptions(), suite.Equal) - err = tu.CheckPostJSON(testDialClient, createTTLUrl(suite.urlPrefix, 0), postData, tu.StatusOK(re)) - suite.NoError(err) - assertTTLConfig(suite.svr.GetPersistOptions(), suite.NotEqual) - - postData, err = json.Marshal(invalidTTLConfig) - suite.NoError(err) - err = tu.CheckPostJSON(testDialClient, createTTLUrl(suite.urlPrefix, 1), postData, - tu.StatusNotOK(re), tu.StringEqual(re, "\"unsupported ttl config schedule.invalid-ttl-config\"\n")) - suite.NoError(err) - - // only set max-merge-region-size - mergeConfig := map[string]interface{}{ - "schedule.max-merge-region-size": 999, - } - postData, err = json.Marshal(mergeConfig) - suite.NoError(err) - - err = tu.CheckPostJSON(testDialClient, createTTLUrl(suite.urlPrefix, 1), postData, tu.StatusOK(re)) - suite.NoError(err) - suite.Equal(uint64(999), suite.svr.GetPersistOptions().GetMaxMergeRegionSize()) - // max-merge-region-keys should keep consistence with max-merge-region-size. 
- suite.Equal(uint64(999*10000), suite.svr.GetPersistOptions().GetMaxMergeRegionKeys()) - - // on invalid value, we use default config - mergeConfig = map[string]interface{}{ - "schedule.enable-tikv-split-region": "invalid", - } - postData, err = json.Marshal(mergeConfig) - suite.NoError(err) - err = tu.CheckPostJSON(testDialClient, createTTLUrl(suite.urlPrefix, 1), postData, tu.StatusOK(re)) - suite.NoError(err) - suite.True(suite.svr.GetPersistOptions().IsTikvRegionSplitEnabled()) -} - -func (suite *configTestSuite) TestTTLConflict() { - addr := createTTLUrl(suite.urlPrefix, 1) - postData, err := json.Marshal(ttlConfig) - suite.NoError(err) - re := suite.Require() - err = tu.CheckPostJSON(testDialClient, addr, postData, tu.StatusOK(re)) - suite.NoError(err) - assertTTLConfig(suite.svr.GetPersistOptions(), suite.Equal) - - cfg := map[string]interface{}{"max-snapshot-count": 30} - postData, err = json.Marshal(cfg) - suite.NoError(err) - addr = fmt.Sprintf("%s/config", suite.urlPrefix) - err = tu.CheckPostJSON(testDialClient, addr, postData, tu.StatusNotOK(re), tu.StringEqual(re, "\"need to clean up TTL first for schedule.max-snapshot-count\"\n")) - suite.NoError(err) - addr = fmt.Sprintf("%s/config/schedule", suite.urlPrefix) - err = tu.CheckPostJSON(testDialClient, addr, postData, tu.StatusNotOK(re), tu.StringEqual(re, "\"need to clean up TTL first for schedule.max-snapshot-count\"\n")) - suite.NoError(err) - cfg = map[string]interface{}{"schedule.max-snapshot-count": 30} - postData, err = json.Marshal(cfg) - suite.NoError(err) - err = tu.CheckPostJSON(testDialClient, createTTLUrl(suite.urlPrefix, 0), postData, tu.StatusOK(re)) - suite.NoError(err) - err = tu.CheckPostJSON(testDialClient, addr, postData, tu.StatusOK(re)) - suite.NoError(err) -} diff --git a/server/api/diagnostic_test.go b/server/api/diagnostic_test.go index 1774c221539..4e08426ea43 100644 --- a/server/api/diagnostic_test.go +++ b/server/api/diagnostic_test.go @@ -17,6 +17,7 @@ package api import ( "encoding/json" "fmt" + "net/http" "testing" "time" @@ -63,6 +64,8 @@ func (suite *diagnosticTestSuite) TearDownSuite() { func (suite *diagnosticTestSuite) checkStatus(status string, url string) { re := suite.Require() + err := tu.CheckGetUntilStatusCode(re, testDialClient, url, http.StatusOK) + suite.NoError(err) suite.Eventually(func() bool { result := &schedulers.DiagnosticResult{} err := tu.ReadGetJSON(re, testDialClient, url, result) diff --git a/server/api/middleware.go b/server/api/middleware.go index 20ea61109ae..6536935592f 100644 --- a/server/api/middleware.go +++ b/server/api/middleware.go @@ -69,7 +69,7 @@ func (rm *requestInfoMiddleware) ServeHTTP(w http.ResponseWriter, r *http.Reques w.Header().Add("body-param", requestInfo.BodyParam) w.Header().Add("url-param", requestInfo.URLParam) w.Header().Add("method", requestInfo.Method) - w.Header().Add("component", requestInfo.Component) + w.Header().Add("caller-id", requestInfo.CallerID) w.Header().Add("ip", requestInfo.IP) }) @@ -114,7 +114,7 @@ func newAuditMiddleware(s *server.Server) negroni.Handler { return &auditMiddleware{svr: s} } -// ServeHTTP is used to implememt negroni.Handler for auditMiddleware +// ServeHTTP is used to implement negroni.Handler for auditMiddleware func (s *auditMiddleware) ServeHTTP(w http.ResponseWriter, r *http.Request, next http.HandlerFunc) { if !s.svr.GetServiceMiddlewarePersistOptions().IsAuditEnabled() { next(w, r) @@ -164,7 +164,7 @@ func newRateLimitMiddleware(s *server.Server) negroni.Handler { return &rateLimitMiddleware{svr: s} } 
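Note: the region handlers below all drop their hand-rolled ?limit= parsing in favor of h.AdjustLimit, called either with just the raw query value or with an extra default (256 for the accelerate-schedule endpoints). The helper itself lives in server.Handler rather than in this diff; a plausible stand-in, assuming it keeps the removed defaultRegionLimit of 16 and maxRegionLimit cap of 10240, would be:

    package sketch

    import "strconv"

    // adjustLimit is an assumed stand-in for server.Handler.AdjustLimit: parse the raw
    // query value, fall back to a default when empty, and clamp to the old maximum.
    func adjustLimit(limitStr string, defaultLimit ...int) (int, error) {
        limit := 16 // matches the removed defaultRegionLimit
        if len(defaultLimit) > 0 {
            limit = defaultLimit[0]
        }
        if limitStr != "" {
            var err error
            limit, err = strconv.Atoi(limitStr)
            if err != nil {
                return 0, err
            }
        }
        if limit > 10240 { // matches the removed maxRegionLimit
            limit = 10240
        }
        return limit, nil
    }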
-// ServeHTTP is used to implememt negroni.Handler for rateLimitMiddleware +// ServeHTTP is used to implement negroni.Handler for rateLimitMiddleware func (s *rateLimitMiddleware) ServeHTTP(w http.ResponseWriter, r *http.Request, next http.HandlerFunc) { if !s.svr.GetServiceMiddlewarePersistOptions().IsRateLimitEnabled() { next(w, r) diff --git a/server/api/region.go b/server/api/region.go index 68e280f610c..62713cb6dcd 100644 --- a/server/api/region.go +++ b/server/api/region.go @@ -27,21 +27,18 @@ import ( "github.com/gorilla/mux" jwriter "github.com/mailru/easyjson/jwriter" - "github.com/pingcap/failpoint" + "github.com/pingcap/errors" "github.com/pingcap/kvproto/pkg/metapb" "github.com/pingcap/kvproto/pkg/pdpb" "github.com/pingcap/kvproto/pkg/replication_modepb" - "github.com/pingcap/log" "github.com/tikv/pd/pkg/core" "github.com/tikv/pd/pkg/errs" "github.com/tikv/pd/pkg/keyspace" - "github.com/tikv/pd/pkg/schedule/filter" "github.com/tikv/pd/pkg/statistics" "github.com/tikv/pd/pkg/utils/apiutil" "github.com/tikv/pd/pkg/utils/typeutil" "github.com/tikv/pd/server" "github.com/unrolled/render" - "go.uber.org/zap" ) // MetaPeer is api compatible with *metapb.Peer. @@ -301,51 +298,28 @@ func (h *regionHandler) GetRegion(w http.ResponseWriter, r *http.Request) { // @Failure 400 {string} string "The input is invalid." // @Router /regions/replicated [get] func (h *regionsHandler) CheckRegionsReplicated(w http.ResponseWriter, r *http.Request) { - rc := getCluster(r) - vars := mux.Vars(r) - startKeyHex := vars["startKey"] - startKey, err := hex.DecodeString(startKeyHex) + rawStartKey := vars["startKey"] + rawEndKey := vars["endKey"] + state, err := h.Handler.CheckRegionsReplicated(rawStartKey, rawEndKey) if err != nil { h.rd.JSON(w, http.StatusBadRequest, err.Error()) return } - endKeyHex := vars["endKey"] - endKey, err := hex.DecodeString(endKeyHex) - if err != nil { - h.rd.JSON(w, http.StatusBadRequest, err.Error()) - return - } - - regions := rc.ScanRegions(startKey, endKey, -1) - state := "REPLICATED" - for _, region := range regions { - if !filter.IsRegionReplicated(rc, region) { - state = "INPROGRESS" - if rc.GetCoordinator().IsPendingRegion(region.GetID()) { - state = "PENDING" - break - } - } - } - failpoint.Inject("mockPending", func(val failpoint.Value) { - aok, ok := val.(bool) - if ok && aok { - state = "PENDING" - } - }) h.rd.JSON(w, http.StatusOK, state) } type regionsHandler struct { + *server.Handler svr *server.Server rd *render.Render } func newRegionsHandler(svr *server.Server, rd *render.Render) *regionsHandler { return ®ionsHandler{ - svr: svr, - rd: rd, + Handler: svr.GetHandler(), + svr: svr, + rd: rd, } } @@ -422,19 +396,12 @@ func (h *regionsHandler) ScanRegions(w http.ResponseWriter, r *http.Request) { rc := getCluster(r) startKey := r.URL.Query().Get("key") endKey := r.URL.Query().Get("end_key") - - limit := defaultRegionLimit - if limitStr := r.URL.Query().Get("limit"); limitStr != "" { - var err error - limit, err = strconv.Atoi(limitStr) - if err != nil { - h.rd.JSON(w, http.StatusBadRequest, err.Error()) - return - } - } - if limit > maxRegionLimit { - limit = maxRegionLimit + limit, err := h.AdjustLimit(r.URL.Query().Get("limit")) + if err != nil { + h.rd.JSON(w, http.StatusBadRequest, err.Error()) + return } + regions := rc.ScanRegions([]byte(startKey), []byte(endKey), limit) b, err := marshalRegionsInfoJSON(r.Context(), regions) if err != nil { @@ -509,16 +476,10 @@ func (h *regionsHandler) GetKeyspaceRegions(w http.ResponseWriter, r *http.Reque return } - 
limit := defaultRegionLimit - if limitStr := r.URL.Query().Get("limit"); limitStr != "" { - limit, err = strconv.Atoi(limitStr) - if err != nil { - h.rd.JSON(w, http.StatusBadRequest, err.Error()) - return - } - } - if limit > maxRegionLimit { - limit = maxRegionLimit + limit, err := h.AdjustLimit(r.URL.Query().Get("limit")) + if err != nil { + h.rd.JSON(w, http.StatusBadRequest, err.Error()) + return } regionBound := keyspace.MakeRegionBound(keyspaceID) regions := rc.ScanRegions(regionBound.RawLeftBound, regionBound.RawRightBound, limit) @@ -789,8 +750,6 @@ func (h *regionsHandler) GetRegionSiblings(w http.ResponseWriter, r *http.Reques } const ( - defaultRegionLimit = 16 - maxRegionLimit = 10240 minRegionHistogramSize = 1 minRegionHistogramKeys = 1000 ) @@ -892,43 +851,27 @@ func (h *regionsHandler) GetTopCPURegions(w http.ResponseWriter, r *http.Request // @Failure 400 {string} string "The input is invalid." // @Router /regions/accelerate-schedule [post] func (h *regionsHandler) AccelerateRegionsScheduleInRange(w http.ResponseWriter, r *http.Request) { - rc := getCluster(r) var input map[string]interface{} if err := apiutil.ReadJSONRespondError(h.rd, w, r.Body, &input); err != nil { return } - startKey, rawStartKey, err := apiutil.ParseKey("start_key", input) - if err != nil { - h.rd.JSON(w, http.StatusBadRequest, err.Error()) + rawStartKey, ok1 := input["start_key"].(string) + rawEndKey, ok2 := input["end_key"].(string) + if !ok1 || !ok2 { + h.rd.JSON(w, http.StatusBadRequest, "start_key or end_key is not string") return } - endKey, rawEndKey, err := apiutil.ParseKey("end_key", input) + limit, err := h.AdjustLimit(r.URL.Query().Get("limit"), 256 /*default limit*/) if err != nil { h.rd.JSON(w, http.StatusBadRequest, err.Error()) return } - limit := 256 - if limitStr := r.URL.Query().Get("limit"); limitStr != "" { - var err error - limit, err = strconv.Atoi(limitStr) - if err != nil { - h.rd.JSON(w, http.StatusBadRequest, err.Error()) - return - } - } - if limit > maxRegionLimit { - limit = maxRegionLimit - } - - regions := rc.ScanRegions(startKey, endKey, limit) - if len(regions) > 0 { - regionsIDList := make([]uint64, 0, len(regions)) - for _, region := range regions { - regionsIDList = append(regionsIDList, region.GetID()) - } - rc.AddSuspectRegions(regionsIDList...) + err = h.Handler.AccelerateRegionsScheduleInRange(rawStartKey, rawEndKey, limit) + if err != nil { + h.rd.JSON(w, http.StatusInternalServerError, err.Error()) + return } h.rd.Text(w, http.StatusOK, fmt.Sprintf("Accelerate regions scheduling in a given range [%s,%s)", rawStartKey, rawEndKey)) } @@ -943,27 +886,20 @@ func (h *regionsHandler) AccelerateRegionsScheduleInRange(w http.ResponseWriter, // @Failure 400 {string} string "The input is invalid." 
// @Router /regions/accelerate-schedule/batch [post] func (h *regionsHandler) AccelerateRegionsScheduleInRanges(w http.ResponseWriter, r *http.Request) { - rc := getCluster(r) var input []map[string]interface{} if err := apiutil.ReadJSONRespondError(h.rd, w, r.Body, &input); err != nil { return } - limit := 256 - if limitStr := r.URL.Query().Get("limit"); limitStr != "" { - var err error - limit, err = strconv.Atoi(limitStr) - if err != nil { - h.rd.JSON(w, http.StatusBadRequest, err.Error()) - return - } - } - if limit > maxRegionLimit { - limit = maxRegionLimit + limit, err := h.AdjustLimit(r.URL.Query().Get("limit"), 256 /*default limit*/) + if err != nil { + h.rd.JSON(w, http.StatusBadRequest, err.Error()) + return } + var msgBuilder strings.Builder msgBuilder.Grow(128) msgBuilder.WriteString("Accelerate regions scheduling in given ranges: ") - var regions []*core.RegionInfo + var startKeys, endKeys [][]byte for _, rg := range input { startKey, rawStartKey, err := apiutil.ParseKey("start_key", rg) if err != nil { @@ -975,32 +911,24 @@ func (h *regionsHandler) AccelerateRegionsScheduleInRanges(w http.ResponseWriter h.rd.JSON(w, http.StatusBadRequest, err.Error()) return } - regions = append(regions, rc.ScanRegions(startKey, endKey, limit)...) + startKeys = append(startKeys, startKey) + endKeys = append(endKeys, endKey) msgBuilder.WriteString(fmt.Sprintf("[%s,%s), ", rawStartKey, rawEndKey)) } - if len(regions) > 0 { - regionsIDList := make([]uint64, 0, len(regions)) - for _, region := range regions { - regionsIDList = append(regionsIDList, region.GetID()) - } - rc.AddSuspectRegions(regionsIDList...) + err = h.Handler.AccelerateRegionsScheduleInRanges(startKeys, endKeys, limit) + if err != nil { + h.rd.JSON(w, http.StatusInternalServerError, err.Error()) + return } h.rd.Text(w, http.StatusOK, msgBuilder.String()) } func (h *regionsHandler) GetTopNRegions(w http.ResponseWriter, r *http.Request, less func(a, b *core.RegionInfo) bool) { rc := getCluster(r) - limit := defaultRegionLimit - if limitStr := r.URL.Query().Get("limit"); limitStr != "" { - var err error - limit, err = strconv.Atoi(limitStr) - if err != nil { - h.rd.JSON(w, http.StatusBadRequest, err.Error()) - return - } - } - if limit > maxRegionLimit { - limit = maxRegionLimit + limit, err := h.AdjustLimit(r.URL.Query().Get("limit")) + if err != nil { + h.rd.JSON(w, http.StatusBadRequest, err.Error()) + return } regions := TopNRegions(rc.GetRegions(), less, limit) b, err := marshalRegionsInfoJSON(r.Context(), regions) @@ -1020,69 +948,33 @@ func (h *regionsHandler) GetTopNRegions(w http.ResponseWriter, r *http.Request, // @Failure 400 {string} string "The input is invalid." 
// @Router /regions/scatter [post] func (h *regionsHandler) ScatterRegions(w http.ResponseWriter, r *http.Request) { - rc := getCluster(r) var input map[string]interface{} if err := apiutil.ReadJSONRespondError(h.rd, w, r.Body, &input); err != nil { return } - _, ok1 := input["start_key"].(string) - _, ok2 := input["end_key"].(string) - group, ok := input["group"].(string) - if !ok { - group = "" - } + rawStartKey, ok1 := input["start_key"].(string) + rawEndKey, ok2 := input["end_key"].(string) + group, _ := input["group"].(string) retryLimit := 5 if rl, ok := input["retry_limit"].(float64); ok { retryLimit = int(rl) } - opsCount := 0 - var failures map[uint64]error - var err error - if ok1 && ok2 { - startKey, _, err := apiutil.ParseKey("start_key", input) - if err != nil { - h.rd.JSON(w, http.StatusBadRequest, err.Error()) - return - } - endKey, _, err := apiutil.ParseKey("end_key", input) - if err != nil { - h.rd.JSON(w, http.StatusBadRequest, err.Error()) - return - } - opsCount, failures, err = rc.GetRegionScatterer().ScatterRegionsByRange(startKey, endKey, group, retryLimit) - if err != nil { - h.rd.JSON(w, http.StatusInternalServerError, err.Error()) - return + + opsCount, failures, err := func() (int, map[uint64]error, error) { + if ok1 && ok2 { + return h.ScatterRegionsByRange(rawStartKey, rawEndKey, group, retryLimit) } - } else { ids, ok := typeutil.JSONToUint64Slice(input["regions_id"]) if !ok { - h.rd.JSON(w, http.StatusBadRequest, "regions_id is invalid") - return + return 0, nil, errors.New("regions_id is invalid") } - opsCount, failures, err = rc.GetRegionScatterer().ScatterRegionsByID(ids, group, retryLimit, false) - if err != nil { - h.rd.JSON(w, http.StatusInternalServerError, err.Error()) - return - } - } - // If there existed any operator failed to be added into Operator Controller, add its regions into unProcessedRegions - percentage := 100 - if len(failures) > 0 { - percentage = 100 - 100*len(failures)/(opsCount+len(failures)) - log.Debug("scatter regions", zap.Errors("failures", func() []error { - r := make([]error, 0, len(failures)) - for _, err := range failures { - r = append(r, err) - } - return r - }())) - } - s := struct { - ProcessedPercentage int `json:"processed-percentage"` - }{ - ProcessedPercentage: percentage, + return h.ScatterRegionsByID(ids, group, retryLimit, false) + }() + if err != nil { + h.rd.JSON(w, http.StatusInternalServerError, err.Error()) + return } + s := h.BuildScatterRegionsResp(opsCount, failures) h.rd.JSON(w, http.StatusOK, &s) } @@ -1095,16 +987,16 @@ func (h *regionsHandler) ScatterRegions(w http.ResponseWriter, r *http.Request) // @Failure 400 {string} string "The input is invalid." 
// @Router /regions/split [post] func (h *regionsHandler) SplitRegions(w http.ResponseWriter, r *http.Request) { - rc := getCluster(r) var input map[string]interface{} if err := apiutil.ReadJSONRespondError(h.rd, w, r.Body, &input); err != nil { return } - rawSplitKeys, ok := input["split_keys"].([]interface{}) + s, ok := input["split_keys"] if !ok { h.rd.JSON(w, http.StatusBadRequest, "split_keys should be provided.") return } + rawSplitKeys := s.([]interface{}) if len(rawSplitKeys) < 1 { h.rd.JSON(w, http.StatusBadRequest, "empty split keys.") return @@ -1113,29 +1005,11 @@ func (h *regionsHandler) SplitRegions(w http.ResponseWriter, r *http.Request) { if rl, ok := input["retry_limit"].(float64); ok { retryLimit = int(rl) } - splitKeys := make([][]byte, 0, len(rawSplitKeys)) - for _, rawKey := range rawSplitKeys { - key, err := hex.DecodeString(rawKey.(string)) - if err != nil { - h.rd.JSON(w, http.StatusBadRequest, err.Error()) - return - } - splitKeys = append(splitKeys, key) - } - s := struct { - ProcessedPercentage int `json:"processed-percentage"` - NewRegionsID []uint64 `json:"regions-id"` - }{} - percentage, newRegionsID := rc.GetRegionSplitter().SplitRegions(r.Context(), splitKeys, retryLimit) - s.ProcessedPercentage = percentage - s.NewRegionsID = newRegionsID - failpoint.Inject("splitResponses", func(val failpoint.Value) { - rawID, ok := val.(int) - if ok { - s.ProcessedPercentage = 100 - s.NewRegionsID = []uint64{uint64(rawID)} - } - }) + s, err := h.Handler.SplitRegions(r.Context(), rawSplitKeys, retryLimit) + if err != nil { + h.rd.JSON(w, http.StatusInternalServerError, err.Error()) + return + } h.rd.JSON(w, http.StatusOK, &s) } diff --git a/server/api/region_label.go b/server/api/region_label.go index 003dfb1132f..7958bacd371 100644 --- a/server/api/region_label.go +++ b/server/api/region_label.go @@ -83,7 +83,7 @@ func (h *regionLabelHandler) PatchRegionLabelRules(w http.ResponseWriter, r *htt // @Success 200 {array} labeler.LabelRule // @Failure 400 {string} string "The input is invalid." // @Failure 500 {string} string "PD server failed to proceed the request." 
-// @Router /config/region-label/rule/ids [get] +// @Router /config/region-label/rules/ids [get] func (h *regionLabelHandler) GetRegionLabelRulesByIDs(w http.ResponseWriter, r *http.Request) { cluster := getCluster(r) var ids []string diff --git a/server/api/region_test.go b/server/api/region_test.go index a39a1e5c5fd..ea2f2871a95 100644 --- a/server/api/region_test.go +++ b/server/api/region_test.go @@ -15,7 +15,6 @@ package api import ( - "bytes" "context" "encoding/hex" "encoding/json" @@ -28,13 +27,11 @@ import ( "time" "github.com/docker/go-units" - "github.com/pingcap/failpoint" "github.com/pingcap/kvproto/pkg/metapb" "github.com/pingcap/kvproto/pkg/pdpb" "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" "github.com/tikv/pd/pkg/core" - "github.com/tikv/pd/pkg/schedule/placement" "github.com/tikv/pd/pkg/utils/apiutil" tu "github.com/tikv/pd/pkg/utils/testutil" "github.com/tikv/pd/server" @@ -241,14 +238,14 @@ func (suite *regionTestSuite) TestRegions() { mustRegionHeartbeat(re, suite.svr, r) } url := fmt.Sprintf("%s/regions", suite.urlPrefix) - RegionsInfo := &RegionsInfo{} - err := tu.ReadGetJSON(re, testDialClient, url, RegionsInfo) + regionsInfo := &RegionsInfo{} + err := tu.ReadGetJSON(re, testDialClient, url, regionsInfo) suite.NoError(err) - suite.Len(regions, RegionsInfo.Count) - sort.Slice(RegionsInfo.Regions, func(i, j int) bool { - return RegionsInfo.Regions[i].ID < RegionsInfo.Regions[j].ID + suite.Len(regions, regionsInfo.Count) + sort.Slice(regionsInfo.Regions, func(i, j int) bool { + return regionsInfo.Regions[i].ID < regionsInfo.Regions[j].ID }) - for i, r := range RegionsInfo.Regions { + for i, r := range regionsInfo.Regions { suite.Equal(regions[i].ID, r.ID) suite.Equal(regions[i].ApproximateSize, r.ApproximateSize) suite.Equal(regions[i].ApproximateKeys, r.ApproximateKeys) @@ -337,99 +334,6 @@ func (suite *regionTestSuite) TestTop() { suite.checkTopRegions(fmt.Sprintf("%s/regions/cpu", suite.urlPrefix), []uint64{3, 2, 1}) } -func (suite *regionTestSuite) TestAccelerateRegionsScheduleInRange() { - re := suite.Require() - r1 := core.NewTestRegionInfo(557, 13, []byte("a1"), []byte("a2")) - r2 := core.NewTestRegionInfo(558, 14, []byte("a2"), []byte("a3")) - r3 := core.NewTestRegionInfo(559, 15, []byte("a3"), []byte("a4")) - mustRegionHeartbeat(re, suite.svr, r1) - mustRegionHeartbeat(re, suite.svr, r2) - mustRegionHeartbeat(re, suite.svr, r3) - body := fmt.Sprintf(`{"start_key":"%s", "end_key": "%s"}`, hex.EncodeToString([]byte("a1")), hex.EncodeToString([]byte("a3"))) - - err := tu.CheckPostJSON(testDialClient, fmt.Sprintf("%s/regions/accelerate-schedule", suite.urlPrefix), []byte(body), tu.StatusOK(re)) - suite.NoError(err) - idList := suite.svr.GetRaftCluster().GetSuspectRegions() - suite.Len(idList, 2) -} - -func (suite *regionTestSuite) TestAccelerateRegionsScheduleInRanges() { - re := suite.Require() - r1 := core.NewTestRegionInfo(557, 13, []byte("a1"), []byte("a2")) - r2 := core.NewTestRegionInfo(558, 14, []byte("a2"), []byte("a3")) - r3 := core.NewTestRegionInfo(559, 15, []byte("a3"), []byte("a4")) - r4 := core.NewTestRegionInfo(560, 16, []byte("a4"), []byte("a5")) - r5 := core.NewTestRegionInfo(561, 17, []byte("a5"), []byte("a6")) - mustRegionHeartbeat(re, suite.svr, r1) - mustRegionHeartbeat(re, suite.svr, r2) - mustRegionHeartbeat(re, suite.svr, r3) - mustRegionHeartbeat(re, suite.svr, r4) - mustRegionHeartbeat(re, suite.svr, r5) - body := fmt.Sprintf(`[{"start_key":"%s", "end_key": "%s"}, {"start_key":"%s", "end_key": "%s"}]`, 
hex.EncodeToString([]byte("a1")), hex.EncodeToString([]byte("a3")), hex.EncodeToString([]byte("a4")), hex.EncodeToString([]byte("a6"))) - - err := tu.CheckPostJSON(testDialClient, fmt.Sprintf("%s/regions/accelerate-schedule/batch", suite.urlPrefix), []byte(body), tu.StatusOK(re)) - suite.NoError(err) - idList := suite.svr.GetRaftCluster().GetSuspectRegions() - suite.Len(idList, 4) -} - -func (suite *regionTestSuite) TestScatterRegions() { - re := suite.Require() - r1 := core.NewTestRegionInfo(601, 13, []byte("b1"), []byte("b2")) - r1.GetMeta().Peers = append(r1.GetMeta().Peers, &metapb.Peer{Id: 5, StoreId: 14}, &metapb.Peer{Id: 6, StoreId: 15}) - r2 := core.NewTestRegionInfo(602, 13, []byte("b2"), []byte("b3")) - r2.GetMeta().Peers = append(r2.GetMeta().Peers, &metapb.Peer{Id: 7, StoreId: 14}, &metapb.Peer{Id: 8, StoreId: 15}) - r3 := core.NewTestRegionInfo(603, 13, []byte("b4"), []byte("b4")) - r3.GetMeta().Peers = append(r3.GetMeta().Peers, &metapb.Peer{Id: 9, StoreId: 14}, &metapb.Peer{Id: 10, StoreId: 15}) - mustRegionHeartbeat(re, suite.svr, r1) - mustRegionHeartbeat(re, suite.svr, r2) - mustRegionHeartbeat(re, suite.svr, r3) - mustPutStore(re, suite.svr, 13, metapb.StoreState_Up, metapb.NodeState_Serving, []*metapb.StoreLabel{}) - mustPutStore(re, suite.svr, 14, metapb.StoreState_Up, metapb.NodeState_Serving, []*metapb.StoreLabel{}) - mustPutStore(re, suite.svr, 15, metapb.StoreState_Up, metapb.NodeState_Serving, []*metapb.StoreLabel{}) - mustPutStore(re, suite.svr, 16, metapb.StoreState_Up, metapb.NodeState_Serving, []*metapb.StoreLabel{}) - body := fmt.Sprintf(`{"start_key":"%s", "end_key": "%s"}`, hex.EncodeToString([]byte("b1")), hex.EncodeToString([]byte("b3"))) - - err := tu.CheckPostJSON(testDialClient, fmt.Sprintf("%s/regions/scatter", suite.urlPrefix), []byte(body), tu.StatusOK(re)) - suite.NoError(err) - op1 := suite.svr.GetRaftCluster().GetOperatorController().GetOperator(601) - op2 := suite.svr.GetRaftCluster().GetOperatorController().GetOperator(602) - op3 := suite.svr.GetRaftCluster().GetOperatorController().GetOperator(603) - // At least one operator used to scatter region - suite.True(op1 != nil || op2 != nil || op3 != nil) - - body = `{"regions_id": [601, 602, 603]}` - err = tu.CheckPostJSON(testDialClient, fmt.Sprintf("%s/regions/scatter", suite.urlPrefix), []byte(body), tu.StatusOK(re)) - suite.NoError(err) -} - -func (suite *regionTestSuite) TestSplitRegions() { - re := suite.Require() - r1 := core.NewTestRegionInfo(601, 13, []byte("aaa"), []byte("ggg")) - r1.GetMeta().Peers = append(r1.GetMeta().Peers, &metapb.Peer{Id: 5, StoreId: 13}, &metapb.Peer{Id: 6, StoreId: 13}) - mustRegionHeartbeat(re, suite.svr, r1) - mustPutStore(re, suite.svr, 13, metapb.StoreState_Up, metapb.NodeState_Serving, []*metapb.StoreLabel{}) - newRegionID := uint64(11) - body := fmt.Sprintf(`{"retry_limit":%v, "split_keys": ["%s","%s","%s"]}`, 3, - hex.EncodeToString([]byte("bbb")), - hex.EncodeToString([]byte("ccc")), - hex.EncodeToString([]byte("ddd"))) - checkOpt := func(res []byte, code int, _ http.Header) { - s := &struct { - ProcessedPercentage int `json:"processed-percentage"` - NewRegionsID []uint64 `json:"regions-id"` - }{} - err := json.Unmarshal(res, s) - suite.NoError(err) - suite.Equal(100, s.ProcessedPercentage) - suite.Equal([]uint64{newRegionID}, s.NewRegionsID) - } - suite.NoError(failpoint.Enable("github.com/tikv/pd/server/api/splitResponses", fmt.Sprintf("return(%v)", newRegionID))) - err := tu.CheckPostJSON(testDialClient, fmt.Sprintf("%s/regions/split", suite.urlPrefix), 
[]byte(body), checkOpt) - suite.NoError(failpoint.Disable("github.com/tikv/pd/server/api/splitResponses")) - suite.NoError(err) -} - func (suite *regionTestSuite) checkTopRegions(url string, regionIDs []uint64) { regions := &RegionsInfo{} err := tu.ReadGetJSON(suite.Require(), testDialClient, url, regions) @@ -480,7 +384,7 @@ func TestRegionsWithKillRequest(t *testing.T) { } ctx, cancel := context.WithCancel(context.Background()) - req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, bytes.NewBuffer(nil)) + req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, http.NoBody) re.NoError(err) respCh := make(chan *http.Response) go func() { @@ -652,131 +556,6 @@ func (suite *getRegionRangeHolesTestSuite) TestRegionRangeHoles() { }, *rangeHoles) } -type regionsReplicatedTestSuite struct { - suite.Suite - svr *server.Server - cleanup tu.CleanupFunc - urlPrefix string -} - -func TestRegionsReplicatedTestSuite(t *testing.T) { - suite.Run(t, new(regionsReplicatedTestSuite)) -} - -func (suite *regionsReplicatedTestSuite) SetupSuite() { - re := suite.Require() - suite.svr, suite.cleanup = mustNewServer(re) - server.MustWaitLeader(re, []*server.Server{suite.svr}) - - addr := suite.svr.GetAddr() - suite.urlPrefix = fmt.Sprintf("%s%s/api/v1", addr, apiPrefix) - - mustBootstrapCluster(re, suite.svr) -} - -func (suite *regionsReplicatedTestSuite) TearDownSuite() { - suite.cleanup() -} - -func (suite *regionsReplicatedTestSuite) TestCheckRegionsReplicated() { - re := suite.Require() - // enable placement rule - suite.NoError(tu.CheckPostJSON(testDialClient, suite.urlPrefix+"/config", []byte(`{"enable-placement-rules":"true"}`), tu.StatusOK(re))) - defer func() { - suite.NoError(tu.CheckPostJSON(testDialClient, suite.urlPrefix+"/config", []byte(`{"enable-placement-rules":"false"}`), tu.StatusOK(re))) - }() - - // add test region - r1 := core.NewTestRegionInfo(2, 1, []byte("a"), []byte("b")) - mustRegionHeartbeat(re, suite.svr, r1) - - // set the bundle - bundle := []placement.GroupBundle{ - { - ID: "5", - Index: 5, - Rules: []*placement.Rule{ - { - ID: "foo", Index: 1, Role: "voter", Count: 1, - }, - }, - }, - } - - status := "" - - // invalid url - url := fmt.Sprintf(`%s/regions/replicated?startKey=%s&endKey=%s`, suite.urlPrefix, "_", "t") - err := tu.CheckGetJSON(testDialClient, url, nil, tu.Status(re, http.StatusBadRequest)) - suite.NoError(err) - - url = fmt.Sprintf(`%s/regions/replicated?startKey=%s&endKey=%s`, suite.urlPrefix, hex.EncodeToString(r1.GetStartKey()), "_") - err = tu.CheckGetJSON(testDialClient, url, nil, tu.Status(re, http.StatusBadRequest)) - suite.NoError(err) - - // correct test - url = fmt.Sprintf(`%s/regions/replicated?startKey=%s&endKey=%s`, suite.urlPrefix, hex.EncodeToString(r1.GetStartKey()), hex.EncodeToString(r1.GetEndKey())) - - // test one rule - data, err := json.Marshal(bundle) - suite.NoError(err) - err = tu.CheckPostJSON(testDialClient, suite.urlPrefix+"/config/placement-rule", data, tu.StatusOK(re)) - suite.NoError(err) - - err = tu.ReadGetJSON(re, testDialClient, url, &status) - suite.NoError(err) - suite.Equal("REPLICATED", status) - - suite.NoError(failpoint.Enable("github.com/tikv/pd/server/api/mockPending", "return(true)")) - err = tu.ReadGetJSON(re, testDialClient, url, &status) - suite.NoError(err) - suite.Equal("PENDING", status) - suite.NoError(failpoint.Disable("github.com/tikv/pd/server/api/mockPending")) - // test multiple rules - r1 = core.NewTestRegionInfo(2, 1, []byte("a"), []byte("b")) - r1.GetMeta().Peers = 
append(r1.GetMeta().Peers, &metapb.Peer{Id: 5, StoreId: 1}) - mustRegionHeartbeat(re, suite.svr, r1) - - bundle[0].Rules = append(bundle[0].Rules, &placement.Rule{ - ID: "bar", Index: 1, Role: "voter", Count: 1, - }) - data, err = json.Marshal(bundle) - suite.NoError(err) - err = tu.CheckPostJSON(testDialClient, suite.urlPrefix+"/config/placement-rule", data, tu.StatusOK(re)) - suite.NoError(err) - - err = tu.ReadGetJSON(re, testDialClient, url, &status) - suite.NoError(err) - suite.Equal("REPLICATED", status) - - // test multiple bundles - bundle = append(bundle, placement.GroupBundle{ - ID: "6", - Index: 6, - Rules: []*placement.Rule{ - { - ID: "foo", Index: 1, Role: "voter", Count: 2, - }, - }, - }) - data, err = json.Marshal(bundle) - suite.NoError(err) - err = tu.CheckPostJSON(testDialClient, suite.urlPrefix+"/config/placement-rule", data, tu.StatusOK(re)) - suite.NoError(err) - - err = tu.ReadGetJSON(re, testDialClient, url, &status) - suite.NoError(err) - suite.Equal("INPROGRESS", status) - - r1 = core.NewTestRegionInfo(2, 1, []byte("a"), []byte("b")) - r1.GetMeta().Peers = append(r1.GetMeta().Peers, &metapb.Peer{Id: 5, StoreId: 1}, &metapb.Peer{Id: 6, StoreId: 1}, &metapb.Peer{Id: 7, StoreId: 1}) - mustRegionHeartbeat(re, suite.svr, r1) - - err = tu.ReadGetJSON(re, testDialClient, url, &status) - suite.NoError(err) - suite.Equal("REPLICATED", status) -} - func TestRegionsInfoMarshal(t *testing.T) { re := require.New(t) regionWithNilPeer := core.NewRegionInfo(&metapb.Region{Id: 1}, &metapb.Peer{Id: 1}) diff --git a/server/api/router.go b/server/api/router.go index 93811e264f1..d3c8f10cbf2 100644 --- a/server/api/router.go +++ b/server/api/router.go @@ -174,29 +174,31 @@ func createRouter(prefix string, svr *server.Server) *mux.Router { registerFunc(apiRouter, "/config/replication-mode", confHandler.SetReplicationModeConfig, setMethods(http.MethodPost), setAuditBackend(localLog, prometheus)) rulesHandler := newRulesHandler(svr, rd) - registerFunc(clusterRouter, "/config/rules", rulesHandler.GetAllRules, setMethods(http.MethodGet), setAuditBackend(prometheus)) - registerFunc(clusterRouter, "/config/rules", rulesHandler.SetAllRules, setMethods(http.MethodPost), setAuditBackend(localLog, prometheus)) - registerFunc(clusterRouter, "/config/rules/batch", rulesHandler.BatchRules, setMethods(http.MethodPost), setAuditBackend(localLog, prometheus)) - registerFunc(clusterRouter, "/config/rules/group/{group}", rulesHandler.GetRuleByGroup, setMethods(http.MethodGet), setAuditBackend(prometheus)) - registerFunc(clusterRouter, "/config/rules/region/{region}", rulesHandler.GetRulesByRegion, setMethods(http.MethodGet), setAuditBackend(prometheus)) - registerFunc(clusterRouter, "/config/rules/region/{region}/detail", rulesHandler.CheckRegionPlacementRule, setMethods(http.MethodGet), setAuditBackend(prometheus)) - registerFunc(clusterRouter, "/config/rules/key/{key}", rulesHandler.GetRulesByKey, setMethods(http.MethodGet), setAuditBackend(prometheus)) - registerFunc(clusterRouter, "/config/rule/{group}/{id}", rulesHandler.GetRuleByGroupAndID, setMethods(http.MethodGet), setAuditBackend(prometheus)) - registerFunc(clusterRouter, "/config/rule", rulesHandler.SetRule, setMethods(http.MethodPost), setAuditBackend(localLog, prometheus)) - registerFunc(clusterRouter, "/config/rule/{group}/{id}", rulesHandler.DeleteRuleByGroup, setMethods(http.MethodDelete), setAuditBackend(localLog, prometheus)) - - registerFunc(clusterRouter, "/config/rule_group/{id}", rulesHandler.GetGroupConfig, 
setMethods(http.MethodGet), setAuditBackend(prometheus)) - registerFunc(clusterRouter, "/config/rule_group", rulesHandler.SetGroupConfig, setMethods(http.MethodPost), setAuditBackend(localLog, prometheus)) - registerFunc(clusterRouter, "/config/rule_group/{id}", rulesHandler.DeleteGroupConfig, setMethods(http.MethodDelete), setAuditBackend(localLog, prometheus)) - registerFunc(clusterRouter, "/config/rule_groups", rulesHandler.GetAllGroupConfigs, setMethods(http.MethodGet), setAuditBackend(prometheus)) - - registerFunc(clusterRouter, "/config/placement-rule", rulesHandler.GetPlacementRules, setMethods(http.MethodGet), setAuditBackend(prometheus)) - registerFunc(clusterRouter, "/config/placement-rule", rulesHandler.SetPlacementRules, setMethods(http.MethodPost), setAuditBackend(localLog, prometheus)) + ruleRouter := clusterRouter.NewRoute().Subrouter() + ruleRouter.Use(newRuleMiddleware(svr, rd).Middleware) + registerFunc(ruleRouter, "/config/rules", rulesHandler.GetAllRules, setMethods(http.MethodGet), setAuditBackend(prometheus)) + registerFunc(ruleRouter, "/config/rules", rulesHandler.SetAllRules, setMethods(http.MethodPost), setAuditBackend(localLog, prometheus)) + registerFunc(ruleRouter, "/config/rules/batch", rulesHandler.BatchRules, setMethods(http.MethodPost), setAuditBackend(localLog, prometheus)) + registerFunc(ruleRouter, "/config/rules/group/{group}", rulesHandler.GetRuleByGroup, setMethods(http.MethodGet), setAuditBackend(prometheus)) + registerFunc(ruleRouter, "/config/rules/region/{region}", rulesHandler.GetRulesByRegion, setMethods(http.MethodGet), setAuditBackend(prometheus)) + registerFunc(ruleRouter, "/config/rules/region/{region}/detail", rulesHandler.CheckRegionPlacementRule, setMethods(http.MethodGet), setAuditBackend(prometheus)) + registerFunc(ruleRouter, "/config/rules/key/{key}", rulesHandler.GetRulesByKey, setMethods(http.MethodGet), setAuditBackend(prometheus)) + registerFunc(ruleRouter, "/config/rule/{group}/{id}", rulesHandler.GetRuleByGroupAndID, setMethods(http.MethodGet), setAuditBackend(prometheus)) + registerFunc(ruleRouter, "/config/rule", rulesHandler.SetRule, setMethods(http.MethodPost), setAuditBackend(localLog, prometheus)) + registerFunc(ruleRouter, "/config/rule/{group}/{id}", rulesHandler.DeleteRuleByGroup, setMethods(http.MethodDelete), setAuditBackend(localLog, prometheus)) + + registerFunc(ruleRouter, "/config/rule_group/{id}", rulesHandler.GetGroupConfig, setMethods(http.MethodGet), setAuditBackend(prometheus)) + registerFunc(ruleRouter, "/config/rule_group", rulesHandler.SetGroupConfig, setMethods(http.MethodPost), setAuditBackend(localLog, prometheus)) + registerFunc(ruleRouter, "/config/rule_group/{id}", rulesHandler.DeleteGroupConfig, setMethods(http.MethodDelete), setAuditBackend(localLog, prometheus)) + registerFunc(ruleRouter, "/config/rule_groups", rulesHandler.GetAllGroupConfigs, setMethods(http.MethodGet), setAuditBackend(prometheus)) + + registerFunc(ruleRouter, "/config/placement-rule", rulesHandler.GetPlacementRules, setMethods(http.MethodGet), setAuditBackend(prometheus)) + registerFunc(ruleRouter, "/config/placement-rule", rulesHandler.SetPlacementRules, setMethods(http.MethodPost), setAuditBackend(localLog, prometheus)) // {group} can be a regular expression, we should enable path encode to // support special characters. 
- registerFunc(clusterRouter, "/config/placement-rule/{group}", rulesHandler.GetPlacementRuleByGroup, setMethods(http.MethodGet), setAuditBackend(prometheus)) - registerFunc(clusterRouter, "/config/placement-rule/{group}", rulesHandler.SetPlacementRuleByGroup, setMethods(http.MethodPost), setAuditBackend(localLog, prometheus)) - registerFunc(escapeRouter, "/config/placement-rule/{group}", rulesHandler.DeletePlacementRuleByGroup, setMethods(http.MethodDelete), setAuditBackend(localLog, prometheus)) + registerFunc(ruleRouter, "/config/placement-rule/{group}", rulesHandler.GetPlacementRuleByGroup, setMethods(http.MethodGet), setAuditBackend(prometheus)) + registerFunc(ruleRouter, "/config/placement-rule/{group}", rulesHandler.SetPlacementRuleByGroup, setMethods(http.MethodPost), setAuditBackend(localLog, prometheus)) + registerFunc(ruleRouter, "/config/placement-rule/{group}", rulesHandler.DeletePlacementRuleByGroup, setMethods(http.MethodDelete), setAuditBackend(localLog, prometheus)) regionLabelHandler := newRegionLabelHandler(svr, rd) registerFunc(clusterRouter, "/config/region-label/rules", regionLabelHandler.GetAllRegionLabelRules, setMethods(http.MethodGet), setAuditBackend(prometheus)) @@ -334,16 +336,16 @@ func createRouter(prefix string, svr *server.Server) *mux.Router { pprofHandler := newPprofHandler(svr, rd) // profile API - registerFunc(apiRouter, "/debug/pprof/profile", pprof.Profile) - registerFunc(apiRouter, "/debug/pprof/trace", pprof.Trace) - registerFunc(apiRouter, "/debug/pprof/symbol", pprof.Symbol) - registerFunc(apiRouter, "/debug/pprof/heap", pprofHandler.PProfHeap) - registerFunc(apiRouter, "/debug/pprof/mutex", pprofHandler.PProfMutex) - registerFunc(apiRouter, "/debug/pprof/allocs", pprofHandler.PProfAllocs) - registerFunc(apiRouter, "/debug/pprof/block", pprofHandler.PProfBlock) - registerFunc(apiRouter, "/debug/pprof/goroutine", pprofHandler.PProfGoroutine) - registerFunc(apiRouter, "/debug/pprof/threadcreate", pprofHandler.PProfThreadcreate) - registerFunc(apiRouter, "/debug/pprof/zip", pprofHandler.PProfZip) + registerFunc(apiRouter, "/debug/pprof/profile", pprof.Profile, setAuditBackend(localLog)) + registerFunc(apiRouter, "/debug/pprof/trace", pprof.Trace, setAuditBackend(localLog)) + registerFunc(apiRouter, "/debug/pprof/symbol", pprof.Symbol, setAuditBackend(localLog)) + registerFunc(apiRouter, "/debug/pprof/heap", pprofHandler.PProfHeap, setAuditBackend(localLog)) + registerFunc(apiRouter, "/debug/pprof/mutex", pprofHandler.PProfMutex, setAuditBackend(localLog)) + registerFunc(apiRouter, "/debug/pprof/allocs", pprofHandler.PProfAllocs, setAuditBackend(localLog)) + registerFunc(apiRouter, "/debug/pprof/block", pprofHandler.PProfBlock, setAuditBackend(localLog)) + registerFunc(apiRouter, "/debug/pprof/goroutine", pprofHandler.PProfGoroutine, setAuditBackend(localLog)) + registerFunc(apiRouter, "/debug/pprof/threadcreate", pprofHandler.PProfThreadcreate, setAuditBackend(localLog)) + registerFunc(apiRouter, "/debug/pprof/zip", pprofHandler.PProfZip, setAuditBackend(localLog)) // service GC safepoint API serviceGCSafepointHandler := newServiceGCSafepointHandler(svr, rd) diff --git a/server/api/rule.go b/server/api/rule.go index b3a720ece41..bdb3db2016d 100644 --- a/server/api/rule.go +++ b/server/api/rule.go @@ -15,50 +15,80 @@ package api import ( + "context" "encoding/hex" "fmt" "net/http" "net/url" - "strconv" "github.com/gorilla/mux" - "github.com/pingcap/errors" - "github.com/tikv/pd/pkg/core" "github.com/tikv/pd/pkg/errs" 
"github.com/tikv/pd/pkg/schedule/placement" "github.com/tikv/pd/pkg/utils/apiutil" "github.com/tikv/pd/server" - "github.com/tikv/pd/server/cluster" "github.com/unrolled/render" ) -var errPlacementDisabled = errors.New("placement rules feature is disabled") - type ruleHandler struct { + *server.Handler svr *server.Server rd *render.Render } func newRulesHandler(svr *server.Server, rd *render.Render) *ruleHandler { return &ruleHandler{ - svr: svr, - rd: rd, + Handler: svr.GetHandler(), + svr: svr, + rd: rd, + } +} + +type ruleMiddleware struct { + s *server.Server + rd *render.Render + *server.Handler +} + +func newRuleMiddleware(s *server.Server, rd *render.Render) ruleMiddleware { + return ruleMiddleware{ + s: s, + rd: rd, + Handler: s.GetHandler(), } } +func (m ruleMiddleware) Middleware(h http.Handler) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + manager, err := m.GetRuleManager() + if err == errs.ErrPlacementDisabled { + m.rd.JSON(w, http.StatusPreconditionFailed, err.Error()) + return + } + if err != nil { + m.rd.JSON(w, http.StatusInternalServerError, err.Error()) + return + } + ctx := context.WithValue(r.Context(), ruleCtxKey{}, manager) + h.ServeHTTP(w, r.WithContext(ctx)) + }) +} + +type ruleCtxKey struct{} + +func getRuleManager(r *http.Request) *placement.RuleManager { + return r.Context().Value(ruleCtxKey{}).(*placement.RuleManager) +} + // @Tags rule // @Summary List all rules of cluster. // @Produce json // @Success 200 {array} placement.Rule // @Failure 412 {string} string "Placement rules feature is disabled." +// @Failure 500 {string} string "PD server failed to proceed the request." // @Router /config/rules [get] func (h *ruleHandler) GetAllRules(w http.ResponseWriter, r *http.Request) { - cluster := getCluster(r) - if !cluster.GetOpts().IsPlacementRulesEnabled() { - h.rd.JSON(w, http.StatusPreconditionFailed, errPlacementDisabled.Error()) - return - } - rules := cluster.GetRuleManager().GetAllRules() + manager := getRuleManager(r) + rules := manager.GetAllRules() h.rd.JSON(w, http.StatusOK, rules) } @@ -72,11 +102,7 @@ func (h *ruleHandler) GetAllRules(w http.ResponseWriter, r *http.Request) { // @Failure 500 {string} string "PD server failed to proceed the request." // @Router /config/rules [post] func (h *ruleHandler) SetAllRules(w http.ResponseWriter, r *http.Request) { - cluster := getCluster(r) - if !cluster.GetOpts().IsPlacementRulesEnabled() { - h.rd.JSON(w, http.StatusPreconditionFailed, errPlacementDisabled.Error()) - return - } + manager := getRuleManager(r) var rules []*placement.Rule if err := apiutil.ReadJSONRespondError(h.rd, w, r.Body, &rules); err != nil { return @@ -87,7 +113,7 @@ func (h *ruleHandler) SetAllRules(w http.ResponseWriter, r *http.Request) { return } } - if err := cluster.GetRuleManager().SetKeyType(h.svr.GetConfig().PDServerCfg.KeyType). + if err := manager.SetKeyType(h.svr.GetConfig().PDServerCfg.KeyType). SetRules(rules); err != nil { if errs.ErrRuleContent.Equal(err) || errs.ErrHexDecodingString.Equal(err) { h.rd.JSON(w, http.StatusBadRequest, err.Error()) @@ -105,15 +131,12 @@ func (h *ruleHandler) SetAllRules(w http.ResponseWriter, r *http.Request) { // @Produce json // @Success 200 {array} placement.Rule // @Failure 412 {string} string "Placement rules feature is disabled." +// @Failure 500 {string} string "PD server failed to proceed the request." 
// @Router /config/rules/group/{group} [get] func (h *ruleHandler) GetRuleByGroup(w http.ResponseWriter, r *http.Request) { - cluster := getCluster(r) - if !cluster.GetOpts().IsPlacementRulesEnabled() { - h.rd.JSON(w, http.StatusPreconditionFailed, errPlacementDisabled.Error()) - return - } + manager := getRuleManager(r) group := mux.Vars(r)["group"] - rules := cluster.GetRuleManager().GetRulesByGroup(group) + rules := manager.GetRulesByGroup(group) h.rd.JSON(w, http.StatusOK, rules) } @@ -125,13 +148,17 @@ func (h *ruleHandler) GetRuleByGroup(w http.ResponseWriter, r *http.Request) { // @Failure 400 {string} string "The input is invalid." // @Failure 404 {string} string "The region does not exist." // @Failure 412 {string} string "Placement rules feature is disabled." +// @Failure 500 {string} string "PD server failed to proceed the request." // @Router /config/rules/region/{region} [get] func (h *ruleHandler) GetRulesByRegion(w http.ResponseWriter, r *http.Request) { - cluster, region := h.preCheckForRegionAndRule(w, r) - if cluster == nil || region == nil { + manager := getRuleManager(r) + regionStr := mux.Vars(r)["region"] + region, code, err := h.PreCheckForRegion(regionStr) + if err != nil { + h.rd.JSON(w, code, err.Error()) return } - rules := cluster.GetRuleManager().GetRulesForApplyRegion(region) + rules := manager.GetRulesForApplyRegion(region) h.rd.JSON(w, http.StatusOK, rules) } @@ -143,34 +170,25 @@ func (h *ruleHandler) GetRulesByRegion(w http.ResponseWriter, r *http.Request) { // @Failure 400 {string} string "The input is invalid." // @Failure 404 {string} string "The region does not exist." // @Failure 412 {string} string "Placement rules feature is disabled." +// @Failure 500 {string} string "PD server failed to proceed the request." // @Router /config/rules/region/{region}/detail [get] func (h *ruleHandler) CheckRegionPlacementRule(w http.ResponseWriter, r *http.Request) { - cluster, region := h.preCheckForRegionAndRule(w, r) - if cluster == nil || region == nil { + regionStr := mux.Vars(r)["region"] + region, code, err := h.PreCheckForRegion(regionStr) + if err != nil { + h.rd.JSON(w, code, err.Error()) return } - regionFit := cluster.GetRuleManager().FitRegion(cluster, region) - h.rd.JSON(w, http.StatusOK, regionFit) -} - -func (h *ruleHandler) preCheckForRegionAndRule(w http.ResponseWriter, r *http.Request) (*cluster.RaftCluster, *core.RegionInfo) { - cluster := getCluster(r) - if !cluster.GetOpts().IsPlacementRulesEnabled() { - h.rd.JSON(w, http.StatusPreconditionFailed, errPlacementDisabled.Error()) - return cluster, nil + regionFit, err := h.Handler.CheckRegionPlacementRule(region) + if err == errs.ErrPlacementDisabled { + h.rd.JSON(w, http.StatusPreconditionFailed, err.Error()) + return } - regionStr := mux.Vars(r)["region"] - regionID, err := strconv.ParseUint(regionStr, 10, 64) if err != nil { - h.rd.JSON(w, http.StatusBadRequest, "invalid region id") - return cluster, nil - } - region := cluster.GetRegion(regionID) - if region == nil { - h.rd.JSON(w, http.StatusNotFound, errs.ErrRegionNotFound.FastGenByArgs(regionID).Error()) - return cluster, nil + h.rd.JSON(w, http.StatusInternalServerError, err.Error()) + return } - return cluster, region + h.rd.JSON(w, http.StatusOK, regionFit) } // @Tags rule @@ -180,20 +198,17 @@ func (h *ruleHandler) preCheckForRegionAndRule(w http.ResponseWriter, r *http.Re // @Success 200 {array} placement.Rule // @Failure 400 {string} string "The input is invalid." // @Failure 412 {string} string "Placement rules feature is disabled." 
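The handler bodies in this file no longer check IsPlacementRulesEnabled themselves; that guard moves into ruleMiddleware, which resolves the RuleManager once per request, maps errs.ErrPlacementDisabled to a 412 and any other failure to a 500 (hence the @Failure 500 annotations added throughout), and stashes the manager in the request context for getRuleManager. The middleware is mounted where the rule routes are registered, which is outside this hunk, so the sketch below only illustrates the expected wiring with gorilla/mux; the function name and the exact subrouter layout are assumptions, not code from the patch.

func registerRuleRoutes(clusterRouter *mux.Router, svr *server.Server, rd *render.Render) *mux.Router {
	// Sketch only: attach the middleware so every handler registered on the
	// returned subrouter can safely call getRuleManager(r).
	ruleMiddle := newRuleMiddleware(svr, rd)
	ruleRouter := clusterRouter.NewRoute().Subrouter()
	ruleRouter.Use(ruleMiddle.Middleware)
	return ruleRouter
}

With that in place each handler shrinks to a getRuleManager(r) call plus its own logic, as the hunks before and after this point show.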
+// @Failure 500 {string} string "PD server failed to proceed the request." // @Router /config/rules/key/{key} [get] func (h *ruleHandler) GetRulesByKey(w http.ResponseWriter, r *http.Request) { - cluster := getCluster(r) - if !cluster.GetOpts().IsPlacementRulesEnabled() { - h.rd.JSON(w, http.StatusPreconditionFailed, errPlacementDisabled.Error()) - return - } + manager := getRuleManager(r) keyHex := mux.Vars(r)["key"] key, err := hex.DecodeString(keyHex) if err != nil { - h.rd.JSON(w, http.StatusBadRequest, "key should be in hex format") + h.rd.JSON(w, http.StatusBadRequest, errs.ErrKeyFormat.FastGenByArgs(err).Error()) return } - rules := cluster.GetRuleManager().GetRulesByKey(key) + rules := manager.GetRulesByKey(key) h.rd.JSON(w, http.StatusOK, rules) } @@ -207,15 +222,11 @@ func (h *ruleHandler) GetRulesByKey(w http.ResponseWriter, r *http.Request) { // @Failure 412 {string} string "Placement rules feature is disabled." // @Router /config/rule/{group}/{id} [get] func (h *ruleHandler) GetRuleByGroupAndID(w http.ResponseWriter, r *http.Request) { - cluster := getCluster(r) - if !cluster.GetOpts().IsPlacementRulesEnabled() { - h.rd.JSON(w, http.StatusPreconditionFailed, errPlacementDisabled.Error()) - return - } + manager := getRuleManager(r) group, id := mux.Vars(r)["group"], mux.Vars(r)["id"] - rule := cluster.GetRuleManager().GetRule(group, id) + rule := manager.GetRule(group, id) if rule == nil { - h.rd.JSON(w, http.StatusNotFound, nil) + h.rd.JSON(w, http.StatusNotFound, errs.ErrRuleNotFound.Error()) return } h.rd.JSON(w, http.StatusOK, rule) @@ -232,21 +243,17 @@ func (h *ruleHandler) GetRuleByGroupAndID(w http.ResponseWriter, r *http.Request // @Failure 500 {string} string "PD server failed to proceed the request." // @Router /config/rule [post] func (h *ruleHandler) SetRule(w http.ResponseWriter, r *http.Request) { - cluster := getCluster(r) - if !cluster.GetOpts().IsPlacementRulesEnabled() { - h.rd.JSON(w, http.StatusPreconditionFailed, errPlacementDisabled.Error()) - return - } + manager := getRuleManager(r) var rule placement.Rule if err := apiutil.ReadJSONRespondError(h.rd, w, r.Body, &rule); err != nil { return } - oldRule := cluster.GetRuleManager().GetRule(rule.GroupID, rule.ID) + oldRule := manager.GetRule(rule.GroupID, rule.ID) if err := h.syncReplicateConfigWithDefaultRule(&rule); err != nil { h.rd.JSON(w, http.StatusBadRequest, err.Error()) return } - if err := cluster.GetRuleManager().SetKeyType(h.svr.GetConfig().PDServerCfg.KeyType). + if err := manager.SetKeyType(h.svr.GetConfig().PDServerCfg.KeyType). 
SetRule(&rule); err != nil { if errs.ErrRuleContent.Equal(err) || errs.ErrHexDecodingString.Equal(err) { h.rd.JSON(w, http.StatusBadRequest, err.Error()) @@ -255,6 +262,7 @@ func (h *ruleHandler) SetRule(w http.ResponseWriter, r *http.Request) { } return } + cluster := getCluster(r) cluster.AddSuspectKeyRange(rule.StartKey, rule.EndKey) if oldRule != nil { cluster.AddSuspectKeyRange(oldRule.StartKey, oldRule.EndKey) @@ -265,7 +273,7 @@ func (h *ruleHandler) SetRule(w http.ResponseWriter, r *http.Request) { // sync replicate config with default-rule func (h *ruleHandler) syncReplicateConfigWithDefaultRule(rule *placement.Rule) error { // sync default rule with replicate config - if rule.GroupID == "pd" && rule.ID == "default" { + if rule.GroupID == placement.DefaultGroupID && rule.ID == placement.DefaultRuleID { cfg := h.svr.GetReplicationConfig().Clone() cfg.MaxReplicas = uint64(rule.Count) if err := h.svr.SetReplicationConfig(*cfg); err != nil { @@ -285,18 +293,15 @@ func (h *ruleHandler) syncReplicateConfigWithDefaultRule(rule *placement.Rule) e // @Failure 500 {string} string "PD server failed to proceed the request." // @Router /config/rule/{group}/{id} [delete] func (h *ruleHandler) DeleteRuleByGroup(w http.ResponseWriter, r *http.Request) { - cluster := getCluster(r) - if !cluster.GetOpts().IsPlacementRulesEnabled() { - h.rd.JSON(w, http.StatusPreconditionFailed, errPlacementDisabled.Error()) - return - } + manager := getRuleManager(r) group, id := mux.Vars(r)["group"], mux.Vars(r)["id"] - rule := cluster.GetRuleManager().GetRule(group, id) - if err := cluster.GetRuleManager().DeleteRule(group, id); err != nil { + rule := manager.GetRule(group, id) + if err := manager.DeleteRule(group, id); err != nil { h.rd.JSON(w, http.StatusInternalServerError, err.Error()) return } if rule != nil { + cluster := getCluster(r) cluster.AddSuspectKeyRange(rule.StartKey, rule.EndKey) } @@ -313,16 +318,12 @@ func (h *ruleHandler) DeleteRuleByGroup(w http.ResponseWriter, r *http.Request) // @Failure 500 {string} string "PD server failed to proceed the request." // @Router /config/rules/batch [post] func (h *ruleHandler) BatchRules(w http.ResponseWriter, r *http.Request) { - cluster := getCluster(r) - if !cluster.GetOpts().IsPlacementRulesEnabled() { - h.rd.JSON(w, http.StatusPreconditionFailed, errPlacementDisabled.Error()) - return - } + manager := getRuleManager(r) var opts []placement.RuleOp if err := apiutil.ReadJSONRespondError(h.rd, w, r.Body, &opts); err != nil { return } - if err := cluster.GetRuleManager().SetKeyType(h.svr.GetConfig().PDServerCfg.KeyType). + if err := manager.SetKeyType(h.svr.GetConfig().PDServerCfg.KeyType). Batch(opts); err != nil { if errs.ErrRuleContent.Equal(err) || errs.ErrHexDecodingString.Equal(err) { h.rd.JSON(w, http.StatusBadRequest, err.Error()) @@ -341,15 +342,12 @@ func (h *ruleHandler) BatchRules(w http.ResponseWriter, r *http.Request) { // @Success 200 {object} placement.RuleGroup // @Failure 404 {string} string "The RuleGroup does not exist." // @Failure 412 {string} string "Placement rules feature is disabled." +// @Failure 500 {string} string "PD server failed to proceed the request." 
// @Router /config/rule_group/{id} [get] func (h *ruleHandler) GetGroupConfig(w http.ResponseWriter, r *http.Request) { - cluster := getCluster(r) - if !cluster.GetOpts().IsPlacementRulesEnabled() { - h.rd.JSON(w, http.StatusPreconditionFailed, errPlacementDisabled.Error()) - return - } + manager := getRuleManager(r) id := mux.Vars(r)["id"] - group := cluster.GetRuleManager().GetRuleGroup(id) + group := manager.GetRuleGroup(id) if group == nil { h.rd.JSON(w, http.StatusNotFound, nil) return @@ -368,21 +366,18 @@ func (h *ruleHandler) GetGroupConfig(w http.ResponseWriter, r *http.Request) { // @Failure 500 {string} string "PD server failed to proceed the request." // @Router /config/rule_group [post] func (h *ruleHandler) SetGroupConfig(w http.ResponseWriter, r *http.Request) { - cluster := getCluster(r) - if !cluster.GetOpts().IsPlacementRulesEnabled() { - h.rd.JSON(w, http.StatusPreconditionFailed, errPlacementDisabled.Error()) - return - } + manager := getRuleManager(r) var ruleGroup placement.RuleGroup if err := apiutil.ReadJSONRespondError(h.rd, w, r.Body, &ruleGroup); err != nil { return } - if err := cluster.GetRuleManager().SetRuleGroup(&ruleGroup); err != nil { + if err := manager.SetRuleGroup(&ruleGroup); err != nil { h.rd.JSON(w, http.StatusInternalServerError, err.Error()) return } - for _, r := range cluster.GetRuleManager().GetRulesByGroup(ruleGroup.ID) { - cluster.AddSuspectKeyRange(r.StartKey, r.EndKey) + cluster := getCluster(r) + for _, rule := range manager.GetRulesByGroup(ruleGroup.ID) { + cluster.AddSuspectKeyRange(rule.StartKey, rule.EndKey) } h.rd.JSON(w, http.StatusOK, "Update rule group successfully.") } @@ -396,18 +391,15 @@ func (h *ruleHandler) SetGroupConfig(w http.ResponseWriter, r *http.Request) { // @Failure 500 {string} string "PD server failed to proceed the request." // @Router /config/rule_group/{id} [delete] func (h *ruleHandler) DeleteGroupConfig(w http.ResponseWriter, r *http.Request) { - cluster := getCluster(r) - if !cluster.GetOpts().IsPlacementRulesEnabled() { - h.rd.JSON(w, http.StatusPreconditionFailed, errPlacementDisabled.Error()) - return - } + manager := getRuleManager(r) id := mux.Vars(r)["id"] - err := cluster.GetRuleManager().DeleteRuleGroup(id) + err := manager.DeleteRuleGroup(id) if err != nil { h.rd.JSON(w, http.StatusInternalServerError, err.Error()) return } - for _, r := range cluster.GetRuleManager().GetRulesByGroup(id) { + cluster := getCluster(r) + for _, r := range manager.GetRulesByGroup(id) { cluster.AddSuspectKeyRange(r.StartKey, r.EndKey) } h.rd.JSON(w, http.StatusOK, "Delete rule group successfully.") @@ -418,14 +410,11 @@ func (h *ruleHandler) DeleteGroupConfig(w http.ResponseWriter, r *http.Request) // @Produce json // @Success 200 {array} placement.RuleGroup // @Failure 412 {string} string "Placement rules feature is disabled." +// @Failure 500 {string} string "PD server failed to proceed the request." 
// @Router /config/rule_groups [get] func (h *ruleHandler) GetAllGroupConfigs(w http.ResponseWriter, r *http.Request) { - cluster := getCluster(r) - if !cluster.GetOpts().IsPlacementRulesEnabled() { - h.rd.JSON(w, http.StatusPreconditionFailed, errPlacementDisabled.Error()) - return - } - ruleGroups := cluster.GetRuleManager().GetRuleGroups() + manager := getRuleManager(r) + ruleGroups := manager.GetRuleGroups() h.rd.JSON(w, http.StatusOK, ruleGroups) } @@ -434,14 +423,11 @@ func (h *ruleHandler) GetAllGroupConfigs(w http.ResponseWriter, r *http.Request) // @Produce json // @Success 200 {array} placement.GroupBundle // @Failure 412 {string} string "Placement rules feature is disabled." +// @Failure 500 {string} string "PD server failed to proceed the request." // @Router /config/placement-rule [get] func (h *ruleHandler) GetPlacementRules(w http.ResponseWriter, r *http.Request) { - cluster := getCluster(r) - if !cluster.GetOpts().IsPlacementRulesEnabled() { - h.rd.JSON(w, http.StatusPreconditionFailed, errPlacementDisabled.Error()) - return - } - bundles := cluster.GetRuleManager().GetAllGroupBundles() + manager := getRuleManager(r) + bundles := manager.GetAllGroupBundles() h.rd.JSON(w, http.StatusOK, bundles) } @@ -455,17 +441,13 @@ func (h *ruleHandler) GetPlacementRules(w http.ResponseWriter, r *http.Request) // @Failure 500 {string} string "PD server failed to proceed the request." // @Router /config/placement-rule [post] func (h *ruleHandler) SetPlacementRules(w http.ResponseWriter, r *http.Request) { - cluster := getCluster(r) - if !cluster.GetOpts().IsPlacementRulesEnabled() { - h.rd.JSON(w, http.StatusPreconditionFailed, errPlacementDisabled.Error()) - return - } + manager := getRuleManager(r) var groups []placement.GroupBundle if err := apiutil.ReadJSONRespondError(h.rd, w, r.Body, &groups); err != nil { return } _, partial := r.URL.Query()["partial"] - if err := cluster.GetRuleManager().SetKeyType(h.svr.GetConfig().PDServerCfg.KeyType). + if err := manager.SetKeyType(h.svr.GetConfig().PDServerCfg.KeyType). SetAllGroupBundles(groups, !partial); err != nil { if errs.ErrRuleContent.Equal(err) || errs.ErrHexDecodingString.Equal(err) { h.rd.JSON(w, http.StatusBadRequest, err.Error()) @@ -483,14 +465,12 @@ func (h *ruleHandler) SetPlacementRules(w http.ResponseWriter, r *http.Request) // @Produce json // @Success 200 {object} placement.GroupBundle // @Failure 412 {string} string "Placement rules feature is disabled." +// @Failure 500 {string} string "PD server failed to proceed the request." // @Router /config/placement-rule/{group} [get] func (h *ruleHandler) GetPlacementRuleByGroup(w http.ResponseWriter, r *http.Request) { - cluster := getCluster(r) - if !cluster.GetOpts().IsPlacementRulesEnabled() { - h.rd.JSON(w, http.StatusPreconditionFailed, errPlacementDisabled.Error()) - return - } - group := cluster.GetRuleManager().GetGroupBundle(mux.Vars(r)["group"]) + manager := getRuleManager(r) + g := mux.Vars(r)["group"] + group := manager.GetGroupBundle(g) h.rd.JSON(w, http.StatusOK, group) } @@ -502,21 +482,19 @@ func (h *ruleHandler) GetPlacementRuleByGroup(w http.ResponseWriter, r *http.Req // @Success 200 {string} string "Delete group and rules successfully." // @Failure 400 {string} string "Bad request." // @Failure 412 {string} string "Placement rules feature is disabled." +// @Failure 500 {string} string "PD server failed to proceed the request." 
// @Router /config/placement-rule [delete] func (h *ruleHandler) DeletePlacementRuleByGroup(w http.ResponseWriter, r *http.Request) { - cluster := getCluster(r) - if !cluster.GetOpts().IsPlacementRulesEnabled() { - h.rd.JSON(w, http.StatusPreconditionFailed, errPlacementDisabled.Error()) - return - } + manager := getRuleManager(r) group := mux.Vars(r)["group"] - group, err := url.PathUnescape(group) + var err error + group, err = url.PathUnescape(group) if err != nil { h.rd.JSON(w, http.StatusBadRequest, err.Error()) return } _, regex := r.URL.Query()["regexp"] - if err := cluster.GetRuleManager().DeleteGroupBundle(group, regex); err != nil { + if err := manager.DeleteGroupBundle(group, regex); err != nil { h.rd.JSON(w, http.StatusInternalServerError, err.Error()) return } @@ -532,11 +510,7 @@ func (h *ruleHandler) DeletePlacementRuleByGroup(w http.ResponseWriter, r *http. // @Failure 500 {string} string "PD server failed to proceed the request." // @Router /config/placement-rule/{group} [post] func (h *ruleHandler) SetPlacementRuleByGroup(w http.ResponseWriter, r *http.Request) { - cluster := getCluster(r) - if !cluster.GetOpts().IsPlacementRulesEnabled() { - h.rd.JSON(w, http.StatusPreconditionFailed, errPlacementDisabled.Error()) - return - } + manager := getRuleManager(r) groupID := mux.Vars(r)["group"] var group placement.GroupBundle if err := apiutil.ReadJSONRespondError(h.rd, w, r.Body, &group); err != nil { @@ -549,7 +523,7 @@ func (h *ruleHandler) SetPlacementRuleByGroup(w http.ResponseWriter, r *http.Req h.rd.JSON(w, http.StatusBadRequest, fmt.Sprintf("group id %s does not match request URI %s", group.ID, groupID)) return } - if err := cluster.GetRuleManager().SetKeyType(h.svr.GetConfig().PDServerCfg.KeyType). + if err := manager.SetKeyType(h.svr.GetConfig().PDServerCfg.KeyType). SetGroupBundle(group); err != nil { if errs.ErrRuleContent.Equal(err) || errs.ErrHexDecodingString.Equal(err) { h.rd.JSON(w, http.StatusBadRequest, err.Error()) diff --git a/server/api/server.go b/server/api/server.go index ee301ea54c8..ad614593b2f 100644 --- a/server/api/server.go +++ b/server/api/server.go @@ -17,6 +17,7 @@ package api import ( "context" "net/http" + "strings" "github.com/gorilla/mux" scheapi "github.com/tikv/pd/pkg/mcs/scheduling/server/apis/v1" @@ -52,6 +53,7 @@ func NewHandler(_ context.Context, svr *server.Server) (http.Handler, apiutil.AP // "/schedulers", http.MethodGet // "/schedulers/{name}", http.MethodPost // "/schedulers/diagnostic/{name}", http.MethodGet + // "/scheduler-config", http.MethodGet // "/hotspot/regions/read", http.MethodGet // "/hotspot/regions/write", http.MethodGet // "/hotspot/regions/history", http.MethodGet @@ -78,11 +80,76 @@ func NewHandler(_ context.Context, svr *server.Server) (http.Handler, apiutil.AP scheapi.APIPathPrefix+"/checkers", mcs.SchedulingServiceName, []string{http.MethodPost, http.MethodGet}), + serverapi.MicroserviceRedirectRule( + prefix+"/region/id", + scheapi.APIPathPrefix+"/config/regions", + mcs.SchedulingServiceName, + []string{http.MethodGet}, + func(r *http.Request) bool { + // The original code uses the path "/region/id" to get the region id. + // However, the path "/region/id" is used to get the region by id, which is not what we want. 
+ return strings.Contains(r.URL.Path, "label") + }), + serverapi.MicroserviceRedirectRule( + prefix+"/regions/accelerate-schedule", + scheapi.APIPathPrefix+"/regions/accelerate-schedule", + mcs.SchedulingServiceName, + []string{http.MethodPost}), + serverapi.MicroserviceRedirectRule( + prefix+"/regions/scatter", + scheapi.APIPathPrefix+"/regions/scatter", + mcs.SchedulingServiceName, + []string{http.MethodPost}), + serverapi.MicroserviceRedirectRule( + prefix+"/regions/split", + scheapi.APIPathPrefix+"/regions/split", + mcs.SchedulingServiceName, + []string{http.MethodPost}), + serverapi.MicroserviceRedirectRule( + prefix+"/regions/replicated", + scheapi.APIPathPrefix+"/regions/replicated", + mcs.SchedulingServiceName, + []string{http.MethodGet}), + serverapi.MicroserviceRedirectRule( + prefix+"/config/region-label/rules", + scheapi.APIPathPrefix+"/config/region-label/rules", + mcs.SchedulingServiceName, + []string{http.MethodGet}), + serverapi.MicroserviceRedirectRule( + prefix+"/config/region-label/rule/", // Note: this is a typo in the original code + scheapi.APIPathPrefix+"/config/region-label/rules", + mcs.SchedulingServiceName, + []string{http.MethodGet}), serverapi.MicroserviceRedirectRule( prefix+"/hotspot", scheapi.APIPathPrefix+"/hotspot", mcs.SchedulingServiceName, []string{http.MethodGet}), + serverapi.MicroserviceRedirectRule( + prefix+"/config/rules", + scheapi.APIPathPrefix+"/config/rules", + mcs.SchedulingServiceName, + []string{http.MethodGet}), + serverapi.MicroserviceRedirectRule( + prefix+"/config/rule/", + scheapi.APIPathPrefix+"/config/rule", + mcs.SchedulingServiceName, + []string{http.MethodGet}), + serverapi.MicroserviceRedirectRule( + prefix+"/config/rule_group/", + scheapi.APIPathPrefix+"/config/rule_groups", // Note: this is a typo in the original code + mcs.SchedulingServiceName, + []string{http.MethodGet}), + serverapi.MicroserviceRedirectRule( + prefix+"/config/rule_groups", + scheapi.APIPathPrefix+"/config/rule_groups", + mcs.SchedulingServiceName, + []string{http.MethodGet}), + serverapi.MicroserviceRedirectRule( + prefix+"/config/placement-rule", + scheapi.APIPathPrefix+"/config/placement-rule", + mcs.SchedulingServiceName, + []string{http.MethodGet}), // because the writing of all the meta information of the scheduling service is in the API server, // we should not post and delete the scheduler directly in the scheduling service. serverapi.MicroserviceRedirectRule( @@ -90,13 +157,16 @@ func NewHandler(_ context.Context, svr *server.Server) (http.Handler, apiutil.AP scheapi.APIPathPrefix+"/schedulers", mcs.SchedulingServiceName, []string{http.MethodGet}), + serverapi.MicroserviceRedirectRule( + prefix+"/scheduler-config", + scheapi.APIPathPrefix+"/schedulers/config", + mcs.SchedulingServiceName, + []string{http.MethodGet}), serverapi.MicroserviceRedirectRule( prefix+"/schedulers/", // Note: this means "/schedulers/{name}" scheapi.APIPathPrefix+"/schedulers", mcs.SchedulingServiceName, []string{http.MethodPost}), - // TODO: we need to consider the case that v1 api not support restful api. - // we might change the previous path parameters to query parameters. 
), negroni.Wrap(r)), ) diff --git a/server/api/server_test.go b/server/api/server_test.go index 22989b92a03..d834941193b 100644 --- a/server/api/server_test.go +++ b/server/api/server_test.go @@ -273,7 +273,7 @@ func (suite *adminTestSuite) TestCleanPath() { // handled by router response := httptest.NewRecorder() r, _, _ := NewHandler(context.Background(), suite.svr) - request, err := http.NewRequest(http.MethodGet, url, nil) + request, err := http.NewRequest(http.MethodGet, url, http.NoBody) re.NoError(err) r.ServeHTTP(response, request) // handled by `cleanPath` which is in `mux.ServeHTTP` diff --git a/server/api/service_gc_safepoint.go b/server/api/service_gc_safepoint.go index b26edaba07d..270edca58bf 100644 --- a/server/api/service_gc_safepoint.go +++ b/server/api/service_gc_safepoint.go @@ -35,8 +35,9 @@ func newServiceGCSafepointHandler(svr *server.Server, rd *render.Render) *servic } } +// ListServiceGCSafepoint is the response for list service GC safepoint. // NOTE: This type is exported by HTTP API. Please pay more attention when modifying it. -type listServiceGCSafepoint struct { +type ListServiceGCSafepoint struct { ServiceGCSafepoints []*endpoint.ServiceSafePoint `json:"service_gc_safe_points"` GCSafePoint uint64 `json:"gc_safe_point"` } @@ -44,7 +45,7 @@ type listServiceGCSafepoint struct { // @Tags service_gc_safepoint // @Summary Get all service GC safepoint. // @Produce json -// @Success 200 {array} listServiceGCSafepoint +// @Success 200 {array} ListServiceGCSafepoint // @Failure 500 {string} string "PD server failed to proceed the request." // @Router /gc/safepoint [get] func (h *serviceGCSafepointHandler) GetGCSafePoint(w http.ResponseWriter, r *http.Request) { @@ -59,7 +60,7 @@ func (h *serviceGCSafepointHandler) GetGCSafePoint(w http.ResponseWriter, r *htt h.rd.JSON(w, http.StatusInternalServerError, err.Error()) return } - list := listServiceGCSafepoint{ + list := ListServiceGCSafepoint{ GCSafePoint: gcSafepoint, ServiceGCSafepoints: ssps, } diff --git a/server/api/service_gc_safepoint_test.go b/server/api/service_gc_safepoint_test.go index 517a94c2e23..3df9102d116 100644 --- a/server/api/service_gc_safepoint_test.go +++ b/server/api/service_gc_safepoint_test.go @@ -58,7 +58,7 @@ func (suite *serviceGCSafepointTestSuite) TestServiceGCSafepoint() { sspURL := suite.urlPrefix + "/gc/safepoint" storage := suite.svr.GetStorage() - list := &listServiceGCSafepoint{ + list := &ListServiceGCSafepoint{ ServiceGCSafepoints: []*endpoint.ServiceSafePoint{ { ServiceID: "a", @@ -87,7 +87,7 @@ func (suite *serviceGCSafepointTestSuite) TestServiceGCSafepoint() { res, err := testDialClient.Get(sspURL) suite.NoError(err) defer res.Body.Close() - listResp := &listServiceGCSafepoint{} + listResp := &ListServiceGCSafepoint{} err = apiutil.ReadJSON(res.Body, listResp) suite.NoError(err) suite.Equal(list, listResp) diff --git a/server/api/stats.go b/server/api/stats.go index 1798597b6cc..915d33ddfdf 100644 --- a/server/api/stats.go +++ b/server/api/stats.go @@ -36,8 +36,9 @@ func newStatsHandler(svr *server.Server, rd *render.Render) *statsHandler { // @Tags stats // @Summary Get region statistics of a specified range. 
-// @Param start_key query string true "Start key" -// @Param end_key query string true "End key" +// @Param start_key query string true "Start key" +// @Param end_key query string true "End key" +// @Param count query bool false "Whether only count the number of regions" // @Produce json // @Success 200 {object} statistics.RegionStats // @Router /stats/region [get] diff --git a/server/api/store.go b/server/api/store.go index a44850d35cc..8537cd45c5b 100644 --- a/server/api/store.go +++ b/server/api/store.go @@ -172,14 +172,14 @@ func newStoreHandler(handler *server.Handler, rd *render.Render) *storeHandler { } } -// @Tags store +// @Tags store // @Summary Get a store's information. // @Param id path integer true "Store Id" -// @Produce json +// @Produce json // @Success 200 {object} StoreInfo // @Failure 400 {string} string "The input is invalid." // @Failure 404 {string} string "The store does not exist." -// @Failure 500 {string} string "PD server failed to proceed the request." +// @Failure 500 {string} string "PD server failed to proceed the request." // @Router /store/{id} [get] func (h *storeHandler) GetStore(w http.ResponseWriter, r *http.Request) { rc := getCluster(r) @@ -735,13 +735,13 @@ func (h *storesHandler) GetStoresProgress(w http.ResponseWriter, r *http.Request } // @Tags store -// @Summary Get all stores in the cluster. -// @Param state query array true "Specify accepted store states." +// @Summary Get all stores in the cluster. +// @Param state query array true "Specify accepted store states." // @Produce json -// @Success 200 {object} StoresInfo +// @Success 200 {object} StoresInfo // @Failure 500 {string} string "PD server failed to proceed the request." -// @Router /stores [get] -// @Deprecated Better to use /stores/check instead. +// @Router /stores [get] +// @Deprecated Better to use /stores/check instead. func (h *storesHandler) GetAllStores(w http.ResponseWriter, r *http.Request) { rc := getCluster(r) stores := rc.GetMetaStores() diff --git a/server/api/store_test.go b/server/api/store_test.go index 2b3a8dee9bb..d0961d572f8 100644 --- a/server/api/store_test.go +++ b/server/api/store_test.go @@ -51,7 +51,7 @@ func TestStoreTestSuite(t *testing.T) { } func (suite *storeTestSuite) requestStatusBody(client *http.Client, method string, url string) int { - req, err := http.NewRequest(method, url, nil) + req, err := http.NewRequest(method, url, http.NoBody) suite.NoError(err) resp, err := client.Do(req) suite.NoError(err) diff --git a/server/apiv2/handlers/keyspace.go b/server/apiv2/handlers/keyspace.go index 9602cc863ef..c2802bb939d 100644 --- a/server/apiv2/handlers/keyspace.go +++ b/server/apiv2/handlers/keyspace.go @@ -113,7 +113,7 @@ func LoadKeyspace(c *gin.Context) { if value, ok := c.GetQuery("force_refresh_group_id"); ok && value == "true" { groupManager := svr.GetKeyspaceGroupManager() if groupManager == nil { - c.AbortWithStatusJSON(http.StatusInternalServerError, managerUninitializedErr) + c.AbortWithStatusJSON(http.StatusInternalServerError, GroupManagerUninitializedErr) return } // keyspace has been checked in LoadKeyspace, so no need to check again. 
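Both LoadKeyspace above and the keyspace-group handlers in the next hunk now abort with the exported GroupManagerUninitializedErr message when the keyspace group manager has not been initialized. Exporting the constant lets callers outside the handlers package match that failure instead of duplicating the string; the snippet below is a minimal sketch of such a caller, an assumption for illustration rather than code taken from this patch (the package name and helper are hypothetical).

package keyspacetest // hypothetical caller, e.g. an integration test

import (
	"net/http"
	"strings"

	"github.com/tikv/pd/server/apiv2/handlers"
)

// isGroupManagerUninitialized reports whether an apiv2 response corresponds to the
// "keyspace group manager is not initialized" failure. The handlers return the
// message via AbortWithStatusJSON, so the JSON body contains it verbatim.
func isGroupManagerUninitialized(statusCode int, body []byte) bool {
	return statusCode == http.StatusInternalServerError &&
		strings.Contains(string(body), handlers.GroupManagerUninitializedErr)
}

That is presumably why the constant is promoted from the unexported groupManagerUninitializedErr in the hunk that follows.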
diff --git a/server/apiv2/handlers/tso_keyspace_group.go b/server/apiv2/handlers/tso_keyspace_group.go index a580b21f705..a9f042687f6 100644 --- a/server/apiv2/handlers/tso_keyspace_group.go +++ b/server/apiv2/handlers/tso_keyspace_group.go @@ -30,7 +30,8 @@ import ( "github.com/tikv/pd/server/apiv2/middlewares" ) -const groupManagerUninitializedErr = "keyspace group manager is not initialized" +// GroupManagerUninitializedErr is the error message for uninitialized keyspace group manager. +const GroupManagerUninitializedErr = "keyspace group manager is not initialized" // RegisterTSOKeyspaceGroup registers keyspace group handlers to the server. func RegisterTSOKeyspaceGroup(r *gin.RouterGroup) { @@ -78,7 +79,7 @@ func CreateKeyspaceGroups(c *gin.Context) { svr := c.MustGet(middlewares.ServerContextKey).(*server.Server) manager := svr.GetKeyspaceGroupManager() if manager == nil { - c.AbortWithStatusJSON(http.StatusInternalServerError, groupManagerUninitializedErr) + c.AbortWithStatusJSON(http.StatusInternalServerError, GroupManagerUninitializedErr) return } err = manager.CreateKeyspaceGroups(createParams.KeyspaceGroups) @@ -101,7 +102,7 @@ func GetKeyspaceGroups(c *gin.Context) { svr := c.MustGet(middlewares.ServerContextKey).(*server.Server) manager := svr.GetKeyspaceGroupManager() if manager == nil { - c.AbortWithStatusJSON(http.StatusInternalServerError, groupManagerUninitializedErr) + c.AbortWithStatusJSON(http.StatusInternalServerError, GroupManagerUninitializedErr) return } keyspaceGroups, err := manager.GetKeyspaceGroups(scanStart, scanLimit) @@ -152,7 +153,7 @@ func GetKeyspaceGroupByID(c *gin.Context) { svr := c.MustGet(middlewares.ServerContextKey).(*server.Server) manager := svr.GetKeyspaceGroupManager() if manager == nil { - c.AbortWithStatusJSON(http.StatusInternalServerError, groupManagerUninitializedErr) + c.AbortWithStatusJSON(http.StatusInternalServerError, GroupManagerUninitializedErr) return } @@ -189,7 +190,7 @@ func DeleteKeyspaceGroupByID(c *gin.Context) { svr := c.MustGet(middlewares.ServerContextKey).(*server.Server) manager := svr.GetKeyspaceGroupManager() if manager == nil { - c.AbortWithStatusJSON(http.StatusInternalServerError, groupManagerUninitializedErr) + c.AbortWithStatusJSON(http.StatusInternalServerError, GroupManagerUninitializedErr) return } kg, err := manager.DeleteKeyspaceGroupByID(id) @@ -250,7 +251,7 @@ func SplitKeyspaceGroupByID(c *gin.Context) { } groupManager := svr.GetKeyspaceGroupManager() if groupManager == nil { - c.AbortWithStatusJSON(http.StatusInternalServerError, groupManagerUninitializedErr) + c.AbortWithStatusJSON(http.StatusInternalServerError, GroupManagerUninitializedErr) return } @@ -289,7 +290,7 @@ func FinishSplitKeyspaceByID(c *gin.Context) { svr := c.MustGet(middlewares.ServerContextKey).(*server.Server) manager := svr.GetKeyspaceGroupManager() if manager == nil { - c.AbortWithStatusJSON(http.StatusInternalServerError, groupManagerUninitializedErr) + c.AbortWithStatusJSON(http.StatusInternalServerError, GroupManagerUninitializedErr) return } err = manager.FinishSplitKeyspaceByID(id) @@ -337,7 +338,7 @@ func MergeKeyspaceGroups(c *gin.Context) { svr := c.MustGet(middlewares.ServerContextKey).(*server.Server) groupManager := svr.GetKeyspaceGroupManager() if groupManager == nil { - c.AbortWithStatusJSON(http.StatusInternalServerError, groupManagerUninitializedErr) + c.AbortWithStatusJSON(http.StatusInternalServerError, GroupManagerUninitializedErr) return } // Merge keyspace group. 
@@ -364,7 +365,7 @@ func FinishMergeKeyspaceByID(c *gin.Context) { svr := c.MustGet(middlewares.ServerContextKey).(*server.Server) manager := svr.GetKeyspaceGroupManager() if manager == nil { - c.AbortWithStatusJSON(http.StatusInternalServerError, groupManagerUninitializedErr) + c.AbortWithStatusJSON(http.StatusInternalServerError, GroupManagerUninitializedErr) return } err = manager.FinishMergeKeyspaceByID(id) @@ -390,7 +391,7 @@ func AllocNodesForKeyspaceGroup(c *gin.Context) { svr := c.MustGet(middlewares.ServerContextKey).(*server.Server) manager := svr.GetKeyspaceGroupManager() if manager == nil { - c.AbortWithStatusJSON(http.StatusInternalServerError, groupManagerUninitializedErr) + c.AbortWithStatusJSON(http.StatusInternalServerError, GroupManagerUninitializedErr) return } allocParams := &AllocNodesForKeyspaceGroupParams{} @@ -437,7 +438,7 @@ func SetNodesForKeyspaceGroup(c *gin.Context) { svr := c.MustGet(middlewares.ServerContextKey).(*server.Server) manager := svr.GetKeyspaceGroupManager() if manager == nil { - c.AbortWithStatusJSON(http.StatusInternalServerError, groupManagerUninitializedErr) + c.AbortWithStatusJSON(http.StatusInternalServerError, GroupManagerUninitializedErr) return } setParams := &SetNodesForKeyspaceGroupParams{} @@ -493,7 +494,7 @@ func SetPriorityForKeyspaceGroup(c *gin.Context) { svr := c.MustGet(middlewares.ServerContextKey).(*server.Server) manager := svr.GetKeyspaceGroupManager() if manager == nil { - c.AbortWithStatusJSON(http.StatusInternalServerError, groupManagerUninitializedErr) + c.AbortWithStatusJSON(http.StatusInternalServerError, GroupManagerUninitializedErr) return } setParams := &SetPriorityForKeyspaceGroupParams{} diff --git a/server/apiv2/middlewares/redirector.go b/server/apiv2/middlewares/redirector.go index 285f096e823..37c06de1585 100644 --- a/server/apiv2/middlewares/redirector.go +++ b/server/apiv2/middlewares/redirector.go @@ -30,9 +30,13 @@ import ( func Redirector() gin.HandlerFunc { return func(c *gin.Context) { svr := c.MustGet(ServerContextKey).(*server.Server) + + if svr.IsClosed() { + c.AbortWithStatusJSON(http.StatusInternalServerError, errs.ErrServerNotStarted.FastGenByArgs().Error()) + return + } allowFollowerHandle := len(c.Request.Header.Get(apiutil.PDAllowFollowerHandleHeader)) > 0 - isLeader := svr.GetMember().IsLeader() - if !svr.IsClosed() && (allowFollowerHandle || isLeader) { + if allowFollowerHandle || svr.GetMember().IsLeader() { c.Next() return } @@ -46,12 +50,11 @@ func Redirector() gin.HandlerFunc { c.Request.Header.Set(apiutil.PDRedirectorHeader, svr.Name()) - leader := svr.GetMember().GetLeader() - if leader == nil { + if svr.GetMember().GetLeader() == nil { c.AbortWithStatusJSON(http.StatusServiceUnavailable, errs.ErrLeaderNil.FastGenByArgs().Error()) return } - clientUrls := leader.GetClientUrls() + clientUrls := svr.GetMember().GetLeader().GetClientUrls() urls := make([]url.URL, 0, len(clientUrls)) for _, item := range clientUrls { u, err := url.Parse(item) diff --git a/server/cluster/cluster.go b/server/cluster/cluster.go index 25a47a7fca9..78f6ddd4364 100644 --- a/server/cluster/cluster.go +++ b/server/cluster/cluster.go @@ -15,7 +15,6 @@ package cluster import ( - "bytes" "context" "encoding/json" "fmt" @@ -41,22 +40,17 @@ import ( "github.com/tikv/pd/pkg/gctuner" "github.com/tikv/pd/pkg/id" "github.com/tikv/pd/pkg/keyspace" + "github.com/tikv/pd/pkg/mcs/discovery" + mcsutils "github.com/tikv/pd/pkg/mcs/utils" "github.com/tikv/pd/pkg/memory" "github.com/tikv/pd/pkg/progress" 
"github.com/tikv/pd/pkg/replication" - "github.com/tikv/pd/pkg/schedule" - "github.com/tikv/pd/pkg/schedule/checker" sc "github.com/tikv/pd/pkg/schedule/config" "github.com/tikv/pd/pkg/schedule/hbstream" "github.com/tikv/pd/pkg/schedule/labeler" - "github.com/tikv/pd/pkg/schedule/operator" "github.com/tikv/pd/pkg/schedule/placement" - "github.com/tikv/pd/pkg/schedule/scatter" - "github.com/tikv/pd/pkg/schedule/schedulers" - "github.com/tikv/pd/pkg/schedule/splitter" "github.com/tikv/pd/pkg/slice" "github.com/tikv/pd/pkg/statistics" - "github.com/tikv/pd/pkg/statistics/buckets" "github.com/tikv/pd/pkg/statistics/utils" "github.com/tikv/pd/pkg/storage" "github.com/tikv/pd/pkg/storage/endpoint" @@ -97,6 +91,7 @@ const ( clientTimeout = 3 * time.Second defaultChangedRegionsLimit = 10000 gcTombstoneInterval = 30 * 24 * time.Hour + serviceCheckInterval = 10 * time.Second // persistLimitRetryTimes is used to reduce the probability of the persistent error // since the once the store is added or removed, we shouldn't return an error even if the store limit is failed to persist. persistLimitRetryTimes = 5 @@ -155,16 +150,12 @@ type RaftCluster struct { prevStoreLimit map[uint64]map[storelimit.Type]float64 // This below fields are all read-only, we cannot update itself after the raft cluster starts. - clusterID uint64 - id id.Allocator - core *core.BasicCluster // cached cluster info - opt *config.PersistOptions - limiter *StoreLimiter - coordinator *schedule.Coordinator - labelLevelStats *statistics.LabelStatistics - regionStats *statistics.RegionStatistics - hotStat *statistics.HotStat - slowStat *statistics.SlowStat + clusterID uint64 + id id.Allocator + core *core.BasicCluster // cached cluster info + opt *config.PersistOptions + limiter *StoreLimiter + *schedulingController ruleManager *placement.RuleManager regionLabeler *labeler.RegionLabeler replicationMode *replication.ModeManager @@ -173,6 +164,8 @@ type RaftCluster struct { regionSyncer *syncer.RegionSyncer changedRegions chan *core.RegionInfo keyspaceGroupManager *keyspace.GroupManager + independentServices sync.Map + hbstreams *hbstream.HeartbeatStreams } // Status saves some state information. @@ -184,7 +177,7 @@ type Status struct { } // NewRaftCluster create a new cluster. -func NewRaftCluster(ctx context.Context, clusterID uint64, regionSyncer *syncer.RegionSyncer, etcdClient *clientv3.Client, +func NewRaftCluster(ctx context.Context, clusterID uint64, basicCluster *core.BasicCluster, storage storage.Storage, regionSyncer *syncer.RegionSyncer, etcdClient *clientv3.Client, httpClient *http.Client) *RaftCluster { return &RaftCluster{ serverCtx: ctx, @@ -192,6 +185,8 @@ func NewRaftCluster(ctx context.Context, clusterID uint64, regionSyncer *syncer. 
regionSyncer: regionSyncer, httpClient: httpClient, etcdClient: etcdClient, + core: basicCluster, + storage: storage, } } @@ -264,19 +259,25 @@ func (c *RaftCluster) loadBootstrapTime() (time.Time, error) { func (c *RaftCluster) InitCluster( id id.Allocator, opt sc.ConfProvider, - storage storage.Storage, - basicCluster *core.BasicCluster, - keyspaceGroupManager *keyspace.GroupManager) { - c.core, c.opt, c.storage, c.id = basicCluster, opt.(*config.PersistOptions), storage, id + hbstreams *hbstream.HeartbeatStreams, + keyspaceGroupManager *keyspace.GroupManager) error { + c.opt, c.id = opt.(*config.PersistOptions), id c.ctx, c.cancel = context.WithCancel(c.serverCtx) - c.labelLevelStats = statistics.NewLabelStatistics() - c.hotStat = statistics.NewHotStat(c.ctx) - c.slowStat = statistics.NewSlowStat(c.ctx) c.progressManager = progress.NewManager() c.changedRegions = make(chan *core.RegionInfo, defaultChangedRegionsLimit) c.prevStoreLimit = make(map[uint64]map[storelimit.Type]float64) c.unsafeRecoveryController = unsaferecovery.NewController(c) c.keyspaceGroupManager = keyspaceGroupManager + c.hbstreams = hbstreams + c.ruleManager = placement.NewRuleManager(c.storage, c, c.GetOpts()) + if c.opt.IsPlacementRulesEnabled() { + err := c.ruleManager.Initialize(c.opt.GetMaxReplicas(), c.opt.GetLocationLabels(), c.opt.GetIsolationLevel()) + if err != nil { + return err + } + } + c.schedulingController = newSchedulingController(c.ctx, c.core, c.opt, c.ruleManager) + return nil } // Start starts a cluster. @@ -290,7 +291,10 @@ func (c *RaftCluster) Start(s Server) error { } c.isAPIServiceMode = s.IsAPIServiceMode() - c.InitCluster(s.GetAllocator(), s.GetPersistOptions(), s.GetStorage(), s.GetBasicCluster(), s.GetKeyspaceGroupManager()) + err := c.InitCluster(s.GetAllocator(), s.GetPersistOptions(), s.GetHBStreams(), s.GetKeyspaceGroupManager()) + if err != nil { + return err + } cluster, err := c.LoadClusterInfo() if err != nil { return err @@ -299,25 +303,21 @@ func (c *RaftCluster) Start(s Server) error { return nil } - c.ruleManager = placement.NewRuleManager(c.storage, c, c.GetOpts()) - if c.opt.IsPlacementRulesEnabled() { - err = c.ruleManager.Initialize(c.opt.GetMaxReplicas(), c.opt.GetLocationLabels(), c.opt.GetIsolationLevel()) - if err != nil { - return err - } - } c.regionLabeler, err = labeler.NewRegionLabeler(c.ctx, c.storage, regionLabelGCInterval) if err != nil { return err } + if !c.IsServiceIndependent(mcsutils.SchedulingServiceName) { + for _, store := range c.GetStores() { + storeID := store.GetID() + c.slowStat.ObserveSlowStoreStatus(storeID, store.IsSlow()) + } + } c.replicationMode, err = replication.NewReplicationModeManager(s.GetConfig().ReplicationMode, c.storage, cluster, s) if err != nil { return err } - - c.coordinator = schedule.NewCoordinator(c.ctx, cluster, s.GetHBStreams()) - c.regionStats = statistics.NewRegionStatistics(c.core, c.opt, c.ruleManager) c.limiter = NewStoreLimiter(s.GetPersistOptions()) c.externalTS, err = c.storage.LoadExternalTS() if err != nil { @@ -331,14 +331,10 @@ func (c *RaftCluster) Start(s Server) error { if err != nil { return err } - c.initSchedulers() - } else { - c.wg.Add(2) - go c.runCoordinator() - go c.runStatsBackgroundJobs() } - - c.wg.Add(8) + c.checkServices() + c.wg.Add(9) + go c.runServiceCheckJob() go c.runMetricsCollectionJob() go c.runNodeStateCheckJob() go c.syncRegions() @@ -352,6 +348,52 @@ func (c *RaftCluster) Start(s Server) error { return nil } +var once sync.Once + +func (c *RaftCluster) checkServices() { + if 
c.isAPIServiceMode { + servers, err := discovery.Discover(c.etcdClient, strconv.FormatUint(c.clusterID, 10), mcsutils.SchedulingServiceName) + if err != nil || len(servers) == 0 { + c.startSchedulingJobs(c, c.hbstreams) + c.independentServices.Delete(mcsutils.SchedulingServiceName) + } else { + if c.stopSchedulingJobs() { + c.initCoordinator(c.ctx, c, c.hbstreams) + } else { + once.Do(func() { + c.initCoordinator(c.ctx, c, c.hbstreams) + }) + } + c.independentServices.Store(mcsutils.SchedulingServiceName, true) + } + } else { + c.startSchedulingJobs(c, c.hbstreams) + c.independentServices.Delete(mcsutils.SchedulingServiceName) + } +} + +func (c *RaftCluster) runServiceCheckJob() { + defer logutil.LogPanic() + defer c.wg.Done() + + ticker := time.NewTicker(serviceCheckInterval) + failpoint.Inject("highFrequencyClusterJobs", func() { + ticker.Stop() + ticker = time.NewTicker(time.Millisecond) + }) + defer ticker.Stop() + + for { + select { + case <-c.ctx.Done(): + log.Info("service check job is stopped") + return + case <-ticker.C: + c.checkServices() + } + } +} + // startGCTuner func (c *RaftCluster) startGCTuner() { defer logutil.LogPanic() @@ -425,6 +467,10 @@ func (c *RaftCluster) startGCTuner() { func (c *RaftCluster) runStoreConfigSync() { defer logutil.LogPanic() defer c.wg.Done() + // TODO: After we fix the atomic problem of config, we can remove this failpoint. + failpoint.Inject("skipStoreConfigSync", func() { + failpoint.Return() + }) var ( synced, switchRaftV2Config, needPersist bool @@ -449,6 +495,7 @@ func (c *RaftCluster) runStoreConfigSync() { if err := c.opt.Persist(c.storage); err != nil { log.Warn("store config persisted failed", zap.Error(err)) } + log.Info("store config is updated") } select { case <-c.ctx.Done(): @@ -547,7 +594,7 @@ func (c *RaftCluster) fetchStoreConfigFromTiKV(ctx context.Context, statusAddres url = fmt.Sprintf("%s://%s/config", "http", statusAddress) } ctx, cancel := context.WithTimeout(ctx, clientTimeout) - req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, bytes.NewBuffer(nil)) + req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, http.NoBody) if err != nil { cancel() return nil, fmt.Errorf("failed to create store config http request: %w", err) @@ -600,13 +647,7 @@ func (c *RaftCluster) LoadClusterInfo() (*RaftCluster, error) { zap.Int("count", c.core.GetTotalRegionCount()), zap.Duration("cost", time.Since(start)), ) - if !c.isAPIServiceMode { - for _, store := range c.GetStores() { - storeID := store.GetID() - c.hotStat.GetOrCreateRollingStoreStats(storeID) - c.slowStat.ObserveSlowStoreStatus(storeID, store.IsSlow()) - } - } + return c, nil } @@ -617,9 +658,8 @@ func (c *RaftCluster) runMetricsCollectionJob() { ticker := time.NewTicker(metricsCollectionJobInterval) failpoint.Inject("highFrequencyClusterJobs", func() { ticker.Stop() - ticker = time.NewTicker(time.Microsecond) + ticker = time.NewTicker(time.Millisecond) }) - defer ticker.Stop() for { @@ -657,24 +697,6 @@ func (c *RaftCluster) runNodeStateCheckJob() { } } -func (c *RaftCluster) runStatsBackgroundJobs() { - defer logutil.LogPanic() - defer c.wg.Done() - - ticker := time.NewTicker(statistics.RegionsStatsObserveInterval) - defer ticker.Stop() - - for { - select { - case <-c.ctx.Done(): - log.Info("statistics background jobs has been stopped") - return - case <-ticker.C: - c.hotStat.ObserveRegionsStats(c.core.GetStoresWriteRate()) - } - } -} - func (c *RaftCluster) runUpdateStoreStats() { defer logutil.LogPanic() defer c.wg.Done() @@ -696,13 +718,6 @@ func (c 
*RaftCluster) runUpdateStoreStats() { } } -// runCoordinator runs the main scheduling loop. -func (c *RaftCluster) runCoordinator() { - defer logutil.LogPanic() - defer c.wg.Done() - c.coordinator.RunUntilStop() -} - func (c *RaftCluster) syncRegions() { defer logutil.LogPanic() defer c.wg.Done() @@ -723,14 +738,14 @@ func (c *RaftCluster) Stop() { return } c.running = false - if !c.isAPIServiceMode { - c.coordinator.Stop() - } c.cancel() + if !c.IsServiceIndependent(mcsutils.SchedulingServiceName) { + c.stopSchedulingJobs() + } c.Unlock() c.wg.Wait() - log.Info("raftcluster is stopped") + log.Info("raft cluster is stopped") } // IsRunning return if the cluster is running. @@ -750,79 +765,9 @@ func (c *RaftCluster) Context() context.Context { return nil } -// GetCoordinator returns the coordinator. -func (c *RaftCluster) GetCoordinator() *schedule.Coordinator { - return c.coordinator -} - -// GetOperatorController returns the operator controller. -func (c *RaftCluster) GetOperatorController() *operator.Controller { - return c.coordinator.GetOperatorController() -} - -// SetPrepared set the prepare check to prepared. Only for test purpose. -func (c *RaftCluster) SetPrepared() { - c.coordinator.GetPrepareChecker().SetPrepared() -} - -// GetRegionScatterer returns the region scatter. -func (c *RaftCluster) GetRegionScatterer() *scatter.RegionScatterer { - return c.coordinator.GetRegionScatterer() -} - -// GetRegionSplitter returns the region splitter -func (c *RaftCluster) GetRegionSplitter() *splitter.RegionSplitter { - return c.coordinator.GetRegionSplitter() -} - -// GetMergeChecker returns merge checker. -func (c *RaftCluster) GetMergeChecker() *checker.MergeChecker { - return c.coordinator.GetMergeChecker() -} - -// GetRuleChecker returns rule checker. -func (c *RaftCluster) GetRuleChecker() *checker.RuleChecker { - return c.coordinator.GetRuleChecker() -} - -// GetSchedulers gets all schedulers. -func (c *RaftCluster) GetSchedulers() []string { - return c.coordinator.GetSchedulersController().GetSchedulerNames() -} - -// GetSchedulerHandlers gets all scheduler handlers. -func (c *RaftCluster) GetSchedulerHandlers() map[string]http.Handler { - return c.coordinator.GetSchedulersController().GetSchedulerHandlers() -} - -// AddSchedulerHandler adds a scheduler handler. -func (c *RaftCluster) AddSchedulerHandler(scheduler schedulers.Scheduler, args ...string) error { - return c.coordinator.GetSchedulersController().AddSchedulerHandler(scheduler, args...) -} - -// RemoveSchedulerHandler removes a scheduler handler. -func (c *RaftCluster) RemoveSchedulerHandler(name string) error { - return c.coordinator.GetSchedulersController().RemoveSchedulerHandler(name) -} - -// AddScheduler adds a scheduler. -func (c *RaftCluster) AddScheduler(scheduler schedulers.Scheduler, args ...string) error { - return c.coordinator.GetSchedulersController().AddScheduler(scheduler, args...) -} - -// RemoveScheduler removes a scheduler. -func (c *RaftCluster) RemoveScheduler(name string) error { - return c.coordinator.GetSchedulersController().RemoveScheduler(name) -} - -// PauseOrResumeScheduler pauses or resumes a scheduler. -func (c *RaftCluster) PauseOrResumeScheduler(name string, t int64) error { - return c.coordinator.GetSchedulersController().PauseOrResumeScheduler(name, t) -} - -// PauseOrResumeChecker pauses or resumes checker. -func (c *RaftCluster) PauseOrResumeChecker(name string, t int64) error { - return c.coordinator.PauseOrResumeChecker(name, t) +// GetHeartbeatStreams returns the heartbeat streams. 
+func (c *RaftCluster) GetHeartbeatStreams() *hbstream.HeartbeatStreams { + return c.hbstreams } // AllocID returns a global unique ID. @@ -861,10 +806,6 @@ func (c *RaftCluster) GetOpts() sc.ConfProvider { return c.opt } -func (c *RaftCluster) initSchedulers() { - c.coordinator.InitSchedulers(false) -} - // GetScheduleConfig returns scheduling configurations. func (c *RaftCluster) GetScheduleConfig() *sc.ScheduleConfig { return c.opt.GetScheduleConfig() @@ -890,60 +831,11 @@ func (c *RaftCluster) SetPDServerConfig(cfg *config.PDServerConfig) { c.opt.SetPDServerConfig(cfg) } -// AddSuspectRegions adds regions to suspect list. -func (c *RaftCluster) AddSuspectRegions(regionIDs ...uint64) { - c.coordinator.GetCheckerController().AddSuspectRegions(regionIDs...) -} - -// GetSuspectRegions gets all suspect regions. -func (c *RaftCluster) GetSuspectRegions() []uint64 { - return c.coordinator.GetCheckerController().GetSuspectRegions() -} - -// GetHotStat gets hot stat. -func (c *RaftCluster) GetHotStat() *statistics.HotStat { - return c.hotStat -} - -// GetRegionStats gets region statistics. -func (c *RaftCluster) GetRegionStats() *statistics.RegionStatistics { - return c.regionStats -} - -// GetLabelStats gets label statistics. -func (c *RaftCluster) GetLabelStats() *statistics.LabelStatistics { - return c.labelLevelStats -} - -// RemoveSuspectRegion removes region from suspect list. -func (c *RaftCluster) RemoveSuspectRegion(id uint64) { - c.coordinator.GetCheckerController().RemoveSuspectRegion(id) -} - // GetUnsafeRecoveryController returns the unsafe recovery controller. func (c *RaftCluster) GetUnsafeRecoveryController() *unsaferecovery.Controller { return c.unsafeRecoveryController } -// AddSuspectKeyRange adds the key range with the its ruleID as the key -// The instance of each keyRange is like following format: -// [2][]byte: start key/end key -func (c *RaftCluster) AddSuspectKeyRange(start, end []byte) { - c.coordinator.GetCheckerController().AddSuspectKeyRange(start, end) -} - -// PopOneSuspectKeyRange gets one suspect keyRange group. -// it would return value and true if pop success, or return empty [][2][]byte and false -// if suspectKeyRanges couldn't pop keyRange group. -func (c *RaftCluster) PopOneSuspectKeyRange() ([2][]byte, bool) { - return c.coordinator.GetCheckerController().PopOneSuspectKeyRange() -} - -// ClearSuspectKeyRanges clears the suspect keyRanges, only for unit test -func (c *RaftCluster) ClearSuspectKeyRanges() { - c.coordinator.GetCheckerController().ClearSuspectKeyRanges() -} - // HandleStoreHeartbeat updates the store status. func (c *RaftCluster) HandleStoreHeartbeat(heartbeat *pdpb.StoreHeartbeatRequest, resp *pdpb.StoreHeartbeatResponse) error { stats := heartbeat.GetStats() @@ -970,7 +862,7 @@ func (c *RaftCluster) HandleStoreHeartbeat(heartbeat *pdpb.StoreHeartbeatRequest nowTime := time.Now() var newStore *core.StoreInfo // If this cluster has slow stores, we should awaken hibernated regions in other stores. 
- if !c.isAPIServiceMode { + if !c.IsServiceIndependent(mcsutils.SchedulingServiceName) { if needAwaken, slowStoreIDs := c.NeedAwakenAllRegionsInStore(storeID); needAwaken { log.Info("forcely awaken hibernated regions", zap.Uint64("store-id", storeID), zap.Uint64s("slow-stores", slowStoreIDs)) newStore = store.Clone(core.SetStoreStats(stats), core.SetLastHeartbeatTS(nowTime), core.SetLastAwakenTime(nowTime), opt) @@ -1005,7 +897,7 @@ func (c *RaftCluster) HandleStoreHeartbeat(heartbeat *pdpb.StoreHeartbeatRequest regions map[uint64]*core.RegionInfo interval uint64 ) - if !c.isAPIServiceMode { + if !c.IsServiceIndependent(mcsutils.SchedulingServiceName) { c.hotStat.Observe(storeID, newStore.GetStoreStats()) c.hotStat.FilterUnhealthyStore(c) c.slowStat.ObserveSlowStoreStatus(storeID, newStore.IsSlow()) @@ -1061,7 +953,7 @@ func (c *RaftCluster) HandleStoreHeartbeat(heartbeat *pdpb.StoreHeartbeatRequest e := int64(dur)*2 - int64(stat.GetTotalDurationSec()) store.Feedback(float64(e)) } - if !c.isAPIServiceMode { + if !c.IsServiceIndependent(mcsutils.SchedulingServiceName) { // Here we will compare the reported regions with the previous hot peers to decide if it is still hot. c.hotStat.CheckReadAsync(statistics.NewCollectUnReportedPeerTask(storeID, regions, interval)) } @@ -1097,11 +989,6 @@ func (c *RaftCluster) processReportBuckets(buckets *metapb.Buckets) error { return nil } -// IsPrepared return true if the prepare checker is ready. -func (c *RaftCluster) IsPrepared() bool { - return c.coordinator.GetPrepareChecker().IsPrepared() -} - var regionGuide = core.GenerateRegionGuideFunc(true) // processRegionHeartbeat updates the region information. @@ -1112,7 +999,7 @@ func (c *RaftCluster) processRegionHeartbeat(region *core.RegionInfo) error { } region.Inherit(origin, c.GetStoreConfig().IsEnableRegionBucket()) - if !c.isAPIServiceMode { + if !c.IsServiceIndependent(mcsutils.SchedulingServiceName) { cluster.HandleStatsAsync(c, region) } @@ -1121,7 +1008,7 @@ func (c *RaftCluster) processRegionHeartbeat(region *core.RegionInfo) error { // Save to cache if meta or leader is updated, or contains any down/pending peer. // Mark isNew if the region in cache does not have leader. isNew, saveKV, saveCache, needSync := regionGuide(region, origin) - if !c.isAPIServiceMode && !saveKV && !saveCache && !isNew { + if !saveKV && !saveCache && !isNew { // Due to some config changes need to update the region stats as well, // so we do some extra checks here. if hasRegionStats && c.regionStats.RegionStatsNeedUpdate(region) { @@ -1142,19 +1029,21 @@ func (c *RaftCluster) processRegionHeartbeat(region *core.RegionInfo) error { // To prevent a concurrent heartbeat of another region from overriding the up-to-date region info by a stale one, // check its validation again here. // - // However it can't solve the race condition of concurrent heartbeats from the same region. + // However, it can't solve the race condition of concurrent heartbeats from the same region. 
if overlaps, err = c.core.AtomicCheckAndPutRegion(region); err != nil { return err } - if !c.isAPIServiceMode { + if !c.IsServiceIndependent(mcsutils.SchedulingServiceName) { cluster.HandleOverlaps(c, overlaps) } regionUpdateCacheEventCounter.Inc() } - if !c.isAPIServiceMode { - cluster.Collect(c, region, c.GetRegionStores(region), hasRegionStats, isNew, c.IsPrepared()) + isPrepared := true + if !c.IsServiceIndependent(mcsutils.SchedulingServiceName) { + isPrepared = c.IsPrepared() } + cluster.Collect(c, region, c.GetRegionStores(region), hasRegionStats, isNew, isPrepared) if c.storage != nil { // If there are concurrent heartbeats from the same region, the last write will win even if @@ -1566,24 +1455,6 @@ func (c *RaftCluster) checkReplicaBeforeOfflineStore(storeID uint64) error { return nil } -func (c *RaftCluster) getEvictLeaderStores() (evictStores []uint64) { - if c.coordinator == nil { - return nil - } - handler, ok := c.coordinator.GetSchedulersController().GetSchedulerHandlers()[schedulers.EvictLeaderName] - if !ok { - return - } - type evictLeaderHandler interface { - EvictStoreIDs() []uint64 - } - h, ok := handler.(evictLeaderHandler) - if !ok { - return - } - return h.EvictStoreIDs() -} - func (c *RaftCluster) getUpStores() []uint64 { upStores := make([]uint64, 0) for _, store := range c.GetStores() { @@ -1634,9 +1505,8 @@ func (c *RaftCluster) BuryStore(storeID uint64, forceBury bool) error { c.resetProgress(storeID, addr) storeIDStr := strconv.FormatUint(storeID, 10) statistics.ResetStoreStatistics(addr, storeIDStr) - if !c.isAPIServiceMode { - c.hotStat.RemoveRollingStoreStats(storeID) - c.slowStat.RemoveSlowStoreStatus(storeID) + if !c.IsServiceIndependent(mcsutils.SchedulingServiceName) { + c.removeStoreStatistics(storeID) } } return err @@ -1811,9 +1681,8 @@ func (c *RaftCluster) putStoreLocked(store *core.StoreInfo) error { } } c.core.PutStore(store) - if !c.isAPIServiceMode { - c.hotStat.GetOrCreateRollingStoreStats(store.GetID()) - c.slowStat.ObserveSlowStoreStatus(store.GetID(), store.IsSlow()) + if !c.IsServiceIndependent(mcsutils.SchedulingServiceName) { + c.updateStoreStatistics(store.GetID(), store.IsSlow()) } return nil } @@ -1846,12 +1715,13 @@ func (c *RaftCluster) checkStores() { if err := c.ReadyToServe(storeID); err != nil { log.Error("change store to serving failed", zap.Stringer("store", store.GetMeta()), + zap.Int("region-count", c.GetTotalRegionCount()), errs.ZapError(err)) } } else if c.IsPrepared() { threshold := c.getThreshold(stores, store) - log.Debug("store serving threshold", zap.Uint64("store-id", storeID), zap.Float64("threshold", threshold)) regionSize := float64(store.GetRegionSize()) + log.Debug("store serving threshold", zap.Uint64("store-id", storeID), zap.Float64("threshold", threshold), zap.Float64("region-size", regionSize)) if regionSize >= threshold { if err := c.ReadyToServe(storeID); err != nil { log.Error("change store to serving failed", @@ -2161,53 +2031,14 @@ func (c *RaftCluster) deleteStore(store *core.StoreInfo) error { } func (c *RaftCluster) collectMetrics() { - if !c.isAPIServiceMode { - statsMap := statistics.NewStoreStatisticsMap(c.opt) - stores := c.GetStores() - for _, s := range stores { - statsMap.Observe(s) - statsMap.ObserveHotStat(s, c.hotStat.StoresStats) - } - statsMap.Collect() - c.coordinator.GetSchedulersController().CollectSchedulerMetrics() - c.coordinator.CollectHotSpotMetrics() - c.collectClusterMetrics() - } c.collectHealthStatus() } func (c *RaftCluster) resetMetrics() { - statistics.Reset() - - if 
!c.isAPIServiceMode { - c.coordinator.GetSchedulersController().ResetSchedulerMetrics() - c.coordinator.ResetHotSpotMetrics() - c.resetClusterMetrics() - } c.resetHealthStatus() c.resetProgressIndicator() } -func (c *RaftCluster) collectClusterMetrics() { - if c.regionStats == nil { - return - } - c.regionStats.Collect() - c.labelLevelStats.Collect() - // collect hot cache metrics - c.hotStat.CollectMetrics() -} - -func (c *RaftCluster) resetClusterMetrics() { - if c.regionStats == nil { - return - } - c.regionStats.Reset() - c.labelLevelStats.Reset() - // reset hot cache metrics - c.hotStat.ResetMetrics() -} - func (c *RaftCluster) collectHealthStatus() { members, err := GetMembers(c.etcdClient) if err != nil { @@ -2234,21 +2065,6 @@ func (c *RaftCluster) resetProgressIndicator() { storesETAGauge.Reset() } -// GetRegionStatsByType gets the status of the region by types. -func (c *RaftCluster) GetRegionStatsByType(typ statistics.RegionStatisticType) []*core.RegionInfo { - if c.regionStats == nil { - return nil - } - return c.regionStats.GetRegionStatsByType(typ) -} - -// UpdateRegionsLabelLevelStats updates the status of the region label level by types. -func (c *RaftCluster) UpdateRegionsLabelLevelStats(regions []*core.RegionInfo) { - for _, region := range regions { - c.labelLevelStats.Observe(region, c.getStoresWithoutLabelLocked(region, core.EngineKey, core.EngineTiFlash), c.opt.GetLocationLabels()) - } -} - func (c *RaftCluster) getRegionStoresLocked(region *core.RegionInfo) []*core.StoreInfo { stores := make([]*core.StoreInfo, 0, len(region.GetPeers())) for _, p := range region.GetPeers() { @@ -2259,16 +2075,6 @@ func (c *RaftCluster) getRegionStoresLocked(region *core.RegionInfo) []*core.Sto return stores } -func (c *RaftCluster) getStoresWithoutLabelLocked(region *core.RegionInfo, key, value string) []*core.StoreInfo { - stores := make([]*core.StoreInfo, 0, len(region.GetPeers())) - for _, p := range region.GetPeers() { - if store := c.core.GetStore(p.StoreId); store != nil && !core.IsStoreContainLabel(store.GetMeta(), key, value) { - stores = append(stores, store) - } - } - return stores -} - // OnStoreVersionChange changes the version of the cluster when needed. func (c *RaftCluster) OnStoreVersionChange() { c.RLock() @@ -2344,49 +2150,6 @@ func (c *RaftCluster) GetRegionCount(startKey, endKey []byte) *statistics.Region return stats } -// GetStoresStats returns stores' statistics from cluster. -// And it will be unnecessary to filter unhealthy store, because it has been solved in process heartbeat -func (c *RaftCluster) GetStoresStats() *statistics.StoresStats { - return c.hotStat.StoresStats -} - -// GetStoresLoads returns load stats of all stores. -func (c *RaftCluster) GetStoresLoads() map[uint64][]float64 { - return c.hotStat.GetStoresLoads() -} - -// IsRegionHot checks if a region is in hot state. -func (c *RaftCluster) IsRegionHot(region *core.RegionInfo) bool { - return c.hotStat.IsRegionHot(region, c.opt.GetHotRegionCacheHitsThreshold()) -} - -// GetHotPeerStat returns hot peer stat with specified regionID and storeID. -func (c *RaftCluster) GetHotPeerStat(rw utils.RWType, regionID, storeID uint64) *statistics.HotPeerStat { - return c.hotStat.GetHotPeerStat(rw, regionID, storeID) -} - -// RegionReadStats returns hot region's read stats. -// The result only includes peers that are hot enough. 
-// RegionStats is a thread-safe method -func (c *RaftCluster) RegionReadStats() map[uint64][]*statistics.HotPeerStat { - // As read stats are reported by store heartbeat, the threshold needs to be adjusted. - threshold := c.GetOpts().GetHotRegionCacheHitsThreshold() * - (utils.RegionHeartBeatReportInterval / utils.StoreHeartBeatReportInterval) - return c.hotStat.RegionStats(utils.Read, threshold) -} - -// RegionWriteStats returns hot region's write stats. -// The result only includes peers that are hot enough. -func (c *RaftCluster) RegionWriteStats() map[uint64][]*statistics.HotPeerStat { - // RegionStats is a thread-safe method - return c.hotStat.RegionStats(utils.Write, c.GetOpts().GetHotRegionCacheHitsThreshold()) -} - -// BucketsStats returns hot region's buckets stats. -func (c *RaftCluster) BucketsStats(degree int, regionIDs ...uint64) map[uint64][]*buckets.BucketStat { - return c.hotStat.BucketsStats(degree, regionIDs...) -} - // TODO: remove me. // only used in test. func (c *RaftCluster) putRegion(region *core.RegionInfo) error { @@ -2483,7 +2246,9 @@ func (c *RaftCluster) SetMinResolvedTS(storeID, minResolvedTS uint64) error { return nil } -func (c *RaftCluster) checkAndUpdateMinResolvedTS() (uint64, bool) { +// CheckAndUpdateMinResolvedTS checks and updates the min resolved ts of the cluster. +// This is exported for testing purpose. +func (c *RaftCluster) CheckAndUpdateMinResolvedTS() (uint64, bool) { c.Lock() defer c.Unlock() @@ -2526,7 +2291,7 @@ func (c *RaftCluster) runMinResolvedTSJob() { case <-ticker.C: interval = c.opt.GetMinResolvedTSPersistenceInterval() if interval != 0 { - if current, needPersist := c.checkAndUpdateMinResolvedTS(); needPersist { + if current, needPersist := c.CheckAndUpdateMinResolvedTS(); needPersist { c.storage.SaveMinResolvedTS(current) } } else { @@ -2716,7 +2481,7 @@ func CheckHealth(client *http.Client, members []*pdpb.Member) map[uint64]*pdpb.M for _, member := range members { for _, cURL := range member.ClientUrls { ctx, cancel := context.WithTimeout(context.Background(), clientTimeout) - req, err := http.NewRequestWithContext(ctx, http.MethodGet, fmt.Sprintf("%s%s", cURL, healthURL), nil) + req, err := http.NewRequestWithContext(ctx, http.MethodGet, fmt.Sprintf("%s%s", cURL, healthURL), http.NoBody) if err != nil { log.Error("failed to new request", errs.ZapError(errs.ErrNewHTTPRequest, err)) cancel() @@ -2774,12 +2539,11 @@ func IsClientURL(addr string, etcdClient *clientv3.Client) bool { return false } -// GetPausedSchedulerDelayAt returns DelayAt of a paused scheduler -func (c *RaftCluster) GetPausedSchedulerDelayAt(name string) (int64, error) { - return c.coordinator.GetSchedulersController().GetPausedSchedulerDelayAt(name) -} - -// GetPausedSchedulerDelayUntil returns DelayUntil of a paused scheduler -func (c *RaftCluster) GetPausedSchedulerDelayUntil(name string) (int64, error) { - return c.coordinator.GetSchedulersController().GetPausedSchedulerDelayUntil(name) +// IsServiceIndependent returns whether the service is independent. 
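// [Editor's note: illustrative sketch, not part of this patch.] The implementation below reads
// from c.independentServices; judging by the Load/type-assert pattern, that field is presumably
// a sync.Map keyed by service name (the registering side is not shown in this diff). A purely
// hypothetical registration call would look like:
//
//	// mark the scheduling microservice as running independently of the PD API server
//	c.independentServices.Store(mcsutils.SchedulingServiceName, true)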
+func (c *RaftCluster) IsServiceIndependent(name string) bool { + independent, exist := c.independentServices.Load(name) + if !exist { + return false + } + return independent.(bool) } diff --git a/server/cluster/cluster_test.go b/server/cluster/cluster_test.go index b7b9dcfb736..85edf911779 100644 --- a/server/cluster/cluster_test.go +++ b/server/cluster/cluster_test.go @@ -68,7 +68,7 @@ func TestStoreHeartbeat(t *testing.T) { _, opt, err := newTestScheduleConfig() opt.GetScheduleConfig().StoreLimitVersion = "v2" re.NoError(err) - cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend(), core.NewBasicCluster()) + cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend()) n, np := uint64(3), uint64(3) stores := newTestStores(n, "2.0.0") @@ -201,7 +201,7 @@ func TestFilterUnhealthyStore(t *testing.T) { _, opt, err := newTestScheduleConfig() re.NoError(err) - cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend(), core.NewBasicCluster()) + cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend()) stores := newTestStores(3, "2.0.0") req := &pdpb.StoreHeartbeatRequest{} @@ -239,7 +239,7 @@ func TestSetOfflineStore(t *testing.T) { _, opt, err := newTestScheduleConfig() re.NoError(err) - cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend(), core.NewBasicCluster()) + cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend()) cluster.coordinator = schedule.NewCoordinator(ctx, cluster, nil) cluster.ruleManager = placement.NewRuleManager(storage.NewStorageWithMemoryBackend(), cluster, cluster.GetOpts()) if opt.IsPlacementRulesEnabled() { @@ -305,7 +305,7 @@ func TestSetOfflineWithReplica(t *testing.T) { _, opt, err := newTestScheduleConfig() re.NoError(err) - cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend(), core.NewBasicCluster()) + cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend()) cluster.coordinator = schedule.NewCoordinator(ctx, cluster, nil) // Put 4 stores. @@ -344,7 +344,7 @@ func TestSetOfflineStoreWithEvictLeader(t *testing.T) { _, opt, err := newTestScheduleConfig() re.NoError(err) opt.SetMaxReplicas(1) - cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend(), core.NewBasicCluster()) + cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend()) cluster.coordinator = schedule.NewCoordinator(ctx, cluster, nil) // Put 3 stores. @@ -371,7 +371,7 @@ func TestForceBuryStore(t *testing.T) { _, opt, err := newTestScheduleConfig() re.NoError(err) - cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend(), core.NewBasicCluster()) + cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend()) // Put 2 stores. 
stores := newTestStores(2, "5.3.0") stores[1] = stores[1].Clone(core.SetLastHeartbeatTS(time.Now())) @@ -390,7 +390,7 @@ func TestReuseAddress(t *testing.T) { _, opt, err := newTestScheduleConfig() re.NoError(err) - cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend(), core.NewBasicCluster()) + cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend()) cluster.coordinator = schedule.NewCoordinator(ctx, cluster, nil) // Put 4 stores. for _, store := range newTestStores(4, "2.0.0") { @@ -436,7 +436,7 @@ func TestUpStore(t *testing.T) { _, opt, err := newTestScheduleConfig() re.NoError(err) - cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend(), core.NewBasicCluster()) + cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend()) cluster.coordinator = schedule.NewCoordinator(ctx, cluster, nil) cluster.ruleManager = placement.NewRuleManager(storage.NewStorageWithMemoryBackend(), cluster, cluster.GetOpts()) if opt.IsPlacementRulesEnabled() { @@ -481,7 +481,7 @@ func TestRemovingProcess(t *testing.T) { _, opt, err := newTestScheduleConfig() re.NoError(err) - cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend(), core.NewBasicCluster()) + cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend()) cluster.coordinator = schedule.NewCoordinator(ctx, cluster, nil) cluster.SetPrepared() @@ -539,7 +539,7 @@ func TestDeleteStoreUpdatesClusterVersion(t *testing.T) { _, opt, err := newTestScheduleConfig() re.NoError(err) - cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend(), core.NewBasicCluster()) + cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend()) cluster.coordinator = schedule.NewCoordinator(ctx, cluster, nil) cluster.ruleManager = placement.NewRuleManager(storage.NewStorageWithMemoryBackend(), cluster, cluster.GetOpts()) if opt.IsPlacementRulesEnabled() { @@ -574,7 +574,7 @@ func TestStoreClusterVersion(t *testing.T) { _, opt, err := newTestScheduleConfig() re.NoError(err) - cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend(), core.NewBasicCluster()) + cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend()) stores := newTestStores(3, "5.0.0") s1, s2, s3 := stores[0].GetMeta(), stores[1].GetMeta(), stores[2].GetMeta() s1.Version = "5.0.1" @@ -599,7 +599,7 @@ func TestRegionHeartbeatHotStat(t *testing.T) { _, opt, err := newTestScheduleConfig() re.NoError(err) - cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend(), core.NewBasicCluster()) + cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend()) cluster.coordinator = schedule.NewCoordinator(ctx, cluster, nil) newTestStores(4, "2.0.0") peers := []*metapb.Peer{ @@ -661,7 +661,7 @@ func TestBucketHeartbeat(t *testing.T) { _, opt, err := newTestScheduleConfig() re.NoError(err) - cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend(), core.NewBasicCluster()) + cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend()) cluster.coordinator = schedule.NewCoordinator(ctx, cluster, nil) // case1: 
region is not exist @@ -718,7 +718,7 @@ func TestRegionHeartbeat(t *testing.T) { _, opt, err := newTestScheduleConfig() re.NoError(err) - cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend(), core.NewBasicCluster()) + cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend()) cluster.coordinator = schedule.NewCoordinator(ctx, cluster, nil) n, np := uint64(3), uint64(3) cluster.wg.Add(1) @@ -963,7 +963,7 @@ func TestRegionFlowChanged(t *testing.T) { _, opt, err := newTestScheduleConfig() re.NoError(err) - cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend(), core.NewBasicCluster()) + cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend()) cluster.coordinator = schedule.NewCoordinator(ctx, cluster, nil) regions := []*core.RegionInfo{core.NewTestRegionInfo(1, 1, []byte{}, []byte{})} processRegions := func(regions []*core.RegionInfo) { @@ -988,7 +988,7 @@ func TestRegionSizeChanged(t *testing.T) { _, opt, err := newTestScheduleConfig() re.NoError(err) - cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend(), core.NewBasicCluster()) + cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend()) cluster.coordinator = schedule.NewCoordinator(ctx, cluster, nil) cluster.regionStats = statistics.NewRegionStatistics( cluster.GetBasicCluster(), @@ -1034,7 +1034,7 @@ func TestConcurrentReportBucket(t *testing.T) { _, opt, err := newTestScheduleConfig() re.NoError(err) - cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend(), core.NewBasicCluster()) + cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend()) cluster.coordinator = schedule.NewCoordinator(ctx, cluster, nil) regions := []*core.RegionInfo{core.NewTestRegionInfo(1, 1, []byte{}, []byte{})} @@ -1064,7 +1064,7 @@ func TestConcurrentRegionHeartbeat(t *testing.T) { _, opt, err := newTestScheduleConfig() re.NoError(err) - cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend(), core.NewBasicCluster()) + cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend()) cluster.coordinator = schedule.NewCoordinator(ctx, cluster, nil) regions := []*core.RegionInfo{core.NewTestRegionInfo(1, 1, []byte{}, []byte{})} @@ -1105,7 +1105,7 @@ func TestRegionLabelIsolationLevel(t *testing.T) { cfg.LocationLabels = []string{"zone"} opt.SetReplicationConfig(cfg) re.NoError(err) - cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend(), core.NewBasicCluster()) + cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend()) for i := uint64(1); i <= 4; i++ { var labels []*metapb.StoreLabel @@ -1144,7 +1144,7 @@ func TestRegionLabelIsolationLevel(t *testing.T) { re.NoError(cluster.putRegion(r)) cluster.UpdateRegionsLabelLevelStats([]*core.RegionInfo{r}) - counter := cluster.labelLevelStats.GetLabelCounter() + counter := cluster.labelStats.GetLabelCounter() re.Equal(0, counter["none"]) re.Equal(1, counter["zone"]) } @@ -1184,7 +1184,7 @@ func TestHeartbeatSplit(t *testing.T) { _, opt, err := newTestScheduleConfig() re.NoError(err) - cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, 
storage.NewStorageWithMemoryBackend(), core.NewBasicCluster()) + cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend()) cluster.coordinator = schedule.NewCoordinator(ctx, cluster, nil) // 1: [nil, nil) @@ -1228,7 +1228,7 @@ func TestRegionSplitAndMerge(t *testing.T) { _, opt, err := newTestScheduleConfig() re.NoError(err) - cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend(), core.NewBasicCluster()) + cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend()) cluster.coordinator = schedule.NewCoordinator(ctx, cluster, nil) regions := []*core.RegionInfo{core.NewTestRegionInfo(1, 1, []byte{}, []byte{})} @@ -1266,7 +1266,7 @@ func TestOfflineAndMerge(t *testing.T) { _, opt, err := newTestScheduleConfig() re.NoError(err) - cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend(), core.NewBasicCluster()) + cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend()) cluster.coordinator = schedule.NewCoordinator(ctx, cluster, nil) cluster.ruleManager = placement.NewRuleManager(storage.NewStorageWithMemoryBackend(), cluster, cluster.GetOpts()) if opt.IsPlacementRulesEnabled() { @@ -1634,7 +1634,7 @@ func TestCalculateStoreSize1(t *testing.T) { cfg := opt.GetReplicationConfig() cfg.EnablePlacementRules = true opt.SetReplicationConfig(cfg) - cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend(), core.NewBasicCluster()) + cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend()) cluster.coordinator = schedule.NewCoordinator(ctx, cluster, nil) cluster.regionStats = statistics.NewRegionStatistics( cluster.GetBasicCluster(), @@ -1669,7 +1669,7 @@ func TestCalculateStoreSize1(t *testing.T) { } cluster.ruleManager.SetRule( - &placement.Rule{GroupID: "pd", ID: "zone1", StartKey: []byte(""), EndKey: []byte(""), Role: "voter", Count: 2, + &placement.Rule{GroupID: placement.DefaultGroupID, ID: "zone1", StartKey: []byte(""), EndKey: []byte(""), Role: placement.Voter, Count: 2, LabelConstraints: []placement.LabelConstraint{ {Key: "zone", Op: "in", Values: []string{"zone1"}}, }, @@ -1677,7 +1677,7 @@ func TestCalculateStoreSize1(t *testing.T) { ) cluster.ruleManager.SetRule( - &placement.Rule{GroupID: "pd", ID: "zone2", StartKey: []byte(""), EndKey: []byte(""), Role: "voter", Count: 2, + &placement.Rule{GroupID: placement.DefaultGroupID, ID: "zone2", StartKey: []byte(""), EndKey: []byte(""), Role: placement.Voter, Count: 2, LabelConstraints: []placement.LabelConstraint{ {Key: "zone", Op: "in", Values: []string{"zone2"}}, }, @@ -1685,13 +1685,13 @@ func TestCalculateStoreSize1(t *testing.T) { ) cluster.ruleManager.SetRule( - &placement.Rule{GroupID: "pd", ID: "zone3", StartKey: []byte(""), EndKey: []byte(""), Role: "follower", Count: 1, + &placement.Rule{GroupID: placement.DefaultGroupID, ID: "zone3", StartKey: []byte(""), EndKey: []byte(""), Role: placement.Follower, Count: 1, LabelConstraints: []placement.LabelConstraint{ {Key: "zone", Op: "in", Values: []string{"zone3"}}, }, LocationLabels: []string{"rack", "host"}}, ) - cluster.ruleManager.DeleteRule("pd", "default") + cluster.ruleManager.DeleteRule(placement.DefaultGroupID, placement.DefaultRuleID) regions := newTestRegions(100, 10, 5) for _, region := range regions { @@ -1720,7 +1720,7 @@ func TestCalculateStoreSize2(t *testing.T) { 
cfg.EnablePlacementRules = true opt.SetReplicationConfig(cfg) opt.SetMaxReplicas(3) - cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend(), core.NewBasicCluster()) + cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend()) cluster.coordinator = schedule.NewCoordinator(ctx, cluster, nil) cluster.regionStats = statistics.NewRegionStatistics( cluster.GetBasicCluster(), @@ -1753,7 +1753,7 @@ func TestCalculateStoreSize2(t *testing.T) { } cluster.ruleManager.SetRule( - &placement.Rule{GroupID: "pd", ID: "dc1", StartKey: []byte(""), EndKey: []byte(""), Role: "voter", Count: 2, + &placement.Rule{GroupID: placement.DefaultGroupID, ID: "dc1", StartKey: []byte(""), EndKey: []byte(""), Role: placement.Voter, Count: 2, LabelConstraints: []placement.LabelConstraint{ {Key: "dc", Op: "in", Values: []string{"dc1"}}, }, @@ -1761,7 +1761,7 @@ func TestCalculateStoreSize2(t *testing.T) { ) cluster.ruleManager.SetRule( - &placement.Rule{GroupID: "pd", ID: "logic3", StartKey: []byte(""), EndKey: []byte(""), Role: "voter", Count: 1, + &placement.Rule{GroupID: placement.DefaultGroupID, ID: "logic3", StartKey: []byte(""), EndKey: []byte(""), Role: placement.Voter, Count: 1, LabelConstraints: []placement.LabelConstraint{ {Key: "logic", Op: "in", Values: []string{"logic3"}}, }, @@ -1769,13 +1769,13 @@ func TestCalculateStoreSize2(t *testing.T) { ) cluster.ruleManager.SetRule( - &placement.Rule{GroupID: "pd", ID: "logic4", StartKey: []byte(""), EndKey: []byte(""), Role: "learner", Count: 1, + &placement.Rule{GroupID: placement.DefaultGroupID, ID: "logic4", StartKey: []byte(""), EndKey: []byte(""), Role: placement.Learner, Count: 1, LabelConstraints: []placement.LabelConstraint{ {Key: "logic", Op: "in", Values: []string{"logic4"}}, }, LocationLabels: []string{"dc", "logic", "rack", "host"}}, ) - cluster.ruleManager.DeleteRule("pd", "default") + cluster.ruleManager.DeleteRule(placement.DefaultGroupID, placement.DefaultRuleID) regions := newTestRegions(100, 10, 5) for _, region := range regions { @@ -1829,7 +1829,7 @@ func Test(t *testing.T) { regions := newTestRegions(n, n, np) _, opts, err := newTestScheduleConfig() re.NoError(err) - tc := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opts, storage.NewStorageWithMemoryBackend(), core.NewBasicCluster()) + tc := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opts, storage.NewStorageWithMemoryBackend()) cache := tc.core for i := uint64(0); i < n; i++ { @@ -1943,7 +1943,7 @@ func TestAwakenStore(t *testing.T) { _, opt, err := newTestScheduleConfig() re.NoError(err) - cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend(), core.NewBasicCluster()) + cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend()) n := uint64(3) stores := newTestStores(n, "6.5.0") re.True(stores[0].NeedAwakenStore()) @@ -1997,7 +1997,7 @@ func TestUpdateAndDeleteLabel(t *testing.T) { _, opt, err := newTestScheduleConfig() re.NoError(err) - cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend(), core.NewBasicCluster()) + cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend()) stores := newTestStores(1, "6.5.1") for _, store := range stores { re.NoError(cluster.PutStore(store.GetMeta())) @@ -2115,7 +2115,7 @@ func newTestScheduleConfig() (*sc.ScheduleConfig, *config.PersistOptions, error) } func 
newTestCluster(ctx context.Context, opt *config.PersistOptions) *testCluster { - rc := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend(), core.NewBasicCluster()) + rc := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend()) storage := storage.NewStorageWithMemoryBackend() rc.regionLabeler, _ = labeler.NewRegionLabeler(ctx, storage, time.Second*5) @@ -2127,10 +2127,9 @@ func newTestRaftCluster( id id.Allocator, opt *config.PersistOptions, s storage.Storage, - basicCluster *core.BasicCluster, ) *RaftCluster { - rc := &RaftCluster{serverCtx: ctx} - rc.InitCluster(id, opt, s, basicCluster, nil) + rc := &RaftCluster{serverCtx: ctx, core: core.NewBasicCluster(), storage: s} + rc.InitCluster(id, opt, nil, nil) rc.ruleManager = placement.NewRuleManager(storage.NewStorageWithMemoryBackend(), rc, opt) if opt.IsPlacementRulesEnabled() { err := rc.ruleManager.Initialize(opt.GetMaxReplicas(), opt.GetLocationLabels(), opt.GetIsolationLevel()) @@ -2138,6 +2137,7 @@ func newTestRaftCluster( panic(err) } } + rc.schedulingController = newSchedulingController(rc.ctx, rc.core, rc.opt, rc.ruleManager) return rc } @@ -2383,7 +2383,7 @@ func (c *testCluster) LoadRegion(regionID uint64, followerStoreIDs ...uint64) er peer, _ := c.AllocPeer(id) region.Peers = append(region.Peers, peer) } - return c.putRegion(core.NewRegionInfo(region, nil)) + return c.putRegion(core.NewRegionInfo(region, nil, core.SetSource(core.Storage))) } func TestBasic(t *testing.T) { @@ -2468,7 +2468,7 @@ func TestDispatch(t *testing.T) { func dispatchHeartbeat(co *schedule.Coordinator, region *core.RegionInfo, stream hbstream.HeartbeatStream) error { co.GetHeartbeatStreams().BindStream(region.GetLeader().GetStoreId(), stream) - if err := co.GetCluster().(*RaftCluster).putRegion(region.Clone()); err != nil { + if err := co.GetCluster().(*RaftCluster).putRegion(region.Clone(core.SetSource(core.Heartbeat))); err != nil { return err } co.GetOperatorController().Dispatch(region, operator.DispatchFromHeartBeat, nil) @@ -2485,7 +2485,10 @@ func TestCollectMetricsConcurrent(t *testing.T) { nil) }, func(co *schedule.Coordinator) { co.Run() }, re) defer cleanup() - + rc := co.GetCluster().(*RaftCluster) + rc.schedulingController = newSchedulingController(rc.serverCtx, rc.GetBasicCluster(), rc.GetOpts(), rc.GetRuleManager()) + rc.schedulingController.coordinator = co + controller := co.GetSchedulersController() // Make sure there are no problem when concurrent write and read var wg sync.WaitGroup count := 10 @@ -2498,15 +2501,14 @@ func TestCollectMetricsConcurrent(t *testing.T) { } }(i) } - controller := co.GetSchedulersController() for i := 0; i < 1000; i++ { co.CollectHotSpotMetrics() controller.CollectSchedulerMetrics() - co.GetCluster().(*RaftCluster).collectClusterMetrics() + rc.collectSchedulingMetrics() } - co.ResetHotSpotMetrics() - controller.ResetSchedulerMetrics() - co.GetCluster().(*RaftCluster).resetClusterMetrics() + schedule.ResetHotSpotMetrics() + schedulers.ResetSchedulerMetrics() + rc.resetSchedulingMetrics() wg.Wait() } @@ -2520,6 +2522,11 @@ func TestCollectMetrics(t *testing.T) { nil) }, func(co *schedule.Coordinator) { co.Run() }, re) defer cleanup() + + rc := co.GetCluster().(*RaftCluster) + rc.schedulingController = newSchedulingController(rc.serverCtx, rc.GetBasicCluster(), rc.GetOpts(), rc.GetRuleManager()) + rc.schedulingController.coordinator = co + controller := co.GetSchedulersController() count := 10 for i := 0; i <= count; i++ { for k := 0; 
k < 200; k++ { @@ -2533,11 +2540,11 @@ func TestCollectMetrics(t *testing.T) { tc.hotStat.HotCache.Update(item, utils.Write) } } - controller := co.GetSchedulersController() + for i := 0; i < 1000; i++ { co.CollectHotSpotMetrics() controller.CollectSchedulerMetrics() - co.GetCluster().(*RaftCluster).collectClusterMetrics() + rc.collectSchedulingMetrics() } stores := co.GetCluster().GetStores() regionStats := co.GetCluster().RegionWriteStats() @@ -2550,9 +2557,9 @@ func TestCollectMetrics(t *testing.T) { s.Stats = nil } re.Equal(status1, status2) - co.ResetHotSpotMetrics() - controller.ResetSchedulerMetrics() - co.GetCluster().(*RaftCluster).resetClusterMetrics() + schedule.ResetHotSpotMetrics() + schedulers.ResetSchedulerMetrics() + rc.resetSchedulingMetrics() } func prepare(setCfg func(*sc.ScheduleConfig), setTc func(*testCluster), run func(*schedule.Coordinator), re *require.Assertions) (*testCluster, *schedule.Coordinator, func()) { @@ -2792,11 +2799,11 @@ func TestReplica(t *testing.T) { re.NoError(dispatchHeartbeat(co, region, stream)) waitNoResponse(re, stream) - // Remove peer from store 4. + // Remove peer from store 3. re.NoError(tc.addLeaderRegion(2, 1, 2, 3, 4)) region = tc.GetRegion(2) re.NoError(dispatchHeartbeat(co, region, stream)) - region = waitRemovePeer(re, stream, region, 4) + region = waitRemovePeer(re, stream, region, 3) // store3 is down, we should remove it firstly. re.NoError(dispatchHeartbeat(co, region, stream)) waitNoResponse(re, stream) @@ -2942,14 +2949,14 @@ func TestShouldRun(t *testing.T) { for _, testCase := range testCases { r := tc.GetRegion(testCase.regionID) - nr := r.Clone(core.WithLeader(r.GetPeers()[0])) + nr := r.Clone(core.WithLeader(r.GetPeers()[0]), core.SetSource(core.Heartbeat)) re.NoError(tc.processRegionHeartbeat(nr)) re.Equal(testCase.ShouldRun, co.ShouldRun()) } nr := &metapb.Region{Id: 6, Peers: []*metapb.Peer{}} - newRegion := core.NewRegionInfo(nr, nil) + newRegion := core.NewRegionInfo(nr, nil, core.SetSource(core.Heartbeat)) re.Error(tc.processRegionHeartbeat(newRegion)) - re.Equal(7, co.GetPrepareChecker().GetSum()) + re.Equal(7, tc.core.GetClusterNotFromStorageRegionsCnt()) } func TestShouldRunWithNonLeaderRegions(t *testing.T) { @@ -2985,14 +2992,14 @@ func TestShouldRunWithNonLeaderRegions(t *testing.T) { for _, testCase := range testCases { r := tc.GetRegion(testCase.regionID) - nr := r.Clone(core.WithLeader(r.GetPeers()[0])) + nr := r.Clone(core.WithLeader(r.GetPeers()[0]), core.SetSource(core.Heartbeat)) re.NoError(tc.processRegionHeartbeat(nr)) re.Equal(testCase.ShouldRun, co.ShouldRun()) } nr := &metapb.Region{Id: 9, Peers: []*metapb.Peer{}} - newRegion := core.NewRegionInfo(nr, nil) + newRegion := core.NewRegionInfo(nr, nil, core.SetSource(core.Heartbeat)) re.Error(tc.processRegionHeartbeat(newRegion)) - re.Equal(9, co.GetPrepareChecker().GetSum()) + re.Equal(9, tc.core.GetClusterNotFromStorageRegionsCnt()) // Now, after server is prepared, there exist some regions with no leader. re.Equal(uint64(0), tc.GetRegion(10).GetLeader().GetStoreId()) @@ -3133,7 +3140,7 @@ func TestPersistScheduler(t *testing.T) { // option have 6 items because the default scheduler do not remove. 
re.Len(newOpt.GetSchedulers(), defaultCount+3) re.NoError(newOpt.Persist(storage)) - tc.RaftCluster.opt = newOpt + tc.RaftCluster.SetScheduleConfig(newOpt.GetScheduleConfig()) co = schedule.NewCoordinator(ctx, tc.RaftCluster, hbStreams) co.Run() @@ -3146,7 +3153,7 @@ func TestPersistScheduler(t *testing.T) { _, newOpt, err = newTestScheduleConfig() re.NoError(err) re.NoError(newOpt.Reload(storage)) - tc.RaftCluster.opt = newOpt + tc.RaftCluster.SetScheduleConfig(newOpt.GetScheduleConfig()) co = schedule.NewCoordinator(ctx, tc.RaftCluster, hbStreams) co.Run() controller = co.GetSchedulersController() @@ -3173,7 +3180,7 @@ func TestPersistScheduler(t *testing.T) { _, newOpt, err = newTestScheduleConfig() re.NoError(err) re.NoError(newOpt.Reload(co.GetCluster().GetStorage())) - tc.RaftCluster.opt = newOpt + tc.RaftCluster.SetScheduleConfig(newOpt.GetScheduleConfig()) co = schedule.NewCoordinator(ctx, tc.RaftCluster, hbStreams) co.Run() @@ -3232,7 +3239,7 @@ func TestRemoveScheduler(t *testing.T) { _, newOpt, err := newTestScheduleConfig() re.NoError(err) re.NoError(newOpt.Reload(tc.storage)) - tc.RaftCluster.opt = newOpt + tc.RaftCluster.SetScheduleConfig(newOpt.GetScheduleConfig()) co = schedule.NewCoordinator(ctx, tc.RaftCluster, hbStreams) co.Run() re.Empty(controller.GetSchedulerNames()) @@ -3262,7 +3269,6 @@ func TestRestart(t *testing.T) { re.NoError(tc.addRegionStore(3, 3)) re.NoError(tc.addLeaderRegion(1, 1)) region := tc.GetRegion(1) - co.GetPrepareChecker().Collect(region) // Add 1 replica on store 2. stream := mockhbstream.NewHeartbeatStream() @@ -3276,7 +3282,6 @@ func TestRestart(t *testing.T) { // Recreate coordinator then add another replica on store 3. co = schedule.NewCoordinator(ctx, tc.RaftCluster, hbStreams) - co.GetPrepareChecker().Collect(region) co.Run() re.NoError(dispatchHeartbeat(co, region, stream)) region = waitAddLearner(re, stream, region, 3) diff --git a/server/cluster/cluster_worker.go b/server/cluster/cluster_worker.go index a38ae86123f..74a445ad78e 100644 --- a/server/cluster/cluster_worker.go +++ b/server/cluster/cluster_worker.go @@ -23,6 +23,7 @@ import ( "github.com/pingcap/log" "github.com/tikv/pd/pkg/core" "github.com/tikv/pd/pkg/errs" + mcsutils "github.com/tikv/pd/pkg/mcs/utils" "github.com/tikv/pd/pkg/schedule/operator" "github.com/tikv/pd/pkg/statistics/buckets" "github.com/tikv/pd/pkg/utils/logutil" @@ -37,6 +38,9 @@ func (c *RaftCluster) HandleRegionHeartbeat(region *core.RegionInfo) error { return err } + if c.IsServiceIndependent(mcsutils.SchedulingServiceName) { + return nil + } c.coordinator.GetOperatorController().Dispatch(region, operator.DispatchFromHeartBeat, c.coordinator.RecordOpStepWithTTL) return nil } @@ -233,7 +237,7 @@ func (c *RaftCluster) HandleReportBuckets(b *metapb.Buckets) error { if err := c.processReportBuckets(b); err != nil { return err } - if !c.isAPIServiceMode { + if !c.IsServiceIndependent(mcsutils.SchedulingServiceName) { c.hotStat.CheckAsync(buckets.NewCheckPeerTask(b)) } return nil diff --git a/server/cluster/cluster_worker_test.go b/server/cluster/cluster_worker_test.go index b376b38edc3..afc979e2b97 100644 --- a/server/cluster/cluster_worker_test.go +++ b/server/cluster/cluster_worker_test.go @@ -21,7 +21,6 @@ import ( "github.com/pingcap/kvproto/pkg/metapb" "github.com/pingcap/kvproto/pkg/pdpb" "github.com/stretchr/testify/require" - "github.com/tikv/pd/pkg/core" "github.com/tikv/pd/pkg/mock/mockid" "github.com/tikv/pd/pkg/storage" ) @@ -33,7 +32,7 @@ func TestReportSplit(t *testing.T) { _, opt, err := 
newTestScheduleConfig() re.NoError(err) - cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend(), core.NewBasicCluster()) + cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend()) left := &metapb.Region{Id: 1, StartKey: []byte("a"), EndKey: []byte("b")} right := &metapb.Region{Id: 2, StartKey: []byte("b"), EndKey: []byte("c")} _, err = cluster.HandleReportSplit(&pdpb.ReportSplitRequest{Left: left, Right: right}) @@ -49,7 +48,7 @@ func TestReportBatchSplit(t *testing.T) { _, opt, err := newTestScheduleConfig() re.NoError(err) - cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend(), core.NewBasicCluster()) + cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend()) regions := []*metapb.Region{ {Id: 1, StartKey: []byte(""), EndKey: []byte("a")}, {Id: 2, StartKey: []byte("a"), EndKey: []byte("b")}, diff --git a/server/cluster/scheduling_controller.go b/server/cluster/scheduling_controller.go new file mode 100644 index 00000000000..a36e7159cfd --- /dev/null +++ b/server/cluster/scheduling_controller.go @@ -0,0 +1,496 @@ +// Copyright 2023 TiKV Project Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package cluster + +import ( + "context" + "net/http" + "sync" + "time" + + "github.com/pingcap/failpoint" + "github.com/pingcap/log" + "github.com/tikv/pd/pkg/core" + "github.com/tikv/pd/pkg/schedule" + "github.com/tikv/pd/pkg/schedule/checker" + sc "github.com/tikv/pd/pkg/schedule/config" + sche "github.com/tikv/pd/pkg/schedule/core" + "github.com/tikv/pd/pkg/schedule/hbstream" + "github.com/tikv/pd/pkg/schedule/operator" + "github.com/tikv/pd/pkg/schedule/placement" + "github.com/tikv/pd/pkg/schedule/scatter" + "github.com/tikv/pd/pkg/schedule/schedulers" + "github.com/tikv/pd/pkg/schedule/splitter" + "github.com/tikv/pd/pkg/statistics" + "github.com/tikv/pd/pkg/statistics/buckets" + "github.com/tikv/pd/pkg/statistics/utils" + "github.com/tikv/pd/pkg/utils/logutil" + "github.com/tikv/pd/pkg/utils/syncutil" +) + +// schedulingController is used to manage all schedulers and checkers. +type schedulingController struct { + parentCtx context.Context + ctx context.Context + cancel context.CancelFunc + mu syncutil.RWMutex + wg sync.WaitGroup + *core.BasicCluster + opt sc.ConfProvider + coordinator *schedule.Coordinator + labelStats *statistics.LabelStatistics + regionStats *statistics.RegionStatistics + hotStat *statistics.HotStat + slowStat *statistics.SlowStat + running bool +} + +// newSchedulingController creates a new scheduling controller. 
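// [Editor's note: illustrative sketch, not part of this patch.] How the RaftCluster wires the
// controller in can be seen from the test helper changed later in this diff; construction only
// builds the statistics containers, while the coordinator and background jobs are attached when
// the jobs are started (see startSchedulingJobs below):
//
//	rc.schedulingController = newSchedulingController(rc.ctx, rc.core, rc.opt, rc.ruleManager)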
+func newSchedulingController(parentCtx context.Context, basicCluster *core.BasicCluster, opt sc.ConfProvider, ruleManager *placement.RuleManager) *schedulingController { + ctx, cancel := context.WithCancel(parentCtx) + return &schedulingController{ + parentCtx: parentCtx, + ctx: ctx, + cancel: cancel, + BasicCluster: basicCluster, + opt: opt, + labelStats: statistics.NewLabelStatistics(), + hotStat: statistics.NewHotStat(parentCtx), + slowStat: statistics.NewSlowStat(parentCtx), + regionStats: statistics.NewRegionStatistics(basicCluster, opt, ruleManager), + } +} + +func (sc *schedulingController) stopSchedulingJobs() bool { + sc.mu.Lock() + defer sc.mu.Unlock() + if !sc.running { + return false + } + sc.coordinator.Stop() + sc.cancel() + sc.wg.Wait() + sc.running = false + log.Info("scheduling service is stopped") + return true +} + +func (sc *schedulingController) startSchedulingJobs(cluster sche.ClusterInformer, hbstreams *hbstream.HeartbeatStreams) { + sc.mu.Lock() + defer sc.mu.Unlock() + if sc.running { + return + } + sc.initCoordinatorLocked(sc.parentCtx, cluster, hbstreams) + sc.wg.Add(3) + go sc.runCoordinator() + go sc.runStatsBackgroundJobs() + go sc.runSchedulingMetricsCollectionJob() + sc.running = true + log.Info("scheduling service is started") +} + +func (sc *schedulingController) initCoordinator(ctx context.Context, cluster sche.ClusterInformer, hbstreams *hbstream.HeartbeatStreams) { + sc.mu.Lock() + defer sc.mu.Unlock() + sc.initCoordinatorLocked(ctx, cluster, hbstreams) + sc.coordinator.InitSchedulers(false) +} + +func (sc *schedulingController) initCoordinatorLocked(ctx context.Context, cluster sche.ClusterInformer, hbstreams *hbstream.HeartbeatStreams) { + sc.ctx, sc.cancel = context.WithCancel(ctx) + sc.coordinator = schedule.NewCoordinator(sc.ctx, cluster, hbstreams) +} + +// runCoordinator runs the main scheduling loop. 
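// [Editor's note: illustrative sketch, not part of this patch.] runCoordinator below is one of
// the three goroutines spawned by startSchedulingJobs above (coordinator, stats background job,
// metrics collection job); stopSchedulingJobs tears them down again. A caller-side sketch:
//
//	sc.startSchedulingJobs(cluster, hbstreams) // no-op if the jobs are already running
//	...
//	if stopped := sc.stopSchedulingJobs(); !stopped {
//		// the scheduling jobs were not running in the first place
//	}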
+func (sc *schedulingController) runCoordinator() { + defer logutil.LogPanic() + defer sc.wg.Done() + sc.coordinator.RunUntilStop() +} + +func (sc *schedulingController) runStatsBackgroundJobs() { + defer logutil.LogPanic() + defer sc.wg.Done() + + ticker := time.NewTicker(statistics.RegionsStatsObserveInterval) + defer ticker.Stop() + + for _, store := range sc.GetStores() { + storeID := store.GetID() + sc.hotStat.GetOrCreateRollingStoreStats(storeID) + } + for { + select { + case <-sc.ctx.Done(): + log.Info("statistics background jobs has been stopped") + return + case <-ticker.C: + sc.hotStat.ObserveRegionsStats(sc.GetStoresWriteRate()) + } + } +} + +func (sc *schedulingController) runSchedulingMetricsCollectionJob() { + defer logutil.LogPanic() + defer sc.wg.Done() + + ticker := time.NewTicker(metricsCollectionJobInterval) + failpoint.Inject("highFrequencyClusterJobs", func() { + ticker.Stop() + ticker = time.NewTicker(time.Millisecond) + }) + defer ticker.Stop() + + for { + select { + case <-sc.ctx.Done(): + log.Info("scheduling metrics are reset") + sc.resetSchedulingMetrics() + log.Info("scheduling metrics collection job has been stopped") + return + case <-ticker.C: + sc.collectSchedulingMetrics() + } + } +} + +func (sc *schedulingController) resetSchedulingMetrics() { + statistics.Reset() + schedulers.ResetSchedulerMetrics() + schedule.ResetHotSpotMetrics() + statistics.ResetRegionStatsMetrics() + statistics.ResetLabelStatsMetrics() + // reset hot cache metrics + statistics.ResetHotCacheStatusMetrics() +} + +func (sc *schedulingController) collectSchedulingMetrics() { + statsMap := statistics.NewStoreStatisticsMap(sc.opt) + stores := sc.GetStores() + for _, s := range stores { + statsMap.Observe(s) + statsMap.ObserveHotStat(s, sc.hotStat.StoresStats) + } + statsMap.Collect() + sc.coordinator.GetSchedulersController().CollectSchedulerMetrics() + sc.coordinator.CollectHotSpotMetrics() + if sc.regionStats == nil { + return + } + sc.regionStats.Collect() + sc.labelStats.Collect() + // collect hot cache metrics + sc.hotStat.CollectMetrics() +} + +func (sc *schedulingController) removeStoreStatistics(storeID uint64) { + sc.hotStat.RemoveRollingStoreStats(storeID) + sc.slowStat.RemoveSlowStoreStatus(storeID) +} + +func (sc *schedulingController) updateStoreStatistics(storeID uint64, isSlow bool) { + sc.hotStat.GetOrCreateRollingStoreStats(storeID) + sc.slowStat.ObserveSlowStoreStatus(storeID, isSlow) +} + +// GetHotStat gets hot stat. +func (sc *schedulingController) GetHotStat() *statistics.HotStat { + return sc.hotStat +} + +// GetRegionStats gets region statistics. +func (sc *schedulingController) GetRegionStats() *statistics.RegionStatistics { + return sc.regionStats +} + +// GetLabelStats gets label statistics. +func (sc *schedulingController) GetLabelStats() *statistics.LabelStatistics { + return sc.labelStats +} + +// GetRegionStatsByType gets the status of the region by types. +func (sc *schedulingController) GetRegionStatsByType(typ statistics.RegionStatisticType) []*core.RegionInfo { + if sc.regionStats == nil { + return nil + } + return sc.regionStats.GetRegionStatsByType(typ) +} + +// UpdateRegionsLabelLevelStats updates the status of the region label level by types. 
+func (sc *schedulingController) UpdateRegionsLabelLevelStats(regions []*core.RegionInfo) { + for _, region := range regions { + sc.labelStats.Observe(region, sc.getStoresWithoutLabelLocked(region, core.EngineKey, core.EngineTiFlash), sc.opt.GetLocationLabels()) + } +} + +func (sc *schedulingController) getStoresWithoutLabelLocked(region *core.RegionInfo, key, value string) []*core.StoreInfo { + stores := make([]*core.StoreInfo, 0, len(region.GetPeers())) + for _, p := range region.GetPeers() { + if store := sc.GetStore(p.StoreId); store != nil && !core.IsStoreContainLabel(store.GetMeta(), key, value) { + stores = append(stores, store) + } + } + return stores +} + +// GetStoresStats returns stores' statistics from cluster. +// And it will be unnecessary to filter unhealthy store, because it has been solved in process heartbeat +func (sc *schedulingController) GetStoresStats() *statistics.StoresStats { + return sc.hotStat.StoresStats +} + +// GetStoresLoads returns load stats of all stores. +func (sc *schedulingController) GetStoresLoads() map[uint64][]float64 { + return sc.hotStat.GetStoresLoads() +} + +// IsRegionHot checks if a region is in hot state. +func (sc *schedulingController) IsRegionHot(region *core.RegionInfo) bool { + return sc.hotStat.IsRegionHot(region, sc.opt.GetHotRegionCacheHitsThreshold()) +} + +// GetHotPeerStat returns hot peer stat with specified regionID and storeID. +func (sc *schedulingController) GetHotPeerStat(rw utils.RWType, regionID, storeID uint64) *statistics.HotPeerStat { + return sc.hotStat.GetHotPeerStat(rw, regionID, storeID) +} + +// RegionReadStats returns hot region's read stats. +// The result only includes peers that are hot enough. +// RegionStats is a thread-safe method +func (sc *schedulingController) RegionReadStats() map[uint64][]*statistics.HotPeerStat { + // As read stats are reported by store heartbeat, the threshold needs to be adjusted. + threshold := sc.opt.GetHotRegionCacheHitsThreshold() * + (utils.RegionHeartBeatReportInterval / utils.StoreHeartBeatReportInterval) + return sc.hotStat.RegionStats(utils.Read, threshold) +} + +// RegionWriteStats returns hot region's write stats. +// The result only includes peers that are hot enough. +func (sc *schedulingController) RegionWriteStats() map[uint64][]*statistics.HotPeerStat { + // RegionStats is a thread-safe method + return sc.hotStat.RegionStats(utils.Write, sc.opt.GetHotRegionCacheHitsThreshold()) +} + +// BucketsStats returns hot region's buckets stats. +func (sc *schedulingController) BucketsStats(degree int, regionIDs ...uint64) map[uint64][]*buckets.BucketStat { + return sc.hotStat.BucketsStats(degree, regionIDs...) +} + +// GetCoordinator returns the coordinator. +func (sc *schedulingController) GetCoordinator() *schedule.Coordinator { + sc.mu.RLock() + defer sc.mu.RUnlock() + return sc.coordinator +} + +// GetPausedSchedulerDelayAt returns DelayAt of a paused scheduler +func (sc *schedulingController) GetPausedSchedulerDelayAt(name string) (int64, error) { + sc.mu.RLock() + defer sc.mu.RUnlock() + return sc.coordinator.GetSchedulersController().GetPausedSchedulerDelayAt(name) +} + +// GetPausedSchedulerDelayUntil returns DelayUntil of a paused scheduler +func (sc *schedulingController) GetPausedSchedulerDelayUntil(name string) (int64, error) { + sc.mu.RLock() + defer sc.mu.RUnlock() + return sc.coordinator.GetSchedulersController().GetPausedSchedulerDelayUntil(name) +} + +// GetOperatorController returns the operator controller. 
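// [Editor's note: illustrative sketch, not part of this patch.] RegionReadStats above scales the
// hot-region threshold because read stats arrive with store heartbeats rather than region
// heartbeats. Assuming the usual report intervals of 60s for regions and 10s for stores and a
// cache-hits threshold of 3 (these defaults are not shown in this diff), the effective read
// threshold works out to:
//
//	threshold := 3 * (60 / 10) // = 18, i.e. six times the region-heartbeat threshold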
+func (sc *schedulingController) GetOperatorController() *operator.Controller { + sc.mu.RLock() + defer sc.mu.RUnlock() + return sc.coordinator.GetOperatorController() +} + +// GetRegionScatterer returns the region scatter. +func (sc *schedulingController) GetRegionScatterer() *scatter.RegionScatterer { + sc.mu.RLock() + defer sc.mu.RUnlock() + return sc.coordinator.GetRegionScatterer() +} + +// GetRegionSplitter returns the region splitter +func (sc *schedulingController) GetRegionSplitter() *splitter.RegionSplitter { + sc.mu.RLock() + defer sc.mu.RUnlock() + return sc.coordinator.GetRegionSplitter() +} + +// GetMergeChecker returns merge checker. +func (sc *schedulingController) GetMergeChecker() *checker.MergeChecker { + sc.mu.RLock() + defer sc.mu.RUnlock() + return sc.coordinator.GetMergeChecker() +} + +// GetRuleChecker returns rule checker. +func (sc *schedulingController) GetRuleChecker() *checker.RuleChecker { + sc.mu.RLock() + defer sc.mu.RUnlock() + return sc.coordinator.GetRuleChecker() +} + +// GetSchedulers gets all schedulers. +func (sc *schedulingController) GetSchedulers() []string { + sc.mu.RLock() + defer sc.mu.RUnlock() + return sc.coordinator.GetSchedulersController().GetSchedulerNames() +} + +// GetSchedulerHandlers gets all scheduler handlers. +func (sc *schedulingController) GetSchedulerHandlers() map[string]http.Handler { + sc.mu.RLock() + defer sc.mu.RUnlock() + return sc.coordinator.GetSchedulersController().GetSchedulerHandlers() +} + +// AddSchedulerHandler adds a scheduler handler. +func (sc *schedulingController) AddSchedulerHandler(scheduler schedulers.Scheduler, args ...string) error { + sc.mu.RLock() + defer sc.mu.RUnlock() + return sc.coordinator.GetSchedulersController().AddSchedulerHandler(scheduler, args...) +} + +// RemoveSchedulerHandler removes a scheduler handler. +func (sc *schedulingController) RemoveSchedulerHandler(name string) error { + sc.mu.RLock() + defer sc.mu.RUnlock() + return sc.coordinator.GetSchedulersController().RemoveSchedulerHandler(name) +} + +// AddScheduler adds a scheduler. +func (sc *schedulingController) AddScheduler(scheduler schedulers.Scheduler, args ...string) error { + sc.mu.RLock() + defer sc.mu.RUnlock() + return sc.coordinator.GetSchedulersController().AddScheduler(scheduler, args...) +} + +// RemoveScheduler removes a scheduler. +func (sc *schedulingController) RemoveScheduler(name string) error { + sc.mu.RLock() + defer sc.mu.RUnlock() + return sc.coordinator.GetSchedulersController().RemoveScheduler(name) +} + +// PauseOrResumeScheduler pauses or resumes a scheduler. +func (sc *schedulingController) PauseOrResumeScheduler(name string, t int64) error { + sc.mu.RLock() + defer sc.mu.RUnlock() + return sc.coordinator.GetSchedulersController().PauseOrResumeScheduler(name, t) +} + +// PauseOrResumeChecker pauses or resumes checker. +func (sc *schedulingController) PauseOrResumeChecker(name string, t int64) error { + sc.mu.RLock() + defer sc.mu.RUnlock() + return sc.coordinator.PauseOrResumeChecker(name, t) +} + +// AddSuspectRegions adds regions to suspect list. +func (sc *schedulingController) AddSuspectRegions(regionIDs ...uint64) { + sc.mu.RLock() + defer sc.mu.RUnlock() + sc.coordinator.GetCheckerController().AddSuspectRegions(regionIDs...) +} + +// GetSuspectRegions gets all suspect regions. 
+func (sc *schedulingController) GetSuspectRegions() []uint64 { + sc.mu.RLock() + defer sc.mu.RUnlock() + return sc.coordinator.GetCheckerController().GetSuspectRegions() +} + +// RemoveSuspectRegion removes region from suspect list. +func (sc *schedulingController) RemoveSuspectRegion(id uint64) { + sc.mu.RLock() + defer sc.mu.RUnlock() + sc.coordinator.GetCheckerController().RemoveSuspectRegion(id) +} + +// PopOneSuspectKeyRange gets one suspect keyRange group. +// it would return value and true if pop success, or return empty [][2][]byte and false +// if suspectKeyRanges couldn't pop keyRange group. +func (sc *schedulingController) PopOneSuspectKeyRange() ([2][]byte, bool) { + sc.mu.RLock() + defer sc.mu.RUnlock() + return sc.coordinator.GetCheckerController().PopOneSuspectKeyRange() +} + +// ClearSuspectKeyRanges clears the suspect keyRanges, only for unit test +func (sc *schedulingController) ClearSuspectKeyRanges() { + sc.mu.RLock() + defer sc.mu.RUnlock() + sc.coordinator.GetCheckerController().ClearSuspectKeyRanges() +} + +// ClearSuspectRegions clears the suspect regions, only for unit test +func (sc *schedulingController) ClearSuspectRegions() { + sc.mu.RLock() + defer sc.mu.RUnlock() + sc.coordinator.GetCheckerController().ClearSuspectRegions() +} + +// AddSuspectKeyRange adds the key range with the its ruleID as the key +// The instance of each keyRange is like following format: +// [2][]byte: start key/end key +func (sc *schedulingController) AddSuspectKeyRange(start, end []byte) { + sc.mu.RLock() + defer sc.mu.RUnlock() + sc.coordinator.GetCheckerController().AddSuspectKeyRange(start, end) +} + +func (sc *schedulingController) getEvictLeaderStores() (evictStores []uint64) { + sc.mu.RLock() + defer sc.mu.RUnlock() + if sc.coordinator == nil { + return nil + } + handler, ok := sc.coordinator.GetSchedulersController().GetSchedulerHandlers()[schedulers.EvictLeaderName] + if !ok { + return + } + type evictLeaderHandler interface { + EvictStoreIDs() []uint64 + } + h, ok := handler.(evictLeaderHandler) + if !ok { + return + } + return h.EvictStoreIDs() +} + +// IsPrepared return true if the prepare checker is ready. +func (sc *schedulingController) IsPrepared() bool { + sc.mu.RLock() + defer sc.mu.RUnlock() + return sc.coordinator.GetPrepareChecker().IsPrepared() +} + +// SetPrepared set the prepare check to prepared. Only for test purpose. +func (sc *schedulingController) SetPrepared() { + sc.mu.RLock() + defer sc.mu.RUnlock() + sc.coordinator.GetPrepareChecker().SetPrepared() +} + +// IsSchedulingControllerRunning returns whether the scheduling controller is running. Only for test purpose. +func (sc *schedulingController) IsSchedulingControllerRunning() bool { + sc.mu.RLock() + defer sc.mu.RUnlock() + return sc.running +} diff --git a/server/config/config.go b/server/config/config.go index 0485e077c67..da6b0e29e07 100644 --- a/server/config/config.go +++ b/server/config/config.go @@ -831,13 +831,14 @@ func NormalizeReplicationMode(m string) string { // DRAutoSyncReplicationConfig is the configuration for auto sync mode between 2 data centers. 
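// [Editor's note: illustrative sketch, not part of this patch.] The struct below gains a new
// WaitRecoverTimeout field. Based on its toml tag (and assuming the usual
// [replication-mode.dr-auto-sync] section, which is not shown in this diff), it would be set in
// the PD configuration file roughly like this, with a made-up example value:
//
//	[replication-mode.dr-auto-sync]
//	wait-recover-timeout = "30s"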
type DRAutoSyncReplicationConfig struct { - LabelKey string `toml:"label-key" json:"label-key"` - Primary string `toml:"primary" json:"primary"` - DR string `toml:"dr" json:"dr"` - PrimaryReplicas int `toml:"primary-replicas" json:"primary-replicas"` - DRReplicas int `toml:"dr-replicas" json:"dr-replicas"` - WaitStoreTimeout typeutil.Duration `toml:"wait-store-timeout" json:"wait-store-timeout"` - PauseRegionSplit bool `toml:"pause-region-split" json:"pause-region-split,string"` + LabelKey string `toml:"label-key" json:"label-key"` + Primary string `toml:"primary" json:"primary"` + DR string `toml:"dr" json:"dr"` + PrimaryReplicas int `toml:"primary-replicas" json:"primary-replicas"` + DRReplicas int `toml:"dr-replicas" json:"dr-replicas"` + WaitStoreTimeout typeutil.Duration `toml:"wait-store-timeout" json:"wait-store-timeout"` + WaitRecoverTimeout typeutil.Duration `toml:"wait-recover-timeout" json:"wait-recover-timeout"` + PauseRegionSplit bool `toml:"pause-region-split" json:"pause-region-split,string"` } func (c *DRAutoSyncReplicationConfig) adjust(meta *configutil.ConfigMetaData) { diff --git a/server/config/config_test.go b/server/config/config_test.go index 75e69c26d5c..07cdc966409 100644 --- a/server/config/config_test.go +++ b/server/config/config_test.go @@ -26,6 +26,7 @@ import ( "github.com/BurntSushi/toml" "github.com/spf13/pflag" "github.com/stretchr/testify/require" + "github.com/tikv/pd/pkg/ratelimit" sc "github.com/tikv/pd/pkg/schedule/config" "github.com/tikv/pd/pkg/storage" "github.com/tikv/pd/pkg/utils/configutil" @@ -479,3 +480,28 @@ func newTestScheduleOption() (*PersistOptions, error) { opt := NewPersistOptions(cfg) return opt, nil } + +func TestRateLimitClone(t *testing.T) { + re := require.New(t) + cfg := &RateLimitConfig{ + EnableRateLimit: defaultEnableRateLimitMiddleware, + LimiterConfig: make(map[string]ratelimit.DimensionConfig), + } + clone := cfg.Clone() + clone.LimiterConfig["test"] = ratelimit.DimensionConfig{ + ConcurrencyLimit: 200, + } + dc := cfg.LimiterConfig["test"] + re.Equal(dc.ConcurrencyLimit, uint64(0)) + + gCfg := &GRPCRateLimitConfig{ + EnableRateLimit: defaultEnableGRPCRateLimitMiddleware, + LimiterConfig: make(map[string]ratelimit.DimensionConfig), + } + gClone := gCfg.Clone() + gClone.LimiterConfig["test"] = ratelimit.DimensionConfig{ + ConcurrencyLimit: 300, + } + gdc := gCfg.LimiterConfig["test"] + re.Equal(gdc.ConcurrencyLimit, uint64(0)) +} diff --git a/server/config/persist_options.go b/server/config/persist_options.go index c0a0ebf5c47..0fa1804b879 100644 --- a/server/config/persist_options.go +++ b/server/config/persist_options.go @@ -216,38 +216,21 @@ func (o *PersistOptions) SetMaxReplicas(replicas int) { o.SetReplicationConfig(v) } -const ( - maxSnapshotCountKey = "schedule.max-snapshot-count" - maxMergeRegionSizeKey = "schedule.max-merge-region-size" - maxPendingPeerCountKey = "schedule.max-pending-peer-count" - maxMergeRegionKeysKey = "schedule.max-merge-region-keys" - leaderScheduleLimitKey = "schedule.leader-schedule-limit" - regionScheduleLimitKey = "schedule.region-schedule-limit" - witnessScheduleLimitKey = "schedule.witness-schedule-limit" - replicaRescheduleLimitKey = "schedule.replica-schedule-limit" - mergeScheduleLimitKey = "schedule.merge-schedule-limit" - hotRegionScheduleLimitKey = "schedule.hot-region-schedule-limit" - schedulerMaxWaitingOperatorKey = "schedule.scheduler-max-waiting-operator" - enableLocationReplacement = "schedule.enable-location-replacement" - // it's related to schedule, but it's not an 
explicit config - enableTiKVSplitRegion = "schedule.enable-tikv-split-region" -) - var supportedTTLConfigs = []string{ - maxSnapshotCountKey, - maxMergeRegionSizeKey, - maxPendingPeerCountKey, - maxMergeRegionKeysKey, - leaderScheduleLimitKey, - regionScheduleLimitKey, - replicaRescheduleLimitKey, - mergeScheduleLimitKey, - hotRegionScheduleLimitKey, - schedulerMaxWaitingOperatorKey, - enableLocationReplacement, - enableTiKVSplitRegion, - "default-add-peer", - "default-remove-peer", + sc.MaxSnapshotCountKey, + sc.MaxMergeRegionSizeKey, + sc.MaxPendingPeerCountKey, + sc.MaxMergeRegionKeysKey, + sc.LeaderScheduleLimitKey, + sc.RegionScheduleLimitKey, + sc.ReplicaRescheduleLimitKey, + sc.MergeScheduleLimitKey, + sc.HotRegionScheduleLimitKey, + sc.SchedulerMaxWaitingOperatorKey, + sc.EnableLocationReplacement, + sc.EnableTiKVSplitRegion, + sc.DefaultAddPeer, + sc.DefaultRemovePeer, } // IsSupportedTTLConfig checks whether a key is a supported config item with ttl @@ -262,27 +245,27 @@ func IsSupportedTTLConfig(key string) bool { // GetMaxSnapshotCount returns the number of the max snapshot which is allowed to send. func (o *PersistOptions) GetMaxSnapshotCount() uint64 { - return o.getTTLUintOr(maxSnapshotCountKey, o.GetScheduleConfig().MaxSnapshotCount) + return o.getTTLUintOr(sc.MaxSnapshotCountKey, o.GetScheduleConfig().MaxSnapshotCount) } // GetMaxPendingPeerCount returns the number of the max pending peers. func (o *PersistOptions) GetMaxPendingPeerCount() uint64 { - return o.getTTLUintOr(maxPendingPeerCountKey, o.GetScheduleConfig().MaxPendingPeerCount) + return o.getTTLUintOr(sc.MaxPendingPeerCountKey, o.GetScheduleConfig().MaxPendingPeerCount) } // GetMaxMergeRegionSize returns the max region size. func (o *PersistOptions) GetMaxMergeRegionSize() uint64 { - return o.getTTLUintOr(maxMergeRegionSizeKey, o.GetScheduleConfig().MaxMergeRegionSize) + return o.getTTLUintOr(sc.MaxMergeRegionSizeKey, o.GetScheduleConfig().MaxMergeRegionSize) } // GetMaxMergeRegionKeys returns the max number of keys. // It returns size * 10000 if the key of max-merge-region-Keys doesn't exist. func (o *PersistOptions) GetMaxMergeRegionKeys() uint64 { - keys, exist, err := o.getTTLUint(maxMergeRegionKeysKey) + keys, exist, err := o.getTTLUint(sc.MaxMergeRegionKeysKey) if exist && err == nil { return keys } - size, exist, err := o.getTTLUint(maxMergeRegionSizeKey) + size, exist, err := o.getTTLUint(sc.MaxMergeRegionSizeKey) if exist && err == nil { return size * 10000 } @@ -424,32 +407,32 @@ func (o *PersistOptions) GetMaxStorePreparingTime() time.Duration { // GetLeaderScheduleLimit returns the limit for leader schedule. func (o *PersistOptions) GetLeaderScheduleLimit() uint64 { - return o.getTTLUintOr(leaderScheduleLimitKey, o.GetScheduleConfig().LeaderScheduleLimit) + return o.getTTLUintOr(sc.LeaderScheduleLimitKey, o.GetScheduleConfig().LeaderScheduleLimit) } // GetRegionScheduleLimit returns the limit for region schedule. func (o *PersistOptions) GetRegionScheduleLimit() uint64 { - return o.getTTLUintOr(regionScheduleLimitKey, o.GetScheduleConfig().RegionScheduleLimit) + return o.getTTLUintOr(sc.RegionScheduleLimitKey, o.GetScheduleConfig().RegionScheduleLimit) } // GetWitnessScheduleLimit returns the limit for region schedule. 
func (o *PersistOptions) GetWitnessScheduleLimit() uint64 { - return o.getTTLUintOr(witnessScheduleLimitKey, o.GetScheduleConfig().WitnessScheduleLimit) + return o.getTTLUintOr(sc.WitnessScheduleLimitKey, o.GetScheduleConfig().WitnessScheduleLimit) } // GetReplicaScheduleLimit returns the limit for replica schedule. func (o *PersistOptions) GetReplicaScheduleLimit() uint64 { - return o.getTTLUintOr(replicaRescheduleLimitKey, o.GetScheduleConfig().ReplicaScheduleLimit) + return o.getTTLUintOr(sc.ReplicaRescheduleLimitKey, o.GetScheduleConfig().ReplicaScheduleLimit) } // GetMergeScheduleLimit returns the limit for merge schedule. func (o *PersistOptions) GetMergeScheduleLimit() uint64 { - return o.getTTLUintOr(mergeScheduleLimitKey, o.GetScheduleConfig().MergeScheduleLimit) + return o.getTTLUintOr(sc.MergeScheduleLimitKey, o.GetScheduleConfig().MergeScheduleLimit) } // GetHotRegionScheduleLimit returns the limit for hot region schedule. func (o *PersistOptions) GetHotRegionScheduleLimit() uint64 { - return o.getTTLUintOr(hotRegionScheduleLimitKey, o.GetScheduleConfig().HotRegionScheduleLimit) + return o.getTTLUintOr(sc.HotRegionScheduleLimitKey, o.GetScheduleConfig().HotRegionScheduleLimit) } // GetStoreLimit returns the limit of a store. @@ -552,7 +535,7 @@ func (o *PersistOptions) GetRegionScoreFormulaVersion() string { // GetSchedulerMaxWaitingOperator returns the number of the max waiting operators. func (o *PersistOptions) GetSchedulerMaxWaitingOperator() uint64 { - return o.getTTLUintOr(schedulerMaxWaitingOperatorKey, o.GetScheduleConfig().SchedulerMaxWaitingOperator) + return o.getTTLUintOr(sc.SchedulerMaxWaitingOperatorKey, o.GetScheduleConfig().SchedulerMaxWaitingOperator) } // GetLeaderSchedulePolicy is to get leader schedule policy. @@ -622,12 +605,12 @@ func (o *PersistOptions) IsRemoveExtraReplicaEnabled() bool { // IsLocationReplacementEnabled returns if location replace is enabled. func (o *PersistOptions) IsLocationReplacementEnabled() bool { - return o.getTTLBoolOr(enableLocationReplacement, o.GetScheduleConfig().EnableLocationReplacement) + return o.getTTLBoolOr(sc.EnableLocationReplacement, o.GetScheduleConfig().EnableLocationReplacement) } -// IsTikvRegionSplitEnabled returns whether tikv split region is disabled. +// IsTikvRegionSplitEnabled returns whether tikv split region is enabled. func (o *PersistOptions) IsTikvRegionSplitEnabled() bool { - return o.getTTLBoolOr(enableTiKVSplitRegion, o.GetScheduleConfig().EnableTiKVSplitRegion) + return o.getTTLBoolOr(sc.EnableTiKVSplitRegion, o.GetScheduleConfig().EnableTiKVSplitRegion) } // GetMaxMovableHotPeerSize returns the max movable hot peer size. @@ -789,11 +772,10 @@ func (o *PersistOptions) Persist(storage endpoint.ConfigStorage) error { }, StoreConfig: *o.GetStoreConfig(), } - err := storage.SaveConfig(cfg) failpoint.Inject("persistFail", func() { - err = errors.New("fail to persist") + failpoint.Return(errors.New("fail to persist")) }) - return err + return storage.SaveConfig(cfg) } // Reload reloads the configuration from the storage. 
@@ -853,16 +835,22 @@ func (o *PersistOptions) GetMinResolvedTSPersistenceInterval() time.Duration { return o.GetPDServerConfig().MinResolvedTSPersistenceInterval.Duration } -const ttlConfigPrefix = "/config/ttl" - // SetTTLData set temporary configuration func (o *PersistOptions) SetTTLData(parCtx context.Context, client *clientv3.Client, key string, value string, ttl time.Duration) error { if o.ttl == nil { - o.ttl = cache.NewStringTTL(parCtx, time.Second*5, time.Minute*5) + o.ttl = cache.NewStringTTL(parCtx, sc.DefaultGCInterval, sc.DefaultTTL) } - _, err := etcdutil.EtcdKVPutWithTTL(parCtx, client, ttlConfigPrefix+"/"+key, value, int64(ttl.Seconds())) - if err != nil { - return err + if ttl != 0 { + // the minimum ttl is 5 seconds, if the given ttl is less than 5 seconds, we will use 5 seconds instead. + _, err := etcdutil.EtcdKVPutWithTTL(parCtx, client, sc.TTLConfigPrefix+"/"+key, value, int64(ttl.Seconds())) + if err != nil { + return err + } + } else { + _, err := client.Delete(parCtx, sc.TTLConfigPrefix+"/"+key) + if err != nil { + return err + } } o.ttl.PutWithTTL(key, value, ttl) return nil @@ -939,15 +927,15 @@ func (o *PersistOptions) GetTTLData(key string) (string, bool) { // LoadTTLFromEtcd loads temporary configuration which was persisted into etcd func (o *PersistOptions) LoadTTLFromEtcd(ctx context.Context, client *clientv3.Client) error { - resps, err := etcdutil.EtcdKVGet(client, ttlConfigPrefix, clientv3.WithPrefix()) + resps, err := etcdutil.EtcdKVGet(client, sc.TTLConfigPrefix, clientv3.WithPrefix()) if err != nil { return err } if o.ttl == nil { - o.ttl = cache.NewStringTTL(ctx, time.Second*5, time.Minute*5) + o.ttl = cache.NewStringTTL(ctx, sc.DefaultGCInterval, sc.DefaultTTL) } for _, resp := range resps.Kvs { - key := string(resp.Key)[len(ttlConfigPrefix)+1:] + key := string(resp.Key)[len(sc.TTLConfigPrefix)+1:] value := string(resp.Value) leaseID := resp.Lease resp, err := client.TimeToLive(ctx, clientv3.LeaseID(leaseID)) @@ -1036,6 +1024,7 @@ func (o *PersistOptions) IsRaftKV2() bool { } // SetRegionBucketEnabled sets if the region bucket is enabled. +// only used for test. func (o *PersistOptions) SetRegionBucketEnabled(enabled bool) { cfg := o.GetStoreConfig().Clone() cfg.SetRegionBucketEnabled(enabled) diff --git a/server/config/service_middleware_config.go b/server/config/service_middleware_config.go index ef0b04b2abd..b13e3398ac5 100644 --- a/server/config/service_middleware_config.go +++ b/server/config/service_middleware_config.go @@ -78,7 +78,12 @@ type RateLimitConfig struct { // Clone returns a cloned rate limit config. func (c *RateLimitConfig) Clone() *RateLimitConfig { + m := make(map[string]ratelimit.DimensionConfig, len(c.LimiterConfig)) + for k, v := range c.LimiterConfig { + m[k] = v + } cfg := *c + cfg.LimiterConfig = m return &cfg } @@ -92,6 +97,11 @@ type GRPCRateLimitConfig struct { // Clone returns a cloned rate limit config. func (c *GRPCRateLimitConfig) Clone() *GRPCRateLimitConfig { + m := make(map[string]ratelimit.DimensionConfig, len(c.LimiterConfig)) + for k, v := range c.LimiterConfig { + m[k] = v + } cfg := *c + cfg.LimiterConfig = m return &cfg } diff --git a/server/forward.go b/server/forward.go new file mode 100644 index 00000000000..e765d442539 --- /dev/null +++ b/server/forward.go @@ -0,0 +1,504 @@ +// Copyright 2023 TiKV Project Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package server + +import ( + "context" + "io" + "strings" + "time" + + "github.com/pingcap/errors" + "github.com/pingcap/failpoint" + "github.com/pingcap/kvproto/pkg/pdpb" + "github.com/pingcap/kvproto/pkg/schedulingpb" + "github.com/pingcap/kvproto/pkg/tsopb" + "github.com/pingcap/log" + "github.com/tikv/pd/pkg/errs" + "github.com/tikv/pd/pkg/mcs/utils" + "github.com/tikv/pd/pkg/tso" + "github.com/tikv/pd/pkg/utils/grpcutil" + "github.com/tikv/pd/pkg/utils/logutil" + "github.com/tikv/pd/pkg/utils/tsoutil" + "go.uber.org/zap" + "google.golang.org/grpc" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" +) + +func (s *GrpcServer) forwardTSORequest( + ctx context.Context, + request *pdpb.TsoRequest, + forwardStream tsopb.TSO_TsoClient) (*tsopb.TsoResponse, error) { + tsopbReq := &tsopb.TsoRequest{ + Header: &tsopb.RequestHeader{ + ClusterId: request.GetHeader().GetClusterId(), + SenderId: request.GetHeader().GetSenderId(), + KeyspaceId: utils.DefaultKeyspaceID, + KeyspaceGroupId: utils.DefaultKeyspaceGroupID, + }, + Count: request.GetCount(), + DcLocation: request.GetDcLocation(), + } + + failpoint.Inject("tsoProxySendToTSOTimeout", func() { + // block until watchDeadline routine cancels the context. + <-ctx.Done() + }) + + select { + case <-ctx.Done(): + return nil, ctx.Err() + default: + } + + if err := forwardStream.Send(tsopbReq); err != nil { + return nil, err + } + + failpoint.Inject("tsoProxyRecvFromTSOTimeout", func() { + // block until watchDeadline routine cancels the context. + <-ctx.Done() + }) + + select { + case <-ctx.Done(): + return nil, ctx.Err() + default: + } + + return forwardStream.Recv() +} + +// forwardTSO forward the TSO requests to the TSO service. 
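+// It wraps the PD stream in a tsoServer, enforces the limit on concurrent TSO proxy
+// streamings, and re-creates the forward stream whenever the TSO primary address changes.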
+func (s *GrpcServer) forwardTSO(stream pdpb.PD_TsoServer) error { + var ( + server = &tsoServer{stream: stream} + forwardStream tsopb.TSO_TsoClient + forwardCtx context.Context + cancelForward context.CancelFunc + lastForwardedHost string + ) + defer func() { + s.concurrentTSOProxyStreamings.Add(-1) + if cancelForward != nil { + cancelForward() + } + }() + + maxConcurrentTSOProxyStreamings := int32(s.GetMaxConcurrentTSOProxyStreamings()) + if maxConcurrentTSOProxyStreamings >= 0 { + if newCount := s.concurrentTSOProxyStreamings.Add(1); newCount > maxConcurrentTSOProxyStreamings { + return errors.WithStack(ErrMaxCountTSOProxyRoutinesExceeded) + } + } + + tsDeadlineCh := make(chan *tsoutil.TSDeadline, 1) + go tsoutil.WatchTSDeadline(stream.Context(), tsDeadlineCh) + + for { + select { + case <-s.ctx.Done(): + return errors.WithStack(s.ctx.Err()) + case <-stream.Context().Done(): + return stream.Context().Err() + default: + } + + request, err := server.Recv(s.GetTSOProxyRecvFromClientTimeout()) + if err == io.EOF { + return nil + } + if err != nil { + return errors.WithStack(err) + } + if request.GetCount() == 0 { + err = errs.ErrGenerateTimestamp.FastGenByArgs("tso count should be positive") + return status.Errorf(codes.Unknown, err.Error()) + } + + forwardedHost, ok := s.GetServicePrimaryAddr(stream.Context(), utils.TSOServiceName) + if !ok || len(forwardedHost) == 0 { + return errors.WithStack(ErrNotFoundTSOAddr) + } + if forwardStream == nil || lastForwardedHost != forwardedHost { + if cancelForward != nil { + cancelForward() + } + + clientConn, err := s.getDelegateClient(s.ctx, forwardedHost) + if err != nil { + return errors.WithStack(err) + } + forwardStream, forwardCtx, cancelForward, err = s.createTSOForwardStream(stream.Context(), clientConn) + if err != nil { + return errors.WithStack(err) + } + lastForwardedHost = forwardedHost + } + + tsopbResp, err := s.forwardTSORequestWithDeadLine(forwardCtx, cancelForward, forwardStream, request, tsDeadlineCh) + if err != nil { + return errors.WithStack(err) + } + + // The error types defined for tsopb and pdpb are different, so we need to convert them. + var pdpbErr *pdpb.Error + tsopbErr := tsopbResp.GetHeader().GetError() + if tsopbErr != nil { + if tsopbErr.Type == tsopb.ErrorType_OK { + pdpbErr = &pdpb.Error{ + Type: pdpb.ErrorType_OK, + Message: tsopbErr.GetMessage(), + } + } else { + // TODO: specify FORWARD FAILURE error type instead of UNKNOWN. 
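+ // For now, any non-OK error from the TSO service is returned to the client as UNKNOWN with the original message preserved.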
+ pdpbErr = &pdpb.Error{ + Type: pdpb.ErrorType_UNKNOWN, + Message: tsopbErr.GetMessage(), + } + } + } + + response := &pdpb.TsoResponse{ + Header: &pdpb.ResponseHeader{ + ClusterId: tsopbResp.GetHeader().GetClusterId(), + Error: pdpbErr, + }, + Count: tsopbResp.GetCount(), + Timestamp: tsopbResp.GetTimestamp(), + } + if err := server.Send(response); err != nil { + return errors.WithStack(err) + } + } +} + +func (s *GrpcServer) forwardTSORequestWithDeadLine( + forwardCtx context.Context, + cancelForward context.CancelFunc, + forwardStream tsopb.TSO_TsoClient, + request *pdpb.TsoRequest, + tsDeadlineCh chan<- *tsoutil.TSDeadline) (*tsopb.TsoResponse, error) { + done := make(chan struct{}) + dl := tsoutil.NewTSDeadline(tsoutil.DefaultTSOProxyTimeout, done, cancelForward) + select { + case tsDeadlineCh <- dl: + case <-forwardCtx.Done(): + return nil, forwardCtx.Err() + } + + start := time.Now() + resp, err := s.forwardTSORequest(forwardCtx, request, forwardStream) + close(done) + if err != nil { + if strings.Contains(err.Error(), errs.NotLeaderErr) { + s.tsoPrimaryWatcher.ForceLoad() + } + return nil, err + } + tsoProxyBatchSize.Observe(float64(request.GetCount())) + tsoProxyHandleDuration.Observe(time.Since(start).Seconds()) + return resp, nil +} + +func (s *GrpcServer) createTSOForwardStream(ctx context.Context, client *grpc.ClientConn) (tsopb.TSO_TsoClient, context.Context, context.CancelFunc, error) { + done := make(chan struct{}) + forwardCtx, cancelForward := context.WithCancel(ctx) + go grpcutil.CheckStream(forwardCtx, cancelForward, done) + forwardStream, err := tsopb.NewTSOClient(client).Tso(forwardCtx) + done <- struct{}{} + return forwardStream, forwardCtx, cancelForward, err +} + +func (s *GrpcServer) createRegionHeartbeatForwardStream(client *grpc.ClientConn) (pdpb.PD_RegionHeartbeatClient, context.CancelFunc, error) { + done := make(chan struct{}) + ctx, cancel := context.WithCancel(s.ctx) + go grpcutil.CheckStream(ctx, cancel, done) + forwardStream, err := pdpb.NewPDClient(client).RegionHeartbeat(ctx) + done <- struct{}{} + return forwardStream, cancel, err +} + +func (s *GrpcServer) createRegionHeartbeatSchedulingStream(ctx context.Context, client *grpc.ClientConn) (schedulingpb.Scheduling_RegionHeartbeatClient, context.Context, context.CancelFunc, error) { + done := make(chan struct{}) + forwardCtx, cancelForward := context.WithCancel(ctx) + go grpcutil.CheckStream(forwardCtx, cancelForward, done) + forwardStream, err := schedulingpb.NewSchedulingClient(client).RegionHeartbeat(forwardCtx) + done <- struct{}{} + return forwardStream, forwardCtx, cancelForward, err +} + +func forwardRegionHeartbeatToScheduling(forwardStream schedulingpb.Scheduling_RegionHeartbeatClient, server *heartbeatServer, errCh chan error) { + defer logutil.LogPanic() + defer close(errCh) + for { + resp, err := forwardStream.Recv() + if err == io.EOF { + errCh <- errors.WithStack(err) + return + } + if err != nil { + errCh <- errors.WithStack(err) + return + } + // The error types defined for schedulingpb and pdpb are different, so we need to convert them. + var pdpbErr *pdpb.Error + schedulingpbErr := resp.GetHeader().GetError() + if schedulingpbErr != nil { + if schedulingpbErr.Type == schedulingpb.ErrorType_OK { + pdpbErr = &pdpb.Error{ + Type: pdpb.ErrorType_OK, + Message: schedulingpbErr.GetMessage(), + } + } else { + // TODO: specify FORWARD FAILURE error type instead of UNKNOWN. 
+ pdpbErr = &pdpb.Error{ + Type: pdpb.ErrorType_UNKNOWN, + Message: schedulingpbErr.GetMessage(), + } + } + } + response := &pdpb.RegionHeartbeatResponse{ + Header: &pdpb.ResponseHeader{ + ClusterId: resp.GetHeader().GetClusterId(), + Error: pdpbErr, + }, + ChangePeer: resp.GetChangePeer(), + TransferLeader: resp.GetTransferLeader(), + RegionId: resp.GetRegionId(), + RegionEpoch: resp.GetRegionEpoch(), + TargetPeer: resp.GetTargetPeer(), + Merge: resp.GetMerge(), + SplitRegion: resp.GetSplitRegion(), + ChangePeerV2: resp.GetChangePeerV2(), + SwitchWitnesses: resp.GetSwitchWitnesses(), + } + + if err := server.Send(response); err != nil { + errCh <- errors.WithStack(err) + return + } + } +} + +func forwardRegionHeartbeatClientToServer(forwardStream pdpb.PD_RegionHeartbeatClient, server *heartbeatServer, errCh chan error) { + defer logutil.LogPanic() + defer close(errCh) + for { + resp, err := forwardStream.Recv() + if err != nil { + errCh <- errors.WithStack(err) + return + } + if err := server.Send(resp); err != nil { + errCh <- errors.WithStack(err) + return + } + } +} + +func forwardReportBucketClientToServer(forwardStream pdpb.PD_ReportBucketsClient, server *bucketHeartbeatServer, errCh chan error) { + defer logutil.LogPanic() + defer close(errCh) + for { + resp, err := forwardStream.CloseAndRecv() + if err != nil { + errCh <- errors.WithStack(err) + return + } + if err := server.Send(resp); err != nil { + errCh <- errors.WithStack(err) + return + } + } +} + +func (s *GrpcServer) createReportBucketsForwardStream(client *grpc.ClientConn) (pdpb.PD_ReportBucketsClient, context.CancelFunc, error) { + done := make(chan struct{}) + ctx, cancel := context.WithCancel(s.ctx) + go grpcutil.CheckStream(ctx, cancel, done) + forwardStream, err := pdpb.NewPDClient(client).ReportBuckets(ctx) + done <- struct{}{} + return forwardStream, cancel, err +} + +func (s *GrpcServer) getDelegateClient(ctx context.Context, forwardedHost string) (*grpc.ClientConn, error) { + client, ok := s.clientConns.Load(forwardedHost) + if ok { + // Mostly, the connection is already established, and return it directly. + return client.(*grpc.ClientConn), nil + } + + tlsConfig, err := s.GetTLSConfig().ToTLSConfig() + if err != nil { + return nil, err + } + ctxTimeout, cancel := context.WithTimeout(ctx, defaultGRPCDialTimeout) + defer cancel() + newConn, err := grpcutil.GetClientConn(ctxTimeout, forwardedHost, tlsConfig) + if err != nil { + return nil, err + } + conn, loaded := s.clientConns.LoadOrStore(forwardedHost, newConn) + if !loaded { + // Successfully stored the connection we created. + return newConn, nil + } + // Loaded a connection created/stored by another goroutine, so close the one we created + // and return the one we loaded. 
+ newConn.Close() + return conn.(*grpc.ClientConn), nil +} + +func (s *GrpcServer) getForwardedHost(ctx, streamCtx context.Context, serviceName ...string) (forwardedHost string, err error) { + if s.IsAPIServiceMode() { + var ok bool + if len(serviceName) == 0 { + return "", ErrNotFoundService + } + forwardedHost, ok = s.GetServicePrimaryAddr(ctx, serviceName[0]) + if !ok || len(forwardedHost) == 0 { + switch serviceName[0] { + case utils.TSOServiceName: + return "", ErrNotFoundTSOAddr + case utils.SchedulingServiceName: + return "", ErrNotFoundSchedulingAddr + } + } + } else if fh := grpcutil.GetForwardedHost(streamCtx); !s.isLocalRequest(fh) { + forwardedHost = fh + } + return forwardedHost, nil +} + +func (s *GrpcServer) isLocalRequest(forwardedHost string) bool { + failpoint.Inject("useForwardRequest", func() { + failpoint.Return(false) + }) + if forwardedHost == "" { + return true + } + memberAddrs := s.GetMember().Member().GetClientUrls() + for _, addr := range memberAddrs { + if addr == forwardedHost { + return true + } + } + return false +} + +func (s *GrpcServer) getGlobalTSO(ctx context.Context) (pdpb.Timestamp, error) { + if !s.IsAPIServiceMode() { + return s.tsoAllocatorManager.HandleRequest(ctx, tso.GlobalDCLocation, 1) + } + request := &tsopb.TsoRequest{ + Header: &tsopb.RequestHeader{ + ClusterId: s.clusterID, + KeyspaceId: utils.DefaultKeyspaceID, + KeyspaceGroupId: utils.DefaultKeyspaceGroupID, + }, + Count: 1, + } + var ( + forwardedHost string + forwardStream tsopb.TSO_TsoClient + ts *tsopb.TsoResponse + err error + ok bool + ) + handleStreamError := func(err error) (needRetry bool) { + if strings.Contains(err.Error(), errs.NotLeaderErr) { + s.tsoPrimaryWatcher.ForceLoad() + log.Warn("force to load tso primary address due to error", zap.Error(err), zap.String("tso-addr", forwardedHost)) + return true + } + if grpcutil.NeedRebuildConnection(err) { + s.tsoClientPool.Lock() + delete(s.tsoClientPool.clients, forwardedHost) + s.tsoClientPool.Unlock() + log.Warn("client connection removed due to error", zap.Error(err), zap.String("tso-addr", forwardedHost)) + return true + } + return false + } + for i := 0; i < maxRetryTimesRequestTSOServer; i++ { + if i > 0 { + time.Sleep(retryIntervalRequestTSOServer) + } + forwardedHost, ok = s.GetServicePrimaryAddr(ctx, utils.TSOServiceName) + if !ok || forwardedHost == "" { + return pdpb.Timestamp{}, ErrNotFoundTSOAddr + } + forwardStream, err = s.getTSOForwardStream(forwardedHost) + if err != nil { + return pdpb.Timestamp{}, err + } + err = forwardStream.Send(request) + if err != nil { + if needRetry := handleStreamError(err); needRetry { + continue + } + log.Error("send request to tso primary server failed", zap.Error(err), zap.String("tso-addr", forwardedHost)) + return pdpb.Timestamp{}, err + } + ts, err = forwardStream.Recv() + if err != nil { + if needRetry := handleStreamError(err); needRetry { + continue + } + log.Error("receive response from tso primary server failed", zap.Error(err), zap.String("tso-addr", forwardedHost)) + return pdpb.Timestamp{}, err + } + return *ts.GetTimestamp(), nil + } + log.Error("get global tso from tso primary server failed after retry", zap.Error(err), zap.String("tso-addr", forwardedHost)) + return pdpb.Timestamp{}, err +} + +func (s *GrpcServer) getTSOForwardStream(forwardedHost string) (tsopb.TSO_TsoClient, error) { + s.tsoClientPool.RLock() + forwardStream, ok := s.tsoClientPool.clients[forwardedHost] + s.tsoClientPool.RUnlock() + if ok { + // This is the common case to return here + return 
forwardStream, nil + } + + s.tsoClientPool.Lock() + defer s.tsoClientPool.Unlock() + + // Double check after entering the critical section + forwardStream, ok = s.tsoClientPool.clients[forwardedHost] + if ok { + return forwardStream, nil + } + + // Now let's create the client connection and the forward stream + client, err := s.getDelegateClient(s.ctx, forwardedHost) + if err != nil { + return nil, err + } + done := make(chan struct{}) + ctx, cancel := context.WithCancel(s.ctx) + go grpcutil.CheckStream(ctx, cancel, done) + forwardStream, err = tsopb.NewTSOClient(client).Tso(ctx) + done <- struct{}{} + if err != nil { + return nil, err + } + s.tsoClientPool.clients[forwardedHost] = forwardStream + return forwardStream, nil +} diff --git a/server/gc_service.go b/server/gc_service.go index d8a0158920d..90333654e5e 100644 --- a/server/gc_service.go +++ b/server/gc_service.go @@ -26,7 +26,6 @@ import ( "github.com/pingcap/log" "github.com/tikv/pd/pkg/errs" "github.com/tikv/pd/pkg/storage/endpoint" - "github.com/tikv/pd/pkg/tso" "github.com/tikv/pd/pkg/utils/etcdutil" "github.com/tikv/pd/pkg/utils/tsoutil" "go.etcd.io/etcd/clientv3" @@ -107,15 +106,7 @@ func (s *GrpcServer) UpdateServiceSafePointV2(ctx context.Context, request *pdpb return rsp.(*pdpb.UpdateServiceSafePointV2Response), err } - var ( - nowTSO pdpb.Timestamp - err error - ) - if s.IsAPIServiceMode() { - nowTSO, err = s.getGlobalTSOFromTSOServer(ctx) - } else { - nowTSO, err = s.tsoAllocatorManager.HandleRequest(ctx, tso.GlobalDCLocation, 1) - } + nowTSO, err := s.getGlobalTSO(ctx) if err != nil { return nil, err } diff --git a/server/grpc_service.go b/server/grpc_service.go index 14b3f72979d..24280f46437 100644 --- a/server/grpc_service.go +++ b/server/grpc_service.go @@ -70,12 +70,24 @@ var ( ErrSendHeartbeatTimeout = status.Errorf(codes.DeadlineExceeded, "send heartbeat timeout") ErrNotFoundTSOAddr = status.Errorf(codes.NotFound, "not found tso address") ErrNotFoundSchedulingAddr = status.Errorf(codes.NotFound, "not found scheduling address") + ErrNotFoundService = status.Errorf(codes.NotFound, "not found service") ErrForwardTSOTimeout = status.Errorf(codes.DeadlineExceeded, "forward tso request timeout") ErrMaxCountTSOProxyRoutinesExceeded = status.Errorf(codes.ResourceExhausted, "max count of concurrent tso proxy routines exceeded") ErrTSOProxyRecvFromClientTimeout = status.Errorf(codes.DeadlineExceeded, "tso proxy timeout when receiving from client; stream closed by server") ErrEtcdNotStarted = status.Errorf(codes.Unavailable, "server is started, but etcd not started") ) +var ( + errRegionHeartbeatSend = forwardFailCounter.WithLabelValues("region_heartbeat", "send") + errRegionHeartbeatClient = forwardFailCounter.WithLabelValues("region_heartbeat", "client") + errRegionHeartbeatStream = forwardFailCounter.WithLabelValues("region_heartbeat", "stream") + errRegionHeartbeatRecv = forwardFailCounter.WithLabelValues("region_heartbeat", "recv") + errScatterRegionSend = forwardFailCounter.WithLabelValues("scatter_region", "send") + errSplitRegionsSend = forwardFailCounter.WithLabelValues("split_regions", "send") + errStoreHeartbeatSend = forwardFailCounter.WithLabelValues("store_heartbeat", "send") + errGetOperatorSend = forwardFailCounter.WithLabelValues("get_operator", "send") +) + // GrpcServer wraps Server to provide grpc service. 
type GrpcServer struct { *Server @@ -83,9 +95,120 @@ type GrpcServer struct { concurrentTSOProxyStreamings atomic.Int32 } +// tsoServer wraps PD_TsoServer to ensure when any error +// occurs on Send() or Recv(), both endpoints will be closed. +type tsoServer struct { + stream pdpb.PD_TsoServer + closed int32 +} + +type pdpbTSORequest struct { + request *pdpb.TsoRequest + err error +} + +func (s *tsoServer) Send(m *pdpb.TsoResponse) error { + if atomic.LoadInt32(&s.closed) == 1 { + return io.EOF + } + done := make(chan error, 1) + go func() { + defer logutil.LogPanic() + failpoint.Inject("tsoProxyFailToSendToClient", func() { + done <- errors.New("injected error") + failpoint.Return() + }) + done <- s.stream.Send(m) + }() + timer := time.NewTimer(tsoutil.DefaultTSOProxyTimeout) + defer timer.Stop() + select { + case err := <-done: + if err != nil { + atomic.StoreInt32(&s.closed, 1) + } + return errors.WithStack(err) + case <-timer.C: + atomic.StoreInt32(&s.closed, 1) + return ErrForwardTSOTimeout + } +} + +func (s *tsoServer) Recv(timeout time.Duration) (*pdpb.TsoRequest, error) { + if atomic.LoadInt32(&s.closed) == 1 { + return nil, io.EOF + } + failpoint.Inject("tsoProxyRecvFromClientTimeout", func(val failpoint.Value) { + if customTimeoutInSeconds, ok := val.(int); ok { + timeout = time.Duration(customTimeoutInSeconds) * time.Second + } + }) + requestCh := make(chan *pdpbTSORequest, 1) + go func() { + defer logutil.LogPanic() + request, err := s.stream.Recv() + requestCh <- &pdpbTSORequest{request: request, err: err} + }() + timer := time.NewTimer(timeout) + defer timer.Stop() + select { + case req := <-requestCh: + if req.err != nil { + atomic.StoreInt32(&s.closed, 1) + return nil, errors.WithStack(req.err) + } + return req.request, nil + case <-timer.C: + atomic.StoreInt32(&s.closed, 1) + return nil, ErrTSOProxyRecvFromClientTimeout + } +} + +// heartbeatServer wraps PD_RegionHeartbeatServer to ensure when any error +// occurs on Send() or Recv(), both endpoints will be closed. 
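+// Send is bounded by heartbeatSendTimeout, and both Send and Recv mark the stream as
+// closed once an error is observed.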
+type heartbeatServer struct { + stream pdpb.PD_RegionHeartbeatServer + closed int32 +} + +func (s *heartbeatServer) Send(m core.RegionHeartbeatResponse) error { + if atomic.LoadInt32(&s.closed) == 1 { + return io.EOF + } + done := make(chan error, 1) + go func() { + defer logutil.LogPanic() + done <- s.stream.Send(m.(*pdpb.RegionHeartbeatResponse)) + }() + timer := time.NewTimer(heartbeatSendTimeout) + defer timer.Stop() + select { + case err := <-done: + if err != nil { + atomic.StoreInt32(&s.closed, 1) + } + return errors.WithStack(err) + case <-timer.C: + atomic.StoreInt32(&s.closed, 1) + return ErrSendHeartbeatTimeout + } +} + +func (s *heartbeatServer) Recv() (*pdpb.RegionHeartbeatRequest, error) { + if atomic.LoadInt32(&s.closed) == 1 { + return nil, io.EOF + } + req, err := s.stream.Recv() + if err != nil { + atomic.StoreInt32(&s.closed, 1) + return nil, errors.WithStack(err) + } + return req, nil +} + type schedulingClient struct { - client schedulingpb.SchedulingClient - lastPrimary string + client schedulingpb.SchedulingClient + primary string } func (s *schedulingClient) getClient() schedulingpb.SchedulingClient { @@ -99,7 +222,7 @@ func (s *schedulingClient) getPrimaryAddr() string { if s == nil { return "" } - return s.lastPrimary + return s.primary } type request interface { @@ -159,10 +282,13 @@ func (s *GrpcServer) GetClusterInfo(ctx context.Context, _ *pdpb.GetClusterInfoR func (s *GrpcServer) GetMinTS( ctx context.Context, request *pdpb.GetMinTSRequest, ) (*pdpb.GetMinTSResponse, error) { - if err := s.validateRequest(request.GetHeader()); err != nil { - return &pdpb.GetMinTSResponse{ - Header: s.wrapErrorToHeader(pdpb.ErrorType_UNKNOWN, err.Error()), - }, nil + fn := func(ctx context.Context, client *grpc.ClientConn) (interface{}, error) { + return pdpb.NewPDClient(client).GetMinTS(ctx, request) + } + if rsp, err := s.unaryMiddleware(ctx, request, fn); err != nil { + return nil, err + } else if rsp != nil { + return rsp.(*pdpb.GetMinTSResponse), nil } var ( @@ -202,11 +328,11 @@ func (s *GrpcServer) GetMinTSFromTSOService(dcLocation string) (*pdpb.Timestamp, // Get the minimal timestamp from the TSO servers/pods var mutex syncutil.Mutex - resps := make([]*tsopb.GetMinTSResponse, len(addrs)) + resps := make([]*tsopb.GetMinTSResponse, 0) wg := sync.WaitGroup{} wg.Add(len(addrs)) - for idx, addr := range addrs { - go func(idx int, addr string) { + for _, addr := range addrs { + go func(addr string) { defer wg.Done() resp, err := s.getMinTSFromSingleServer(s.ctx, dcLocation, addr) if err != nil || resp == nil { @@ -216,8 +342,8 @@ func (s *GrpcServer) GetMinTSFromTSOService(dcLocation string) (*pdpb.Timestamp, } mutex.Lock() defer mutex.Unlock() - resps[idx] = resp - }(idx, addr) + resps = append(resps, resp) + }(addr) } wg.Wait() @@ -393,7 +519,7 @@ func (s *GrpcServer) Tso(stream pdpb.PD_TsoServer) error { return errors.WithStack(err) } - if forwardedHost, err := s.getForwardedHost(ctx, stream.Context()); err != nil { + if forwardedHost, err := s.getForwardedHost(ctx, stream.Context(), utils.TSOServiceName); err != nil { return err } else if len(forwardedHost) > 0 { clientConn, err := s.getDelegateClient(s.ctx, forwardedHost) @@ -440,268 +566,6 @@ func (s *GrpcServer) Tso(stream pdpb.PD_TsoServer) error { } } -// forwardTSO forward the TSO requests to the TSO service. 
-func (s *GrpcServer) forwardTSO(stream pdpb.PD_TsoServer) error { - var ( - server = &tsoServer{stream: stream} - forwardStream tsopb.TSO_TsoClient - forwardCtx context.Context - cancelForward context.CancelFunc - lastForwardedHost string - ) - defer func() { - s.concurrentTSOProxyStreamings.Add(-1) - if cancelForward != nil { - cancelForward() - } - }() - - maxConcurrentTSOProxyStreamings := int32(s.GetMaxConcurrentTSOProxyStreamings()) - if maxConcurrentTSOProxyStreamings >= 0 { - if newCount := s.concurrentTSOProxyStreamings.Add(1); newCount > maxConcurrentTSOProxyStreamings { - return errors.WithStack(ErrMaxCountTSOProxyRoutinesExceeded) - } - } - - tsDeadlineCh := make(chan *tsoutil.TSDeadline, 1) - go tsoutil.WatchTSDeadline(stream.Context(), tsDeadlineCh) - - for { - select { - case <-s.ctx.Done(): - return errors.WithStack(s.ctx.Err()) - case <-stream.Context().Done(): - return stream.Context().Err() - default: - } - - request, err := server.Recv(s.GetTSOProxyRecvFromClientTimeout()) - if err == io.EOF { - return nil - } - if err != nil { - return errors.WithStack(err) - } - if request.GetCount() == 0 { - err = errs.ErrGenerateTimestamp.FastGenByArgs("tso count should be positive") - return status.Errorf(codes.Unknown, err.Error()) - } - - forwardedHost, ok := s.GetServicePrimaryAddr(stream.Context(), utils.TSOServiceName) - if !ok || len(forwardedHost) == 0 { - return errors.WithStack(ErrNotFoundTSOAddr) - } - if forwardStream == nil || lastForwardedHost != forwardedHost { - if cancelForward != nil { - cancelForward() - } - - clientConn, err := s.getDelegateClient(s.ctx, forwardedHost) - if err != nil { - return errors.WithStack(err) - } - forwardStream, forwardCtx, cancelForward, err = - s.createTSOForwardStream(stream.Context(), clientConn) - if err != nil { - return errors.WithStack(err) - } - lastForwardedHost = forwardedHost - } - - tsopbResp, err := s.forwardTSORequestWithDeadLine( - forwardCtx, cancelForward, forwardStream, request, tsDeadlineCh) - if err != nil { - return errors.WithStack(err) - } - - // The error types defined for tsopb and pdpb are different, so we need to convert them. - var pdpbErr *pdpb.Error - tsopbErr := tsopbResp.GetHeader().GetError() - if tsopbErr != nil { - if tsopbErr.Type == tsopb.ErrorType_OK { - pdpbErr = &pdpb.Error{ - Type: pdpb.ErrorType_OK, - Message: tsopbErr.GetMessage(), - } - } else { - // TODO: specify FORWARD FAILURE error type instead of UNKNOWN. 
- pdpbErr = &pdpb.Error{ - Type: pdpb.ErrorType_UNKNOWN, - Message: tsopbErr.GetMessage(), - } - } - } - - response := &pdpb.TsoResponse{ - Header: &pdpb.ResponseHeader{ - ClusterId: tsopbResp.GetHeader().GetClusterId(), - Error: pdpbErr, - }, - Count: tsopbResp.GetCount(), - Timestamp: tsopbResp.GetTimestamp(), - } - if err := server.Send(response); err != nil { - return errors.WithStack(err) - } - } -} - -func (s *GrpcServer) forwardTSORequestWithDeadLine( - forwardCtx context.Context, - cancelForward context.CancelFunc, - forwardStream tsopb.TSO_TsoClient, - request *pdpb.TsoRequest, - tsDeadlineCh chan<- *tsoutil.TSDeadline, -) (*tsopb.TsoResponse, error) { - done := make(chan struct{}) - dl := tsoutil.NewTSDeadline(tsoutil.DefaultTSOProxyTimeout, done, cancelForward) - select { - case tsDeadlineCh <- dl: - case <-forwardCtx.Done(): - return nil, forwardCtx.Err() - } - - start := time.Now() - resp, err := s.forwardTSORequest(forwardCtx, request, forwardStream) - close(done) - if err != nil { - if strings.Contains(err.Error(), errs.NotLeaderErr) { - s.tsoPrimaryWatcher.ForceLoad() - } - return nil, err - } - tsoProxyBatchSize.Observe(float64(request.GetCount())) - tsoProxyHandleDuration.Observe(time.Since(start).Seconds()) - return resp, nil -} - -func (s *GrpcServer) forwardTSORequest( - ctx context.Context, - request *pdpb.TsoRequest, - forwardStream tsopb.TSO_TsoClient, -) (*tsopb.TsoResponse, error) { - tsopbReq := &tsopb.TsoRequest{ - Header: &tsopb.RequestHeader{ - ClusterId: request.GetHeader().GetClusterId(), - SenderId: request.GetHeader().GetSenderId(), - KeyspaceId: utils.DefaultKeyspaceID, - KeyspaceGroupId: utils.DefaultKeyspaceGroupID, - }, - Count: request.GetCount(), - DcLocation: request.GetDcLocation(), - } - - failpoint.Inject("tsoProxySendToTSOTimeout", func() { - // block until watchDeadline routine cancels the context. - <-ctx.Done() - }) - - select { - case <-ctx.Done(): - return nil, ctx.Err() - default: - } - - if err := forwardStream.Send(tsopbReq); err != nil { - return nil, err - } - - failpoint.Inject("tsoProxyRecvFromTSOTimeout", func() { - // block until watchDeadline routine cancels the context. - <-ctx.Done() - }) - - select { - case <-ctx.Done(): - return nil, ctx.Err() - default: - } - - return forwardStream.Recv() -} - -// tsoServer wraps PD_TsoServer to ensure when any error -// occurs on Send() or Recv(), both endpoints will be closed. 
-type tsoServer struct { - stream pdpb.PD_TsoServer - closed int32 -} - -type pdpbTSORequest struct { - request *pdpb.TsoRequest - err error -} - -func (s *tsoServer) Send(m *pdpb.TsoResponse) error { - if atomic.LoadInt32(&s.closed) == 1 { - return io.EOF - } - done := make(chan error, 1) - go func() { - defer logutil.LogPanic() - failpoint.Inject("tsoProxyFailToSendToClient", func() { - done <- errors.New("injected error") - failpoint.Return() - }) - done <- s.stream.Send(m) - }() - timer := time.NewTimer(tsoutil.DefaultTSOProxyTimeout) - defer timer.Stop() - select { - case err := <-done: - if err != nil { - atomic.StoreInt32(&s.closed, 1) - } - return errors.WithStack(err) - case <-timer.C: - atomic.StoreInt32(&s.closed, 1) - return ErrForwardTSOTimeout - } -} - -func (s *tsoServer) Recv(timeout time.Duration) (*pdpb.TsoRequest, error) { - if atomic.LoadInt32(&s.closed) == 1 { - return nil, io.EOF - } - failpoint.Inject("tsoProxyRecvFromClientTimeout", func(val failpoint.Value) { - if customTimeoutInSeconds, ok := val.(int); ok { - timeout = time.Duration(customTimeoutInSeconds) * time.Second - } - }) - requestCh := make(chan *pdpbTSORequest, 1) - go func() { - defer logutil.LogPanic() - request, err := s.stream.Recv() - requestCh <- &pdpbTSORequest{request: request, err: err} - }() - timer := time.NewTimer(timeout) - defer timer.Stop() - select { - case req := <-requestCh: - if req.err != nil { - atomic.StoreInt32(&s.closed, 1) - return nil, errors.WithStack(req.err) - } - return req.request, nil - case <-timer.C: - atomic.StoreInt32(&s.closed, 1) - return nil, ErrTSOProxyRecvFromClientTimeout - } -} - -func (s *GrpcServer) getForwardedHost(ctx, streamCtx context.Context) (forwardedHost string, err error) { - if s.IsAPIServiceMode() { - var ok bool - forwardedHost, ok = s.GetServicePrimaryAddr(ctx, utils.TSOServiceName) - if !ok || len(forwardedHost) == 0 { - return "", ErrNotFoundTSOAddr - } - } else if fh := grpcutil.GetForwardedHost(streamCtx); !s.isLocalRequest(fh) { - forwardedHost = fh - } - return forwardedHost, nil -} - // Bootstrap implements gRPC PDServer. 
func (s *GrpcServer) Bootstrap(ctx context.Context, request *pdpb.BootstrapRequest) (*pdpb.BootstrapResponse, error) { fn := func(ctx context.Context, client *grpc.ClientConn) (interface{}, error) { @@ -1002,9 +866,10 @@ func (s *GrpcServer) StoreHeartbeat(ctx context.Context, request *pdpb.StoreHear s.handleDamagedStore(request.GetStats()) storeHeartbeatHandleDuration.WithLabelValues(storeAddress, storeLabel).Observe(time.Since(start).Seconds()) - if s.IsAPIServiceMode() { + if s.IsServiceIndependent(utils.SchedulingServiceName) { forwardCli, _ := s.updateSchedulingClient(ctx) - if forwardCli != nil { + cli := forwardCli.getClient() + if cli != nil { req := &schedulingpb.StoreHeartbeatRequest{ Header: &schedulingpb.RequestHeader{ ClusterId: request.GetHeader().GetClusterId(), @@ -1012,9 +877,11 @@ func (s *GrpcServer) StoreHeartbeat(ctx context.Context, request *pdpb.StoreHear }, Stats: request.GetStats(), } - if _, err := forwardCli.StoreHeartbeat(ctx, req); err != nil { + if _, err := cli.StoreHeartbeat(ctx, req); err != nil { + errStoreHeartbeatSend.Inc() + log.Debug("forward store heartbeat failed", zap.Error(err)) // reset to let it be updated in the next request - s.schedulingClient.Store(&schedulingClient{}) + s.schedulingClient.CompareAndSwap(forwardCli, &schedulingClient{}) } } } @@ -1031,22 +898,38 @@ func (s *GrpcServer) StoreHeartbeat(ctx context.Context, request *pdpb.StoreHear return resp, nil } -func (s *GrpcServer) updateSchedulingClient(ctx context.Context) (schedulingpb.SchedulingClient, error) { +// 1. forwardedHost is empty, return nil +// 2. forwardedHost is not empty and forwardedHost is equal to pre, return pre +// 3. the rest of cases, update forwardedHost and return new client +func (s *GrpcServer) updateSchedulingClient(ctx context.Context) (*schedulingClient, error) { forwardedHost, _ := s.GetServicePrimaryAddr(ctx, utils.SchedulingServiceName) + if forwardedHost == "" { + return nil, ErrNotFoundSchedulingAddr + } + pre := s.schedulingClient.Load() - if forwardedHost != "" && ((pre == nil) || (pre != nil && forwardedHost != pre.(*schedulingClient).getPrimaryAddr())) { - client, err := s.getDelegateClient(ctx, forwardedHost) - if err != nil { - log.Error("get delegate client failed", zap.Error(err)) - } - forwardCli := &schedulingClient{ - client: schedulingpb.NewSchedulingClient(client), - lastPrimary: forwardedHost, + if pre != nil && forwardedHost == pre.(*schedulingClient).getPrimaryAddr() { + return pre.(*schedulingClient), nil + } + + client, err := s.getDelegateClient(ctx, forwardedHost) + if err != nil { + log.Error("get delegate client failed", zap.Error(err)) + return nil, err + } + forwardCli := &schedulingClient{ + client: schedulingpb.NewSchedulingClient(client), + primary: forwardedHost, + } + swapped := s.schedulingClient.CompareAndSwap(pre, forwardCli) + if swapped { + oldForwardedHost := "" + if pre != nil { + oldForwardedHost = pre.(*schedulingClient).getPrimaryAddr() } - s.schedulingClient.Store(forwardCli) - return forwardCli.getClient(), nil + log.Info("update scheduling client", zap.String("old-forwarded-host", oldForwardedHost), zap.String("new-forwarded-host", forwardedHost)) } - return nil, ErrNotFoundSchedulingAddr + return forwardCli, nil } // bucketHeartbeatServer wraps PD_ReportBucketsServer to ensure when any error @@ -1091,48 +974,6 @@ func (b *bucketHeartbeatServer) Recv() (*pdpb.ReportBucketsRequest, error) { return req, nil } -// heartbeatServer wraps PD_RegionHeartbeatServer to ensure when any error -// occurs on Send() or 
Recv(), both endpoints will be closed. -type heartbeatServer struct { - stream pdpb.PD_RegionHeartbeatServer - closed int32 -} - -func (s *heartbeatServer) Send(m core.RegionHeartbeatResponse) error { - if atomic.LoadInt32(&s.closed) == 1 { - return io.EOF - } - done := make(chan error, 1) - go func() { - defer logutil.LogPanic() - done <- s.stream.Send(m.(*pdpb.RegionHeartbeatResponse)) - }() - timer := time.NewTimer(heartbeatSendTimeout) - defer timer.Stop() - select { - case err := <-done: - if err != nil { - atomic.StoreInt32(&s.closed, 1) - } - return errors.WithStack(err) - case <-timer.C: - atomic.StoreInt32(&s.closed, 1) - return ErrSendHeartbeatTimeout - } -} - -func (s *heartbeatServer) Recv() (*pdpb.RegionHeartbeatRequest, error) { - if atomic.LoadInt32(&s.closed) == 1 { - return nil, io.EOF - } - req, err := s.stream.Recv() - if err != nil { - atomic.StoreInt32(&s.closed, 1) - return nil, errors.WithStack(err) - } - return req, nil -} - // ReportBuckets implements gRPC PDServer func (s *GrpcServer) ReportBuckets(stream pdpb.PD_ReportBucketsServer) error { var ( @@ -1230,16 +1071,16 @@ func (s *GrpcServer) ReportBuckets(stream pdpb.PD_ReportBucketsServer) error { // RegionHeartbeat implements gRPC PDServer. func (s *GrpcServer) RegionHeartbeat(stream pdpb.PD_RegionHeartbeatServer) error { var ( - server = &heartbeatServer{stream: stream} - flowRoundOption = core.WithFlowRoundByDigit(s.persistOptions.GetPDServerConfig().FlowRoundByDigit) - forwardStream pdpb.PD_RegionHeartbeatClient - cancel context.CancelFunc - lastForwardedHost string - lastBind time.Time - errCh chan error - schedulingStream schedulingpb.Scheduling_RegionHeartbeatClient - cancel1 context.CancelFunc - lastPrimaryAddr string + server = &heartbeatServer{stream: stream} + flowRoundOption = core.WithFlowRoundByDigit(s.persistOptions.GetPDServerConfig().FlowRoundByDigit) + cancel context.CancelFunc + lastBind time.Time + errCh chan error + forwardStream pdpb.PD_RegionHeartbeatClient + lastForwardedHost string + forwardErrCh chan error + forwardSchedulingStream schedulingpb.Scheduling_RegionHeartbeatClient + lastForwardedSchedulingHost string ) defer func() { // cancel the forward stream @@ -1256,8 +1097,10 @@ func (s *GrpcServer) RegionHeartbeat(stream pdpb.PD_RegionHeartbeatServer) error if err != nil { return errors.WithStack(err) } - forwardedHost := grpcutil.GetForwardedHost(stream.Context()) + failpoint.Inject("grpcClientClosed", func() { + forwardedHost = s.GetMember().Member().GetClientUrls()[0] + }) if !s.isLocalRequest(forwardedHost) { if forwardStream == nil || lastForwardedHost != forwardedHost { if cancel != nil { @@ -1268,7 +1111,7 @@ func (s *GrpcServer) RegionHeartbeat(stream pdpb.PD_RegionHeartbeatServer) error return err } log.Info("create region heartbeat forward stream", zap.String("forwarded-host", forwardedHost)) - forwardStream, cancel, err = s.createHeartbeatForwardStream(client) + forwardStream, cancel, err = s.createRegionHeartbeatForwardStream(client) if err != nil { return err } @@ -1354,56 +1197,87 @@ func (s *GrpcServer) RegionHeartbeat(stream pdpb.PD_RegionHeartbeatServer) error continue } - if s.IsAPIServiceMode() { - ctx := stream.Context() - primaryAddr, _ := s.GetServicePrimaryAddr(ctx, utils.SchedulingServiceName) - if schedulingStream == nil || lastPrimaryAddr != primaryAddr { - if cancel1 != nil { - cancel1() + regionHeartbeatHandleDuration.WithLabelValues(storeAddress, storeLabel).Observe(time.Since(start).Seconds()) + regionHeartbeatCounter.WithLabelValues(storeAddress, 
storeLabel, "report", "ok").Inc() + + if s.IsServiceIndependent(utils.SchedulingServiceName) { + if forwardErrCh != nil { + select { + case err, ok := <-forwardErrCh: + if ok { + if cancel != nil { + cancel() + } + forwardSchedulingStream = nil + log.Error("meet error and need to re-establish the stream", zap.Error(err)) + } + default: + } + } + forwardedSchedulingHost, ok := s.GetServicePrimaryAddr(stream.Context(), utils.SchedulingServiceName) + if !ok || len(forwardedSchedulingHost) == 0 { + log.Debug("failed to find scheduling service primary address") + if cancel != nil { + cancel() + } + continue + } + if forwardSchedulingStream == nil || lastForwardedSchedulingHost != forwardedSchedulingHost { + if cancel != nil { + cancel() } - client, err := s.getDelegateClient(ctx, primaryAddr) + client, err := s.getDelegateClient(s.ctx, forwardedSchedulingHost) if err != nil { - log.Error("get delegate client failed", zap.Error(err)) + errRegionHeartbeatClient.Inc() + log.Error("failed to get client", zap.Error(err)) + continue } - - log.Info("create region heartbeat forward stream", zap.String("forwarded-host", primaryAddr)) - schedulingStream, cancel1, err = s.createSchedulingStream(client) + log.Info("create scheduling forwarding stream", zap.String("forwarded-host", forwardedSchedulingHost)) + forwardSchedulingStream, _, cancel, err = s.createRegionHeartbeatSchedulingStream(stream.Context(), client) if err != nil { - log.Error("create region heartbeat forward stream failed", zap.Error(err)) - } else { - lastPrimaryAddr = primaryAddr - errCh = make(chan error, 1) - go forwardSchedulingToServer(schedulingStream, server, errCh) + errRegionHeartbeatStream.Inc() + log.Error("failed to create stream", zap.Error(err)) + continue } + lastForwardedSchedulingHost = forwardedSchedulingHost + forwardErrCh = make(chan error, 1) + go forwardRegionHeartbeatToScheduling(forwardSchedulingStream, server, forwardErrCh) } - if schedulingStream != nil { - req := &schedulingpb.RegionHeartbeatRequest{ - Header: &schedulingpb.RequestHeader{ - ClusterId: request.GetHeader().GetClusterId(), - SenderId: request.GetHeader().GetSenderId(), - }, - Region: request.GetRegion(), - Leader: request.GetLeader(), - DownPeers: request.GetDownPeers(), - PendingPeers: request.GetPendingPeers(), - BytesWritten: request.GetBytesWritten(), - BytesRead: request.GetBytesRead(), - KeysWritten: request.GetKeysWritten(), - KeysRead: request.GetKeysRead(), - ApproximateSize: request.GetApproximateSize(), - ApproximateKeys: request.GetApproximateKeys(), - Interval: request.GetInterval(), - Term: request.GetTerm(), - QueryStats: request.GetQueryStats(), - } - if err := schedulingStream.Send(req); err != nil { - log.Error("forward region heartbeat failed", zap.Error(err)) + schedulingpbReq := &schedulingpb.RegionHeartbeatRequest{ + Header: &schedulingpb.RequestHeader{ + ClusterId: request.GetHeader().GetClusterId(), + SenderId: request.GetHeader().GetSenderId(), + }, + Region: request.GetRegion(), + Leader: request.GetLeader(), + DownPeers: request.GetDownPeers(), + PendingPeers: request.GetPendingPeers(), + BytesWritten: request.GetBytesWritten(), + BytesRead: request.GetBytesRead(), + KeysWritten: request.GetKeysWritten(), + KeysRead: request.GetKeysRead(), + ApproximateSize: request.GetApproximateSize(), + ApproximateKeys: request.GetApproximateKeys(), + Interval: request.GetInterval(), + Term: request.GetTerm(), + QueryStats: request.GetQueryStats(), + } + if err := forwardSchedulingStream.Send(schedulingpbReq); err != nil { + 
forwardSchedulingStream = nil + errRegionHeartbeatSend.Inc() + log.Error("failed to send request to scheduling service", zap.Error(err)) + } + + select { + case err, ok := <-forwardErrCh: + if ok { + forwardSchedulingStream = nil + errRegionHeartbeatRecv.Inc() + log.Error("failed to send response", zap.Error(err)) } + default: } } - - regionHeartbeatHandleDuration.WithLabelValues(storeAddress, storeLabel).Observe(time.Since(start).Seconds()) - regionHeartbeatCounter.WithLabelValues(storeAddress, storeLabel, "report", "ok").Inc() } } @@ -1626,9 +1500,15 @@ func (s *GrpcServer) AskSplit(ctx context.Context, request *pdpb.AskSplitRequest // AskBatchSplit implements gRPC PDServer. func (s *GrpcServer) AskBatchSplit(ctx context.Context, request *pdpb.AskBatchSplitRequest) (*pdpb.AskBatchSplitResponse, error) { - if s.IsAPIServiceMode() { - s.updateSchedulingClient(ctx) - if s.schedulingClient.Load() != nil { + if s.IsServiceIndependent(utils.SchedulingServiceName) { + forwardCli, err := s.updateSchedulingClient(ctx) + if err != nil { + return &pdpb.AskBatchSplitResponse{ + Header: s.wrapErrorToHeader(pdpb.ErrorType_UNKNOWN, err.Error()), + }, nil + } + cli := forwardCli.getClient() + if cli != nil { req := &schedulingpb.AskBatchSplitRequest{ Header: &schedulingpb.RequestHeader{ ClusterId: request.GetHeader().GetClusterId(), @@ -1637,10 +1517,10 @@ func (s *GrpcServer) AskBatchSplit(ctx context.Context, request *pdpb.AskBatchSp Region: request.GetRegion(), SplitCount: request.GetSplitCount(), } - resp, err := s.schedulingClient.Load().(*schedulingClient).getClient().AskBatchSplit(ctx, req) + resp, err := cli.AskBatchSplit(ctx, req) if err != nil { // reset to let it be updated in the next request - s.schedulingClient.Store(&schedulingClient{}) + s.schedulingClient.CompareAndSwap(forwardCli, &schedulingClient{}) return s.convertAskSplitResponse(resp), err } return s.convertAskSplitResponse(resp), nil @@ -1794,15 +1674,23 @@ func (s *GrpcServer) PutClusterConfig(ctx context.Context, request *pdpb.PutClus // ScatterRegion implements gRPC PDServer. 
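+// When the scheduling service is deployed independently, the request is forwarded to the
+// scheduling primary and its response is converted back into a pdpb response.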
func (s *GrpcServer) ScatterRegion(ctx context.Context, request *pdpb.ScatterRegionRequest) (*pdpb.ScatterRegionResponse, error) { - if s.IsAPIServiceMode() { + if s.IsServiceIndependent(utils.SchedulingServiceName) { forwardCli, err := s.updateSchedulingClient(ctx) if err != nil { return &pdpb.ScatterRegionResponse{ Header: s.wrapErrorToHeader(pdpb.ErrorType_UNKNOWN, err.Error()), }, nil } - if forwardCli != nil { - regionsID := request.GetRegionsId() + cli := forwardCli.getClient() + if cli != nil { + var regionsID []uint64 + // nolint + if request.GetRegionId() != 0 { + // nolint + regionsID = []uint64{request.GetRegionId()} + } else { + regionsID = request.GetRegionsId() + } if len(regionsID) == 0 { return &pdpb.ScatterRegionResponse{ Header: s.invalidValue("regions id is required"), @@ -1818,10 +1706,11 @@ func (s *GrpcServer) ScatterRegion(ctx context.Context, request *pdpb.ScatterReg RetryLimit: request.GetRetryLimit(), SkipStoreLimit: request.GetSkipStoreLimit(), } - resp, err := forwardCli.ScatterRegions(ctx, req) + resp, err := cli.ScatterRegions(ctx, req) if err != nil { + errScatterRegionSend.Inc() // reset to let it be updated in the next request - s.schedulingClient.Store(&schedulingClient{}) + s.schedulingClient.CompareAndSwap(forwardCli, &schedulingClient{}) return s.convertScatterResponse(resp), err } return s.convertScatterResponse(resp), nil @@ -1984,15 +1873,7 @@ func (s *GrpcServer) UpdateServiceGCSafePoint(ctx context.Context, request *pdpb return nil, err } } - var ( - nowTSO pdpb.Timestamp - err error - ) - if s.IsAPIServiceMode() { - nowTSO, err = s.getGlobalTSOFromTSOServer(ctx) - } else { - nowTSO, err = s.tsoAllocatorManager.HandleRequest(ctx, tso.GlobalDCLocation, 1) - } + nowTSO, err := s.getGlobalTSO(ctx) if err != nil { return nil, err } @@ -2018,14 +1899,15 @@ func (s *GrpcServer) UpdateServiceGCSafePoint(ctx context.Context, request *pdpb // GetOperator gets information about the operator belonging to the specify region. 
func (s *GrpcServer) GetOperator(ctx context.Context, request *pdpb.GetOperatorRequest) (*pdpb.GetOperatorResponse, error) { - if s.IsAPIServiceMode() { + if s.IsServiceIndependent(utils.SchedulingServiceName) { forwardCli, err := s.updateSchedulingClient(ctx) if err != nil { return &pdpb.GetOperatorResponse{ Header: s.wrapErrorToHeader(pdpb.ErrorType_UNKNOWN, err.Error()), }, nil } - if forwardCli != nil { + cli := forwardCli.getClient() + if cli != nil { req := &schedulingpb.GetOperatorRequest{ Header: &schedulingpb.RequestHeader{ ClusterId: request.GetHeader().GetClusterId(), @@ -2033,10 +1915,11 @@ func (s *GrpcServer) GetOperator(ctx context.Context, request *pdpb.GetOperatorR }, RegionId: request.GetRegionId(), } - resp, err := forwardCli.GetOperator(ctx, req) + resp, err := cli.GetOperator(ctx, req) if err != nil { + errGetOperatorSend.Inc() // reset to let it be updated in the next request - s.schedulingClient.Store(&schedulingClient{}) + s.schedulingClient.CompareAndSwap(forwardCli, &schedulingClient{}) return s.convertOperatorResponse(resp), err } return s.convertOperatorResponse(resp), nil @@ -2134,6 +2017,15 @@ func (s *GrpcServer) invalidValue(msg string) *pdpb.ResponseHeader { func (s *GrpcServer) convertHeader(header *schedulingpb.ResponseHeader) *pdpb.ResponseHeader { switch header.GetError().GetType() { case schedulingpb.ErrorType_UNKNOWN: + if strings.Contains(header.GetError().GetMessage(), "region not found") { + return &pdpb.ResponseHeader{ + ClusterId: header.GetClusterId(), + Error: &pdpb.Error{ + Type: pdpb.ErrorType_REGION_NOT_FOUND, + Message: header.GetError().GetMessage(), + }, + } + } return &pdpb.ResponseHeader{ ClusterId: header.GetClusterId(), Error: &pdpb.Error{ @@ -2281,14 +2173,15 @@ func (s *GrpcServer) SyncMaxTS(_ context.Context, request *pdpb.SyncMaxTSRequest // SplitRegions split regions by the given split keys func (s *GrpcServer) SplitRegions(ctx context.Context, request *pdpb.SplitRegionsRequest) (*pdpb.SplitRegionsResponse, error) { - if s.IsAPIServiceMode() { + if s.IsServiceIndependent(utils.SchedulingServiceName) { forwardCli, err := s.updateSchedulingClient(ctx) if err != nil { return &pdpb.SplitRegionsResponse{ Header: s.wrapErrorToHeader(pdpb.ErrorType_UNKNOWN, err.Error()), }, nil } - if forwardCli != nil { + cli := forwardCli.getClient() + if cli != nil { req := &schedulingpb.SplitRegionsRequest{ Header: &schedulingpb.RequestHeader{ ClusterId: request.GetHeader().GetClusterId(), @@ -2297,10 +2190,11 @@ func (s *GrpcServer) SplitRegions(ctx context.Context, request *pdpb.SplitRegion SplitKeys: request.GetSplitKeys(), RetryLimit: request.GetRetryLimit(), } - resp, err := forwardCli.SplitRegions(ctx, req) + resp, err := cli.SplitRegions(ctx, req) if err != nil { + errSplitRegionsSend.Inc() // reset to let it be updated in the next request - s.schedulingClient.Store(&schedulingClient{}) + s.schedulingClient.CompareAndSwap(forwardCli, &schedulingClient{}) return s.convertSplitResponse(resp), err } return s.convertSplitResponse(resp), nil @@ -2432,234 +2326,6 @@ func (s *GrpcServer) validateInternalRequest(header *pdpb.RequestHeader, onlyAll return nil } -func (s *GrpcServer) getDelegateClient(ctx context.Context, forwardedHost string) (*grpc.ClientConn, error) { - client, ok := s.clientConns.Load(forwardedHost) - if ok { - // Mostly, the connection is already established, and return it directly. 
- return client.(*grpc.ClientConn), nil - } - - tlsConfig, err := s.GetTLSConfig().ToTLSConfig() - if err != nil { - return nil, err - } - ctxTimeout, cancel := context.WithTimeout(ctx, defaultGRPCDialTimeout) - defer cancel() - newConn, err := grpcutil.GetClientConn(ctxTimeout, forwardedHost, tlsConfig) - if err != nil { - return nil, err - } - conn, loaded := s.clientConns.LoadOrStore(forwardedHost, newConn) - if !loaded { - // Successfully stored the connection we created. - return newConn, nil - } - // Loaded a connection created/stored by another goroutine, so close the one we created - // and return the one we loaded. - newConn.Close() - return conn.(*grpc.ClientConn), nil -} - -func (s *GrpcServer) isLocalRequest(forwardedHost string) bool { - failpoint.Inject("useForwardRequest", func() { - failpoint.Return(false) - }) - if forwardedHost == "" { - return true - } - memberAddrs := s.GetMember().Member().GetClientUrls() - for _, addr := range memberAddrs { - if addr == forwardedHost { - return true - } - } - return false -} - -func (s *GrpcServer) createHeartbeatForwardStream(client *grpc.ClientConn) (pdpb.PD_RegionHeartbeatClient, context.CancelFunc, error) { - done := make(chan struct{}) - ctx, cancel := context.WithCancel(s.ctx) - go grpcutil.CheckStream(ctx, cancel, done) - forwardStream, err := pdpb.NewPDClient(client).RegionHeartbeat(ctx) - done <- struct{}{} - return forwardStream, cancel, err -} - -func forwardRegionHeartbeatClientToServer(forwardStream pdpb.PD_RegionHeartbeatClient, server *heartbeatServer, errCh chan error) { - defer logutil.LogPanic() - defer close(errCh) - for { - resp, err := forwardStream.Recv() - if err != nil { - errCh <- errors.WithStack(err) - return - } - if err := server.Send(resp); err != nil { - errCh <- errors.WithStack(err) - return - } - } -} - -func (s *GrpcServer) createSchedulingStream(client *grpc.ClientConn) (schedulingpb.Scheduling_RegionHeartbeatClient, context.CancelFunc, error) { - done := make(chan struct{}) - ctx, cancel := context.WithCancel(s.ctx) - go grpcutil.CheckStream(ctx, cancel, done) - forwardStream, err := schedulingpb.NewSchedulingClient(client).RegionHeartbeat(ctx) - done <- struct{}{} - return forwardStream, cancel, err -} - -func forwardSchedulingToServer(forwardStream schedulingpb.Scheduling_RegionHeartbeatClient, server *heartbeatServer, errCh chan error) { - defer logutil.LogPanic() - defer close(errCh) - for { - resp, err := forwardStream.Recv() - if err != nil { - errCh <- errors.WithStack(err) - return - } - response := &pdpb.RegionHeartbeatResponse{ - Header: &pdpb.ResponseHeader{ - ClusterId: resp.GetHeader().GetClusterId(), - // ignore error here - }, - ChangePeer: resp.GetChangePeer(), - TransferLeader: resp.GetTransferLeader(), - RegionId: resp.GetRegionId(), - RegionEpoch: resp.GetRegionEpoch(), - TargetPeer: resp.GetTargetPeer(), - Merge: resp.GetMerge(), - SplitRegion: resp.GetSplitRegion(), - ChangePeerV2: resp.GetChangePeerV2(), - SwitchWitnesses: resp.GetSwitchWitnesses(), - } - - if err := server.Send(response); err != nil { - errCh <- errors.WithStack(err) - return - } - } -} - -func (s *GrpcServer) createTSOForwardStream( - ctx context.Context, client *grpc.ClientConn, -) (tsopb.TSO_TsoClient, context.Context, context.CancelFunc, error) { - done := make(chan struct{}) - forwardCtx, cancelForward := context.WithCancel(ctx) - go grpcutil.CheckStream(forwardCtx, cancelForward, done) - forwardStream, err := tsopb.NewTSOClient(client).Tso(forwardCtx) - done <- struct{}{} - return forwardStream, 
forwardCtx, cancelForward, err -} - -func (s *GrpcServer) createReportBucketsForwardStream(client *grpc.ClientConn) (pdpb.PD_ReportBucketsClient, context.CancelFunc, error) { - done := make(chan struct{}) - ctx, cancel := context.WithCancel(s.ctx) - go grpcutil.CheckStream(ctx, cancel, done) - forwardStream, err := pdpb.NewPDClient(client).ReportBuckets(ctx) - done <- struct{}{} - return forwardStream, cancel, err -} - -func forwardReportBucketClientToServer(forwardStream pdpb.PD_ReportBucketsClient, server *bucketHeartbeatServer, errCh chan error) { - defer logutil.LogPanic() - defer close(errCh) - for { - resp, err := forwardStream.CloseAndRecv() - if err != nil { - errCh <- errors.WithStack(err) - return - } - if err := server.Send(resp); err != nil { - errCh <- errors.WithStack(err) - return - } - } -} - -func (s *GrpcServer) getGlobalTSOFromTSOServer(ctx context.Context) (pdpb.Timestamp, error) { - request := &tsopb.TsoRequest{ - Header: &tsopb.RequestHeader{ - ClusterId: s.clusterID, - KeyspaceId: utils.DefaultKeyspaceID, - KeyspaceGroupId: utils.DefaultKeyspaceGroupID, - }, - Count: 1, - } - var ( - forwardedHost string - forwardStream tsopb.TSO_TsoClient - ts *tsopb.TsoResponse - err error - ) - for i := 0; i < maxRetryTimesRequestTSOServer; i++ { - forwardedHost, ok := s.GetServicePrimaryAddr(ctx, utils.TSOServiceName) - if !ok || forwardedHost == "" { - return pdpb.Timestamp{}, ErrNotFoundTSOAddr - } - forwardStream, err = s.getTSOForwardStream(forwardedHost) - if err != nil { - return pdpb.Timestamp{}, err - } - forwardStream.Send(request) - ts, err = forwardStream.Recv() - if err != nil { - if strings.Contains(err.Error(), errs.NotLeaderErr) { - s.tsoPrimaryWatcher.ForceLoad() - time.Sleep(retryIntervalRequestTSOServer) - continue - } - if strings.Contains(err.Error(), codes.Unavailable.String()) { - s.tsoClientPool.Lock() - delete(s.tsoClientPool.clients, forwardedHost) - s.tsoClientPool.Unlock() - continue - } - log.Error("get global tso from tso service primary addr failed", zap.Error(err), zap.String("tso-addr", forwardedHost)) - return pdpb.Timestamp{}, err - } - return *ts.GetTimestamp(), nil - } - log.Error("get global tso from tso service primary addr failed after retry", zap.Error(err), zap.String("tso-addr", forwardedHost)) - return pdpb.Timestamp{}, err -} - -func (s *GrpcServer) getTSOForwardStream(forwardedHost string) (tsopb.TSO_TsoClient, error) { - s.tsoClientPool.RLock() - forwardStream, ok := s.tsoClientPool.clients[forwardedHost] - s.tsoClientPool.RUnlock() - if ok { - // This is the common case to return here - return forwardStream, nil - } - - s.tsoClientPool.Lock() - defer s.tsoClientPool.Unlock() - - // Double check after entering the critical section - forwardStream, ok = s.tsoClientPool.clients[forwardedHost] - if ok { - return forwardStream, nil - } - - // Now let's create the client connection and the forward stream - client, err := s.getDelegateClient(s.ctx, forwardedHost) - if err != nil { - return nil, err - } - done := make(chan struct{}) - ctx, cancel := context.WithCancel(s.ctx) - go grpcutil.CheckStream(ctx, cancel, done) - forwardStream, err = tsopb.NewTSOClient(client).Tso(ctx) - done <- struct{}{} - if err != nil { - return nil, err - } - s.tsoClientPool.clients[forwardedHost] = forwardStream - return forwardStream, nil -} - // for CDC compatibility, we need to initialize config path to `globalConfigPath` const globalConfigPath = "/global/config/" @@ -2870,15 +2536,7 @@ func (s *GrpcServer) SetExternalTimestamp(ctx context.Context, request 
*pdpb.Set return rsp.(*pdpb.SetExternalTimestampResponse), nil } - var ( - nowTSO pdpb.Timestamp - err error - ) - if s.IsAPIServiceMode() { - nowTSO, err = s.getGlobalTSOFromTSOServer(ctx) - } else { - nowTSO, err = s.tsoAllocatorManager.HandleRequest(ctx, tso.GlobalDCLocation, 1) - } + nowTSO, err := s.getGlobalTSO(ctx) if err != nil { return nil, err } diff --git a/server/handler.go b/server/handler.go index dc4b43238d0..6c0679bd9f9 100644 --- a/server/handler.go +++ b/server/handler.go @@ -30,6 +30,7 @@ import ( "github.com/tikv/pd/pkg/core/storelimit" "github.com/tikv/pd/pkg/encryption" "github.com/tikv/pd/pkg/errs" + mcsutils "github.com/tikv/pd/pkg/mcs/utils" "github.com/tikv/pd/pkg/schedule" sc "github.com/tikv/pd/pkg/schedule/config" sche "github.com/tikv/pd/pkg/schedule/core" @@ -192,7 +193,7 @@ func (h *Handler) AddScheduler(name string, args ...string) error { } var removeSchedulerCb func(string) error - if h.s.IsAPIServiceMode() { + if c.IsServiceIndependent(mcsutils.SchedulingServiceName) { removeSchedulerCb = c.GetCoordinator().GetSchedulersController().RemoveSchedulerHandler } else { removeSchedulerCb = c.GetCoordinator().GetSchedulersController().RemoveScheduler @@ -202,7 +203,7 @@ func (h *Handler) AddScheduler(name string, args ...string) error { return err } log.Info("create scheduler", zap.String("scheduler-name", s.GetName()), zap.Strings("scheduler-args", args)) - if h.s.IsAPIServiceMode() { + if c.IsServiceIndependent(mcsutils.SchedulingServiceName) { if err = c.AddSchedulerHandler(s, args...); err != nil { log.Error("can not add scheduler handler", zap.String("scheduler-name", s.GetName()), zap.Strings("scheduler-args", args), errs.ZapError(err)) return err @@ -229,7 +230,7 @@ func (h *Handler) RemoveScheduler(name string) error { if err != nil { return err } - if h.s.IsAPIServiceMode() { + if c.IsServiceIndependent(mcsutils.SchedulingServiceName) { if err = c.RemoveSchedulerHandler(name); err != nil { log.Error("can not remove scheduler handler", zap.String("scheduler-name", name), errs.ZapError(err)) } else { diff --git a/server/metrics.go b/server/metrics.go index 94eb9bf19a2..54c5830dc52 100644 --- a/server/metrics.go +++ b/server/metrics.go @@ -151,7 +151,7 @@ var ( Name: "audit_handling_seconds", Help: "PD server service handling audit", Buckets: prometheus.DefBuckets, - }, []string{"service", "method", "component", "ip"}) + }, []string{"service", "method", "caller_id", "ip"}) serverMaxProcs = prometheus.NewGauge( prometheus.GaugeOpts{ Namespace: "pd", @@ -159,6 +159,14 @@ var ( Name: "maxprocs", Help: "The value of GOMAXPROCS.", }) + + forwardFailCounter = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: "pd", + Subsystem: "server", + Name: "forward_fail_total", + Help: "Counter of forward fail.", + }, []string{"request", "type"}) ) func init() { @@ -179,4 +187,5 @@ func init() { prometheus.MustRegister(serviceAuditHistogram) prometheus.MustRegister(bucketReportInterval) prometheus.MustRegister(serverMaxProcs) + prometheus.MustRegister(forwardFailCounter) } diff --git a/server/server.go b/server/server.go index 0772cb9a47b..187c30dbf7a 100644 --- a/server/server.go +++ b/server/server.go @@ -473,7 +473,7 @@ func (s *Server) startServer(ctx context.Context) error { s.gcSafePointManager = gc.NewSafePointManager(s.storage, s.cfg.PDServerCfg) s.basicCluster = core.NewBasicCluster() - s.cluster = cluster.NewRaftCluster(ctx, s.clusterID, syncer.NewRegionSyncer(s), s.client, s.httpClient) + s.cluster = cluster.NewRaftCluster(ctx, s.clusterID, 
s.GetBasicCluster(), s.GetStorage(), syncer.NewRegionSyncer(s), s.client, s.httpClient) keyspaceIDAllocator := id.NewAllocator(&id.AllocatorParams{ Client: s.client, RootPath: s.rootPath, @@ -489,13 +489,13 @@ func (s *Server) startServer(ctx context.Context) error { s.safePointV2Manager = gc.NewSafePointManagerV2(s.ctx, s.storage, s.storage, s.storage) s.hbStreams = hbstream.NewHeartbeatStreams(ctx, s.clusterID, "", s.cluster) // initial hot_region_storage in here. - if !s.IsAPIServiceMode() { - s.hotRegionStorage, err = storage.NewHotRegionsStorage( - ctx, filepath.Join(s.cfg.DataDir, "hot-region"), s.encryptionKeyManager, s.handler) - if err != nil { - return err - } + + s.hotRegionStorage, err = storage.NewHotRegionsStorage( + ctx, filepath.Join(s.cfg.DataDir, "hot-region"), s.encryptionKeyManager, s.handler) + if err != nil { + return err } + // Run callbacks log.Info("triggering the start callback functions") for _, cb := range s.startCallbacks { @@ -948,20 +948,7 @@ func (s *Server) GetConfig() *config.Config { if err != nil { return cfg } - payload := make(map[string]interface{}) - for i, sche := range sches { - var config interface{} - err := schedulers.DecodeConfig([]byte(configs[i]), &config) - if err != nil { - log.Error("failed to decode scheduler config", - zap.String("config", configs[i]), - zap.String("scheduler", sche), - errs.ZapError(err)) - continue - } - payload[sche] = config - } - cfg.Schedule.SchedulersPayload = payload + cfg.Schedule.SchedulersPayload = schedulers.ToPayload(sches, configs) return cfg } @@ -1056,7 +1043,7 @@ func (s *Server) SetReplicationConfig(cfg sc.ReplicationConfig) error { return errs.ErrNotBootstrapped.GenWithStackByArgs() } // replication.MaxReplicas won't work when placement rule is enabled and not only have one default rule. - defaultRule := rc.GetRuleManager().GetRule("pd", "default") + defaultRule := rc.GetRuleManager().GetRule(placement.DefaultGroupID, placement.DefaultRuleID) CheckInDefaultRule := func() error { // replication config won't work when placement rule is enabled and exceeds one default rule @@ -1376,6 +1363,12 @@ func (s *Server) GetRaftCluster() *cluster.RaftCluster { return s.cluster } +// DirectlyGetRaftCluster returns raft cluster directly. +// Only used for test. +func (s *Server) DirectlyGetRaftCluster() *cluster.RaftCluster { + return s.cluster +} + // GetCluster gets cluster. 
func (s *Server) GetCluster() *metapb.Cluster { return &metapb.Cluster{ @@ -1407,6 +1400,15 @@ func (s *Server) GetRegions() []*core.RegionInfo { return nil } +// IsServiceIndependent returns if the service is enabled +func (s *Server) IsServiceIndependent(name string) bool { + rc := s.GetRaftCluster() + if rc != nil { + return rc.IsServiceIndependent(name) + } + return false +} + // GetServiceLabels returns ApiAccessPaths by given service label // TODO: this function will be used for updating api rate limit config func (s *Server) GetServiceLabels(serviceLabel string) []apiutil.AccessPath { @@ -1640,7 +1642,7 @@ func (s *Server) leaderLoop() { func (s *Server) campaignLeader() { log.Info(fmt.Sprintf("start to campaign %s leader", s.mode), zap.String("campaign-leader-name", s.Name())) - if err := s.member.CampaignLeader(s.cfg.LeaderLease); err != nil { + if err := s.member.CampaignLeader(s.ctx, s.cfg.LeaderLease); err != nil { if err.Error() == errs.ErrEtcdTxnConflict.Error() { log.Info(fmt.Sprintf("campaign %s leader meets error due to txn conflict, another PD/API server may campaign successfully", s.mode), zap.String("campaign-leader-name", s.Name())) diff --git a/server/server_test.go b/server/server_test.go index 62cf5b168fc..a0562879057 100644 --- a/server/server_test.go +++ b/server/server_test.go @@ -218,7 +218,7 @@ func (suite *leaderServerTestSuite) TestSourceIpForHeaderForwarded() { err = svr.Run() suite.NoError(err) - req, err := http.NewRequest(http.MethodGet, fmt.Sprintf("%s/pd/apis/mock/v1/hello", svr.GetAddr()), nil) + req, err := http.NewRequest(http.MethodGet, fmt.Sprintf("%s/pd/apis/mock/v1/hello", svr.GetAddr()), http.NoBody) suite.NoError(err) req.Header.Add(apiutil.XForwardedForHeader, "127.0.0.2") resp, err := http.DefaultClient.Do(req) @@ -248,7 +248,7 @@ func (suite *leaderServerTestSuite) TestSourceIpForHeaderXReal() { err = svr.Run() suite.NoError(err) - req, err := http.NewRequest(http.MethodGet, fmt.Sprintf("%s/pd/apis/mock/v1/hello", svr.GetAddr()), nil) + req, err := http.NewRequest(http.MethodGet, fmt.Sprintf("%s/pd/apis/mock/v1/hello", svr.GetAddr()), http.NoBody) suite.NoError(err) req.Header.Add(apiutil.XRealIPHeader, "127.0.0.2") resp, err := http.DefaultClient.Do(req) @@ -278,7 +278,7 @@ func (suite *leaderServerTestSuite) TestSourceIpForHeaderBoth() { err = svr.Run() suite.NoError(err) - req, err := http.NewRequest(http.MethodGet, fmt.Sprintf("%s/pd/apis/mock/v1/hello", svr.GetAddr()), nil) + req, err := http.NewRequest(http.MethodGet, fmt.Sprintf("%s/pd/apis/mock/v1/hello", svr.GetAddr()), http.NoBody) suite.NoError(err) req.Header.Add(apiutil.XForwardedForHeader, "127.0.0.2") req.Header.Add(apiutil.XRealIPHeader, "127.0.0.3") diff --git a/server/util.go b/server/util.go index 654b424465e..f88d0146a7f 100644 --- a/server/util.go +++ b/server/util.go @@ -17,6 +17,7 @@ package server import ( "context" "net/http" + "net/http/pprof" "path/filepath" "strings" @@ -121,8 +122,13 @@ func combineBuilderServerHTTPService(ctx context.Context, svr *Server, serviceBu userHandlers[pathPrefix] = handler } } + apiService.UseHandler(router) userHandlers[pdAPIPrefix] = apiService + + // fix issue https://github.com/tikv/pd/issues/7253 + // FIXME: remove me after upgrade + userHandlers["/debug/pprof/trace"] = http.HandlerFunc(pprof.Trace) return userHandlers, nil } diff --git a/tests/cluster.go b/tests/cluster.go index ae1ae331856..41efc2b045d 100644 --- a/tests/cluster.go +++ b/tests/cluster.go @@ -155,6 +155,13 @@ func (s *TestServer) Destroy() error { return nil } 
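// Editor's sketch (illustrative, not part of this patch): the server/util.go hunk above
// pins `/debug/pprof/trace` to the stock net/http/pprof Trace handler as a temporary
// workaround for https://github.com/tikv/pd/issues/7253. Stripped of the PD plumbing,
// the wiring looks roughly like this; the mux and listen address are hypothetical.

package main

import (
	"net/http"
	"net/http/pprof" // Trace, Index, Profile, etc. are exported as plain handler funcs
)

func main() {
	mux := http.NewServeMux()
	// Mirror the userHandlers["/debug/pprof/trace"] entry added in
	// combineBuilderServerHTTPService.
	mux.Handle("/debug/pprof/trace", http.HandlerFunc(pprof.Trace))
	_ = http.ListenAndServe("127.0.0.1:6060", mux) // hypothetical address
}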
+// ResetPDLeader resigns the leader of the server. +func (s *TestServer) ResetPDLeader() { + s.Lock() + defer s.Unlock() + s.server.GetMember().ResetLeader() +} + // ResignLeader resigns the leader of the server. func (s *TestServer) ResignLeader() error { s.Lock() diff --git a/tests/integrations/client/client_test.go b/tests/integrations/client/client_test.go index 9cabbb03090..bb4d6851fd0 100644 --- a/tests/integrations/client/client_test.go +++ b/tests/integrations/client/client_test.go @@ -158,6 +158,10 @@ func TestLeaderTransfer(t *testing.T) { cluster, err := tests.NewTestCluster(ctx, 2) re.NoError(err) defer cluster.Destroy() + re.NoError(failpoint.Enable("github.com/tikv/pd/pkg/member/skipCampaignLeaderCheck", "return(true)")) + defer func() { + re.NoError(failpoint.Disable("github.com/tikv/pd/pkg/member/skipCampaignLeaderCheck")) + }() endpoints := runServer(re, cluster) cli := setupCli(re, ctx, endpoints) @@ -514,7 +518,7 @@ func TestCustomTimeout(t *testing.T) { re.Less(time.Since(start), 2*time.Second) } -func TestGetRegionFromFollowerClient(t *testing.T) { +func TestGetRegionByFollowerForwarding(t *testing.T) { re := require.New(t) ctx, cancel := context.WithCancel(context.Background()) defer cancel() @@ -540,7 +544,7 @@ func TestGetRegionFromFollowerClient(t *testing.T) { } // case 1: unreachable -> normal -func TestGetTsoFromFollowerClient1(t *testing.T) { +func TestGetTsoByFollowerForwarding1(t *testing.T) { re := require.New(t) ctx, cancel := context.WithCancel(context.Background()) defer cancel() @@ -571,7 +575,7 @@ func TestGetTsoFromFollowerClient1(t *testing.T) { } // case 2: unreachable -> leader transfer -> normal -func TestGetTsoFromFollowerClient2(t *testing.T) { +func TestGetTsoByFollowerForwarding2(t *testing.T) { re := require.New(t) ctx, cancel := context.WithCancel(context.Background()) defer cancel() @@ -605,6 +609,101 @@ func TestGetTsoFromFollowerClient2(t *testing.T) { checkTS(re, cli, lastTS) } +// case 3: network partition between client and follower A -> transfer leader to follower A -> normal +func TestGetTsoAndRegionByFollowerForwarding(t *testing.T) { + re := require.New(t) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + pd.LeaderHealthCheckInterval = 100 * time.Millisecond + cluster, err := tests.NewTestCluster(ctx, 3) + re.NoError(err) + defer cluster.Destroy() + + endpoints := runServer(re, cluster) + re.NotEmpty(cluster.WaitLeader()) + leader := cluster.GetLeaderServer() + grpcPDClient := testutil.MustNewGrpcClient(re, leader.GetAddr()) + testutil.Eventually(re, func() bool { + regionHeartbeat, err := grpcPDClient.RegionHeartbeat(ctx) + re.NoError(err) + regionID := regionIDAllocator.alloc() + region := &metapb.Region{ + Id: regionID, + RegionEpoch: &metapb.RegionEpoch{ + ConfVer: 1, + Version: 1, + }, + Peers: peers, + } + req := &pdpb.RegionHeartbeatRequest{ + Header: newHeader(leader.GetServer()), + Region: region, + Leader: peers[0], + } + err = regionHeartbeat.Send(req) + re.NoError(err) + _, err = regionHeartbeat.Recv() + return err == nil + }) + follower := cluster.GetServer(cluster.GetFollower()) + re.NoError(failpoint.Enable("github.com/tikv/pd/client/grpcutil/unreachableNetwork2", fmt.Sprintf("return(\"%s\")", follower.GetAddr()))) + + cli := setupCli(re, ctx, endpoints, pd.WithForwardingOption(true)) + var lastTS uint64 + testutil.Eventually(re, func() bool { + physical, logical, err := cli.GetTS(context.TODO()) + if err == nil { + lastTS = tsoutil.ComposeTS(physical, logical) + return true + } + 
t.Log(err) + return false + }) + lastTS = checkTS(re, cli, lastTS) + r, err := cli.GetRegion(context.Background(), []byte("a")) + re.NoError(err) + re.NotNil(r) + leader.GetServer().GetMember().ResignEtcdLeader(leader.GetServer().Context(), + leader.GetServer().Name(), follower.GetServer().Name()) + re.NotEmpty(cluster.WaitLeader()) + testutil.Eventually(re, func() bool { + physical, logical, err := cli.GetTS(context.TODO()) + if err == nil { + lastTS = tsoutil.ComposeTS(physical, logical) + return true + } + t.Log(err) + return false + }) + lastTS = checkTS(re, cli, lastTS) + testutil.Eventually(re, func() bool { + r, err = cli.GetRegion(context.Background(), []byte("a")) + if err == nil && r != nil { + return true + } + return false + }) + + re.NoError(failpoint.Disable("github.com/tikv/pd/client/grpcutil/unreachableNetwork2")) + testutil.Eventually(re, func() bool { + physical, logical, err := cli.GetTS(context.TODO()) + if err == nil { + lastTS = tsoutil.ComposeTS(physical, logical) + return true + } + t.Log(err) + return false + }) + lastTS = checkTS(re, cli, lastTS) + testutil.Eventually(re, func() bool { + r, err = cli.GetRegion(context.Background(), []byte("a")) + if err == nil && r != nil { + return true + } + return false + }) +} + func checkTS(re *require.Assertions, cli pd.Client, lastTS uint64) uint64 { for i := 0; i < tsoRequestRound; i++ { physical, logical, err := cli.GetTS(context.TODO()) diff --git a/tests/integrations/client/go.mod b/tests/integrations/client/go.mod index e38efbeb438..799901ff2e3 100644 --- a/tests/integrations/client/go.mod +++ b/tests/integrations/client/go.mod @@ -15,7 +15,7 @@ require ( github.com/pingcap/failpoint v0.0.0-20210918120811-547c13e3eb00 github.com/pingcap/kvproto v0.0.0-20231018065736-c0689aded40c github.com/pingcap/log v1.1.1-0.20221110025148-ca232912c9f3 - github.com/stretchr/testify v1.8.2 + github.com/stretchr/testify v1.8.3 github.com/tikv/pd v0.0.0-00010101000000-000000000000 github.com/tikv/pd/client v0.0.0-00010101000000-000000000000 go.etcd.io/etcd v0.5.0-alpha.5.0.20220915004622-85b640cee793 @@ -50,9 +50,11 @@ require ( github.com/beorn7/perks v1.0.1 // indirect github.com/bitly/go-simplejson v0.5.0 // indirect github.com/breeswish/gin-jwt/v2 v2.6.4-jwt-patch // indirect + github.com/bytedance/sonic v1.9.1 // indirect github.com/cakturk/go-netstat v0.0.0-20200220111822-e5b49efee7a5 // indirect github.com/cenkalti/backoff/v4 v4.0.2 // indirect github.com/cespare/xxhash/v2 v2.2.0 // indirect + github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311 // indirect github.com/coreos/go-semver v0.3.0 // indirect github.com/coreos/go-systemd v0.0.0-20190719114852-fd7a80b32e1f // indirect github.com/coreos/pkg v0.0.0-20180928190104-399ea9e2e55f // indirect @@ -60,23 +62,24 @@ require ( github.com/dustin/go-humanize v0.0.0-20171111073723-bb3d318650d4 // indirect github.com/elliotchance/pie/v2 v2.1.0 // indirect github.com/fogleman/gg v1.3.0 // indirect + github.com/gabriel-vasile/mimetype v1.4.2 // indirect github.com/gin-contrib/cors v1.4.0 // indirect github.com/gin-contrib/gzip v0.0.1 // indirect github.com/gin-contrib/pprof v1.4.0 // indirect github.com/gin-contrib/sse v0.1.0 // indirect - github.com/gin-gonic/gin v1.8.1 // indirect + github.com/gin-gonic/gin v1.9.1 // indirect github.com/go-ole/go-ole v1.2.6 // indirect github.com/go-openapi/jsonpointer v0.19.5 // indirect github.com/go-openapi/jsonreference v0.19.6 // indirect github.com/go-openapi/spec v0.20.4 // indirect github.com/go-openapi/swag v0.19.15 // indirect - 
github.com/go-playground/locales v0.14.0 // indirect - github.com/go-playground/universal-translator v0.18.0 // indirect - github.com/go-playground/validator/v10 v10.10.0 // indirect + github.com/go-playground/locales v0.14.1 // indirect + github.com/go-playground/universal-translator v0.18.1 // indirect + github.com/go-playground/validator/v10 v10.14.0 // indirect github.com/go-resty/resty/v2 v2.6.0 // indirect github.com/go-sql-driver/mysql v1.7.0 // indirect github.com/goccy/go-graphviz v0.0.9 // indirect - github.com/goccy/go-json v0.9.7 // indirect + github.com/goccy/go-json v0.10.2 // indirect github.com/gogo/protobuf v1.3.2 // indirect github.com/golang-jwt/jwt v3.2.1+incompatible // indirect github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0 // indirect @@ -100,11 +103,12 @@ require ( github.com/joomcode/errorx v1.0.1 // indirect github.com/josharian/intern v1.0.0 // indirect github.com/json-iterator/go v1.1.12 // indirect + github.com/klauspost/cpuid/v2 v2.2.4 // indirect github.com/konsorten/go-windows-terminal-sequences v1.0.3 // indirect - github.com/leodido/go-urn v1.2.1 // indirect + github.com/leodido/go-urn v1.2.4 // indirect github.com/lufia/plan9stats v0.0.0-20230326075908-cb1d2100619a // indirect github.com/mailru/easyjson v0.7.6 // indirect - github.com/mattn/go-isatty v0.0.14 // indirect + github.com/mattn/go-isatty v0.0.19 // indirect github.com/mattn/go-sqlite3 v1.14.15 // indirect github.com/matttproud/golang_protobuf_extensions v1.0.1 // indirect github.com/minio/sio v0.3.0 // indirect @@ -112,14 +116,14 @@ require ( github.com/modern-go/reflect2 v1.0.2 // indirect github.com/oleiade/reflections v1.0.1 // indirect github.com/opentracing/opentracing-go v1.2.0 // indirect - github.com/pelletier/go-toml/v2 v2.0.1 // indirect + github.com/pelletier/go-toml/v2 v2.0.8 // indirect github.com/petermattis/goid v0.0.0-20211229010228-4d14c490ee36 // indirect github.com/phf/go-queue v0.0.0-20170504031614-9abe38d0371d // indirect github.com/pingcap/check v0.0.0-20211026125417-57bd13f7b5f0 // indirect github.com/pingcap/errcode v0.3.0 // indirect github.com/pingcap/errors v0.11.5-0.20211224045212-9687c2b0f87c // indirect github.com/pingcap/sysutil v1.0.1-0.20230407040306-fb007c5aff21 // indirect - github.com/pingcap/tidb-dashboard v0.0.0-20230911054332-22add1e00511 // indirect + github.com/pingcap/tidb-dashboard v0.0.0-20231127105651-ce4097837c5e // indirect github.com/pingcap/tipb v0.0.0-20220718022156-3e2483c20a9e // indirect github.com/pkg/errors v0.9.1 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect @@ -147,7 +151,8 @@ require ( github.com/tklauser/go-sysconf v0.3.11 // indirect github.com/tklauser/numcpus v0.6.0 // indirect github.com/tmc/grpc-websocket-proxy v0.0.0-20200427203606-3cfed13b9966 // indirect - github.com/ugorji/go/codec v1.2.7 // indirect + github.com/twitchyliquid64/golang-asm v0.15.1 // indirect + github.com/ugorji/go/codec v1.2.11 // indirect github.com/unrolled/render v1.0.1 // indirect github.com/urfave/negroni v0.3.0 // indirect github.com/vmihailenco/msgpack/v5 v5.3.5 // indirect @@ -159,6 +164,7 @@ require ( go.uber.org/dig v1.9.0 // indirect go.uber.org/fx v1.12.0 // indirect go.uber.org/multierr v1.11.0 // indirect + golang.org/x/arch v0.3.0 // indirect golang.org/x/crypto v0.14.0 // indirect golang.org/x/exp v0.0.0-20230711005742-c3f37128e5a4 // indirect golang.org/x/image v0.5.0 // indirect diff --git a/tests/integrations/client/go.sum b/tests/integrations/client/go.sum index c745c4fa518..e13da5d8375 100644 --- 
a/tests/integrations/client/go.sum +++ b/tests/integrations/client/go.sum @@ -68,6 +68,9 @@ github.com/bmizerany/assert v0.0.0-20160611221934-b7ed37b82869 h1:DDGfHa7BWjL4Yn github.com/bmizerany/assert v0.0.0-20160611221934-b7ed37b82869/go.mod h1:Ekp36dRnpXw/yCqJaO+ZrUyxD+3VXMFFr56k5XYrpB4= github.com/breeswish/gin-jwt/v2 v2.6.4-jwt-patch h1:KLE/YeX+9FNaGVW5MtImRVPhjDpfpgJhvkuYWBmOYbo= github.com/breeswish/gin-jwt/v2 v2.6.4-jwt-patch/go.mod h1:KjBLriHXe7L6fGceqWzTod8HUB/TP1WWDtfuSYtYXaI= +github.com/bytedance/sonic v1.5.0/go.mod h1:ED5hyg4y6t3/9Ku1R6dU/4KyJ48DZ4jPhfY1O2AihPM= +github.com/bytedance/sonic v1.9.1 h1:6iJ6NqdoxCDr6mbY8h18oSO+cShGSMRGCEo7F2h0x8s= +github.com/bytedance/sonic v1.9.1/go.mod h1:i736AoUSYt75HyZLoJW9ERYxcy6eaN6h4BZXU064P/U= github.com/cakturk/go-netstat v0.0.0-20200220111822-e5b49efee7a5 h1:BjkPE3785EwPhhyuFkbINB+2a1xATwk8SNDWnJiD41g= github.com/cakturk/go-netstat v0.0.0-20200220111822-e5b49efee7a5/go.mod h1:jtAfVaU/2cu1+wdSRPWE2c1N2qeAA3K4RH9pYgqwets= github.com/cenkalti/backoff/v4 v4.0.2 h1:JIufpQLbh4DkbQoii76ItQIUFzevQSqOLZca4eamEDs= @@ -77,6 +80,9 @@ github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghf github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/chenzhuoyu/base64x v0.0.0-20211019084208-fb5309c8db06/go.mod h1:DH46F32mSOjUmXrMHnKwZdA8wcEefY7UVqBKYGjpdQY= +github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311 h1:qSGYFH7+jGhDF8vLC+iwCD4WpbV1EBDSzWkJODFLams= +github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311/go.mod h1:b583jCggY9gE99b6G5LEC39OIiVsWj+R97kbl5odCEk= github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= @@ -123,6 +129,8 @@ github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5Kwzbycv github.com/fogleman/gg v1.3.0 h1:/7zJX8F6AaYQc57WQCyN9cAIz+4bCJGO9B+dyW29am8= github.com/fogleman/gg v1.3.0/go.mod h1:R/bRT+9gY/C5z7JzPU0zXsXHKM4/ayA+zqcVNZzPa1k= github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= +github.com/gabriel-vasile/mimetype v1.4.2 h1:w5qFW6JKBz9Y393Y4q372O9A7cUSequkh1Q7OhCmWKU= +github.com/gabriel-vasile/mimetype v1.4.2/go.mod h1:zApsH/mKG4w07erKIaJPFiX0Tsq9BFQgN3qGY5GnNgA= github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= github.com/gin-contrib/cors v1.4.0 h1:oJ6gwtUl3lqV0WEIwM/LxPF1QZ5qe2lGWdY2+bz7y0g= github.com/gin-contrib/cors v1.4.0/go.mod h1:bs9pNM0x/UsmHPBWT2xZz9ROh8xYjYkiURUfmBoMlcs= @@ -135,8 +143,9 @@ github.com/gin-contrib/sse v0.1.0 h1:Y/yl/+YNO8GZSjAhjMsSuLt29uWRFHdHYUb5lYOV9qE github.com/gin-contrib/sse v0.1.0/go.mod h1:RHrZQHXnP2xjPF+u1gW/2HnVO7nvIa9PG3Gm+fLHvGI= github.com/gin-gonic/gin v1.3.0/go.mod h1:7cKuhb5qV2ggCFctp2fJQ+ErvciLZrIeoOSOm6mUr7Y= github.com/gin-gonic/gin v1.6.3/go.mod h1:75u5sXoLsGZoRN5Sgbi1eraJ4GU3++wFwWzhwvtwp4M= -github.com/gin-gonic/gin v1.8.1 h1:4+fr/el88TOO3ewCmQr8cx/CtZ/umlIRIs5M4NTNjf8= github.com/gin-gonic/gin v1.8.1/go.mod h1:ji8BvRH1azfM+SYow9zQ6SZMvR8qOMZHmsCuWR9tTTk= +github.com/gin-gonic/gin v1.9.1 h1:4idEAncQnU5cB7BeOkPtxjfCSye0AAm1R0RVIqJ+Jmg= +github.com/gin-gonic/gin 
v1.9.1/go.mod h1:hPrL7YrpYKXt5YId3A/Tnip5kqbEAP+KLuI3SUcPTeU= github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= github.com/go-kit/kit v0.9.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= github.com/go-kit/log v0.1.0/go.mod h1:zbhenjAZHb184qTLMA9ZjW7ThYL0H2mk7Q6pNt4vbaY= @@ -155,17 +164,21 @@ github.com/go-openapi/spec v0.20.4/go.mod h1:faYFR1CvsJZ0mNsmsphTMSoRrNV3TEDoAM7 github.com/go-openapi/swag v0.19.5/go.mod h1:POnQmlKehdgb5mhVOsnJFsivZCEZ/vjK9gh66Z9tfKk= github.com/go-openapi/swag v0.19.15 h1:D2NRCBzS9/pEY3gP9Nl8aDqGUcPFrwG2p+CNFrLyrCM= github.com/go-openapi/swag v0.19.15/go.mod h1:QYRuS/SOXUCsnplDa677K7+DxSOj6IPNl/eQntq43wQ= -github.com/go-playground/assert/v2 v2.0.1 h1:MsBgLAaY856+nPRTKrp3/OZK38U/wa0CcBYNjji3q3A= github.com/go-playground/assert/v2 v2.0.1/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4= +github.com/go-playground/assert/v2 v2.2.0 h1:JvknZsQTYeFEAhQwI4qEt9cyV5ONwRHC+lYKSsYSR8s= +github.com/go-playground/assert/v2 v2.2.0/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4= github.com/go-playground/locales v0.13.0/go.mod h1:taPMhCMXrRLJO55olJkUXHZBHCxTMfnGwq/HNwmWNS8= -github.com/go-playground/locales v0.14.0 h1:u50s323jtVGugKlcYeyzC0etD1HifMjqmJqb8WugfUU= github.com/go-playground/locales v0.14.0/go.mod h1:sawfccIbzZTqEDETgFXqTho0QybSa7l++s0DH+LDiLs= +github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/oXslEjJA= +github.com/go-playground/locales v0.14.1/go.mod h1:hxrqLVvrK65+Rwrd5Fc6F2O76J/NuW9t0sjnWqG1slY= github.com/go-playground/universal-translator v0.17.0/go.mod h1:UkSxE5sNxxRwHyU+Scu5vgOQjsIJAF8j9muTVoKLVtA= -github.com/go-playground/universal-translator v0.18.0 h1:82dyy6p4OuJq4/CByFNOn/jYrnRPArHwAcmLoJZxyho= github.com/go-playground/universal-translator v0.18.0/go.mod h1:UvRDBj+xPUEGrFYl+lu/H90nyDXpg0fqeB/AQUGNTVA= +github.com/go-playground/universal-translator v0.18.1 h1:Bcnm0ZwsGyWbCzImXv+pAJnYK9S473LQFuzCbDbfSFY= +github.com/go-playground/universal-translator v0.18.1/go.mod h1:xekY+UJKNuX9WP91TpwSH2VMlDf28Uj24BCp08ZFTUY= github.com/go-playground/validator/v10 v10.2.0/go.mod h1:uOYAAleCW8F/7oMFd6aG0GOhaH6EGOAJShg8Id5JGkI= -github.com/go-playground/validator/v10 v10.10.0 h1:I7mrTYv78z8k8VXa/qJlOlEXn/nBh+BF8dHX5nt/dr0= github.com/go-playground/validator/v10 v10.10.0/go.mod h1:74x4gJWsvQexRdW8Pn3dXSGrTK4nAUsbPlLADvpJkos= +github.com/go-playground/validator/v10 v10.14.0 h1:vgvQWe3XCz3gIeFDm/HnTIbj6UGmg/+t63MyGU2n5js= +github.com/go-playground/validator/v10 v10.14.0/go.mod h1:9iXMNT7sEkjXb0I+enO7QXmzG6QCsPWY4zveKFVRSyU= github.com/go-resty/resty/v2 v2.6.0 h1:joIR5PNLM2EFqqESUjCMGXrWmXNHEU9CEiK813oKYS4= github.com/go-resty/resty/v2 v2.6.0/go.mod h1:PwvJS6hvaPkjtjNg9ph+VrSD92bi5Zq73w/BIH7cC3Q= github.com/go-sql-driver/mysql v1.7.0 h1:ueSltNNllEqE3qcWBTD0iQd3IpL/6U+mJxLkazJ7YPc= @@ -173,8 +186,9 @@ github.com/go-sql-driver/mysql v1.7.0/go.mod h1:OXbVy3sEdcQ2Doequ6Z5BW6fXNQTmx+9 github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= github.com/goccy/go-graphviz v0.0.9 h1:s/FMMJ1Joj6La3S5ApO3Jk2cwM4LpXECC2muFx3IPQQ= github.com/goccy/go-graphviz v0.0.9/go.mod h1:wXVsXxmyMQU6TN3zGRttjNn3h+iCAS7xQFC6TlNvLhk= -github.com/goccy/go-json v0.9.7 h1:IcB+Aqpx/iMHu5Yooh7jEzJk1JZ7Pjtmys2ukPr7EeM= github.com/goccy/go-json v0.9.7/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I= +github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU= +github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I= github.com/gogo/protobuf 
v0.0.0-20171007142547-342cbe0a0415/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= github.com/gogo/protobuf v0.0.0-20180717141946-636bf0302bc9/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= @@ -312,6 +326,9 @@ github.com/julienschmidt/httprouter v1.3.0/go.mod h1:JR6WtHb+2LUe8TCKY3cZOxFyyO8 github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvWXihfKN4Q= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= +github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= +github.com/klauspost/cpuid/v2 v2.2.4 h1:acbojRNwl3o09bUq+yDCtZFc1aiwaAAxtcn8YkZXnvk= +github.com/klauspost/cpuid/v2 v2.2.4/go.mod h1:RVVoqg1df56z8g3pUjL/3lE5UfnlrJX8tyFgg4nqhuY= github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/konsorten/go-windows-terminal-sequences v1.0.3 h1:CE8S1cTafDpPvMhIxNJKvHsGVBgn1xWYf1NbHQhywc8= github.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= @@ -325,8 +342,9 @@ github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/leodido/go-urn v1.2.0/go.mod h1:+8+nEpDfqqsY+g338gtMEUOtuK+4dEMhiQEgxpxOKII= -github.com/leodido/go-urn v1.2.1 h1:BqpAaACuzVSgi/VLzGZIobT2z4v53pjosyNd9Yv6n/w= github.com/leodido/go-urn v1.2.1/go.mod h1:zt4jvISO2HfUBqxjfIshjdMTYS56ZS/qv49ictyFfxY= +github.com/leodido/go-urn v1.2.4 h1:XlAE/cm/ms7TE/VMVoduSpNBoyc2dOxHs5MZSwAN63Q= +github.com/leodido/go-urn v1.2.4/go.mod h1:7ZrI8mTSeBSHl/UaRyKQW1qZeMgak41ANeCNaVckg+4= github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0/go.mod h1:zJYVVT2jmtg6P3p1VtQj7WsuWi/y4VnjVBn7F8KPB3I= github.com/lufia/plan9stats v0.0.0-20230326075908-cb1d2100619a h1:N9zuLhTvBSRt0gWSiJswwQ2HqDmtX/ZCDJURnKUt1Ik= github.com/lufia/plan9stats v0.0.0-20230326075908-cb1d2100619a/go.mod h1:JKx41uQRwqlTZabZc+kILPrO/3jlKnQ2Z8b7YiVw5cE= @@ -338,8 +356,9 @@ github.com/mailru/easyjson v0.7.6/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJ github.com/mattn/go-colorable v0.0.9/go.mod h1:9vuHe8Xs5qXnSaW/c/ABM9alt+Vo+STaOChaDxuIBZU= github.com/mattn/go-isatty v0.0.4/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4= github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU= -github.com/mattn/go-isatty v0.0.14 h1:yVuAays6BHfxijgZPzw+3Zlu5yQgKGP2/hcQbHb7S9Y= github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94= +github.com/mattn/go-isatty v0.0.19 h1:JITubQf0MOLdlGRuRq+jtsDlekdYPia9ZFsB8h/APPA= +github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= github.com/mattn/go-runewidth v0.0.2/go.mod h1:LwmH8dsx7+W8Uxz3IHJYH5QSwggIsqBzpuz5H//U1FU= github.com/mattn/go-sqlite3 v1.14.15 h1:vfoHhTN1af61xCRSWzFIWzx2YskyMTwHLrExkBOjvxI= github.com/mattn/go-sqlite3 v1.14.15/go.mod h1:2eHXhiwb8IkHr+BDWZGa96P6+rkvnG63S2DGjv9HUNg= @@ -383,8 +402,9 @@ github.com/otiai10/mint v1.3.0/go.mod h1:F5AjcsTsWUqX+Na9fpHb52P8pcRX2CI6A3ctIT9 github.com/otiai10/mint v1.3.3/go.mod h1:/yxELlJQ0ufhjUwhshSj+wFjZ78CnZ48/1wtmBH1OTc= github.com/pascaldekloe/name v0.0.0-20180628100202-0fd16699aae1/go.mod 
h1:eD5JxqMiuNYyFNmyY9rkJ/slN8y59oEu4Ei7F8OoKWQ= github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic= -github.com/pelletier/go-toml/v2 v2.0.1 h1:8e3L2cCQzLFi2CR4g7vGFuFxX7Jl1kKX8gW+iV0GUKU= github.com/pelletier/go-toml/v2 v2.0.1/go.mod h1:r9LEWfGN8R5k0VXJ+0BkIe7MYkRdwZOjgMj2KwnJFUo= +github.com/pelletier/go-toml/v2 v2.0.8 h1:0ctb6s9mE31h0/lhu+J6OPmVeDxJn+kYnJc2jZR9tGQ= +github.com/pelletier/go-toml/v2 v2.0.8/go.mod h1:vuYfssBdrU2XDZ9bYydBu6t+6a6PYNcZljzZR9VXg+4= github.com/petermattis/goid v0.0.0-20211229010228-4d14c490ee36 h1:64bxqeTEN0/xoEqhKGowgihNuzISS9rEG6YUMU4bzJo= github.com/petermattis/goid v0.0.0-20211229010228-4d14c490ee36/go.mod h1:pxMtw7cyUw6B2bRH0ZBANSPg+AoSud1I1iyJHI69jH4= github.com/phf/go-queue v0.0.0-20170504031614-9abe38d0371d h1:U+PMnTlV2tu7RuMK5etusZG3Cf+rpow5hqQByeCzJ2g= @@ -410,8 +430,8 @@ github.com/pingcap/log v1.1.1-0.20221110025148-ca232912c9f3 h1:HR/ylkkLmGdSSDaD8 github.com/pingcap/log v1.1.1-0.20221110025148-ca232912c9f3/go.mod h1:DWQW5jICDR7UJh4HtxXSM20Churx4CQL0fwL/SoOSA4= github.com/pingcap/sysutil v1.0.1-0.20230407040306-fb007c5aff21 h1:QV6jqlfOkh8hqvEAgwBZa+4bSgO0EeKC7s5c6Luam2I= github.com/pingcap/sysutil v1.0.1-0.20230407040306-fb007c5aff21/go.mod h1:QYnjfA95ZaMefyl1NO8oPtKeb8pYUdnDVhQgf+qdpjM= -github.com/pingcap/tidb-dashboard v0.0.0-20230911054332-22add1e00511 h1:oyrCfNlAWmLlUfEr+7YTSBo29SP/J1N8hnxBt5yUABo= -github.com/pingcap/tidb-dashboard v0.0.0-20230911054332-22add1e00511/go.mod h1:EZ90+V5S4TttbYag6oKZ3jcNKRwZe1Mc9vXwOt9JBYw= +github.com/pingcap/tidb-dashboard v0.0.0-20231127105651-ce4097837c5e h1:SJUSDejvKtj9vSh5ptRHh4iMrvPV3oKO8yp6/SYE8vc= +github.com/pingcap/tidb-dashboard v0.0.0-20231127105651-ce4097837c5e/go.mod h1:ucZBRz52icb23T/5Z4CsuUHmarYiin7p2MeiVBe+o8c= github.com/pingcap/tipb v0.0.0-20220718022156-3e2483c20a9e h1:FBaTXU8C3xgt/drM58VHxojHo/QoG1oPsgWTGvaSpO4= github.com/pingcap/tipb v0.0.0-20220718022156-3e2483c20a9e/go.mod h1:A7mrd7WHBl1o63LE2bIBGEJMTNWXqhgmYiOvMLxozfs= github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA= @@ -508,8 +528,10 @@ github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/ github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= -github.com/stretchr/testify v1.8.2 h1:+h33VjcLVPDHtOdpUCuF+7gSuG3yGIftsP1YvFihtJ8= +github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= github.com/stretchr/testify v1.8.2/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= +github.com/stretchr/testify v1.8.3 h1:RP3t2pwF7cMEbC1dqtB6poj3niw/9gnV4Cjg5oW5gtY= +github.com/stretchr/testify v1.8.3/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= github.com/swaggo/files v0.0.0-20210815190702-a29dd2bc99b2 h1:+iNTcqQJy0OZ5jk6a5NLib47eqXK8uYcPX+O4+cBpEM= github.com/swaggo/files v0.0.0-20210815190702-a29dd2bc99b2/go.mod h1:lKJPbtWzJ9JhsTN1k1gZgleJWY/cqq0psdoMmaThG3w= github.com/swaggo/http-swagger v1.2.6 h1:ihTjChUoSRMpFMjWw+0AkL1Ti4r6v8pCgVYLmQVRlRw= @@ -535,13 +557,16 @@ github.com/tklauser/numcpus v0.6.0/go.mod h1:FEZLMke0lhOUG6w2JadTzp0a+Nl8PF/GFkQ github.com/tmc/grpc-websocket-proxy v0.0.0-20190109142713-0ad062ec5ee5/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= github.com/tmc/grpc-websocket-proxy v0.0.0-20200427203606-3cfed13b9966 
h1:j6JEOq5QWFker+d7mFQYOhjTZonQ7YkLTHm56dbn+yM= github.com/tmc/grpc-websocket-proxy v0.0.0-20200427203606-3cfed13b9966/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= +github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI= +github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08= github.com/ugorji/go v1.1.4/go.mod h1:uQMGLiO92mf5W77hV/PUCpI3pbzQx3CRekS0kk+RGrc= github.com/ugorji/go v1.1.7/go.mod h1:kZn38zHttfInRq0xu/PH0az30d+z6vm202qpg1oXVMw= github.com/ugorji/go v1.2.7/go.mod h1:nF9osbDWLy6bDVv/Rtoh6QgnvNDpmCalQV5urGCCS6M= github.com/ugorji/go/codec v0.0.0-20181022190402-e5e69e061d4f/go.mod h1:VFNgLljTbGfSG7qAOspJ7OScBnGdDN/yBr0sguwnwf0= github.com/ugorji/go/codec v1.1.7/go.mod h1:Ax+UKWsSmolVDwsd+7N3ZtXu+yMGCf907BLYF3GoBXY= -github.com/ugorji/go/codec v1.2.7 h1:YPXUKf7fYbp/y8xloBqZOw2qaVggbfwMlI8WM3wZUJ0= github.com/ugorji/go/codec v1.2.7/go.mod h1:WGN1fab3R1fzQlVQTkfxVtIBhWDRqOviHU95kRgeqEY= +github.com/ugorji/go/codec v1.2.11 h1:BMaWp1Bb6fHwEtbplGBGJ498wD+LKlNSl25MjdZY4dU= +github.com/ugorji/go/codec v1.2.11/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg= github.com/unrolled/render v1.0.1 h1:VDDnQQVfBMsOsp3VaCJszSO0nkBIVEYoPWeRThk9spY= github.com/unrolled/render v1.0.1/go.mod h1:gN9T0NhL4Bfbwu8ann7Ry/TGHYfosul+J0obPf6NBdM= github.com/urfave/cli v1.20.0/go.mod h1:70zkFmudgCuE/ngEzBv17Jvp/497gISqfk5gWijbERA= @@ -599,6 +624,9 @@ go.uber.org/zap v1.16.0/go.mod h1:MA8QOfq0BHJwdXa996Y4dYkAqRKB8/1K1QMMZVaNZjQ= go.uber.org/zap v1.19.0/go.mod h1:xg/QME4nWcxGxrpdeYfq7UvYrLh66cuVKdrbD1XF/NI= go.uber.org/zap v1.24.0 h1:FiJd5l1UOLj0wCgbSE0rwwXHzEdAZS6hiiSnxJN/D60= go.uber.org/zap v1.24.0/go.mod h1:2kMP+WWQ8aoFoedH3T2sq6iJ2yDWpHbP0f6MQbS9Gkg= +golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8= +golang.org/x/arch v0.3.0 h1:02VY4/ZcO/gBOH6PUaoiptASxtXU10jazRCP865E97k= +golang.org/x/arch v0.3.0/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8= golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= @@ -708,6 +736,7 @@ golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20211013075003-97ac67df715c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211019181941-9d821ace8654/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220704084225-05e143d24a9e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= @@ -842,6 +871,7 @@ honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWh honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg= moul.io/zapgorm2 v1.1.0 h1:qwAlMBYf+qJkJ7PAzJl4oCe6eS6QGiKAXUPeis0+RBE= moul.io/zapgorm2 v1.1.0/go.mod h1:emRfKjNqSzVj5lcgasBdovIXY1jSOwFz2GQZn1Rddks= +rsc.io/pdf v0.1.1/go.mod 
h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4= sigs.k8s.io/yaml v1.1.0/go.mod h1:UJmg0vDUVViEyp3mgSv9WPwZCDxu4rQW1olrI1uml+o= sigs.k8s.io/yaml v1.2.0 h1:kr/MCeFWJWTwyaHoR9c8EjH9OumOmoF9YGiZd7lFm/Q= sigs.k8s.io/yaml v1.2.0/go.mod h1:yfXDCHCao9+ENCvLSE62v9VSji2MKu5jeNfTrofGhJc= diff --git a/tests/integrations/client/http_client_test.go b/tests/integrations/client/http_client_test.go new file mode 100644 index 00000000000..476b4d2f541 --- /dev/null +++ b/tests/integrations/client/http_client_test.go @@ -0,0 +1,446 @@ +// Copyright 2023 TiKV Project Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package client_test + +import ( + "context" + "math" + "net/http" + "sort" + "testing" + "time" + + "github.com/stretchr/testify/require" + "github.com/stretchr/testify/suite" + pd "github.com/tikv/pd/client/http" + "github.com/tikv/pd/pkg/core" + "github.com/tikv/pd/pkg/schedule/labeler" + "github.com/tikv/pd/pkg/schedule/placement" + "github.com/tikv/pd/pkg/utils/testutil" + "github.com/tikv/pd/pkg/utils/tsoutil" + "github.com/tikv/pd/tests" +) + +type httpClientTestSuite struct { + suite.Suite + ctx context.Context + cancelFunc context.CancelFunc + cluster *tests.TestCluster + client pd.Client +} + +func TestHTTPClientTestSuite(t *testing.T) { + suite.Run(t, new(httpClientTestSuite)) +} + +func (suite *httpClientTestSuite) SetupSuite() { + re := suite.Require() + var err error + suite.ctx, suite.cancelFunc = context.WithCancel(context.Background()) + suite.cluster, err = tests.NewTestCluster(suite.ctx, 2) + re.NoError(err) + err = suite.cluster.RunInitialServers() + re.NoError(err) + leader := suite.cluster.WaitLeader() + re.NotEmpty(leader) + leaderServer := suite.cluster.GetLeaderServer() + err = leaderServer.BootstrapCluster() + re.NoError(err) + for _, region := range []*core.RegionInfo{ + core.NewTestRegionInfo(10, 1, []byte("a1"), []byte("a2")), + core.NewTestRegionInfo(11, 1, []byte("a2"), []byte("a3")), + } { + err := leaderServer.GetRaftCluster().HandleRegionHeartbeat(region) + re.NoError(err) + } + var ( + testServers = suite.cluster.GetServers() + endpoints = make([]string, 0, len(testServers)) + ) + for _, s := range testServers { + endpoints = append(endpoints, s.GetConfig().AdvertiseClientUrls) + } + suite.client = pd.NewClient(endpoints) +} + +func (suite *httpClientTestSuite) TearDownSuite() { + suite.cancelFunc() + suite.client.Close() + suite.cluster.Destroy() +} + +func (suite *httpClientTestSuite) TestMeta() { + re := suite.Require() + region, err := suite.client.GetRegionByID(suite.ctx, 10) + re.NoError(err) + re.Equal(int64(10), region.ID) + re.Equal(core.HexRegionKeyStr([]byte("a1")), region.StartKey) + re.Equal(core.HexRegionKeyStr([]byte("a2")), region.EndKey) + region, err = suite.client.GetRegionByKey(suite.ctx, []byte("a2")) + re.NoError(err) + re.Equal(int64(11), region.ID) + re.Equal(core.HexRegionKeyStr([]byte("a2")), region.StartKey) + re.Equal(core.HexRegionKeyStr([]byte("a3")), region.EndKey) + regions, err := suite.client.GetRegions(suite.ctx) 
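// Editor's sketch (illustrative, not part of http_client_test.go): this new suite
// exercises the PD HTTP client introduced by this change. Outside the test framework
// the client is constructed and used roughly as below; the endpoint address is
// hypothetical and the call shapes simply mirror the ones exercised in this file.

package clientusage

import (
	"context"
	"fmt"

	pd "github.com/tikv/pd/client/http"
)

func exampleHTTPClientUsage() error {
	// Build the client from PD advertise-client URLs, as SetupSuite does.
	cli := pd.NewClient([]string{"http://127.0.0.1:2379"}) // hypothetical PD endpoint
	defer cli.Close()
	// Fetch a region by ID and print its hex-encoded key range.
	region, err := cli.GetRegionByID(context.Background(), 10)
	if err != nil {
		return err
	}
	fmt.Println(region.ID, region.StartKey, region.EndKey)
	return nil
}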
+ re.NoError(err) + re.Equal(int64(2), regions.Count) + re.Len(regions.Regions, 2) + regions, err = suite.client.GetRegionsByKeyRange(suite.ctx, pd.NewKeyRange([]byte("a1"), []byte("a3")), -1) + re.NoError(err) + re.Equal(int64(2), regions.Count) + re.Len(regions.Regions, 2) + regions, err = suite.client.GetRegionsByStoreID(suite.ctx, 1) + re.NoError(err) + re.Equal(int64(2), regions.Count) + re.Len(regions.Regions, 2) + state, err := suite.client.GetRegionsReplicatedStateByKeyRange(suite.ctx, pd.NewKeyRange([]byte("a1"), []byte("a3"))) + re.NoError(err) + re.Equal("INPROGRESS", state) + regionStats, err := suite.client.GetRegionStatusByKeyRange(suite.ctx, pd.NewKeyRange([]byte("a1"), []byte("a3")), false) + re.NoError(err) + re.Greater(regionStats.Count, 0) + re.NotEmpty(regionStats.StoreLeaderCount) + regionStats, err = suite.client.GetRegionStatusByKeyRange(suite.ctx, pd.NewKeyRange([]byte("a1"), []byte("a3")), true) + re.NoError(err) + re.Greater(regionStats.Count, 0) + re.Empty(regionStats.StoreLeaderCount) + hotReadRegions, err := suite.client.GetHotReadRegions(suite.ctx) + re.NoError(err) + re.Len(hotReadRegions.AsPeer, 1) + re.Len(hotReadRegions.AsLeader, 1) + hotWriteRegions, err := suite.client.GetHotWriteRegions(suite.ctx) + re.NoError(err) + re.Len(hotWriteRegions.AsPeer, 1) + re.Len(hotWriteRegions.AsLeader, 1) + historyHorRegions, err := suite.client.GetHistoryHotRegions(suite.ctx, &pd.HistoryHotRegionsRequest{ + StartTime: 0, + EndTime: time.Now().AddDate(0, 0, 1).UnixNano() / int64(time.Millisecond), + }) + re.NoError(err) + re.Len(historyHorRegions.HistoryHotRegion, 0) + store, err := suite.client.GetStores(suite.ctx) + re.NoError(err) + re.Equal(1, store.Count) + re.Len(store.Stores, 1) +} + +func (suite *httpClientTestSuite) TestGetMinResolvedTSByStoresIDs() { + re := suite.Require() + testMinResolvedTS := tsoutil.TimeToTS(time.Now()) + raftCluster := suite.cluster.GetLeaderServer().GetRaftCluster() + err := raftCluster.SetMinResolvedTS(1, testMinResolvedTS) + re.NoError(err) + // Make sure the min resolved TS is updated. + testutil.Eventually(re, func() bool { + minResolvedTS, _ := raftCluster.CheckAndUpdateMinResolvedTS() + return minResolvedTS == testMinResolvedTS + }) + // Wait for the cluster-level min resolved TS to be initialized. + minResolvedTS, storeMinResolvedTSMap, err := suite.client.GetMinResolvedTSByStoresIDs(suite.ctx, nil) + re.NoError(err) + re.Equal(testMinResolvedTS, minResolvedTS) + re.Empty(storeMinResolvedTSMap) + // Get the store-level min resolved TS. + minResolvedTS, storeMinResolvedTSMap, err = suite.client.GetMinResolvedTSByStoresIDs(suite.ctx, []uint64{1}) + re.NoError(err) + re.Equal(testMinResolvedTS, minResolvedTS) + re.Len(storeMinResolvedTSMap, 1) + re.Equal(minResolvedTS, storeMinResolvedTSMap[1]) + // Get the store-level min resolved TS with an invalid store ID. 
+ minResolvedTS, storeMinResolvedTSMap, err = suite.client.GetMinResolvedTSByStoresIDs(suite.ctx, []uint64{1, 2}) + re.NoError(err) + re.Equal(testMinResolvedTS, minResolvedTS) + re.Len(storeMinResolvedTSMap, 2) + re.Equal(minResolvedTS, storeMinResolvedTSMap[1]) + re.Equal(uint64(math.MaxUint64), storeMinResolvedTSMap[2]) +} + +func (suite *httpClientTestSuite) TestRule() { + re := suite.Require() + bundles, err := suite.client.GetAllPlacementRuleBundles(suite.ctx) + re.NoError(err) + re.Len(bundles, 1) + re.Equal(bundles[0].ID, placement.DefaultGroupID) + bundle, err := suite.client.GetPlacementRuleBundleByGroup(suite.ctx, placement.DefaultGroupID) + re.NoError(err) + re.Equal(bundles[0], bundle) + // Check if we have the default rule. + suite.checkRule(re, &pd.Rule{ + GroupID: placement.DefaultGroupID, + ID: placement.DefaultRuleID, + Role: pd.Voter, + Count: 3, + StartKey: []byte{}, + EndKey: []byte{}, + }, 1, true) + // Should be the same as the rules in the bundle. + suite.checkRule(re, bundle.Rules[0], 1, true) + testRule := &pd.Rule{ + GroupID: placement.DefaultGroupID, + ID: "test", + Role: pd.Voter, + Count: 3, + StartKey: []byte{}, + EndKey: []byte{}, + } + err = suite.client.SetPlacementRule(suite.ctx, testRule) + re.NoError(err) + suite.checkRule(re, testRule, 2, true) + err = suite.client.DeletePlacementRule(suite.ctx, placement.DefaultGroupID, "test") + re.NoError(err) + suite.checkRule(re, testRule, 1, false) + testRuleOp := &pd.RuleOp{ + Rule: testRule, + Action: pd.RuleOpAdd, + } + err = suite.client.SetPlacementRuleInBatch(suite.ctx, []*pd.RuleOp{testRuleOp}) + re.NoError(err) + suite.checkRule(re, testRule, 2, true) + testRuleOp = &pd.RuleOp{ + Rule: testRule, + Action: pd.RuleOpDel, + } + err = suite.client.SetPlacementRuleInBatch(suite.ctx, []*pd.RuleOp{testRuleOp}) + re.NoError(err) + suite.checkRule(re, testRule, 1, false) + err = suite.client.SetPlacementRuleBundles(suite.ctx, []*pd.GroupBundle{ + { + ID: placement.DefaultGroupID, + Rules: []*pd.Rule{testRule}, + }, + }, true) + re.NoError(err) + suite.checkRule(re, testRule, 1, true) + ruleGroups, err := suite.client.GetAllPlacementRuleGroups(suite.ctx) + re.NoError(err) + re.Len(ruleGroups, 1) + re.Equal(placement.DefaultGroupID, ruleGroups[0].ID) + ruleGroup, err := suite.client.GetPlacementRuleGroupByID(suite.ctx, placement.DefaultGroupID) + re.NoError(err) + re.Equal(ruleGroups[0], ruleGroup) + testRuleGroup := &pd.RuleGroup{ + ID: "test-group", + Index: 1, + Override: true, + } + err = suite.client.SetPlacementRuleGroup(suite.ctx, testRuleGroup) + re.NoError(err) + ruleGroup, err = suite.client.GetPlacementRuleGroupByID(suite.ctx, testRuleGroup.ID) + re.NoError(err) + re.Equal(testRuleGroup, ruleGroup) + err = suite.client.DeletePlacementRuleGroupByID(suite.ctx, testRuleGroup.ID) + re.NoError(err) + ruleGroup, err = suite.client.GetPlacementRuleGroupByID(suite.ctx, testRuleGroup.ID) + re.ErrorContains(err, http.StatusText(http.StatusNotFound)) + re.Empty(ruleGroup) + // Test the start key and end key. + testRule = &pd.Rule{ + GroupID: placement.DefaultGroupID, + ID: "test", + Role: pd.Voter, + Count: 5, + StartKey: []byte("a1"), + EndKey: []byte(""), + } + err = suite.client.SetPlacementRule(suite.ctx, testRule) + re.NoError(err) + suite.checkRule(re, testRule, 1, true) +} + +func (suite *httpClientTestSuite) checkRule( + re *require.Assertions, + rule *pd.Rule, totalRuleCount int, exist bool, +) { + // Check through the `GetPlacementRulesByGroup` API. 
+ rules, err := suite.client.GetPlacementRulesByGroup(suite.ctx, rule.GroupID) + re.NoError(err) + checkRuleFunc(re, rules, rule, totalRuleCount, exist) + // Check through the `GetPlacementRuleBundleByGroup` API. + bundle, err := suite.client.GetPlacementRuleBundleByGroup(suite.ctx, rule.GroupID) + re.NoError(err) + checkRuleFunc(re, bundle.Rules, rule, totalRuleCount, exist) +} + +func checkRuleFunc( + re *require.Assertions, + rules []*pd.Rule, rule *pd.Rule, totalRuleCount int, exist bool, +) { + re.Len(rules, totalRuleCount) + for _, r := range rules { + if r.ID != rule.ID { + continue + } + re.Equal(rule.GroupID, r.GroupID) + re.Equal(rule.ID, r.ID) + re.Equal(rule.Role, r.Role) + re.Equal(rule.Count, r.Count) + re.Equal(rule.StartKey, r.StartKey) + re.Equal(rule.EndKey, r.EndKey) + return + } + if exist { + re.Failf("Failed to check the rule", "rule %+v not found", rule) + } +} + +func (suite *httpClientTestSuite) TestRegionLabel() { + re := suite.Require() + labelRules, err := suite.client.GetAllRegionLabelRules(suite.ctx) + re.NoError(err) + re.Len(labelRules, 1) + re.Equal("keyspaces/0", labelRules[0].ID) + // Set a new region label rule. + labelRule := &pd.LabelRule{ + ID: "rule1", + Labels: []pd.RegionLabel{{Key: "k1", Value: "v1"}}, + RuleType: "key-range", + Data: labeler.MakeKeyRanges("1234", "5678"), + } + err = suite.client.SetRegionLabelRule(suite.ctx, labelRule) + re.NoError(err) + labelRules, err = suite.client.GetAllRegionLabelRules(suite.ctx) + re.NoError(err) + re.Len(labelRules, 2) + sort.Slice(labelRules, func(i, j int) bool { + return labelRules[i].ID < labelRules[j].ID + }) + re.Equal(labelRule.ID, labelRules[1].ID) + re.Equal(labelRule.Labels, labelRules[1].Labels) + re.Equal(labelRule.RuleType, labelRules[1].RuleType) + // Patch the region label rule. 
+ labelRule = &pd.LabelRule{ + ID: "rule2", + Labels: []pd.RegionLabel{{Key: "k2", Value: "v2"}}, + RuleType: "key-range", + Data: labeler.MakeKeyRanges("ab12", "cd12"), + } + patch := &pd.LabelRulePatch{ + SetRules: []*pd.LabelRule{labelRule}, + DeleteRules: []string{"rule1"}, + } + err = suite.client.PatchRegionLabelRules(suite.ctx, patch) + re.NoError(err) + allLabelRules, err := suite.client.GetAllRegionLabelRules(suite.ctx) + re.NoError(err) + re.Len(labelRules, 2) + sort.Slice(allLabelRules, func(i, j int) bool { + return allLabelRules[i].ID < allLabelRules[j].ID + }) + re.Equal(labelRule.ID, allLabelRules[1].ID) + re.Equal(labelRule.Labels, allLabelRules[1].Labels) + re.Equal(labelRule.RuleType, allLabelRules[1].RuleType) + labelRules, err = suite.client.GetRegionLabelRulesByIDs(suite.ctx, []string{"keyspaces/0", "rule2"}) + re.NoError(err) + sort.Slice(labelRules, func(i, j int) bool { + return labelRules[i].ID < labelRules[j].ID + }) + re.Equal(allLabelRules, labelRules) +} + +func (suite *httpClientTestSuite) TestAccelerateSchedule() { + re := suite.Require() + raftCluster := suite.cluster.GetLeaderServer().GetRaftCluster() + suspectRegions := raftCluster.GetSuspectRegions() + re.Len(suspectRegions, 0) + err := suite.client.AccelerateSchedule(suite.ctx, pd.NewKeyRange([]byte("a1"), []byte("a2"))) + re.NoError(err) + suspectRegions = raftCluster.GetSuspectRegions() + re.Len(suspectRegions, 1) + raftCluster.ClearSuspectRegions() + suspectRegions = raftCluster.GetSuspectRegions() + re.Len(suspectRegions, 0) + err = suite.client.AccelerateScheduleInBatch(suite.ctx, []*pd.KeyRange{ + pd.NewKeyRange([]byte("a1"), []byte("a2")), + pd.NewKeyRange([]byte("a2"), []byte("a3")), + }) + re.NoError(err) + suspectRegions = raftCluster.GetSuspectRegions() + re.Len(suspectRegions, 2) +} + +func (suite *httpClientTestSuite) TestScheduleConfig() { + re := suite.Require() + config, err := suite.client.GetScheduleConfig(suite.ctx) + re.NoError(err) + re.Equal(float64(4), config["leader-schedule-limit"]) + re.Equal(float64(2048), config["region-schedule-limit"]) + config["leader-schedule-limit"] = float64(8) + err = suite.client.SetScheduleConfig(suite.ctx, config) + re.NoError(err) + config, err = suite.client.GetScheduleConfig(suite.ctx) + re.NoError(err) + re.Equal(float64(8), config["leader-schedule-limit"]) + re.Equal(float64(2048), config["region-schedule-limit"]) +} + +func (suite *httpClientTestSuite) TestSchedulers() { + re := suite.Require() + schedulers, err := suite.client.GetSchedulers(suite.ctx) + re.NoError(err) + re.Len(schedulers, 0) + + err = suite.client.CreateScheduler(suite.ctx, "evict-leader-scheduler", 1) + re.NoError(err) + schedulers, err = suite.client.GetSchedulers(suite.ctx) + re.NoError(err) + re.Len(schedulers, 1) +} + +func (suite *httpClientTestSuite) TestSetStoreLabels() { + re := suite.Require() + resp, err := suite.client.GetStores(suite.ctx) + re.NoError(err) + setStore := resp.Stores[0] + re.Empty(setStore.Store.Labels, nil) + storeLabels := map[string]string{ + "zone": "zone1", + } + err = suite.client.SetStoreLabels(suite.ctx, 1, storeLabels) + re.NoError(err) + + resp, err = suite.client.GetStores(suite.ctx) + re.NoError(err) + for _, store := range resp.Stores { + if store.Store.ID == setStore.Store.ID { + for _, label := range store.Store.Labels { + re.Equal(label.Value, storeLabels[label.Key]) + } + } + } +} + +func (suite *httpClientTestSuite) TestTransferLeader() { + re := suite.Require() + members, err := suite.client.GetMembers(suite.ctx) + 
re.NoError(err) + re.Len(members.Members, 2) + + oldLeader, err := suite.client.GetLeader(suite.ctx) + re.NoError(err) + + // Transfer leader to another pd + for _, member := range members.Members { + if member.Name != oldLeader.Name { + err = suite.client.TransferLeader(suite.ctx, member.Name) + re.NoError(err) + break + } + } + + newLeader := suite.cluster.WaitLeader() + re.NotEmpty(newLeader) + re.NoError(err) + re.NotEqual(oldLeader.Name, newLeader) +} diff --git a/tests/integrations/mcs/go.mod b/tests/integrations/mcs/go.mod index 000bfdc8312..75d70e3cf06 100644 --- a/tests/integrations/mcs/go.mod +++ b/tests/integrations/mcs/go.mod @@ -15,7 +15,7 @@ require ( github.com/pingcap/failpoint v0.0.0-20210918120811-547c13e3eb00 github.com/pingcap/kvproto v0.0.0-20231018065736-c0689aded40c github.com/pingcap/log v1.1.1-0.20221110025148-ca232912c9f3 - github.com/stretchr/testify v1.8.2 + github.com/stretchr/testify v1.8.3 github.com/tikv/pd v0.0.0-00010101000000-000000000000 github.com/tikv/pd/client v0.0.0-00010101000000-000000000000 go.etcd.io/etcd v0.5.0-alpha.5.0.20220915004622-85b640cee793 @@ -50,9 +50,11 @@ require ( github.com/beorn7/perks v1.0.1 // indirect github.com/bitly/go-simplejson v0.5.0 // indirect github.com/breeswish/gin-jwt/v2 v2.6.4-jwt-patch // indirect + github.com/bytedance/sonic v1.9.1 // indirect github.com/cakturk/go-netstat v0.0.0-20200220111822-e5b49efee7a5 // indirect github.com/cenkalti/backoff/v4 v4.0.2 // indirect github.com/cespare/xxhash/v2 v2.2.0 // indirect + github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311 // indirect github.com/cloudfoundry/gosigar v1.3.6 // indirect github.com/coreos/go-semver v0.3.0 // indirect github.com/coreos/go-systemd v0.0.0-20190719114852-fd7a80b32e1f // indirect @@ -61,23 +63,24 @@ require ( github.com/dustin/go-humanize v0.0.0-20171111073723-bb3d318650d4 // indirect github.com/elliotchance/pie/v2 v2.1.0 // indirect github.com/fogleman/gg v1.3.0 // indirect + github.com/gabriel-vasile/mimetype v1.4.2 // indirect github.com/gin-contrib/cors v1.4.0 // indirect github.com/gin-contrib/gzip v0.0.1 // indirect github.com/gin-contrib/pprof v1.4.0 // indirect github.com/gin-contrib/sse v0.1.0 // indirect - github.com/gin-gonic/gin v1.8.1 // indirect + github.com/gin-gonic/gin v1.9.1 // indirect github.com/go-ole/go-ole v1.2.6 // indirect github.com/go-openapi/jsonpointer v0.19.5 // indirect github.com/go-openapi/jsonreference v0.19.6 // indirect github.com/go-openapi/spec v0.20.4 // indirect github.com/go-openapi/swag v0.19.15 // indirect - github.com/go-playground/locales v0.14.0 // indirect - github.com/go-playground/universal-translator v0.18.0 // indirect - github.com/go-playground/validator/v10 v10.10.0 // indirect + github.com/go-playground/locales v0.14.1 // indirect + github.com/go-playground/universal-translator v0.18.1 // indirect + github.com/go-playground/validator/v10 v10.14.0 // indirect github.com/go-resty/resty/v2 v2.6.0 // indirect github.com/go-sql-driver/mysql v1.7.0 // indirect github.com/goccy/go-graphviz v0.0.9 // indirect - github.com/goccy/go-json v0.9.7 // indirect + github.com/goccy/go-json v0.10.2 // indirect github.com/gogo/protobuf v1.3.2 // indirect github.com/golang-jwt/jwt v3.2.1+incompatible // indirect github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0 // indirect @@ -101,11 +104,12 @@ require ( github.com/joomcode/errorx v1.0.1 // indirect github.com/josharian/intern v1.0.0 // indirect github.com/json-iterator/go v1.1.12 // indirect + github.com/klauspost/cpuid/v2 v2.2.4 // 
indirect github.com/konsorten/go-windows-terminal-sequences v1.0.3 // indirect - github.com/leodido/go-urn v1.2.1 // indirect + github.com/leodido/go-urn v1.2.4 // indirect github.com/lufia/plan9stats v0.0.0-20230326075908-cb1d2100619a // indirect github.com/mailru/easyjson v0.7.6 // indirect - github.com/mattn/go-isatty v0.0.14 // indirect + github.com/mattn/go-isatty v0.0.19 // indirect github.com/mattn/go-sqlite3 v1.14.15 // indirect github.com/matttproud/golang_protobuf_extensions v1.0.1 // indirect github.com/minio/sio v0.3.0 // indirect @@ -113,13 +117,13 @@ require ( github.com/modern-go/reflect2 v1.0.2 // indirect github.com/oleiade/reflections v1.0.1 // indirect github.com/opentracing/opentracing-go v1.2.0 // indirect - github.com/pelletier/go-toml/v2 v2.0.1 // indirect + github.com/pelletier/go-toml/v2 v2.0.8 // indirect github.com/petermattis/goid v0.0.0-20211229010228-4d14c490ee36 // indirect github.com/phf/go-queue v0.0.0-20170504031614-9abe38d0371d // indirect github.com/pingcap/errcode v0.3.0 // indirect github.com/pingcap/errors v0.11.5-0.20211224045212-9687c2b0f87c // indirect github.com/pingcap/sysutil v1.0.1-0.20230407040306-fb007c5aff21 // indirect - github.com/pingcap/tidb-dashboard v0.0.0-20230911054332-22add1e00511 // indirect + github.com/pingcap/tidb-dashboard v0.0.0-20231127105651-ce4097837c5e // indirect github.com/pingcap/tipb v0.0.0-20220718022156-3e2483c20a9e // indirect github.com/pkg/errors v0.9.1 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect @@ -147,7 +151,8 @@ require ( github.com/tklauser/go-sysconf v0.3.11 // indirect github.com/tklauser/numcpus v0.6.0 // indirect github.com/tmc/grpc-websocket-proxy v0.0.0-20200427203606-3cfed13b9966 // indirect - github.com/ugorji/go/codec v1.2.7 // indirect + github.com/twitchyliquid64/golang-asm v0.15.1 // indirect + github.com/ugorji/go/codec v1.2.11 // indirect github.com/unrolled/render v1.0.1 // indirect github.com/urfave/negroni v0.3.0 // indirect github.com/vmihailenco/msgpack/v5 v5.3.5 // indirect @@ -159,6 +164,7 @@ require ( go.uber.org/dig v1.9.0 // indirect go.uber.org/fx v1.12.0 // indirect go.uber.org/multierr v1.11.0 // indirect + golang.org/x/arch v0.3.0 // indirect golang.org/x/crypto v0.14.0 // indirect golang.org/x/exp v0.0.0-20230711005742-c3f37128e5a4 // indirect golang.org/x/image v0.5.0 // indirect diff --git a/tests/integrations/mcs/go.sum b/tests/integrations/mcs/go.sum index 0da75329284..dfead54afe1 100644 --- a/tests/integrations/mcs/go.sum +++ b/tests/integrations/mcs/go.sum @@ -68,6 +68,9 @@ github.com/bmizerany/assert v0.0.0-20160611221934-b7ed37b82869 h1:DDGfHa7BWjL4Yn github.com/bmizerany/assert v0.0.0-20160611221934-b7ed37b82869/go.mod h1:Ekp36dRnpXw/yCqJaO+ZrUyxD+3VXMFFr56k5XYrpB4= github.com/breeswish/gin-jwt/v2 v2.6.4-jwt-patch h1:KLE/YeX+9FNaGVW5MtImRVPhjDpfpgJhvkuYWBmOYbo= github.com/breeswish/gin-jwt/v2 v2.6.4-jwt-patch/go.mod h1:KjBLriHXe7L6fGceqWzTod8HUB/TP1WWDtfuSYtYXaI= +github.com/bytedance/sonic v1.5.0/go.mod h1:ED5hyg4y6t3/9Ku1R6dU/4KyJ48DZ4jPhfY1O2AihPM= +github.com/bytedance/sonic v1.9.1 h1:6iJ6NqdoxCDr6mbY8h18oSO+cShGSMRGCEo7F2h0x8s= +github.com/bytedance/sonic v1.9.1/go.mod h1:i736AoUSYt75HyZLoJW9ERYxcy6eaN6h4BZXU064P/U= github.com/cakturk/go-netstat v0.0.0-20200220111822-e5b49efee7a5 h1:BjkPE3785EwPhhyuFkbINB+2a1xATwk8SNDWnJiD41g= github.com/cakturk/go-netstat v0.0.0-20200220111822-e5b49efee7a5/go.mod h1:jtAfVaU/2cu1+wdSRPWE2c1N2qeAA3K4RH9pYgqwets= github.com/cenkalti/backoff/v4 v4.0.2 h1:JIufpQLbh4DkbQoii76ItQIUFzevQSqOLZca4eamEDs= @@ -77,6 +80,9 @@ 
github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghf github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/chenzhuoyu/base64x v0.0.0-20211019084208-fb5309c8db06/go.mod h1:DH46F32mSOjUmXrMHnKwZdA8wcEefY7UVqBKYGjpdQY= +github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311 h1:qSGYFH7+jGhDF8vLC+iwCD4WpbV1EBDSzWkJODFLams= +github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311/go.mod h1:b583jCggY9gE99b6G5LEC39OIiVsWj+R97kbl5odCEk= github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= @@ -127,6 +133,8 @@ github.com/fogleman/gg v1.3.0/go.mod h1:R/bRT+9gY/C5z7JzPU0zXsXHKM4/ayA+zqcVNZzP github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= github.com/fsnotify/fsnotify v1.6.0 h1:n+5WquG0fcWoWp6xPWfHdbskMCQaFnG6PfBrh1Ky4HY= github.com/fsnotify/fsnotify v1.6.0/go.mod h1:sl3t1tCWJFWoRz9R8WJCbQihKKwmorjAbSClcnxKAGw= +github.com/gabriel-vasile/mimetype v1.4.2 h1:w5qFW6JKBz9Y393Y4q372O9A7cUSequkh1Q7OhCmWKU= +github.com/gabriel-vasile/mimetype v1.4.2/go.mod h1:zApsH/mKG4w07erKIaJPFiX0Tsq9BFQgN3qGY5GnNgA= github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= github.com/gin-contrib/cors v1.4.0 h1:oJ6gwtUl3lqV0WEIwM/LxPF1QZ5qe2lGWdY2+bz7y0g= github.com/gin-contrib/cors v1.4.0/go.mod h1:bs9pNM0x/UsmHPBWT2xZz9ROh8xYjYkiURUfmBoMlcs= @@ -139,8 +147,9 @@ github.com/gin-contrib/sse v0.1.0 h1:Y/yl/+YNO8GZSjAhjMsSuLt29uWRFHdHYUb5lYOV9qE github.com/gin-contrib/sse v0.1.0/go.mod h1:RHrZQHXnP2xjPF+u1gW/2HnVO7nvIa9PG3Gm+fLHvGI= github.com/gin-gonic/gin v1.3.0/go.mod h1:7cKuhb5qV2ggCFctp2fJQ+ErvciLZrIeoOSOm6mUr7Y= github.com/gin-gonic/gin v1.6.3/go.mod h1:75u5sXoLsGZoRN5Sgbi1eraJ4GU3++wFwWzhwvtwp4M= -github.com/gin-gonic/gin v1.8.1 h1:4+fr/el88TOO3ewCmQr8cx/CtZ/umlIRIs5M4NTNjf8= github.com/gin-gonic/gin v1.8.1/go.mod h1:ji8BvRH1azfM+SYow9zQ6SZMvR8qOMZHmsCuWR9tTTk= +github.com/gin-gonic/gin v1.9.1 h1:4idEAncQnU5cB7BeOkPtxjfCSye0AAm1R0RVIqJ+Jmg= +github.com/gin-gonic/gin v1.9.1/go.mod h1:hPrL7YrpYKXt5YId3A/Tnip5kqbEAP+KLuI3SUcPTeU= github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= github.com/go-kit/kit v0.9.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= github.com/go-kit/log v0.1.0/go.mod h1:zbhenjAZHb184qTLMA9ZjW7ThYL0H2mk7Q6pNt4vbaY= @@ -159,17 +168,21 @@ github.com/go-openapi/spec v0.20.4/go.mod h1:faYFR1CvsJZ0mNsmsphTMSoRrNV3TEDoAM7 github.com/go-openapi/swag v0.19.5/go.mod h1:POnQmlKehdgb5mhVOsnJFsivZCEZ/vjK9gh66Z9tfKk= github.com/go-openapi/swag v0.19.15 h1:D2NRCBzS9/pEY3gP9Nl8aDqGUcPFrwG2p+CNFrLyrCM= github.com/go-openapi/swag v0.19.15/go.mod h1:QYRuS/SOXUCsnplDa677K7+DxSOj6IPNl/eQntq43wQ= -github.com/go-playground/assert/v2 v2.0.1 h1:MsBgLAaY856+nPRTKrp3/OZK38U/wa0CcBYNjji3q3A= github.com/go-playground/assert/v2 v2.0.1/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4= +github.com/go-playground/assert/v2 v2.2.0 h1:JvknZsQTYeFEAhQwI4qEt9cyV5ONwRHC+lYKSsYSR8s= +github.com/go-playground/assert/v2 v2.2.0/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4= 
github.com/go-playground/locales v0.13.0/go.mod h1:taPMhCMXrRLJO55olJkUXHZBHCxTMfnGwq/HNwmWNS8= -github.com/go-playground/locales v0.14.0 h1:u50s323jtVGugKlcYeyzC0etD1HifMjqmJqb8WugfUU= github.com/go-playground/locales v0.14.0/go.mod h1:sawfccIbzZTqEDETgFXqTho0QybSa7l++s0DH+LDiLs= +github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/oXslEjJA= +github.com/go-playground/locales v0.14.1/go.mod h1:hxrqLVvrK65+Rwrd5Fc6F2O76J/NuW9t0sjnWqG1slY= github.com/go-playground/universal-translator v0.17.0/go.mod h1:UkSxE5sNxxRwHyU+Scu5vgOQjsIJAF8j9muTVoKLVtA= -github.com/go-playground/universal-translator v0.18.0 h1:82dyy6p4OuJq4/CByFNOn/jYrnRPArHwAcmLoJZxyho= github.com/go-playground/universal-translator v0.18.0/go.mod h1:UvRDBj+xPUEGrFYl+lu/H90nyDXpg0fqeB/AQUGNTVA= +github.com/go-playground/universal-translator v0.18.1 h1:Bcnm0ZwsGyWbCzImXv+pAJnYK9S473LQFuzCbDbfSFY= +github.com/go-playground/universal-translator v0.18.1/go.mod h1:xekY+UJKNuX9WP91TpwSH2VMlDf28Uj24BCp08ZFTUY= github.com/go-playground/validator/v10 v10.2.0/go.mod h1:uOYAAleCW8F/7oMFd6aG0GOhaH6EGOAJShg8Id5JGkI= -github.com/go-playground/validator/v10 v10.10.0 h1:I7mrTYv78z8k8VXa/qJlOlEXn/nBh+BF8dHX5nt/dr0= github.com/go-playground/validator/v10 v10.10.0/go.mod h1:74x4gJWsvQexRdW8Pn3dXSGrTK4nAUsbPlLADvpJkos= +github.com/go-playground/validator/v10 v10.14.0 h1:vgvQWe3XCz3gIeFDm/HnTIbj6UGmg/+t63MyGU2n5js= +github.com/go-playground/validator/v10 v10.14.0/go.mod h1:9iXMNT7sEkjXb0I+enO7QXmzG6QCsPWY4zveKFVRSyU= github.com/go-resty/resty/v2 v2.6.0 h1:joIR5PNLM2EFqqESUjCMGXrWmXNHEU9CEiK813oKYS4= github.com/go-resty/resty/v2 v2.6.0/go.mod h1:PwvJS6hvaPkjtjNg9ph+VrSD92bi5Zq73w/BIH7cC3Q= github.com/go-sql-driver/mysql v1.7.0 h1:ueSltNNllEqE3qcWBTD0iQd3IpL/6U+mJxLkazJ7YPc= @@ -177,8 +190,9 @@ github.com/go-sql-driver/mysql v1.7.0/go.mod h1:OXbVy3sEdcQ2Doequ6Z5BW6fXNQTmx+9 github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= github.com/goccy/go-graphviz v0.0.9 h1:s/FMMJ1Joj6La3S5ApO3Jk2cwM4LpXECC2muFx3IPQQ= github.com/goccy/go-graphviz v0.0.9/go.mod h1:wXVsXxmyMQU6TN3zGRttjNn3h+iCAS7xQFC6TlNvLhk= -github.com/goccy/go-json v0.9.7 h1:IcB+Aqpx/iMHu5Yooh7jEzJk1JZ7Pjtmys2ukPr7EeM= github.com/goccy/go-json v0.9.7/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I= +github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU= +github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I= github.com/gogo/protobuf v0.0.0-20171007142547-342cbe0a0415/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= github.com/gogo/protobuf v0.0.0-20180717141946-636bf0302bc9/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= @@ -315,6 +329,9 @@ github.com/julienschmidt/httprouter v1.3.0/go.mod h1:JR6WtHb+2LUe8TCKY3cZOxFyyO8 github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvWXihfKN4Q= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= +github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= +github.com/klauspost/cpuid/v2 v2.2.4 h1:acbojRNwl3o09bUq+yDCtZFc1aiwaAAxtcn8YkZXnvk= +github.com/klauspost/cpuid/v2 v2.2.4/go.mod h1:RVVoqg1df56z8g3pUjL/3lE5UfnlrJX8tyFgg4nqhuY= github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= 
github.com/konsorten/go-windows-terminal-sequences v1.0.3 h1:CE8S1cTafDpPvMhIxNJKvHsGVBgn1xWYf1NbHQhywc8= github.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= @@ -328,8 +345,9 @@ github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/leodido/go-urn v1.2.0/go.mod h1:+8+nEpDfqqsY+g338gtMEUOtuK+4dEMhiQEgxpxOKII= -github.com/leodido/go-urn v1.2.1 h1:BqpAaACuzVSgi/VLzGZIobT2z4v53pjosyNd9Yv6n/w= github.com/leodido/go-urn v1.2.1/go.mod h1:zt4jvISO2HfUBqxjfIshjdMTYS56ZS/qv49ictyFfxY= +github.com/leodido/go-urn v1.2.4 h1:XlAE/cm/ms7TE/VMVoduSpNBoyc2dOxHs5MZSwAN63Q= +github.com/leodido/go-urn v1.2.4/go.mod h1:7ZrI8mTSeBSHl/UaRyKQW1qZeMgak41ANeCNaVckg+4= github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0/go.mod h1:zJYVVT2jmtg6P3p1VtQj7WsuWi/y4VnjVBn7F8KPB3I= github.com/lufia/plan9stats v0.0.0-20230326075908-cb1d2100619a h1:N9zuLhTvBSRt0gWSiJswwQ2HqDmtX/ZCDJURnKUt1Ik= github.com/lufia/plan9stats v0.0.0-20230326075908-cb1d2100619a/go.mod h1:JKx41uQRwqlTZabZc+kILPrO/3jlKnQ2Z8b7YiVw5cE= @@ -341,8 +359,9 @@ github.com/mailru/easyjson v0.7.6/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJ github.com/mattn/go-colorable v0.0.9/go.mod h1:9vuHe8Xs5qXnSaW/c/ABM9alt+Vo+STaOChaDxuIBZU= github.com/mattn/go-isatty v0.0.4/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4= github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU= -github.com/mattn/go-isatty v0.0.14 h1:yVuAays6BHfxijgZPzw+3Zlu5yQgKGP2/hcQbHb7S9Y= github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94= +github.com/mattn/go-isatty v0.0.19 h1:JITubQf0MOLdlGRuRq+jtsDlekdYPia9ZFsB8h/APPA= +github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= github.com/mattn/go-runewidth v0.0.2/go.mod h1:LwmH8dsx7+W8Uxz3IHJYH5QSwggIsqBzpuz5H//U1FU= github.com/mattn/go-sqlite3 v1.14.15 h1:vfoHhTN1af61xCRSWzFIWzx2YskyMTwHLrExkBOjvxI= github.com/mattn/go-sqlite3 v1.14.15/go.mod h1:2eHXhiwb8IkHr+BDWZGa96P6+rkvnG63S2DGjv9HUNg= @@ -389,8 +408,9 @@ github.com/otiai10/mint v1.3.0/go.mod h1:F5AjcsTsWUqX+Na9fpHb52P8pcRX2CI6A3ctIT9 github.com/otiai10/mint v1.3.3/go.mod h1:/yxELlJQ0ufhjUwhshSj+wFjZ78CnZ48/1wtmBH1OTc= github.com/pascaldekloe/name v0.0.0-20180628100202-0fd16699aae1/go.mod h1:eD5JxqMiuNYyFNmyY9rkJ/slN8y59oEu4Ei7F8OoKWQ= github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic= -github.com/pelletier/go-toml/v2 v2.0.1 h1:8e3L2cCQzLFi2CR4g7vGFuFxX7Jl1kKX8gW+iV0GUKU= github.com/pelletier/go-toml/v2 v2.0.1/go.mod h1:r9LEWfGN8R5k0VXJ+0BkIe7MYkRdwZOjgMj2KwnJFUo= +github.com/pelletier/go-toml/v2 v2.0.8 h1:0ctb6s9mE31h0/lhu+J6OPmVeDxJn+kYnJc2jZR9tGQ= +github.com/pelletier/go-toml/v2 v2.0.8/go.mod h1:vuYfssBdrU2XDZ9bYydBu6t+6a6PYNcZljzZR9VXg+4= github.com/petermattis/goid v0.0.0-20211229010228-4d14c490ee36 h1:64bxqeTEN0/xoEqhKGowgihNuzISS9rEG6YUMU4bzJo= github.com/petermattis/goid v0.0.0-20211229010228-4d14c490ee36/go.mod h1:pxMtw7cyUw6B2bRH0ZBANSPg+AoSud1I1iyJHI69jH4= github.com/phf/go-queue v0.0.0-20170504031614-9abe38d0371d h1:U+PMnTlV2tu7RuMK5etusZG3Cf+rpow5hqQByeCzJ2g= @@ -414,8 +434,8 @@ github.com/pingcap/log v1.1.1-0.20221110025148-ca232912c9f3 h1:HR/ylkkLmGdSSDaD8 github.com/pingcap/log v1.1.1-0.20221110025148-ca232912c9f3/go.mod h1:DWQW5jICDR7UJh4HtxXSM20Churx4CQL0fwL/SoOSA4= 
github.com/pingcap/sysutil v1.0.1-0.20230407040306-fb007c5aff21 h1:QV6jqlfOkh8hqvEAgwBZa+4bSgO0EeKC7s5c6Luam2I= github.com/pingcap/sysutil v1.0.1-0.20230407040306-fb007c5aff21/go.mod h1:QYnjfA95ZaMefyl1NO8oPtKeb8pYUdnDVhQgf+qdpjM= -github.com/pingcap/tidb-dashboard v0.0.0-20230911054332-22add1e00511 h1:oyrCfNlAWmLlUfEr+7YTSBo29SP/J1N8hnxBt5yUABo= -github.com/pingcap/tidb-dashboard v0.0.0-20230911054332-22add1e00511/go.mod h1:EZ90+V5S4TttbYag6oKZ3jcNKRwZe1Mc9vXwOt9JBYw= +github.com/pingcap/tidb-dashboard v0.0.0-20231127105651-ce4097837c5e h1:SJUSDejvKtj9vSh5ptRHh4iMrvPV3oKO8yp6/SYE8vc= +github.com/pingcap/tidb-dashboard v0.0.0-20231127105651-ce4097837c5e/go.mod h1:ucZBRz52icb23T/5Z4CsuUHmarYiin7p2MeiVBe+o8c= github.com/pingcap/tipb v0.0.0-20220718022156-3e2483c20a9e h1:FBaTXU8C3xgt/drM58VHxojHo/QoG1oPsgWTGvaSpO4= github.com/pingcap/tipb v0.0.0-20220718022156-3e2483c20a9e/go.mod h1:A7mrd7WHBl1o63LE2bIBGEJMTNWXqhgmYiOvMLxozfs= github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA= @@ -512,8 +532,10 @@ github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/ github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= -github.com/stretchr/testify v1.8.2 h1:+h33VjcLVPDHtOdpUCuF+7gSuG3yGIftsP1YvFihtJ8= +github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= github.com/stretchr/testify v1.8.2/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= +github.com/stretchr/testify v1.8.3 h1:RP3t2pwF7cMEbC1dqtB6poj3niw/9gnV4Cjg5oW5gtY= +github.com/stretchr/testify v1.8.3/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= github.com/swaggo/files v0.0.0-20210815190702-a29dd2bc99b2 h1:+iNTcqQJy0OZ5jk6a5NLib47eqXK8uYcPX+O4+cBpEM= github.com/swaggo/files v0.0.0-20210815190702-a29dd2bc99b2/go.mod h1:lKJPbtWzJ9JhsTN1k1gZgleJWY/cqq0psdoMmaThG3w= github.com/swaggo/http-swagger v1.2.6 h1:ihTjChUoSRMpFMjWw+0AkL1Ti4r6v8pCgVYLmQVRlRw= @@ -539,13 +561,16 @@ github.com/tklauser/numcpus v0.6.0/go.mod h1:FEZLMke0lhOUG6w2JadTzp0a+Nl8PF/GFkQ github.com/tmc/grpc-websocket-proxy v0.0.0-20190109142713-0ad062ec5ee5/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= github.com/tmc/grpc-websocket-proxy v0.0.0-20200427203606-3cfed13b9966 h1:j6JEOq5QWFker+d7mFQYOhjTZonQ7YkLTHm56dbn+yM= github.com/tmc/grpc-websocket-proxy v0.0.0-20200427203606-3cfed13b9966/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= +github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI= +github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08= github.com/ugorji/go v1.1.4/go.mod h1:uQMGLiO92mf5W77hV/PUCpI3pbzQx3CRekS0kk+RGrc= github.com/ugorji/go v1.1.7/go.mod h1:kZn38zHttfInRq0xu/PH0az30d+z6vm202qpg1oXVMw= github.com/ugorji/go v1.2.7/go.mod h1:nF9osbDWLy6bDVv/Rtoh6QgnvNDpmCalQV5urGCCS6M= github.com/ugorji/go/codec v0.0.0-20181022190402-e5e69e061d4f/go.mod h1:VFNgLljTbGfSG7qAOspJ7OScBnGdDN/yBr0sguwnwf0= github.com/ugorji/go/codec v1.1.7/go.mod h1:Ax+UKWsSmolVDwsd+7N3ZtXu+yMGCf907BLYF3GoBXY= -github.com/ugorji/go/codec v1.2.7 h1:YPXUKf7fYbp/y8xloBqZOw2qaVggbfwMlI8WM3wZUJ0= github.com/ugorji/go/codec v1.2.7/go.mod h1:WGN1fab3R1fzQlVQTkfxVtIBhWDRqOviHU95kRgeqEY= +github.com/ugorji/go/codec v1.2.11 h1:BMaWp1Bb6fHwEtbplGBGJ498wD+LKlNSl25MjdZY4dU= 
+github.com/ugorji/go/codec v1.2.11/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg= github.com/unrolled/render v1.0.1 h1:VDDnQQVfBMsOsp3VaCJszSO0nkBIVEYoPWeRThk9spY= github.com/unrolled/render v1.0.1/go.mod h1:gN9T0NhL4Bfbwu8ann7Ry/TGHYfosul+J0obPf6NBdM= github.com/urfave/cli v1.20.0/go.mod h1:70zkFmudgCuE/ngEzBv17Jvp/497gISqfk5gWijbERA= @@ -603,6 +628,9 @@ go.uber.org/zap v1.16.0/go.mod h1:MA8QOfq0BHJwdXa996Y4dYkAqRKB8/1K1QMMZVaNZjQ= go.uber.org/zap v1.19.0/go.mod h1:xg/QME4nWcxGxrpdeYfq7UvYrLh66cuVKdrbD1XF/NI= go.uber.org/zap v1.24.0 h1:FiJd5l1UOLj0wCgbSE0rwwXHzEdAZS6hiiSnxJN/D60= go.uber.org/zap v1.24.0/go.mod h1:2kMP+WWQ8aoFoedH3T2sq6iJ2yDWpHbP0f6MQbS9Gkg= +golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8= +golang.org/x/arch v0.3.0 h1:02VY4/ZcO/gBOH6PUaoiptASxtXU10jazRCP865E97k= +golang.org/x/arch v0.3.0/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8= golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= @@ -712,6 +740,7 @@ golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20211013075003-97ac67df715c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211019181941-9d821ace8654/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220704084225-05e143d24a9e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= @@ -844,6 +873,7 @@ honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWh honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg= moul.io/zapgorm2 v1.1.0 h1:qwAlMBYf+qJkJ7PAzJl4oCe6eS6QGiKAXUPeis0+RBE= moul.io/zapgorm2 v1.1.0/go.mod h1:emRfKjNqSzVj5lcgasBdovIXY1jSOwFz2GQZn1Rddks= +rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4= sigs.k8s.io/yaml v1.1.0/go.mod h1:UJmg0vDUVViEyp3mgSv9WPwZCDxu4rQW1olrI1uml+o= sigs.k8s.io/yaml v1.2.0 h1:kr/MCeFWJWTwyaHoR9c8EjH9OumOmoF9YGiZd7lFm/Q= sigs.k8s.io/yaml v1.2.0/go.mod h1:yfXDCHCao9+ENCvLSE62v9VSji2MKu5jeNfTrofGhJc= diff --git a/tests/integrations/mcs/keyspace/tso_keyspace_group_test.go b/tests/integrations/mcs/keyspace/tso_keyspace_group_test.go index af7b31553b3..7dcce498d56 100644 --- a/tests/integrations/mcs/keyspace/tso_keyspace_group_test.go +++ b/tests/integrations/mcs/keyspace/tso_keyspace_group_test.go @@ -301,7 +301,10 @@ func (suite *keyspaceGroupTestSuite) TestDefaultKeyspaceGroup() { return code == http.StatusOK && kg != nil }, testutil.WithWaitFor(time.Second*1)) suite.Equal(utils.DefaultKeyspaceGroupID, kg.ID) - suite.Len(kg.Members, utils.DefaultKeyspaceGroupReplicaCount) + // the allocNodesToAllKeyspaceGroups loop will run every 100ms. 
+ testutil.Eventually(suite.Require(), func() bool { + return len(kg.Members) == utils.DefaultKeyspaceGroupReplicaCount + }) for _, member := range kg.Members { suite.Contains(nodes, member.Address) } @@ -336,7 +339,7 @@ func (suite *keyspaceGroupTestSuite) tryCreateKeyspaceGroup(request *handlers.Cr } func (suite *keyspaceGroupTestSuite) tryGetKeyspaceGroup(id uint32) (*endpoint.KeyspaceGroup, int) { - httpReq, err := http.NewRequest(http.MethodGet, suite.server.GetAddr()+keyspaceGroupsPrefix+fmt.Sprintf("/%d", id), nil) + httpReq, err := http.NewRequest(http.MethodGet, suite.server.GetAddr()+keyspaceGroupsPrefix+fmt.Sprintf("/%d", id), http.NoBody) suite.NoError(err) resp, err := suite.dialClient.Do(httpReq) suite.NoError(err) diff --git a/tests/integrations/mcs/resourcemanager/resource_manager_test.go b/tests/integrations/mcs/resourcemanager/resource_manager_test.go index ed6a3ee501c..36eb87a83db 100644 --- a/tests/integrations/mcs/resourcemanager/resource_manager_test.go +++ b/tests/integrations/mcs/resourcemanager/resource_manager_test.go @@ -4,7 +4,7 @@ // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // -// http://www.apache.org/licenses/LICENSE-2.0 +// http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, @@ -63,6 +63,7 @@ func (suite *resourceManagerClientTestSuite) SetupSuite() { re := suite.Require() re.NoError(failpoint.Enable("github.com/tikv/pd/pkg/mcs/resourcemanager/server/enableDegradedMode", `return(true)`)) + re.NoError(failpoint.Enable("github.com/tikv/pd/pkg/member/skipCampaignLeaderCheck", "return(true)")) suite.ctx, suite.clean = context.WithCancel(context.Background()) @@ -148,6 +149,7 @@ func (suite *resourceManagerClientTestSuite) TearDownSuite() { suite.cluster.Destroy() suite.clean() re.NoError(failpoint.Disable("github.com/tikv/pd/pkg/mcs/resourcemanager/server/enableDegradedMode")) + re.NoError(failpoint.Disable("github.com/tikv/pd/pkg/member/skipCampaignLeaderCheck")) } func (suite *resourceManagerClientTestSuite) TearDownTest() { @@ -438,9 +440,9 @@ func (suite *resourceManagerClientTestSuite) TestResourceGroupController() { rres := cas.tcs[i].makeReadResponse() wres := cas.tcs[i].makeWriteResponse() startTime := time.Now() - _, _, err := controller.OnRequestWait(suite.ctx, cas.resourceGroupName, rreq) + _, _, _, _, err := controller.OnRequestWait(suite.ctx, cas.resourceGroupName, rreq) re.NoError(err) - _, _, err = controller.OnRequestWait(suite.ctx, cas.resourceGroupName, wreq) + _, _, _, _, err = controller.OnRequestWait(suite.ctx, cas.resourceGroupName, wreq) re.NoError(err) sum += time.Since(startTime) controller.OnResponse(cas.resourceGroupName, rreq, rres) @@ -457,7 +459,7 @@ func (suite *resourceManagerClientTestSuite) TestResourceGroupController() { re.NoError(failpoint.Enable("github.com/tikv/pd/client/resource_group/controller/triggerUpdate", "return(true)")) tcs := tokenConsumptionPerSecond{rruTokensAtATime: 1, wruTokensAtATime: 900000000, times: 1, waitDuration: 0} wreq := tcs.makeWriteRequest() - _, _, err = controller.OnRequestWait(suite.ctx, rg.Name, wreq) + _, _, _, _, err = controller.OnRequestWait(suite.ctx, rg.Name, wreq) re.Error(err) time.Sleep(time.Millisecond * 200) re.NoError(failpoint.Disable("github.com/tikv/pd/client/resource_group/controller/triggerUpdate")) @@ -512,9 +514,9 @@ func (suite *resourceManagerClientTestSuite) TestSwitchBurst() 
{ wreq := tcs.makeWriteRequest() rres := tcs.makeReadResponse() wres := tcs.makeWriteResponse() - _, _, err := controller.OnRequestWait(suite.ctx, resourceGroupName, rreq) + _, _, _, _, err := controller.OnRequestWait(suite.ctx, resourceGroupName, rreq) re.NoError(err) - _, _, err = controller.OnRequestWait(suite.ctx, resourceGroupName, wreq) + _, _, _, _, err = controller.OnRequestWait(suite.ctx, resourceGroupName, wreq) re.NoError(err) controller.OnResponse(resourceGroupName, rreq, rres) controller.OnResponse(resourceGroupName, wreq, wres) @@ -551,9 +553,9 @@ func (suite *resourceManagerClientTestSuite) TestSwitchBurst() { rres := cas.tcs[i].makeReadResponse() wres := cas.tcs[i].makeWriteResponse() startTime := time.Now() - _, _, err := controller.OnRequestWait(suite.ctx, resourceGroupName, rreq) + _, _, _, _, err := controller.OnRequestWait(suite.ctx, resourceGroupName, rreq) re.NoError(err) - _, _, err = controller.OnRequestWait(suite.ctx, resourceGroupName, wreq) + _, _, _, _, err = controller.OnRequestWait(suite.ctx, resourceGroupName, wreq) re.NoError(err) sum += time.Since(startTime) controller.OnResponse(resourceGroupName, rreq, rres) @@ -571,14 +573,14 @@ func (suite *resourceManagerClientTestSuite) TestSwitchBurst() { resourceGroupName2 := suite.initGroups[2].Name tcs = tokenConsumptionPerSecond{rruTokensAtATime: 1, wruTokensAtATime: 100000, times: 1, waitDuration: 0} wreq := tcs.makeWriteRequest() - _, _, err := controller.OnRequestWait(suite.ctx, resourceGroupName2, wreq) + _, _, _, _, err := controller.OnRequestWait(suite.ctx, resourceGroupName2, wreq) re.NoError(err) re.NoError(failpoint.Enable("github.com/tikv/pd/client/resource_group/controller/acceleratedSpeedTrend", "return(true)")) resourceGroupName3 := suite.initGroups[3].Name tcs = tokenConsumptionPerSecond{rruTokensAtATime: 1, wruTokensAtATime: 1000, times: 1, waitDuration: 0} wreq = tcs.makeWriteRequest() - _, _, err = controller.OnRequestWait(suite.ctx, resourceGroupName3, wreq) + _, _, _, _, err = controller.OnRequestWait(suite.ctx, resourceGroupName3, wreq) re.NoError(err) time.Sleep(110 * time.Millisecond) tcs = tokenConsumptionPerSecond{rruTokensAtATime: 1, wruTokensAtATime: 10, times: 1010, waitDuration: 0} @@ -586,7 +588,7 @@ func (suite *resourceManagerClientTestSuite) TestSwitchBurst() { for i := 0; i < tcs.times; i++ { wreq = tcs.makeWriteRequest() startTime := time.Now() - _, _, err = controller.OnRequestWait(suite.ctx, resourceGroupName3, wreq) + _, _, _, _, err = controller.OnRequestWait(suite.ctx, resourceGroupName3, wreq) duration += time.Since(startTime) re.NoError(err) } @@ -635,7 +637,7 @@ func (suite *resourceManagerClientTestSuite) TestResourcePenalty() { // init req := controller.NewTestRequestInfo(false, 0, 2 /* store2 */) resp := controller.NewTestResponseInfo(0, time.Duration(30), true) - _, penalty, err := c.OnRequestWait(suite.ctx, resourceGroupName, req) + _, penalty, _, _, err := c.OnRequestWait(suite.ctx, resourceGroupName, req) re.NoError(err) re.Equal(penalty.WriteBytes, 0.0) re.Equal(penalty.TotalCpuTimeMs, 0.0) @@ -644,7 +646,7 @@ func (suite *resourceManagerClientTestSuite) TestResourcePenalty() { req = controller.NewTestRequestInfo(true, 60, 1 /* store1 */) resp = controller.NewTestResponseInfo(0, time.Duration(10), true) - _, penalty, err = c.OnRequestWait(suite.ctx, resourceGroupName, req) + _, penalty, _, _, err = c.OnRequestWait(suite.ctx, resourceGroupName, req) re.NoError(err) re.Equal(penalty.WriteBytes, 0.0) re.Equal(penalty.TotalCpuTimeMs, 0.0) @@ -654,7 +656,7 @@ func 
(suite *resourceManagerClientTestSuite) TestResourcePenalty() { // failed request, shouldn't be counted in penalty req = controller.NewTestRequestInfo(true, 20, 1 /* store1 */) resp = controller.NewTestResponseInfo(0, time.Duration(0), false) - _, penalty, err = c.OnRequestWait(suite.ctx, resourceGroupName, req) + _, penalty, _, _, err = c.OnRequestWait(suite.ctx, resourceGroupName, req) re.NoError(err) re.Equal(penalty.WriteBytes, 0.0) re.Equal(penalty.TotalCpuTimeMs, 0.0) @@ -664,7 +666,7 @@ func (suite *resourceManagerClientTestSuite) TestResourcePenalty() { // from same store, should be zero req1 := controller.NewTestRequestInfo(false, 0, 1 /* store1 */) resp1 := controller.NewTestResponseInfo(0, time.Duration(10), true) - _, penalty, err = c.OnRequestWait(suite.ctx, resourceGroupName, req1) + _, penalty, _, _, err = c.OnRequestWait(suite.ctx, resourceGroupName, req1) re.NoError(err) re.Equal(penalty.WriteBytes, 0.0) _, err = c.OnResponse(resourceGroupName, req1, resp1) @@ -673,7 +675,7 @@ func (suite *resourceManagerClientTestSuite) TestResourcePenalty() { // from different store, should be non-zero req2 := controller.NewTestRequestInfo(true, 50, 2 /* store2 */) resp2 := controller.NewTestResponseInfo(0, time.Duration(10), true) - _, penalty, err = c.OnRequestWait(suite.ctx, resourceGroupName, req2) + _, penalty, _, _, err = c.OnRequestWait(suite.ctx, resourceGroupName, req2) re.NoError(err) re.Equal(penalty.WriteBytes, 60.0) re.InEpsilon(penalty.TotalCpuTimeMs, 10.0/1000.0/1000.0, 1e-6) @@ -683,7 +685,7 @@ func (suite *resourceManagerClientTestSuite) TestResourcePenalty() { // from new store, should be zero req3 := controller.NewTestRequestInfo(true, 0, 3 /* store3 */) resp3 := controller.NewTestResponseInfo(0, time.Duration(10), true) - _, penalty, err = c.OnRequestWait(suite.ctx, resourceGroupName, req3) + _, penalty, _, _, err = c.OnRequestWait(suite.ctx, resourceGroupName, req3) re.NoError(err) re.Equal(penalty.WriteBytes, 0.0) _, err = c.OnResponse(resourceGroupName, req3, resp3) @@ -693,7 +695,7 @@ func (suite *resourceManagerClientTestSuite) TestResourcePenalty() { resourceGroupName = groupNames[1] req4 := controller.NewTestRequestInfo(true, 50, 1 /* store2 */) resp4 := controller.NewTestResponseInfo(0, time.Duration(10), true) - _, penalty, err = c.OnRequestWait(suite.ctx, resourceGroupName, req4) + _, penalty, _, _, err = c.OnRequestWait(suite.ctx, resourceGroupName, req4) re.NoError(err) re.Equal(penalty.WriteBytes, 0.0) _, err = c.OnResponse(resourceGroupName, req4, resp4) @@ -1004,7 +1006,7 @@ func (suite *resourceManagerClientTestSuite) TestBasicResourceGroupCURD() { // Delete all resource groups for _, g := range groups { - req, err := http.NewRequest(http.MethodDelete, getAddr(i+1)+"/resource-manager/api/v1/config/group/"+g.Name, nil) + req, err := http.NewRequest(http.MethodDelete, getAddr(i+1)+"/resource-manager/api/v1/config/group/"+g.Name, http.NoBody) re.NoError(err) resp, err := http.DefaultClient.Do(req) re.NoError(err) diff --git a/tests/integrations/mcs/scheduling/api_test.go b/tests/integrations/mcs/scheduling/api_test.go index 5284913813c..8f5d37ee1bb 100644 --- a/tests/integrations/mcs/scheduling/api_test.go +++ b/tests/integrations/mcs/scheduling/api_test.go @@ -1,7 +1,7 @@ package scheduling_test import ( - "context" + "encoding/hex" "encoding/json" "fmt" "net/http" @@ -10,12 +10,16 @@ import ( "github.com/pingcap/failpoint" "github.com/stretchr/testify/suite" + "github.com/tikv/pd/pkg/core" _ "github.com/tikv/pd/pkg/mcs/scheduling/server/apis/v1" + 
"github.com/tikv/pd/pkg/mcs/scheduling/server/config" + "github.com/tikv/pd/pkg/mcs/utils" "github.com/tikv/pd/pkg/schedule/handler" + "github.com/tikv/pd/pkg/schedule/labeler" + "github.com/tikv/pd/pkg/schedule/placement" "github.com/tikv/pd/pkg/statistics" "github.com/tikv/pd/pkg/storage" "github.com/tikv/pd/pkg/utils/apiutil" - "github.com/tikv/pd/pkg/utils/tempurl" "github.com/tikv/pd/pkg/utils/testutil" "github.com/tikv/pd/tests" ) @@ -28,45 +32,27 @@ var testDialClient = &http.Client{ type apiTestSuite struct { suite.Suite - ctx context.Context - cleanupFunc testutil.CleanupFunc - cluster *tests.TestCluster - server *tests.TestServer - backendEndpoints string - dialClient *http.Client + env *tests.SchedulingTestEnvironment } func TestAPI(t *testing.T) { - suite.Run(t, &apiTestSuite{}) + suite.Run(t, new(apiTestSuite)) } func (suite *apiTestSuite) SetupSuite() { - ctx, cancel := context.WithCancel(context.Background()) - suite.ctx = ctx - cluster, err := tests.NewTestAPICluster(suite.ctx, 1) - suite.cluster = cluster - suite.NoError(err) - suite.NoError(cluster.RunInitialServers()) - suite.NotEmpty(cluster.WaitLeader()) - suite.server = cluster.GetLeaderServer() - suite.NoError(suite.server.BootstrapCluster()) - suite.backendEndpoints = suite.server.GetAddr() - suite.dialClient = &http.Client{ - Transport: &http.Transport{ - DisableKeepAlives: true, - }, - } - suite.cleanupFunc = func() { - cancel() - } + suite.env = tests.NewSchedulingTestEnvironment(suite.T()) } func (suite *apiTestSuite) TearDownSuite() { - suite.cluster.Destroy() - suite.cleanupFunc() + suite.env.Cleanup() } func (suite *apiTestSuite) TestGetCheckerByName() { + suite.env.RunTestInAPIMode(suite.checkGetCheckerByName) +} + +func (suite *apiTestSuite) checkGetCheckerByName(cluster *tests.TestCluster) { + re := suite.Require() testCases := []struct { name string }{ @@ -78,14 +64,8 @@ func (suite *apiTestSuite) TestGetCheckerByName() { {name: "joint-state"}, } - re := suite.Require() - s, cleanup := tests.StartSingleSchedulingTestServer(suite.ctx, re, suite.backendEndpoints, tempurl.Alloc()) - defer cleanup() - testutil.Eventually(re, func() bool { - return s.IsServing() - }, testutil.WithWaitFor(5*time.Second), testutil.WithTickInterval(50*time.Millisecond)) - addr := s.GetAddr() - urlPrefix := fmt.Sprintf("%s/scheduling/api/v1/checkers", addr) + s := cluster.GetSchedulingPrimaryServer() + urlPrefix := fmt.Sprintf("%s/scheduling/api/v1/checkers", s.GetAddr()) co := s.GetCoordinator() for _, testCase := range testCases { @@ -114,23 +94,26 @@ func (suite *apiTestSuite) TestGetCheckerByName() { } func (suite *apiTestSuite) TestAPIForward() { + suite.env.RunTestInAPIMode(suite.checkAPIForward) +} + +func (suite *apiTestSuite) checkAPIForward(cluster *tests.TestCluster) { re := suite.Require() re.NoError(failpoint.Enable("github.com/tikv/pd/pkg/utils/apiutil/serverapi/checkHeader", "return(true)")) defer func() { re.NoError(failpoint.Disable("github.com/tikv/pd/pkg/utils/apiutil/serverapi/checkHeader")) }() - tc, err := tests.NewTestSchedulingCluster(suite.ctx, 2, suite.backendEndpoints) - re.NoError(err) - defer tc.Destroy() - tc.WaitForPrimaryServing(re) - - urlPrefix := fmt.Sprintf("%s/pd/api/v1", suite.backendEndpoints) + leader := cluster.GetLeaderServer().GetServer() + urlPrefix := fmt.Sprintf("%s/pd/api/v1", leader.GetAddr()) var slice []string var resp map[string]interface{} + testutil.Eventually(re, func() bool { + return leader.GetRaftCluster().IsServiceIndependent(utils.SchedulingServiceName) + }) - // Test 
opeartor - err = testutil.ReadGetJSON(re, testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "operators"), &slice, + // Test operators + err := testutil.ReadGetJSON(re, testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "operators"), &slice, testutil.WithHeader(re, apiutil.ForwardToMicroServiceHeader, "true")) re.NoError(err) re.Len(slice, 0) @@ -157,19 +140,27 @@ func (suite *apiTestSuite) TestAPIForward() { re.NoError(err) suite.False(resp["paused"].(bool)) - input := make(map[string]interface{}) - input["delay"] = 10 - pauseArgs, err := json.Marshal(input) - suite.NoError(err) - err = testutil.CheckPostJSON(testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "checker/merge"), pauseArgs, - testutil.StatusOK(re), testutil.WithHeader(re, apiutil.ForwardToMicroServiceHeader, "true")) - suite.NoError(err) + // Test pause + postChecker := func(delay int) { + input := make(map[string]interface{}) + input["delay"] = delay + pauseArgs, err := json.Marshal(input) + suite.NoError(err) + err = testutil.CheckPostJSON(testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "checker/merge"), pauseArgs, + testutil.StatusOK(re), testutil.WithHeader(re, apiutil.ForwardToMicroServiceHeader, "true")) + suite.NoError(err) + } + postChecker(30) + postChecker(0) // Test scheduler: // Need to redirect: // "/schedulers", http.MethodGet // "/schedulers/{name}", http.MethodPost // "/schedulers/diagnostic/{name}", http.MethodGet + // "/scheduler-config/", http.MethodGet + // "/scheduler-config/{name}/list", http.MethodGet + // "/scheduler-config/{name}/roles", http.MethodGet // Should not redirect: // "/schedulers", http.MethodPost // "/schedulers/{name}", http.MethodDelete @@ -178,18 +169,41 @@ func (suite *apiTestSuite) TestAPIForward() { re.NoError(err) re.Contains(slice, "balance-leader-scheduler") - input["delay"] = 30 - pauseArgs, err = json.Marshal(input) - suite.NoError(err) - err = testutil.CheckPostJSON(testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "schedulers/balance-leader-scheduler"), pauseArgs, + postScheduler := func(delay int) { + input := make(map[string]interface{}) + input["delay"] = delay + pauseArgs, err := json.Marshal(input) + suite.NoError(err) + err = testutil.CheckPostJSON(testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "schedulers/balance-leader-scheduler"), pauseArgs, + testutil.WithHeader(re, apiutil.ForwardToMicroServiceHeader, "true")) + suite.NoError(err) + } + postScheduler(30) + postScheduler(0) + + err = testutil.ReadGetJSON(re, testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "schedulers/diagnostic/balance-leader-scheduler"), &resp, testutil.WithHeader(re, apiutil.ForwardToMicroServiceHeader, "true")) suite.NoError(err) - err = testutil.ReadGetJSON(re, testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "schedulers/diagnostic/balance-leader-scheduler"), &resp, + err = testutil.ReadGetJSON(re, testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "scheduler-config"), &resp, testutil.WithHeader(re, apiutil.ForwardToMicroServiceHeader, "true")) suite.NoError(err) + re.Contains(resp, "balance-leader-scheduler") + re.Contains(resp, "balance-witness-scheduler") + re.Contains(resp, "balance-hot-region-scheduler") - err = testutil.CheckPostJSON(testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "schedulers"), pauseArgs, + schedulers := []string{ + "balance-leader-scheduler", + "balance-witness-scheduler", + "balance-hot-region-scheduler", + } + for _, schedulerName := range schedulers { + err = testutil.ReadGetJSON(re, testDialClient, fmt.Sprintf("%s/%s/%s/%s", urlPrefix, "scheduler-config", schedulerName, "list"), 
&resp, + testutil.WithHeader(re, apiutil.ForwardToMicroServiceHeader, "true")) + suite.NoError(err) + } + + err = testutil.CheckPostJSON(testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "schedulers"), nil, testutil.WithoutHeader(re, apiutil.ForwardToMicroServiceHeader)) re.NoError(err) @@ -197,6 +211,14 @@ func (suite *apiTestSuite) TestAPIForward() { testutil.WithoutHeader(re, apiutil.ForwardToMicroServiceHeader)) re.NoError(err) + input := make(map[string]interface{}) + input["name"] = "balance-leader-scheduler" + b, err := json.Marshal(input) + re.NoError(err) + err = testutil.CheckPostJSON(testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "schedulers"), b, + testutil.WithoutHeader(re, apiutil.ForwardToMicroServiceHeader)) + re.NoError(err) + // Test hotspot var hotRegions statistics.StoreHotPeersInfos err = testutil.ReadGetJSON(re, testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "hotspot/regions/write"), &hotRegions, @@ -217,4 +239,263 @@ func (suite *apiTestSuite) TestAPIForward() { err = testutil.ReadGetJSON(re, testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "hotspot/regions/history"), &history, testutil.WithHeader(re, apiutil.ForwardToMicroServiceHeader, "true")) re.NoError(err) + + // Test region label + var labelRules []*labeler.LabelRule + err = testutil.ReadGetJSON(re, testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "config/region-label/rules"), &labelRules, + testutil.WithHeader(re, apiutil.ForwardToMicroServiceHeader, "true")) + re.NoError(err) + err = testutil.ReadGetJSONWithBody(re, testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "config/region-label/rules/ids"), []byte(`["rule1", "rule3"]`), + &labelRules, testutil.WithHeader(re, apiutil.ForwardToMicroServiceHeader, "true")) + re.NoError(err) + err = testutil.CheckGetJSON(testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "config/region-label/rule/rule1"), nil, + testutil.StatusNotOK(re), testutil.WithHeader(re, apiutil.ForwardToMicroServiceHeader, "true")) + re.NoError(err) + + err = testutil.CheckGetJSON(testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "region/id/1"), nil, + testutil.WithoutHeader(re, apiutil.ForwardToMicroServiceHeader)) + re.NoError(err) + err = testutil.CheckGetJSON(testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "region/id/1/label/key"), nil, + testutil.WithHeader(re, apiutil.ForwardToMicroServiceHeader, "true")) + re.NoError(err) + err = testutil.CheckGetJSON(testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "region/id/1/labels"), nil, + testutil.WithHeader(re, apiutil.ForwardToMicroServiceHeader, "true")) + re.NoError(err) + + // Test Region + body := fmt.Sprintf(`{"start_key":"%s", "end_key": "%s"}`, hex.EncodeToString([]byte("a1")), hex.EncodeToString([]byte("a3"))) + err = testutil.CheckPostJSON(testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "regions/accelerate-schedule"), []byte(body), + testutil.StatusOK(re), testutil.WithHeader(re, apiutil.ForwardToMicroServiceHeader, "true")) + re.NoError(err) + body = fmt.Sprintf(`[{"start_key":"%s", "end_key": "%s"}, {"start_key":"%s", "end_key": "%s"}]`, hex.EncodeToString([]byte("a1")), hex.EncodeToString([]byte("a3")), hex.EncodeToString([]byte("a4")), hex.EncodeToString([]byte("a6"))) + err = testutil.CheckPostJSON(testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "regions/accelerate-schedule/batch"), []byte(body), + testutil.StatusOK(re), testutil.WithHeader(re, apiutil.ForwardToMicroServiceHeader, "true")) + re.NoError(err) + body = fmt.Sprintf(`{"start_key":"%s", "end_key": "%s"}`, hex.EncodeToString([]byte("b1")), hex.EncodeToString([]byte("b3"))) + err = 
testutil.CheckPostJSON(testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "regions/scatter"), []byte(body), + testutil.WithHeader(re, apiutil.ForwardToMicroServiceHeader, "true")) + suite.NoError(err) + body = fmt.Sprintf(`{"retry_limit":%v, "split_keys": ["%s","%s","%s"]}`, 3, + hex.EncodeToString([]byte("bbb")), + hex.EncodeToString([]byte("ccc")), + hex.EncodeToString([]byte("ddd"))) + err = testutil.CheckPostJSON(testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "regions/split"), []byte(body), + testutil.StatusOK(re), testutil.WithHeader(re, apiutil.ForwardToMicroServiceHeader, "true")) + suite.NoError(err) + err = testutil.CheckGetJSON(testDialClient, fmt.Sprintf(`%s/regions/replicated?startKey=%s&endKey=%s`, urlPrefix, hex.EncodeToString([]byte("a1")), hex.EncodeToString([]byte("a2"))), nil, + testutil.StatusOK(re), testutil.WithHeader(re, apiutil.ForwardToMicroServiceHeader, "true")) + suite.NoError(err) + // Test rules: only forward `GET` request + var rules []*placement.Rule + tests.MustPutRegion(re, cluster, 2, 1, []byte("a"), []byte("b"), core.SetApproximateSize(60)) + rules = []*placement.Rule{ + { + GroupID: placement.DefaultGroupID, + ID: placement.DefaultRuleID, + Role: placement.Voter, + Count: 3, + LocationLabels: []string{}, + }, + } + rulesArgs, err := json.Marshal(rules) + suite.NoError(err) + + err = testutil.ReadGetJSON(re, testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "/config/rules"), &rules, + testutil.WithHeader(re, apiutil.ForwardToMicroServiceHeader, "true")) + re.NoError(err) + err = testutil.CheckPostJSON(testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "config/rules"), rulesArgs, + testutil.WithoutHeader(re, apiutil.ForwardToMicroServiceHeader)) + re.NoError(err) + err = testutil.CheckPostJSON(testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "config/rules/batch"), rulesArgs, + testutil.WithoutHeader(re, apiutil.ForwardToMicroServiceHeader)) + re.NoError(err) + err = testutil.ReadGetJSON(re, testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "config/rules/group/pd"), &rules, + testutil.WithHeader(re, apiutil.ForwardToMicroServiceHeader, "true")) + re.NoError(err) + err = testutil.ReadGetJSON(re, testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "config/rules/region/2"), &rules, + testutil.WithHeader(re, apiutil.ForwardToMicroServiceHeader, "true")) + re.NoError(err) + var fit placement.RegionFit + err = testutil.ReadGetJSON(re, testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "config/rules/region/2/detail"), &fit, + testutil.WithHeader(re, apiutil.ForwardToMicroServiceHeader, "true")) + re.NoError(err) + err = testutil.ReadGetJSON(re, testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "config/rules/key/0000000000000001"), &rules, + testutil.WithHeader(re, apiutil.ForwardToMicroServiceHeader, "true")) + re.NoError(err) + err = testutil.CheckGetJSON(testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "config/rule/pd/2"), nil, + testutil.WithHeader(re, apiutil.ForwardToMicroServiceHeader, "true")) + re.NoError(err) + err = testutil.CheckDelete(testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "config/rule/pd/2"), + testutil.WithoutHeader(re, apiutil.ForwardToMicroServiceHeader)) + re.NoError(err) + err = testutil.CheckPostJSON(testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "config/rule"), rulesArgs, + testutil.WithoutHeader(re, apiutil.ForwardToMicroServiceHeader)) + re.NoError(err) + err = testutil.CheckGetJSON(testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "config/rule_group/pd"), nil, + testutil.WithHeader(re, apiutil.ForwardToMicroServiceHeader, "true")) + re.NoError(err) + 
err = testutil.CheckDelete(testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "config/rule_group/pd"), + testutil.WithoutHeader(re, apiutil.ForwardToMicroServiceHeader)) + re.NoError(err) + err = testutil.CheckPostJSON(testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "config/rule_group"), rulesArgs, + testutil.WithoutHeader(re, apiutil.ForwardToMicroServiceHeader)) + re.NoError(err) + err = testutil.CheckGetJSON(testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "config/rule_groups"), nil, + testutil.WithHeader(re, apiutil.ForwardToMicroServiceHeader, "true")) + re.NoError(err) + err = testutil.CheckGetJSON(testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "config/placement-rule"), nil, + testutil.WithHeader(re, apiutil.ForwardToMicroServiceHeader, "true")) + re.NoError(err) + err = testutil.CheckPostJSON(testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "config/placement-rule"), rulesArgs, + testutil.WithoutHeader(re, apiutil.ForwardToMicroServiceHeader)) + re.NoError(err) + err = testutil.CheckGetJSON(testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "config/placement-rule/pd"), nil, + testutil.WithHeader(re, apiutil.ForwardToMicroServiceHeader, "true")) + re.NoError(err) + err = testutil.CheckDelete(testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "config/placement-rule/pd"), + testutil.WithoutHeader(re, apiutil.ForwardToMicroServiceHeader)) + re.NoError(err) + err = testutil.CheckPostJSON(testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "config/placement-rule/pd"), rulesArgs, + testutil.WithoutHeader(re, apiutil.ForwardToMicroServiceHeader)) + re.NoError(err) +} + +func (suite *apiTestSuite) TestConfig() { + suite.env.RunTestInAPIMode(suite.checkConfig) +} + +func (suite *apiTestSuite) checkConfig(cluster *tests.TestCluster) { + re := suite.Require() + s := cluster.GetSchedulingPrimaryServer() + testutil.Eventually(re, func() bool { + return s.IsServing() + }, testutil.WithWaitFor(5*time.Second), testutil.WithTickInterval(50*time.Millisecond)) + addr := s.GetAddr() + urlPrefix := fmt.Sprintf("%s/scheduling/api/v1/config", addr) + + var cfg config.Config + testutil.ReadGetJSON(re, testDialClient, urlPrefix, &cfg) + suite.Equal(cfg.GetListenAddr(), s.GetConfig().GetListenAddr()) + suite.Equal(cfg.Schedule.LeaderScheduleLimit, s.GetConfig().Schedule.LeaderScheduleLimit) + suite.Equal(cfg.Schedule.EnableCrossTableMerge, s.GetConfig().Schedule.EnableCrossTableMerge) + suite.Equal(cfg.Replication.MaxReplicas, s.GetConfig().Replication.MaxReplicas) + suite.Equal(cfg.Replication.LocationLabels, s.GetConfig().Replication.LocationLabels) + suite.Equal(cfg.DataDir, s.GetConfig().DataDir) + testutil.Eventually(re, func() bool { + // wait for all schedulers to be loaded in scheduling server. 
+ return len(cfg.Schedule.SchedulersPayload) == 5 + }) + suite.Contains(cfg.Schedule.SchedulersPayload, "balance-leader-scheduler") + suite.Contains(cfg.Schedule.SchedulersPayload, "balance-region-scheduler") + suite.Contains(cfg.Schedule.SchedulersPayload, "balance-hot-region-scheduler") + suite.Contains(cfg.Schedule.SchedulersPayload, "balance-witness-scheduler") + suite.Contains(cfg.Schedule.SchedulersPayload, "transfer-witness-leader-scheduler") +} + +func (suite *apiTestSuite) TestConfigForward() { + suite.env.RunTestInAPIMode(suite.checkConfigForward) +} + +func (suite *apiTestSuite) checkConfigForward(cluster *tests.TestCluster) { + re := suite.Require() + sche := cluster.GetSchedulingPrimaryServer() + opts := sche.GetPersistConfig() + var cfg map[string]interface{} + addr := cluster.GetLeaderServer().GetAddr() + urlPrefix := fmt.Sprintf("%s/pd/api/v1/config", addr) + + // Test config forward + // Expect to get same config in scheduling server and api server + testutil.Eventually(re, func() bool { + testutil.ReadGetJSON(re, testDialClient, urlPrefix, &cfg) + re.Equal(cfg["schedule"].(map[string]interface{})["leader-schedule-limit"], + float64(opts.GetLeaderScheduleLimit())) + re.Equal(cfg["replication"].(map[string]interface{})["max-replicas"], + float64(opts.GetReplicationConfig().MaxReplicas)) + schedulers := cfg["schedule"].(map[string]interface{})["schedulers-payload"].(map[string]interface{}) + return len(schedulers) == 5 + }) + + // Test to change config in api server + // Expect to get new config in scheduling server and api server + reqData, err := json.Marshal(map[string]interface{}{ + "max-replicas": 4, + }) + re.NoError(err) + err = testutil.CheckPostJSON(testDialClient, urlPrefix, reqData, testutil.StatusOK(re)) + re.NoError(err) + testutil.Eventually(re, func() bool { + testutil.ReadGetJSON(re, testDialClient, urlPrefix, &cfg) + return cfg["replication"].(map[string]interface{})["max-replicas"] == 4. && + opts.GetReplicationConfig().MaxReplicas == 4. 
+ }) + + // Test to change config only in scheduling server + // Expect to get new config in scheduling server but not old config in api server + opts.GetScheduleConfig().LeaderScheduleLimit = 100 + re.Equal(100, int(opts.GetLeaderScheduleLimit())) + testutil.ReadGetJSON(re, testDialClient, urlPrefix, &cfg) + re.Equal(100., cfg["schedule"].(map[string]interface{})["leader-schedule-limit"]) + opts.GetReplicationConfig().MaxReplicas = 5 + re.Equal(5, int(opts.GetReplicationConfig().MaxReplicas)) + testutil.ReadGetJSON(re, testDialClient, urlPrefix, &cfg) + re.Equal(5., cfg["replication"].(map[string]interface{})["max-replicas"]) +} + +func (suite *apiTestSuite) TestAdminRegionCache() { + suite.env.RunTestInAPIMode(suite.checkAdminRegionCache) +} + +func (suite *apiTestSuite) checkAdminRegionCache(cluster *tests.TestCluster) { + re := suite.Require() + r1 := core.NewTestRegionInfo(10, 1, []byte(""), []byte("b"), core.SetRegionConfVer(100), core.SetRegionVersion(100)) + tests.MustPutRegionInfo(re, cluster, r1) + r2 := core.NewTestRegionInfo(20, 1, []byte("b"), []byte("c"), core.SetRegionConfVer(100), core.SetRegionVersion(100)) + tests.MustPutRegionInfo(re, cluster, r2) + r3 := core.NewTestRegionInfo(30, 1, []byte("c"), []byte(""), core.SetRegionConfVer(100), core.SetRegionVersion(100)) + tests.MustPutRegionInfo(re, cluster, r3) + + schedulingServer := cluster.GetSchedulingPrimaryServer() + re.Equal(3, schedulingServer.GetCluster().GetRegionCount([]byte{}, []byte{})) + + addr := schedulingServer.GetAddr() + urlPrefix := fmt.Sprintf("%s/scheduling/api/v1/admin/cache/regions", addr) + err := testutil.CheckDelete(testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "30"), testutil.StatusOK(re)) + re.NoError(err) + re.Equal(2, schedulingServer.GetCluster().GetRegionCount([]byte{}, []byte{})) + + err = testutil.CheckDelete(testDialClient, urlPrefix, testutil.StatusOK(re)) + re.NoError(err) + re.Equal(0, schedulingServer.GetCluster().GetRegionCount([]byte{}, []byte{})) +} + +func (suite *apiTestSuite) TestAdminRegionCacheForward() { + suite.env.RunTestInAPIMode(suite.checkAdminRegionCacheForward) +} + +func (suite *apiTestSuite) checkAdminRegionCacheForward(cluster *tests.TestCluster) { + re := suite.Require() + r1 := core.NewTestRegionInfo(10, 1, []byte(""), []byte("b"), core.SetRegionConfVer(100), core.SetRegionVersion(100)) + tests.MustPutRegionInfo(re, cluster, r1) + r2 := core.NewTestRegionInfo(20, 1, []byte("b"), []byte("c"), core.SetRegionConfVer(100), core.SetRegionVersion(100)) + tests.MustPutRegionInfo(re, cluster, r2) + r3 := core.NewTestRegionInfo(30, 1, []byte("c"), []byte(""), core.SetRegionConfVer(100), core.SetRegionVersion(100)) + tests.MustPutRegionInfo(re, cluster, r3) + + apiServer := cluster.GetLeaderServer().GetServer() + schedulingServer := cluster.GetSchedulingPrimaryServer() + re.Equal(3, schedulingServer.GetCluster().GetRegionCount([]byte{}, []byte{})) + re.Equal(3, apiServer.GetRaftCluster().GetRegionCount([]byte{}, []byte{}).Count) + + addr := cluster.GetLeaderServer().GetAddr() + urlPrefix := fmt.Sprintf("%s/pd/api/v1/admin/cache/region", addr) + err := testutil.CheckDelete(testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "30"), testutil.StatusOK(re)) + re.NoError(err) + re.Equal(2, schedulingServer.GetCluster().GetRegionCount([]byte{}, []byte{})) + re.Equal(2, apiServer.GetRaftCluster().GetRegionCount([]byte{}, []byte{}).Count) + + err = testutil.CheckDelete(testDialClient, urlPrefix+"s", testutil.StatusOK(re)) + re.NoError(err) + re.Equal(0, 
schedulingServer.GetCluster().GetRegionCount([]byte{}, []byte{})) + re.Equal(0, apiServer.GetRaftCluster().GetRegionCount([]byte{}, []byte{}).Count) } diff --git a/tests/integrations/mcs/scheduling/config_test.go b/tests/integrations/mcs/scheduling/config_test.go index 8b8e284f765..69d77bb24ac 100644 --- a/tests/integrations/mcs/scheduling/config_test.go +++ b/tests/integrations/mcs/scheduling/config_test.go @@ -23,6 +23,7 @@ import ( "github.com/pingcap/kvproto/pkg/metapb" "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" + "github.com/tikv/pd/pkg/cache" "github.com/tikv/pd/pkg/core" "github.com/tikv/pd/pkg/mcs/scheduling/server/config" sc "github.com/tikv/pd/pkg/schedule/config" @@ -84,7 +85,7 @@ func (suite *configTestSuite) TestConfigWatch() { suite.ctx, suite.pdLeaderServer.GetEtcdClient(), suite.cluster.GetCluster().GetId(), - config.NewPersistConfig(config.NewConfig()), + config.NewPersistConfig(config.NewConfig(), cache.NewStringTTL(suite.ctx, sc.DefaultGCInterval, sc.DefaultTTL)), endpoint.NewStorageEndpoint(kv.NewMemoryKV(), nil), ) re.NoError(err) @@ -93,6 +94,9 @@ func (suite *configTestSuite) TestConfigWatch() { re.Equal(sc.DefaultSplitMergeInterval, watcher.GetScheduleConfig().SplitMergeInterval.Duration) re.Equal("0.0.0", watcher.GetClusterVersion().String()) // Update the config and check if the scheduling config watcher can get the latest value. + testutil.Eventually(re, func() bool { + return watcher.GetReplicationConfig().MaxReplicas == 3 + }) persistOpts := suite.pdLeaderServer.GetPersistOptions() persistOpts.SetMaxReplicas(5) persistConfig(re, suite.pdLeaderServer) @@ -133,7 +137,6 @@ func persistConfig(re *require.Assertions, pdLeaderServer *tests.TestServer) { func (suite *configTestSuite) TestSchedulerConfigWatch() { re := suite.Require() - // Make sure the config is persisted before the watcher is created. persistConfig(re, suite.pdLeaderServer) // Create a config watcher. @@ -142,13 +145,14 @@ func (suite *configTestSuite) TestSchedulerConfigWatch() { suite.ctx, suite.pdLeaderServer.GetEtcdClient(), suite.cluster.GetCluster().GetId(), - config.NewPersistConfig(config.NewConfig()), + config.NewPersistConfig(config.NewConfig(), cache.NewStringTTL(suite.ctx, sc.DefaultGCInterval, sc.DefaultTTL)), storage, ) re.NoError(err) // Get all default scheduler names. - var namesFromAPIServer, _, _ = suite.pdLeaderServer.GetRaftCluster().GetStorage().LoadAllSchedulerConfigs() + var namesFromAPIServer []string testutil.Eventually(re, func() bool { + namesFromAPIServer, _, _ = suite.pdLeaderServer.GetRaftCluster().GetStorage().LoadAllSchedulerConfigs() return len(namesFromAPIServer) == len(sc.DefaultSchedulers) }) // Check all default schedulers' configs. 
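A pattern worth noting across the test hunks above: one-shot assertions against asynchronously populated state (keyspace group members, watched schedule/replication config, loaded scheduler configs) are replaced with testutil.Eventually polls. Below is a minimal sketch of that retry shape, not code from this change: the waitForCount helper and its load callback are hypothetical stand-ins for the real watcher or storage calls, while testutil.Eventually, WithWaitFor, and WithTickInterval are the helpers already used in this diff.

package scheduling_test

import (
	"time"

	"github.com/stretchr/testify/require"

	"github.com/tikv/pd/pkg/utils/testutil"
)

// waitForCount polls load() until it reports the expected number of items or
// the wait budget is exhausted, at which point the assertion fails the test.
func waitForCount(re *require.Assertions, load func() []string, want int) {
	testutil.Eventually(re, func() bool {
		// Call load on every tick so each check observes fresh state, mirroring
		// how LoadAllSchedulerConfigs is moved inside the closure above.
		return len(load()) == want
	}, testutil.WithWaitFor(5*time.Second), testutil.WithTickInterval(50*time.Millisecond))
}

Polling like this is what lets the tests tolerate the 100ms keyspace-group allocation loop and the config watchers that the rest of this change makes asynchronous.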
diff --git a/tests/integrations/mcs/scheduling/meta_test.go b/tests/integrations/mcs/scheduling/meta_test.go index 74497e0b552..ce0dc620aef 100644 --- a/tests/integrations/mcs/scheduling/meta_test.go +++ b/tests/integrations/mcs/scheduling/meta_test.go @@ -99,4 +99,15 @@ func (suite *metaTestSuite) TestStoreWatch() { testutil.Eventually(re, func() bool { return cluster.GetStore(2) == nil }) + + // test synchronized store labels + suite.pdLeaderServer.GetServer().GetRaftCluster().PutStore( + &metapb.Store{Id: 5, Address: "mock-5", State: metapb.StoreState_Up, NodeState: metapb.NodeState_Serving, LastHeartbeat: time.Now().UnixNano(), Labels: []*metapb.StoreLabel{{Key: "zone", Value: "z1"}}}, + ) + testutil.Eventually(re, func() bool { + if len(cluster.GetStore(5).GetLabels()) == 0 { + return false + } + return cluster.GetStore(5).GetLabels()[0].GetValue() == "z1" + }) } diff --git a/tests/integrations/mcs/scheduling/rule_test.go b/tests/integrations/mcs/scheduling/rule_test.go index bffa58d0fe6..761e9b1ecbc 100644 --- a/tests/integrations/mcs/scheduling/rule_test.go +++ b/tests/integrations/mcs/scheduling/rule_test.go @@ -76,8 +76,8 @@ func (suite *ruleTestSuite) TestRuleWatch() { // Check the default rule and rule group. rules := ruleManager.GetAllRules() re.Len(rules, 1) - re.Equal("pd", rules[0].GroupID) - re.Equal("default", rules[0].ID) + re.Equal(placement.DefaultGroupID, rules[0].GroupID) + re.Equal(placement.DefaultRuleID, rules[0].ID) re.Equal(0, rules[0].Index) re.Empty(rules[0].StartKey) re.Empty(rules[0].EndKey) @@ -85,7 +85,7 @@ func (suite *ruleTestSuite) TestRuleWatch() { re.Empty(rules[0].LocationLabels) ruleGroups := ruleManager.GetRuleGroups() re.Len(ruleGroups, 1) - re.Equal("pd", ruleGroups[0].ID) + re.Equal(placement.DefaultGroupID, ruleGroups[0].ID) re.Equal(0, ruleGroups[0].Index) re.False(ruleGroups[0].Override) // Set a new rule via the PD API server. @@ -93,7 +93,7 @@ func (suite *ruleTestSuite) TestRuleWatch() { rule := &placement.Rule{ GroupID: "2", ID: "3", - Role: "voter", + Role: placement.Voter, Count: 1, StartKeyHex: "22", EndKeyHex: "dd", @@ -122,7 +122,7 @@ func (suite *ruleTestSuite) TestRuleWatch() { return len(rules) == 1 }) re.Len(rules, 1) - re.Equal("pd", rules[0].GroupID) + re.Equal(placement.DefaultGroupID, rules[0].GroupID) // Create a new rule group. 
ruleGroup := &placement.RuleGroup{ ID: "2", diff --git a/tests/integrations/mcs/scheduling/server_test.go b/tests/integrations/mcs/scheduling/server_test.go index 85cf84361b4..c65352114df 100644 --- a/tests/integrations/mcs/scheduling/server_test.go +++ b/tests/integrations/mcs/scheduling/server_test.go @@ -59,9 +59,9 @@ func TestServerTestSuite(t *testing.T) { func (suite *serverTestSuite) SetupSuite() { var err error re := suite.Require() - + re.NoError(failpoint.Enable("github.com/tikv/pd/server/cluster/highFrequencyClusterJobs", `return(true)`)) suite.ctx, suite.cancel = context.WithCancel(context.Background()) - suite.cluster, err = tests.NewTestAPICluster(suite.ctx, 3) + suite.cluster, err = tests.NewTestAPICluster(suite.ctx, 1) re.NoError(err) err = suite.cluster.RunInitialServers() @@ -76,6 +76,7 @@ func (suite *serverTestSuite) SetupSuite() { func (suite *serverTestSuite) TearDownSuite() { suite.cluster.Destroy() suite.cancel() + suite.NoError(failpoint.Disable("github.com/tikv/pd/server/cluster/highFrequencyClusterJobs")) } func (suite *serverTestSuite) TestAllocID() { @@ -95,6 +96,10 @@ func (suite *serverTestSuite) TestAllocID() { func (suite *serverTestSuite) TestAllocIDAfterLeaderChange() { re := suite.Require() re.NoError(failpoint.Enable("github.com/tikv/pd/pkg/mcs/scheduling/server/fastUpdateMember", `return(true)`)) + pd2, err := suite.cluster.Join(suite.ctx) + re.NoError(err) + err = pd2.Run() + re.NoError(err) tc, err := tests.NewTestSchedulingCluster(suite.ctx, 1, suite.backendEndpoints) re.NoError(err) defer tc.Destroy() @@ -116,6 +121,8 @@ func (suite *serverTestSuite) TestAllocIDAfterLeaderChange() { // Update the pdLeader in test suite. suite.pdLeader = suite.cluster.GetServer(suite.cluster.WaitLeader()) suite.backendEndpoints = suite.pdLeader.GetAddr() + suite.TearDownSuite() + suite.SetupSuite() } func (suite *serverTestSuite) TestPrimaryChange() { @@ -126,21 +133,21 @@ func (suite *serverTestSuite) TestPrimaryChange() { tc.WaitForPrimaryServing(re) primary := tc.GetPrimaryServer() oldPrimaryAddr := primary.GetAddr() - re.Len(primary.GetCluster().GetCoordinator().GetSchedulersController().GetSchedulerNames(), 5) testutil.Eventually(re, func() bool { watchedAddr, ok := suite.pdLeader.GetServicePrimaryAddr(suite.ctx, mcs.SchedulingServiceName) - return ok && oldPrimaryAddr == watchedAddr + return ok && oldPrimaryAddr == watchedAddr && + len(primary.GetCluster().GetCoordinator().GetSchedulersController().GetSchedulerNames()) == 5 }) - // transfer leader + // change primary primary.Close() tc.WaitForPrimaryServing(re) primary = tc.GetPrimaryServer() newPrimaryAddr := primary.GetAddr() re.NotEqual(oldPrimaryAddr, newPrimaryAddr) - re.Len(primary.GetCluster().GetCoordinator().GetSchedulersController().GetSchedulerNames(), 5) testutil.Eventually(re, func() bool { watchedAddr, ok := suite.pdLeader.GetServicePrimaryAddr(suite.ctx, mcs.SchedulingServiceName) - return ok && newPrimaryAddr == watchedAddr + return ok && newPrimaryAddr == watchedAddr && + len(primary.GetCluster().GetCoordinator().GetSchedulersController().GetSchedulerNames()) == 5 }) } @@ -166,24 +173,24 @@ func (suite *serverTestSuite) TestForwardStoreHeartbeat() { re.NoError(err) re.Empty(resp.GetHeader().GetError()) - resp1, err := s.StoreHeartbeat( - context.Background(), &pdpb.StoreHeartbeatRequest{ - Header: &pdpb.RequestHeader{ClusterId: suite.pdLeader.GetClusterID()}, - Stats: &pdpb.StoreStats{ - StoreId: 1, - Capacity: 1798985089024, - Available: 1709868695552, - UsedSize: 85150956358, - KeysWritten: 
20000, - BytesWritten: 199, - KeysRead: 10000, - BytesRead: 99, - }, - }, - ) - re.NoError(err) - re.Empty(resp1.GetHeader().GetError()) testutil.Eventually(re, func() bool { + resp1, err := s.StoreHeartbeat( + context.Background(), &pdpb.StoreHeartbeatRequest{ + Header: &pdpb.RequestHeader{ClusterId: suite.pdLeader.GetClusterID()}, + Stats: &pdpb.StoreStats{ + StoreId: 1, + Capacity: 1798985089024, + Available: 1709868695552, + UsedSize: 85150956358, + KeysWritten: 20000, + BytesWritten: 199, + KeysRead: 10000, + BytesRead: 99, + }, + }, + ) + re.NoError(err) + re.Empty(resp1.GetHeader().GetError()) store := tc.GetPrimaryServer().GetCluster().GetStore(1) return store.GetStoreStats().GetCapacity() == uint64(1798985089024) && store.GetStoreStats().GetAvailable() == uint64(1709868695552) && @@ -195,6 +202,44 @@ func (suite *serverTestSuite) TestForwardStoreHeartbeat() { }) } +func (suite *serverTestSuite) TestDynamicSwitch() { + re := suite.Require() + // API server will execute scheduling jobs since there is no scheduler server. + testutil.Eventually(re, func() bool { + return suite.pdLeader.GetServer().GetRaftCluster().IsSchedulingControllerRunning() + }) + + tc, err := tests.NewTestSchedulingCluster(suite.ctx, 1, suite.backendEndpoints) + re.NoError(err) + defer tc.Destroy() + tc.WaitForPrimaryServing(re) + // After scheduling server is started, API server will not execute scheduling jobs. + testutil.Eventually(re, func() bool { + return !suite.pdLeader.GetServer().GetRaftCluster().IsSchedulingControllerRunning() + }) + // Scheduling server is responsible for executing scheduling jobs. + testutil.Eventually(re, func() bool { + return tc.GetPrimaryServer().GetCluster().IsBackgroundJobsRunning() + }) + tc.GetPrimaryServer().Close() + // Stop scheduling server. API server will execute scheduling jobs again. + testutil.Eventually(re, func() bool { + return suite.pdLeader.GetServer().GetRaftCluster().IsSchedulingControllerRunning() + }) + tc1, err := tests.NewTestSchedulingCluster(suite.ctx, 1, suite.backendEndpoints) + re.NoError(err) + defer tc1.Destroy() + tc1.WaitForPrimaryServing(re) + // After scheduling server is started, API server will not execute scheduling jobs. + testutil.Eventually(re, func() bool { + return !suite.pdLeader.GetServer().GetRaftCluster().IsSchedulingControllerRunning() + }) + // Scheduling server is responsible for executing scheduling jobs again. 
+ testutil.Eventually(re, func() bool { + return tc1.GetPrimaryServer().GetCluster().IsBackgroundJobsRunning() + }) +} + func (suite *serverTestSuite) TestSchedulerSync() { re := suite.Require() tc, err := tests.NewTestSchedulingCluster(suite.ctx, 1, suite.backendEndpoints) @@ -509,6 +554,6 @@ func checkOperatorFail(re *require.Assertions, oc *operator.Controller, op *oper func waitSyncFinish(re *require.Assertions, tc *tests.TestSchedulingCluster, typ storelimit.Type, expectedLimit float64) { testutil.Eventually(re, func() bool { - return tc.GetPrimaryServer().GetPersistConfig().GetStoreLimitByType(2, typ) == expectedLimit + return tc.GetPrimaryServer().GetCluster().GetSharedConfig().GetStoreLimitByType(2, typ) == expectedLimit }) } diff --git a/tests/integrations/mcs/testutil.go b/tests/integrations/mcs/testutil.go index bbedd65209d..d23da905f78 100644 --- a/tests/integrations/mcs/testutil.go +++ b/tests/integrations/mcs/testutil.go @@ -106,7 +106,7 @@ func WaitForMultiKeyspacesTSOAvailable( clients := make([]pd.Client, 0, len(keyspaceIDs)) for _, keyspaceID := range keyspaceIDs { - cli := SetupClientWithKeyspaceID(ctx, re, keyspaceID, backendEndpoints) + cli := SetupClientWithKeyspaceID(ctx, re, keyspaceID, backendEndpoints, pd.WithForwardingOption(true)) re.NotNil(cli) clients = append(clients, cli) diff --git a/tests/integrations/mcs/tso/api_test.go b/tests/integrations/mcs/tso/api_test.go index 81cc798851f..94b38dd93cb 100644 --- a/tests/integrations/mcs/tso/api_test.go +++ b/tests/integrations/mcs/tso/api_test.go @@ -18,6 +18,7 @@ import ( "bytes" "context" "encoding/json" + "fmt" "io" "net/http" "testing" @@ -124,7 +125,7 @@ func (suite *tsoAPITestSuite) TestForwardResetTS() { } func mustGetKeyspaceGroupMembers(re *require.Assertions, server *tso.Server) map[uint32]*apis.KeyspaceGroupMember { - httpReq, err := http.NewRequest(http.MethodGet, server.GetAddr()+tsoKeyspaceGroupsPrefix+"/members", nil) + httpReq, err := http.NewRequest(http.MethodGet, server.GetAddr()+tsoKeyspaceGroupsPrefix+"/members", http.NoBody) re.NoError(err) httpResp, err := dialClient.Do(httpReq) re.NoError(err) @@ -184,7 +185,7 @@ func TestTSOServerStartFirst(t *testing.T) { defer httpResp.Body.Close() re.Equal(http.StatusOK, httpResp.StatusCode) - httpReq, err = http.NewRequest(http.MethodGet, addr+"/pd/api/v2/tso/keyspace-groups/0", nil) + httpReq, err = http.NewRequest(http.MethodGet, addr+"/pd/api/v2/tso/keyspace-groups/0", http.NoBody) re.NoError(err) httpResp, err = dialClient.Do(httpReq) re.NoError(err) @@ -200,3 +201,46 @@ func TestTSOServerStartFirst(t *testing.T) { re.NoError(failpoint.Disable("github.com/tikv/pd/server/delayStartServerLoop")) } + +func TestForwardOnlyTSONoScheduling(t *testing.T) { + re := require.New(t) + re.NoError(failpoint.Enable("github.com/tikv/pd/pkg/utils/apiutil/serverapi/checkHeader", "return(true)")) + defer func() { + re.NoError(failpoint.Disable("github.com/tikv/pd/pkg/utils/apiutil/serverapi/checkHeader")) + }() + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + tc, err := tests.NewTestAPICluster(ctx, 1) + defer tc.Destroy() + re.NoError(err) + err = tc.RunInitialServers() + re.NoError(err) + pdAddr := tc.GetConfig().GetClientURL() + ttc, err := tests.NewTestTSOCluster(ctx, 2, pdAddr) + re.NoError(err) + tc.WaitLeader() + leaderServer := tc.GetLeaderServer() + re.NoError(leaderServer.BootstrapCluster()) + + urlPrefix := fmt.Sprintf("%s/pd/api/v1", pdAddr) + + // Test /operators, it should not forward when there is no scheduling server. 
+	var slice []string
+	err = testutil.ReadGetJSON(re, dialClient, fmt.Sprintf("%s/%s", urlPrefix, "operators"), &slice,
+		testutil.WithoutHeader(re, apiutil.ForwardToMicroServiceHeader))
+	re.NoError(err)
+	re.Len(slice, 0)
+
+	// Test admin/reset-ts; it should be forwarded to the TSO server.
+	input := []byte(`{"tso":"121312", "force-use-larger":true}`)
+	err = testutil.CheckPostJSON(dialClient, fmt.Sprintf("%s/%s", urlPrefix, "admin/reset-ts"), input,
+		testutil.StatusOK(re), testutil.StringContain(re, "Reset ts successfully"), testutil.WithHeader(re, apiutil.ForwardToMicroServiceHeader, "true"))
+	re.NoError(err)
+
+	// If the TSO server is closed, the request is still forwarded to the TSO server, but it returns an error in API mode.
+	ttc.Destroy()
+	err = testutil.CheckPostJSON(dialClient, fmt.Sprintf("%s/%s", urlPrefix, "admin/reset-ts"), input,
+		testutil.Status(re, http.StatusInternalServerError), testutil.StringContain(re, "[PD:apiutil:ErrRedirect]redirect failed"))
+	re.NoError(err)
+}
diff --git a/tests/integrations/mcs/tso/proxy_test.go b/tests/integrations/mcs/tso/proxy_test.go
index fc33a6a41be..60280fa892d 100644
--- a/tests/integrations/mcs/tso/proxy_test.go
+++ b/tests/integrations/mcs/tso/proxy_test.go
@@ -4,7 +4,7 @@
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
-// http://www.apache.org/licenses/LICENSE-2.0
+// http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
diff --git a/tests/integrations/mcs/tso/server_test.go b/tests/integrations/mcs/tso/server_test.go
index 58006b87eeb..c81c39af094 100644
--- a/tests/integrations/mcs/tso/server_test.go
+++ b/tests/integrations/mcs/tso/server_test.go
@@ -89,7 +89,7 @@ func (suite *tsoServerTestSuite) TearDownSuite() {
 func (suite *tsoServerTestSuite) TestTSOServerStartAndStopNormally() {
 	defer func() {
 		if r := recover(); r != nil {
-			fmt.Println("Recovered from an unexpected panic", r)
+			suite.T().Log("Recovered from an unexpected panic", r)
 			suite.T().Errorf("Expected no panic, but something bad occurred with")
 		}
 	}()
@@ -392,6 +392,11 @@ func (suite *APIServerForwardTestSuite) TestResignAPIPrimaryForward() {
 	defer tc.Destroy()
 	tc.WaitForDefaultPrimaryServing(re)
 
+	re.NoError(failpoint.Enable("github.com/tikv/pd/pkg/member/skipCampaignLeaderCheck", "return(true)"))
+	defer func() {
+		re.NoError(failpoint.Disable("github.com/tikv/pd/pkg/member/skipCampaignLeaderCheck"))
+	}()
+
 	for j := 0; j < 10; j++ {
 		suite.pdLeader.ResignLeader()
 		suite.pdLeader = suite.cluster.GetServer(suite.cluster.WaitLeader())
diff --git a/tests/integrations/tso/client_test.go b/tests/integrations/tso/client_test.go
index 63243214e81..b021e73a2f9 100644
--- a/tests/integrations/tso/client_test.go
+++ b/tests/integrations/tso/client_test.go
@@ -98,7 +98,7 @@ func (suite *tsoClientTestSuite) SetupSuite() {
 	suite.keyspaceIDs = make([]uint32, 0)
 
 	if suite.legacy {
-		client, err := pd.NewClientWithContext(suite.ctx, strings.Split(suite.backendEndpoints, ","), pd.SecurityOption{})
+		client, err := pd.NewClientWithContext(suite.ctx, strings.Split(suite.backendEndpoints, ","), pd.SecurityOption{}, pd.WithForwardingOption(true))
 		re.NoError(err)
 		innerClient, ok := client.(interface{ GetServiceDiscovery() pd.ServiceDiscovery })
 		re.True(ok)
@@ -263,7 +263,9 @@ func (suite *tsoClientTestSuite) TestDiscoverTSOServiceWithLegacyPath() {
 
 // TestGetMinTS tests the correctness of GetMinTS.
func (suite *tsoClientTestSuite) TestGetMinTS() { re := suite.Require() - suite.waitForAllKeyspaceGroupsInServing(re) + if !suite.legacy { + suite.waitForAllKeyspaceGroupsInServing(re) + } var wg sync.WaitGroup wg.Add(tsoRequestConcurrencyNumber * len(suite.clients)) @@ -293,6 +295,15 @@ func (suite *tsoClientTestSuite) TestGetMinTS() { } } wg.Wait() + + re.NoError(failpoint.Enable("github.com/tikv/pd/client/unreachableNetwork1", "return(true)")) + time.Sleep(time.Second) + testutil.Eventually(re, func() bool { + var err error + _, _, err = suite.clients[0].GetMinTS(suite.ctx) + return err == nil + }) + re.NoError(failpoint.Disable("github.com/tikv/pd/client/unreachableNetwork1")) } // More details can be found in this issue: https://github.com/tikv/pd/issues/4884 @@ -300,7 +311,10 @@ func (suite *tsoClientTestSuite) TestUpdateAfterResetTSO() { re := suite.Require() ctx, cancel := context.WithCancel(suite.ctx) defer cancel() - + re.NoError(failpoint.Enable("github.com/tikv/pd/pkg/member/skipCampaignLeaderCheck", "return(true)")) + defer func() { + re.NoError(failpoint.Disable("github.com/tikv/pd/pkg/member/skipCampaignLeaderCheck")) + }() for i := 0; i < len(suite.clients); i++ { client := suite.clients[i] testutil.Eventually(re, func() bool { @@ -336,6 +350,11 @@ func (suite *tsoClientTestSuite) TestUpdateAfterResetTSO() { func (suite *tsoClientTestSuite) TestRandomResignLeader() { re := suite.Require() re.NoError(failpoint.Enable("github.com/tikv/pd/pkg/tso/fastUpdatePhysicalInterval", "return(true)")) + re.NoError(failpoint.Enable("github.com/tikv/pd/pkg/member/skipCampaignLeaderCheck", "return(true)")) + defer func() { + re.NoError(failpoint.Disable("github.com/tikv/pd/pkg/tso/fastUpdatePhysicalInterval")) + re.NoError(failpoint.Disable("github.com/tikv/pd/pkg/member/skipCampaignLeaderCheck")) + }() parallelAct := func() { // After https://github.com/tikv/pd/issues/6376 is fixed, we can use a smaller number here. 
@@ -373,7 +392,6 @@ func (suite *tsoClientTestSuite) TestRandomResignLeader() { } mcs.CheckMultiKeyspacesTSO(suite.ctx, re, suite.clients, parallelAct) - re.NoError(failpoint.Disable("github.com/tikv/pd/pkg/tso/fastUpdatePhysicalInterval")) } func (suite *tsoClientTestSuite) TestRandomShutdown() { diff --git a/tests/integrations/tso/consistency_test.go b/tests/integrations/tso/consistency_test.go index 1d35e8bf5e2..74f5090bf3b 100644 --- a/tests/integrations/tso/consistency_test.go +++ b/tests/integrations/tso/consistency_test.go @@ -27,7 +27,7 @@ import ( tso "github.com/tikv/pd/pkg/mcs/tso/server" tsopkg "github.com/tikv/pd/pkg/tso" "github.com/tikv/pd/pkg/utils/tempurl" - pd "github.com/tikv/pd/pkg/utils/testutil" + tu "github.com/tikv/pd/pkg/utils/testutil" "github.com/tikv/pd/pkg/utils/tsoutil" "github.com/tikv/pd/tests" "google.golang.org/grpc" @@ -82,7 +82,7 @@ func (suite *tsoConsistencyTestSuite) SetupSuite() { suite.pdLeaderServer = suite.cluster.GetServer(leaderName) backendEndpoints := suite.pdLeaderServer.GetAddr() if suite.legacy { - suite.pdClient = pd.MustNewGrpcClient(re, backendEndpoints) + suite.pdClient = tu.MustNewGrpcClient(re, backendEndpoints) } else { suite.tsoServer, suite.tsoServerCleanup = tests.StartSingleTSOTestServer(suite.ctx, re, backendEndpoints, tempurl.Alloc()) suite.tsoClientConn, suite.tsoClient = tso.MustNewGrpcClient(re, suite.tsoServer.GetAddr()) @@ -127,12 +127,15 @@ func (suite *tsoConsistencyTestSuite) request(ctx context.Context, count uint32) DcLocation: tsopkg.GlobalDCLocation, Count: count, } - tsoClient, err := suite.tsoClient.Tso(ctx) - re.NoError(err) - defer tsoClient.CloseSend() - re.NoError(tsoClient.Send(req)) - resp, err := tsoClient.Recv() - re.NoError(err) + var resp *tsopb.TsoResponse + tu.Eventually(re, func() bool { + tsoClient, err := suite.tsoClient.Tso(ctx) + re.NoError(err) + defer tsoClient.CloseSend() + re.NoError(tsoClient.Send(req)) + resp, err = tsoClient.Recv() + return err == nil && resp != nil + }) return checkAndReturnTimestampResponse(re, resp) } diff --git a/tests/integrations/tso/go.mod b/tests/integrations/tso/go.mod index f8a5cfac75f..309ea9dbc4d 100644 --- a/tests/integrations/tso/go.mod +++ b/tests/integrations/tso/go.mod @@ -47,9 +47,11 @@ require ( github.com/beorn7/perks v1.0.1 // indirect github.com/bitly/go-simplejson v0.5.0 // indirect github.com/breeswish/gin-jwt/v2 v2.6.4-jwt-patch // indirect + github.com/bytedance/sonic v1.9.1 // indirect github.com/cakturk/go-netstat v0.0.0-20200220111822-e5b49efee7a5 // indirect github.com/cenkalti/backoff/v4 v4.0.2 // indirect github.com/cespare/xxhash/v2 v2.2.0 // indirect + github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311 // indirect github.com/coreos/go-semver v0.3.0 // indirect github.com/coreos/go-systemd v0.0.0-20190719114852-fd7a80b32e1f // indirect github.com/coreos/pkg v0.0.0-20180928190104-399ea9e2e55f // indirect @@ -58,23 +60,24 @@ require ( github.com/dustin/go-humanize v0.0.0-20171111073723-bb3d318650d4 // indirect github.com/elliotchance/pie/v2 v2.1.0 // indirect github.com/fogleman/gg v1.3.0 // indirect + github.com/gabriel-vasile/mimetype v1.4.2 // indirect github.com/gin-contrib/cors v1.4.0 // indirect github.com/gin-contrib/gzip v0.0.1 // indirect github.com/gin-contrib/pprof v1.4.0 // indirect github.com/gin-contrib/sse v0.1.0 // indirect - github.com/gin-gonic/gin v1.8.1 // indirect + github.com/gin-gonic/gin v1.9.1 // indirect github.com/go-ole/go-ole v1.2.6 // indirect github.com/go-openapi/jsonpointer v0.19.5 // indirect 
github.com/go-openapi/jsonreference v0.19.6 // indirect github.com/go-openapi/spec v0.20.4 // indirect github.com/go-openapi/swag v0.19.15 // indirect - github.com/go-playground/locales v0.14.0 // indirect - github.com/go-playground/universal-translator v0.18.0 // indirect - github.com/go-playground/validator/v10 v10.10.0 // indirect + github.com/go-playground/locales v0.14.1 // indirect + github.com/go-playground/universal-translator v0.18.1 // indirect + github.com/go-playground/validator/v10 v10.14.0 // indirect github.com/go-resty/resty/v2 v2.6.0 // indirect github.com/go-sql-driver/mysql v1.7.0 // indirect github.com/goccy/go-graphviz v0.0.9 // indirect - github.com/goccy/go-json v0.9.7 // indirect + github.com/goccy/go-json v0.10.2 // indirect github.com/gogo/protobuf v1.3.2 // indirect github.com/golang-jwt/jwt v3.2.1+incompatible // indirect github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0 // indirect @@ -98,11 +101,12 @@ require ( github.com/joomcode/errorx v1.0.1 // indirect github.com/josharian/intern v1.0.0 // indirect github.com/json-iterator/go v1.1.12 // indirect + github.com/klauspost/cpuid/v2 v2.2.4 // indirect github.com/konsorten/go-windows-terminal-sequences v1.0.3 // indirect - github.com/leodido/go-urn v1.2.1 // indirect + github.com/leodido/go-urn v1.2.4 // indirect github.com/lufia/plan9stats v0.0.0-20230326075908-cb1d2100619a // indirect github.com/mailru/easyjson v0.7.6 // indirect - github.com/mattn/go-isatty v0.0.14 // indirect + github.com/mattn/go-isatty v0.0.19 // indirect github.com/mattn/go-sqlite3 v1.14.15 // indirect github.com/matttproud/golang_protobuf_extensions v1.0.1 // indirect github.com/minio/sio v0.3.0 // indirect @@ -110,14 +114,14 @@ require ( github.com/modern-go/reflect2 v1.0.2 // indirect github.com/oleiade/reflections v1.0.1 // indirect github.com/opentracing/opentracing-go v1.2.0 // indirect - github.com/pelletier/go-toml/v2 v2.0.1 // indirect + github.com/pelletier/go-toml/v2 v2.0.8 // indirect github.com/petermattis/goid v0.0.0-20211229010228-4d14c490ee36 // indirect github.com/phf/go-queue v0.0.0-20170504031614-9abe38d0371d // indirect github.com/pingcap/errcode v0.3.0 // indirect github.com/pingcap/errors v0.11.5-0.20211224045212-9687c2b0f87c // indirect github.com/pingcap/log v1.1.1-0.20221110025148-ca232912c9f3 // indirect github.com/pingcap/sysutil v1.0.1-0.20230407040306-fb007c5aff21 // indirect - github.com/pingcap/tidb-dashboard v0.0.0-20230911054332-22add1e00511 // indirect + github.com/pingcap/tidb-dashboard v0.0.0-20231127105651-ce4097837c5e // indirect github.com/pingcap/tipb v0.0.0-20220718022156-3e2483c20a9e // indirect github.com/pkg/errors v0.9.1 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect @@ -145,7 +149,8 @@ require ( github.com/tklauser/go-sysconf v0.3.11 // indirect github.com/tklauser/numcpus v0.6.0 // indirect github.com/tmc/grpc-websocket-proxy v0.0.0-20200427203606-3cfed13b9966 // indirect - github.com/ugorji/go/codec v1.2.7 // indirect + github.com/twitchyliquid64/golang-asm v0.15.1 // indirect + github.com/ugorji/go/codec v1.2.11 // indirect github.com/unrolled/render v1.0.1 // indirect github.com/urfave/negroni v0.3.0 // indirect github.com/vmihailenco/msgpack/v5 v5.3.5 // indirect @@ -160,6 +165,7 @@ require ( go.uber.org/goleak v1.1.12 // indirect go.uber.org/multierr v1.11.0 // indirect go.uber.org/zap v1.24.0 // indirect + golang.org/x/arch v0.3.0 // indirect golang.org/x/crypto v0.14.0 // indirect golang.org/x/exp v0.0.0-20230711005742-c3f37128e5a4 // indirect 
golang.org/x/image v0.5.0 // indirect diff --git a/tests/integrations/tso/go.sum b/tests/integrations/tso/go.sum index 63327985f0d..94fbde2ad57 100644 --- a/tests/integrations/tso/go.sum +++ b/tests/integrations/tso/go.sum @@ -68,6 +68,9 @@ github.com/bmizerany/assert v0.0.0-20160611221934-b7ed37b82869 h1:DDGfHa7BWjL4Yn github.com/bmizerany/assert v0.0.0-20160611221934-b7ed37b82869/go.mod h1:Ekp36dRnpXw/yCqJaO+ZrUyxD+3VXMFFr56k5XYrpB4= github.com/breeswish/gin-jwt/v2 v2.6.4-jwt-patch h1:KLE/YeX+9FNaGVW5MtImRVPhjDpfpgJhvkuYWBmOYbo= github.com/breeswish/gin-jwt/v2 v2.6.4-jwt-patch/go.mod h1:KjBLriHXe7L6fGceqWzTod8HUB/TP1WWDtfuSYtYXaI= +github.com/bytedance/sonic v1.5.0/go.mod h1:ED5hyg4y6t3/9Ku1R6dU/4KyJ48DZ4jPhfY1O2AihPM= +github.com/bytedance/sonic v1.9.1 h1:6iJ6NqdoxCDr6mbY8h18oSO+cShGSMRGCEo7F2h0x8s= +github.com/bytedance/sonic v1.9.1/go.mod h1:i736AoUSYt75HyZLoJW9ERYxcy6eaN6h4BZXU064P/U= github.com/cakturk/go-netstat v0.0.0-20200220111822-e5b49efee7a5 h1:BjkPE3785EwPhhyuFkbINB+2a1xATwk8SNDWnJiD41g= github.com/cakturk/go-netstat v0.0.0-20200220111822-e5b49efee7a5/go.mod h1:jtAfVaU/2cu1+wdSRPWE2c1N2qeAA3K4RH9pYgqwets= github.com/cenkalti/backoff/v4 v4.0.2 h1:JIufpQLbh4DkbQoii76ItQIUFzevQSqOLZca4eamEDs= @@ -77,6 +80,9 @@ github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghf github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/chenzhuoyu/base64x v0.0.0-20211019084208-fb5309c8db06/go.mod h1:DH46F32mSOjUmXrMHnKwZdA8wcEefY7UVqBKYGjpdQY= +github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311 h1:qSGYFH7+jGhDF8vLC+iwCD4WpbV1EBDSzWkJODFLams= +github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311/go.mod h1:b583jCggY9gE99b6G5LEC39OIiVsWj+R97kbl5odCEk= github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= @@ -123,6 +129,8 @@ github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5Kwzbycv github.com/fogleman/gg v1.3.0 h1:/7zJX8F6AaYQc57WQCyN9cAIz+4bCJGO9B+dyW29am8= github.com/fogleman/gg v1.3.0/go.mod h1:R/bRT+9gY/C5z7JzPU0zXsXHKM4/ayA+zqcVNZzPa1k= github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= +github.com/gabriel-vasile/mimetype v1.4.2 h1:w5qFW6JKBz9Y393Y4q372O9A7cUSequkh1Q7OhCmWKU= +github.com/gabriel-vasile/mimetype v1.4.2/go.mod h1:zApsH/mKG4w07erKIaJPFiX0Tsq9BFQgN3qGY5GnNgA= github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= github.com/gin-contrib/cors v1.4.0 h1:oJ6gwtUl3lqV0WEIwM/LxPF1QZ5qe2lGWdY2+bz7y0g= github.com/gin-contrib/cors v1.4.0/go.mod h1:bs9pNM0x/UsmHPBWT2xZz9ROh8xYjYkiURUfmBoMlcs= @@ -135,8 +143,9 @@ github.com/gin-contrib/sse v0.1.0 h1:Y/yl/+YNO8GZSjAhjMsSuLt29uWRFHdHYUb5lYOV9qE github.com/gin-contrib/sse v0.1.0/go.mod h1:RHrZQHXnP2xjPF+u1gW/2HnVO7nvIa9PG3Gm+fLHvGI= github.com/gin-gonic/gin v1.3.0/go.mod h1:7cKuhb5qV2ggCFctp2fJQ+ErvciLZrIeoOSOm6mUr7Y= github.com/gin-gonic/gin v1.6.3/go.mod h1:75u5sXoLsGZoRN5Sgbi1eraJ4GU3++wFwWzhwvtwp4M= -github.com/gin-gonic/gin v1.8.1 h1:4+fr/el88TOO3ewCmQr8cx/CtZ/umlIRIs5M4NTNjf8= github.com/gin-gonic/gin v1.8.1/go.mod 
h1:ji8BvRH1azfM+SYow9zQ6SZMvR8qOMZHmsCuWR9tTTk= +github.com/gin-gonic/gin v1.9.1 h1:4idEAncQnU5cB7BeOkPtxjfCSye0AAm1R0RVIqJ+Jmg= +github.com/gin-gonic/gin v1.9.1/go.mod h1:hPrL7YrpYKXt5YId3A/Tnip5kqbEAP+KLuI3SUcPTeU= github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= github.com/go-kit/kit v0.9.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= github.com/go-kit/log v0.1.0/go.mod h1:zbhenjAZHb184qTLMA9ZjW7ThYL0H2mk7Q6pNt4vbaY= @@ -155,17 +164,21 @@ github.com/go-openapi/spec v0.20.4/go.mod h1:faYFR1CvsJZ0mNsmsphTMSoRrNV3TEDoAM7 github.com/go-openapi/swag v0.19.5/go.mod h1:POnQmlKehdgb5mhVOsnJFsivZCEZ/vjK9gh66Z9tfKk= github.com/go-openapi/swag v0.19.15 h1:D2NRCBzS9/pEY3gP9Nl8aDqGUcPFrwG2p+CNFrLyrCM= github.com/go-openapi/swag v0.19.15/go.mod h1:QYRuS/SOXUCsnplDa677K7+DxSOj6IPNl/eQntq43wQ= -github.com/go-playground/assert/v2 v2.0.1 h1:MsBgLAaY856+nPRTKrp3/OZK38U/wa0CcBYNjji3q3A= github.com/go-playground/assert/v2 v2.0.1/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4= +github.com/go-playground/assert/v2 v2.2.0 h1:JvknZsQTYeFEAhQwI4qEt9cyV5ONwRHC+lYKSsYSR8s= +github.com/go-playground/assert/v2 v2.2.0/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4= github.com/go-playground/locales v0.13.0/go.mod h1:taPMhCMXrRLJO55olJkUXHZBHCxTMfnGwq/HNwmWNS8= -github.com/go-playground/locales v0.14.0 h1:u50s323jtVGugKlcYeyzC0etD1HifMjqmJqb8WugfUU= github.com/go-playground/locales v0.14.0/go.mod h1:sawfccIbzZTqEDETgFXqTho0QybSa7l++s0DH+LDiLs= +github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/oXslEjJA= +github.com/go-playground/locales v0.14.1/go.mod h1:hxrqLVvrK65+Rwrd5Fc6F2O76J/NuW9t0sjnWqG1slY= github.com/go-playground/universal-translator v0.17.0/go.mod h1:UkSxE5sNxxRwHyU+Scu5vgOQjsIJAF8j9muTVoKLVtA= -github.com/go-playground/universal-translator v0.18.0 h1:82dyy6p4OuJq4/CByFNOn/jYrnRPArHwAcmLoJZxyho= github.com/go-playground/universal-translator v0.18.0/go.mod h1:UvRDBj+xPUEGrFYl+lu/H90nyDXpg0fqeB/AQUGNTVA= +github.com/go-playground/universal-translator v0.18.1 h1:Bcnm0ZwsGyWbCzImXv+pAJnYK9S473LQFuzCbDbfSFY= +github.com/go-playground/universal-translator v0.18.1/go.mod h1:xekY+UJKNuX9WP91TpwSH2VMlDf28Uj24BCp08ZFTUY= github.com/go-playground/validator/v10 v10.2.0/go.mod h1:uOYAAleCW8F/7oMFd6aG0GOhaH6EGOAJShg8Id5JGkI= -github.com/go-playground/validator/v10 v10.10.0 h1:I7mrTYv78z8k8VXa/qJlOlEXn/nBh+BF8dHX5nt/dr0= github.com/go-playground/validator/v10 v10.10.0/go.mod h1:74x4gJWsvQexRdW8Pn3dXSGrTK4nAUsbPlLADvpJkos= +github.com/go-playground/validator/v10 v10.14.0 h1:vgvQWe3XCz3gIeFDm/HnTIbj6UGmg/+t63MyGU2n5js= +github.com/go-playground/validator/v10 v10.14.0/go.mod h1:9iXMNT7sEkjXb0I+enO7QXmzG6QCsPWY4zveKFVRSyU= github.com/go-resty/resty/v2 v2.6.0 h1:joIR5PNLM2EFqqESUjCMGXrWmXNHEU9CEiK813oKYS4= github.com/go-resty/resty/v2 v2.6.0/go.mod h1:PwvJS6hvaPkjtjNg9ph+VrSD92bi5Zq73w/BIH7cC3Q= github.com/go-sql-driver/mysql v1.7.0 h1:ueSltNNllEqE3qcWBTD0iQd3IpL/6U+mJxLkazJ7YPc= @@ -173,8 +186,9 @@ github.com/go-sql-driver/mysql v1.7.0/go.mod h1:OXbVy3sEdcQ2Doequ6Z5BW6fXNQTmx+9 github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= github.com/goccy/go-graphviz v0.0.9 h1:s/FMMJ1Joj6La3S5ApO3Jk2cwM4LpXECC2muFx3IPQQ= github.com/goccy/go-graphviz v0.0.9/go.mod h1:wXVsXxmyMQU6TN3zGRttjNn3h+iCAS7xQFC6TlNvLhk= -github.com/goccy/go-json v0.9.7 h1:IcB+Aqpx/iMHu5Yooh7jEzJk1JZ7Pjtmys2ukPr7EeM= github.com/goccy/go-json v0.9.7/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I= +github.com/goccy/go-json v0.10.2 
h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU= +github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I= github.com/gogo/protobuf v0.0.0-20171007142547-342cbe0a0415/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= github.com/gogo/protobuf v0.0.0-20180717141946-636bf0302bc9/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= @@ -312,6 +326,9 @@ github.com/julienschmidt/httprouter v1.3.0/go.mod h1:JR6WtHb+2LUe8TCKY3cZOxFyyO8 github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvWXihfKN4Q= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= +github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= +github.com/klauspost/cpuid/v2 v2.2.4 h1:acbojRNwl3o09bUq+yDCtZFc1aiwaAAxtcn8YkZXnvk= +github.com/klauspost/cpuid/v2 v2.2.4/go.mod h1:RVVoqg1df56z8g3pUjL/3lE5UfnlrJX8tyFgg4nqhuY= github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/konsorten/go-windows-terminal-sequences v1.0.3 h1:CE8S1cTafDpPvMhIxNJKvHsGVBgn1xWYf1NbHQhywc8= github.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= @@ -325,8 +342,9 @@ github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/leodido/go-urn v1.2.0/go.mod h1:+8+nEpDfqqsY+g338gtMEUOtuK+4dEMhiQEgxpxOKII= -github.com/leodido/go-urn v1.2.1 h1:BqpAaACuzVSgi/VLzGZIobT2z4v53pjosyNd9Yv6n/w= github.com/leodido/go-urn v1.2.1/go.mod h1:zt4jvISO2HfUBqxjfIshjdMTYS56ZS/qv49ictyFfxY= +github.com/leodido/go-urn v1.2.4 h1:XlAE/cm/ms7TE/VMVoduSpNBoyc2dOxHs5MZSwAN63Q= +github.com/leodido/go-urn v1.2.4/go.mod h1:7ZrI8mTSeBSHl/UaRyKQW1qZeMgak41ANeCNaVckg+4= github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0/go.mod h1:zJYVVT2jmtg6P3p1VtQj7WsuWi/y4VnjVBn7F8KPB3I= github.com/lufia/plan9stats v0.0.0-20230326075908-cb1d2100619a h1:N9zuLhTvBSRt0gWSiJswwQ2HqDmtX/ZCDJURnKUt1Ik= github.com/lufia/plan9stats v0.0.0-20230326075908-cb1d2100619a/go.mod h1:JKx41uQRwqlTZabZc+kILPrO/3jlKnQ2Z8b7YiVw5cE= @@ -338,8 +356,9 @@ github.com/mailru/easyjson v0.7.6/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJ github.com/mattn/go-colorable v0.0.9/go.mod h1:9vuHe8Xs5qXnSaW/c/ABM9alt+Vo+STaOChaDxuIBZU= github.com/mattn/go-isatty v0.0.4/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4= github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU= -github.com/mattn/go-isatty v0.0.14 h1:yVuAays6BHfxijgZPzw+3Zlu5yQgKGP2/hcQbHb7S9Y= github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94= +github.com/mattn/go-isatty v0.0.19 h1:JITubQf0MOLdlGRuRq+jtsDlekdYPia9ZFsB8h/APPA= +github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= github.com/mattn/go-runewidth v0.0.2/go.mod h1:LwmH8dsx7+W8Uxz3IHJYH5QSwggIsqBzpuz5H//U1FU= github.com/mattn/go-sqlite3 v1.14.15 h1:vfoHhTN1af61xCRSWzFIWzx2YskyMTwHLrExkBOjvxI= github.com/mattn/go-sqlite3 v1.14.15/go.mod h1:2eHXhiwb8IkHr+BDWZGa96P6+rkvnG63S2DGjv9HUNg= @@ -383,8 +402,9 @@ github.com/otiai10/mint v1.3.0/go.mod h1:F5AjcsTsWUqX+Na9fpHb52P8pcRX2CI6A3ctIT9 
github.com/otiai10/mint v1.3.3/go.mod h1:/yxELlJQ0ufhjUwhshSj+wFjZ78CnZ48/1wtmBH1OTc= github.com/pascaldekloe/name v0.0.0-20180628100202-0fd16699aae1/go.mod h1:eD5JxqMiuNYyFNmyY9rkJ/slN8y59oEu4Ei7F8OoKWQ= github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic= -github.com/pelletier/go-toml/v2 v2.0.1 h1:8e3L2cCQzLFi2CR4g7vGFuFxX7Jl1kKX8gW+iV0GUKU= github.com/pelletier/go-toml/v2 v2.0.1/go.mod h1:r9LEWfGN8R5k0VXJ+0BkIe7MYkRdwZOjgMj2KwnJFUo= +github.com/pelletier/go-toml/v2 v2.0.8 h1:0ctb6s9mE31h0/lhu+J6OPmVeDxJn+kYnJc2jZR9tGQ= +github.com/pelletier/go-toml/v2 v2.0.8/go.mod h1:vuYfssBdrU2XDZ9bYydBu6t+6a6PYNcZljzZR9VXg+4= github.com/petermattis/goid v0.0.0-20211229010228-4d14c490ee36 h1:64bxqeTEN0/xoEqhKGowgihNuzISS9rEG6YUMU4bzJo= github.com/petermattis/goid v0.0.0-20211229010228-4d14c490ee36/go.mod h1:pxMtw7cyUw6B2bRH0ZBANSPg+AoSud1I1iyJHI69jH4= github.com/phf/go-queue v0.0.0-20170504031614-9abe38d0371d h1:U+PMnTlV2tu7RuMK5etusZG3Cf+rpow5hqQByeCzJ2g= @@ -408,8 +428,8 @@ github.com/pingcap/log v1.1.1-0.20221110025148-ca232912c9f3 h1:HR/ylkkLmGdSSDaD8 github.com/pingcap/log v1.1.1-0.20221110025148-ca232912c9f3/go.mod h1:DWQW5jICDR7UJh4HtxXSM20Churx4CQL0fwL/SoOSA4= github.com/pingcap/sysutil v1.0.1-0.20230407040306-fb007c5aff21 h1:QV6jqlfOkh8hqvEAgwBZa+4bSgO0EeKC7s5c6Luam2I= github.com/pingcap/sysutil v1.0.1-0.20230407040306-fb007c5aff21/go.mod h1:QYnjfA95ZaMefyl1NO8oPtKeb8pYUdnDVhQgf+qdpjM= -github.com/pingcap/tidb-dashboard v0.0.0-20230911054332-22add1e00511 h1:oyrCfNlAWmLlUfEr+7YTSBo29SP/J1N8hnxBt5yUABo= -github.com/pingcap/tidb-dashboard v0.0.0-20230911054332-22add1e00511/go.mod h1:EZ90+V5S4TttbYag6oKZ3jcNKRwZe1Mc9vXwOt9JBYw= +github.com/pingcap/tidb-dashboard v0.0.0-20231127105651-ce4097837c5e h1:SJUSDejvKtj9vSh5ptRHh4iMrvPV3oKO8yp6/SYE8vc= +github.com/pingcap/tidb-dashboard v0.0.0-20231127105651-ce4097837c5e/go.mod h1:ucZBRz52icb23T/5Z4CsuUHmarYiin7p2MeiVBe+o8c= github.com/pingcap/tipb v0.0.0-20220718022156-3e2483c20a9e h1:FBaTXU8C3xgt/drM58VHxojHo/QoG1oPsgWTGvaSpO4= github.com/pingcap/tipb v0.0.0-20220718022156-3e2483c20a9e/go.mod h1:A7mrd7WHBl1o63LE2bIBGEJMTNWXqhgmYiOvMLxozfs= github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA= @@ -506,7 +526,9 @@ github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/ github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= github.com/stretchr/testify v1.8.2/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= +github.com/stretchr/testify v1.8.3/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= github.com/swaggo/files v0.0.0-20210815190702-a29dd2bc99b2 h1:+iNTcqQJy0OZ5jk6a5NLib47eqXK8uYcPX+O4+cBpEM= @@ -534,13 +556,16 @@ github.com/tklauser/numcpus v0.6.0/go.mod h1:FEZLMke0lhOUG6w2JadTzp0a+Nl8PF/GFkQ github.com/tmc/grpc-websocket-proxy v0.0.0-20190109142713-0ad062ec5ee5/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= github.com/tmc/grpc-websocket-proxy v0.0.0-20200427203606-3cfed13b9966 h1:j6JEOq5QWFker+d7mFQYOhjTZonQ7YkLTHm56dbn+yM= 
github.com/tmc/grpc-websocket-proxy v0.0.0-20200427203606-3cfed13b9966/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= +github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI= +github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08= github.com/ugorji/go v1.1.4/go.mod h1:uQMGLiO92mf5W77hV/PUCpI3pbzQx3CRekS0kk+RGrc= github.com/ugorji/go v1.1.7/go.mod h1:kZn38zHttfInRq0xu/PH0az30d+z6vm202qpg1oXVMw= github.com/ugorji/go v1.2.7/go.mod h1:nF9osbDWLy6bDVv/Rtoh6QgnvNDpmCalQV5urGCCS6M= github.com/ugorji/go/codec v0.0.0-20181022190402-e5e69e061d4f/go.mod h1:VFNgLljTbGfSG7qAOspJ7OScBnGdDN/yBr0sguwnwf0= github.com/ugorji/go/codec v1.1.7/go.mod h1:Ax+UKWsSmolVDwsd+7N3ZtXu+yMGCf907BLYF3GoBXY= -github.com/ugorji/go/codec v1.2.7 h1:YPXUKf7fYbp/y8xloBqZOw2qaVggbfwMlI8WM3wZUJ0= github.com/ugorji/go/codec v1.2.7/go.mod h1:WGN1fab3R1fzQlVQTkfxVtIBhWDRqOviHU95kRgeqEY= +github.com/ugorji/go/codec v1.2.11 h1:BMaWp1Bb6fHwEtbplGBGJ498wD+LKlNSl25MjdZY4dU= +github.com/ugorji/go/codec v1.2.11/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg= github.com/unrolled/render v1.0.1 h1:VDDnQQVfBMsOsp3VaCJszSO0nkBIVEYoPWeRThk9spY= github.com/unrolled/render v1.0.1/go.mod h1:gN9T0NhL4Bfbwu8ann7Ry/TGHYfosul+J0obPf6NBdM= github.com/urfave/cli v1.20.0/go.mod h1:70zkFmudgCuE/ngEzBv17Jvp/497gISqfk5gWijbERA= @@ -598,6 +623,9 @@ go.uber.org/zap v1.16.0/go.mod h1:MA8QOfq0BHJwdXa996Y4dYkAqRKB8/1K1QMMZVaNZjQ= go.uber.org/zap v1.19.0/go.mod h1:xg/QME4nWcxGxrpdeYfq7UvYrLh66cuVKdrbD1XF/NI= go.uber.org/zap v1.24.0 h1:FiJd5l1UOLj0wCgbSE0rwwXHzEdAZS6hiiSnxJN/D60= go.uber.org/zap v1.24.0/go.mod h1:2kMP+WWQ8aoFoedH3T2sq6iJ2yDWpHbP0f6MQbS9Gkg= +golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8= +golang.org/x/arch v0.3.0 h1:02VY4/ZcO/gBOH6PUaoiptASxtXU10jazRCP865E97k= +golang.org/x/arch v0.3.0/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8= golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= @@ -707,6 +735,7 @@ golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20211013075003-97ac67df715c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211019181941-9d821ace8654/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220704084225-05e143d24a9e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= @@ -840,6 +869,7 @@ honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWh honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg= moul.io/zapgorm2 v1.1.0 h1:qwAlMBYf+qJkJ7PAzJl4oCe6eS6QGiKAXUPeis0+RBE= moul.io/zapgorm2 v1.1.0/go.mod h1:emRfKjNqSzVj5lcgasBdovIXY1jSOwFz2GQZn1Rddks= +rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4= sigs.k8s.io/yaml v1.1.0/go.mod 
h1:UJmg0vDUVViEyp3mgSv9WPwZCDxu4rQW1olrI1uml+o= sigs.k8s.io/yaml v1.2.0 h1:kr/MCeFWJWTwyaHoR9c8EjH9OumOmoF9YGiZd7lFm/Q= sigs.k8s.io/yaml v1.2.0/go.mod h1:yfXDCHCao9+ENCvLSE62v9VSji2MKu5jeNfTrofGhJc= diff --git a/tests/integrations/tso/server_test.go b/tests/integrations/tso/server_test.go index c44f4967a84..ac3d914aa80 100644 --- a/tests/integrations/tso/server_test.go +++ b/tests/integrations/tso/server_test.go @@ -26,7 +26,7 @@ import ( tso "github.com/tikv/pd/pkg/mcs/tso/server" tsopkg "github.com/tikv/pd/pkg/tso" "github.com/tikv/pd/pkg/utils/tempurl" - pd "github.com/tikv/pd/pkg/utils/testutil" + tu "github.com/tikv/pd/pkg/utils/testutil" "github.com/tikv/pd/tests" "google.golang.org/grpc" ) @@ -80,7 +80,7 @@ func (suite *tsoServerTestSuite) SetupSuite() { suite.pdLeaderServer = suite.cluster.GetServer(leaderName) backendEndpoints := suite.pdLeaderServer.GetAddr() if suite.legacy { - suite.pdClient = pd.MustNewGrpcClient(re, backendEndpoints) + suite.pdClient = tu.MustNewGrpcClient(re, backendEndpoints) } else { suite.tsoServer, suite.tsoServerCleanup = tests.StartSingleTSOTestServer(suite.ctx, re, backendEndpoints, tempurl.Alloc()) suite.tsoClientConn, suite.tsoClient = tso.MustNewGrpcClient(re, suite.tsoServer.GetAddr()) diff --git a/tests/pdctl/config/config_test.go b/tests/pdctl/config/config_test.go index 6ed0841bf74..c63160a32e5 100644 --- a/tests/pdctl/config/config_test.go +++ b/tests/pdctl/config/config_test.go @@ -17,16 +17,22 @@ package config_test import ( "context" "encoding/json" + "net/http" "os" "reflect" + "strconv" + "strings" "testing" "time" "github.com/coreos/go-semver/semver" + "github.com/pingcap/failpoint" "github.com/pingcap/kvproto/pkg/metapb" "github.com/stretchr/testify/require" + "github.com/stretchr/testify/suite" sc "github.com/tikv/pd/pkg/schedule/config" "github.com/tikv/pd/pkg/schedule/placement" + "github.com/tikv/pd/pkg/utils/testutil" "github.com/tikv/pd/pkg/utils/typeutil" "github.com/tikv/pd/server/config" "github.com/tikv/pd/tests" @@ -34,6 +40,13 @@ import ( pdctlCmd "github.com/tikv/pd/tools/pd-ctl/pdctl" ) +// testDialClient used to dial http request. only used for test. 
+var testDialClient = &http.Client{ + Transport: &http.Transport{ + DisableKeepAlives: true, + }, +} + type testCase struct { name string value interface{} @@ -48,27 +61,60 @@ func (t *testCase) judge(re *require.Assertions, scheduleConfigs ...*sc.Schedule } } -func TestConfig(t *testing.T) { - re := require.New(t) - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - cluster, err := tests.NewTestCluster(ctx, 1) - re.NoError(err) - err = cluster.RunInitialServers() - re.NoError(err) - cluster.WaitLeader() - pdAddr := cluster.GetConfig().GetClientURL() +type configTestSuite struct { + suite.Suite + env *tests.SchedulingTestEnvironment +} + +func TestConfigTestSuite(t *testing.T) { + suite.Run(t, new(configTestSuite)) +} + +func (suite *configTestSuite) SetupSuite() { + suite.env = tests.NewSchedulingTestEnvironment(suite.T()) +} + +func (suite *configTestSuite) TearDownSuite() { + suite.env.Cleanup() +} + +func (suite *configTestSuite) TearDownTest() { + cleanFunc := func(cluster *tests.TestCluster) { + def := placement.GroupBundle{ + ID: "pd", + Rules: []*placement.Rule{ + {GroupID: "pd", ID: "default", Role: "voter", Count: 3}, + }, + } + data, err := json.Marshal([]placement.GroupBundle{def}) + suite.NoError(err) + leader := cluster.GetLeaderServer() + suite.NotNil(leader) + urlPrefix := leader.GetAddr() + err = testutil.CheckPostJSON(testDialClient, urlPrefix+"/pd/api/v1/config/placement-rule", data, testutil.StatusOK(suite.Require())) + suite.NoError(err) + } + suite.env.RunFuncInTwoModes(cleanFunc) +} + +func (suite *configTestSuite) TestConfig() { + suite.NoError(failpoint.Enable("github.com/tikv/pd/pkg/dashboard/adapter/skipDashboardLoop", `return(true)`)) + suite.env.RunTestInTwoModes(suite.checkConfig) + suite.NoError(failpoint.Disable("github.com/tikv/pd/pkg/dashboard/adapter/skipDashboardLoop")) +} + +func (suite *configTestSuite) checkConfig(cluster *tests.TestCluster) { + re := suite.Require() + leaderServer := cluster.GetLeaderServer() + pdAddr := leaderServer.GetAddr() cmd := pdctlCmd.GetRootCmd() store := &metapb.Store{ Id: 1, State: metapb.StoreState_Up, } - leaderServer := cluster.GetLeaderServer() - re.NoError(leaderServer.BootstrapCluster()) svr := leaderServer.GetServer() tests.MustPutStore(re, cluster, store) - defer cluster.Destroy() // config show args := []string{"-u", pdAddr, "config", "show"} @@ -100,6 +146,7 @@ func TestConfig(t *testing.T) { re.NoError(err) re.False(svr.GetPDServerConfig().TraceRegionFlow) + origin := svr.GetPDServerConfig().FlowRoundByDigit args = []string{"-u", pdAddr, "config", "set", "flow-round-by-digit", "10"} _, err = pdctl.ExecuteCommand(cmd, args...) re.NoError(err) @@ -109,6 +156,17 @@ func TestConfig(t *testing.T) { _, err = pdctl.ExecuteCommand(cmd, args...) re.Error(err) + args = []string{"-u", pdAddr, "config", "set", "flow-round-by-digit", strconv.Itoa(origin)} + _, err = pdctl.ExecuteCommand(cmd, args...) + re.NoError(err) + testutil.Eventually(re, func() bool { // wait for the config to be synced to the scheduling server + output, err = pdctl.ExecuteCommand(cmd, "-u", pdAddr, "config", "show", "server") + re.NoError(err) + var conf config.PDServerConfig + re.NoError(json.Unmarshal(output, &conf)) + return conf.FlowRoundByDigit == origin + }) + // config show schedule args = []string{"-u", pdAddr, "config", "show", "schedule"} output, err = pdctl.ExecuteCommand(cmd, args...) 
@@ -283,16 +341,14 @@ func TestConfig(t *testing.T) { re.Contains(string(output), "is invalid") } -func TestPlacementRules(t *testing.T) { - re := require.New(t) - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - cluster, err := tests.NewTestCluster(ctx, 1) - re.NoError(err) - err = cluster.RunInitialServers() - re.NoError(err) - cluster.WaitLeader() - pdAddr := cluster.GetConfig().GetClientURL() +func (suite *configTestSuite) TestPlacementRules() { + suite.env.RunTestInTwoModes(suite.checkPlacementRules) +} + +func (suite *configTestSuite) checkPlacementRules(cluster *tests.TestCluster) { + re := suite.Require() + leaderServer := cluster.GetLeaderServer() + pdAddr := leaderServer.GetAddr() cmd := pdctlCmd.GetRootCmd() store := &metapb.Store{ @@ -300,22 +356,14 @@ func TestPlacementRules(t *testing.T) { State: metapb.StoreState_Up, LastHeartbeat: time.Now().UnixNano(), } - leaderServer := cluster.GetLeaderServer() - re.NoError(leaderServer.BootstrapCluster()) tests.MustPutStore(re, cluster, store) - defer cluster.Destroy() output, err := pdctl.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "enable") re.NoError(err) re.Contains(string(output), "Success!") // test show - var rules []placement.Rule - output, err = pdctl.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "show") - re.NoError(err) - re.NoError(json.Unmarshal(output, &rules)) - re.Len(rules, 1) - re.Equal([2]string{"pd", "default"}, rules[0].Key()) + suite.checkShowRuleKey(pdAddr, [][2]string{{placement.DefaultGroupID, placement.DefaultRuleID}}) f, _ := os.CreateTemp("/tmp", "pd_tests") fname := f.Name() @@ -323,73 +371,50 @@ func TestPlacementRules(t *testing.T) { defer os.RemoveAll(fname) // test load - _, err = pdctl.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "load", "--out="+fname) - re.NoError(err) - b, _ := os.ReadFile(fname) - re.NoError(json.Unmarshal(b, &rules)) - re.Len(rules, 1) - re.Equal([2]string{"pd", "default"}, rules[0].Key()) + rules := suite.checkLoadRule(pdAddr, fname, [][2]string{{placement.DefaultGroupID, placement.DefaultRuleID}}) // test save rules = append(rules, placement.Rule{ - GroupID: "pd", + GroupID: placement.DefaultGroupID, ID: "test1", - Role: "voter", + Role: placement.Voter, Count: 1, }, placement.Rule{ GroupID: "test-group", ID: "test2", - Role: "voter", + Role: placement.Voter, Count: 2, }) - b, _ = json.Marshal(rules) + b, _ := json.Marshal(rules) os.WriteFile(fname, b, 0600) _, err = pdctl.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "save", "--in="+fname) re.NoError(err) // test show group - var rules2 []placement.Rule - output, err = pdctl.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "show", "--group=pd") - re.NoError(err) - re.NoError(json.Unmarshal(output, &rules2)) - re.Len(rules2, 2) - re.Equal([2]string{"pd", "default"}, rules2[0].Key()) - re.Equal([2]string{"pd", "test1"}, rules2[1].Key()) + suite.checkShowRuleKey(pdAddr, [][2]string{{placement.DefaultGroupID, placement.DefaultRuleID}, {placement.DefaultGroupID, "test1"}}, "--group=pd") // test rule region detail tests.MustPutRegion(re, cluster, 1, 1, []byte("a"), []byte("b")) - fit := &placement.RegionFit{} - // need clear up args, so create new a cobra.Command. Otherwise gourp still exists. 
-	cmd2 := pdctlCmd.GetRootCmd()
-	output, err = pdctl.ExecuteCommand(cmd2, "-u", pdAddr, "config", "placement-rules", "show", "--region=1", "--detail")
-	re.NoError(err)
-	re.NoError(json.Unmarshal(output, fit))
-	re.Len(fit.RuleFits, 3)
-	re.Equal([2]string{"pd", "default"}, fit.RuleFits[0].Rule.Key())
+	suite.checkShowRuleKey(pdAddr, [][2]string{{placement.DefaultGroupID, placement.DefaultRuleID}}, "--region=1", "--detail")
 
 	// test delete
+	// need to clear up the args, so create a new cobra.Command. Otherwise the group still exists.
 	rules[0].Count = 0
 	b, _ = json.Marshal(rules)
 	os.WriteFile(fname, b, 0600)
 	_, err = pdctl.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "save", "--in="+fname)
 	re.NoError(err)
-	output, err = pdctl.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "show", "--group=pd")
-	re.NoError(err)
-	re.NoError(json.Unmarshal(output, &rules))
-	re.Len(rules, 1)
-	re.Equal([2]string{"pd", "test1"}, rules[0].Key())
+	suite.checkShowRuleKey(pdAddr, [][2]string{{placement.DefaultGroupID, "test1"}}, "--group=pd")
 }
 
-func TestPlacementRuleGroups(t *testing.T) {
-	re := require.New(t)
-	ctx, cancel := context.WithCancel(context.Background())
-	defer cancel()
-	cluster, err := tests.NewTestCluster(ctx, 1)
-	re.NoError(err)
-	err = cluster.RunInitialServers()
-	re.NoError(err)
-	cluster.WaitLeader()
-	pdAddr := cluster.GetConfig().GetClientURL()
+func (suite *configTestSuite) TestPlacementRuleGroups() {
+	suite.env.RunTestInTwoModes(suite.checkPlacementRuleGroups)
+}
+
+func (suite *configTestSuite) checkPlacementRuleGroups(cluster *tests.TestCluster) {
+	re := suite.Require()
+	leaderServer := cluster.GetLeaderServer()
+	pdAddr := leaderServer.GetAddr()
 	cmd := pdctlCmd.GetRootCmd()
 
 	store := &metapb.Store{
@@ -397,24 +422,23 @@ func TestPlacementRuleGroups(t *testing.T) {
 		State: metapb.StoreState_Up,
 		LastHeartbeat: time.Now().UnixNano(),
 	}
-	leaderServer := cluster.GetLeaderServer()
-	re.NoError(leaderServer.BootstrapCluster())
 	tests.MustPutStore(re, cluster, store)
-	defer cluster.Destroy()
-
 	output, err := pdctl.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "enable")
 	re.NoError(err)
 	re.Contains(string(output), "Success!")
 
 	// test show
 	var group placement.RuleGroup
-	output, err = pdctl.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "rule-group", "show", "pd")
-	re.NoError(err)
-	re.NoError(json.Unmarshal(output, &group))
-	re.Equal(placement.RuleGroup{ID: "pd"}, group)
+	testutil.Eventually(re, func() bool { // wait for the config to be synced to the scheduling server
+		output, err = pdctl.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "rule-group", "show", placement.DefaultGroupID)
+		re.NoError(err)
+		return !strings.Contains(string(output), "404")
+	})
+	re.NoError(json.Unmarshal(output, &group), string(output))
+	re.Equal(placement.RuleGroup{ID: placement.DefaultGroupID}, group)
 
 	// test set
-	output, err = pdctl.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "rule-group", "set", "pd", "42", "true")
+	output, err = pdctl.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "rule-group", "set", placement.DefaultGroupID, "42", "true")
 	re.NoError(err)
 	re.Contains(string(output), "Success!")
 	output, err = pdctl.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "rule-group", "set", "group2", "100", "false")
@@ -426,14 +450,16 @@ func TestPlacementRuleGroups(t *testing.T) {
 
 	// show all
 	var groups []placement.RuleGroup
-	output, err = pdctl.ExecuteCommand(cmd, "-u", pdAddr,
"config", "placement-rules", "rule-group", "show") - re.NoError(err) - re.NoError(json.Unmarshal(output, &groups)) - re.Equal([]placement.RuleGroup{ - {ID: "pd", Index: 42, Override: true}, - {ID: "group2", Index: 100, Override: false}, - {ID: "group3", Index: 200, Override: false}, - }, groups) + testutil.Eventually(re, func() bool { // wait for the config to be synced to the scheduling server + output, err = pdctl.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "rule-group", "show") + re.NoError(err) + re.NoError(json.Unmarshal(output, &groups)) + return reflect.DeepEqual([]placement.RuleGroup{ + {ID: placement.DefaultGroupID, Index: 42, Override: true}, + {ID: "group2", Index: 100, Override: false}, + {ID: "group3", Index: 200, Override: false}, + }, groups) + }) // delete output, err = pdctl.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "rule-group", "delete", "group2") @@ -441,29 +467,31 @@ func TestPlacementRuleGroups(t *testing.T) { re.Contains(string(output), "Delete group and rules successfully.") // show again - output, err = pdctl.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "rule-group", "show", "group2") - re.NoError(err) - re.Contains(string(output), "404") + testutil.Eventually(re, func() bool { // wait for the config to be synced to the scheduling server + output, err = pdctl.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "rule-group", "show", "group2") + re.NoError(err) + return strings.Contains(string(output), "404") + }) // delete using regex _, err = pdctl.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "rule-group", "delete", "--regexp", ".*3") re.NoError(err) - _, err = pdctl.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "rule-group", "show", "group3") - re.NoError(err) - re.Contains(string(output), "404") + testutil.Eventually(re, func() bool { // wait for the config to be synced to the scheduling server + output, err = pdctl.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "rule-group", "show", "group3") + re.NoError(err) + return strings.Contains(string(output), "404") + }) } -func TestPlacementRuleBundle(t *testing.T) { - re := require.New(t) - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - cluster, err := tests.NewTestCluster(ctx, 1) - re.NoError(err) - err = cluster.RunInitialServers() - re.NoError(err) - cluster.WaitLeader() - pdAddr := cluster.GetConfig().GetClientURL() +func (suite *configTestSuite) TestPlacementRuleBundle() { + suite.env.RunTestInTwoModes(suite.checkPlacementRuleBundle) +} + +func (suite *configTestSuite) checkPlacementRuleBundle(cluster *tests.TestCluster) { + re := suite.Require() + leaderServer := cluster.GetLeaderServer() + pdAddr := leaderServer.GetAddr() cmd := pdctlCmd.GetRootCmd() store := &metapb.Store{ @@ -471,10 +499,7 @@ func TestPlacementRuleBundle(t *testing.T) { State: metapb.StoreState_Up, LastHeartbeat: time.Now().UnixNano(), } - leaderServer := cluster.GetLeaderServer() - re.NoError(leaderServer.BootstrapCluster()) tests.MustPutStore(re, cluster, store) - defer cluster.Destroy() output, err := pdctl.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "enable") re.NoError(err) @@ -482,10 +507,10 @@ func TestPlacementRuleBundle(t *testing.T) { // test get var bundle placement.GroupBundle - output, err = pdctl.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "rule-bundle", "get", "pd") + output, err = pdctl.ExecuteCommand(cmd, "-u", pdAddr, "config", 
"placement-rules", "rule-bundle", "get", placement.DefaultGroupID) re.NoError(err) re.NoError(json.Unmarshal(output, &bundle)) - re.Equal(placement.GroupBundle{ID: "pd", Index: 0, Override: false, Rules: []*placement.Rule{{GroupID: "pd", ID: "default", Role: "voter", Count: 3}}}, bundle) + re.Equal(placement.GroupBundle{ID: placement.DefaultGroupID, Index: 0, Override: false, Rules: []*placement.Rule{{GroupID: placement.DefaultGroupID, ID: placement.DefaultRuleID, Role: placement.Voter, Count: 3}}}, bundle) f, err := os.CreateTemp("/tmp", "pd_tests") re.NoError(err) @@ -494,81 +519,63 @@ func TestPlacementRuleBundle(t *testing.T) { defer os.RemoveAll(fname) // test load - var bundles []placement.GroupBundle - _, err = pdctl.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "rule-bundle", "load", "--out="+fname) - re.NoError(err) - b, _ := os.ReadFile(fname) - re.NoError(json.Unmarshal(b, &bundles)) - re.Len(bundles, 1) - re.Equal(placement.GroupBundle{ID: "pd", Index: 0, Override: false, Rules: []*placement.Rule{{GroupID: "pd", ID: "default", Role: "voter", Count: 3}}}, bundles[0]) + suite.checkLoadRuleBundle(pdAddr, fname, []placement.GroupBundle{ + {ID: placement.DefaultGroupID, Index: 0, Override: false, Rules: []*placement.Rule{{GroupID: placement.DefaultGroupID, ID: placement.DefaultRuleID, Role: placement.Voter, Count: 3}}}, + }) // test set bundle.ID = "pe" bundle.Rules[0].GroupID = "pe" - b, err = json.Marshal(bundle) + b, err := json.Marshal(bundle) re.NoError(err) re.NoError(os.WriteFile(fname, b, 0600)) _, err = pdctl.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "rule-bundle", "set", "--in="+fname) re.NoError(err) - - _, err = pdctl.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "rule-bundle", "load", "--out="+fname) - re.NoError(err) - b, _ = os.ReadFile(fname) - re.NoError(json.Unmarshal(b, &bundles)) - assertBundles(re, bundles, []placement.GroupBundle{ - {ID: "pd", Index: 0, Override: false, Rules: []*placement.Rule{{GroupID: "pd", ID: "default", Role: "voter", Count: 3}}}, - {ID: "pe", Index: 0, Override: false, Rules: []*placement.Rule{{GroupID: "pe", ID: "default", Role: "voter", Count: 3}}}, + suite.checkLoadRuleBundle(pdAddr, fname, []placement.GroupBundle{ + {ID: placement.DefaultGroupID, Index: 0, Override: false, Rules: []*placement.Rule{{GroupID: placement.DefaultGroupID, ID: placement.DefaultRuleID, Role: placement.Voter, Count: 3}}}, + {ID: "pe", Index: 0, Override: false, Rules: []*placement.Rule{{GroupID: "pe", ID: placement.DefaultRuleID, Role: placement.Voter, Count: 3}}}, }) // test delete - _, err = pdctl.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "rule-bundle", "delete", "pd") + _, err = pdctl.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "rule-bundle", "delete", placement.DefaultGroupID) re.NoError(err) - _, err = pdctl.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "rule-bundle", "load", "--out="+fname) - re.NoError(err) - b, _ = os.ReadFile(fname) - re.NoError(json.Unmarshal(b, &bundles)) - assertBundles(re, bundles, []placement.GroupBundle{ - {ID: "pe", Index: 0, Override: false, Rules: []*placement.Rule{{GroupID: "pe", ID: "default", Role: "voter", Count: 3}}}, + suite.checkLoadRuleBundle(pdAddr, fname, []placement.GroupBundle{ + {ID: "pe", Index: 0, Override: false, Rules: []*placement.Rule{{GroupID: "pe", ID: placement.DefaultRuleID, Role: placement.Voter, Count: 3}}}, }) // test delete regexp bundle.ID = "pf" - bundle.Rules = 
[]*placement.Rule{{GroupID: "pf", ID: "default", Role: "voter", Count: 3}} + bundle.Rules = []*placement.Rule{{GroupID: "pf", ID: placement.DefaultRuleID, Role: placement.Voter, Count: 3}} b, err = json.Marshal(bundle) re.NoError(err) re.NoError(os.WriteFile(fname, b, 0600)) _, err = pdctl.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "rule-bundle", "set", "--in="+fname) re.NoError(err) + suite.checkLoadRuleBundle(pdAddr, fname, []placement.GroupBundle{ + {ID: "pe", Index: 0, Override: false, Rules: []*placement.Rule{{GroupID: "pe", ID: placement.DefaultRuleID, Role: placement.Voter, Count: 3}}}, + {ID: "pf", Index: 0, Override: false, Rules: []*placement.Rule{{GroupID: "pf", ID: placement.DefaultRuleID, Role: placement.Voter, Count: 3}}}, + }) _, err = pdctl.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "rule-bundle", "delete", "--regexp", ".*f") re.NoError(err) - _, err = pdctl.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "rule-bundle", "load", "--out="+fname) - re.NoError(err) - b, _ = os.ReadFile(fname) - re.NoError(json.Unmarshal(b, &bundles)) - assertBundles(re, bundles, []placement.GroupBundle{ - {ID: "pe", Index: 0, Override: false, Rules: []*placement.Rule{{GroupID: "pe", ID: "default", Role: "voter", Count: 3}}}, - }) + bundles := []placement.GroupBundle{ + {ID: "pe", Index: 0, Override: false, Rules: []*placement.Rule{{GroupID: "pe", ID: placement.DefaultRuleID, Role: placement.Voter, Count: 3}}}, + } + suite.checkLoadRuleBundle(pdAddr, fname, bundles) // test save - bundle.Rules = []*placement.Rule{{GroupID: "pf", ID: "default", Role: "voter", Count: 3}} + bundle.Rules = []*placement.Rule{{GroupID: "pf", ID: placement.DefaultRuleID, Role: placement.Voter, Count: 3}} bundles = append(bundles, bundle) b, err = json.Marshal(bundles) re.NoError(err) re.NoError(os.WriteFile(fname, b, 0600)) _, err = pdctl.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "rule-bundle", "save", "--in="+fname) re.NoError(err) - - _, err = pdctl.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "rule-bundle", "load", "--out="+fname) - re.NoError(err) - b, err = os.ReadFile(fname) - re.NoError(err) - re.NoError(json.Unmarshal(b, &bundles)) - assertBundles(re, bundles, []placement.GroupBundle{ - {ID: "pe", Index: 0, Override: false, Rules: []*placement.Rule{{GroupID: "pe", ID: "default", Role: "voter", Count: 3}}}, - {ID: "pf", Index: 0, Override: false, Rules: []*placement.Rule{{GroupID: "pf", ID: "default", Role: "voter", Count: 3}}}, + suite.checkLoadRuleBundle(pdAddr, fname, []placement.GroupBundle{ + {ID: "pe", Index: 0, Override: false, Rules: []*placement.Rule{{GroupID: "pe", ID: placement.DefaultRuleID, Role: placement.Voter, Count: 3}}}, + {ID: "pf", Index: 0, Override: false, Rules: []*placement.Rule{{GroupID: "pf", ID: placement.DefaultRuleID, Role: placement.Voter, Count: 3}}}, }) // partial update, so still one group is left, no error @@ -579,14 +586,84 @@ func TestPlacementRuleBundle(t *testing.T) { _, err = pdctl.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "rule-bundle", "save", "--in="+fname, "--partial") re.NoError(err) - _, err = pdctl.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "rule-bundle", "load", "--out="+fname) + suite.checkLoadRuleBundle(pdAddr, fname, []placement.GroupBundle{ + {ID: "pf", Index: 0, Override: false, Rules: []*placement.Rule{{GroupID: "pf", ID: placement.DefaultRuleID, Role: placement.Voter, Count: 3}}}, + }) + + // set default rule only + bundles = 
[]placement.GroupBundle{{ + ID: "pd", + Rules: []*placement.Rule{ + {GroupID: "pd", ID: "default", Role: "voter", Count: 3}, + }, + }} + b, err = json.Marshal(bundles) re.NoError(err) - b, err = os.ReadFile(fname) + re.NoError(os.WriteFile(fname, b, 0600)) + _, err = pdctl.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "rule-bundle", "save", "--in="+fname) + re.NoError(err) + _, err = pdctl.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "rule-bundle", "delete", "--regexp", ".*f") re.NoError(err) - re.NoError(json.Unmarshal(b, &bundles)) - assertBundles(re, bundles, []placement.GroupBundle{ - {ID: "pf", Index: 0, Override: false, Rules: []*placement.Rule{{GroupID: "pf", ID: "default", Role: "voter", Count: 3}}}, + + suite.checkLoadRuleBundle(pdAddr, fname, []placement.GroupBundle{ + {ID: "pd", Index: 0, Override: false, Rules: []*placement.Rule{{GroupID: "pd", ID: placement.DefaultRuleID, Role: placement.Voter, Count: 3}}}, + }) +} + +func (suite *configTestSuite) checkLoadRuleBundle(pdAddr string, fname string, expectValues []placement.GroupBundle) { + var bundles []placement.GroupBundle + cmd := pdctlCmd.GetRootCmd() + testutil.Eventually(suite.Require(), func() bool { // wait for the config to be synced to the scheduling server + _, err := pdctl.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "rule-bundle", "load", "--out="+fname) + suite.NoError(err) + b, _ := os.ReadFile(fname) + suite.NoError(json.Unmarshal(b, &bundles)) + return len(bundles) == len(expectValues) + }) + assertBundles(suite.Require(), bundles, expectValues) +} + +func (suite *configTestSuite) checkLoadRule(pdAddr string, fname string, expectValues [][2]string) []placement.Rule { + var rules []placement.Rule + cmd := pdctlCmd.GetRootCmd() + testutil.Eventually(suite.Require(), func() bool { // wait for the config to be synced to the scheduling server + _, err := pdctl.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "load", "--out="+fname) + suite.NoError(err) + b, _ := os.ReadFile(fname) + suite.NoError(json.Unmarshal(b, &rules)) + return len(rules) == len(expectValues) + }) + for i, v := range expectValues { + suite.Equal(v, rules[i].Key()) + } + return rules +} + +func (suite *configTestSuite) checkShowRuleKey(pdAddr string, expectValues [][2]string, opts ...string) { + var rules []placement.Rule + var fit placement.RegionFit + cmd := pdctlCmd.GetRootCmd() + testutil.Eventually(suite.Require(), func() bool { // wait for the config to be synced to the scheduling server + args := []string{"-u", pdAddr, "config", "placement-rules", "show"} + output, err := pdctl.ExecuteCommand(cmd, append(args, opts...)...) 
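+ // the `show` output is either a plain rule list or, with --region/--detail, a RegionFit, so try to decode both forms below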
+ suite.NoError(err) + err = json.Unmarshal(output, &rules) + if err == nil { + return len(rules) == len(expectValues) + } + suite.NoError(json.Unmarshal(output, &fit)) + return len(fit.RuleFits) != 0 }) + if len(rules) != 0 { + for i, v := range expectValues { + suite.Equal(v, rules[i].Key()) + } + } + if len(fit.RuleFits) != 0 { + for i, v := range expectValues { + suite.Equal(v, fit.RuleFits[i].Rule.Key()) + } + } } func TestReplicationMode(t *testing.T) { @@ -609,7 +686,6 @@ func TestReplicationMode(t *testing.T) { leaderServer := cluster.GetLeaderServer() re.NoError(leaderServer.BootstrapCluster()) tests.MustPutStore(re, cluster, store) - defer cluster.Destroy() conf := config.ReplicationModeConfig{ ReplicationMode: "majority", @@ -648,79 +724,85 @@ func TestReplicationMode(t *testing.T) { check() } -func TestUpdateDefaultReplicaConfig(t *testing.T) { - re := require.New(t) - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - cluster, err := tests.NewTestCluster(ctx, 1) - re.NoError(err) - err = cluster.RunInitialServers() - re.NoError(err) - cluster.WaitLeader() - pdAddr := cluster.GetConfig().GetClientURL() +func (suite *configTestSuite) TestUpdateDefaultReplicaConfig() { + suite.env.RunTestInTwoModes(suite.checkUpdateDefaultReplicaConfig) +} + +func (suite *configTestSuite) checkUpdateDefaultReplicaConfig(cluster *tests.TestCluster) { + re := suite.Require() + leaderServer := cluster.GetLeaderServer() + pdAddr := leaderServer.GetAddr() cmd := pdctlCmd.GetRootCmd() store := &metapb.Store{ Id: 1, State: metapb.StoreState_Up, } - leaderServer := cluster.GetLeaderServer() - re.NoError(leaderServer.BootstrapCluster()) tests.MustPutStore(re, cluster, store) - defer cluster.Destroy() - checkMaxReplicas := func(expect uint64) { args := []string{"-u", pdAddr, "config", "show", "replication"} - output, err := pdctl.ExecuteCommand(cmd, args...) - re.NoError(err) - replicationCfg := sc.ReplicationConfig{} - re.NoError(json.Unmarshal(output, &replicationCfg)) - re.Equal(expect, replicationCfg.MaxReplicas) + testutil.Eventually(re, func() bool { // wait for the config to be synced to the scheduling server + output, err := pdctl.ExecuteCommand(cmd, args...) + re.NoError(err) + replicationCfg := sc.ReplicationConfig{} + re.NoError(json.Unmarshal(output, &replicationCfg)) + return replicationCfg.MaxReplicas == expect + }) } checkLocationLabels := func(expect int) { args := []string{"-u", pdAddr, "config", "show", "replication"} - output, err := pdctl.ExecuteCommand(cmd, args...) - re.NoError(err) - replicationCfg := sc.ReplicationConfig{} - re.NoError(json.Unmarshal(output, &replicationCfg)) - re.Len(replicationCfg.LocationLabels, expect) + testutil.Eventually(re, func() bool { // wait for the config to be synced to the scheduling server + output, err := pdctl.ExecuteCommand(cmd, args...) + re.NoError(err) + replicationCfg := sc.ReplicationConfig{} + re.NoError(json.Unmarshal(output, &replicationCfg)) + return len(replicationCfg.LocationLabels) == expect + }) } checkIsolationLevel := func(expect string) { args := []string{"-u", pdAddr, "config", "show", "replication"} - output, err := pdctl.ExecuteCommand(cmd, args...) - re.NoError(err) - replicationCfg := sc.ReplicationConfig{} - re.NoError(json.Unmarshal(output, &replicationCfg)) - re.Equal(replicationCfg.IsolationLevel, expect) + testutil.Eventually(re, func() bool { // wait for the config to be synced to the scheduling server + output, err := pdctl.ExecuteCommand(cmd, args...) 
+ re.NoError(err) + replicationCfg := sc.ReplicationConfig{} + re.NoError(json.Unmarshal(output, &replicationCfg)) + return replicationCfg.IsolationLevel == expect + }) } checkRuleCount := func(expect int) { - args := []string{"-u", pdAddr, "config", "placement-rules", "show", "--group", "pd", "--id", "default"} - output, err := pdctl.ExecuteCommand(cmd, args...) - re.NoError(err) - rule := placement.Rule{} - re.NoError(json.Unmarshal(output, &rule)) - re.Equal(expect, rule.Count) + args := []string{"-u", pdAddr, "config", "placement-rules", "show", "--group", placement.DefaultGroupID, "--id", placement.DefaultRuleID} + testutil.Eventually(re, func() bool { // wait for the config to be synced to the scheduling server + output, err := pdctl.ExecuteCommand(cmd, args...) + re.NoError(err) + rule := placement.Rule{} + re.NoError(json.Unmarshal(output, &rule)) + return rule.Count == expect + }) } checkRuleLocationLabels := func(expect int) { - args := []string{"-u", pdAddr, "config", "placement-rules", "show", "--group", "pd", "--id", "default"} - output, err := pdctl.ExecuteCommand(cmd, args...) - re.NoError(err) - rule := placement.Rule{} - re.NoError(json.Unmarshal(output, &rule)) - re.Len(rule.LocationLabels, expect) + args := []string{"-u", pdAddr, "config", "placement-rules", "show", "--group", placement.DefaultGroupID, "--id", placement.DefaultRuleID} + testutil.Eventually(re, func() bool { // wait for the config to be synced to the scheduling server + output, err := pdctl.ExecuteCommand(cmd, args...) + re.NoError(err) + rule := placement.Rule{} + re.NoError(json.Unmarshal(output, &rule)) + return len(rule.LocationLabels) == expect + }) } checkRuleIsolationLevel := func(expect string) { - args := []string{"-u", pdAddr, "config", "placement-rules", "show", "--group", "pd", "--id", "default"} - output, err := pdctl.ExecuteCommand(cmd, args...) - re.NoError(err) - rule := placement.Rule{} - re.NoError(json.Unmarshal(output, &rule)) - re.Equal(rule.IsolationLevel, expect) + args := []string{"-u", pdAddr, "config", "placement-rules", "show", "--group", placement.DefaultGroupID, "--id", placement.DefaultRuleID} + testutil.Eventually(re, func() bool { // wait for the config to be synced to the scheduling server + output, err := pdctl.ExecuteCommand(cmd, args...) + re.NoError(err) + rule := placement.Rule{} + re.NoError(json.Unmarshal(output, &rule)) + return rule.IsolationLevel == expect + }) } // update successfully when placement rules is not enabled. @@ -764,10 +846,10 @@ func TestUpdateDefaultReplicaConfig(t *testing.T) { checkRuleIsolationLevel("host") // update unsuccessfully when many rule exists. 
- fname := t.TempDir() + fname := suite.T().TempDir() rules := []placement.Rule{ { - GroupID: "pd", + GroupID: placement.DefaultGroupID, ID: "test1", Role: "voter", Count: 1, @@ -791,16 +873,14 @@ func TestUpdateDefaultReplicaConfig(t *testing.T) { checkRuleIsolationLevel("host") } -func TestPDServerConfig(t *testing.T) { - re := require.New(t) - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - cluster, err := tests.NewTestCluster(ctx, 1) - re.NoError(err) - err = cluster.RunInitialServers() - re.NoError(err) - cluster.WaitLeader() - pdAddr := cluster.GetConfig().GetClientURL() +func (suite *configTestSuite) TestPDServerConfig() { + suite.env.RunTestInTwoModes(suite.checkPDServerConfig) +} + +func (suite *configTestSuite) checkPDServerConfig(cluster *tests.TestCluster) { + re := suite.Require() + leaderServer := cluster.GetLeaderServer() + pdAddr := leaderServer.GetAddr() cmd := pdctlCmd.GetRootCmd() store := &metapb.Store{ @@ -808,10 +888,7 @@ func TestPDServerConfig(t *testing.T) { State: metapb.StoreState_Up, LastHeartbeat: time.Now().UnixNano(), } - leaderServer := cluster.GetLeaderServer() - re.NoError(leaderServer.BootstrapCluster()) tests.MustPutStore(re, cluster, store) - defer cluster.Destroy() output, err := pdctl.ExecuteCommand(cmd, "-u", pdAddr, "config", "show", "server") re.NoError(err) @@ -823,7 +900,9 @@ func TestPDServerConfig(t *testing.T) { re.Equal("table", conf.KeyType) re.Equal(typeutil.StringSlice([]string{}), conf.RuntimeServices) re.Equal("", conf.MetricStorage) - re.Equal("auto", conf.DashboardAddress) + if conf.DashboardAddress != "auto" { // dashboard has been assigned + re.Equal(leaderServer.GetAddr(), conf.DashboardAddress) + } re.Equal(int(3), conf.FlowRoundByDigit) } diff --git a/tests/pdctl/global_test.go b/tests/pdctl/global_test.go index 7e57f589249..00d31a384d5 100644 --- a/tests/pdctl/global_test.go +++ b/tests/pdctl/global_test.go @@ -30,18 +30,20 @@ import ( "go.uber.org/zap" ) +const pdControlCallerID = "pd-ctl" + func TestSendAndGetComponent(t *testing.T) { re := require.New(t) handler := func(ctx context.Context, s *server.Server) (http.Handler, apiutil.APIServiceGroup, error) { mux := http.NewServeMux() mux.HandleFunc("/pd/api/v1/health", func(w http.ResponseWriter, r *http.Request) { - component := apiutil.GetComponentNameOnHTTP(r) + callerID := apiutil.GetCallerIDOnHTTP(r) for k := range r.Header { log.Info("header", zap.String("key", k)) } - log.Info("component", zap.String("component", component)) - re.Equal("pdctl", component) - fmt.Fprint(w, component) + log.Info("caller id", zap.String("caller-id", callerID)) + re.Equal(pdControlCallerID, callerID) + fmt.Fprint(w, callerID) }) info := apiutil.APIServiceGroup{ IsCore: true, @@ -65,5 +67,5 @@ func TestSendAndGetComponent(t *testing.T) { args := []string{"-u", pdAddr, "health"} output, err := ExecuteCommand(cmd, args...) 
re.NoError(err) - re.Equal("pdctl\n", string(output)) + re.Equal(fmt.Sprintf("%s\n", pdControlCallerID), string(output)) } diff --git a/tests/pdctl/hot/hot_test.go b/tests/pdctl/hot/hot_test.go index ac9bb3d83bf..03c26f40441 100644 --- a/tests/pdctl/hot/hot_test.go +++ b/tests/pdctl/hot/hot_test.go @@ -42,30 +42,40 @@ import ( type hotTestSuite struct { suite.Suite + env *tests.SchedulingTestEnvironment } func TestHotTestSuite(t *testing.T) { suite.Run(t, new(hotTestSuite)) } -func (suite *hotTestSuite) TestHot() { - var start time.Time - start = start.Add(time.Hour) - opts := []tests.ConfigOption{ +func (suite *hotTestSuite) SetupSuite() { + suite.env = tests.NewSchedulingTestEnvironment(suite.T(), func(conf *config.Config, serverName string) { - conf.Schedule.MaxStoreDownTime.Duration = time.Since(start) + conf.Schedule.MaxStoreDownTime.Duration = time.Hour + conf.Schedule.HotRegionCacheHitsThreshold = 0 }, + ) +} + +func (suite *hotTestSuite) TearDownSuite() { + suite.env.Cleanup() +} + +func (suite *hotTestSuite) TearDownTest() { + cleanFunc := func(cluster *tests.TestCluster) { + leader := cluster.GetLeaderServer() + hotStat := leader.GetRaftCluster().GetHotStat() + if sche := cluster.GetSchedulingPrimaryServer(); sche != nil { + hotStat = sche.GetCluster().GetHotStat() + } + hotStat.HotCache.CleanCache() } - env := tests.NewSchedulingTestEnvironment(suite.T(), opts...) - env.RunTestInTwoModes(suite.checkHot) + suite.env.RunFuncInTwoModes(cleanFunc) +} - opts = append(opts, func(conf *config.Config, serverName string) { - conf.Schedule.HotRegionCacheHitsThreshold = 0 - }) - env = tests.NewSchedulingTestEnvironment(suite.T(), opts...) - env.RunTestInTwoModes(suite.checkHotWithoutHotPeer) - env = tests.NewSchedulingTestEnvironment(suite.T(), opts...) 
- env.RunTestInTwoModes(suite.checkHotWithStoreID) +func (suite *hotTestSuite) TestHot() { + suite.env.RunTestInTwoModes(suite.checkHot) } func (suite *hotTestSuite) checkHot(cluster *tests.TestCluster) { @@ -229,6 +239,10 @@ func (suite *hotTestSuite) checkHot(cluster *tests.TestCluster) { testCommand(reportIntervals, "read") } +func (suite *hotTestSuite) TestHotWithStoreID() { + suite.env.RunTestInTwoModes(suite.checkHotWithStoreID) +} + func (suite *hotTestSuite) checkHotWithStoreID(cluster *tests.TestCluster) { re := suite.Require() statistics.Denoising = false @@ -292,6 +306,10 @@ func (suite *hotTestSuite) checkHotWithStoreID(cluster *tests.TestCluster) { re.Equal(float64(200000000), hotRegion.AsLeader[1].TotalBytesRate) } +func (suite *hotTestSuite) TestHotWithoutHotPeer() { + suite.env.RunTestInTwoModes(suite.checkHotWithoutHotPeer) +} + func (suite *hotTestSuite) checkHotWithoutHotPeer(cluster *tests.TestCluster) { re := suite.Require() statistics.Denoising = false @@ -349,6 +367,7 @@ func (suite *hotTestSuite) checkHotWithoutHotPeer(cluster *tests.TestCluster) { hotRegion := statistics.StoreHotPeersInfos{} re.NoError(err) re.NoError(json.Unmarshal(output, &hotRegion)) + re.NotNil(hotRegion.AsPeer[1]) re.Equal(hotRegion.AsPeer[1].Count, 0) re.Equal(0.0, hotRegion.AsPeer[1].TotalBytesRate) re.Equal(load, hotRegion.AsPeer[1].StoreByteRate) @@ -362,10 +381,10 @@ func (suite *hotTestSuite) checkHotWithoutHotPeer(cluster *tests.TestCluster) { hotRegion := statistics.StoreHotPeersInfos{} re.NoError(err) re.NoError(json.Unmarshal(output, &hotRegion)) - re.Equal(hotRegion.AsPeer[1].Count, 0) + re.Equal(0, hotRegion.AsPeer[1].Count) re.Equal(0.0, hotRegion.AsPeer[1].TotalBytesRate) re.Equal(load, hotRegion.AsPeer[1].StoreByteRate) - re.Equal(hotRegion.AsLeader[1].Count, 0) + re.Equal(0, hotRegion.AsLeader[1].Count) re.Equal(0.0, hotRegion.AsLeader[1].TotalBytesRate) re.Equal(0.0, hotRegion.AsLeader[1].StoreByteRate) // write leader sum } diff --git a/tests/pdctl/keyspace/keyspace_group_test.go b/tests/pdctl/keyspace/keyspace_group_test.go index 0b09550d967..cbfdf1d099a 100644 --- a/tests/pdctl/keyspace/keyspace_group_test.go +++ b/tests/pdctl/keyspace/keyspace_group_test.go @@ -503,7 +503,7 @@ func TestShowKeyspaceGroupPrimary(t *testing.T) { for i := 0; i < 10; i++ { keyspaces = append(keyspaces, fmt.Sprintf("keyspace_%d", i)) } - tc, err := tests.NewTestAPICluster(ctx, 3, func(conf *config.Config, serverName string) { + tc, err := tests.NewTestAPICluster(ctx, 1, func(conf *config.Config, serverName string) { conf.Keyspace.PreAlloc = keyspaces }) re.NoError(err) @@ -528,7 +528,6 @@ func TestShowKeyspaceGroupPrimary(t *testing.T) { args := []string{"-u", pdAddr, "keyspace-group"} output, err := pdctl.ExecuteCommand(cmd, append(args, defaultKeyspaceGroupID)...) re.NoError(err) - err = json.Unmarshal(output, &keyspaceGroup) re.NoError(err) re.Equal(utils.DefaultKeyspaceGroupID, keyspaceGroup.ID) diff --git a/tests/pdctl/keyspace/keyspace_test.go b/tests/pdctl/keyspace/keyspace_test.go index 805a30e6f18..3ff755fe601 100644 --- a/tests/pdctl/keyspace/keyspace_test.go +++ b/tests/pdctl/keyspace/keyspace_test.go @@ -105,6 +105,35 @@ func TestKeyspace(t *testing.T) { re.NoError(failpoint.Disable("github.com/tikv/pd/server/delayStartServerLoop")) } +// Show command should auto retry without refresh_group_id if keyspace group manager not initialized. 
+// See issue: #7441 +func TestKeyspaceGroupUninitialized(t *testing.T) { + re := require.New(t) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + re.NoError(failpoint.Enable("github.com/tikv/pd/server/delayStartServerLoop", `return(true)`)) + re.NoError(failpoint.Enable("github.com/tikv/pd/pkg/keyspace/skipSplitRegion", "return(true)")) + tc, err := tests.NewTestCluster(ctx, 1) + re.NoError(err) + re.NoError(tc.RunInitialServers()) + tc.WaitLeader() + re.NoError(tc.GetLeaderServer().BootstrapCluster()) + pdAddr := tc.GetConfig().GetClientURL() + + keyspaceName := "DEFAULT" + keyspaceID := uint32(0) + args := []string{"-u", pdAddr, "keyspace", "show", "name", keyspaceName} + output, err := pdctl.ExecuteCommand(pdctlCmd.GetRootCmd(), args...) + re.NoError(err) + var meta api.KeyspaceMeta + re.NoError(json.Unmarshal(output, &meta)) + re.Equal(keyspaceName, meta.GetName()) + re.Equal(keyspaceID, meta.GetId()) + + re.NoError(failpoint.Disable("github.com/tikv/pd/server/delayStartServerLoop")) + re.NoError(failpoint.Disable("github.com/tikv/pd/pkg/keyspace/skipSplitRegion")) +} + type keyspaceTestSuite struct { suite.Suite ctx context.Context diff --git a/tests/pdctl/operator/operator_test.go b/tests/pdctl/operator/operator_test.go index 1752c28a3c0..aa2fe5d1304 100644 --- a/tests/pdctl/operator/operator_test.go +++ b/tests/pdctl/operator/operator_test.go @@ -25,6 +25,7 @@ import ( "github.com/pingcap/kvproto/pkg/metapb" "github.com/stretchr/testify/suite" "github.com/tikv/pd/pkg/core" + "github.com/tikv/pd/pkg/utils/testutil" "github.com/tikv/pd/server/config" "github.com/tikv/pd/tests" "github.com/tikv/pd/tests/pdctl" @@ -33,27 +34,30 @@ import ( type operatorTestSuite struct { suite.Suite + env *tests.SchedulingTestEnvironment } func TestOperatorTestSuite(t *testing.T) { suite.Run(t, new(operatorTestSuite)) } -func (suite *operatorTestSuite) TestOperator() { - var start time.Time - start = start.Add(time.Hour) - opts := []tests.ConfigOption{ - // TODO: enable placementrules +func (suite *operatorTestSuite) SetupSuite() { + suite.env = tests.NewSchedulingTestEnvironment(suite.T(), func(conf *config.Config, serverName string) { + // TODO: enable placement rules conf.Replication.MaxReplicas = 2 conf.Replication.EnablePlacementRules = false + conf.Schedule.MaxStoreDownTime.Duration = time.Hour }, - func(conf *config.Config, serverName string) { - conf.Schedule.MaxStoreDownTime.Duration = time.Since(start) - }, - } - env := tests.NewSchedulingTestEnvironment(suite.T(), opts...) 
- env.RunTestInTwoModes(suite.checkOperator) + ) +} + +func (suite *operatorTestSuite) TearDownSuite() { + suite.env.Cleanup() +} + +func (suite *operatorTestSuite) TestOperator() { + suite.env.RunTestInTwoModes(suite.checkOperator) } func (suite *operatorTestSuite) checkOperator(cluster *tests.TestCluster) { @@ -221,6 +225,13 @@ func (suite *operatorTestSuite) checkOperator(cluster *tests.TestCluster) { _, err = pdctl.ExecuteCommand(cmd, "config", "set", "enable-placement-rules", "true") re.NoError(err) + if sche := cluster.GetSchedulingPrimaryServer(); sche != nil { + // wait for the scheduler server to update the config + testutil.Eventually(re, func() bool { + return sche.GetCluster().GetCheckerConfig().IsPlacementRulesEnabled() + }) + } + output, err = pdctl.ExecuteCommand(cmd, "operator", "add", "transfer-region", "1", "2", "3") re.NoError(err) re.Contains(string(output), "not supported") diff --git a/tests/pdctl/resourcemanager/resource_manager_command_test.go b/tests/pdctl/resourcemanager/resource_manager_command_test.go new file mode 100644 index 00000000000..ad43e0abca9 --- /dev/null +++ b/tests/pdctl/resourcemanager/resource_manager_command_test.go @@ -0,0 +1,97 @@ +// Copyright 2023 TiKV Project Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package resourcemanager_test + +import ( + "context" + "encoding/json" + "testing" + "time" + + "github.com/stretchr/testify/suite" + "github.com/tikv/pd/pkg/mcs/resourcemanager/server" + "github.com/tikv/pd/pkg/utils/typeutil" + "github.com/tikv/pd/tests" + "github.com/tikv/pd/tests/pdctl" + pdctlCmd "github.com/tikv/pd/tools/pd-ctl/pdctl" +) + +func TestResourceManagerSuite(t *testing.T) { + suite.Run(t, new(testResourceManagerSuite)) +} + +type testResourceManagerSuite struct { + suite.Suite + ctx context.Context + cancel context.CancelFunc + cluster *tests.TestCluster + pdAddr string +} + +func (s *testResourceManagerSuite) SetupSuite() { + s.ctx, s.cancel = context.WithCancel(context.Background()) + cluster, err := tests.NewTestCluster(s.ctx, 1) + s.Nil(err) + s.cluster = cluster + s.cluster.RunInitialServers() + cluster.WaitLeader() + s.pdAddr = cluster.GetConfig().GetClientURL() +} + +func (s *testResourceManagerSuite) TearDownSuite() { + s.cancel() + s.cluster.Destroy() +} + +func (s *testResourceManagerSuite) TestConfigController() { + expectCfg := server.ControllerConfig{} + expectCfg.Adjust(nil) + // Show controller config + checkShow := func() { + args := []string{"-u", s.pdAddr, "resource-manager", "config", "controller", "show"} + output, err := pdctl.ExecuteCommand(pdctlCmd.GetRootCmd(), args...) + s.Nil(err) + + actualCfg := server.ControllerConfig{} + err = json.Unmarshal(output, &actualCfg) + s.Nil(err) + s.Equal(expectCfg, actualCfg) + } + + // Check default config + checkShow() + + // Set controller config + args := []string{"-u", s.pdAddr, "resource-manager", "config", "controller", "set", "ltb-max-wait-duration", "1h"} + output, err := pdctl.ExecuteCommand(pdctlCmd.GetRootCmd(), args...) 
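+ // each `set` should be acknowledged with "Success!" and the change should be visible in the next `show`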
+ s.Nil(err) + s.Contains(string(output), "Success!") + expectCfg.LTBMaxWaitDuration = typeutil.Duration{Duration: 1 * time.Hour} + checkShow() + + args = []string{"-u", s.pdAddr, "resource-manager", "config", "controller", "set", "enable-controller-trace-log", "true"} + output, err = pdctl.ExecuteCommand(pdctlCmd.GetRootCmd(), args...) + s.Nil(err) + s.Contains(string(output), "Success!") + expectCfg.EnableControllerTraceLog = true + checkShow() + + args = []string{"-u", s.pdAddr, "resource-manager", "config", "controller", "set", "write-base-cost", "2"} + output, err = pdctl.ExecuteCommand(pdctlCmd.GetRootCmd(), args...) + s.Nil(err) + s.Contains(string(output), "Success!") + expectCfg.RequestUnit.WriteBaseCost = 2 + checkShow() +} diff --git a/tests/pdctl/scheduler/scheduler_test.go b/tests/pdctl/scheduler/scheduler_test.go index b3d9f356ad1..d8d54a79d13 100644 --- a/tests/pdctl/scheduler/scheduler_test.go +++ b/tests/pdctl/scheduler/scheduler_test.go @@ -17,15 +17,20 @@ package scheduler_test import ( "context" "encoding/json" + "fmt" + "reflect" + "strings" "testing" "time" + "github.com/pingcap/failpoint" "github.com/pingcap/kvproto/pkg/metapb" "github.com/spf13/cobra" "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" "github.com/tikv/pd/pkg/core" sc "github.com/tikv/pd/pkg/schedule/config" + "github.com/tikv/pd/pkg/slice" "github.com/tikv/pd/pkg/utils/testutil" "github.com/tikv/pd/pkg/versioninfo" "github.com/tikv/pd/tests" @@ -35,17 +40,61 @@ import ( type schedulerTestSuite struct { suite.Suite + env *tests.SchedulingTestEnvironment + defaultSchedulers []string } func TestSchedulerTestSuite(t *testing.T) { suite.Run(t, new(schedulerTestSuite)) } +func (suite *schedulerTestSuite) SetupSuite() { + suite.NoError(failpoint.Enable("github.com/tikv/pd/server/cluster/skipStoreConfigSync", `return(true)`)) + suite.env = tests.NewSchedulingTestEnvironment(suite.T()) + suite.defaultSchedulers = []string{ + "balance-leader-scheduler", + "balance-region-scheduler", + "balance-hot-region-scheduler", + "balance-witness-scheduler", + "transfer-witness-leader-scheduler", + } +} + +func (suite *schedulerTestSuite) TearDownSuite() { + suite.env.Cleanup() + suite.NoError(failpoint.Disable("github.com/tikv/pd/server/cluster/skipStoreConfigSync")) +} + +func (suite *schedulerTestSuite) TearDownTest() { + cleanFunc := func(cluster *tests.TestCluster) { + re := suite.Require() + pdAddr := cluster.GetConfig().GetClientURL() + cmd := pdctlCmd.GetRootCmd() + + var currentSchedulers []string + mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "show"}, ¤tSchedulers) + for _, scheduler := range suite.defaultSchedulers { + if slice.NoneOf(currentSchedulers, func(i int) bool { + return currentSchedulers[i] == scheduler + }) { + echo := mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", scheduler}, nil) + re.Contains(echo, "Success!") + } + } + for _, scheduler := range currentSchedulers { + if slice.NoneOf(suite.defaultSchedulers, func(i int) bool { + return suite.defaultSchedulers[i] == scheduler + }) { + echo := mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "remove", scheduler}, nil) + re.Contains(echo, "Success!") + } + } + } + suite.env.RunFuncInTwoModes(cleanFunc) +} + func (suite *schedulerTestSuite) TestScheduler() { - env := tests.NewSchedulingTestEnvironment(suite.T()) - // Fixme: use RunTestInTwoModes when sync deleted scheduler is supported. 
- env.RunTestInPDMode(suite.checkScheduler) - env.RunTestInTwoModes(suite.checkSchedulerDiagnostic) + suite.env.RunTestInTwoModes(suite.checkScheduler) } func (suite *schedulerTestSuite) checkScheduler(cluster *tests.TestCluster) { @@ -84,19 +133,30 @@ func (suite *schedulerTestSuite) checkScheduler(cluster *tests.TestCluster) { checkSchedulerCommand := func(args []string, expected map[string]bool) { if args != nil { - mustExec(re, cmd, args, nil) - } - var schedulers []string - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "show"}, &schedulers) - for _, scheduler := range schedulers { - re.True(expected[scheduler]) + echo := mustExec(re, cmd, args, nil) + re.Contains(echo, "Success!") } + testutil.Eventually(re, func() bool { + var schedulers []string + mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "show"}, &schedulers) + if len(schedulers) != len(expected) { + return false + } + for _, scheduler := range schedulers { + if _, ok := expected[scheduler]; !ok { + return false + } + } + return true + }) } checkSchedulerConfigCommand := func(expectedConfig map[string]interface{}, schedulerName string) { - configInfo := make(map[string]interface{}) - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", schedulerName}, &configInfo) - re.Equal(expectedConfig, configInfo) + testutil.Eventually(re, func() bool { + configInfo := make(map[string]interface{}) + mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", schedulerName}, &configInfo) + return reflect.DeepEqual(expectedConfig, configInfo) + }) } leaderServer := cluster.GetLeaderServer() @@ -106,7 +166,6 @@ func (suite *schedulerTestSuite) checkScheduler(cluster *tests.TestCluster) { // note: because pdqsort is a unstable sort algorithm, set ApproximateSize for this region. 
tests.MustPutRegion(re, cluster, 1, 1, []byte("a"), []byte("b"), core.SetApproximateSize(10)) - time.Sleep(3 * time.Second) // scheduler show command expected := map[string]bool{ @@ -120,7 +179,6 @@ func (suite *schedulerTestSuite) checkScheduler(cluster *tests.TestCluster) { // scheduler delete command args := []string{"-u", pdAddr, "scheduler", "remove", "balance-region-scheduler"} - time.Sleep(10 * time.Second) expected = map[string]bool{ "balance-leader-scheduler": true, "balance-hot-region-scheduler": true, @@ -129,9 +187,40 @@ func (suite *schedulerTestSuite) checkScheduler(cluster *tests.TestCluster) { } checkSchedulerCommand(args, expected) - schedulers := []string{"evict-leader-scheduler", "grant-leader-scheduler"} + // avoid the influence of the scheduler order + schedulers := []string{"evict-leader-scheduler", "grant-leader-scheduler", "evict-leader-scheduler", "grant-leader-scheduler"} + + checkStorePause := func(changedStores []uint64, schedulerName string) { + status := func() string { + switch schedulerName { + case "evict-leader-scheduler": + return "paused" + case "grant-leader-scheduler": + return "resumed" + default: + re.Fail(fmt.Sprintf("unknown scheduler %s", schedulerName)) + return "" + } + }() + for _, store := range stores { + isStorePaused := !cluster.GetLeaderServer().GetRaftCluster().GetStore(store.GetId()).AllowLeaderTransfer() + if slice.AnyOf(changedStores, func(i int) bool { + return store.GetId() == changedStores[i] + }) { + re.True(isStorePaused, + fmt.Sprintf("store %d should be %s with %s", store.GetId(), status, schedulerName)) + } else { + re.False(isStorePaused, + fmt.Sprintf("store %d should not be %s with %s", store.GetId(), status, schedulerName)) + } + if sche := cluster.GetSchedulingPrimaryServer(); sche != nil { + re.Equal(isStorePaused, !sche.GetCluster().GetStore(store.GetId()).AllowLeaderTransfer()) + } + } + } for idx := range schedulers { + checkStorePause([]uint64{}, schedulers[idx]) // scheduler add command args = []string{"-u", pdAddr, "scheduler", "add", schedulers[idx], "2"} expected = map[string]bool{ @@ -147,6 +236,7 @@ func (suite *schedulerTestSuite) checkScheduler(cluster *tests.TestCluster) { expectedConfig := make(map[string]interface{}) expectedConfig["store-id-ranges"] = map[string]interface{}{"2": []interface{}{map[string]interface{}{"end-key": "", "start-key": ""}}} checkSchedulerConfigCommand(expectedConfig, schedulers[idx]) + checkStorePause([]uint64{2}, schedulers[idx]) // scheduler config update command args = []string{"-u", pdAddr, "scheduler", "config", schedulers[idx], "add-store", "3"} @@ -157,11 +247,12 @@ func (suite *schedulerTestSuite) checkScheduler(cluster *tests.TestCluster) { "transfer-witness-leader-scheduler": true, "balance-witness-scheduler": true, } - checkSchedulerCommand(args, expected) // check update success + checkSchedulerCommand(args, expected) expectedConfig["store-id-ranges"] = map[string]interface{}{"2": []interface{}{map[string]interface{}{"end-key": "", "start-key": ""}}, "3": []interface{}{map[string]interface{}{"end-key": "", "start-key": ""}}} checkSchedulerConfigCommand(expectedConfig, schedulers[idx]) + checkStorePause([]uint64{2, 3}, schedulers[idx]) // scheduler delete command args = []string{"-u", pdAddr, "scheduler", "remove", schedulers[idx]} @@ -172,6 +263,7 @@ func (suite *schedulerTestSuite) checkScheduler(cluster *tests.TestCluster) { "balance-witness-scheduler": true, } checkSchedulerCommand(args, expected) + checkStorePause([]uint64{}, schedulers[idx]) // scheduler add 
command args = []string{"-u", pdAddr, "scheduler", "add", schedulers[idx], "2"} @@ -183,6 +275,7 @@ func (suite *schedulerTestSuite) checkScheduler(cluster *tests.TestCluster) { "balance-witness-scheduler": true, } checkSchedulerCommand(args, expected) + checkStorePause([]uint64{2}, schedulers[idx]) // scheduler add command twice args = []string{"-u", pdAddr, "scheduler", "add", schedulers[idx], "4"} @@ -198,6 +291,7 @@ func (suite *schedulerTestSuite) checkScheduler(cluster *tests.TestCluster) { // check add success expectedConfig["store-id-ranges"] = map[string]interface{}{"2": []interface{}{map[string]interface{}{"end-key": "", "start-key": ""}}, "4": []interface{}{map[string]interface{}{"end-key": "", "start-key": ""}}} checkSchedulerConfigCommand(expectedConfig, schedulers[idx]) + checkStorePause([]uint64{2, 4}, schedulers[idx]) // scheduler remove command [old] args = []string{"-u", pdAddr, "scheduler", "remove", schedulers[idx] + "-4"} @@ -213,6 +307,7 @@ func (suite *schedulerTestSuite) checkScheduler(cluster *tests.TestCluster) { // check remove success expectedConfig["store-id-ranges"] = map[string]interface{}{"2": []interface{}{map[string]interface{}{"end-key": "", "start-key": ""}}} checkSchedulerConfigCommand(expectedConfig, schedulers[idx]) + checkStorePause([]uint64{2}, schedulers[idx]) // scheduler remove command, when remove the last store, it should remove whole scheduler args = []string{"-u", pdAddr, "scheduler", "remove", schedulers[idx] + "-2"} @@ -223,6 +318,7 @@ func (suite *schedulerTestSuite) checkScheduler(cluster *tests.TestCluster) { "balance-witness-scheduler": true, } checkSchedulerCommand(args, expected) + checkStorePause([]uint64{}, schedulers[idx]) } // test shuffle region config @@ -236,9 +332,12 @@ func (suite *schedulerTestSuite) checkScheduler(cluster *tests.TestCluster) { var roles []string mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "shuffle-region-scheduler", "show-roles"}, &roles) re.Equal([]string{"leader", "follower", "learner"}, roles) - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "shuffle-region-scheduler", "set-roles", "learner"}, nil) - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "shuffle-region-scheduler", "show-roles"}, &roles) - re.Equal([]string{"learner"}, roles) + echo := mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "shuffle-region-scheduler", "set-roles", "learner"}, nil) + re.Contains(echo, "Success!") + testutil.Eventually(re, func() bool { + mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "shuffle-region-scheduler", "show-roles"}, &roles) + return reflect.DeepEqual([]string{"learner"}, roles) + }) mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "shuffle-region-scheduler"}, &roles) re.Equal([]string{"learner"}, roles) @@ -259,28 +358,35 @@ func (suite *schedulerTestSuite) checkScheduler(cluster *tests.TestCluster) { mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "grant-hot-region-scheduler"}, &conf3) re.Equal(expected3, conf3) - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "grant-hot-region-scheduler", "set", "2", "1,2,3"}, nil) + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "grant-hot-region-scheduler", "set", "2", "1,2,3"}, nil) + re.Contains(echo, "Success!") expected3["store-leader-id"] = float64(2) - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "grant-hot-region-scheduler"}, &conf3) - re.Equal(expected3, conf3) + testutil.Eventually(re, 
func() bool { + mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "grant-hot-region-scheduler"}, &conf3) + return reflect.DeepEqual(expected3, conf3) + }) - // test balance region config - echo := mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", "balance-region-scheduler"}, nil) + // test remove and add scheduler + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", "balance-region-scheduler"}, nil) re.Contains(echo, "Success!") echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "remove", "balance-region-scheduler"}, nil) re.Contains(echo, "Success!") echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "remove", "balance-region-scheduler"}, nil) re.NotContains(echo, "Success!") + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", "balance-region-scheduler"}, nil) + re.Contains(echo, "Success!") echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", "evict-leader-scheduler", "1"}, nil) re.Contains(echo, "Success!") echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "remove", "evict-leader-scheduler-1"}, nil) re.Contains(echo, "Success!") echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "remove", "evict-leader-scheduler-1"}, nil) re.Contains(echo, "404") + testutil.Eventually(re, func() bool { // wait for removed scheduler to be synced to scheduling server. + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "evict-leader-scheduler"}, nil) + return strings.Contains(echo, "[404] scheduler not found") + }) // test hot region config - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "evict-leader-scheduler"}, nil) - re.Contains(echo, "[404] scheduler not found") expected1 := map[string]interface{}{ "min-hot-byte-rate": float64(100), "min-hot-key-rate": float64(10), @@ -303,82 +409,90 @@ func (suite *schedulerTestSuite) checkScheduler(cluster *tests.TestCluster) { "rank-formula-version": "v2", "split-thresholds": 0.2, } + checkHotSchedulerConfig := func(expect map[string]interface{}) { + testutil.Eventually(re, func() bool { + var conf1 map[string]interface{} + mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler"}, &conf1) + return reflect.DeepEqual(expect, conf1) + }) + } + var conf map[string]interface{} mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "list"}, &conf) re.Equal(expected1, conf) mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "show"}, &conf) re.Equal(expected1, conf) - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "src-tolerance-ratio", "1.02"}, nil) + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "src-tolerance-ratio", "1.02"}, nil) + re.Contains(echo, "Success!") expected1["src-tolerance-ratio"] = 1.02 - var conf1 map[string]interface{} - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler"}, &conf1) - re.Equal(expected1, conf1) + checkHotSchedulerConfig(expected1) - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "read-priorities", "byte,key"}, nil) + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "read-priorities", "byte,key"}, nil) + re.Contains(echo, "Success!") expected1["read-priorities"] = 
[]interface{}{"byte", "key"} - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler"}, &conf1) - re.Equal(expected1, conf1) + checkHotSchedulerConfig(expected1) - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "read-priorities", "key"}, nil) - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler"}, &conf1) - re.Equal(expected1, conf1) - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "read-priorities", "key,byte"}, nil) + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "read-priorities", "key"}, nil) + re.Contains(echo, "Failed!") + checkHotSchedulerConfig(expected1) + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "read-priorities", "key,byte"}, nil) + re.Contains(echo, "Success!") expected1["read-priorities"] = []interface{}{"key", "byte"} - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler"}, &conf1) - re.Equal(expected1, conf1) - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "read-priorities", "foo,bar"}, nil) - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler"}, &conf1) - re.Equal(expected1, conf1) - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "read-priorities", ""}, nil) - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler"}, &conf1) - re.Equal(expected1, conf1) - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "read-priorities", "key,key"}, nil) - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler"}, &conf1) - re.Equal(expected1, conf1) - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "read-priorities", "byte,byte"}, nil) - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler"}, &conf1) - re.Equal(expected1, conf1) - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "read-priorities", "key,key,byte"}, nil) - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler"}, &conf1) - re.Equal(expected1, conf1) + checkHotSchedulerConfig(expected1) + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "read-priorities", "foo,bar"}, nil) + re.Contains(echo, "Failed!") + checkHotSchedulerConfig(expected1) + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "read-priorities", ""}, nil) + re.Contains(echo, "Failed!") + checkHotSchedulerConfig(expected1) + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "read-priorities", "key,key"}, nil) + re.Contains(echo, "Failed!") + checkHotSchedulerConfig(expected1) + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "read-priorities", "byte,byte"}, nil) + re.Contains(echo, "Failed!") + checkHotSchedulerConfig(expected1) + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", 
"config", "balance-hot-region-scheduler", "set", "read-priorities", "key,key,byte"}, nil) + re.Contains(echo, "Failed!") + checkHotSchedulerConfig(expected1) // write-priorities is divided into write-leader-priorities and write-peer-priorities - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "write-priorities", "key,byte"}, nil) - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler"}, &conf1) - re.Equal(expected1, conf1) - - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "rank-formula-version", "v0"}, nil) - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler"}, &conf1) + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "write-priorities", "key,byte"}, nil) + re.Contains(echo, "Failed!") + re.Contains(echo, "Config item is not found.") + checkHotSchedulerConfig(expected1) + + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "rank-formula-version", "v0"}, nil) + re.Contains(echo, "Failed!") + checkHotSchedulerConfig(expected1) expected1["rank-formula-version"] = "v2" - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "rank-formula-version", "v2"}, nil) - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler"}, &conf1) - re.Equal(expected1, conf1) + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "rank-formula-version", "v2"}, nil) + re.Contains(echo, "Success!") + checkHotSchedulerConfig(expected1) expected1["rank-formula-version"] = "v1" - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "rank-formula-version", "v1"}, nil) - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler"}, &conf1) - re.Equal(expected1, conf1) + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "rank-formula-version", "v1"}, nil) + re.Contains(echo, "Success!") + checkHotSchedulerConfig(expected1) expected1["forbid-rw-type"] = "read" - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "forbid-rw-type", "read"}, nil) - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler"}, &conf1) - re.Equal(expected1, conf1) + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "forbid-rw-type", "read"}, nil) + re.Contains(echo, "Success!") + checkHotSchedulerConfig(expected1) // test compatibility re.Equal("2.0.0", leaderServer.GetClusterVersion().String()) for _, store := range stores { version := versioninfo.HotScheduleWithQuery store.Version = versioninfo.MinSupportedVersion(version).String() + store.LastHeartbeat = time.Now().UnixNano() tests.MustPutStore(re, cluster, store) } re.Equal("5.2.0", leaderServer.GetClusterVersion().String()) // After upgrading, we should not use query. 
- mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler"}, &conf1) - re.Equal(conf1["read-priorities"], []interface{}{"key", "byte"}) + checkHotSchedulerConfig(expected1) // cannot set qps as write-peer-priorities echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "write-peer-priorities", "query,byte"}, nil) re.Contains(echo, "query is not allowed to be set in priorities for write-peer-priorities") - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler"}, &conf1) - re.Equal(conf1["write-peer-priorities"], []interface{}{"byte", "key"}) + checkHotSchedulerConfig(expected1) // test remove and add echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "remove", "balance-hot-region-scheduler"}, nil) @@ -388,12 +502,15 @@ func (suite *schedulerTestSuite) checkScheduler(cluster *tests.TestCluster) { // test balance leader config conf = make(map[string]interface{}) - conf1 = make(map[string]interface{}) + conf1 := make(map[string]interface{}) mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-leader-scheduler", "show"}, &conf) re.Equal(4., conf["batch"]) - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-leader-scheduler", "set", "batch", "3"}, nil) - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-leader-scheduler"}, &conf1) - re.Equal(3., conf1["batch"]) + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-leader-scheduler", "set", "batch", "3"}, nil) + re.Contains(echo, "Success!") + testutil.Eventually(re, func() bool { + mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-leader-scheduler"}, &conf1) + return conf1["batch"] == 3. + }) echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", "balance-leader-scheduler"}, nil) re.NotContains(echo, "Success!") echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "remove", "balance-leader-scheduler"}, nil) @@ -412,28 +529,58 @@ func (suite *schedulerTestSuite) checkScheduler(cluster *tests.TestCluster) { for _, schedulerName := range evictSlownessSchedulers { echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", schedulerName}, nil) re.Contains(echo, "Success!") - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "show"}, nil) - re.Contains(echo, schedulerName) + testutil.Eventually(re, func() bool { + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "show"}, nil) + return strings.Contains(echo, schedulerName) + }) echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", schedulerName, "set", "recovery-duration", "100"}, nil) re.Contains(echo, "Success!") conf = make(map[string]interface{}) - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", schedulerName, "show"}, &conf) - re.Equal(100., conf["recovery-duration"]) + testutil.Eventually(re, func() bool { + mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", schedulerName, "show"}, &conf) + return conf["recovery-duration"] == 100. 
+ }) echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "remove", schedulerName}, nil) re.Contains(echo, "Success!") - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "show"}, nil) - re.NotContains(echo, schedulerName) + testutil.Eventually(re, func() bool { + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "show"}, nil) + return !strings.Contains(echo, schedulerName) + }) } + // test shuffle hot region scheduler + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", "shuffle-hot-region-scheduler"}, nil) + re.Contains(echo, "Success!") + testutil.Eventually(re, func() bool { + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "show"}, nil) + return strings.Contains(echo, "shuffle-hot-region-scheduler") + }) + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "shuffle-hot-region-scheduler", "set", "limit", "127"}, nil) + re.Contains(echo, "Success!") + conf = make(map[string]interface{}) + testutil.Eventually(re, func() bool { + mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "shuffle-hot-region-scheduler", "show"}, &conf) + return conf["limit"] == 127. + }) + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "remove", "shuffle-hot-region-scheduler"}, nil) + re.Contains(echo, "Success!") + testutil.Eventually(re, func() bool { + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "show"}, nil) + return !strings.Contains(echo, "shuffle-hot-region-scheduler") + }) + // test show scheduler with paused and disabled status. checkSchedulerWithStatusCommand := func(status string, expected []string) { - var schedulers []string - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "show", "--status", status}, &schedulers) - re.Equal(expected, schedulers) + testutil.Eventually(re, func() bool { + var schedulers []string + mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "show", "--status", status}, &schedulers) + return reflect.DeepEqual(expected, schedulers) + }) } mustUsage([]string{"-u", pdAddr, "scheduler", "pause", "balance-leader-scheduler"}) - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "pause", "balance-leader-scheduler", "60"}, nil) + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "pause", "balance-leader-scheduler", "60"}, nil) + re.Contains(echo, "Success!") checkSchedulerWithStatusCommand("paused", []string{ "balance-leader-scheduler", }) @@ -444,8 +591,9 @@ func (suite *schedulerTestSuite) checkScheduler(cluster *tests.TestCluster) { }, testutil.WithWaitFor(30*time.Second)) mustUsage([]string{"-u", pdAddr, "scheduler", "resume", "balance-leader-scheduler", "60"}) - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "resume", "balance-leader-scheduler"}, nil) - checkSchedulerWithStatusCommand("paused", nil) + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "resume", "balance-leader-scheduler"}, nil) + re.Contains(echo, "Success!") + checkSchedulerWithStatusCommand("paused", []string{}) // set label scheduler to disabled manually. 
echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", "label-scheduler"}, nil) @@ -460,7 +608,11 @@ func (suite *schedulerTestSuite) checkScheduler(cluster *tests.TestCluster) { cfg.Schedulers = origin err = leaderServer.GetServer().SetScheduleConfig(*cfg) re.NoError(err) - checkSchedulerWithStatusCommand("disabled", nil) + checkSchedulerWithStatusCommand("disabled", []string{}) +} + +func (suite *schedulerTestSuite) TestSchedulerDiagnostic() { + suite.env.RunTestInTwoModes(suite.checkSchedulerDiagnostic) } func (suite *schedulerTestSuite) checkSchedulerDiagnostic(cluster *tests.TestCluster) { @@ -472,10 +624,8 @@ func (suite *schedulerTestSuite) checkSchedulerDiagnostic(cluster *tests.TestClu result := make(map[string]interface{}) testutil.Eventually(re, func() bool { mightExec(re, cmd, []string{"-u", pdAddr, "scheduler", "describe", schedulerName}, &result) - return len(result) != 0 + return len(result) != 0 && expectedStatus == result["status"] && expectedSummary == result["summary"] }, testutil.WithTickInterval(50*time.Millisecond)) - re.Equal(expectedStatus, result["status"]) - re.Equal(expectedSummary, result["summary"]) } stores := []*metapb.Store{ @@ -504,23 +654,22 @@ func (suite *schedulerTestSuite) checkSchedulerDiagnostic(cluster *tests.TestClu tests.MustPutStore(re, cluster, store) } - // note: because pdqsort is a unstable sort algorithm, set ApproximateSize for this region. + // note: because pdqsort is an unstable sort algorithm, set ApproximateSize for this region. tests.MustPutRegion(re, cluster, 1, 1, []byte("a"), []byte("b"), core.SetApproximateSize(10)) - time.Sleep(3 * time.Second) echo := mustExec(re, cmd, []string{"-u", pdAddr, "config", "set", "enable-diagnostic", "true"}, nil) re.Contains(echo, "Success!") checkSchedulerDescribeCommand("balance-region-scheduler", "pending", "1 store(s) RegionNotMatchRule; ") // scheduler delete command - // Fixme: use RunTestInTwoModes when sync deleted scheduler is supported. 
- if sche := cluster.GetSchedulingPrimaryServer(); sche == nil { - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "remove", "balance-region-scheduler"}, nil) - checkSchedulerDescribeCommand("balance-region-scheduler", "disabled", "") - } + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "remove", "balance-region-scheduler"}, nil) + re.Contains(echo, "Success!") + checkSchedulerDescribeCommand("balance-region-scheduler", "disabled", "") - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "pause", "balance-leader-scheduler", "60"}, nil) - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "resume", "balance-leader-scheduler"}, nil) + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "pause", "balance-leader-scheduler", "60"}, nil) + re.Contains(echo, "Success!") + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "resume", "balance-leader-scheduler"}, nil) + re.Contains(echo, "Success!") checkSchedulerDescribeCommand("balance-leader-scheduler", "normal", "") } @@ -530,7 +679,7 @@ func mustExec(re *require.Assertions, cmd *cobra.Command, args []string, v inter if v == nil { return string(output) } - re.NoError(json.Unmarshal(output, v)) + re.NoError(json.Unmarshal(output, v), string(output)) return "" } @@ -554,18 +703,20 @@ func TestForwardSchedulerRequest(t *testing.T) { server := cluster.GetLeaderServer() re.NoError(server.BootstrapCluster()) backendEndpoints := server.GetAddr() - tc, err := tests.NewTestSchedulingCluster(ctx, 2, backendEndpoints) + tc, err := tests.NewTestSchedulingCluster(ctx, 1, backendEndpoints) re.NoError(err) defer tc.Destroy() tc.WaitForPrimaryServing(re) cmd := pdctlCmd.GetRootCmd() args := []string{"-u", backendEndpoints, "scheduler", "show"} - var slice []string - output, err := pdctl.ExecuteCommand(cmd, args...) - re.NoError(err) - re.NoError(json.Unmarshal(output, &slice)) - re.Contains(slice, "balance-leader-scheduler") + var sches []string + testutil.Eventually(re, func() bool { + output, err := pdctl.ExecuteCommand(cmd, args...) + re.NoError(err) + re.NoError(json.Unmarshal(output, &sches)) + return slice.Contains(sches, "balance-leader-scheduler") + }) mustUsage := func(args []string) { output, err := pdctl.ExecuteCommand(cmd, args...) @@ -573,7 +724,8 @@ func TestForwardSchedulerRequest(t *testing.T) { re.Contains(string(output), "Usage") } mustUsage([]string{"-u", backendEndpoints, "scheduler", "pause", "balance-leader-scheduler"}) - mustExec(re, cmd, []string{"-u", backendEndpoints, "scheduler", "pause", "balance-leader-scheduler", "60"}, nil) + echo := mustExec(re, cmd, []string{"-u", backendEndpoints, "scheduler", "pause", "balance-leader-scheduler", "60"}, nil) + re.Contains(echo, "Success!") checkSchedulerWithStatusCommand := func(status string, expected []string) { var schedulers []string mustExec(re, cmd, []string{"-u", backendEndpoints, "scheduler", "show", "--status", status}, &schedulers) diff --git a/tests/registry/registry_test.go b/tests/registry/registry_test.go index a3aff76a1cf..d2661cda616 100644 --- a/tests/registry/registry_test.go +++ b/tests/registry/registry_test.go @@ -4,7 +4,7 @@ // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at // -// http://www.apache.org/licenses/LICENSE-2.0 +// http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, diff --git a/tests/server/api/api_test.go b/tests/server/api/api_test.go index ff430f1b848..f5db6bb2513 100644 --- a/tests/server/api/api_test.go +++ b/tests/server/api/api_test.go @@ -164,7 +164,7 @@ func (suite *middlewareTestSuite) TestRequestInfoMiddleware() { suite.Equal("{\"force\":[\"true\"]}", resp.Header.Get("url-param")) suite.Equal("{\"testkey\":\"testvalue\"}", resp.Header.Get("body-param")) suite.Equal("HTTP/1.1/POST:/pd/api/v1/debug/pprof/profile", resp.Header.Get("method")) - suite.Equal("anonymous", resp.Header.Get("component")) + suite.Equal("anonymous", resp.Header.Get("caller-id")) suite.Equal("127.0.0.1", resp.Header.Get("ip")) input = map[string]interface{}{ @@ -374,7 +374,7 @@ func (suite *middlewareTestSuite) TestRateLimitMiddleware() { func (suite *middlewareTestSuite) TestSwaggerUrl() { leader := suite.cluster.GetLeaderServer() suite.NotNil(leader) - req, _ := http.NewRequest(http.MethodGet, leader.GetAddr()+"/swagger/ui/index", nil) + req, _ := http.NewRequest(http.MethodGet, leader.GetAddr()+"/swagger/ui/index", http.NoBody) resp, err := dialClient.Do(req) suite.NoError(err) suite.True(resp.StatusCode == http.StatusNotFound) @@ -395,20 +395,20 @@ func (suite *middlewareTestSuite) TestAuditPrometheusBackend() { resp.Body.Close() suite.True(leader.GetServer().GetServiceMiddlewarePersistOptions().IsAuditEnabled()) timeUnix := time.Now().Unix() - 20 - req, _ = http.NewRequest(http.MethodGet, fmt.Sprintf("%s/pd/api/v1/trend?from=%d", leader.GetAddr(), timeUnix), nil) + req, _ = http.NewRequest(http.MethodGet, fmt.Sprintf("%s/pd/api/v1/trend?from=%d", leader.GetAddr(), timeUnix), http.NoBody) resp, err = dialClient.Do(req) suite.NoError(err) _, err = io.ReadAll(resp.Body) resp.Body.Close() suite.NoError(err) - req, _ = http.NewRequest(http.MethodGet, leader.GetAddr()+"/metrics", nil) + req, _ = http.NewRequest(http.MethodGet, leader.GetAddr()+"/metrics", http.NoBody) resp, err = dialClient.Do(req) suite.NoError(err) defer resp.Body.Close() content, _ := io.ReadAll(resp.Body) output := string(content) - suite.Contains(output, "pd_service_audit_handling_seconds_count{component=\"anonymous\",ip=\"127.0.0.1\",method=\"HTTP\",service=\"GetTrend\"} 1") + suite.Contains(output, "pd_service_audit_handling_seconds_count{caller_id=\"anonymous\",ip=\"127.0.0.1\",method=\"HTTP\",service=\"GetTrend\"} 1") // resign to test persist config oldLeaderName := leader.GetServer().Name() @@ -421,20 +421,20 @@ func (suite *middlewareTestSuite) TestAuditPrometheusBackend() { leader = suite.cluster.GetLeaderServer() timeUnix = time.Now().Unix() - 20 - req, _ = http.NewRequest(http.MethodGet, fmt.Sprintf("%s/pd/api/v1/trend?from=%d", leader.GetAddr(), timeUnix), nil) + req, _ = http.NewRequest(http.MethodGet, fmt.Sprintf("%s/pd/api/v1/trend?from=%d", leader.GetAddr(), timeUnix), http.NoBody) resp, err = dialClient.Do(req) suite.NoError(err) _, err = io.ReadAll(resp.Body) resp.Body.Close() suite.NoError(err) - req, _ = http.NewRequest(http.MethodGet, leader.GetAddr()+"/metrics", nil) + req, _ = http.NewRequest(http.MethodGet, leader.GetAddr()+"/metrics", http.NoBody) resp, err = dialClient.Do(req) suite.NoError(err) defer resp.Body.Close() content, _ = io.ReadAll(resp.Body) output = string(content) - 
suite.Contains(output, "pd_service_audit_handling_seconds_count{component=\"anonymous\",ip=\"127.0.0.1\",method=\"HTTP\",service=\"GetTrend\"} 2") + suite.Contains(output, "pd_service_audit_handling_seconds_count{caller_id=\"anonymous\",ip=\"127.0.0.1\",method=\"HTTP\",service=\"GetTrend\"} 2") input = map[string]interface{}{ "enable-audit": "false", @@ -542,16 +542,16 @@ func BenchmarkDoRequestWithoutServiceMiddleware(b *testing.B) { } func doTestRequestWithLogAudit(srv *tests.TestServer) { - req, _ := http.NewRequest(http.MethodDelete, fmt.Sprintf("%s/pd/api/v1/admin/cache/regions", srv.GetAddr()), nil) - req.Header.Set("component", "test") + req, _ := http.NewRequest(http.MethodDelete, fmt.Sprintf("%s/pd/api/v1/admin/cache/regions", srv.GetAddr()), http.NoBody) + req.Header.Set(apiutil.XCallerIDHeader, "test") resp, _ := dialClient.Do(req) resp.Body.Close() } func doTestRequestWithPrometheus(srv *tests.TestServer) { timeUnix := time.Now().Unix() - 20 - req, _ := http.NewRequest(http.MethodGet, fmt.Sprintf("%s/pd/api/v1/trend?from=%d", srv.GetAddr(), timeUnix), nil) - req.Header.Set("component", "test") + req, _ := http.NewRequest(http.MethodGet, fmt.Sprintf("%s/pd/api/v1/trend?from=%d", srv.GetAddr(), timeUnix), http.NoBody) + req.Header.Set(apiutil.XCallerIDHeader, "test") resp, _ := dialClient.Do(req) resp.Body.Close() } @@ -611,7 +611,7 @@ func (suite *redirectorTestSuite) TestAllowFollowerHandle() { } addr := follower.GetAddr() + "/pd/api/v1/version" - request, err := http.NewRequest(http.MethodGet, addr, nil) + request, err := http.NewRequest(http.MethodGet, addr, http.NoBody) suite.NoError(err) request.Header.Add(apiutil.PDAllowFollowerHandleHeader, "true") resp, err := dialClient.Do(request) @@ -636,7 +636,7 @@ func (suite *redirectorTestSuite) TestNotLeader() { addr := follower.GetAddr() + "/pd/api/v1/version" // Request to follower without redirectorHeader is OK. 
- request, err := http.NewRequest(http.MethodGet, addr, nil) + request, err := http.NewRequest(http.MethodGet, addr, http.NoBody) suite.NoError(err) resp, err := dialClient.Do(request) suite.NoError(err) @@ -664,7 +664,7 @@ func (suite *redirectorTestSuite) TestXForwardedFor() { follower := suite.cluster.GetServer(suite.cluster.GetFollower()) addr := follower.GetAddr() + "/pd/api/v1/regions" - request, err := http.NewRequest(http.MethodGet, addr, nil) + request, err := http.NewRequest(http.MethodGet, addr, http.NoBody) suite.NoError(err) resp, err := dialClient.Do(request) suite.NoError(err) @@ -914,7 +914,7 @@ func TestPreparingProgress(t *testing.T) { tests.MustPutStore(re, cluster, store) } for i := 0; i < 100; i++ { - tests.MustPutRegion(re, cluster, uint64(i+1), uint64(i)%3+1, []byte(fmt.Sprintf("p%d", i)), []byte(fmt.Sprintf("%d", i+1)), core.SetApproximateSize(10)) + tests.MustPutRegion(re, cluster, uint64(i+1), uint64(i)%3+1, []byte(fmt.Sprintf("%20d", i)), []byte(fmt.Sprintf("%20d", i+1)), core.SetApproximateSize(10)) } // no store preparing output := sendRequest(re, leader.GetAddr()+"/pd/api/v1/stores/progress?action=preparing", http.MethodGet, http.StatusNotFound) @@ -941,8 +941,8 @@ func TestPreparingProgress(t *testing.T) { re.Equal(math.MaxFloat64, p.LeftSeconds) // update size - tests.MustPutRegion(re, cluster, 1000, 4, []byte(fmt.Sprintf("%d", 1000)), []byte(fmt.Sprintf("%d", 1001)), core.SetApproximateSize(10)) - tests.MustPutRegion(re, cluster, 1001, 5, []byte(fmt.Sprintf("%d", 1001)), []byte(fmt.Sprintf("%d", 1002)), core.SetApproximateSize(40)) + tests.MustPutRegion(re, cluster, 1000, 4, []byte(fmt.Sprintf("%20d", 1000)), []byte(fmt.Sprintf("%20d", 1001)), core.SetApproximateSize(10)) + tests.MustPutRegion(re, cluster, 1001, 5, []byte(fmt.Sprintf("%20d", 1001)), []byte(fmt.Sprintf("%20d", 1002)), core.SetApproximateSize(40)) time.Sleep(2 * time.Second) output = sendRequest(re, leader.GetAddr()+"/pd/api/v1/stores/progress?action=preparing", http.MethodGet, http.StatusOK) re.NoError(json.Unmarshal(output, &p)) @@ -970,7 +970,7 @@ func TestPreparingProgress(t *testing.T) { } func sendRequest(re *require.Assertions, url string, method string, statusCode int) []byte { - req, _ := http.NewRequest(method, url, nil) + req, _ := http.NewRequest(method, url, http.NoBody) resp, err := dialClient.Do(req) re.NoError(err) re.Equal(statusCode, resp.StatusCode) diff --git a/tests/server/api/checker_test.go b/tests/server/api/checker_test.go index 0f359553b73..8037fcc3989 100644 --- a/tests/server/api/checker_test.go +++ b/tests/server/api/checker_test.go @@ -27,14 +27,23 @@ import ( type checkerTestSuite struct { suite.Suite + env *tests.SchedulingTestEnvironment } func TestCheckerTestSuite(t *testing.T) { suite.Run(t, new(checkerTestSuite)) } + +func (suite *checkerTestSuite) SetupSuite() { + suite.env = tests.NewSchedulingTestEnvironment(suite.T()) +} + +func (suite *checkerTestSuite) TearDownSuite() { + suite.env.Cleanup() +} + func (suite *checkerTestSuite) TestAPI() { - env := tests.NewSchedulingTestEnvironment(suite.T()) - env.RunTestInTwoModes(suite.checkAPI) + suite.env.RunTestInTwoModes(suite.checkAPI) } func (suite *checkerTestSuite) checkAPI(cluster *tests.TestCluster) { diff --git a/tests/server/api/operator_test.go b/tests/server/api/operator_test.go index 64ed5114646..41a687b1181 100644 --- a/tests/server/api/operator_test.go +++ b/tests/server/api/operator_test.go @@ -15,6 +15,7 @@ package api import ( + "encoding/json" "errors" "fmt" "net/http" @@ -26,7 +27,7 @@ import 
( "github.com/pingcap/kvproto/pkg/metapb" "github.com/stretchr/testify/suite" "github.com/tikv/pd/pkg/core" - pdoperator "github.com/tikv/pd/pkg/schedule/operator" + "github.com/tikv/pd/pkg/schedule/operator" "github.com/tikv/pd/pkg/schedule/placement" tu "github.com/tikv/pd/pkg/utils/testutil" "github.com/tikv/pd/server/config" @@ -44,35 +45,31 @@ var ( type operatorTestSuite struct { suite.Suite + env *tests.SchedulingTestEnvironment } func TestOperatorTestSuite(t *testing.T) { suite.Run(t, new(operatorTestSuite)) } -func (suite *operatorTestSuite) TestOperator() { - opts := []tests.ConfigOption{ +func (suite *operatorTestSuite) SetupSuite() { + suite.env = tests.NewSchedulingTestEnvironment(suite.T(), func(conf *config.Config, serverName string) { conf.Replication.MaxReplicas = 1 - }, - } - env := tests.NewSchedulingTestEnvironment(suite.T(), opts...) - env.RunTestInTwoModes(suite.checkAddRemovePeer) + }) +} - env = tests.NewSchedulingTestEnvironment(suite.T(), opts...) - env.RunTestInTwoModes(suite.checkMergeRegionOperator) +func (suite *operatorTestSuite) TearDownSuite() { + suite.env.Cleanup() +} - opts = []tests.ConfigOption{ - func(conf *config.Config, serverName string) { - conf.Replication.MaxReplicas = 3 - }, - } - env = tests.NewSchedulingTestEnvironment(suite.T(), opts...) - env.RunTestInTwoModes(suite.checkTransferRegionWithPlacementRule) +func (suite *operatorTestSuite) TestAddRemovePeer() { + suite.env.RunTestInTwoModes(suite.checkAddRemovePeer) } func (suite *operatorTestSuite) checkAddRemovePeer(cluster *tests.TestCluster) { re := suite.Require() + suite.pauseRuleChecker(cluster) stores := []*metapb.Store{ { Id: 1, @@ -106,6 +103,8 @@ func (suite *operatorTestSuite) checkAddRemovePeer(cluster *tests.TestCluster) { ConfVer: 1, Version: 1, }, + StartKey: []byte("a"), + EndKey: []byte("b"), } regionInfo := core.NewRegionInfo(region, peer1) tests.MustPutRegionInfo(re, cluster, regionInfo) @@ -174,8 +173,38 @@ func (suite *operatorTestSuite) checkAddRemovePeer(cluster *tests.TestCluster) { suite.NoError(err) } +func (suite *operatorTestSuite) TestMergeRegionOperator() { + suite.env.RunTestInTwoModes(suite.checkMergeRegionOperator) +} + func (suite *operatorTestSuite) checkMergeRegionOperator(cluster *tests.TestCluster) { re := suite.Require() + stores := []*metapb.Store{ + { + Id: 1, + State: metapb.StoreState_Up, + NodeState: metapb.NodeState_Serving, + LastHeartbeat: time.Now().UnixNano(), + }, + { + Id: 2, + State: metapb.StoreState_Up, + NodeState: metapb.NodeState_Serving, + LastHeartbeat: time.Now().UnixNano(), + }, + { + Id: 3, + State: metapb.StoreState_Up, + NodeState: metapb.NodeState_Serving, + LastHeartbeat: time.Now().UnixNano(), + }, + } + + for _, store := range stores { + tests.MustPutStore(re, cluster, store) + } + + suite.pauseRuleChecker(cluster) r1 := core.NewTestRegionInfo(10, 1, []byte(""), []byte("b"), core.SetWrittenBytes(1000), core.SetReadBytes(1000), core.SetRegionConfVer(1), core.SetRegionVersion(1)) tests.MustPutRegionInfo(re, cluster, r1) r2 := core.NewTestRegionInfo(20, 1, []byte("b"), []byte("c"), core.SetWrittenBytes(2000), core.SetReadBytes(0), core.SetRegionConfVer(2), core.SetRegionVersion(3)) @@ -199,8 +228,19 @@ func (suite *operatorTestSuite) checkMergeRegionOperator(cluster *tests.TestClus suite.NoError(err) } +func (suite *operatorTestSuite) TestTransferRegionWithPlacementRule() { + // use a new environment to avoid affecting other tests + env := tests.NewSchedulingTestEnvironment(suite.T(), + func(conf *config.Config, serverName 
string) { + conf.Replication.MaxReplicas = 3 + }) + env.RunTestInTwoModes(suite.checkTransferRegionWithPlacementRule) + env.Cleanup() +} + func (suite *operatorTestSuite) checkTransferRegionWithPlacementRule(cluster *tests.TestCluster) { re := suite.Require() + suite.pauseRuleChecker(cluster) stores := []*metapb.Store{ { Id: 1, @@ -239,6 +279,8 @@ func (suite *operatorTestSuite) checkTransferRegionWithPlacementRule(cluster *te ConfVer: 1, Version: 1, }, + StartKey: []byte("a"), + EndKey: []byte("b"), } tests.MustPutRegionInfo(re, cluster, core.NewRegionInfo(region, peer1)) @@ -268,10 +310,10 @@ func (suite *operatorTestSuite) checkTransferRegionWithPlacementRule(cluster *te input: []byte(`{"name":"transfer-region", "region_id": 1, "to_store_ids": [2, 3]}`), expectedError: nil, expectSteps: convertStepsToStr([]string{ - pdoperator.AddLearner{ToStore: 3, PeerID: 1}.String(), - pdoperator.PromoteLearner{ToStore: 3, PeerID: 1}.String(), - pdoperator.TransferLeader{FromStore: 1, ToStore: 2}.String(), - pdoperator.RemovePeer{FromStore: 1, PeerID: 1}.String(), + operator.AddLearner{ToStore: 3, PeerID: 1}.String(), + operator.PromoteLearner{ToStore: 3, PeerID: 1}.String(), + operator.TransferLeader{FromStore: 1, ToStore: 2}.String(), + operator.RemovePeer{FromStore: 1, PeerID: 1}.String(), }), }, { @@ -280,11 +322,11 @@ func (suite *operatorTestSuite) checkTransferRegionWithPlacementRule(cluster *te input: []byte(`{"name":"transfer-region", "region_id": 1, "to_store_ids": [2, 3], "peer_roles":["follower", "leader"]}`), expectedError: nil, expectSteps: convertStepsToStr([]string{ - pdoperator.AddLearner{ToStore: 3, PeerID: 2}.String(), - pdoperator.PromoteLearner{ToStore: 3, PeerID: 2}.String(), - pdoperator.TransferLeader{FromStore: 1, ToStore: 2}.String(), - pdoperator.RemovePeer{FromStore: 1, PeerID: 2}.String(), - pdoperator.TransferLeader{FromStore: 2, ToStore: 3}.String(), + operator.AddLearner{ToStore: 3, PeerID: 2}.String(), + operator.PromoteLearner{ToStore: 3, PeerID: 2}.String(), + operator.TransferLeader{FromStore: 1, ToStore: 2}.String(), + operator.RemovePeer{FromStore: 1, PeerID: 2}.String(), + operator.TransferLeader{FromStore: 2, ToStore: 3}.String(), }), }, { @@ -299,11 +341,11 @@ func (suite *operatorTestSuite) checkTransferRegionWithPlacementRule(cluster *te placementRuleEnable: true, input: []byte(`{"name":"transfer-region", "region_id": 1, "to_store_ids": [2, 3], "peer_roles":["follower", "leader"]}`), expectSteps: convertStepsToStr([]string{ - pdoperator.AddLearner{ToStore: 3, PeerID: 3}.String(), - pdoperator.PromoteLearner{ToStore: 3, PeerID: 3}.String(), - pdoperator.TransferLeader{FromStore: 1, ToStore: 2}.String(), - pdoperator.RemovePeer{FromStore: 1, PeerID: 1}.String(), - pdoperator.TransferLeader{FromStore: 2, ToStore: 3}.String(), + operator.AddLearner{ToStore: 3, PeerID: 3}.String(), + operator.PromoteLearner{ToStore: 3, PeerID: 3}.String(), + operator.TransferLeader{FromStore: 1, ToStore: 2}.String(), + operator.RemovePeer{FromStore: 1, PeerID: 1}.String(), + operator.TransferLeader{FromStore: 2, ToStore: 3}.String(), }), }, { @@ -360,10 +402,10 @@ func (suite *operatorTestSuite) checkTransferRegionWithPlacementRule(cluster *te input: []byte(`{"name":"transfer-region", "region_id": 1, "to_store_ids": [2, 3], "peer_roles":["follower", "leader"]}`), expectedError: nil, expectSteps: convertStepsToStr([]string{ - pdoperator.AddLearner{ToStore: 3, PeerID: 5}.String(), - pdoperator.PromoteLearner{ToStore: 3, PeerID: 5}.String(), - pdoperator.TransferLeader{FromStore: 1, 
ToStore: 3}.String(), - pdoperator.RemovePeer{FromStore: 1, PeerID: 1}.String(), + operator.AddLearner{ToStore: 3, PeerID: 5}.String(), + operator.PromoteLearner{ToStore: 3, PeerID: 5}.String(), + operator.TransferLeader{FromStore: 1, ToStore: 3}.String(), + operator.RemovePeer{FromStore: 1, PeerID: 1}.String(), }), }, { @@ -400,21 +442,32 @@ func (suite *operatorTestSuite) checkTransferRegionWithPlacementRule(cluster *te input: []byte(`{"name":"transfer-region", "region_id": 1, "to_store_ids": [2, 3], "peer_roles":["leader", "follower"]}`), expectedError: nil, expectSteps: convertStepsToStr([]string{ - pdoperator.AddLearner{ToStore: 3, PeerID: 6}.String(), - pdoperator.PromoteLearner{ToStore: 3, PeerID: 6}.String(), - pdoperator.TransferLeader{FromStore: 1, ToStore: 2}.String(), - pdoperator.RemovePeer{FromStore: 1, PeerID: 1}.String(), + operator.AddLearner{ToStore: 3, PeerID: 6}.String(), + operator.PromoteLearner{ToStore: 3, PeerID: 6}.String(), + operator.TransferLeader{FromStore: 1, ToStore: 2}.String(), + operator.RemovePeer{FromStore: 1, PeerID: 1}.String(), }), }, } svr := cluster.GetLeaderServer() + url := fmt.Sprintf("%s/pd/api/v1/config", svr.GetAddr()) for _, testCase := range testCases { suite.T().Log(testCase.name) - // TODO: remove this after we can sync this config to all servers. - if sche := cluster.GetSchedulingPrimaryServer(); sche != nil { - sche.GetCluster().GetSchedulerConfig().SetPlacementRuleEnabled(testCase.placementRuleEnable) + data := make(map[string]interface{}) + if testCase.placementRuleEnable { + data["enable-placement-rules"] = "true" } else { - svr.GetRaftCluster().GetOpts().SetPlacementRuleEnabled(testCase.placementRuleEnable) + data["enable-placement-rules"] = "false" + } + reqData, e := json.Marshal(data) + re.NoError(e) + err := tu.CheckPostJSON(testDialClient, url, reqData, tu.StatusOK(re)) + re.NoError(err) + if sche := cluster.GetSchedulingPrimaryServer(); sche != nil { + // wait for the scheduler server to update the config + tu.Eventually(re, func() bool { + return sche.GetCluster().GetCheckerConfig().IsPlacementRulesEnabled() == testCase.placementRuleEnable + }) } manager := svr.GetRaftCluster().GetRuleManager() if sche := cluster.GetSchedulingPrimaryServer(); sche != nil { @@ -433,10 +486,9 @@ func (suite *operatorTestSuite) checkTransferRegionWithPlacementRule(cluster *te // add customized rule first and then remove default rule err := manager.SetRules(testCase.rules) suite.NoError(err) - err = manager.DeleteRule("pd", "default") + err = manager.DeleteRule(placement.DefaultGroupID, placement.DefaultRuleID) suite.NoError(err) } - var err error if testCase.expectedError == nil { err = tu.CheckPostJSON(testDialClient, fmt.Sprintf("%s/operators", urlPrefix), testCase.input, tu.StatusOK(re)) } else { @@ -450,10 +502,22 @@ func (suite *operatorTestSuite) checkTransferRegionWithPlacementRule(cluster *te suite.NoError(err) err = tu.CheckDelete(testDialClient, regionURL, tu.StatusOK(re)) } else { - // FIXME: we should check the delete result, which should be failed, - // but the delete operator may be success because the cluster create a new operator to remove ophan peer. - err = tu.CheckDelete(testDialClient, regionURL) + err = tu.CheckDelete(testDialClient, regionURL, tu.StatusNotOK(re)) } suite.NoError(err) } } + +// pauseRuleChecker will pause rule checker to avoid unexpected operator. 
+func (suite *operatorTestSuite) pauseRuleChecker(cluster *tests.TestCluster) { + re := suite.Require() + checkerName := "rule" + addr := cluster.GetLeaderServer().GetAddr() + resp := make(map[string]interface{}) + url := fmt.Sprintf("%s/pd/api/v1/checker/%s", addr, checkerName) + err := tu.CheckPostJSON(testDialClient, url, []byte(`{"delay":1000}`), tu.StatusOK(re)) + re.NoError(err) + err = tu.ReadGetJSON(re, testDialClient, url, &resp) + re.NoError(err) + re.True(resp["paused"].(bool)) +} diff --git a/tests/server/api/region_test.go b/tests/server/api/region_test.go new file mode 100644 index 00000000000..450995a6e5e --- /dev/null +++ b/tests/server/api/region_test.go @@ -0,0 +1,387 @@ +// Copyright 2023 TiKV Project Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package api + +import ( + "encoding/hex" + "encoding/json" + "fmt" + "net/http" + "strconv" + "testing" + + "github.com/pingcap/failpoint" + "github.com/pingcap/kvproto/pkg/metapb" + "github.com/stretchr/testify/suite" + "github.com/tikv/pd/pkg/core" + "github.com/tikv/pd/pkg/schedule/placement" + tu "github.com/tikv/pd/pkg/utils/testutil" + "github.com/tikv/pd/tests" +) + +type regionTestSuite struct { + suite.Suite + env *tests.SchedulingTestEnvironment +} + +func TestRegionTestSuite(t *testing.T) { + suite.Run(t, new(regionTestSuite)) +} + +func (suite *regionTestSuite) SetupSuite() { + suite.env = tests.NewSchedulingTestEnvironment(suite.T()) +} + +func (suite *regionTestSuite) TearDownSuite() { + suite.env.Cleanup() +} + +func (suite *regionTestSuite) TearDownTest() { + cleanFunc := func(cluster *tests.TestCluster) { + // clean region cache + leader := cluster.GetLeaderServer() + re := suite.Require() + pdAddr := cluster.GetConfig().GetClientURL() + for _, region := range leader.GetRegions() { + url := fmt.Sprintf("%s/pd/api/v1/admin/cache/region/%d", pdAddr, region.GetID()) + err := tu.CheckDelete(testDialClient, url, tu.StatusOK(re)) + suite.NoError(err) + } + suite.Empty(leader.GetRegions()) + // clean rules + def := placement.GroupBundle{ + ID: "pd", + Rules: []*placement.Rule{ + {GroupID: "pd", ID: "default", Role: "voter", Count: 3}, + }, + } + data, err := json.Marshal([]placement.GroupBundle{def}) + suite.NoError(err) + urlPrefix := cluster.GetLeaderServer().GetAddr() + err = tu.CheckPostJSON(testDialClient, urlPrefix+"/pd/api/v1/config/placement-rule", data, tu.StatusOK(suite.Require())) + suite.NoError(err) + // clean stores + // TODO: cannot sync to scheduling server? 
+ for _, store := range leader.GetStores() { + suite.NoError(cluster.GetLeaderServer().GetRaftCluster().RemoveStore(store.GetId(), true)) + suite.NoError(cluster.GetLeaderServer().GetRaftCluster().BuryStore(store.GetId(), true)) + } + suite.NoError(cluster.GetLeaderServer().GetRaftCluster().RemoveTombStoneRecords()) + suite.Empty(leader.GetStores()) + } + suite.env.RunFuncInTwoModes(cleanFunc) +} + +func (suite *regionTestSuite) TestSplitRegions() { + // use a new environment to avoid affecting other tests + env := tests.NewSchedulingTestEnvironment(suite.T()) + env.RunTestInTwoModes(suite.checkSplitRegions) + env.Cleanup() +} + +func (suite *regionTestSuite) checkSplitRegions(cluster *tests.TestCluster) { + leader := cluster.GetLeaderServer() + urlPrefix := leader.GetAddr() + "/pd/api/v1" + re := suite.Require() + s1 := &metapb.Store{ + Id: 13, + State: metapb.StoreState_Up, + NodeState: metapb.NodeState_Serving, + } + tests.MustPutStore(re, cluster, s1) + r1 := core.NewTestRegionInfo(601, 13, []byte("aaa"), []byte("ggg")) + r1.GetMeta().Peers = append(r1.GetMeta().Peers, &metapb.Peer{Id: 5, StoreId: 14}, &metapb.Peer{Id: 6, StoreId: 15}) + tests.MustPutRegionInfo(re, cluster, r1) + suite.checkRegionCount(cluster, 1) + + newRegionID := uint64(11) + body := fmt.Sprintf(`{"retry_limit":%v, "split_keys": ["%s","%s","%s"]}`, 3, + hex.EncodeToString([]byte("bbb")), + hex.EncodeToString([]byte("ccc")), + hex.EncodeToString([]byte("ddd"))) + checkOpt := func(res []byte, code int, _ http.Header) { + s := &struct { + ProcessedPercentage int `json:"processed-percentage"` + NewRegionsID []uint64 `json:"regions-id"` + }{} + err := json.Unmarshal(res, s) + suite.NoError(err) + suite.Equal(100, s.ProcessedPercentage) + suite.Equal([]uint64{newRegionID}, s.NewRegionsID) + } + suite.NoError(failpoint.Enable("github.com/tikv/pd/pkg/schedule/handler/splitResponses", fmt.Sprintf("return(%v)", newRegionID))) + err := tu.CheckPostJSON(testDialClient, fmt.Sprintf("%s/regions/split", urlPrefix), []byte(body), checkOpt) + suite.NoError(failpoint.Disable("github.com/tikv/pd/pkg/schedule/handler/splitResponses")) + suite.NoError(err) +} + +func (suite *regionTestSuite) TestAccelerateRegionsScheduleInRange() { + suite.env.RunTestInTwoModes(suite.checkAccelerateRegionsScheduleInRange) +} + +func (suite *regionTestSuite) checkAccelerateRegionsScheduleInRange(cluster *tests.TestCluster) { + leader := cluster.GetLeaderServer() + urlPrefix := leader.GetAddr() + "/pd/api/v1" + re := suite.Require() + for i := 1; i <= 3; i++ { + s1 := &metapb.Store{ + Id: uint64(i), + State: metapb.StoreState_Up, + NodeState: metapb.NodeState_Serving, + } + tests.MustPutStore(re, cluster, s1) + } + regionCount := uint64(3) + for i := uint64(1); i <= regionCount; i++ { + r1 := core.NewTestRegionInfo(550+i, 1, []byte("a"+strconv.FormatUint(i, 10)), []byte("a"+strconv.FormatUint(i+1, 10))) + r1.GetMeta().Peers = append(r1.GetMeta().Peers, &metapb.Peer{Id: 100 + i, StoreId: (i + 1) % regionCount}, &metapb.Peer{Id: 200 + i, StoreId: (i + 2) % regionCount}) + tests.MustPutRegionInfo(re, cluster, r1) + } + suite.checkRegionCount(cluster, regionCount) + + body := fmt.Sprintf(`{"start_key":"%s", "end_key": "%s"}`, hex.EncodeToString([]byte("a1")), hex.EncodeToString([]byte("a3"))) + err := tu.CheckPostJSON(testDialClient, fmt.Sprintf("%s/regions/accelerate-schedule", urlPrefix), []byte(body), + tu.StatusOK(re)) + suite.NoError(err) + idList := leader.GetRaftCluster().GetSuspectRegions() + if sche := cluster.GetSchedulingPrimaryServer(); sche != 
nil { + idList = sche.GetCluster().GetCoordinator().GetCheckerController().GetSuspectRegions() + } + re.Len(idList, 2, len(idList)) +} + +func (suite *regionTestSuite) TestAccelerateRegionsScheduleInRanges() { + suite.env.RunTestInTwoModes(suite.checkAccelerateRegionsScheduleInRanges) +} + +func (suite *regionTestSuite) checkAccelerateRegionsScheduleInRanges(cluster *tests.TestCluster) { + leader := cluster.GetLeaderServer() + urlPrefix := leader.GetAddr() + "/pd/api/v1" + re := suite.Require() + for i := 1; i <= 6; i++ { + s1 := &metapb.Store{ + Id: uint64(i), + State: metapb.StoreState_Up, + NodeState: metapb.NodeState_Serving, + } + tests.MustPutStore(re, cluster, s1) + } + regionCount := uint64(6) + for i := uint64(1); i <= regionCount; i++ { + r1 := core.NewTestRegionInfo(550+i, 1, []byte("a"+strconv.FormatUint(i, 10)), []byte("a"+strconv.FormatUint(i+1, 10))) + r1.GetMeta().Peers = append(r1.GetMeta().Peers, &metapb.Peer{Id: 100 + i, StoreId: (i + 1) % regionCount}, &metapb.Peer{Id: 200 + i, StoreId: (i + 2) % regionCount}) + tests.MustPutRegionInfo(re, cluster, r1) + } + suite.checkRegionCount(cluster, regionCount) + + body := fmt.Sprintf(`[{"start_key":"%s", "end_key": "%s"}, {"start_key":"%s", "end_key": "%s"}]`, + hex.EncodeToString([]byte("a1")), hex.EncodeToString([]byte("a3")), hex.EncodeToString([]byte("a4")), hex.EncodeToString([]byte("a6"))) + err := tu.CheckPostJSON(testDialClient, fmt.Sprintf("%s/regions/accelerate-schedule/batch", urlPrefix), []byte(body), + tu.StatusOK(re)) + suite.NoError(err) + idList := leader.GetRaftCluster().GetSuspectRegions() + if sche := cluster.GetSchedulingPrimaryServer(); sche != nil { + idList = sche.GetCluster().GetCoordinator().GetCheckerController().GetSuspectRegions() + } + re.Len(idList, 4) +} + +func (suite *regionTestSuite) TestScatterRegions() { + // use a new environment to avoid affecting other tests + env := tests.NewSchedulingTestEnvironment(suite.T()) + env.RunTestInTwoModes(suite.checkScatterRegions) + env.Cleanup() +} + +func (suite *regionTestSuite) checkScatterRegions(cluster *tests.TestCluster) { + leader := cluster.GetLeaderServer() + urlPrefix := leader.GetAddr() + "/pd/api/v1" + re := suite.Require() + for i := 13; i <= 16; i++ { + s1 := &metapb.Store{ + Id: uint64(i), + State: metapb.StoreState_Up, + NodeState: metapb.NodeState_Serving, + } + tests.MustPutStore(re, cluster, s1) + } + r1 := core.NewTestRegionInfo(701, 13, []byte("b1"), []byte("b2")) + r1.GetMeta().Peers = append(r1.GetMeta().Peers, &metapb.Peer{Id: 5, StoreId: 14}, &metapb.Peer{Id: 6, StoreId: 15}) + r2 := core.NewTestRegionInfo(702, 13, []byte("b2"), []byte("b3")) + r2.GetMeta().Peers = append(r2.GetMeta().Peers, &metapb.Peer{Id: 7, StoreId: 14}, &metapb.Peer{Id: 8, StoreId: 15}) + r3 := core.NewTestRegionInfo(703, 13, []byte("b4"), []byte("b4")) + r3.GetMeta().Peers = append(r3.GetMeta().Peers, &metapb.Peer{Id: 9, StoreId: 14}, &metapb.Peer{Id: 10, StoreId: 15}) + tests.MustPutRegionInfo(re, cluster, r1) + tests.MustPutRegionInfo(re, cluster, r2) + tests.MustPutRegionInfo(re, cluster, r3) + suite.checkRegionCount(cluster, 3) + + body := fmt.Sprintf(`{"start_key":"%s", "end_key": "%s"}`, hex.EncodeToString([]byte("b1")), hex.EncodeToString([]byte("b3"))) + err := tu.CheckPostJSON(testDialClient, fmt.Sprintf("%s/regions/scatter", urlPrefix), []byte(body), tu.StatusOK(re)) + suite.NoError(err) + oc := leader.GetRaftCluster().GetOperatorController() + if sche := cluster.GetSchedulingPrimaryServer(); sche != nil { + oc = 
sche.GetCoordinator().GetOperatorController() + } + + op1 := oc.GetOperator(701) + op2 := oc.GetOperator(702) + op3 := oc.GetOperator(703) + // At least one operator used to scatter region + suite.True(op1 != nil || op2 != nil || op3 != nil) + + body = `{"regions_id": [701, 702, 703]}` + err = tu.CheckPostJSON(testDialClient, fmt.Sprintf("%s/regions/scatter", urlPrefix), []byte(body), tu.StatusOK(re)) + suite.NoError(err) +} + +func (suite *regionTestSuite) TestCheckRegionsReplicated() { + // Fixme: after delete+set rule, the key range will be empty, so the test will fail in api mode. + suite.env.RunTestInPDMode(suite.checkRegionsReplicated) +} + +func (suite *regionTestSuite) checkRegionsReplicated(cluster *tests.TestCluster) { + suite.pauseRuleChecker(cluster) + leader := cluster.GetLeaderServer() + urlPrefix := leader.GetAddr() + "/pd/api/v1" + re := suite.Require() + + // add test region + s1 := &metapb.Store{ + Id: 1, + State: metapb.StoreState_Up, + NodeState: metapb.NodeState_Serving, + } + tests.MustPutStore(re, cluster, s1) + r1 := core.NewTestRegionInfo(2, 1, []byte("a"), []byte("b")) + tests.MustPutRegionInfo(re, cluster, r1) + suite.checkRegionCount(cluster, 1) + + // set the bundle + bundle := []placement.GroupBundle{ + { + ID: "5", + Index: 5, + Rules: []*placement.Rule{ + { + ID: "foo", Index: 1, Role: placement.Voter, Count: 1, + }, + }, + }, + } + + status := "" + + // invalid url + url := fmt.Sprintf(`%s/regions/replicated?startKey=%s&endKey=%s`, urlPrefix, "_", "t") + err := tu.CheckGetJSON(testDialClient, url, nil, tu.Status(re, http.StatusBadRequest)) + suite.NoError(err) + + url = fmt.Sprintf(`%s/regions/replicated?startKey=%s&endKey=%s`, urlPrefix, hex.EncodeToString(r1.GetStartKey()), "_") + err = tu.CheckGetJSON(testDialClient, url, nil, tu.Status(re, http.StatusBadRequest)) + suite.NoError(err) + + // correct test + url = fmt.Sprintf(`%s/regions/replicated?startKey=%s&endKey=%s`, urlPrefix, hex.EncodeToString(r1.GetStartKey()), hex.EncodeToString(r1.GetEndKey())) + err = tu.CheckGetJSON(testDialClient, url, nil, tu.StatusOK(re)) + suite.NoError(err) + + // test one rule + data, err := json.Marshal(bundle) + suite.NoError(err) + err = tu.CheckPostJSON(testDialClient, urlPrefix+"/config/placement-rule", data, tu.StatusOK(re)) + suite.NoError(err) + + tu.Eventually(re, func() bool { + err = tu.ReadGetJSON(re, testDialClient, url, &status) + suite.NoError(err) + return status == "REPLICATED" + }) + + suite.NoError(failpoint.Enable("github.com/tikv/pd/pkg/schedule/handler/mockPending", "return(true)")) + err = tu.ReadGetJSON(re, testDialClient, url, &status) + suite.NoError(err) + suite.Equal("PENDING", status) + suite.NoError(failpoint.Disable("github.com/tikv/pd/pkg/schedule/handler/mockPending")) + // test multiple rules + r1 = core.NewTestRegionInfo(2, 1, []byte("a"), []byte("b")) + r1.GetMeta().Peers = append(r1.GetMeta().Peers, &metapb.Peer{Id: 5, StoreId: 1}) + tests.MustPutRegionInfo(re, cluster, r1) + + bundle[0].Rules = append(bundle[0].Rules, &placement.Rule{ + ID: "bar", Index: 1, Role: placement.Voter, Count: 1, + }) + data, err = json.Marshal(bundle) + suite.NoError(err) + err = tu.CheckPostJSON(testDialClient, urlPrefix+"/config/placement-rule", data, tu.StatusOK(re)) + suite.NoError(err) + + err = tu.ReadGetJSON(re, testDialClient, url, &status) + suite.NoError(err) + suite.Equal("REPLICATED", status) + + // test multiple bundles + bundle = append(bundle, placement.GroupBundle{ + ID: "6", + Index: 6, + Rules: []*placement.Rule{ + { + ID: "foo", Index: 
1, Role: placement.Voter, Count: 2, + }, + }, + }) + data, err = json.Marshal(bundle) + suite.NoError(err) + err = tu.CheckPostJSON(testDialClient, urlPrefix+"/config/placement-rule", data, tu.StatusOK(re)) + suite.NoError(err) + + err = tu.ReadGetJSON(re, testDialClient, url, &status) + suite.NoError(err) + suite.Equal("INPROGRESS", status) + + r1 = core.NewTestRegionInfo(2, 1, []byte("a"), []byte("b")) + r1.GetMeta().Peers = append(r1.GetMeta().Peers, &metapb.Peer{Id: 5, StoreId: 1}, &metapb.Peer{Id: 6, StoreId: 1}, &metapb.Peer{Id: 7, StoreId: 1}) + tests.MustPutRegionInfo(re, cluster, r1) + + err = tu.ReadGetJSON(re, testDialClient, url, &status) + suite.NoError(err) + suite.Equal("REPLICATED", status) +} + +func (suite *regionTestSuite) checkRegionCount(cluster *tests.TestCluster, count uint64) { + leader := cluster.GetLeaderServer() + tu.Eventually(suite.Require(), func() bool { + return leader.GetRaftCluster().GetRegionCount([]byte{}, []byte{}).Count == int(count) + }) + if sche := cluster.GetSchedulingPrimaryServer(); sche != nil { + tu.Eventually(suite.Require(), func() bool { + return sche.GetCluster().GetRegionCount([]byte{}, []byte{}) == int(count) + }) + } +} + +// pauseRuleChecker will pause rule checker to avoid unexpected operator. +func (suite *regionTestSuite) pauseRuleChecker(cluster *tests.TestCluster) { + re := suite.Require() + checkerName := "rule" + addr := cluster.GetLeaderServer().GetAddr() + resp := make(map[string]interface{}) + url := fmt.Sprintf("%s/pd/api/v1/checker/%s", addr, checkerName) + err := tu.CheckPostJSON(testDialClient, url, []byte(`{"delay":1000}`), tu.StatusOK(re)) + re.NoError(err) + err = tu.ReadGetJSON(re, testDialClient, url, &resp) + re.NoError(err) + re.True(resp["paused"].(bool)) +} diff --git a/server/api/rule_test.go b/tests/server/api/rule_test.go similarity index 57% rename from server/api/rule_test.go rename to tests/server/api/rule_test.go index d2dc50f1119..0a0c3f2fb2e 100644 --- a/server/api/rule_test.go +++ b/tests/server/api/rule_test.go @@ -25,17 +25,17 @@ import ( "github.com/pingcap/kvproto/pkg/metapb" "github.com/stretchr/testify/suite" "github.com/tikv/pd/pkg/core" + "github.com/tikv/pd/pkg/errs" + "github.com/tikv/pd/pkg/schedule/labeler" "github.com/tikv/pd/pkg/schedule/placement" tu "github.com/tikv/pd/pkg/utils/testutil" - "github.com/tikv/pd/server" "github.com/tikv/pd/server/config" + "github.com/tikv/pd/tests" ) type ruleTestSuite struct { suite.Suite - svr *server.Server - cleanup tu.CleanupFunc - urlPrefix string + env *tests.SchedulingTestEnvironment } func TestRuleTestSuite(t *testing.T) { @@ -43,40 +43,43 @@ func TestRuleTestSuite(t *testing.T) { } func (suite *ruleTestSuite) SetupSuite() { - re := suite.Require() - suite.svr, suite.cleanup = mustNewServer(re) - server.MustWaitLeader(re, []*server.Server{suite.svr}) - - addr := suite.svr.GetAddr() - suite.urlPrefix = fmt.Sprintf("%s%s/api/v1/config", addr, apiPrefix) - - mustBootstrapCluster(re, suite.svr) - PDServerCfg := suite.svr.GetConfig().PDServerCfg - PDServerCfg.KeyType = "raw" - err := suite.svr.SetPDServerConfig(PDServerCfg) - suite.NoError(err) - suite.NoError(tu.CheckPostJSON(testDialClient, suite.urlPrefix, []byte(`{"enable-placement-rules":"true"}`), tu.StatusOK(re))) + suite.env = tests.NewSchedulingTestEnvironment(suite.T(), func(conf *config.Config, serverName string) { + conf.PDServerCfg.KeyType = "raw" + conf.Replication.EnablePlacementRules = true + }) } func (suite *ruleTestSuite) TearDownSuite() { - suite.cleanup() + suite.env.Cleanup() } 
func (suite *ruleTestSuite) TearDownTest() { - def := placement.GroupBundle{ - ID: "pd", - Rules: []*placement.Rule{ - {GroupID: "pd", ID: "default", Role: "voter", Count: 3}, - }, + cleanFunc := func(cluster *tests.TestCluster) { + def := placement.GroupBundle{ + ID: "pd", + Rules: []*placement.Rule{ + {GroupID: "pd", ID: "default", Role: "voter", Count: 3}, + }, + } + data, err := json.Marshal([]placement.GroupBundle{def}) + suite.NoError(err) + urlPrefix := cluster.GetLeaderServer().GetAddr() + err = tu.CheckPostJSON(testDialClient, urlPrefix+"/pd/api/v1/config/placement-rule", data, tu.StatusOK(suite.Require())) + suite.NoError(err) } - data, err := json.Marshal([]placement.GroupBundle{def}) - suite.NoError(err) - err = tu.CheckPostJSON(testDialClient, suite.urlPrefix+"/placement-rule", data, tu.StatusOK(suite.Require())) - suite.NoError(err) + suite.env.RunFuncInTwoModes(cleanFunc) } func (suite *ruleTestSuite) TestSet() { - rule := placement.Rule{GroupID: "a", ID: "10", StartKeyHex: "1111", EndKeyHex: "3333", Role: "voter", Count: 1} + suite.env.RunTestInTwoModes(suite.checkSet) +} + +func (suite *ruleTestSuite) checkSet(cluster *tests.TestCluster) { + leaderServer := cluster.GetLeaderServer() + pdAddr := leaderServer.GetAddr() + urlPrefix := fmt.Sprintf("%s%s/api/v1/config", pdAddr, apiPrefix) + + rule := placement.Rule{GroupID: "a", ID: "10", StartKeyHex: "1111", EndKeyHex: "3333", Role: placement.Voter, Count: 1} successData, err := json.Marshal(rule) suite.NoError(err) oldStartKey, err := hex.DecodeString(rule.StartKeyHex) @@ -84,13 +87,13 @@ func (suite *ruleTestSuite) TestSet() { oldEndKey, err := hex.DecodeString(rule.EndKeyHex) suite.NoError(err) parseErrData := []byte("foo") - rule1 := placement.Rule{GroupID: "a", ID: "10", StartKeyHex: "XXXX", EndKeyHex: "3333", Role: "voter", Count: 1} + rule1 := placement.Rule{GroupID: "a", ID: "10", StartKeyHex: "XXXX", EndKeyHex: "3333", Role: placement.Voter, Count: 1} checkErrData, err := json.Marshal(rule1) suite.NoError(err) - rule2 := placement.Rule{GroupID: "a", ID: "10", StartKeyHex: "1111", EndKeyHex: "3333", Role: "voter", Count: -1} + rule2 := placement.Rule{GroupID: "a", ID: "10", StartKeyHex: "1111", EndKeyHex: "3333", Role: placement.Voter, Count: -1} setErrData, err := json.Marshal(rule2) suite.NoError(err) - rule3 := placement.Rule{GroupID: "a", ID: "10", StartKeyHex: "1111", EndKeyHex: "3333", Role: "follower", Count: 3} + rule3 := placement.Rule{GroupID: "a", ID: "10", StartKeyHex: "1111", EndKeyHex: "3333", Role: placement.Follower, Count: 3} updateData, err := json.Marshal(rule3) suite.NoError(err) newStartKey, err := hex.DecodeString(rule.StartKeyHex) @@ -159,12 +162,12 @@ func (suite *ruleTestSuite) TestSet() { for _, testCase := range testCases { suite.T().Log(testCase.name) // clear suspect keyRanges to prevent test case from others - suite.svr.GetRaftCluster().ClearSuspectKeyRanges() + leaderServer.GetRaftCluster().ClearSuspectKeyRanges() if testCase.success { - err = tu.CheckPostJSON(testDialClient, suite.urlPrefix+"/rule", testCase.rawData, tu.StatusOK(re)) + err = tu.CheckPostJSON(testDialClient, urlPrefix+"/rule", testCase.rawData, tu.StatusOK(re)) popKeyRangeMap := map[string]struct{}{} for i := 0; i < len(testCase.popKeyRange)/2; i++ { - v, got := suite.svr.GetRaftCluster().PopOneSuspectKeyRange() + v, got := leaderServer.GetRaftCluster().PopOneSuspectKeyRange() suite.True(got) popKeyRangeMap[hex.EncodeToString(v[0])] = struct{}{} popKeyRangeMap[hex.EncodeToString(v[1])] = struct{}{} @@ -175,7 +178,7 @@ 
func (suite *ruleTestSuite) TestSet() { suite.True(ok) } } else { - err = tu.CheckPostJSON(testDialClient, suite.urlPrefix+"/rule", testCase.rawData, + err = tu.CheckPostJSON(testDialClient, urlPrefix+"/rule", testCase.rawData, tu.StatusNotOK(re), tu.StringEqual(re, testCase.response)) } @@ -184,11 +187,19 @@ func (suite *ruleTestSuite) TestSet() { } func (suite *ruleTestSuite) TestGet() { - rule := placement.Rule{GroupID: "a", ID: "20", StartKeyHex: "1111", EndKeyHex: "3333", Role: "voter", Count: 1} + suite.env.RunTestInTwoModes(suite.checkGet) +} + +func (suite *ruleTestSuite) checkGet(cluster *tests.TestCluster) { + leaderServer := cluster.GetLeaderServer() + pdAddr := leaderServer.GetAddr() + urlPrefix := fmt.Sprintf("%s%s/api/v1/config", pdAddr, apiPrefix) + + rule := placement.Rule{GroupID: "a", ID: "20", StartKeyHex: "1111", EndKeyHex: "3333", Role: placement.Voter, Count: 1} data, err := json.Marshal(rule) suite.NoError(err) re := suite.Require() - err = tu.CheckPostJSON(testDialClient, suite.urlPrefix+"/rule", data, tu.StatusOK(re)) + err = tu.CheckPostJSON(testDialClient, urlPrefix+"/rule", data, tu.StatusOK(re)) suite.NoError(err) testCases := []struct { @@ -205,7 +216,7 @@ func (suite *ruleTestSuite) TestGet() { }, { name: "not found", - rule: placement.Rule{GroupID: "a", ID: "30", StartKeyHex: "1111", EndKeyHex: "3333", Role: "voter", Count: 1}, + rule: placement.Rule{GroupID: "a", ID: "30", StartKeyHex: "1111", EndKeyHex: "3333", Role: placement.Voter, Count: 1}, found: false, code: http.StatusNotFound, }, @@ -213,10 +224,12 @@ func (suite *ruleTestSuite) TestGet() { for _, testCase := range testCases { suite.T().Log(testCase.name) var resp placement.Rule - url := fmt.Sprintf("%s/rule/%s/%s", suite.urlPrefix, testCase.rule.GroupID, testCase.rule.ID) + url := fmt.Sprintf("%s/rule/%s/%s", urlPrefix, testCase.rule.GroupID, testCase.rule.ID) if testCase.found { - err = tu.ReadGetJSON(re, testDialClient, url, &resp) - suite.compareRule(&resp, &testCase.rule) + tu.Eventually(suite.Require(), func() bool { + err = tu.ReadGetJSON(re, testDialClient, url, &resp) + return suite.compareRule(&resp, &testCase.rule) + }) } else { err = tu.CheckGetJSON(testDialClient, url, nil, tu.Status(re, testCase.code)) } @@ -225,32 +238,48 @@ func (suite *ruleTestSuite) TestGet() { } func (suite *ruleTestSuite) TestGetAll() { - rule := placement.Rule{GroupID: "b", ID: "20", StartKeyHex: "1111", EndKeyHex: "3333", Role: "voter", Count: 1} + suite.env.RunTestInTwoModes(suite.checkGetAll) +} + +func (suite *ruleTestSuite) checkGetAll(cluster *tests.TestCluster) { + leaderServer := cluster.GetLeaderServer() + pdAddr := leaderServer.GetAddr() + urlPrefix := fmt.Sprintf("%s%s/api/v1/config", pdAddr, apiPrefix) + + rule := placement.Rule{GroupID: "b", ID: "20", StartKeyHex: "1111", EndKeyHex: "3333", Role: placement.Voter, Count: 1} data, err := json.Marshal(rule) suite.NoError(err) re := suite.Require() - err = tu.CheckPostJSON(testDialClient, suite.urlPrefix+"/rule", data, tu.StatusOK(re)) + err = tu.CheckPostJSON(testDialClient, urlPrefix+"/rule", data, tu.StatusOK(re)) suite.NoError(err) var resp2 []*placement.Rule - err = tu.ReadGetJSON(re, testDialClient, suite.urlPrefix+"/rules", &resp2) + err = tu.ReadGetJSON(re, testDialClient, urlPrefix+"/rules", &resp2) suite.NoError(err) suite.GreaterOrEqual(len(resp2), 1) } func (suite *ruleTestSuite) TestSetAll() { - rule1 := placement.Rule{GroupID: "a", ID: "12", StartKeyHex: "1111", EndKeyHex: "3333", Role: "voter", Count: 1} - rule2 := 
placement.Rule{GroupID: "b", ID: "12", StartKeyHex: "1111", EndKeyHex: "3333", Role: "voter", Count: 1} - rule3 := placement.Rule{GroupID: "a", ID: "12", StartKeyHex: "XXXX", EndKeyHex: "3333", Role: "voter", Count: 1} - rule4 := placement.Rule{GroupID: "a", ID: "12", StartKeyHex: "1111", EndKeyHex: "3333", Role: "voter", Count: -1} - rule5 := placement.Rule{GroupID: "pd", ID: "default", StartKeyHex: "", EndKeyHex: "", Role: "voter", Count: 1, + suite.env.RunTestInTwoModes(suite.checkSetAll) +} + +func (suite *ruleTestSuite) checkSetAll(cluster *tests.TestCluster) { + leaderServer := cluster.GetLeaderServer() + pdAddr := leaderServer.GetAddr() + urlPrefix := fmt.Sprintf("%s%s/api/v1/config", pdAddr, apiPrefix) + + rule1 := placement.Rule{GroupID: "a", ID: "12", StartKeyHex: "1111", EndKeyHex: "3333", Role: placement.Voter, Count: 1} + rule2 := placement.Rule{GroupID: "b", ID: "12", StartKeyHex: "1111", EndKeyHex: "3333", Role: placement.Voter, Count: 1} + rule3 := placement.Rule{GroupID: "a", ID: "12", StartKeyHex: "XXXX", EndKeyHex: "3333", Role: placement.Voter, Count: 1} + rule4 := placement.Rule{GroupID: "a", ID: "12", StartKeyHex: "1111", EndKeyHex: "3333", Role: placement.Voter, Count: -1} + rule5 := placement.Rule{GroupID: placement.DefaultGroupID, ID: placement.DefaultRuleID, StartKeyHex: "", EndKeyHex: "", Role: placement.Voter, Count: 1, LocationLabels: []string{"host"}} - rule6 := placement.Rule{GroupID: "pd", ID: "default", StartKeyHex: "", EndKeyHex: "", Role: "voter", Count: 3} + rule6 := placement.Rule{GroupID: placement.DefaultGroupID, ID: placement.DefaultRuleID, StartKeyHex: "", EndKeyHex: "", Role: placement.Voter, Count: 3} - suite.svr.GetPersistOptions().GetReplicationConfig().LocationLabels = []string{"host"} - defaultRule := suite.svr.GetRaftCluster().GetRuleManager().GetRule("pd", "default") + leaderServer.GetPersistOptions().GetReplicationConfig().LocationLabels = []string{"host"} + defaultRule := leaderServer.GetRaftCluster().GetRuleManager().GetRule(placement.DefaultGroupID, placement.DefaultRuleID) defaultRule.LocationLabels = []string{"host"} - suite.svr.GetRaftCluster().GetRuleManager().SetRule(defaultRule) + leaderServer.GetRaftCluster().GetRuleManager().SetRule(defaultRule) successData, err := json.Marshal([]*placement.Rule{&rule1, &rule2}) suite.NoError(err) @@ -333,13 +362,13 @@ func (suite *ruleTestSuite) TestSetAll() { for _, testCase := range testCases { suite.T().Log(testCase.name) if testCase.success { - err := tu.CheckPostJSON(testDialClient, suite.urlPrefix+"/rules", testCase.rawData, tu.StatusOK(re)) + err := tu.CheckPostJSON(testDialClient, urlPrefix+"/rules", testCase.rawData, tu.StatusOK(re)) suite.NoError(err) if testCase.isDefaultRule { - suite.Equal(int(suite.svr.GetPersistOptions().GetReplicationConfig().MaxReplicas), testCase.count) + suite.Equal(int(leaderServer.GetPersistOptions().GetReplicationConfig().MaxReplicas), testCase.count) } } else { - err := tu.CheckPostJSON(testDialClient, suite.urlPrefix+"/rules", testCase.rawData, + err := tu.CheckPostJSON(testDialClient, urlPrefix+"/rules", testCase.rawData, tu.StringEqual(re, testCase.response)) suite.NoError(err) } @@ -347,17 +376,25 @@ func (suite *ruleTestSuite) TestSetAll() { } func (suite *ruleTestSuite) TestGetAllByGroup() { + suite.env.RunTestInTwoModes(suite.checkGetAllByGroup) +} + +func (suite *ruleTestSuite) checkGetAllByGroup(cluster *tests.TestCluster) { + leaderServer := cluster.GetLeaderServer() + pdAddr := leaderServer.GetAddr() + urlPrefix := 
fmt.Sprintf("%s%s/api/v1/config", pdAddr, apiPrefix) + re := suite.Require() - rule := placement.Rule{GroupID: "c", ID: "20", StartKeyHex: "1111", EndKeyHex: "3333", Role: "voter", Count: 1} + rule := placement.Rule{GroupID: "c", ID: "20", StartKeyHex: "1111", EndKeyHex: "3333", Role: placement.Voter, Count: 1} data, err := json.Marshal(rule) suite.NoError(err) - err = tu.CheckPostJSON(testDialClient, suite.urlPrefix+"/rule", data, tu.StatusOK(re)) + err = tu.CheckPostJSON(testDialClient, urlPrefix+"/rule", data, tu.StatusOK(re)) suite.NoError(err) - rule1 := placement.Rule{GroupID: "c", ID: "30", StartKeyHex: "1111", EndKeyHex: "3333", Role: "voter", Count: 1} + rule1 := placement.Rule{GroupID: "c", ID: "30", StartKeyHex: "1111", EndKeyHex: "3333", Role: placement.Voter, Count: 1} data, err = json.Marshal(rule1) suite.NoError(err) - err = tu.CheckPostJSON(testDialClient, suite.urlPrefix+"/rule", data, tu.StatusOK(re)) + err = tu.CheckPostJSON(testDialClient, urlPrefix+"/rule", data, tu.StatusOK(re)) suite.NoError(err) testCases := []struct { @@ -380,27 +417,39 @@ func (suite *ruleTestSuite) TestGetAllByGroup() { for _, testCase := range testCases { suite.T().Log(testCase.name) var resp []*placement.Rule - url := fmt.Sprintf("%s/rules/group/%s", suite.urlPrefix, testCase.groupID) - err = tu.ReadGetJSON(re, testDialClient, url, &resp) - suite.NoError(err) - suite.Len(resp, testCase.count) - if testCase.count == 2 { - suite.compareRule(resp[0], &rule) - suite.compareRule(resp[1], &rule1) - } + url := fmt.Sprintf("%s/rules/group/%s", urlPrefix, testCase.groupID) + tu.Eventually(re, func() bool { + err = tu.ReadGetJSON(re, testDialClient, url, &resp) + suite.NoError(err) + if len(resp) != testCase.count { + return false + } + if testCase.count == 2 { + return suite.compareRule(resp[0], &rule) && suite.compareRule(resp[1], &rule1) + } + return true + }) } } func (suite *ruleTestSuite) TestGetAllByRegion() { - rule := placement.Rule{GroupID: "e", ID: "20", StartKeyHex: "1111", EndKeyHex: "3333", Role: "voter", Count: 1} + suite.env.RunTestInTwoModes(suite.checkGetAllByRegion) +} + +func (suite *ruleTestSuite) checkGetAllByRegion(cluster *tests.TestCluster) { + leaderServer := cluster.GetLeaderServer() + pdAddr := leaderServer.GetAddr() + urlPrefix := fmt.Sprintf("%s%s/api/v1/config", pdAddr, apiPrefix) + + rule := placement.Rule{GroupID: "e", ID: "20", StartKeyHex: "1111", EndKeyHex: "3333", Role: placement.Voter, Count: 1} data, err := json.Marshal(rule) suite.NoError(err) re := suite.Require() - err = tu.CheckPostJSON(testDialClient, suite.urlPrefix+"/rule", data, tu.StatusOK(re)) + err = tu.CheckPostJSON(testDialClient, urlPrefix+"/rule", data, tu.StatusOK(re)) suite.NoError(err) r := core.NewTestRegionInfo(4, 1, []byte{0x22, 0x22}, []byte{0x33, 0x33}) - mustRegionHeartbeat(re, suite.svr, r) + tests.MustPutRegionInfo(re, cluster, r) testCases := []struct { name string @@ -429,15 +478,18 @@ func (suite *ruleTestSuite) TestGetAllByRegion() { for _, testCase := range testCases { suite.T().Log(testCase.name) var resp []*placement.Rule - url := fmt.Sprintf("%s/rules/region/%s", suite.urlPrefix, testCase.regionID) + url := fmt.Sprintf("%s/rules/region/%s", urlPrefix, testCase.regionID) if testCase.success { - err = tu.ReadGetJSON(re, testDialClient, url, &resp) - for _, r := range resp { - if r.GroupID == "e" { - suite.compareRule(r, &rule) + tu.Eventually(suite.Require(), func() bool { + err = tu.ReadGetJSON(re, testDialClient, url, &resp) + for _, r := range resp { + if r.GroupID == "e" { + 
return suite.compareRule(r, &rule) + } } - } + return true + }) } else { err = tu.CheckGetJSON(testDialClient, url, nil, tu.Status(re, testCase.code)) } @@ -446,11 +498,20 @@ func (suite *ruleTestSuite) TestGetAllByRegion() { } func (suite *ruleTestSuite) TestGetAllByKey() { - rule := placement.Rule{GroupID: "f", ID: "40", StartKeyHex: "8888", EndKeyHex: "9111", Role: "voter", Count: 1} + // Fixme: after delete+set rule, the key range will be empty, so the test will fail in api mode. + suite.env.RunTestInPDMode(suite.checkGetAllByKey) +} + +func (suite *ruleTestSuite) checkGetAllByKey(cluster *tests.TestCluster) { + leaderServer := cluster.GetLeaderServer() + pdAddr := leaderServer.GetAddr() + urlPrefix := fmt.Sprintf("%s%s/api/v1/config", pdAddr, apiPrefix) + + rule := placement.Rule{GroupID: "f", ID: "40", StartKeyHex: "8888", EndKeyHex: "9111", Role: placement.Voter, Count: 1} data, err := json.Marshal(rule) suite.NoError(err) re := suite.Require() - err = tu.CheckPostJSON(testDialClient, suite.urlPrefix+"/rule", data, tu.StatusOK(re)) + err = tu.CheckPostJSON(testDialClient, urlPrefix+"/rule", data, tu.StatusOK(re)) suite.NoError(err) testCases := []struct { @@ -483,10 +544,12 @@ func (suite *ruleTestSuite) TestGetAllByKey() { for _, testCase := range testCases { suite.T().Log(testCase.name) var resp []*placement.Rule - url := fmt.Sprintf("%s/rules/key/%s", suite.urlPrefix, testCase.key) + url := fmt.Sprintf("%s/rules/key/%s", urlPrefix, testCase.key) if testCase.success { - err = tu.ReadGetJSON(re, testDialClient, url, &resp) - suite.Len(resp, testCase.respSize) + tu.Eventually(re, func() bool { + err = tu.ReadGetJSON(re, testDialClient, url, &resp) + return len(resp) == testCase.respSize + }) } else { err = tu.CheckGetJSON(testDialClient, url, nil, tu.Status(re, testCase.code)) } @@ -495,10 +558,18 @@ func (suite *ruleTestSuite) TestGetAllByKey() { } func (suite *ruleTestSuite) TestDelete() { - rule := placement.Rule{GroupID: "g", ID: "10", StartKeyHex: "8888", EndKeyHex: "9111", Role: "voter", Count: 1} + suite.env.RunTestInTwoModes(suite.checkDelete) +} + +func (suite *ruleTestSuite) checkDelete(cluster *tests.TestCluster) { + leaderServer := cluster.GetLeaderServer() + pdAddr := leaderServer.GetAddr() + urlPrefix := fmt.Sprintf("%s%s/api/v1/config", pdAddr, apiPrefix) + + rule := placement.Rule{GroupID: "g", ID: "10", StartKeyHex: "8888", EndKeyHex: "9111", Role: placement.Voter, Count: 1} data, err := json.Marshal(rule) suite.NoError(err) - err = tu.CheckPostJSON(testDialClient, suite.urlPrefix+"/rule", data, tu.StatusOK(suite.Require())) + err = tu.CheckPostJSON(testDialClient, urlPrefix+"/rule", data, tu.StatusOK(suite.Require())) suite.NoError(err) oldStartKey, err := hex.DecodeString(rule.StartKeyHex) suite.NoError(err) @@ -529,15 +600,15 @@ func (suite *ruleTestSuite) TestDelete() { } for _, testCase := range testCases { suite.T().Log(testCase.name) - url := fmt.Sprintf("%s/rule/%s/%s", suite.urlPrefix, testCase.groupID, testCase.id) + url := fmt.Sprintf("%s/rule/%s/%s", urlPrefix, testCase.groupID, testCase.id) // clear suspect keyRanges to prevent test case from others - suite.svr.GetRaftCluster().ClearSuspectKeyRanges() + leaderServer.GetRaftCluster().ClearSuspectKeyRanges() err = tu.CheckDelete(testDialClient, url, tu.StatusOK(suite.Require())) suite.NoError(err) if len(testCase.popKeyRange) > 0 { popKeyRangeMap := map[string]struct{}{} for i := 0; i < len(testCase.popKeyRange)/2; i++ { - v, got := suite.svr.GetRaftCluster().PopOneSuspectKeyRange() + v, got := 
leaderServer.GetRaftCluster().PopOneSuspectKeyRange() suite.True(got) popKeyRangeMap[hex.EncodeToString(v[0])] = struct{}{} popKeyRangeMap[hex.EncodeToString(v[1])] = struct{}{} @@ -551,31 +622,30 @@ func (suite *ruleTestSuite) TestDelete() { } } -func (suite *ruleTestSuite) compareRule(r1 *placement.Rule, r2 *placement.Rule) { - suite.Equal(r2.GroupID, r1.GroupID) - suite.Equal(r2.ID, r1.ID) - suite.Equal(r2.StartKeyHex, r1.StartKeyHex) - suite.Equal(r2.EndKeyHex, r1.EndKeyHex) - suite.Equal(r2.Role, r1.Role) - suite.Equal(r2.Count, r1.Count) +func (suite *ruleTestSuite) TestBatch() { + suite.env.RunTestInTwoModes(suite.checkBatch) } -func (suite *ruleTestSuite) TestBatch() { +func (suite *ruleTestSuite) checkBatch(cluster *tests.TestCluster) { + leaderServer := cluster.GetLeaderServer() + pdAddr := leaderServer.GetAddr() + urlPrefix := fmt.Sprintf("%s%s/api/v1/config", pdAddr, apiPrefix) + opt1 := placement.RuleOp{ Action: placement.RuleOpAdd, - Rule: &placement.Rule{GroupID: "a", ID: "13", StartKeyHex: "1111", EndKeyHex: "3333", Role: "voter", Count: 1}, + Rule: &placement.Rule{GroupID: "a", ID: "13", StartKeyHex: "1111", EndKeyHex: "3333", Role: placement.Voter, Count: 1}, } opt2 := placement.RuleOp{ Action: placement.RuleOpAdd, - Rule: &placement.Rule{GroupID: "b", ID: "13", StartKeyHex: "1111", EndKeyHex: "3333", Role: "voter", Count: 1}, + Rule: &placement.Rule{GroupID: "b", ID: "13", StartKeyHex: "1111", EndKeyHex: "3333", Role: placement.Voter, Count: 1}, } opt3 := placement.RuleOp{ Action: placement.RuleOpAdd, - Rule: &placement.Rule{GroupID: "a", ID: "14", StartKeyHex: "1111", EndKeyHex: "3333", Role: "voter", Count: 1}, + Rule: &placement.Rule{GroupID: "a", ID: "14", StartKeyHex: "1111", EndKeyHex: "3333", Role: placement.Voter, Count: 1}, } opt4 := placement.RuleOp{ Action: placement.RuleOpAdd, - Rule: &placement.Rule{GroupID: "a", ID: "15", StartKeyHex: "1111", EndKeyHex: "3333", Role: "voter", Count: 1}, + Rule: &placement.Rule{GroupID: "a", ID: "15", StartKeyHex: "1111", EndKeyHex: "3333", Role: placement.Voter, Count: 1}, } opt5 := placement.RuleOp{ Action: placement.RuleOpDel, @@ -592,11 +662,11 @@ func (suite *ruleTestSuite) TestBatch() { } opt8 := placement.RuleOp{ Action: placement.RuleOpAdd, - Rule: &placement.Rule{GroupID: "a", ID: "16", StartKeyHex: "XXXX", EndKeyHex: "3333", Role: "voter", Count: 1}, + Rule: &placement.Rule{GroupID: "a", ID: "16", StartKeyHex: "XXXX", EndKeyHex: "3333", Role: placement.Voter, Count: 1}, } opt9 := placement.RuleOp{ Action: placement.RuleOpAdd, - Rule: &placement.Rule{GroupID: "a", ID: "17", StartKeyHex: "1111", EndKeyHex: "3333", Role: "voter", Count: -1}, + Rule: &placement.Rule{GroupID: "a", ID: "17", StartKeyHex: "1111", EndKeyHex: "3333", Role: placement.Voter, Count: -1}, } successData1, err := json.Marshal([]placement.RuleOp{opt1, opt2, opt3}) @@ -670,10 +740,10 @@ func (suite *ruleTestSuite) TestBatch() { for _, testCase := range testCases { suite.T().Log(testCase.name) if testCase.success { - err := tu.CheckPostJSON(testDialClient, suite.urlPrefix+"/rules/batch", testCase.rawData, tu.StatusOK(re)) + err := tu.CheckPostJSON(testDialClient, urlPrefix+"/rules/batch", testCase.rawData, tu.StatusOK(re)) suite.NoError(err) } else { - err := tu.CheckPostJSON(testDialClient, suite.urlPrefix+"/rules/batch", testCase.rawData, + err := tu.CheckPostJSON(testDialClient, urlPrefix+"/rules/batch", testCase.rawData, tu.StatusNotOK(re), tu.StringEqual(re, testCase.response)) suite.NoError(err) @@ -682,16 +752,29 @@ func (suite 
*ruleTestSuite) TestBatch() { } func (suite *ruleTestSuite) TestBundle() { + suite.env.RunTestInTwoModes(suite.checkBundle) +} + +func (suite *ruleTestSuite) checkBundle(cluster *tests.TestCluster) { + leaderServer := cluster.GetLeaderServer() + pdAddr := leaderServer.GetAddr() + urlPrefix := fmt.Sprintf("%s%s/api/v1/config", pdAddr, apiPrefix) + re := suite.Require() // GetAll b1 := placement.GroupBundle{ - ID: "pd", + ID: placement.DefaultGroupID, Rules: []*placement.Rule{ - {GroupID: "pd", ID: "default", Role: "voter", Count: 3}, + { + GroupID: placement.DefaultGroupID, + ID: placement.DefaultRuleID, + Role: placement.Voter, + Count: 3, + }, }, } var bundles []placement.GroupBundle - err := tu.ReadGetJSON(re, testDialClient, suite.urlPrefix+"/placement-rule", &bundles) + err := tu.ReadGetJSON(re, testDialClient, urlPrefix+"/placement-rule", &bundles) suite.NoError(err) suite.Len(bundles, 1) suite.compareBundle(bundles[0], b1) @@ -702,48 +785,48 @@ func (suite *ruleTestSuite) TestBundle() { Index: 42, Override: true, Rules: []*placement.Rule{ - {GroupID: "foo", ID: "bar", Index: 1, Override: true, Role: "voter", Count: 1}, + {GroupID: "foo", ID: "bar", Index: 1, Override: true, Role: placement.Voter, Count: 1}, }, } data, err := json.Marshal(b2) suite.NoError(err) - err = tu.CheckPostJSON(testDialClient, suite.urlPrefix+"/placement-rule/foo", data, tu.StatusOK(re)) + err = tu.CheckPostJSON(testDialClient, urlPrefix+"/placement-rule/foo", data, tu.StatusOK(re)) suite.NoError(err) // Get var bundle placement.GroupBundle - err = tu.ReadGetJSON(re, testDialClient, suite.urlPrefix+"/placement-rule/foo", &bundle) + err = tu.ReadGetJSON(re, testDialClient, urlPrefix+"/placement-rule/foo", &bundle) suite.NoError(err) suite.compareBundle(bundle, b2) // GetAll again - err = tu.ReadGetJSON(re, testDialClient, suite.urlPrefix+"/placement-rule", &bundles) + err = tu.ReadGetJSON(re, testDialClient, urlPrefix+"/placement-rule", &bundles) suite.NoError(err) suite.Len(bundles, 2) suite.compareBundle(bundles[0], b1) suite.compareBundle(bundles[1], b2) // Delete - err = tu.CheckDelete(testDialClient, suite.urlPrefix+"/placement-rule/pd", tu.StatusOK(suite.Require())) + err = tu.CheckDelete(testDialClient, urlPrefix+"/placement-rule/pd", tu.StatusOK(suite.Require())) suite.NoError(err) // GetAll again - err = tu.ReadGetJSON(re, testDialClient, suite.urlPrefix+"/placement-rule", &bundles) + err = tu.ReadGetJSON(re, testDialClient, urlPrefix+"/placement-rule", &bundles) suite.NoError(err) suite.Len(bundles, 1) suite.compareBundle(bundles[0], b2) // SetAll - b2.Rules = append(b2.Rules, &placement.Rule{GroupID: "foo", ID: "baz", Index: 2, Role: "follower", Count: 1}) + b2.Rules = append(b2.Rules, &placement.Rule{GroupID: "foo", ID: "baz", Index: 2, Role: placement.Follower, Count: 1}) b2.Index, b2.Override = 0, false b3 := placement.GroupBundle{ID: "foobar", Index: 100} data, err = json.Marshal([]placement.GroupBundle{b1, b2, b3}) suite.NoError(err) - err = tu.CheckPostJSON(testDialClient, suite.urlPrefix+"/placement-rule", data, tu.StatusOK(re)) + err = tu.CheckPostJSON(testDialClient, urlPrefix+"/placement-rule", data, tu.StatusOK(re)) suite.NoError(err) // GetAll again - err = tu.ReadGetJSON(re, testDialClient, suite.urlPrefix+"/placement-rule", &bundles) + err = tu.ReadGetJSON(re, testDialClient, urlPrefix+"/placement-rule", &bundles) suite.NoError(err) suite.Len(bundles, 3) suite.compareBundle(bundles[0], b2) @@ -751,11 +834,11 @@ func (suite *ruleTestSuite) TestBundle() { suite.compareBundle(bundles[2], 
b3) // Delete using regexp - err = tu.CheckDelete(testDialClient, suite.urlPrefix+"/placement-rule/"+url.PathEscape("foo.*")+"?regexp", tu.StatusOK(suite.Require())) + err = tu.CheckDelete(testDialClient, urlPrefix+"/placement-rule/"+url.PathEscape("foo.*")+"?regexp", tu.StatusOK(suite.Require())) suite.NoError(err) // GetAll again - err = tu.ReadGetJSON(re, testDialClient, suite.urlPrefix+"/placement-rule", &bundles) + err = tu.ReadGetJSON(re, testDialClient, urlPrefix+"/placement-rule", &bundles) suite.NoError(err) suite.Len(bundles, 1) suite.compareBundle(bundles[0], b1) @@ -765,24 +848,24 @@ func (suite *ruleTestSuite) TestBundle() { b4 := placement.GroupBundle{ Index: 4, Rules: []*placement.Rule{ - {ID: "bar", Index: 1, Override: true, Role: "voter", Count: 1}, + {ID: "bar", Index: 1, Override: true, Role: placement.Voter, Count: 1}, }, } data, err = json.Marshal(b4) suite.NoError(err) - err = tu.CheckPostJSON(testDialClient, suite.urlPrefix+"/placement-rule/"+id, data, tu.StatusOK(re)) + err = tu.CheckPostJSON(testDialClient, urlPrefix+"/placement-rule/"+id, data, tu.StatusOK(re)) suite.NoError(err) b4.ID = id b4.Rules[0].GroupID = b4.ID // Get - err = tu.ReadGetJSON(re, testDialClient, suite.urlPrefix+"/placement-rule/"+id, &bundle) + err = tu.ReadGetJSON(re, testDialClient, urlPrefix+"/placement-rule/"+id, &bundle) suite.NoError(err) suite.compareBundle(bundle, b4) // GetAll again - err = tu.ReadGetJSON(re, testDialClient, suite.urlPrefix+"/placement-rule", &bundles) + err = tu.ReadGetJSON(re, testDialClient, urlPrefix+"/placement-rule", &bundles) suite.NoError(err) suite.Len(bundles, 2) suite.compareBundle(bundles[0], b1) @@ -793,18 +876,18 @@ func (suite *ruleTestSuite) TestBundle() { ID: "rule-without-group-id-2", Index: 5, Rules: []*placement.Rule{ - {ID: "bar", Index: 1, Override: true, Role: "voter", Count: 1}, + {ID: "bar", Index: 1, Override: true, Role: placement.Voter, Count: 1}, }, } data, err = json.Marshal([]placement.GroupBundle{b1, b4, b5}) suite.NoError(err) - err = tu.CheckPostJSON(testDialClient, suite.urlPrefix+"/placement-rule", data, tu.StatusOK(re)) + err = tu.CheckPostJSON(testDialClient, urlPrefix+"/placement-rule", data, tu.StatusOK(re)) suite.NoError(err) b5.Rules[0].GroupID = b5.ID // GetAll again - err = tu.ReadGetJSON(re, testDialClient, suite.urlPrefix+"/placement-rule", &bundles) + err = tu.ReadGetJSON(re, testDialClient, urlPrefix+"/placement-rule", &bundles) suite.NoError(err) suite.Len(bundles, 3) suite.compareBundle(bundles[0], b1) @@ -813,6 +896,14 @@ func (suite *ruleTestSuite) TestBundle() { } func (suite *ruleTestSuite) TestBundleBadRequest() { + suite.env.RunTestInTwoModes(suite.checkBundleBadRequest) +} + +func (suite *ruleTestSuite) checkBundleBadRequest(cluster *tests.TestCluster) { + leaderServer := cluster.GetLeaderServer() + pdAddr := leaderServer.GetAddr() + urlPrefix := fmt.Sprintf("%s%s/api/v1/config", pdAddr, apiPrefix) + testCases := []struct { uri string data string @@ -826,7 +917,7 @@ func (suite *ruleTestSuite) TestBundleBadRequest() { {"/placement-rule", `[{"group_id":"foo", "rules": [{"group_id":"bar", "id":"baz", "role":"voter", "count":1}]}]`, false}, } for _, testCase := range testCases { - err := tu.CheckPostJSON(testDialClient, suite.urlPrefix+testCase.uri, []byte(testCase.data), + err := tu.CheckPostJSON(testDialClient, urlPrefix+testCase.uri, []byte(testCase.data), func(_ []byte, code int, _ http.Header) { suite.Equal(testCase.ok, code == http.StatusOK) }) @@ -835,23 +926,31 @@ func (suite *ruleTestSuite) 
TestBundleBadRequest() { } func (suite *ruleTestSuite) compareBundle(b1, b2 placement.GroupBundle) { - suite.Equal(b2.ID, b1.ID) - suite.Equal(b2.Index, b1.Index) - suite.Equal(b2.Override, b1.Override) - suite.Len(b2.Rules, len(b1.Rules)) - for i := range b1.Rules { - suite.compareRule(b1.Rules[i], b2.Rules[i]) - } + tu.Eventually(suite.Require(), func() bool { + if b2.ID != b1.ID || b2.Index != b1.Index || b2.Override != b1.Override || len(b2.Rules) != len(b1.Rules) { + return false + } + for i := range b1.Rules { + if !suite.compareRule(b1.Rules[i], b2.Rules[i]) { + return false + } + } + return true + }) +} + +func (suite *ruleTestSuite) compareRule(r1 *placement.Rule, r2 *placement.Rule) bool { + return r2.GroupID == r1.GroupID && + r2.ID == r1.ID && + r2.StartKeyHex == r1.StartKeyHex && + r2.EndKeyHex == r1.EndKeyHex && + r2.Role == r1.Role && + r2.Count == r1.Count } type regionRuleTestSuite struct { suite.Suite - svr *server.Server - grpcSvr *server.GrpcServer - cleanup tu.CleanupFunc - urlPrefix string - stores []*metapb.Store - regions []*core.RegionInfo + env *tests.SchedulingTestEnvironment } func TestRegionRuleTestSuite(t *testing.T) { @@ -859,7 +958,27 @@ func TestRegionRuleTestSuite(t *testing.T) { } func (suite *regionRuleTestSuite) SetupSuite() { - suite.stores = []*metapb.Store{ + suite.env = tests.NewSchedulingTestEnvironment(suite.T(), func(conf *config.Config, serverName string) { + conf.Replication.EnablePlacementRules = true + conf.Replication.MaxReplicas = 1 + }) +} + +func (suite *regionRuleTestSuite) TearDownSuite() { + suite.env.Cleanup() +} + +func (suite *regionRuleTestSuite) TestRegionPlacementRule() { + suite.env.RunTestInTwoModes(suite.checkRegionPlacementRule) +} + +func (suite *regionRuleTestSuite) checkRegionPlacementRule(cluster *tests.TestCluster) { + re := suite.Require() + leaderServer := cluster.GetLeaderServer() + pdAddr := leaderServer.GetAddr() + urlPrefix := fmt.Sprintf("%s%s/api/v1", pdAddr, apiPrefix) + + stores := []*metapb.Store{ { Id: 1, Address: "tikv1", @@ -875,49 +994,30 @@ func (suite *regionRuleTestSuite) SetupSuite() { Version: "2.0.0", }, } - re := suite.Require() - suite.svr, suite.cleanup = mustNewServer(re, func(cfg *config.Config) { - cfg.Replication.EnablePlacementRules = true - cfg.Replication.MaxReplicas = 1 - }) - server.MustWaitLeader(re, []*server.Server{suite.svr}) - - addr := suite.svr.GetAddr() - suite.grpcSvr = &server.GrpcServer{Server: suite.svr} - suite.urlPrefix = fmt.Sprintf("%s%s/api/v1", addr, apiPrefix) - - mustBootstrapCluster(re, suite.svr) - - for _, store := range suite.stores { - mustPutStore(re, suite.svr, store.Id, store.State, store.NodeState, nil) + for _, store := range stores { + tests.MustPutStore(re, cluster, store) } - suite.regions = make([]*core.RegionInfo, 0) + regions := make([]*core.RegionInfo, 0) peers1 := []*metapb.Peer{ {Id: 102, StoreId: 1, Role: metapb.PeerRole_Voter}, {Id: 103, StoreId: 2, Role: metapb.PeerRole_Voter}} - suite.regions = append(suite.regions, core.NewRegionInfo(&metapb.Region{Id: 1, Peers: peers1, RegionEpoch: &metapb.RegionEpoch{ConfVer: 1, Version: 1}}, peers1[0], + regions = append(regions, core.NewRegionInfo(&metapb.Region{Id: 1, Peers: peers1, RegionEpoch: &metapb.RegionEpoch{ConfVer: 1, Version: 1}}, peers1[0], core.WithStartKey([]byte("abc")), core.WithEndKey([]byte("def")))) peers2 := []*metapb.Peer{ {Id: 104, StoreId: 1, Role: metapb.PeerRole_Voter}, {Id: 105, StoreId: 2, Role: metapb.PeerRole_Learner}} - suite.regions = append(suite.regions, 
core.NewRegionInfo(&metapb.Region{Id: 2, Peers: peers2, RegionEpoch: &metapb.RegionEpoch{ConfVer: 2, Version: 2}}, peers2[0], + regions = append(regions, core.NewRegionInfo(&metapb.Region{Id: 2, Peers: peers2, RegionEpoch: &metapb.RegionEpoch{ConfVer: 2, Version: 2}}, peers2[0], core.WithStartKey([]byte("ghi")), core.WithEndKey([]byte("jkl")))) peers3 := []*metapb.Peer{ {Id: 106, StoreId: 1, Role: metapb.PeerRole_Voter}, {Id: 107, StoreId: 2, Role: metapb.PeerRole_Learner}} - suite.regions = append(suite.regions, core.NewRegionInfo(&metapb.Region{Id: 3, Peers: peers3, RegionEpoch: &metapb.RegionEpoch{ConfVer: 3, Version: 3}}, peers3[0], + regions = append(regions, core.NewRegionInfo(&metapb.Region{Id: 3, Peers: peers3, RegionEpoch: &metapb.RegionEpoch{ConfVer: 3, Version: 3}}, peers3[0], core.WithStartKey([]byte("mno")), core.WithEndKey([]byte("pqr")))) - for _, rg := range suite.regions { - suite.svr.GetBasicCluster().PutRegion(rg) + for _, rg := range regions { + tests.MustPutRegionInfo(re, cluster, rg) } -} -func (suite *regionRuleTestSuite) TearDownSuite() { - suite.cleanup() -} - -func (suite *regionRuleTestSuite) TestRegionPlacementRule() { - ruleManager := suite.svr.GetRaftCluster().GetRuleManager() + ruleManager := leaderServer.GetRaftCluster().GetRuleManager() ruleManager.SetRule(&placement.Rule{ GroupID: "test", ID: "test2", @@ -934,39 +1034,81 @@ func (suite *regionRuleTestSuite) TestRegionPlacementRule() { Role: placement.Learner, Count: 1, }) - re := suite.Require() - url := fmt.Sprintf("%s/config/rules/region/%d/detail", suite.urlPrefix, 1) fit := &placement.RegionFit{} - err := tu.ReadGetJSON(re, testDialClient, url, fit) + + u := fmt.Sprintf("%s/config/rules/region/%d/detail", urlPrefix, 1) + err := tu.ReadGetJSON(re, testDialClient, u, fit) + suite.NoError(err) suite.Equal(len(fit.RuleFits), 1) suite.Equal(len(fit.OrphanPeers), 1) - suite.NoError(err) - url = fmt.Sprintf("%s/config/rules/region/%d/detail", suite.urlPrefix, 2) + u = fmt.Sprintf("%s/config/rules/region/%d/detail", urlPrefix, 2) fit = &placement.RegionFit{} - err = tu.ReadGetJSON(re, testDialClient, url, fit) + err = tu.ReadGetJSON(re, testDialClient, u, fit) + suite.NoError(err) suite.Equal(len(fit.RuleFits), 2) suite.Equal(len(fit.OrphanPeers), 0) - suite.NoError(err) - url = fmt.Sprintf("%s/config/rules/region/%d/detail", suite.urlPrefix, 3) + u = fmt.Sprintf("%s/config/rules/region/%d/detail", urlPrefix, 3) fit = &placement.RegionFit{} - err = tu.ReadGetJSON(re, testDialClient, url, fit) + err = tu.ReadGetJSON(re, testDialClient, u, fit) + suite.NoError(err) suite.Equal(len(fit.RuleFits), 0) suite.Equal(len(fit.OrphanPeers), 2) + + var label labeler.LabelRule + escapedID := url.PathEscape("keyspaces/0") + u = fmt.Sprintf("%s/config/region-label/rule/%s", urlPrefix, escapedID) + err = tu.ReadGetJSON(re, testDialClient, u, &label) + suite.NoError(err) + suite.Equal(label.ID, "keyspaces/0") + + var labels []labeler.LabelRule + u = fmt.Sprintf("%s/config/region-label/rules", urlPrefix) + err = tu.ReadGetJSON(re, testDialClient, u, &labels) suite.NoError(err) + suite.Len(labels, 1) + suite.Equal(labels[0].ID, "keyspaces/0") - url = fmt.Sprintf("%s/config/rules/region/%d/detail", suite.urlPrefix, 4) - err = tu.CheckGetJSON(testDialClient, url, nil, tu.Status(re, http.StatusNotFound), tu.StringContain( + u = fmt.Sprintf("%s/config/region-label/rules/ids", urlPrefix) + err = tu.CheckGetJSON(testDialClient, u, []byte(`["rule1", "rule3"]`), func(resp []byte, statusCode int, _ http.Header) { + err := 
json.Unmarshal(resp, &labels) + suite.NoError(err) + suite.Len(labels, 0) + }) + suite.NoError(err) + + err = tu.CheckGetJSON(testDialClient, u, []byte(`["keyspaces/0"]`), func(resp []byte, statusCode int, _ http.Header) { + err := json.Unmarshal(resp, &labels) + suite.NoError(err) + suite.Len(labels, 1) + suite.Equal(labels[0].ID, "keyspaces/0") + }) + suite.NoError(err) + + u = fmt.Sprintf("%s/config/rules/region/%d/detail", urlPrefix, 4) + err = tu.CheckGetJSON(testDialClient, u, nil, tu.Status(re, http.StatusNotFound), tu.StringContain( re, "region 4 not found")) suite.NoError(err) - url = fmt.Sprintf("%s/config/rules/region/%s/detail", suite.urlPrefix, "id") - err = tu.CheckGetJSON(testDialClient, url, nil, tu.Status(re, http.StatusBadRequest), tu.StringContain( - re, "invalid region id")) + u = fmt.Sprintf("%s/config/rules/region/%s/detail", urlPrefix, "id") + err = tu.CheckGetJSON(testDialClient, u, nil, tu.Status(re, http.StatusBadRequest), tu.StringContain( + re, errs.ErrRegionInvalidID.Error())) suite.NoError(err) - suite.svr.GetRaftCluster().GetReplicationConfig().EnablePlacementRules = false - url = fmt.Sprintf("%s/config/rules/region/%d/detail", suite.urlPrefix, 1) - err = tu.CheckGetJSON(testDialClient, url, nil, tu.Status(re, http.StatusPreconditionFailed), tu.StringContain( + data := make(map[string]interface{}) + data["enable-placement-rules"] = "false" + reqData, e := json.Marshal(data) + re.NoError(e) + u = fmt.Sprintf("%s/config", urlPrefix) + err = tu.CheckPostJSON(testDialClient, u, reqData, tu.StatusOK(re)) + re.NoError(err) + if sche := cluster.GetSchedulingPrimaryServer(); sche != nil { + // wait for the scheduler server to update the config + tu.Eventually(re, func() bool { + return !sche.GetCluster().GetCheckerConfig().IsPlacementRulesEnabled() + }) + } + u = fmt.Sprintf("%s/config/rules/region/%d/detail", urlPrefix, 1) + err = tu.CheckGetJSON(testDialClient, u, nil, tu.Status(re, http.StatusPreconditionFailed), tu.StringContain( re, "placement rules feature is disabled")) suite.NoError(err) } diff --git a/tests/server/api/scheduler_test.go b/tests/server/api/scheduler_test.go index 95c4d936a8c..b3810da154a 100644 --- a/tests/server/api/scheduler_test.go +++ b/tests/server/api/scheduler_test.go @@ -17,7 +17,10 @@ package api import ( "encoding/json" "fmt" + "io" "net/http" + "reflect" + "strings" "testing" "time" @@ -25,6 +28,8 @@ import ( "github.com/pingcap/kvproto/pkg/metapb" "github.com/stretchr/testify/suite" sc "github.com/tikv/pd/pkg/schedule/config" + "github.com/tikv/pd/pkg/slice" + "github.com/tikv/pd/pkg/utils/apiutil" tu "github.com/tikv/pd/pkg/utils/testutil" "github.com/tikv/pd/server" "github.com/tikv/pd/tests" @@ -34,20 +39,25 @@ const apiPrefix = "/pd" type scheduleTestSuite struct { suite.Suite + env *tests.SchedulingTestEnvironment } func TestScheduleTestSuite(t *testing.T) { suite.Run(t, new(scheduleTestSuite)) } -func (suite *scheduleTestSuite) TestScheduler() { - // Fixme: use RunTestInTwoModes when sync deleted scheduler is supported. 
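Note on the pattern introduced by the scheduler-suite hunks here: the per-test environments and the PD-mode-only Fixme are dropped in favor of one suite-scoped tests.SchedulingTestEnvironment that runs every check in both PD mode and API (scheduling-service) mode. Below is a minimal sketch of that shape, reusing only helpers visible in this diff (tests.NewSchedulingTestEnvironment, RunTestInTwoModes, Cleanup); the suite and check names are illustrative, not part of PD.

package api

import (
	"testing"

	"github.com/stretchr/testify/suite"
	"github.com/tikv/pd/tests"
)

// demoTestSuite is illustrative; the converted suites in this diff follow the same shape.
type demoTestSuite struct {
	suite.Suite
	env *tests.SchedulingTestEnvironment
}

func TestDemoTestSuite(t *testing.T) {
	suite.Run(t, new(demoTestSuite))
}

// SetupSuite builds one environment shared by every test in the suite.
func (s *demoTestSuite) SetupSuite() {
	s.env = tests.NewSchedulingTestEnvironment(s.T())
}

// TearDownSuite releases the shared environment once all tests are done.
func (s *demoTestSuite) TearDownSuite() {
	s.env.Cleanup()
}

// The test only selects the modes; the assertions live in a check function
// that receives the cluster for whichever mode is currently running.
func (s *demoTestSuite) TestDemo() {
	s.env.RunTestInTwoModes(s.checkDemo)
}

func (s *demoTestSuite) checkDemo(cluster *tests.TestCluster) {
	s.NotNil(cluster.GetLeaderServer())
}

The rule, region-rule, and config suites converted later in this diff differ only in the optional config callback passed to NewSchedulingTestEnvironment.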
- env := tests.NewSchedulingTestEnvironment(suite.T()) - env.RunTestInPDMode(suite.checkOriginAPI) - env = tests.NewSchedulingTestEnvironment(suite.T()) - env.RunTestInPDMode(suite.checkAPI) - env = tests.NewSchedulingTestEnvironment(suite.T()) - env.RunTestInPDMode(suite.checkDisable) +func (suite *scheduleTestSuite) SetupSuite() { + suite.NoError(failpoint.Enable("github.com/tikv/pd/server/cluster/skipStoreConfigSync", `return(true)`)) + suite.env = tests.NewSchedulingTestEnvironment(suite.T()) +} + +func (suite *scheduleTestSuite) TearDownSuite() { + suite.env.Cleanup() + suite.NoError(failpoint.Disable("github.com/tikv/pd/server/cluster/skipStoreConfigSync")) +} + +func (suite *scheduleTestSuite) TestOriginAPI() { + suite.env.RunTestInTwoModes(suite.checkOriginAPI) } func (suite *scheduleTestSuite) checkOriginAPI(cluster *tests.TestCluster) { @@ -71,7 +81,7 @@ func (suite *scheduleTestSuite) checkOriginAPI(cluster *tests.TestCluster) { re := suite.Require() suite.NoError(tu.CheckPostJSON(testDialClient, urlPrefix, body, tu.StatusOK(re))) - suite.Len(suite.getSchedulers(urlPrefix), 1) + suite.assertSchedulerExists(urlPrefix, "evict-leader-scheduler") resp := make(map[string]interface{}) listURL := fmt.Sprintf("%s%s%s/%s/list", leaderAddr, apiPrefix, server.SchedulerConfigHandlerPath, "evict-leader-scheduler") suite.NoError(tu.ReadGetJSON(re, testDialClient, listURL, &resp)) @@ -83,20 +93,20 @@ func (suite *scheduleTestSuite) checkOriginAPI(cluster *tests.TestCluster) { suite.NoError(err) suite.NoError(failpoint.Enable("github.com/tikv/pd/pkg/schedule/schedulers/persistFail", "return(true)")) suite.NoError(tu.CheckPostJSON(testDialClient, urlPrefix, body, tu.StatusNotOK(re))) - suite.Len(suite.getSchedulers(urlPrefix), 1) + suite.assertSchedulerExists(urlPrefix, "evict-leader-scheduler") resp = make(map[string]interface{}) suite.NoError(tu.ReadGetJSON(re, testDialClient, listURL, &resp)) suite.Len(resp["store-id-ranges"], 1) suite.NoError(failpoint.Disable("github.com/tikv/pd/pkg/schedule/schedulers/persistFail")) suite.NoError(tu.CheckPostJSON(testDialClient, urlPrefix, body, tu.StatusOK(re))) - suite.Len(suite.getSchedulers(urlPrefix), 1) + suite.assertSchedulerExists(urlPrefix, "evict-leader-scheduler") resp = make(map[string]interface{}) suite.NoError(tu.ReadGetJSON(re, testDialClient, listURL, &resp)) suite.Len(resp["store-id-ranges"], 2) deleteURL := fmt.Sprintf("%s/%s", urlPrefix, "evict-leader-scheduler-1") err = tu.CheckDelete(testDialClient, deleteURL, tu.StatusOK(re)) suite.NoError(err) - suite.Len(suite.getSchedulers(urlPrefix), 1) + suite.assertSchedulerExists(urlPrefix, "evict-leader-scheduler") resp1 := make(map[string]interface{}) suite.NoError(tu.ReadGetJSON(re, testDialClient, listURL, &resp1)) suite.Len(resp1["store-id-ranges"], 1) @@ -104,16 +114,20 @@ func (suite *scheduleTestSuite) checkOriginAPI(cluster *tests.TestCluster) { suite.NoError(failpoint.Enable("github.com/tikv/pd/server/config/persistFail", "return(true)")) err = tu.CheckDelete(testDialClient, deleteURL, tu.Status(re, http.StatusInternalServerError)) suite.NoError(err) - suite.Len(suite.getSchedulers(urlPrefix), 1) + suite.assertSchedulerExists(urlPrefix, "evict-leader-scheduler") suite.NoError(failpoint.Disable("github.com/tikv/pd/server/config/persistFail")) err = tu.CheckDelete(testDialClient, deleteURL, tu.StatusOK(re)) suite.NoError(err) - suite.Empty(suite.getSchedulers(urlPrefix)) + suite.assertNoScheduler(urlPrefix, "evict-leader-scheduler") suite.NoError(tu.CheckGetJSON(testDialClient, listURL, 
nil, tu.Status(re, http.StatusNotFound))) err = tu.CheckDelete(testDialClient, deleteURL, tu.Status(re, http.StatusNotFound)) suite.NoError(err) } +func (suite *scheduleTestSuite) TestAPI() { + suite.env.RunTestInTwoModes(suite.checkAPI) +} + func (suite *scheduleTestSuite) checkAPI(cluster *tests.TestCluster) { re := suite.Require() leaderAddr := cluster.GetLeaderServer().GetAddr() @@ -152,9 +166,12 @@ func (suite *scheduleTestSuite) checkAPI(cluster *tests.TestCluster) { body, err := json.Marshal(dataMap) suite.NoError(err) suite.NoError(tu.CheckPostJSON(testDialClient, updateURL, body, tu.StatusOK(re))) - resp = make(map[string]interface{}) - suite.NoError(tu.ReadGetJSON(re, testDialClient, listURL, &resp)) - suite.Equal(3.0, resp["batch"]) + tu.Eventually(re, func() bool { // wait for scheduling server to be synced. + resp = make(map[string]interface{}) + suite.NoError(tu.ReadGetJSON(re, testDialClient, listURL, &resp)) + return resp["batch"] == 3.0 + }) + // update again err = tu.CheckPostJSON(testDialClient, updateURL, body, tu.StatusOK(re), @@ -230,9 +247,11 @@ func (suite *scheduleTestSuite) checkAPI(cluster *tests.TestCluster) { suite.NoError(tu.CheckPostJSON(testDialClient, updateURL, body, tu.StatusOK(re))) resp = make(map[string]interface{}) suite.NoError(tu.ReadGetJSON(re, testDialClient, listURL, &resp)) + for key := range expectMap { suite.Equal(expectMap[key], resp[key], "key %s", key) } + // update again err = tu.CheckPostJSON(testDialClient, updateURL, body, tu.StatusOK(re), @@ -438,18 +457,22 @@ func (suite *scheduleTestSuite) checkAPI(cluster *tests.TestCluster) { suite.NoError(err) suite.NoError(tu.CheckPostJSON(testDialClient, updateURL, body, tu.StatusOK(re))) resp = make(map[string]interface{}) - suite.NoError(tu.ReadGetJSON(re, testDialClient, listURL, &resp)) - exceptMap["4"] = []interface{}{map[string]interface{}{"end-key": "", "start-key": ""}} - suite.Equal(exceptMap, resp["store-id-ranges"]) + tu.Eventually(re, func() bool { + suite.NoError(tu.ReadGetJSON(re, testDialClient, listURL, &resp)) + exceptMap["4"] = []interface{}{map[string]interface{}{"end-key": "", "start-key": ""}} + return reflect.DeepEqual(exceptMap, resp["store-id-ranges"]) + }) // using /pd/v1/schedule-config/evict-leader-scheduler/config to delete exist store from evict-leader-scheduler deleteURL := fmt.Sprintf("%s%s%s/%s/delete/%s", leaderAddr, apiPrefix, server.SchedulerConfigHandlerPath, name, "4") err = tu.CheckDelete(testDialClient, deleteURL, tu.StatusOK(re)) suite.NoError(err) resp = make(map[string]interface{}) - suite.NoError(tu.ReadGetJSON(re, testDialClient, listURL, &resp)) - delete(exceptMap, "4") - suite.Equal(exceptMap, resp["store-id-ranges"]) + tu.Eventually(re, func() bool { + suite.NoError(tu.ReadGetJSON(re, testDialClient, listURL, &resp)) + delete(exceptMap, "4") + return reflect.DeepEqual(exceptMap, resp["store-id-ranges"]) + }) err = tu.CheckDelete(testDialClient, deleteURL, tu.Status(re, http.StatusNotFound)) suite.NoError(err) }, @@ -468,6 +491,7 @@ func (suite *scheduleTestSuite) checkAPI(cluster *tests.TestCluster) { testCase.extraTestFunc(testCase.createdName) } suite.deleteScheduler(urlPrefix, testCase.createdName) + suite.assertNoScheduler(urlPrefix, testCase.createdName) } // test pause and resume all schedulers. 
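The recurring change in the hunks above is to stop asserting on a single read and poll instead: assertSchedulerExists, assertNoScheduler, and the tu.Eventually wrappers re-fetch until the expected state shows up, because in API mode scheduler and config changes reach the scheduling microservice asynchronously. A simplified, self-contained stand-in for that polling idea follows; the timeout and interval values are arbitrary, and the real helper is tu.Eventually from pkg/utils/testutil.

package api

import (
	"testing"
	"time"
)

// eventually polls cond until it returns true or the timeout expires.
// It is a stripped-down illustration of what tu.Eventually provides.
func eventually(t *testing.T, cond func() bool, timeout, interval time.Duration) bool {
	t.Helper()
	deadline := time.Now().Add(timeout)
	for time.Now().Before(deadline) {
		if cond() {
			return true
		}
		time.Sleep(interval)
	}
	return cond()
}

// Usage mirrors the assertions above: re-read the schedulers list until the
// expected name appears, instead of failing on the first stale response.
func assertContains(t *testing.T, fetch func() []string, want string) {
	if !eventually(t, func() bool {
		for _, got := range fetch() {
			if got == want {
				return true
			}
		}
		return false
	}, 10*time.Second, 100*time.Millisecond) {
		t.Fatalf("scheduler %q did not appear in time", want)
	}
}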
@@ -482,6 +506,7 @@ func (suite *scheduleTestSuite) checkAPI(cluster *tests.TestCluster) { body, err := json.Marshal(input) suite.NoError(err) suite.addScheduler(urlPrefix, body) + suite.assertSchedulerExists(urlPrefix, testCase.createdName) // wait for scheduler to be synced. if testCase.extraTestFunc != nil { testCase.extraTestFunc(testCase.createdName) } @@ -545,9 +570,14 @@ func (suite *scheduleTestSuite) checkAPI(cluster *tests.TestCluster) { createdName = testCase.name } suite.deleteScheduler(urlPrefix, createdName) + suite.assertNoScheduler(urlPrefix, createdName) } } +func (suite *scheduleTestSuite) TestDisable() { + suite.env.RunTestInTwoModes(suite.checkDisable) +} + func (suite *scheduleTestSuite) checkDisable(cluster *tests.TestCluster) { re := suite.Require() leaderAddr := cluster.GetLeaderServer().GetAddr() @@ -581,16 +611,8 @@ func (suite *scheduleTestSuite) checkDisable(cluster *tests.TestCluster) { err = tu.CheckPostJSON(testDialClient, u, body, tu.StatusOK(re)) suite.NoError(err) - var schedulers []string - err = tu.ReadGetJSON(re, testDialClient, urlPrefix, &schedulers) - suite.NoError(err) - suite.Len(schedulers, 1) - suite.Equal(name, schedulers[0]) - - err = tu.ReadGetJSON(re, testDialClient, fmt.Sprintf("%s?status=disabled", urlPrefix), &schedulers) - suite.NoError(err) - suite.Len(schedulers, 1) - suite.Equal(name, schedulers[0]) + suite.assertNoScheduler(urlPrefix, name) + suite.assertSchedulerExists(fmt.Sprintf("%s?status=disabled", urlPrefix), name) // reset schedule config scheduleConfig.Schedulers = originSchedulers @@ -600,6 +622,7 @@ func (suite *scheduleTestSuite) checkDisable(cluster *tests.TestCluster) { suite.NoError(err) suite.deleteScheduler(urlPrefix, name) + suite.assertNoScheduler(urlPrefix, name) } func (suite *scheduleTestSuite) addScheduler(urlPrefix string, body []byte) { @@ -614,12 +637,17 @@ func (suite *scheduleTestSuite) deleteScheduler(urlPrefix string, createdName st } func (suite *scheduleTestSuite) testPauseOrResume(urlPrefix string, name, createdName string, body []byte) { + re := suite.Require() if createdName == "" { createdName = name } - re := suite.Require() - err := tu.CheckPostJSON(testDialClient, urlPrefix, body, tu.StatusOK(re)) - suite.NoError(err) + var schedulers []string + tu.ReadGetJSON(suite.Require(), testDialClient, urlPrefix, &schedulers) + if !slice.Contains(schedulers, createdName) { + err := tu.CheckPostJSON(testDialClient, urlPrefix, body, tu.StatusOK(re)) + re.NoError(err) + } + suite.assertSchedulerExists(urlPrefix, createdName) // wait for scheduler to be synced. // test pause. 
input := make(map[string]interface{}) @@ -655,14 +683,76 @@ func (suite *scheduleTestSuite) testPauseOrResume(urlPrefix string, name, create suite.False(isPaused) } -func (suite *scheduleTestSuite) getSchedulers(urlPrefix string) (resp []string) { - tu.ReadGetJSON(suite.Require(), testDialClient, urlPrefix, &resp) - return +func (suite *scheduleTestSuite) TestEmptySchedulers() { + suite.env.RunTestInTwoModes(suite.checkEmptySchedulers) +} + +func (suite *scheduleTestSuite) checkEmptySchedulers(cluster *tests.TestCluster) { + re := suite.Require() + leaderAddr := cluster.GetLeaderServer().GetAddr() + urlPrefix := fmt.Sprintf("%s/pd/api/v1/schedulers", leaderAddr) + for i := 1; i <= 4; i++ { + store := &metapb.Store{ + Id: uint64(i), + State: metapb.StoreState_Up, + NodeState: metapb.NodeState_Serving, + LastHeartbeat: time.Now().UnixNano(), + } + tests.MustPutStore(suite.Require(), cluster, store) + } + for _, query := range []string{"", "?status=paused", "?status=disabled"} { + schedulers := make([]string, 0) + suite.NoError(tu.ReadGetJSON(re, testDialClient, urlPrefix+query, &schedulers)) + for _, scheduler := range schedulers { + if strings.Contains(query, "disable") { + input := make(map[string]interface{}) + input["name"] = scheduler + body, err := json.Marshal(input) + suite.NoError(err) + suite.addScheduler(urlPrefix, body) + } else { + suite.deleteScheduler(urlPrefix, scheduler) + } + } + tu.Eventually(re, func() bool { + resp, err := apiutil.GetJSON(testDialClient, urlPrefix+query, nil) + suite.NoError(err) + defer resp.Body.Close() + suite.Equal(http.StatusOK, resp.StatusCode) + b, err := io.ReadAll(resp.Body) + suite.NoError(err) + return strings.Contains(string(b), "[]") && !strings.Contains(string(b), "null") + }) + } +} + +func (suite *scheduleTestSuite) assertSchedulerExists(urlPrefix string, scheduler string) { + var schedulers []string + re := suite.Require() + tu.Eventually(re, func() bool { + err := tu.ReadGetJSON(re, testDialClient, urlPrefix, &schedulers, + tu.StatusOK(re)) + suite.NoError(err) + return slice.Contains(schedulers, scheduler) + }) +} + +func (suite *scheduleTestSuite) assertNoScheduler(urlPrefix string, scheduler string) { + var schedulers []string + re := suite.Require() + tu.Eventually(re, func() bool { + err := tu.ReadGetJSON(re, testDialClient, urlPrefix, &schedulers, + tu.StatusOK(re)) + suite.NoError(err) + return !slice.Contains(schedulers, scheduler) + }) } func (suite *scheduleTestSuite) isSchedulerPaused(urlPrefix, name string) bool { var schedulers []string - err := tu.ReadGetJSON(suite.Require(), testDialClient, fmt.Sprintf("%s?status=paused", urlPrefix), &schedulers) + re := suite.Require() + err := tu.ReadGetJSON(re, testDialClient, fmt.Sprintf("%s?status=paused", urlPrefix), &schedulers, + tu.StatusOK(re)) suite.NoError(err) for _, scheduler := range schedulers { if scheduler == name { diff --git a/tests/server/api/testutil.go b/tests/server/api/testutil.go index c6c2cc79611..6fab82ea2e3 100644 --- a/tests/server/api/testutil.go +++ b/tests/server/api/testutil.go @@ -63,7 +63,7 @@ func MustAddScheduler( // MustDeleteScheduler deletes a scheduler with HTTP API. 
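The testutil hunks below replace nil with http.NoBody when building requests that carry no payload. Both are accepted by http.NewRequest; http.NoBody is simply an explicit empty body, so req.Body is non-nil and the no-payload intent is obvious at the call site. A small sketch of the same construction; the path prefix copies the /pd/api/v1/schedulers prefix used elsewhere in these tests and is only illustrative.

package api

import (
	"fmt"
	"net/http"
)

// newDeleteRequest builds a DELETE request with an explicitly empty body,
// matching the http.NoBody change in the helpers below. serverAddr and
// schedulerName are caller-supplied placeholders.
func newDeleteRequest(serverAddr, schedulerName string) (*http.Request, error) {
	url := fmt.Sprintf("%s/pd/api/v1/schedulers/%s", serverAddr, schedulerName)
	return http.NewRequest(http.MethodDelete, url, http.NoBody)
}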
func MustDeleteScheduler(re *require.Assertions, serverAddr, schedulerName string) { - httpReq, err := http.NewRequest(http.MethodDelete, fmt.Sprintf("%s%s/%s", serverAddr, schedulersPrefix, schedulerName), nil) + httpReq, err := http.NewRequest(http.MethodDelete, fmt.Sprintf("%s%s/%s", serverAddr, schedulersPrefix, schedulerName), http.NoBody) re.NoError(err) resp, err := dialClient.Do(httpReq) re.NoError(err) diff --git a/tests/server/apiv2/handlers/testutil.go b/tests/server/apiv2/handlers/testutil.go index aca29ebeb52..d26ce732714 100644 --- a/tests/server/apiv2/handlers/testutil.go +++ b/tests/server/apiv2/handlers/testutil.go @@ -42,7 +42,7 @@ var dialClient = &http.Client{ func sendLoadRangeRequest(re *require.Assertions, server *tests.TestServer, token, limit string) *handlers.LoadAllKeyspacesResponse { // Construct load range request. - httpReq, err := http.NewRequest(http.MethodGet, server.GetAddr()+keyspacesPrefix, nil) + httpReq, err := http.NewRequest(http.MethodGet, server.GetAddr()+keyspacesPrefix, http.NoBody) re.NoError(err) query := httpReq.URL.Query() query.Add("page_token", token) @@ -135,7 +135,7 @@ func mustLoadKeyspaces(re *require.Assertions, server *tests.TestServer, name st // MustLoadKeyspaceGroups loads all keyspace groups from the server. func MustLoadKeyspaceGroups(re *require.Assertions, server *tests.TestServer, token, limit string) []*endpoint.KeyspaceGroup { // Construct load range request. - httpReq, err := http.NewRequest(http.MethodGet, server.GetAddr()+keyspaceGroupsPrefix, nil) + httpReq, err := http.NewRequest(http.MethodGet, server.GetAddr()+keyspaceGroupsPrefix, http.NoBody) re.NoError(err) query := httpReq.URL.Query() query.Add("page_token", token) @@ -175,7 +175,7 @@ func MustLoadKeyspaceGroupByID(re *require.Assertions, server *tests.TestServer, // TryLoadKeyspaceGroupByID loads the keyspace group by ID with HTTP API. func TryLoadKeyspaceGroupByID(re *require.Assertions, server *tests.TestServer, id uint32) (*endpoint.KeyspaceGroup, int) { - httpReq, err := http.NewRequest(http.MethodGet, server.GetAddr()+keyspaceGroupsPrefix+fmt.Sprintf("/%d", id), nil) + httpReq, err := http.NewRequest(http.MethodGet, server.GetAddr()+keyspaceGroupsPrefix+fmt.Sprintf("/%d", id), http.NoBody) re.NoError(err) resp, err := dialClient.Do(httpReq) re.NoError(err) @@ -205,7 +205,7 @@ func FailCreateKeyspaceGroupWithCode(re *require.Assertions, server *tests.TestS // MustDeleteKeyspaceGroup deletes a keyspace group with HTTP API. func MustDeleteKeyspaceGroup(re *require.Assertions, server *tests.TestServer, id uint32) { - httpReq, err := http.NewRequest(http.MethodDelete, server.GetAddr()+keyspaceGroupsPrefix+fmt.Sprintf("/%d", id), nil) + httpReq, err := http.NewRequest(http.MethodDelete, server.GetAddr()+keyspaceGroupsPrefix+fmt.Sprintf("/%d", id), http.NoBody) re.NoError(err) resp, err := dialClient.Do(httpReq) re.NoError(err) @@ -232,7 +232,7 @@ func MustSplitKeyspaceGroup(re *require.Assertions, server *tests.TestServer, id // MustFinishSplitKeyspaceGroup finishes a keyspace group split with HTTP API. func MustFinishSplitKeyspaceGroup(re *require.Assertions, server *tests.TestServer, id uint32) { - httpReq, err := http.NewRequest(http.MethodDelete, server.GetAddr()+keyspaceGroupsPrefix+fmt.Sprintf("/%d/split", id), nil) + httpReq, err := http.NewRequest(http.MethodDelete, server.GetAddr()+keyspaceGroupsPrefix+fmt.Sprintf("/%d/split", id), http.NoBody) re.NoError(err) // Send request. 
resp, err := dialClient.Do(httpReq) diff --git a/tests/server/cluster/cluster_test.go b/tests/server/cluster/cluster_test.go index 701eb9b5d69..18a82bcf0fe 100644 --- a/tests/server/cluster/cluster_test.go +++ b/tests/server/cluster/cluster_test.go @@ -510,6 +510,7 @@ func TestRaftClusterMultipleRestart(t *testing.T) { err = rc.PutStore(store) re.NoError(err) re.NotNil(tc) + rc.Stop() // let the job run at small interval re.NoError(failpoint.Enable("github.com/tikv/pd/server/cluster/highFrequencyClusterJobs", `return(true)`)) @@ -517,8 +518,6 @@ func TestRaftClusterMultipleRestart(t *testing.T) { err = rc.Start(leaderServer.GetServer()) re.NoError(err) time.Sleep(time.Millisecond) - rc = leaderServer.GetRaftCluster() - re.NotNil(rc) rc.Stop() } re.NoError(failpoint.Disable("github.com/tikv/pd/server/cluster/highFrequencyClusterJobs")) @@ -812,10 +811,10 @@ func TestLoadClusterInfo(t *testing.T) { tc.WaitLeader() leaderServer := tc.GetLeaderServer() svr := leaderServer.GetServer() - rc := cluster.NewRaftCluster(ctx, svr.ClusterID(), syncer.NewRegionSyncer(svr), svr.GetClient(), svr.GetHTTPClient()) + rc := cluster.NewRaftCluster(ctx, svr.ClusterID(), svr.GetBasicCluster(), svr.GetStorage(), syncer.NewRegionSyncer(svr), svr.GetClient(), svr.GetHTTPClient()) // Cluster is not bootstrapped. - rc.InitCluster(svr.GetAllocator(), svr.GetPersistOptions(), svr.GetStorage(), svr.GetBasicCluster(), svr.GetKeyspaceGroupManager()) + rc.InitCluster(svr.GetAllocator(), svr.GetPersistOptions(), svr.GetHBStreams(), svr.GetKeyspaceGroupManager()) raftCluster, err := rc.LoadClusterInfo() re.NoError(err) re.Nil(raftCluster) @@ -852,8 +851,8 @@ func TestLoadClusterInfo(t *testing.T) { } re.NoError(testStorage.Flush()) - raftCluster = cluster.NewRaftCluster(ctx, svr.ClusterID(), syncer.NewRegionSyncer(svr), svr.GetClient(), svr.GetHTTPClient()) - raftCluster.InitCluster(mockid.NewIDAllocator(), svr.GetPersistOptions(), testStorage, basicCluster, svr.GetKeyspaceGroupManager()) + raftCluster = cluster.NewRaftCluster(ctx, svr.ClusterID(), basicCluster, testStorage, syncer.NewRegionSyncer(svr), svr.GetClient(), svr.GetHTTPClient()) + raftCluster.InitCluster(mockid.NewIDAllocator(), svr.GetPersistOptions(), svr.GetHBStreams(), svr.GetKeyspaceGroupManager()) raftCluster, err = raftCluster.LoadClusterInfo() re.NoError(err) re.NotNil(raftCluster) @@ -1289,7 +1288,7 @@ func TestTransferLeaderForScheduler(t *testing.T) { re.NoError(err) tc.WaitLeader() // start - leaderServer := tc.GetServer(tc.GetLeader()) + leaderServer := tc.GetLeaderServer() re.NoError(leaderServer.BootstrapCluster()) rc := leaderServer.GetServer().GetRaftCluster() re.NotNil(rc) @@ -1328,7 +1327,7 @@ func TestTransferLeaderForScheduler(t *testing.T) { tc.ResignLeader() rc.Stop() tc.WaitLeader() - leaderServer = tc.GetServer(tc.GetLeader()) + leaderServer = tc.GetLeaderServer() rc1 := leaderServer.GetServer().GetRaftCluster() rc1.Start(leaderServer.GetServer()) re.NoError(err) @@ -1348,7 +1347,7 @@ func TestTransferLeaderForScheduler(t *testing.T) { tc.ResignLeader() rc1.Stop() tc.WaitLeader() - leaderServer = tc.GetServer(tc.GetLeader()) + leaderServer = tc.GetLeaderServer() rc = leaderServer.GetServer().GetRaftCluster() rc.Start(leaderServer.GetServer()) re.NotNil(rc) @@ -1402,7 +1401,7 @@ func putRegionWithLeader(re *require.Assertions, rc *cluster.RaftCluster, id id. 
StartKey: []byte{byte(i)}, EndKey: []byte{byte(i + 1)}, } - rc.HandleRegionHeartbeat(core.NewRegionInfo(region, region.Peers[0])) + rc.HandleRegionHeartbeat(core.NewRegionInfo(region, region.Peers[0], core.SetSource(core.Heartbeat))) } time.Sleep(50 * time.Millisecond) @@ -1560,8 +1559,8 @@ func TestTransferLeaderBack(t *testing.T) { tc.WaitLeader() leaderServer := tc.GetLeaderServer() svr := leaderServer.GetServer() - rc := cluster.NewRaftCluster(ctx, svr.ClusterID(), syncer.NewRegionSyncer(svr), svr.GetClient(), svr.GetHTTPClient()) - rc.InitCluster(svr.GetAllocator(), svr.GetPersistOptions(), svr.GetStorage(), svr.GetBasicCluster(), svr.GetKeyspaceGroupManager()) + rc := cluster.NewRaftCluster(ctx, svr.ClusterID(), svr.GetBasicCluster(), svr.GetStorage(), syncer.NewRegionSyncer(svr), svr.GetClient(), svr.GetHTTPClient()) + rc.InitCluster(svr.GetAllocator(), svr.GetPersistOptions(), svr.GetHBStreams(), svr.GetKeyspaceGroupManager()) storage := rc.GetStorage() meta := &metapb.Cluster{Id: 123} re.NoError(storage.SaveMeta(meta)) diff --git a/tests/server/config/config_test.go b/tests/server/config/config_test.go index 1b2178bde33..faa03c15329 100644 --- a/tests/server/config/config_test.go +++ b/tests/server/config/config_test.go @@ -18,17 +18,26 @@ import ( "bytes" "context" "encoding/json" + "fmt" "net/http" + "reflect" "testing" + "time" "github.com/stretchr/testify/require" + "github.com/stretchr/testify/suite" "github.com/tikv/pd/pkg/ratelimit" + sc "github.com/tikv/pd/pkg/schedule/config" + tu "github.com/tikv/pd/pkg/utils/testutil" + "github.com/tikv/pd/pkg/utils/typeutil" + "github.com/tikv/pd/pkg/versioninfo" "github.com/tikv/pd/server" + "github.com/tikv/pd/server/config" "github.com/tikv/pd/tests" ) -// dialClient used to dial http request. -var dialClient = &http.Client{ +// testDialClient used to dial http request. 
+var testDialClient = &http.Client{ Transport: &http.Transport{ DisableKeepAlives: true, }, @@ -56,7 +65,7 @@ func TestRateLimitConfigReload(t *testing.T) { data, err := json.Marshal(input) re.NoError(err) req, _ := http.NewRequest(http.MethodPost, leader.GetAddr()+"/pd/api/v1/service-middleware/config", bytes.NewBuffer(data)) - resp, err := dialClient.Do(req) + resp, err := testDialClient.Do(req) re.NoError(err) resp.Body.Close() re.True(leader.GetServer().GetServiceMiddlewarePersistOptions().IsRateLimitEnabled()) @@ -74,3 +83,523 @@ func TestRateLimitConfigReload(t *testing.T) { re.True(leader.GetServer().GetServiceMiddlewarePersistOptions().IsRateLimitEnabled()) re.Len(leader.GetServer().GetServiceMiddlewarePersistOptions().GetRateLimitConfig().LimiterConfig, 1) } + +type configTestSuite struct { + suite.Suite + env *tests.SchedulingTestEnvironment +} + +func TestConfigTestSuite(t *testing.T) { + suite.Run(t, new(configTestSuite)) +} + +func (suite *configTestSuite) SetupSuite() { + suite.env = tests.NewSchedulingTestEnvironment(suite.T()) +} + +func (suite *configTestSuite) TearDownSuite() { + suite.env.Cleanup() +} +func (suite *configTestSuite) TestConfigAll() { + suite.env.RunTestInTwoModes(suite.checkConfigAll) +} + +func (suite *configTestSuite) checkConfigAll(cluster *tests.TestCluster) { + re := suite.Require() + leaderServer := cluster.GetLeaderServer() + urlPrefix := leaderServer.GetAddr() + + addr := fmt.Sprintf("%s/pd/api/v1/config", urlPrefix) + cfg := &config.Config{} + tu.Eventually(re, func() bool { + err := tu.ReadGetJSON(re, testDialClient, addr, cfg) + suite.NoError(err) + return cfg.PDServerCfg.DashboardAddress != "auto" + }) + + // the original way + r := map[string]int{"max-replicas": 5} + postData, err := json.Marshal(r) + suite.NoError(err) + err = tu.CheckPostJSON(testDialClient, addr, postData, tu.StatusOK(re)) + suite.NoError(err) + l := map[string]interface{}{ + "location-labels": "zone,rack", + "region-schedule-limit": 10, + } + postData, err = json.Marshal(l) + suite.NoError(err) + err = tu.CheckPostJSON(testDialClient, addr, postData, tu.StatusOK(re)) + suite.NoError(err) + + l = map[string]interface{}{ + "metric-storage": "http://127.0.0.1:9090", + } + postData, err = json.Marshal(l) + suite.NoError(err) + err = tu.CheckPostJSON(testDialClient, addr, postData, tu.StatusOK(re)) + suite.NoError(err) + + newCfg := &config.Config{} + err = tu.ReadGetJSON(re, testDialClient, addr, newCfg) + suite.NoError(err) + cfg.Replication.MaxReplicas = 5 + cfg.Replication.LocationLabels = []string{"zone", "rack"} + cfg.Schedule.RegionScheduleLimit = 10 + cfg.PDServerCfg.MetricStorage = "http://127.0.0.1:9090" + suite.Equal(newCfg, cfg) + + // the new way + l = map[string]interface{}{ + "schedule.tolerant-size-ratio": 2.5, + "schedule.enable-tikv-split-region": "false", + "replication.location-labels": "idc,host", + "pd-server.metric-storage": "http://127.0.0.1:1234", + "log.level": "warn", + "cluster-version": "v4.0.0-beta", + "replication-mode.replication-mode": "dr-auto-sync", + "replication-mode.dr-auto-sync.label-key": "foobar", + } + postData, err = json.Marshal(l) + suite.NoError(err) + err = tu.CheckPostJSON(testDialClient, addr, postData, tu.StatusOK(re)) + suite.NoError(err) + newCfg1 := &config.Config{} + err = tu.ReadGetJSON(re, testDialClient, addr, newCfg1) + suite.NoError(err) + cfg.Schedule.EnableTiKVSplitRegion = false + cfg.Schedule.TolerantSizeRatio = 2.5 + cfg.Replication.LocationLabels = []string{"idc", "host"} + cfg.PDServerCfg.MetricStorage = 
"http://127.0.0.1:1234" + cfg.Log.Level = "warn" + cfg.ReplicationMode.DRAutoSync.LabelKey = "foobar" + cfg.ReplicationMode.ReplicationMode = "dr-auto-sync" + v, err := versioninfo.ParseVersion("v4.0.0-beta") + suite.NoError(err) + cfg.ClusterVersion = *v + suite.Equal(cfg, newCfg1) + + // revert this to avoid it affects TestConfigTTL + l["schedule.enable-tikv-split-region"] = "true" + postData, err = json.Marshal(l) + suite.NoError(err) + err = tu.CheckPostJSON(testDialClient, addr, postData, tu.StatusOK(re)) + suite.NoError(err) + + // illegal prefix + l = map[string]interface{}{ + "replicate.max-replicas": 1, + } + postData, err = json.Marshal(l) + suite.NoError(err) + err = tu.CheckPostJSON(testDialClient, addr, postData, + tu.StatusNotOK(re), + tu.StringContain(re, "not found")) + suite.NoError(err) + + // update prefix directly + l = map[string]interface{}{ + "replication-mode": nil, + } + postData, err = json.Marshal(l) + suite.NoError(err) + err = tu.CheckPostJSON(testDialClient, addr, postData, + tu.StatusNotOK(re), + tu.StringContain(re, "cannot update config prefix")) + suite.NoError(err) + + // config item not found + l = map[string]interface{}{ + "schedule.region-limit": 10, + } + postData, err = json.Marshal(l) + suite.NoError(err) + err = tu.CheckPostJSON(testDialClient, addr, postData, tu.StatusNotOK(re), tu.StringContain(re, "not found")) + suite.NoError(err) +} + +func (suite *configTestSuite) TestConfigSchedule() { + suite.env.RunTestInTwoModes(suite.checkConfigSchedule) +} + +func (suite *configTestSuite) checkConfigSchedule(cluster *tests.TestCluster) { + re := suite.Require() + leaderServer := cluster.GetLeaderServer() + urlPrefix := leaderServer.GetAddr() + + addr := fmt.Sprintf("%s/pd/api/v1/config/schedule", urlPrefix) + + scheduleConfig := &sc.ScheduleConfig{} + suite.NoError(tu.ReadGetJSON(re, testDialClient, addr, scheduleConfig)) + scheduleConfig.MaxStoreDownTime.Duration = time.Second + postData, err := json.Marshal(scheduleConfig) + suite.NoError(err) + err = tu.CheckPostJSON(testDialClient, addr, postData, tu.StatusOK(re)) + suite.NoError(err) + + tu.Eventually(re, func() bool { + scheduleConfig1 := &sc.ScheduleConfig{} + suite.NoError(tu.ReadGetJSON(re, testDialClient, addr, scheduleConfig1)) + return reflect.DeepEqual(*scheduleConfig1, *scheduleConfig) + }) +} + +func (suite *configTestSuite) TestConfigReplication() { + suite.env.RunTestInTwoModes(suite.checkConfigReplication) +} + +func (suite *configTestSuite) checkConfigReplication(cluster *tests.TestCluster) { + re := suite.Require() + leaderServer := cluster.GetLeaderServer() + urlPrefix := leaderServer.GetAddr() + + addr := fmt.Sprintf("%s/pd/api/v1/config/replicate", urlPrefix) + rc := &sc.ReplicationConfig{} + err := tu.ReadGetJSON(re, testDialClient, addr, rc) + suite.NoError(err) + + rc.MaxReplicas = 5 + rc1 := map[string]int{"max-replicas": 5} + postData, err := json.Marshal(rc1) + suite.NoError(err) + err = tu.CheckPostJSON(testDialClient, addr, postData, tu.StatusOK(re)) + suite.NoError(err) + + rc.LocationLabels = []string{"zone", "rack"} + rc2 := map[string]string{"location-labels": "zone,rack"} + postData, err = json.Marshal(rc2) + suite.NoError(err) + err = tu.CheckPostJSON(testDialClient, addr, postData, tu.StatusOK(re)) + suite.NoError(err) + + rc.IsolationLevel = "zone" + rc3 := map[string]string{"isolation-level": "zone"} + postData, err = json.Marshal(rc3) + suite.NoError(err) + err = tu.CheckPostJSON(testDialClient, addr, postData, tu.StatusOK(re)) + suite.NoError(err) + + rc4 := 
&sc.ReplicationConfig{} + tu.Eventually(re, func() bool { + err = tu.ReadGetJSON(re, testDialClient, addr, rc4) + suite.NoError(err) + return reflect.DeepEqual(*rc4, *rc) + }) +} + +func (suite *configTestSuite) TestConfigLabelProperty() { + suite.env.RunTestInTwoModes(suite.checkConfigLabelProperty) +} + +func (suite *configTestSuite) checkConfigLabelProperty(cluster *tests.TestCluster) { + re := suite.Require() + leaderServer := cluster.GetLeaderServer() + urlPrefix := leaderServer.GetAddr() + + addr := urlPrefix + "/pd/api/v1/config/label-property" + loadProperties := func() config.LabelPropertyConfig { + var cfg config.LabelPropertyConfig + err := tu.ReadGetJSON(re, testDialClient, addr, &cfg) + suite.NoError(err) + return cfg + } + + cfg := loadProperties() + suite.Empty(cfg) + + cmds := []string{ + `{"type": "foo", "action": "set", "label-key": "zone", "label-value": "cn1"}`, + `{"type": "foo", "action": "set", "label-key": "zone", "label-value": "cn2"}`, + `{"type": "bar", "action": "set", "label-key": "host", "label-value": "h1"}`, + } + for _, cmd := range cmds { + err := tu.CheckPostJSON(testDialClient, addr, []byte(cmd), tu.StatusOK(re)) + suite.NoError(err) + } + + cfg = loadProperties() + suite.Len(cfg, 2) + suite.Equal([]config.StoreLabel{ + {Key: "zone", Value: "cn1"}, + {Key: "zone", Value: "cn2"}, + }, cfg["foo"]) + suite.Equal([]config.StoreLabel{{Key: "host", Value: "h1"}}, cfg["bar"]) + + cmds = []string{ + `{"type": "foo", "action": "delete", "label-key": "zone", "label-value": "cn1"}`, + `{"type": "bar", "action": "delete", "label-key": "host", "label-value": "h1"}`, + } + for _, cmd := range cmds { + err := tu.CheckPostJSON(testDialClient, addr, []byte(cmd), tu.StatusOK(re)) + suite.NoError(err) + } + + cfg = loadProperties() + suite.Len(cfg, 1) + suite.Equal([]config.StoreLabel{{Key: "zone", Value: "cn2"}}, cfg["foo"]) +} + +func (suite *configTestSuite) TestConfigDefault() { + suite.env.RunTestInTwoModes(suite.checkConfigDefault) +} + +func (suite *configTestSuite) checkConfigDefault(cluster *tests.TestCluster) { + re := suite.Require() + leaderServer := cluster.GetLeaderServer() + urlPrefix := leaderServer.GetAddr() + + addr := urlPrefix + "/pd/api/v1/config" + + r := map[string]int{"max-replicas": 5} + postData, err := json.Marshal(r) + suite.NoError(err) + err = tu.CheckPostJSON(testDialClient, addr, postData, tu.StatusOK(re)) + suite.NoError(err) + l := map[string]interface{}{ + "location-labels": "zone,rack", + "region-schedule-limit": 10, + } + postData, err = json.Marshal(l) + suite.NoError(err) + err = tu.CheckPostJSON(testDialClient, addr, postData, tu.StatusOK(re)) + suite.NoError(err) + + l = map[string]interface{}{ + "metric-storage": "http://127.0.0.1:9090", + } + postData, err = json.Marshal(l) + suite.NoError(err) + err = tu.CheckPostJSON(testDialClient, addr, postData, tu.StatusOK(re)) + suite.NoError(err) + + addr = fmt.Sprintf("%s/pd/api/v1/config/default", urlPrefix) + defaultCfg := &config.Config{} + err = tu.ReadGetJSON(re, testDialClient, addr, defaultCfg) + suite.NoError(err) + + suite.Equal(uint64(3), defaultCfg.Replication.MaxReplicas) + suite.Equal(typeutil.StringSlice([]string{}), defaultCfg.Replication.LocationLabels) + suite.Equal(uint64(2048), defaultCfg.Schedule.RegionScheduleLimit) + suite.Equal("", defaultCfg.PDServerCfg.MetricStorage) +} + +func (suite *configTestSuite) TestConfigPDServer() { + suite.env.RunTestInTwoModes(suite.checkConfigPDServer) +} + +func (suite *configTestSuite) checkConfigPDServer(cluster *tests.TestCluster) 
{ + re := suite.Require() + leaderServer := cluster.GetLeaderServer() + urlPrefix := leaderServer.GetAddr() + + addrPost := urlPrefix + "/pd/api/v1/config" + ms := map[string]interface{}{ + "metric-storage": "", + } + postData, err := json.Marshal(ms) + suite.NoError(err) + suite.NoError(tu.CheckPostJSON(testDialClient, addrPost, postData, tu.StatusOK(re))) + addrGet := fmt.Sprintf("%s/pd/api/v1/config/pd-server", urlPrefix) + sc := &config.PDServerConfig{} + suite.NoError(tu.ReadGetJSON(re, testDialClient, addrGet, sc)) + suite.Equal(bool(true), sc.UseRegionStorage) + suite.Equal("table", sc.KeyType) + suite.Equal(typeutil.StringSlice([]string{}), sc.RuntimeServices) + suite.Equal("", sc.MetricStorage) + if sc.DashboardAddress != "auto" { // dashboard has been assigned + re.Equal(leaderServer.GetAddr(), sc.DashboardAddress) + } + suite.Equal(int(3), sc.FlowRoundByDigit) + suite.Equal(typeutil.NewDuration(time.Second), sc.MinResolvedTSPersistenceInterval) + suite.Equal(24*time.Hour, sc.MaxResetTSGap.Duration) +} + +var ttlConfig = map[string]interface{}{ + "schedule.max-snapshot-count": 999, + "schedule.enable-location-replacement": false, + "schedule.max-merge-region-size": 999, + "schedule.max-merge-region-keys": 999, + "schedule.scheduler-max-waiting-operator": 999, + "schedule.leader-schedule-limit": 999, + "schedule.region-schedule-limit": 999, + "schedule.hot-region-schedule-limit": 999, + "schedule.replica-schedule-limit": 999, + "schedule.merge-schedule-limit": 999, + "schedule.enable-tikv-split-region": false, +} + +var invalidTTLConfig = map[string]interface{}{ + "schedule.invalid-ttl-config": 0, +} + +type ttlConfigInterface interface { + GetMaxSnapshotCount() uint64 + IsLocationReplacementEnabled() bool + GetMaxMergeRegionSize() uint64 + GetMaxMergeRegionKeys() uint64 + GetSchedulerMaxWaitingOperator() uint64 + GetLeaderScheduleLimit() uint64 + GetRegionScheduleLimit() uint64 + GetHotRegionScheduleLimit() uint64 + GetReplicaScheduleLimit() uint64 + GetMergeScheduleLimit() uint64 + IsTikvRegionSplitEnabled() bool +} + +func (suite *configTestSuite) assertTTLConfig( + cluster *tests.TestCluster, + expectedEqual bool, +) { + equality := suite.Equal + if !expectedEqual { + equality = suite.NotEqual + } + checkFunc := func(options ttlConfigInterface) { + equality(uint64(999), options.GetMaxSnapshotCount()) + equality(false, options.IsLocationReplacementEnabled()) + equality(uint64(999), options.GetMaxMergeRegionSize()) + equality(uint64(999), options.GetMaxMergeRegionKeys()) + equality(uint64(999), options.GetSchedulerMaxWaitingOperator()) + equality(uint64(999), options.GetLeaderScheduleLimit()) + equality(uint64(999), options.GetRegionScheduleLimit()) + equality(uint64(999), options.GetHotRegionScheduleLimit()) + equality(uint64(999), options.GetReplicaScheduleLimit()) + equality(uint64(999), options.GetMergeScheduleLimit()) + equality(false, options.IsTikvRegionSplitEnabled()) + } + checkFunc(cluster.GetLeaderServer().GetServer().GetPersistOptions()) + if cluster.GetSchedulingPrimaryServer() != nil { + // wait for the scheduling primary server to be synced + options := cluster.GetSchedulingPrimaryServer().GetPersistConfig() + tu.Eventually(suite.Require(), func() bool { + if expectedEqual { + return uint64(999) == options.GetMaxSnapshotCount() + } + return uint64(999) != options.GetMaxSnapshotCount() + }) + checkFunc(options) + } +} + +func (suite *configTestSuite) assertTTLConfigItemEqual( + cluster *tests.TestCluster, + item string, + expectedValue interface{}, +) { + checkFunc 
:= func(options ttlConfigInterface) bool { + switch item { + case "max-merge-region-size": + return expectedValue.(uint64) == options.GetMaxMergeRegionSize() + case "max-merge-region-keys": + return expectedValue.(uint64) == options.GetMaxMergeRegionKeys() + case "enable-tikv-split-region": + return expectedValue.(bool) == options.IsTikvRegionSplitEnabled() + } + return false + } + suite.True(checkFunc(cluster.GetLeaderServer().GetServer().GetPersistOptions())) + if cluster.GetSchedulingPrimaryServer() != nil { + // wait for the scheduling primary server to be synced + tu.Eventually(suite.Require(), func() bool { + return checkFunc(cluster.GetSchedulingPrimaryServer().GetPersistConfig()) + }) + } +} + +func createTTLUrl(url string, ttl int) string { + return fmt.Sprintf("%s/pd/api/v1/config?ttlSecond=%d", url, ttl) +} + +func (suite *configTestSuite) TestConfigTTL() { + suite.env.RunTestInTwoModes(suite.checkConfigTTL) +} + +func (suite *configTestSuite) checkConfigTTL(cluster *tests.TestCluster) { + re := suite.Require() + leaderServer := cluster.GetLeaderServer() + urlPrefix := leaderServer.GetAddr() + postData, err := json.Marshal(ttlConfig) + suite.NoError(err) + + // test no config and cleaning up + err = tu.CheckPostJSON(testDialClient, createTTLUrl(urlPrefix, 0), postData, tu.StatusOK(re)) + suite.NoError(err) + suite.assertTTLConfig(cluster, false) + + // test time goes by + err = tu.CheckPostJSON(testDialClient, createTTLUrl(urlPrefix, 5), postData, tu.StatusOK(re)) + suite.NoError(err) + suite.assertTTLConfig(cluster, true) + time.Sleep(5 * time.Second) + suite.assertTTLConfig(cluster, false) + + // test cleaning up + err = tu.CheckPostJSON(testDialClient, createTTLUrl(urlPrefix, 5), postData, tu.StatusOK(re)) + suite.NoError(err) + suite.assertTTLConfig(cluster, true) + err = tu.CheckPostJSON(testDialClient, createTTLUrl(urlPrefix, 0), postData, tu.StatusOK(re)) + suite.NoError(err) + suite.assertTTLConfig(cluster, false) + + postData, err = json.Marshal(invalidTTLConfig) + suite.NoError(err) + err = tu.CheckPostJSON(testDialClient, createTTLUrl(urlPrefix, 1), postData, + tu.StatusNotOK(re), tu.StringEqual(re, "\"unsupported ttl config schedule.invalid-ttl-config\"\n")) + suite.NoError(err) + + // only set max-merge-region-size + mergeConfig := map[string]interface{}{ + "schedule.max-merge-region-size": 999, + } + postData, err = json.Marshal(mergeConfig) + suite.NoError(err) + + err = tu.CheckPostJSON(testDialClient, createTTLUrl(urlPrefix, 1), postData, tu.StatusOK(re)) + suite.NoError(err) + suite.assertTTLConfigItemEqual(cluster, "max-merge-region-size", uint64(999)) + // max-merge-region-keys should keep consistence with max-merge-region-size. 
+ suite.assertTTLConfigItemEqual(cluster, "max-merge-region-keys", uint64(999*10000)) + + // on invalid value, we use default config + mergeConfig = map[string]interface{}{ + "schedule.enable-tikv-split-region": "invalid", + } + postData, err = json.Marshal(mergeConfig) + suite.NoError(err) + err = tu.CheckPostJSON(testDialClient, createTTLUrl(urlPrefix, 10), postData, tu.StatusOK(re)) + suite.NoError(err) + suite.assertTTLConfigItemEqual(cluster, "enable-tikv-split-region", true) +} + +func (suite *configTestSuite) TestTTLConflict() { + suite.env.RunTestInTwoModes(suite.checkTTLConflict) +} + +func (suite *configTestSuite) checkTTLConflict(cluster *tests.TestCluster) { + re := suite.Require() + leaderServer := cluster.GetLeaderServer() + urlPrefix := leaderServer.GetAddr() + addr := createTTLUrl(urlPrefix, 1) + postData, err := json.Marshal(ttlConfig) + suite.NoError(err) + err = tu.CheckPostJSON(testDialClient, addr, postData, tu.StatusOK(re)) + suite.NoError(err) + suite.assertTTLConfig(cluster, true) + + cfg := map[string]interface{}{"max-snapshot-count": 30} + postData, err = json.Marshal(cfg) + suite.NoError(err) + addr = fmt.Sprintf("%s/pd/api/v1/config", urlPrefix) + err = tu.CheckPostJSON(testDialClient, addr, postData, tu.StatusNotOK(re), tu.StringEqual(re, "\"need to clean up TTL first for schedule.max-snapshot-count\"\n")) + suite.NoError(err) + addr = fmt.Sprintf("%s/pd/api/v1/config/schedule", urlPrefix) + err = tu.CheckPostJSON(testDialClient, addr, postData, tu.StatusNotOK(re), tu.StringEqual(re, "\"need to clean up TTL first for schedule.max-snapshot-count\"\n")) + suite.NoError(err) + cfg = map[string]interface{}{"schedule.max-snapshot-count": 30} + postData, err = json.Marshal(cfg) + suite.NoError(err) + err = tu.CheckPostJSON(testDialClient, createTTLUrl(urlPrefix, 0), postData, tu.StatusOK(re)) + suite.NoError(err) + err = tu.CheckPostJSON(testDialClient, addr, postData, tu.StatusOK(re)) + suite.NoError(err) +} diff --git a/tests/server/member/member_test.go b/tests/server/member/member_test.go index 26d4fa2a904..68fbdb33bd9 100644 --- a/tests/server/member/member_test.go +++ b/tests/server/member/member_test.go @@ -88,7 +88,7 @@ func TestMemberDelete(t *testing.T) { t.Log(time.Now(), "try to delete:", table.path) testutil.Eventually(re, func() bool { addr := leader.GetConfig().ClientUrls + "/pd/api/v1/members/" + table.path - req, err := http.NewRequest(http.MethodDelete, addr, nil) + req, err := http.NewRequest(http.MethodDelete, addr, http.NoBody) re.NoError(err) res, err := httpClient.Do(req) re.NoError(err) @@ -260,7 +260,7 @@ func TestPDLeaderLostWhileEtcdLeaderIntact(t *testing.T) { re.NoError(err) leader1 := cluster.WaitLeader() - memberID := cluster.GetServer(leader1).GetLeader().GetMemberId() + memberID := cluster.GetLeaderServer().GetLeader().GetMemberId() re.NoError(failpoint.Enable("github.com/tikv/pd/server/leaderLoopCheckAgain", fmt.Sprintf("return(\"%d\")", memberID))) re.NoError(failpoint.Enable("github.com/tikv/pd/server/exitCampaignLeader", fmt.Sprintf("return(\"%d\")", memberID))) @@ -323,6 +323,29 @@ func TestMoveLeader(t *testing.T) { } } +func TestCampaignLeaderFrequently(t *testing.T) { + re := require.New(t) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + cluster, err := tests.NewTestCluster(ctx, 5) + defer cluster.Destroy() + re.NoError(err) + + err = cluster.RunInitialServers() + re.NoError(err) + cluster.WaitLeader() + leader := cluster.GetLeader() + re.NotEmpty(cluster.GetLeader()) + + for i := 0; i < 3; i++ { + 
cluster.GetLeaderServer().ResetPDLeader() + cluster.WaitLeader() + } + // PD leader should be different from before because etcd leader changed. + re.NotEmpty(cluster.GetLeader()) + re.NotEqual(leader, cluster.GetLeader()) +} + func TestGetLeader(t *testing.T) { re := require.New(t) ctx, cancel := context.WithCancel(context.Background()) diff --git a/tests/server/region_syncer/region_syncer_test.go b/tests/server/region_syncer/region_syncer_test.go index b73d4abb9b5..87b5c0683c7 100644 --- a/tests/server/region_syncer/region_syncer_test.go +++ b/tests/server/region_syncer/region_syncer_test.go @@ -47,21 +47,34 @@ func (i *idAllocator) alloc() uint64 { func TestRegionSyncer(t *testing.T) { re := require.New(t) ctx, cancel := context.WithCancel(context.Background()) - defer cancel() re.NoError(failpoint.Enable("github.com/tikv/pd/pkg/storage/regionStorageFastFlush", `return(true)`)) re.NoError(failpoint.Enable("github.com/tikv/pd/pkg/syncer/noFastExitSync", `return(true)`)) + re.NoError(failpoint.Enable("github.com/tikv/pd/pkg/syncer/disableClientStreaming", `return(true)`)) cluster, err := tests.NewTestCluster(ctx, 3, func(conf *config.Config, serverName string) { conf.PDServerCfg.UseRegionStorage = true }) - defer cluster.Destroy() + defer func() { + cluster.Destroy() + cancel() + }() re.NoError(err) re.NoError(cluster.RunInitialServers()) cluster.WaitLeader() leaderServer := cluster.GetLeaderServer() + re.NoError(leaderServer.BootstrapCluster()) rc := leaderServer.GetServer().GetRaftCluster() re.NotNil(rc) + followerServer := cluster.GetServer(cluster.GetFollower()) + + testutil.Eventually(re, func() bool { + return !followerServer.GetServer().DirectlyGetRaftCluster().GetRegionSyncer().IsRunning() + }) + re.NoError(failpoint.Disable("github.com/tikv/pd/pkg/syncer/disableClientStreaming")) re.True(cluster.WaitRegionSyncerClientsReady(2)) + testutil.Eventually(re, func() bool { + return followerServer.GetServer().DirectlyGetRaftCluster().GetRegionSyncer().IsRunning() + }) regionLen := 110 regions := initRegions(regionLen) @@ -119,7 +132,6 @@ func TestRegionSyncer(t *testing.T) { time.Sleep(4 * time.Second) // test All regions have been synchronized to the cache of followerServer - followerServer := cluster.GetServer(cluster.GetFollower()) re.NotNil(followerServer) cacheRegions := leaderServer.GetServer().GetBasicCluster().GetRegions() re.Len(cacheRegions, regionLen) @@ -141,6 +153,9 @@ func TestRegionSyncer(t *testing.T) { re.NoError(err) cluster.WaitLeader() leaderServer = cluster.GetLeaderServer() + testutil.Eventually(re, func() bool { + return !leaderServer.GetServer().GetRaftCluster().GetRegionSyncer().IsRunning() + }) re.NotNil(leaderServer) loadRegions := leaderServer.GetServer().GetRaftCluster().GetRegions() re.Len(loadRegions, regionLen) @@ -259,7 +274,7 @@ func TestPrepareCheckerWithTransferLeader(t *testing.T) { err = cluster.RunInitialServers() re.NoError(err) cluster.WaitLeader() - leaderServer := cluster.GetServer(cluster.GetLeader()) + leaderServer := cluster.GetLeaderServer() re.NoError(leaderServer.BootstrapCluster()) rc := leaderServer.GetServer().GetRaftCluster() re.NotNil(rc) diff --git a/tests/testutil.go b/tests/testutil.go index 613705d3eb6..0956cf1a4bd 100644 --- a/tests/testutil.go +++ b/tests/testutil.go @@ -18,6 +18,8 @@ import ( "context" "fmt" "os" + "runtime" + "strings" "sync" "testing" "time" @@ -34,6 +36,7 @@ import ( scheduling "github.com/tikv/pd/pkg/mcs/scheduling/server" sc "github.com/tikv/pd/pkg/mcs/scheduling/server/config" tso 
"github.com/tikv/pd/pkg/mcs/tso/server" + "github.com/tikv/pd/pkg/mcs/utils" "github.com/tikv/pd/pkg/utils/logutil" "github.com/tikv/pd/pkg/utils/testutil" "github.com/tikv/pd/pkg/versioninfo" @@ -172,12 +175,19 @@ func MustPutStore(re *require.Assertions, cluster *TestCluster, store *metapb.St }) re.NoError(err) + ts := store.GetLastHeartbeat() + if ts == 0 { + ts = time.Now().UnixNano() + } storeInfo := grpcServer.GetRaftCluster().GetStore(store.GetId()) - newStore := storeInfo.Clone(core.SetStoreStats(&pdpb.StoreStats{ - Capacity: uint64(10 * units.GiB), - UsedSize: uint64(9 * units.GiB), - Available: uint64(1 * units.GiB), - })) + newStore := storeInfo.Clone( + core.SetStoreStats(&pdpb.StoreStats{ + Capacity: uint64(10 * units.GiB), + UsedSize: uint64(9 * units.GiB), + Available: uint64(1 * units.GiB), + }), + core.SetLastHeartbeatTS(time.Unix(ts/1e9, ts%1e9)), + ) grpcServer.GetRaftCluster().GetBasicCluster().PutStore(newStore) if cluster.GetSchedulingPrimaryServer() != nil { cluster.GetSchedulingPrimaryServer().GetCluster().PutStore(newStore) @@ -197,6 +207,7 @@ func MustPutRegion(re *require.Assertions, cluster *TestCluster, regionID, store Peers: []*metapb.Peer{leader}, RegionEpoch: &metapb.RegionEpoch{ConfVer: 1, Version: 1}, } + opts = append(opts, core.SetSource(core.Heartbeat)) r := core.NewRegionInfo(metaRegion, leader, opts...) MustPutRegionInfo(re, cluster, r) return r @@ -237,18 +248,19 @@ const ( // SchedulingTestEnvironment is used for test purpose. type SchedulingTestEnvironment struct { - t *testing.T - ctx context.Context - cancel context.CancelFunc - cluster *TestCluster - opts []ConfigOption + t *testing.T + opts []ConfigOption + clusters map[mode]*TestCluster + cancels []context.CancelFunc } // NewSchedulingTestEnvironment is to create a new SchedulingTestEnvironment. func NewSchedulingTestEnvironment(t *testing.T, opts ...ConfigOption) *SchedulingTestEnvironment { return &SchedulingTestEnvironment{ - t: t, - opts: opts, + t: t, + opts: opts, + clusters: make(map[mode]*TestCluster), + cancels: make([]context.CancelFunc, 0), } } @@ -260,56 +272,95 @@ func (s *SchedulingTestEnvironment) RunTestInTwoModes(test func(*TestCluster)) { // RunTestInPDMode is to run test in pd mode. func (s *SchedulingTestEnvironment) RunTestInPDMode(test func(*TestCluster)) { - s.t.Log("start to run test in pd mode") - s.startCluster(pdMode) - test(s.cluster) - s.cleanup() - s.t.Log("finish to run test in pd mode") + s.t.Logf("start test %s in pd mode", s.getTestName()) + if _, ok := s.clusters[pdMode]; !ok { + s.startCluster(pdMode) + } + test(s.clusters[pdMode]) +} + +func (s *SchedulingTestEnvironment) getTestName() string { + pc, _, _, _ := runtime.Caller(2) + caller := runtime.FuncForPC(pc) + if caller == nil || strings.Contains(caller.Name(), "RunTestInTwoModes") { + pc, _, _, _ = runtime.Caller(3) + caller = runtime.FuncForPC(pc) + } + if caller != nil { + elements := strings.Split(caller.Name(), ".") + return elements[len(elements)-1] + } + return "" } // RunTestInAPIMode is to run test in api mode. 
func (s *SchedulingTestEnvironment) RunTestInAPIMode(test func(*TestCluster)) { - s.t.Log("start to run test in api mode") re := require.New(s.t) + re.NoError(failpoint.Enable("github.com/tikv/pd/server/cluster/highFrequencyClusterJobs", `return(true)`)) re.NoError(failpoint.Enable("github.com/tikv/pd/pkg/mcs/scheduling/server/fastUpdateMember", `return(true)`)) - s.startCluster(apiMode) - test(s.cluster) - s.cleanup() - re.NoError(failpoint.Disable("github.com/tikv/pd/pkg/mcs/scheduling/server/fastUpdateMember")) - s.t.Log("finish to run test in api mode") + defer func() { + re.NoError(failpoint.Disable("github.com/tikv/pd/pkg/mcs/scheduling/server/fastUpdateMember")) + re.NoError(failpoint.Disable("github.com/tikv/pd/server/cluster/highFrequencyClusterJobs")) + }() + s.t.Logf("start test %s in api mode", s.getTestName()) + if _, ok := s.clusters[apiMode]; !ok { + s.startCluster(apiMode) + } + test(s.clusters[apiMode]) } -func (s *SchedulingTestEnvironment) cleanup() { - s.cluster.Destroy() - s.cancel() +// RunFuncInTwoModes is to run func in two modes. +func (s *SchedulingTestEnvironment) RunFuncInTwoModes(f func(*TestCluster)) { + if c, ok := s.clusters[pdMode]; ok { + f(c) + } + if c, ok := s.clusters[apiMode]; ok { + f(c) + } +} + +// Cleanup is to cleanup the environment. +func (s *SchedulingTestEnvironment) Cleanup() { + for _, cluster := range s.clusters { + cluster.Destroy() + } + for _, cancel := range s.cancels { + cancel() + } } func (s *SchedulingTestEnvironment) startCluster(m mode) { - var err error re := require.New(s.t) - s.ctx, s.cancel = context.WithCancel(context.Background()) + ctx, cancel := context.WithCancel(context.Background()) + s.cancels = append(s.cancels, cancel) switch m { case pdMode: - s.cluster, err = NewTestCluster(s.ctx, 1, s.opts...) + cluster, err := NewTestCluster(ctx, 1, s.opts...) re.NoError(err) - err = s.cluster.RunInitialServers() + err = cluster.RunInitialServers() re.NoError(err) - re.NotEmpty(s.cluster.WaitLeader()) - leaderServer := s.cluster.GetServer(s.cluster.GetLeader()) + re.NotEmpty(cluster.WaitLeader()) + leaderServer := cluster.GetServer(cluster.GetLeader()) re.NoError(leaderServer.BootstrapCluster()) + s.clusters[pdMode] = cluster case apiMode: - s.cluster, err = NewTestAPICluster(s.ctx, 1, s.opts...) + cluster, err := NewTestAPICluster(ctx, 1, s.opts...) 
re.NoError(err) - err = s.cluster.RunInitialServers() + err = cluster.RunInitialServers() re.NoError(err) - re.NotEmpty(s.cluster.WaitLeader()) - leaderServer := s.cluster.GetServer(s.cluster.GetLeader()) + re.NotEmpty(cluster.WaitLeader()) + leaderServer := cluster.GetServer(cluster.GetLeader()) re.NoError(leaderServer.BootstrapCluster()) + leaderServer.GetRaftCluster().SetPrepared() // start scheduling cluster - tc, err := NewTestSchedulingCluster(s.ctx, 1, leaderServer.GetAddr()) + tc, err := NewTestSchedulingCluster(ctx, 1, leaderServer.GetAddr()) re.NoError(err) tc.WaitForPrimaryServing(re) - s.cluster.SetSchedulingCluster(tc) + cluster.SetSchedulingCluster(tc) time.Sleep(200 * time.Millisecond) // wait for scheduling cluster to update member + testutil.Eventually(re, func() bool { + return cluster.GetLeaderServer().GetServer().GetRaftCluster().IsServiceIndependent(utils.SchedulingServiceName) + }) + s.clusters[apiMode] = cluster } } diff --git a/tools/pd-api-bench/cases/cases.go b/tools/pd-api-bench/cases/cases.go index d431b6f325c..2f93d2e9454 100644 --- a/tools/pd-api-bench/cases/cases.go +++ b/tools/pd-api-bench/cases/cases.go @@ -44,7 +44,7 @@ var ( // InitCluster initializes the cluster. func InitCluster(ctx context.Context, cli pd.Client, httpClit *http.Client) error { req, _ := http.NewRequestWithContext(ctx, http.MethodGet, - PDAddress+"/pd/api/v1/stats/region?start_key=&end_key=&count", nil) + PDAddress+"/pd/api/v1/stats/region?start_key=&end_key=&count", http.NoBody) resp, err := httpClit.Do(req) if err != nil { return err @@ -158,7 +158,7 @@ type minResolvedTSStruct struct { func (c *minResolvedTS) Do(ctx context.Context, cli *http.Client) error { url := fmt.Sprintf("%s%s", PDAddress, c.path) - req, _ := http.NewRequestWithContext(ctx, http.MethodGet, url, nil) + req, _ := http.NewRequestWithContext(ctx, http.MethodGet, url, http.NoBody) res, err := cli.Do(req) if err != nil { return err @@ -212,7 +212,7 @@ func (c *regionsStats) Do(ctx context.Context, cli *http.Client) error { url.QueryEscape(string(generateKeyForSimulator(startID, 56))), url.QueryEscape(string(generateKeyForSimulator(endID, 56))), "") - req, _ := http.NewRequestWithContext(ctx, http.MethodGet, url, nil) + req, _ := http.NewRequestWithContext(ctx, http.MethodGet, url, http.NoBody) res, err := cli.Do(req) if err != nil { return err diff --git a/tools/pd-api-bench/go.mod b/tools/pd-api-bench/go.mod index e6e896a0797..8050f433e8b 100644 --- a/tools/pd-api-bench/go.mod +++ b/tools/pd-api-bench/go.mod @@ -26,8 +26,10 @@ require ( github.com/aws/smithy-go v1.13.5 // indirect github.com/benbjohnson/clock v1.3.0 // indirect github.com/beorn7/perks v1.0.1 // indirect + github.com/bytedance/sonic v1.9.1 // indirect github.com/cakturk/go-netstat v0.0.0-20200220111822-e5b49efee7a5 // indirect github.com/cespare/xxhash/v2 v2.2.0 // indirect + github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311 // indirect github.com/coreos/go-semver v0.3.0 // indirect github.com/coreos/go-systemd v0.0.0-20190719114852-fd7a80b32e1f // indirect github.com/coreos/pkg v0.0.0-20180928190104-399ea9e2e55f // indirect @@ -35,12 +37,13 @@ require ( github.com/docker/go-units v0.4.0 // indirect github.com/dustin/go-humanize v0.0.0-20171111073723-bb3d318650d4 // indirect github.com/elliotchance/pie/v2 v2.1.0 // indirect + github.com/gabriel-vasile/mimetype v1.4.2 // indirect github.com/gin-contrib/sse v0.1.0 // indirect - github.com/gin-gonic/gin v1.8.1 // indirect - github.com/go-playground/locales v0.14.0 // indirect - 
github.com/go-playground/universal-translator v0.18.0 // indirect - github.com/go-playground/validator/v10 v10.10.0 // indirect - github.com/goccy/go-json v0.9.7 // indirect + github.com/gin-gonic/gin v1.9.1 // indirect + github.com/go-playground/locales v0.14.1 // indirect + github.com/go-playground/universal-translator v0.18.1 // indirect + github.com/go-playground/validator/v10 v10.14.0 // indirect + github.com/goccy/go-json v0.10.2 // indirect github.com/gogo/protobuf v1.3.2 // indirect github.com/golang-jwt/jwt v3.2.1+incompatible // indirect github.com/golang/groupcache v0.0.0-20190129154638-5b532d6fd5ef // indirect @@ -56,14 +59,15 @@ require ( github.com/grpc-ecosystem/grpc-gateway v1.16.0 // indirect github.com/jonboulle/clockwork v0.2.2 // indirect github.com/json-iterator/go v1.1.12 // indirect + github.com/klauspost/cpuid/v2 v2.2.4 // indirect github.com/konsorten/go-windows-terminal-sequences v1.0.3 // indirect - github.com/leodido/go-urn v1.2.1 // indirect - github.com/mattn/go-isatty v0.0.14 // indirect + github.com/leodido/go-urn v1.2.4 // indirect + github.com/mattn/go-isatty v0.0.19 // indirect github.com/matttproud/golang_protobuf_extensions v1.0.1 // indirect github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.2 // indirect github.com/opentracing/opentracing-go v1.2.0 // indirect - github.com/pelletier/go-toml/v2 v2.0.1 // indirect + github.com/pelletier/go-toml/v2 v2.0.8 // indirect github.com/petermattis/goid v0.0.0-20211229010228-4d14c490ee36 // indirect github.com/phf/go-queue v0.0.0-20170504031614-9abe38d0371d // indirect github.com/pingcap/errcode v0.3.0 // indirect @@ -81,10 +85,11 @@ require ( github.com/smallnest/chanx v0.0.0-20221229104322-eb4c998d2072 // indirect github.com/soheilhy/cmux v0.1.4 // indirect github.com/spf13/pflag v1.0.5 // indirect - github.com/stretchr/testify v1.8.2 // indirect + github.com/stretchr/testify v1.8.3 // indirect github.com/syndtr/goleveldb v1.0.1-0.20190318030020-c3a204f8e965 // indirect github.com/tmc/grpc-websocket-proxy v0.0.0-20200427203606-3cfed13b9966 // indirect - github.com/ugorji/go/codec v1.2.7 // indirect + github.com/twitchyliquid64/golang-asm v0.15.1 // indirect + github.com/ugorji/go/codec v1.2.11 // indirect github.com/unrolled/render v1.0.1 // indirect github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2 // indirect go.etcd.io/bbolt v1.3.6 // indirect @@ -92,6 +97,7 @@ require ( go.uber.org/atomic v1.10.0 // indirect go.uber.org/goleak v1.1.12 // indirect go.uber.org/multierr v1.11.0 // indirect + golang.org/x/arch v0.3.0 // indirect golang.org/x/crypto v0.14.0 // indirect golang.org/x/exp v0.0.0-20230711005742-c3f37128e5a4 // indirect golang.org/x/net v0.17.0 // indirect diff --git a/tools/pd-api-bench/go.sum b/tools/pd-api-bench/go.sum index f40a4fe2f5a..1e40c511586 100644 --- a/tools/pd-api-bench/go.sum +++ b/tools/pd-api-bench/go.sum @@ -42,12 +42,18 @@ github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+Ce github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kBD4zp0CCIs= +github.com/bytedance/sonic v1.5.0/go.mod h1:ED5hyg4y6t3/9Ku1R6dU/4KyJ48DZ4jPhfY1O2AihPM= +github.com/bytedance/sonic v1.9.1 h1:6iJ6NqdoxCDr6mbY8h18oSO+cShGSMRGCEo7F2h0x8s= +github.com/bytedance/sonic v1.9.1/go.mod 
h1:i736AoUSYt75HyZLoJW9ERYxcy6eaN6h4BZXU064P/U= github.com/cakturk/go-netstat v0.0.0-20200220111822-e5b49efee7a5 h1:BjkPE3785EwPhhyuFkbINB+2a1xATwk8SNDWnJiD41g= github.com/cakturk/go-netstat v0.0.0-20200220111822-e5b49efee7a5/go.mod h1:jtAfVaU/2cu1+wdSRPWE2c1N2qeAA3K4RH9pYgqwets= github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/chenzhuoyu/base64x v0.0.0-20211019084208-fb5309c8db06/go.mod h1:DH46F32mSOjUmXrMHnKwZdA8wcEefY7UVqBKYGjpdQY= +github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311 h1:qSGYFH7+jGhDF8vLC+iwCD4WpbV1EBDSzWkJODFLams= +github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311/go.mod h1:b583jCggY9gE99b6G5LEC39OIiVsWj+R97kbl5odCEk= github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= github.com/cockroachdb/datadriven v0.0.0-20190809214429-80d97fb3cbaa h1:OaNxuTZr7kxeODyLWsRMC+OD03aFUH+mW6r2d+MWa5Y= @@ -61,7 +67,6 @@ github.com/coreos/go-systemd v0.0.0-20190719114852-fd7a80b32e1f/go.mod h1:F5haX7 github.com/coreos/pkg v0.0.0-20160727233714-3ac0863d7acf/go.mod h1:E3G3o1h8I7cfcXa63jLwjI0eiQQMgzzUDFVpN/nH/eA= github.com/coreos/pkg v0.0.0-20180928190104-399ea9e2e55f h1:lBNOc5arjvs8E5mO2tbpBpLoyyu8B6e44T7hJy6potg= github.com/coreos/pkg v0.0.0-20180928190104-399ea9e2e55f/go.mod h1:E3G3o1h8I7cfcXa63jLwjI0eiQQMgzzUDFVpN/nH/eA= -github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/creack/pty v1.1.11/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= @@ -80,11 +85,13 @@ github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1m github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4= github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= +github.com/gabriel-vasile/mimetype v1.4.2 h1:w5qFW6JKBz9Y393Y4q372O9A7cUSequkh1Q7OhCmWKU= +github.com/gabriel-vasile/mimetype v1.4.2/go.mod h1:zApsH/mKG4w07erKIaJPFiX0Tsq9BFQgN3qGY5GnNgA= github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= github.com/gin-contrib/sse v0.1.0 h1:Y/yl/+YNO8GZSjAhjMsSuLt29uWRFHdHYUb5lYOV9qE= github.com/gin-contrib/sse v0.1.0/go.mod h1:RHrZQHXnP2xjPF+u1gW/2HnVO7nvIa9PG3Gm+fLHvGI= -github.com/gin-gonic/gin v1.8.1 h1:4+fr/el88TOO3ewCmQr8cx/CtZ/umlIRIs5M4NTNjf8= -github.com/gin-gonic/gin v1.8.1/go.mod h1:ji8BvRH1azfM+SYow9zQ6SZMvR8qOMZHmsCuWR9tTTk= +github.com/gin-gonic/gin v1.9.1 h1:4idEAncQnU5cB7BeOkPtxjfCSye0AAm1R0RVIqJ+Jmg= +github.com/gin-gonic/gin v1.9.1/go.mod h1:hPrL7YrpYKXt5YId3A/Tnip5kqbEAP+KLuI3SUcPTeU= github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= github.com/go-kit/kit v0.9.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= github.com/go-kit/log v0.1.0/go.mod h1:zbhenjAZHb184qTLMA9ZjW7ThYL0H2mk7Q6pNt4vbaY= @@ -93,17 +100,17 @@ 
github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V github.com/go-logfmt/logfmt v0.5.0/go.mod h1:wCYkCAKZfumFQihp8CzCvQ3paCTfi41vtzG1KdI/P7A= github.com/go-ole/go-ole v1.2.6 h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY= github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0= -github.com/go-playground/assert/v2 v2.0.1 h1:MsBgLAaY856+nPRTKrp3/OZK38U/wa0CcBYNjji3q3A= -github.com/go-playground/assert/v2 v2.0.1/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4= -github.com/go-playground/locales v0.14.0 h1:u50s323jtVGugKlcYeyzC0etD1HifMjqmJqb8WugfUU= -github.com/go-playground/locales v0.14.0/go.mod h1:sawfccIbzZTqEDETgFXqTho0QybSa7l++s0DH+LDiLs= -github.com/go-playground/universal-translator v0.18.0 h1:82dyy6p4OuJq4/CByFNOn/jYrnRPArHwAcmLoJZxyho= -github.com/go-playground/universal-translator v0.18.0/go.mod h1:UvRDBj+xPUEGrFYl+lu/H90nyDXpg0fqeB/AQUGNTVA= -github.com/go-playground/validator/v10 v10.10.0 h1:I7mrTYv78z8k8VXa/qJlOlEXn/nBh+BF8dHX5nt/dr0= -github.com/go-playground/validator/v10 v10.10.0/go.mod h1:74x4gJWsvQexRdW8Pn3dXSGrTK4nAUsbPlLADvpJkos= +github.com/go-playground/assert/v2 v2.2.0 h1:JvknZsQTYeFEAhQwI4qEt9cyV5ONwRHC+lYKSsYSR8s= +github.com/go-playground/assert/v2 v2.2.0/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4= +github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/oXslEjJA= +github.com/go-playground/locales v0.14.1/go.mod h1:hxrqLVvrK65+Rwrd5Fc6F2O76J/NuW9t0sjnWqG1slY= +github.com/go-playground/universal-translator v0.18.1 h1:Bcnm0ZwsGyWbCzImXv+pAJnYK9S473LQFuzCbDbfSFY= +github.com/go-playground/universal-translator v0.18.1/go.mod h1:xekY+UJKNuX9WP91TpwSH2VMlDf28Uj24BCp08ZFTUY= +github.com/go-playground/validator/v10 v10.14.0 h1:vgvQWe3XCz3gIeFDm/HnTIbj6UGmg/+t63MyGU2n5js= +github.com/go-playground/validator/v10 v10.14.0/go.mod h1:9iXMNT7sEkjXb0I+enO7QXmzG6QCsPWY4zveKFVRSyU= github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= -github.com/goccy/go-json v0.9.7 h1:IcB+Aqpx/iMHu5Yooh7jEzJk1JZ7Pjtmys2ukPr7EeM= -github.com/goccy/go-json v0.9.7/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I= +github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU= +github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I= github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= github.com/gogo/protobuf v1.2.1/go.mod h1:hp+jE20tsWTFYpLwKvXlhS1hjn+gTNwPg2I6zVXpSg4= github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= @@ -183,26 +190,26 @@ github.com/julienschmidt/httprouter v1.3.0/go.mod h1:JR6WtHb+2LUe8TCKY3cZOxFyyO8 github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvWXihfKN4Q= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= +github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= +github.com/klauspost/cpuid/v2 v2.2.4 h1:acbojRNwl3o09bUq+yDCtZFc1aiwaAAxtcn8YkZXnvk= +github.com/klauspost/cpuid/v2 v2.2.4/go.mod h1:RVVoqg1df56z8g3pUjL/3lE5UfnlrJX8tyFgg4nqhuY= github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/konsorten/go-windows-terminal-sequences v1.0.3 h1:CE8S1cTafDpPvMhIxNJKvHsGVBgn1xWYf1NbHQhywc8= github.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod 
h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc= +github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= -github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= -github.com/kr/pretty v0.3.0 h1:WgNl7dwNpEZ6jJ9k1snq4pZsg7DOEN8hP9Xw0Tsjwk0= -github.com/kr/pretty v0.3.0/go.mod h1:640gp4NfQd8pI5XOwp5fnNeVWj67G7CFk/SaSQn7NBk= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= +github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= -github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= -github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= -github.com/leodido/go-urn v1.2.1 h1:BqpAaACuzVSgi/VLzGZIobT2z4v53pjosyNd9Yv6n/w= -github.com/leodido/go-urn v1.2.1/go.mod h1:zt4jvISO2HfUBqxjfIshjdMTYS56ZS/qv49ictyFfxY= +github.com/leodido/go-urn v1.2.4 h1:XlAE/cm/ms7TE/VMVoduSpNBoyc2dOxHs5MZSwAN63Q= +github.com/leodido/go-urn v1.2.4/go.mod h1:7ZrI8mTSeBSHl/UaRyKQW1qZeMgak41ANeCNaVckg+4= github.com/lufia/plan9stats v0.0.0-20230326075908-cb1d2100619a h1:N9zuLhTvBSRt0gWSiJswwQ2HqDmtX/ZCDJURnKUt1Ik= github.com/lufia/plan9stats v0.0.0-20230326075908-cb1d2100619a/go.mod h1:JKx41uQRwqlTZabZc+kILPrO/3jlKnQ2Z8b7YiVw5cE= github.com/mattn/go-colorable v0.0.9/go.mod h1:9vuHe8Xs5qXnSaW/c/ABM9alt+Vo+STaOChaDxuIBZU= github.com/mattn/go-isatty v0.0.4/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4= -github.com/mattn/go-isatty v0.0.14 h1:yVuAays6BHfxijgZPzw+3Zlu5yQgKGP2/hcQbHb7S9Y= -github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94= +github.com/mattn/go-isatty v0.0.19 h1:JITubQf0MOLdlGRuRq+jtsDlekdYPia9ZFsB8h/APPA= +github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= github.com/mattn/go-runewidth v0.0.2/go.mod h1:LwmH8dsx7+W8Uxz3IHJYH5QSwggIsqBzpuz5H//U1FU= github.com/matttproud/golang_protobuf_extensions v1.0.1 h1:4hp9jkHxhMHkqkrB3Ix0jegS5sx/RkqARlsWZ6pIwiU= github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0= @@ -224,8 +231,8 @@ github.com/onsi/gomega v1.20.1 h1:PA/3qinGoukvymdIDV8pii6tiZgC8kbmJO6Z5+b002Q= github.com/onsi/gomega v1.20.1/go.mod h1:DtrZpjmvpn2mPm4YWQa0/ALMDj9v4YxLgojwPeREyVo= github.com/opentracing/opentracing-go v1.2.0 h1:uEJPy/1a5RIPAJ0Ov+OIO8OxWu77jEv+1B0VhjKrZUs= github.com/opentracing/opentracing-go v1.2.0/go.mod h1:GxEUsuufX4nBwe+T+Wl9TAgYrxe9dPLANfrWvHYVTgc= -github.com/pelletier/go-toml/v2 v2.0.1 h1:8e3L2cCQzLFi2CR4g7vGFuFxX7Jl1kKX8gW+iV0GUKU= -github.com/pelletier/go-toml/v2 v2.0.1/go.mod h1:r9LEWfGN8R5k0VXJ+0BkIe7MYkRdwZOjgMj2KwnJFUo= +github.com/pelletier/go-toml/v2 v2.0.8 h1:0ctb6s9mE31h0/lhu+J6OPmVeDxJn+kYnJc2jZR9tGQ= +github.com/pelletier/go-toml/v2 v2.0.8/go.mod h1:vuYfssBdrU2XDZ9bYydBu6t+6a6PYNcZljzZR9VXg+4= github.com/petermattis/goid v0.0.0-20211229010228-4d14c490ee36 h1:64bxqeTEN0/xoEqhKGowgihNuzISS9rEG6YUMU4bzJo= github.com/petermattis/goid v0.0.0-20211229010228-4d14c490ee36/go.mod h1:pxMtw7cyUw6B2bRH0ZBANSPg+AoSud1I1iyJHI69jH4= github.com/phf/go-queue v0.0.0-20170504031614-9abe38d0371d h1:U+PMnTlV2tu7RuMK5etusZG3Cf+rpow5hqQByeCzJ2g= @@ -244,7 +251,6 @@ github.com/pingcap/log v1.1.1-0.20221110025148-ca232912c9f3 h1:HR/ylkkLmGdSSDaD8 
github.com/pingcap/log v1.1.1-0.20221110025148-ca232912c9f3/go.mod h1:DWQW5jICDR7UJh4HtxXSM20Churx4CQL0fwL/SoOSA4= github.com/pingcap/sysutil v1.0.1-0.20230407040306-fb007c5aff21 h1:QV6jqlfOkh8hqvEAgwBZa+4bSgO0EeKC7s5c6Luam2I= github.com/pingcap/sysutil v1.0.1-0.20230407040306-fb007c5aff21/go.mod h1:QYnjfA95ZaMefyl1NO8oPtKeb8pYUdnDVhQgf+qdpjM= -github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA= github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= @@ -274,9 +280,6 @@ github.com/prometheus/procfs v0.6.0 h1:mxy4L2jP6qMonqmq+aTtOx1ifVWUgG/TAmntgbh3x github.com/prometheus/procfs v0.6.0/go.mod h1:cz+aTbrPOrUb4q7XlbU9ygM+/jj0fzG6c1xBZuNvfVA= github.com/rogpeppe/fastuuid v0.0.0-20150106093220-6724a57986af/go.mod h1:XWv6SoW27p1b0cqNHllgS5HIMJraePCO15w5zCzIWYg= github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ= -github.com/rogpeppe/go-internal v1.6.1/go.mod h1:xXDCJY+GAPziupqXw64V24skbSoqbTEfhy4qGm1nDQc= -github.com/rogpeppe/go-internal v1.8.0 h1:FCbCCtXNOY3UtUuHUYaghJg4y7Fd14rXifAYUAtL9R8= -github.com/rogpeppe/go-internal v1.8.0/go.mod h1:WmiCO8CzOY8rg0OYDC4/i/2WRWAB6poM+XZ2dLUbcbE= github.com/sasha-s/go-deadlock v0.2.0 h1:lMqc+fUb7RrFS3gQLtoQsJ7/6TV/pAIFvBsqX73DK8Y= github.com/sasha-s/go-deadlock v0.2.0/go.mod h1:StQn567HiB1fF2yJ44N9au7wOhrPS3iZqiDbRupzT10= github.com/sergi/go-diff v1.1.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM= @@ -305,12 +308,13 @@ github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpE github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= -github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= -github.com/stretchr/testify v1.8.2 h1:+h33VjcLVPDHtOdpUCuF+7gSuG3yGIftsP1YvFihtJ8= +github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= github.com/stretchr/testify v1.8.2/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= +github.com/stretchr/testify v1.8.3 h1:RP3t2pwF7cMEbC1dqtB6poj3niw/9gnV4Cjg5oW5gtY= +github.com/stretchr/testify v1.8.3/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= github.com/syndtr/goleveldb v1.0.1-0.20190318030020-c3a204f8e965 h1:1oFLiOyVl+W7bnBzGhf7BbIv9loSFQcieWWYIjLqcAw= github.com/syndtr/goleveldb v1.0.1-0.20190318030020-c3a204f8e965/go.mod h1:9OrXJhf154huy1nPWmuSrkgjPUtUNhA+Zmy+6AESzuA= github.com/tklauser/go-sysconf v0.3.11 h1:89WgdJhk5SNwJfu+GKyYveZ4IaJ7xAkecBo+KdJV0CM= @@ -319,9 +323,10 @@ github.com/tklauser/numcpus v0.6.0 h1:kebhY2Qt+3U6RNK7UqpYNA+tJ23IBEGKkB7JQBfDYm github.com/tklauser/numcpus v0.6.0/go.mod h1:FEZLMke0lhOUG6w2JadTzp0a+Nl8PF/GFkQ5UVIcaL4= github.com/tmc/grpc-websocket-proxy v0.0.0-20200427203606-3cfed13b9966 h1:j6JEOq5QWFker+d7mFQYOhjTZonQ7YkLTHm56dbn+yM= github.com/tmc/grpc-websocket-proxy v0.0.0-20200427203606-3cfed13b9966/go.mod 
h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= -github.com/ugorji/go v1.2.7/go.mod h1:nF9osbDWLy6bDVv/Rtoh6QgnvNDpmCalQV5urGCCS6M= -github.com/ugorji/go/codec v1.2.7 h1:YPXUKf7fYbp/y8xloBqZOw2qaVggbfwMlI8WM3wZUJ0= -github.com/ugorji/go/codec v1.2.7/go.mod h1:WGN1fab3R1fzQlVQTkfxVtIBhWDRqOviHU95kRgeqEY= +github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI= +github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08= +github.com/ugorji/go/codec v1.2.11 h1:BMaWp1Bb6fHwEtbplGBGJ498wD+LKlNSl25MjdZY4dU= +github.com/ugorji/go/codec v1.2.11/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg= github.com/unrolled/render v1.0.1 h1:VDDnQQVfBMsOsp3VaCJszSO0nkBIVEYoPWeRThk9spY= github.com/unrolled/render v1.0.1/go.mod h1:gN9T0NhL4Bfbwu8ann7Ry/TGHYfosul+J0obPf6NBdM= github.com/urfave/cli v1.20.0/go.mod h1:70zkFmudgCuE/ngEzBv17Jvp/497gISqfk5gWijbERA= @@ -354,11 +359,13 @@ go.uber.org/zap v1.10.0/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q= go.uber.org/zap v1.19.0/go.mod h1:xg/QME4nWcxGxrpdeYfq7UvYrLh66cuVKdrbD1XF/NI= go.uber.org/zap v1.24.0 h1:FiJd5l1UOLj0wCgbSE0rwwXHzEdAZS6hiiSnxJN/D60= go.uber.org/zap v1.24.0/go.mod h1:2kMP+WWQ8aoFoedH3T2sq6iJ2yDWpHbP0f6MQbS9Gkg= +golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8= +golang.org/x/arch v0.3.0 h1:02VY4/ZcO/gBOH6PUaoiptASxtXU10jazRCP865E97k= +golang.org/x/arch v0.3.0/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8= golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= -golang.org/x/crypto v0.0.0-20210711020723-a769d52b0f97/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= golang.org/x/crypto v0.0.0-20220411220226-7b82a4e95df4/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= golang.org/x/crypto v0.14.0 h1:wBqGXzWJW6m1XrIKlAH0Hs1JJ7+9KBwnIO8v66Q9cHc= golang.org/x/crypto v0.14.0/go.mod h1:MVFd36DqK4CsrnJYDkBA3VC4m2GkXAM0PvzMCn4JQf4= @@ -388,7 +395,6 @@ golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLL golang.org/x/net v0.0.0-20200625001655-4c5254603344/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= -golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM= golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.17.0 h1:pVaXccu2ozPjCXewfr1S7xza/zcXTity9cCdXQYSjIM= @@ -425,9 +431,9 @@ golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210603081109-ebe580a85c40/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod 
h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20210806184541-e5e7981a1069/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211019181941-9d821ace8654/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220704084225-05e143d24a9e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.13.0 h1:Af8nKPmuFypiUBjVoU9V20FiaFXOcuZI21p0ycVYYGE= golang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= @@ -486,11 +492,9 @@ google.golang.org/protobuf v1.30.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqw gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 h1:YR8cESwS4TdDjEe65xsg0ogRM/Nc3DYOhEAlW+xobZo= gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= -gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= gopkg.in/cheggaaa/pb.v1 v1.0.25/go.mod h1:V/YB90LKu/1FcN3WVnfiiE5oMCibMjukxqG/qStrOgw= -gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= gopkg.in/fsnotify.v1 v1.4.7 h1:xOHLXZwVvI9hhs+cLKq5+I5onOuwQLhQwiu63xxlHs4= gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= gopkg.in/natefinch/lumberjack.v2 v2.0.0/go.mod h1:l0ndWWf7gzL7RNwBG7wST/UCcT4T24xpD6X8LsfU/+k= @@ -515,6 +519,7 @@ gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= +rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4= sigs.k8s.io/yaml v1.1.0/go.mod h1:UJmg0vDUVViEyp3mgSv9WPwZCDxu4rQW1olrI1uml+o= sigs.k8s.io/yaml v1.2.0 h1:kr/MCeFWJWTwyaHoR9c8EjH9OumOmoF9YGiZd7lFm/Q= sigs.k8s.io/yaml v1.2.0/go.mod h1:yfXDCHCao9+ENCvLSE62v9VSji2MKu5jeNfTrofGhJc= diff --git a/tools/pd-ctl/pdctl/command/gc_safepoint_command.go b/tools/pd-ctl/pdctl/command/gc_safepoint_command.go index 619cf5a928b..80c6328e955 100644 --- a/tools/pd-ctl/pdctl/command/gc_safepoint_command.go +++ b/tools/pd-ctl/pdctl/command/gc_safepoint_command.go @@ -15,9 +15,12 @@ package command import ( + "encoding/json" "net/http" + "sort" "github.com/spf13/cobra" + "github.com/tikv/pd/server/api" ) var ( @@ -52,7 +55,20 @@ func showSSPs(cmd *cobra.Command, args []string) { cmd.Printf("Failed to get service GC safepoint: %s\n", err) return } - cmd.Println(r) + var safepoint api.ListServiceGCSafepoint + if err := json.Unmarshal([]byte(r), &safepoint); err != nil { + cmd.Printf("Failed to unmarshal service GC safepoint: %s\n", err) + return + } + 
sort.Slice(safepoint.ServiceGCSafepoints, func(i, j int) bool { + return safepoint.ServiceGCSafepoints[i].SafePoint < safepoint.ServiceGCSafepoints[j].SafePoint + }) + data, err := json.MarshalIndent(safepoint, "", " ") + if err != nil { + cmd.Printf("Failed to marshal service GC safepoint: %s\n", err) + return + } + cmd.Println(string(data)) } func deleteSSP(cmd *cobra.Command, args []string) { diff --git a/tools/pd-ctl/pdctl/command/global.go b/tools/pd-ctl/pdctl/command/global.go index 5d8552da51a..0b1f4b4409a 100644 --- a/tools/pd-ctl/pdctl/command/global.go +++ b/tools/pd-ctl/pdctl/command/global.go @@ -29,14 +29,15 @@ import ( "go.etcd.io/etcd/pkg/transport" ) -var ( - pdControllerComponentName = "pdctl" - dialClient = &http.Client{ - Transport: apiutil.NewComponentSignatureRoundTripper(http.DefaultTransport, pdControllerComponentName), - } - pingPrefix = "pd/api/v1/ping" +const ( + pdControlCallerID = "pd-ctl" + pingPrefix = "pd/api/v1/ping" ) +var dialClient = &http.Client{ + Transport: apiutil.NewCallerIDRoundTripper(http.DefaultTransport, pdControlCallerID), +} + // InitHTTPSClient creates https client with ca file func InitHTTPSClient(caPath, certPath, keyPath string) error { tlsInfo := transport.TLSInfo{ @@ -50,8 +51,8 @@ func InitHTTPSClient(caPath, certPath, keyPath string) error { } dialClient = &http.Client{ - Transport: apiutil.NewComponentSignatureRoundTripper( - &http.Transport{TLSClientConfig: tlsConfig}, pdControllerComponentName), + Transport: apiutil.NewCallerIDRoundTripper( + &http.Transport{TLSClientConfig: tlsConfig}, pdControlCallerID), } return nil diff --git a/tools/pd-ctl/pdctl/command/keyspace_command.go b/tools/pd-ctl/pdctl/command/keyspace_command.go index 7c0d3d78bf6..93a99abc39f 100644 --- a/tools/pd-ctl/pdctl/command/keyspace_command.go +++ b/tools/pd-ctl/pdctl/command/keyspace_command.go @@ -28,11 +28,12 @@ import ( const ( keyspacePrefix = "pd/api/v2/keyspaces" // flags - nmConfig = "config" - nmLimit = "limit" - nmPageToken = "page_token" - nmRemove = "remove" - nmUpdate = "update" + nmConfig = "config" + nmLimit = "limit" + nmPageToken = "page_token" + nmRemove = "remove" + nmUpdate = "update" + nmForceRefreshGroupID = "force_refresh_group_id" ) // NewKeyspaceCommand returns a keyspace subcommand of rootCmd. @@ -64,6 +65,7 @@ func newShowKeyspaceCommand() *cobra.Command { Short: "show keyspace metadata specified by keyspace name", Run: showKeyspaceNameCommandFunc, } + showByName.Flags().Bool(nmForceRefreshGroupID, true, "force refresh keyspace group id") r.AddCommand(showByID) r.AddCommand(showByName) return r @@ -87,7 +89,21 @@ func showKeyspaceNameCommandFunc(cmd *cobra.Command, args []string) { cmd.Usage() return } - resp, err := doRequest(cmd, fmt.Sprintf("%s/%s?force_refresh_group_id=true", keyspacePrefix, args[0]), http.MethodGet, http.Header{}) + refreshGroupID, err := cmd.Flags().GetBool(nmForceRefreshGroupID) + if err != nil { + cmd.PrintErrln("Failed to parse flag: ", err) + return + } + url := fmt.Sprintf("%s/%s", keyspacePrefix, args[0]) + if refreshGroupID { + url += "?force_refresh_group_id=true" + } + resp, err := doRequest(cmd, url, http.MethodGet, http.Header{}) + // Retry without the force_refresh_group_id if the keyspace group manager is not initialized. + // This can happen when PD is not running in API mode. 
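// Illustrative note (not part of this patch): the new flag keeps the old behaviour as the
// default — the show-by-name subcommand still refreshes the keyspace group ID — while
// passing --force_refresh_group_id=false skips the refresh entirely and therefore never
// needs the fallback request issued below.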
+ if err != nil && refreshGroupID && strings.Contains(err.Error(), handlers.GroupManagerUninitializedErr) { + resp, err = doRequest(cmd, fmt.Sprintf("%s/%s", keyspacePrefix, args[0]), http.MethodGet, http.Header{}) + } if err != nil { cmd.PrintErrln("Failed to get the keyspace information: ", err) return diff --git a/tools/pd-ctl/pdctl/command/resource_manager_command.go b/tools/pd-ctl/pdctl/command/resource_manager_command.go new file mode 100644 index 00000000000..8bc5ea85977 --- /dev/null +++ b/tools/pd-ctl/pdctl/command/resource_manager_command.go @@ -0,0 +1,112 @@ +// Copyright 2023 TiKV Project Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package command + +import ( + "bytes" + "encoding/json" + "fmt" + "net/http" + "strconv" + + "github.com/spf13/cobra" +) + +const ( + resourceManagerPrefix = "resource-manager/api/v1" + // flags + rmConfigController = "config/controller" +) + +// NewResourceManagerCommand return a resource manager subcommand of rootCmd +func NewResourceManagerCommand() *cobra.Command { + cmd := &cobra.Command{ + Use: "resource-manager [flags]", + Short: "resource-manager commands", + } + cmd.AddCommand(newResourceManagerConfigCommand()) + return cmd +} + +func newResourceManagerConfigCommand() *cobra.Command { + r := &cobra.Command{ + Use: "config", + Short: "config resource manager", + } + r.AddCommand(newConfigControllerCommand()) + return r +} + +func newConfigControllerCommand() *cobra.Command { + r := &cobra.Command{ + Use: "controller", + Short: "config controller", + } + r.AddCommand(newConfigControllerSetCommand()) + r.AddCommand(newConfigControllerShowCommand()) + return r +} + +func newConfigControllerSetCommand() *cobra.Command { + r := &cobra.Command{ + Use: "set ", + Short: "set controller config", + Run: func(cmd *cobra.Command, args []string) { + if len(args) != 2 { + cmd.Println(cmd.UsageString()) + return + } + + var val interface{} + val, err := strconv.ParseFloat(args[1], 64) + if err != nil { + val = args[1] + } + data := map[string]interface{}{args[0]: val} + jsonData, err := json.Marshal(data) + if err != nil { + cmd.Println(err) + return + } + resp, err := doRequest(cmd, fmt.Sprintf("%s/%s", resourceManagerPrefix, rmConfigController), http.MethodPost, http.Header{}, WithBody(bytes.NewBuffer(jsonData))) + if err != nil { + cmd.PrintErrln("Failed to set the config: ", err) + return + } + cmd.Println(resp) + }, + } + return r +} + +func newConfigControllerShowCommand() *cobra.Command { + r := &cobra.Command{ + Use: "show", + Short: "show controller config", + Run: func(cmd *cobra.Command, args []string) { + if len(args) != 0 { + cmd.Println(cmd.UsageString()) + return + } + resp, err := doRequest(cmd, fmt.Sprintf("%s/%s", resourceManagerPrefix, rmConfigController), http.MethodGet, http.Header{}) + if err != nil { + cmd.Println(err) + return + } + cmd.Println(resp) + }, + } + return r +} diff --git a/tools/pd-ctl/pdctl/command/scheduler.go b/tools/pd-ctl/pdctl/command/scheduler.go index 4349735f06d..695576edf84 100644 --- 
a/tools/pd-ctl/pdctl/command/scheduler.go +++ b/tools/pd-ctl/pdctl/command/scheduler.go @@ -500,6 +500,7 @@ func NewConfigSchedulerCommand() *cobra.Command { newConfigBalanceLeaderCommand(), newSplitBucketCommand(), newConfigEvictSlowStoreCommand(), + newConfigShuffleHotRegionSchedulerCommand(), newConfigEvictSlowTrendCommand(), ) return c @@ -745,11 +746,17 @@ func showShuffleRegionSchedulerRolesCommandFunc(cmd *cobra.Command, args []strin if p == "show-roles" { p = cmd.Parent().Name() } - path := path.Join(schedulerConfigPrefix, p, "roles") - r, err := doRequest(cmd, path, http.MethodGet, http.Header{}) + url := path.Join(schedulerConfigPrefix, p, "list") + r, err := doRequest(cmd, url, http.MethodGet, http.Header{}) if err != nil { - cmd.Println(err) - return + // try to use old api + var err2 error + url := path.Join(schedulerConfigPrefix, p, "roles") + r, err2 = doRequest(cmd, url, http.MethodGet, http.Header{}) + if err2 != nil { + cmd.Println(err, err2) + return + } } cmd.Println(r) } @@ -796,6 +803,25 @@ func newConfigEvictSlowStoreCommand() *cobra.Command { return c } +func newConfigShuffleHotRegionSchedulerCommand() *cobra.Command { + c := &cobra.Command{ + Use: "shuffle-hot-region-scheduler", + Short: "shuffle-hot-region-scheduler config", + Run: listSchedulerConfigCommandFunc, + } + + c.AddCommand(&cobra.Command{ + Use: "show", + Short: "list the config item", + Run: listSchedulerConfigCommandFunc, + }, &cobra.Command{ + Use: "set ", + Short: "set the config item", + Run: func(cmd *cobra.Command, args []string) { postSchedulerConfigCommandFunc(cmd, c.Name(), args) }, + }) + return c +} + func newConfigEvictSlowTrendCommand() *cobra.Command { c := &cobra.Command{ Use: "evict-slow-trend-scheduler", diff --git a/tools/pd-ctl/pdctl/ctl.go b/tools/pd-ctl/pdctl/ctl.go index 86494c046eb..7a3c540b266 100644 --- a/tools/pd-ctl/pdctl/ctl.go +++ b/tools/pd-ctl/pdctl/ctl.go @@ -67,6 +67,7 @@ func GetRootCmd() *cobra.Command { command.NewUnsafeCommand(), command.NewKeyspaceGroupCommand(), command.NewKeyspaceCommand(), + command.NewResourceManagerCommand(), ) rootCmd.Flags().ParseErrorsWhitelist.UnknownFlags = true diff --git a/tools/pd-simulator/simulator/cases/diagnose_rule.go b/tools/pd-simulator/simulator/cases/diagnose_rule.go index b4b30fdc772..6cd76c854b7 100644 --- a/tools/pd-simulator/simulator/cases/diagnose_rule.go +++ b/tools/pd-simulator/simulator/cases/diagnose_rule.go @@ -46,8 +46,8 @@ func newRule1() *Case { }, LocationLabels: []string{"host"}, }, &placement.Rule{ - GroupID: "pd", - ID: "default", + GroupID: placement.DefaultGroupID, + ID: placement.DefaultRuleID, StartKeyHex: "", EndKeyHex: "", Role: placement.Voter, diff --git a/tools/pd-simulator/simulator/node.go b/tools/pd-simulator/simulator/node.go index b8fb422d6dd..68a10a8638e 100644 --- a/tools/pd-simulator/simulator/node.go +++ b/tools/pd-simulator/simulator/node.go @@ -25,6 +25,7 @@ import ( "github.com/pingcap/kvproto/pkg/metapb" "github.com/pingcap/kvproto/pkg/pdpb" "github.com/tikv/pd/pkg/ratelimit" + "github.com/tikv/pd/pkg/utils/syncutil" "github.com/tikv/pd/tools/pd-simulator/simulator/cases" "github.com/tikv/pd/tools/pd-simulator/simulator/info" "github.com/tikv/pd/tools/pd-simulator/simulator/simutil" @@ -39,7 +40,7 @@ const ( // Node simulates a TiKV. 
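// Illustrative note (not part of this patch): the fields below move from the standard
// sync mutexes to the project's syncutil wrappers; assuming the usual PD build setup,
// those wrappers allow optional deadlock detection to be compiled into simulator runs
// without changing the locking behaviour of a normal build.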
type Node struct { *metapb.Store - sync.RWMutex + syncutil.RWMutex stats *info.StoreStats tick uint64 wg sync.WaitGroup @@ -50,7 +51,7 @@ type Node struct { cancel context.CancelFunc raftEngine *RaftEngine limiter *ratelimit.RateLimiter - sizeMutex sync.Mutex + sizeMutex syncutil.Mutex hasExtraUsedSpace bool snapStats []*pdpb.SnapshotStat } diff --git a/tools/pd-tso-bench/go.mod b/tools/pd-tso-bench/go.mod index f89f11ee082..8d4b3d18a31 100644 --- a/tools/pd-tso-bench/go.mod +++ b/tools/pd-tso-bench/go.mod @@ -9,7 +9,7 @@ require ( github.com/prometheus/client_golang v1.11.1 github.com/tikv/pd/client v0.0.0-00010101000000-000000000000 go.uber.org/zap v1.24.0 - google.golang.org/grpc v1.54.0 + google.golang.org/grpc v1.56.3 ) require ( diff --git a/tools/pd-tso-bench/go.sum b/tools/pd-tso-bench/go.sum index 1c266823dee..15ba2923695 100644 --- a/tools/pd-tso-bench/go.sum +++ b/tools/pd-tso-bench/go.sum @@ -205,8 +205,8 @@ gonum.org/v1/netlib v0.0.0-20181029234149-ec6d1f5cefe6/go.mod h1:wa6Ws7BG/ESfp6d google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= google.golang.org/genproto v0.0.0-20230410155749-daa745c078e1 h1:KpwkzHKEF7B9Zxg18WzOa7djJ+Ha5DzthMyZYQfEn2A= google.golang.org/genproto v0.0.0-20230410155749-daa745c078e1/go.mod h1:nKE/iIaLqn2bQwXBg8f1g2Ylh6r5MN5CmZvuzZCgsCU= -google.golang.org/grpc v1.54.0 h1:EhTqbhiYeixwWQtAEZAxmV9MGqcjEU2mFx52xCzNyag= -google.golang.org/grpc v1.54.0/go.mod h1:PUSEXI6iWghWaB6lXM4knEgpJNu2qUcKfDtNci3EC2g= +google.golang.org/grpc v1.56.3 h1:8I4C0Yq1EjstUzUJzpcRVbuYA2mODtEmpWiQoN/b2nc= +google.golang.org/grpc v1.56.3/go.mod h1:I9bI3vqKfayGqPUAwGdOSu7kt6oIJLixfffKrpXqQ9s= google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM=
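// Illustrative sketch (not part of this patch): how a test is expected to drive the
// reworked SchedulingTestEnvironment from tests/testutil.go. Clusters are now created
// lazily per mode and reused across cases, so Cleanup must be called once at the end
// (for suites, typically from TearDownSuite) rather than after every test. The package
// name and the test body below are assumptions made purely for illustration.
package example_test

import (
	"testing"

	"github.com/tikv/pd/tests"
)

func TestSchedulingEnvironmentSketch(t *testing.T) {
	env := tests.NewSchedulingTestEnvironment(t)
	// Destroy the per-mode clusters and cancel their contexts once everything has run.
	defer env.Cleanup()

	// Runs the same check against the pd-mode cluster and the api-mode cluster
	// (the latter backed by a separate scheduling server).
	env.RunTestInTwoModes(func(cluster *tests.TestCluster) {
		if cluster.GetLeaderServer() == nil {
			t.Fatal("expected an elected leader in both modes")
		}
	})

	// RunFuncInTwoModes only visits clusters that were already started above, which is
	// useful for shared assertions or per-case resets between tests.
	env.RunFuncInTwoModes(func(cluster *tests.TestCluster) {
		_ = cluster.GetLeaderServer().GetAddr()
	})
}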