diff --git a/internal/locate/region_cache.go b/internal/locate/region_cache.go
index 00d79bada..622e70aec 100644
--- a/internal/locate/region_cache.go
+++ b/internal/locate/region_cache.go
@@ -452,10 +452,12 @@ func (c *RegionCache) Close() {
 	c.cancelFunc()
 }
 
+var reloadRegionInterval = int64(10 * time.Second)
+
 // asyncCheckAndResolveLoop with
 func (c *RegionCache) asyncCheckAndResolveLoop(interval time.Duration) {
 	ticker := time.NewTicker(interval)
-	reloadRegionTicker := time.NewTicker(10 * time.Second)
+	reloadRegionTicker := time.NewTicker(time.Duration(atomic.LoadInt64(&reloadRegionInterval)))
 	defer func() {
 		ticker.Stop()
 		reloadRegionTicker.Stop()
diff --git a/internal/locate/region_request.go b/internal/locate/region_request.go
index 6feb7c884..a9134509e 100644
--- a/internal/locate/region_request.go
+++ b/internal/locate/region_request.go
@@ -427,6 +427,8 @@ func (state *tryFollower) next(bo *retry.Backoffer, selector *replicaSelector) (
 		return rpcCtx, err
 	}
 	if state.fallbackFromLeader {
+		staleRead := false
+		rpcCtx.contextPatcher.staleRead = &staleRead
 		replicaRead := true
 		rpcCtx.contextPatcher.replicaRead = &replicaRead
 	}
@@ -562,6 +564,10 @@ type accessFollower struct {
 	lastIdx AccessIndex
 }
 
+// Follower read tries followers first; if no follower is available, it falls back to the leader.
+// In particular, for stale read, it tries the local peer (which can be either the leader or a follower), then uses snapshot read on the leader.
+// If the leader read hits server-is-busy or connection errors, the region cache is kept valid,
+// and the state is changed to tryFollower, which retries with replica read.
 func (state *accessFollower) next(bo *retry.Backoffer, selector *replicaSelector) (*RPCContext, error) {
 	resetStaleRead := false
 	if state.lastIdx < 0 {
@@ -609,7 +615,8 @@ func (state *accessFollower) next(bo *retry.Backoffer, selector *replicaSelector
 		// If there is no candidate, fallback to the leader.
 		if selector.targetIdx < 0 {
 			leader := selector.replicas[state.leaderIdx]
-			leaderInvalid := leader.isEpochStale() || state.IsLeaderExhausted(leader)
+			leaderEpochStale := leader.isEpochStale()
+			leaderInvalid := leaderEpochStale || state.IsLeaderExhausted(leader)
 			if len(state.option.labels) > 0 {
 				logutil.BgLogger().Warn("unable to find stores with given labels",
 					zap.Uint64("region", selector.region.GetID()),
@@ -617,6 +624,21 @@ func (state *accessFollower) next(bo *retry.Backoffer, selector *replicaSelector
 					zap.Any("labels", state.option.labels))
 			}
 			if leaderInvalid {
+				// For a stale read, the request falls back to the leader after the local follower fails.
+				// If the leader is also unavailable, we can fall back to the followers and use the replica-read flag again:
+				// the remote followers have not been tried yet, and the local follower can be retried without the stale-read flag.
+				if state.isStaleRead {
+					selector.state = &tryFollower{
+						fallbackFromLeader: true,
+						leaderIdx:          state.leaderIdx,
+						lastIdx:            state.leaderIdx,
+						labels:             state.option.labels,
+					}
+					if leaderEpochStale {
+						selector.regionCache.scheduleReloadRegion(selector.region)
+					}
+					return nil, stateChanged{}
+				}
 				metrics.TiKVReplicaSelectorFailureCounter.WithLabelValues("exhausted").Inc()
 				selector.invalidateRegion()
 				return nil, nil
@@ -655,13 +677,17 @@ func (state *accessFollower) onSendFailure(bo *retry.Backoffer, selector *replic
 }
 
 func (state *accessFollower) isCandidate(idx AccessIndex, replica *replica) bool {
-	return !replica.isEpochStale() && !replica.isExhausted(1) &&
+	if replica.isEpochStale() || replica.isExhausted(1) || replica.store.getLivenessState() == unreachable {
+		return false
+	}
+	if state.option.leaderOnly && idx == state.leaderIdx {
 		// The request can only be sent to the leader.
-		((state.option.leaderOnly && idx == state.leaderIdx) ||
-			// Choose a replica with matched labels.
-			(!state.option.leaderOnly && (state.tryLeader || idx != state.leaderIdx) && replica.store.IsLabelsMatch(state.option.labels))) &&
-		// Make sure the replica is not unreachable.
-		replica.store.getLivenessState() != unreachable
+		return true
+	} else if !state.tryLeader && idx == state.leaderIdx {
+		// The request cannot be sent to leader.
+		return false
+	}
+	return replica.store.IsLabelsMatch(state.option.labels)
 }
 
 type invalidStore struct {
@@ -930,25 +956,21 @@ func (s *replicaSelector) updateLeader(leader *metapb.Peer) {
 	s.region.invalidate(StoreNotFound)
 }
 
-// For some reason, the leader is unreachable by now, try followers instead.
-func (s *replicaSelector) fallback2Follower(ctx *RPCContext) bool {
-	if ctx == nil || s == nil || s.state == nil {
+// For some reason, the leader is unreachable by now; try followers instead.
+// The state is changed in accessFollower.next when the leader is unavailable.
+func (s *replicaSelector) canFallback2Follower() bool {
+	if s == nil || s.state == nil {
 		return false
 	}
 	state, ok := s.state.(*accessFollower)
 	if !ok {
 		return false
 	}
-	if state.lastIdx != state.leaderIdx {
+	if !state.isStaleRead {
 		return false
 	}
-	s.state = &tryFollower{
-		fallbackFromLeader: true,
-		leaderIdx:          state.leaderIdx,
-		lastIdx:            state.leaderIdx,
-		labels:             state.option.labels,
-	}
-	return true
+	// We can fall back to a follower only when the leader is exhausted.
+	return state.lastIdx == state.leaderIdx && state.IsLeaderExhausted(s.replicas[state.leaderIdx])
 }
 
 func (s *replicaSelector) invalidateRegion() {
@@ -1680,6 +1702,7 @@ func (s *RegionRequestSender) onRegionError(bo *retry.Backoffer, ctx *RPCContext
 	}
 
 	// This peer is removed from the region. Invalidate the region since it's too stale.
+	// If the region error comes from a follower, can we mark the peer unavailable and reload the region asynchronously?
 	if regionErr.GetRegionNotFound() != nil {
 		s.regionCache.InvalidateCachedRegion(ctx.Region)
 		return false, nil
@@ -1706,7 +1729,7 @@ func (s *RegionRequestSender) onRegionError(bo *retry.Backoffer, ctx *RPCContext
 		logutil.BgLogger().Warn("tikv reports `ServerIsBusy` retry later",
 			zap.String("reason", regionErr.GetServerIsBusy().GetReason()),
 			zap.Stringer("ctx", ctx))
-		if s.replicaSelector.fallback2Follower(ctx) {
+		if s.replicaSelector.canFallback2Follower() { // immediately retry on followers.
return true, nil } diff --git a/internal/locate/region_request_state_test.go b/internal/locate/region_request_state_test.go new file mode 100644 index 000000000..2f34e41ae --- /dev/null +++ b/internal/locate/region_request_state_test.go @@ -0,0 +1,838 @@ +// Copyright 2023 TiKV Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package locate + +import ( + "context" + "fmt" + "strconv" + "strings" + "sync/atomic" + "testing" + "time" + + "github.com/pingcap/kvproto/pkg/errorpb" + "github.com/pingcap/kvproto/pkg/kvrpcpb" + "github.com/pingcap/kvproto/pkg/metapb" + "github.com/pkg/errors" + "github.com/stretchr/testify/require" + tikverr "github.com/tikv/client-go/v2/error" + "github.com/tikv/client-go/v2/internal/mockstore/mocktikv" + "github.com/tikv/client-go/v2/internal/retry" + "github.com/tikv/client-go/v2/kv" + "github.com/tikv/client-go/v2/metrics" + "github.com/tikv/client-go/v2/tikvrpc" +) + +type testRegionCacheStaleReadSuite struct { + *require.Assertions + cluster *mocktikv.Cluster + storeIDs []uint64 + peerIDs []uint64 + regionID uint64 + leaderPeer uint64 + store2zone map[uint64]string + cache *RegionCache + bo *retry.Backoffer + regionRequestSender *RegionRequestSender + mvccStore mocktikv.MVCCStore + injection testRegionCacheFSMSuiteInjection +} + +type testRegionCacheFSMSuiteInjection struct { + leaderRegionError func(*tikvrpc.Request, string) *errorpb.Error + followerRegionError func(*tikvrpc.Request, string) *errorpb.Error + unavailableStoreIDs map[uint64]struct{} + timeoutStoreIDs map[uint64]struct{} +} + +type SuccessReadType int + +const ( + ReadFail SuccessReadType = iota + SuccessLeaderRead + SuccessFollowerRead + SuccessStaleRead +) + +func (s *testRegionCacheStaleReadSuite) SetupTest() { + s.mvccStore = mocktikv.MustNewMVCCStore() + s.cluster = mocktikv.NewCluster(s.mvccStore) + s.storeIDs, s.peerIDs, s.regionID, s.leaderPeer, s.store2zone = mocktikv.BootstrapWithMultiZones(s.cluster, 3, 2) + pdCli := &CodecPDClient{mocktikv.NewPDClient(s.cluster)} + s.cache = NewRegionCache(pdCli) + s.bo = retry.NewNoopBackoff(context.Background()) + client := mocktikv.NewRPCClient(s.cluster, s.mvccStore, nil) + s.regionRequestSender = NewRegionRequestSender(s.cache, client) + s.setClient() + s.injection = testRegionCacheFSMSuiteInjection{ + unavailableStoreIDs: make(map[uint64]struct{}), + } +} + +func (s *testRegionCacheStaleReadSuite) TearDownTest() { + s.cache.testingKnobs.mockRequestLiveness.Store((*livenessFunc)(nil)) + s.cache.Close() + s.mvccStore.Close() +} + +func (s *testRegionCacheStaleReadSuite) getStore(leader bool) (uint64, *metapb.Store) { + var ( + zone string + peerID uint64 + storeID uint64 + ) + if leader { + zone = "z1" + } else { + zone = "z2" + } + region, _ := s.cluster.GetRegion(s.regionID) +FIND: + for _, peer := range region.Peers { + store := s.cluster.GetStore(peer.StoreId) + for _, label := range store.Labels { + if label.Key == "zone" && label.Value == zone { + peerID = peer.Id + storeID = peer.StoreId + break FIND + } + } + } + store 
:= s.cluster.GetStore(storeID) + if store == nil { + return 0, nil + } + return peerID, store +} + +func (s *testRegionCacheStaleReadSuite) getLeader() (uint64, *metapb.Store) { + return s.getStore(true) +} + +func (s *testRegionCacheStaleReadSuite) getFollower() (uint64, *metapb.Store) { + return s.getStore(false) +} + +func (s *testRegionCacheStaleReadSuite) setClient() { + s.regionRequestSender.client = &fnClient{fn: func(ctx context.Context, addr string, req *tikvrpc.Request, timeout time.Duration) (response *tikvrpc.Response, err error) { + var store *metapb.Store + find := false + for _, one := range s.cluster.GetAllStores() { + if one.Address == addr { + store = one + find = true + break + } + } + if !find { + return nil, errors.New("no available connections") + } + if _, unavailable := s.injection.unavailableStoreIDs[store.Id]; unavailable { + return nil, errors.New("no available connections") + } + if _, timeout := s.injection.timeoutStoreIDs[store.Id]; timeout { + return nil, errors.WithMessage(context.DeadlineExceeded, "wait recvLoop") + } + + zone := "" + for _, label := range store.Labels { + if label.Key == "zone" { + zone = label.Value + break + } + } + response = &tikvrpc.Response{} + region, _ := s.cluster.GetRegion(s.regionID) + peerExist := false + for _, peer := range region.Peers { + if req.Peer.Id == peer.Id { + if peer.StoreId != store.Id { + response.Resp = &kvrpcpb.GetResponse{RegionError: &errorpb.Error{ + RegionNotFound: &errorpb.RegionNotFound{RegionId: s.regionID}, + }} + return + } + peerExist = true + } + } + if !peerExist { + response.Resp = &kvrpcpb.GetResponse{RegionError: &errorpb.Error{ + RegionNotFound: &errorpb.RegionNotFound{RegionId: s.regionID}, + }} + return + } + + _, leader := s.getLeader() + s.NotNil(leader) + isLeader := addr == leader.Address + if isLeader { + // leader region error + if s.injection.leaderRegionError != nil { + if regionRrr := s.injection.leaderRegionError(req, zone); regionRrr != nil { + response.Resp = &kvrpcpb.GetResponse{RegionError: regionRrr} + return + } + } + } else { + // follower read leader + if !req.ReplicaRead && !req.StaleRead { + _, leaderPeer, _ := s.cluster.GetRegionByID(s.regionID) + response.Resp = &kvrpcpb.GetResponse{RegionError: &errorpb.Error{ + NotLeader: &errorpb.NotLeader{ + RegionId: req.RegionId, + Leader: leaderPeer, + }, + }} + return + } + // follower region error + if s.injection.followerRegionError != nil { + if regionRrr := s.injection.followerRegionError(req, zone); regionRrr != nil { + response.Resp = &kvrpcpb.GetResponse{RegionError: regionRrr} + return + } + } + } + // no error + var successReadType SuccessReadType + if req.StaleRead { + successReadType = SuccessStaleRead + } else if isLeader { + successReadType = SuccessLeaderRead + } else { + successReadType = SuccessFollowerRead + } + s.NotEmpty(zone) + respStr := fmt.Sprintf("%d-%s-%d", store.Id, zone, successReadType) + response.Resp = &kvrpcpb.GetResponse{Value: []byte(respStr)} + return + }} + + tf := func(store *Store, bo *retry.Backoffer) livenessState { + _, ok := s.injection.unavailableStoreIDs[store.storeID] + if ok { + return unreachable + } + return reachable + } + s.cache.testingKnobs.mockRequestLiveness.Store((*livenessFunc)(&tf)) +} + +func (s *testRegionCacheStaleReadSuite) extractResp(resp *tikvrpc.Response) (uint64, string, SuccessReadType) { + resps := strings.Split(string(resp.Resp.(*kvrpcpb.GetResponse).Value), "-") + s.Len(resps, 3) + storeID, err := strconv.Atoi(resps[0]) + s.Nil(err) + successReadType, err := 
strconv.Atoi(resps[2]) + return uint64(storeID), resps[1], SuccessReadType(successReadType) +} + +func (s *testRegionCacheStaleReadSuite) setUnavailableStore(id uint64) { + s.injection.unavailableStoreIDs[id] = struct{}{} +} + +func (s *testRegionCacheStaleReadSuite) setTimeout(id uint64) { + s.injection.timeoutStoreIDs[id] = struct{}{} +} + +func TestRegionCacheStaleRead(t *testing.T) { + originReloadRegionInterval := atomic.LoadInt64(&reloadRegionInterval) + originBoTiKVServerBusy := retry.BoTiKVServerBusy + defer func() { + reloadRegionInterval = originReloadRegionInterval + retry.BoTiKVServerBusy = originBoTiKVServerBusy + }() + atomic.StoreInt64(&reloadRegionInterval, int64(24*time.Hour)) // disable reload region + retry.BoTiKVServerBusy = retry.NewConfig("tikvServerBusy", &metrics.BackoffHistogramServerBusy, retry.NewBackoffFnCfg(2, 10, retry.EqualJitter), tikverr.ErrTiKVServerBusy) + regionCacheTestCases := []RegionCacheTestCase{ + { + do: followerDown, + leaderRegionValid: true, + leaderAsyncReload: Some(false), + leaderSuccessReplica: []string{"z1"}, + leaderSuccessReadType: SuccessStaleRead, + followerRegionValid: true, + followerAsyncReload: Some(false), + followerSuccessReplica: []string{"z1"}, + followerSuccessReadType: SuccessLeaderRead, + }, + { + do: followerDownAndUp, + leaderRegionValid: true, + leaderAsyncReload: None[bool](), + leaderSuccessReplica: []string{"z1"}, + leaderSuccessReadType: SuccessStaleRead, + followerRegionValid: true, + followerAsyncReload: Some(true), + followerSuccessReplica: []string{"z1"}, + // because follower's epoch is changed, leader will be selected. + followerSuccessReadType: SuccessStaleRead, + }, + { + do: followerMove, + recoverable: true, + leaderRegionValid: true, + leaderAsyncReload: Some(false), + leaderSuccessReplica: []string{"z1"}, + leaderSuccessReadType: SuccessStaleRead, + followerRegionValid: false, + followerAsyncReload: Some(false), + // may async reload region and access it from leader. + followerSuccessReplica: []string{}, + followerSuccessReadType: ReadFail, + }, + { + do: evictLeader, + leaderRegionValid: true, + leaderAsyncReload: Some(false), + // leader is evicted, but can still serve as follower. 
+ leaderSuccessReplica: []string{"z1"}, + leaderSuccessReadType: SuccessStaleRead, + followerRegionValid: true, + followerAsyncReload: Some(false), + followerSuccessReplica: []string{"z2"}, + followerSuccessReadType: SuccessStaleRead, + }, + { + do: leaderMove, + leaderRegionValid: false, + leaderAsyncReload: Some(false), + leaderSuccessReplica: []string{}, + leaderSuccessReadType: ReadFail, + followerRegionValid: true, + followerAsyncReload: Some(false), + followerSuccessReplica: []string{"z2"}, + followerSuccessReadType: SuccessStaleRead, + }, + { + do: leaderDown, + leaderRegionValid: true, + leaderAsyncReload: Some(true), + leaderSuccessReplica: []string{"z2", "z3"}, + leaderSuccessReadType: SuccessFollowerRead, + followerRegionValid: true, + followerAsyncReload: Some(false), + followerSuccessReplica: []string{"z2"}, + followerSuccessReadType: SuccessStaleRead, + }, + { + do: leaderDownAndUp, + leaderRegionValid: true, + leaderAsyncReload: Some(true), + leaderSuccessReplica: []string{"z2", "z3"}, + leaderSuccessReadType: SuccessFollowerRead, + followerRegionValid: true, + followerAsyncReload: None[bool](), + followerSuccessReplica: []string{"z2"}, + followerSuccessReadType: SuccessStaleRead, + }, + { + do: leaderDownAndElect, + leaderRegionValid: true, + leaderAsyncReload: Some(true), + leaderSuccessReplica: []string{"z2", "z3"}, + leaderSuccessReadType: SuccessFollowerRead, + followerRegionValid: true, + followerAsyncReload: None[bool](), + followerSuccessReplica: []string{"z2"}, + followerSuccessReadType: SuccessStaleRead, + }, + { + do: leaderDataIsNotReady, + leaderRegionValid: true, + leaderAsyncReload: Some(false), + leaderSuccessReplica: []string{"z1"}, + leaderSuccessReadType: SuccessLeaderRead, + followerRegionValid: true, + followerAsyncReload: Some(false), + followerSuccessReplica: []string{"z2"}, + followerSuccessReadType: SuccessStaleRead, + }, + { + do: followerDataIsNotReady, + leaderRegionValid: true, + leaderAsyncReload: Some(false), + leaderSuccessReplica: []string{"z1"}, + leaderSuccessReadType: SuccessStaleRead, + followerRegionValid: true, + followerAsyncReload: Some(false), + followerSuccessReplica: []string{"z1"}, + followerSuccessReadType: SuccessLeaderRead, + }, + { + debug: true, + do: leaderServerIsBusy, + recoverable: true, + leaderRegionValid: true, + leaderAsyncReload: Some(false), + leaderSuccessReplica: []string{"z2", "z3"}, + leaderSuccessReadType: SuccessFollowerRead, + followerRegionValid: true, + followerAsyncReload: Some(false), + followerSuccessReplica: []string{"z2"}, + followerSuccessReadType: SuccessStaleRead, + }, + { + do: followerServerIsBusy, + recoverable: true, + leaderRegionValid: true, + leaderAsyncReload: Some(false), + leaderSuccessReplica: []string{"z1"}, + leaderSuccessReadType: SuccessStaleRead, + followerRegionValid: true, + followerAsyncReload: Some(false), + followerSuccessReplica: []string{"z1"}, + followerSuccessReadType: SuccessLeaderRead, + }, + { + do: leaderDataIsNotReady, + extra: []func(suite *testRegionCacheStaleReadSuite){followerServerIsBusy}, + recoverable: true, + leaderRegionValid: true, + leaderAsyncReload: Some(false), + leaderSuccessReplica: []string{"z1"}, + leaderSuccessReadType: SuccessLeaderRead, + followerRegionValid: true, + followerAsyncReload: Some(false), + followerSuccessReplica: []string{"z1"}, + followerSuccessReadType: SuccessLeaderRead, + }, + { + do: leaderDataIsNotReady, + extra: []func(suite *testRegionCacheStaleReadSuite){followerDataIsNotReady}, + recoverable: true, + leaderRegionValid: true, + 
leaderAsyncReload: Some(false), + leaderSuccessReplica: []string{"z1"}, + leaderSuccessReadType: SuccessLeaderRead, + followerRegionValid: true, + followerAsyncReload: Some(false), + followerSuccessReplica: []string{"z1"}, + followerSuccessReadType: SuccessLeaderRead, + }, + { + do: leaderDataIsNotReady, + extra: []func(suite *testRegionCacheStaleReadSuite){followerDown}, + recoverable: true, + leaderRegionValid: true, + leaderAsyncReload: Some(false), + leaderSuccessReplica: []string{"z1"}, + leaderSuccessReadType: SuccessLeaderRead, + followerRegionValid: true, + followerAsyncReload: Some(false), + followerSuccessReplica: []string{"z1"}, + followerSuccessReadType: SuccessLeaderRead, + }, + { + do: leaderServerIsBusy, + extra: []func(suite *testRegionCacheStaleReadSuite){followerServerIsBusy}, + recoverable: true, + leaderRegionValid: true, + leaderAsyncReload: Some(false), + leaderSuccessReplica: []string{"z3"}, + leaderSuccessReadType: SuccessFollowerRead, + followerRegionValid: true, + followerAsyncReload: Some(false), + followerSuccessReplica: []string{"z3"}, + followerSuccessReadType: SuccessFollowerRead, + }, + { + do: leaderServerIsBusy, + extra: []func(suite *testRegionCacheStaleReadSuite){followerDataIsNotReady}, + recoverable: true, + leaderRegionValid: true, + leaderAsyncReload: Some(false), + leaderSuccessReplica: []string{"z2", "z3"}, + leaderSuccessReadType: SuccessFollowerRead, + followerRegionValid: true, + followerAsyncReload: Some(false), + followerSuccessReplica: []string{"z2", "z3"}, + followerSuccessReadType: SuccessFollowerRead, + }, + { + do: leaderServerIsBusy, + extra: []func(suite *testRegionCacheStaleReadSuite){followerDown}, + recoverable: true, + leaderRegionValid: true, + leaderAsyncReload: Some(false), + leaderSuccessReplica: []string{"z3"}, + leaderSuccessReadType: SuccessFollowerRead, + followerRegionValid: true, + followerAsyncReload: Some(false), + followerSuccessReplica: []string{"z3"}, + followerSuccessReadType: SuccessFollowerRead, + }, + { + do: leaderDown, + extra: []func(suite *testRegionCacheStaleReadSuite){followerDataIsNotReady}, + recoverable: true, + leaderRegionValid: true, + leaderAsyncReload: Some(true), + leaderSuccessReplica: []string{"z2", "z3"}, + leaderSuccessReadType: SuccessFollowerRead, + followerRegionValid: true, + followerAsyncReload: Some(true), + followerSuccessReplica: []string{"z2", "z3"}, + followerSuccessReadType: SuccessFollowerRead, + }, + { + do: leaderDown, + extra: []func(suite *testRegionCacheStaleReadSuite){followerServerIsBusy}, + recoverable: true, + leaderRegionValid: true, + leaderAsyncReload: Some(true), + leaderSuccessReplica: []string{"z3"}, + leaderSuccessReadType: SuccessFollowerRead, + followerRegionValid: true, + followerAsyncReload: Some(true), + followerSuccessReplica: []string{"z3"}, + followerSuccessReadType: SuccessFollowerRead, + }, + { + do: leaderDown, + extra: []func(suite *testRegionCacheStaleReadSuite){followerDown}, + recoverable: true, + leaderRegionValid: true, + leaderAsyncReload: Some(true), + leaderSuccessReplica: []string{"z3"}, + leaderSuccessReadType: SuccessFollowerRead, + followerRegionValid: true, + followerAsyncReload: Some(true), + followerSuccessReplica: []string{"z3"}, + followerSuccessReadType: SuccessFollowerRead, + }, + } + tests := []func(*testRegionCacheStaleReadSuite, *RegionCacheTestCase){ + testStaleReadFollower, testStaleReadLeader, + } + for _, regionCacheTestCase := range regionCacheTestCases { + for _, test := range tests { + s := &testRegionCacheStaleReadSuite{ + 
Assertions: require.New(t), + } + s.SetupTest() + _, err := s.cache.LocateRegionByID(s.bo, s.regionID) + s.Nil(err) + regionCacheTestCase.do(s) + for _, extra := range regionCacheTestCase.extra { + extra(s) + } + test(s, ®ionCacheTestCase) + s.TearDownTest() + } + } +} + +func testStaleReadFollower(s *testRegionCacheStaleReadSuite, r *RegionCacheTestCase) { + testStaleRead(s, r, "z2") +} + +func testStaleReadLeader(s *testRegionCacheStaleReadSuite, r *RegionCacheTestCase) { + testStaleRead(s, r, "z1") +} + +func testStaleRead(s *testRegionCacheStaleReadSuite, r *RegionCacheTestCase, zone string) { + ctx, _ := context.WithTimeout(context.Background(), 10*time.Second) + leaderZone := zone == "z1" + var available bool + if leaderZone { + available = len(r.leaderSuccessReplica) > 0 + } else { + available = len(r.followerSuccessReplica) > 0 + } + + regionLoc, err := s.cache.LocateRegionByID(s.bo, s.regionID) + s.Nil(err) + s.NotNil(regionLoc) + + s.cache.mu.RLock() + region := s.cache.getRegionByIDFromCache(s.regionID) + s.cache.mu.RUnlock() + defer func() { + var ( + valid bool + asyncReload *bool + ) + if leaderZone { + valid = r.leaderRegionValid + asyncReload = r.leaderAsyncReload.Inner() + } else { + valid = r.followerRegionValid + asyncReload = r.followerAsyncReload.Inner() + } + s.Equal(valid, region.isValid()) + + if asyncReload == nil { + return + } + + s.cache.regionsNeedReload.Lock() + if *asyncReload { + s.Len(s.cache.regionsNeedReload.regions, 1) + s.Equal(s.cache.regionsNeedReload.regions[0], s.regionID) + } else { + s.Empty(s.cache.regionsNeedReload.regions) + } + s.cache.regionsNeedReload.Unlock() + }() + + bo := retry.NewBackoffer(ctx, -1) + req := tikvrpc.NewReplicaReadRequest(tikvrpc.CmdGet, &kvrpcpb.GetRequest{Key: []byte("key")}, kv.ReplicaReadMixed, nil) + req.EnableStaleRead() + ops := []StoreSelectorOption{WithMatchLabels([]*metapb.StoreLabel{{ + Key: "zone", + Value: zone, + }})} + + resp, _, err := s.regionRequestSender.SendReqCtx(bo, req, regionLoc.Region, time.Second, tikvrpc.TiKV, ops...) 
+ if !available { + if err != nil { + return + } + regionErr, err := resp.GetRegionError() + s.Nil(err) + s.NotNil(regionErr) + return + } + + _, successZone, successReadType := s.extractResp(resp) + find := false + if leaderZone { + s.Equal(r.leaderSuccessReadType, successReadType) + for _, z := range r.leaderSuccessReplica { + if z == successZone { + find = true + break + } + } + } else { + s.Equal(r.followerSuccessReadType, successReadType) + for _, z := range r.followerSuccessReplica { + if z == successZone { + find = true + break + } + } + } + s.True(find) +} + +type Option[T interface{}] struct { + inner *T +} + +func Some[T interface{}](inner T) Option[T] { + return Option[T]{inner: &inner} +} + +func None[T interface{}]() Option[T] { + return Option[T]{inner: nil} +} + +func (o Option[T]) Inner() *T { + return o.inner +} + +type RegionCacheTestCase struct { + debug bool + do func(s *testRegionCacheStaleReadSuite) + extra []func(s *testRegionCacheStaleReadSuite) + recoverable bool + // local peer is leader + leaderRegionValid bool + leaderAsyncReload Option[bool] + leaderSuccessReplica []string + leaderSuccessReadType SuccessReadType + // local peer is follower + followerRegionValid bool + followerAsyncReload Option[bool] + followerSuccessReplica []string + followerSuccessReadType SuccessReadType +} + +func followerDown(s *testRegionCacheStaleReadSuite) { + _, follower := s.getFollower() + s.NotNil(follower) + s.setUnavailableStore(follower.Id) +} + +func followerDownAndUp(s *testRegionCacheStaleReadSuite) { + s.cache.mu.RLock() + cachedRegion := s.cache.getRegionByIDFromCache(s.regionID) + s.cache.mu.RUnlock() + _, follower := s.getFollower() + s.NotNil(cachedRegion) + s.NotNil(follower) + regionStore := cachedRegion.getStore() + for _, storeIdx := range regionStore.accessIndex[tiKVOnly] { + if regionStore.stores[storeIdx].storeID == follower.Id { + atomic.AddUint32(®ionStore.stores[storeIdx].epoch, 1) + } + } +} + +func followerMove(s *testRegionCacheStaleReadSuite) { + peerID, follower := s.getFollower() + zone := "" + for _, label := range follower.Labels { + if label.Key == "zone" { + zone = label.Value + break + } + } + s.NotEqual("", zone) + var target *metapb.Store +FIND: + for _, store := range s.cluster.GetAllStores() { + if store.Id == follower.Id { + continue + } + for _, label := range store.Labels { + if label.Key == "zone" && label.Value == zone { + target = store + break FIND + } + } + } + s.NotNil(target) + s.cluster.RemovePeer(s.regionID, peerID) + s.cluster.AddPeer(s.regionID, target.Id, peerID) +} + +func evictLeader(s *testRegionCacheStaleReadSuite) { + region, leader := s.cluster.GetRegion(s.regionID) + for _, peer := range region.Peers { + if peer.Id != leader { + s.cluster.ChangeLeader(s.regionID, peer.Id) + return + } + } + s.Fail("unreachable") +} + +func leaderMove(s *testRegionCacheStaleReadSuite) { + peerID, leader := s.getLeader() + zone := "" + for _, label := range leader.Labels { + if label.Key == "zone" { + zone = label.Value + break + } + } + s.NotEqual("", zone) + var target *metapb.Store +FIND: + for _, store := range s.cluster.GetAllStores() { + if store.Id == leader.Id { + continue + } + for _, label := range store.Labels { + if label.Key == "zone" && label.Value == zone { + target = store + break FIND + } + } + } + s.NotNil(target) + s.cluster.RemovePeer(s.regionID, peerID) + s.cluster.AddPeer(s.regionID, target.Id, peerID) + s.cluster.ChangeLeader(s.regionID, peerID) +} + +func leaderDown(s *testRegionCacheStaleReadSuite) { + _, leader := 
s.getLeader() + s.NotNil(leader) + s.setUnavailableStore(leader.Id) +} + +func leaderDownAndUp(s *testRegionCacheStaleReadSuite) { + s.cache.mu.RLock() + cachedRegion := s.cache.getRegionByIDFromCache(s.regionID) + s.cache.mu.RUnlock() + _, leader := s.getLeader() + s.NotNil(cachedRegion) + s.NotNil(leader) + regionStore := cachedRegion.getStore() + for _, storeIdx := range regionStore.accessIndex[tiKVOnly] { + if regionStore.stores[storeIdx].storeID == leader.Id { + atomic.AddUint32(®ionStore.stores[storeIdx].epoch, 1) + } + } +} +func leaderDownAndElect(s *testRegionCacheStaleReadSuite) { + _, leader := s.getLeader() + s.NotNil(leader) + leaderMove(s) + s.setUnavailableStore(leader.Id) +} + +func leaderDataIsNotReady(s *testRegionCacheStaleReadSuite) { + peerID, _ := s.getLeader() + s.injection.leaderRegionError = func(req *tikvrpc.Request, zone string) *errorpb.Error { + if !req.StaleRead || zone != "z1" { + return nil + } + return &errorpb.Error{ + DataIsNotReady: &errorpb.DataIsNotReady{ + RegionId: s.regionID, + PeerId: peerID, + SafeTs: 0, + }, + } + } +} + +func leaderServerIsBusy(s *testRegionCacheStaleReadSuite) { + s.injection.leaderRegionError = func(req *tikvrpc.Request, zone string) *errorpb.Error { + if zone != "z1" { + return nil + } + return &errorpb.Error{ + ServerIsBusy: &errorpb.ServerIsBusy{ + Reason: "test", + BackoffMs: 1, + }, + } + } +} + +func followerDataIsNotReady(s *testRegionCacheStaleReadSuite) { + s.injection.followerRegionError = func(req *tikvrpc.Request, zone string) *errorpb.Error { + if !req.StaleRead || zone != "z2" { + return nil + } + return &errorpb.Error{ + DataIsNotReady: &errorpb.DataIsNotReady{ + RegionId: s.regionID, + SafeTs: 0, + }, + } + } +} + +func followerServerIsBusy(s *testRegionCacheStaleReadSuite) { + s.injection.followerRegionError = func(req *tikvrpc.Request, zone string) *errorpb.Error { + if zone != "z2" { + return nil + } + return &errorpb.Error{ + ServerIsBusy: &errorpb.ServerIsBusy{ + Reason: "test", + BackoffMs: 1, + }, + } + } +} diff --git a/internal/mockstore/mocktikv/cluster.go b/internal/mockstore/mocktikv/cluster.go index e90f043dd..aba9a3c23 100644 --- a/internal/mockstore/mocktikv/cluster.go +++ b/internal/mockstore/mocktikv/cluster.go @@ -385,11 +385,11 @@ func (c *Cluster) AddPeer(regionID, storeID, peerID uint64) { // RemovePeer removes the Peer from the Region. Note that if the Peer is leader, // the Region will have no leader before calling ChangeLeader(). -func (c *Cluster) RemovePeer(regionID, storeID uint64) { +func (c *Cluster) RemovePeer(regionID, peerID uint64) { c.Lock() defer c.Unlock() - c.regions[regionID].removePeer(storeID) + c.regions[regionID].removePeer(peerID) } // ChangeLeader sets the Region's leader Peer. Caller should guarantee the Peer diff --git a/internal/mockstore/mocktikv/cluster_manipulate.go b/internal/mockstore/mocktikv/cluster_manipulate.go index ae02beeb5..e61707940 100644 --- a/internal/mockstore/mocktikv/cluster_manipulate.go +++ b/internal/mockstore/mocktikv/cluster_manipulate.go @@ -80,3 +80,29 @@ func BootstrapWithMultiRegions(cluster *Cluster, splitKeys ...[]byte) (storeID u } return } + +// BootstrapWithMultiZones initializes a Cluster with 1 Region and n Zones and m Stores in each Zone. 
+func BootstrapWithMultiZones(cluster *Cluster, n, m int) (storeIDs, peerIDs []uint64, regionID uint64, leaderPeer uint64, store2zone map[uint64]string) {
+	storeIDs = cluster.AllocIDs(n * m)
+	peerIDs = cluster.AllocIDs(n)
+	leaderPeer = peerIDs[0]
+	regionID = cluster.AllocID()
+	store2zone = make(map[uint64]string, n*m)
+	for id, storeID := range storeIDs {
+		zone := fmt.Sprintf("z%d", (id%n)+1)
+		store2zone[storeID] = zone
+		labels := []*metapb.StoreLabel{
+			{
+				Key:   "id",
+				Value: fmt.Sprintf("%v", storeID),
+			},
+			{
+				Key:   "zone",
+				Value: zone,
+			},
+		}
+		cluster.AddStore(storeID, fmt.Sprintf("store%d", storeID), labels...)
+	}
+	cluster.Bootstrap(regionID, storeIDs[:n], peerIDs, leaderPeer)
+	return
+}
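
The retry order described by the new comments in region_request.go is: stale read on the zone-local peer, then a snapshot read on the leader, then replica read on the remaining followers with the stale-read flag cleared (tryFollower with fallbackFromLeader set). Below is a minimal standalone sketch of that decision order; the `attempt` type and `staleReadFallbackChain` function are illustrative only and are not client-go APIs.

```go
package main

import "fmt"

// attempt models one read attempt in the stale-read fallback chain.
// The names here are illustrative and do not exist in client-go.
type attempt struct {
	target      string // which replica is tried
	staleRead   bool   // value of the stale_read request flag
	replicaRead bool   // value of the replica_read request flag
}

// staleReadFallbackChain sketches the order of attempts for a zone-local
// stale read, assuming the local peer is a follower:
//  1. stale read on the matched (local) follower,
//  2. snapshot read on the leader if the follower fails or reports DataIsNotReady,
//  3. replica read on the remaining followers if the leader is exhausted or busy,
//     with the stale-read flag cleared.
func staleReadFallbackChain() []attempt {
	return []attempt{
		{target: "local follower", staleRead: true},
		{target: "leader", staleRead: false, replicaRead: false},
		{target: "other followers", staleRead: false, replicaRead: true},
	}
}

func main() {
	for i, a := range staleReadFallbackChain() {
		fmt.Printf("attempt %d: %-15s stale_read=%v replica_read=%v\n",
			i+1, a.target, a.staleRead, a.replicaRead)
	}
}
```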
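For reference, `BootstrapWithMultiZones(cluster, 3, 2)` as used in `SetupTest` creates six stores across zones z1–z3 (two per zone), places the region's three peers on the first three stores (one per zone), and makes the z1 peer the leader. The sketch below reproduces only the `(id%n)+1` zone-labeling rule from the helper; the store IDs shown are illustrative, since the real IDs come from `cluster.AllocIDs`.

```go
package main

import "fmt"

// zoneLayout mirrors the store-to-zone rule from BootstrapWithMultiZones:
// store index i (0-based) is labeled zone "z<(i%n)+1>", so with n zones and
// m stores per zone every zone ends up with exactly m stores.
func zoneLayout(n, m int) map[string][]int {
	layout := make(map[string][]int, n)
	for id := 0; id < n*m; id++ {
		zone := fmt.Sprintf("z%d", (id%n)+1)
		layout[zone] = append(layout[zone], id+1) // illustrative store IDs starting at 1
	}
	return layout
}

func main() {
	// Matches the test setup: 3 zones, 2 stores per zone, 6 stores total.
	for zone, stores := range zoneLayout(3, 2) {
		fmt.Printf("%s: stores %v\n", zone, stores)
	}
}
```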