From cd19cfadf46c1028c7beb50cb5803c34d8d2a775 Mon Sep 17 00:00:00 2001 From: MyonKeminta <9948422+MyonKeminta@users.noreply.github.com> Date: Fri, 20 Mar 2020 16:03:53 +0800 Subject: [PATCH 1/2] gc_worker: Skip TiFlash nodes when doing UnsafeDestroyRange and Green GC (#15505) --- store/tikv/gcworker/gc_worker.go | 55 +++++++++++++++++++++++++-- store/tikv/gcworker/gc_worker_test.go | 30 +++++++++++++++ 2 files changed, 81 insertions(+), 4 deletions(-) diff --git a/store/tikv/gcworker/gc_worker.go b/store/tikv/gcworker/gc_worker.go index 1f8c15e7537dc..a6fd34438972c 100644 --- a/store/tikv/gcworker/gc_worker.go +++ b/store/tikv/gcworker/gc_worker.go @@ -389,7 +389,7 @@ func (w *GCWorker) getGCConcurrency(ctx context.Context) (int, error) { return w.loadGCConcurrencyWithDefault() } - stores, err := w.getUpStores(ctx) + stores, err := w.getUpStoresForGC(ctx) concurrency := len(stores) if err != nil { logutil.Logger(ctx).Error("[gc worker] failed to get up stores to calculate concurrency. use config.", @@ -648,7 +648,7 @@ func (w *GCWorker) redoDeleteRanges(ctx context.Context, safePoint uint64, concu func (w *GCWorker) doUnsafeDestroyRangeRequest(ctx context.Context, startKey []byte, endKey []byte, concurrency int) error { // Get all stores every time deleting a region. So the store list is less probably to be stale. - stores, err := w.getUpStores(ctx) + stores, err := w.getUpStoresForGC(ctx) if err != nil { logutil.Logger(ctx).Error("[gc worker] delete ranges: got an error while trying to get store list from PD", zap.String("uuid", w.uuid), @@ -699,7 +699,47 @@ func (w *GCWorker) doUnsafeDestroyRangeRequest(ctx context.Context, startKey []b return errors.Trace(err) } -func (w *GCWorker) getUpStores(ctx context.Context) ([]*metapb.Store, error) { +const ( + engineLabelKey = "engine" + engineLabelTiFlash = "tiflash" + engineLabelTiKV = "tikv" +) + +// needsGCOperationForStore checks if the store-level requests related to GC needs to be sent to the store. The store-level +// requests includes UnsafeDestroyRange, PhysicalScanLock, etc. +func needsGCOperationForStore(store *metapb.Store) (bool, error) { + engineLabel := "" + + for _, label := range store.GetLabels() { + if label.GetKey() == engineLabelKey { + engineLabel = label.GetValue() + break + } + } + + switch engineLabel { + case engineLabelTiFlash: + // For a TiFlash node, it uses other approach to delete dropped tables, so it's safe to skip sending + // UnsafeDestroyRange requests; it has only learner peers and their data must exist in TiKV, so it's safe to + // skip physical resolve locks for it. + return false, nil + + case "": + // If no engine label is set, it should be a TiKV node. + fallthrough + case engineLabelTiKV: + return true, nil + + default: + return true, errors.Errorf("unsupported store engine \"%v\" with storeID %v, addr %v", + engineLabel, + store.GetId(), + store.GetAddress()) + } +} + +// getUpStoresForGC gets the list of stores that needs to be processed during GC. +func (w *GCWorker) getUpStoresForGC(ctx context.Context) ([]*metapb.Store, error) { stores, err := w.pdClient.GetAllStores(ctx) if err != nil { return nil, errors.Trace(err) @@ -707,7 +747,14 @@ func (w *GCWorker) getUpStores(ctx context.Context) ([]*metapb.Store, error) { upStores := make([]*metapb.Store, 0, len(stores)) for _, store := range stores { - if store.State == metapb.StoreState_Up { + if store.State != metapb.StoreState_Up { + continue + } + needsGCOp, err := needsGCOperationForStore(store) + if err != nil { + return nil, errors.Trace(err) + } + if needsGCOp { upStores = append(upStores, store) } } diff --git a/store/tikv/gcworker/gc_worker_test.go b/store/tikv/gcworker/gc_worker_test.go index ca08c15e7fcb9..9336d411dcb58 100644 --- a/store/tikv/gcworker/gc_worker_test.go +++ b/store/tikv/gcworker/gc_worker_test.go @@ -328,6 +328,36 @@ func (s *testGCWorkerSuite) TestCheckGCMode(c *C) { c.Assert(useDistributedGC, Equals, true) } +func (s *testGCWorkerSuite) TestNeedsGCOperationForStore(c *C) { + newStore := func(hasEngineLabel bool, engineLabel string) *metapb.Store { + store := &metapb.Store{} + if hasEngineLabel { + store.Labels = []*metapb.StoreLabel{{Key: engineLabelKey, Value: engineLabel}} + } + return store + } + + // TiKV needs to do the store-level GC operations. + res, err := needsGCOperationForStore(newStore(false, "")) + c.Assert(err, IsNil) + c.Assert(res, IsTrue) + res, err = needsGCOperationForStore(newStore(true, "")) + c.Assert(err, IsNil) + c.Assert(res, IsTrue) + res, err = needsGCOperationForStore(newStore(true, engineLabelTiKV)) + c.Assert(err, IsNil) + c.Assert(res, IsTrue) + + // TiFlash does not need these operations. + res, err = needsGCOperationForStore(newStore(true, engineLabelTiFlash)) + c.Assert(err, IsNil) + c.Assert(res, IsFalse) + + // Throw an error for unknown store types. + _, err = needsGCOperationForStore(newStore(true, "invalid")) + c.Assert(err, NotNil) +} + func (s *testGCWorkerSuite) TestResolveLockRangeInfine(c *C) { c.Assert(failpoint.Enable("github.com/pingcap/tidb/store/tikv/invalidCacheAndRetry", "return(true)"), IsNil) c.Assert(failpoint.Enable("github.com/pingcap/tidb/store/tikv/gcworker/setGcResolveMaxBackoff", "return(1)"), IsNil) From d0b96f38b82643ae88e5293c5351340d1476137e Mon Sep 17 00:00:00 2001 From: MyonKeminta Date: Wed, 8 Apr 2020 14:05:20 +0800 Subject: [PATCH 2/2] Fix missing import --- store/tikv/gcworker/gc_worker_test.go | 1 + 1 file changed, 1 insertion(+) diff --git a/store/tikv/gcworker/gc_worker_test.go b/store/tikv/gcworker/gc_worker_test.go index 9336d411dcb58..342694b1dadc8 100644 --- a/store/tikv/gcworker/gc_worker_test.go +++ b/store/tikv/gcworker/gc_worker_test.go @@ -23,6 +23,7 @@ import ( . "github.com/pingcap/check" "github.com/pingcap/failpoint" "github.com/pingcap/kvproto/pkg/errorpb" + "github.com/pingcap/kvproto/pkg/metapb" pd "github.com/pingcap/pd/v3/client" "github.com/pingcap/tidb/config" "github.com/pingcap/tidb/domain"