From b8fde24d3a8b086a2f352fe7d303de8f760b4649 Mon Sep 17 00:00:00 2001 From: lucklove Date: Thu, 22 Oct 2020 12:13:43 +0800 Subject: [PATCH 1/8] More safe way to cleanup tombstone nodes --- components/cluster/command/display.go | 55 +---------- components/cluster/command/prune.go | 126 ++++++++++++++++++++++++++ components/cluster/command/root.go | 1 + pkg/cluster/operation/action.go | 23 +++-- pkg/cluster/operation/scale_in.go | 2 +- 5 files changed, 142 insertions(+), 65 deletions(-) create mode 100644 components/cluster/command/prune.go diff --git a/components/cluster/command/display.go b/components/cluster/command/display.go index d35ba5c9de..dc807c3cb4 100644 --- a/components/cluster/command/display.go +++ b/components/cluster/command/display.go @@ -23,11 +23,8 @@ import ( "github.com/fatih/color" perrs "github.com/pingcap/errors" "github.com/pingcap/tiup/pkg/cluster/api" - operator "github.com/pingcap/tiup/pkg/cluster/operation" "github.com/pingcap/tiup/pkg/cluster/spec" - "github.com/pingcap/tiup/pkg/cluster/task" "github.com/pingcap/tiup/pkg/crypto" - "github.com/pingcap/tiup/pkg/logger/log" "github.com/pingcap/tiup/pkg/meta" "github.com/spf13/cobra" ) @@ -70,12 +67,7 @@ func newDisplayCmd() *cobra.Command { return displayDashboardInfo(clusterName, tlsCfg) } - err = manager.Display(clusterName, gOpt) - if err != nil { - return perrs.AddStack(err) - } - - return destroyTombstoneIfNeed(clusterName, metadata, gOpt) + return manager.Display(clusterName, gOpt) }, } @@ -133,48 +125,3 @@ func displayDashboardInfo(clusterName string, tlsCfg *tls.Config) error { return nil } - -func destroyTombstoneIfNeed(clusterName string, metadata *spec.ClusterMeta, opt operator.Options) error { - topo := metadata.Topology - - if !operator.NeedCheckTomebsome(topo) { - return nil - } - - tlsCfg, err := topo.TLSConfig(tidbSpec.Path(clusterName, spec.TLSCertKeyDir)) - if err != nil { - return perrs.AddStack(err) - } - - ctx := task.NewContext() - err = 
ctx.SetSSHKeySet(spec.ClusterPath(clusterName, "ssh", "id_rsa"), - spec.ClusterPath(clusterName, "ssh", "id_rsa.pub")) - if err != nil { - return perrs.AddStack(err) - } - - err = ctx.SetClusterSSH(topo, metadata.User, gOpt.SSHTimeout, gOpt.SSHType, topo.BaseTopo().GlobalOptions.SSHType) - if err != nil { - return perrs.AddStack(err) - } - - nodes, err := operator.DestroyTombstone(ctx, topo, true /* returnNodesOnly */, opt, tlsCfg) - if err != nil { - return perrs.AddStack(err) - } - - if len(nodes) == 0 { - return nil - } - - log.Infof("Start destroy Tombstone nodes: %v ...", nodes) - - _, err = operator.DestroyTombstone(ctx, topo, false /* returnNodesOnly */, opt, tlsCfg) - if err != nil { - return perrs.AddStack(err) - } - - log.Infof("Destroy success") - - return spec.SaveClusterMeta(clusterName, metadata) -} diff --git a/components/cluster/command/prune.go b/components/cluster/command/prune.go new file mode 100644 index 0000000000..c501e75ce4 --- /dev/null +++ b/components/cluster/command/prune.go @@ -0,0 +1,126 @@ +// Copyright 2020 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package command + +import ( + "fmt" + "strconv" + + "github.com/fatih/color" + perrs "github.com/pingcap/errors" + "github.com/pingcap/tiup/pkg/cliutil" + operator "github.com/pingcap/tiup/pkg/cluster/operation" + "github.com/pingcap/tiup/pkg/cluster/spec" + "github.com/pingcap/tiup/pkg/cluster/task" + "github.com/pingcap/tiup/pkg/logger/log" + "github.com/spf13/cobra" +) + +func newPruneCmd() *cobra.Command { + cmd := &cobra.Command{ + Use: "prune ", + Short: "Destroy and remove instances that is in tombstone state", + RunE: func(cmd *cobra.Command, args []string) error { + if len(args) != 1 { + return cmd.Help() + } + + clusterName := args[0] + + metadata, err := spec.ClusterMetadata(clusterName) + if err != nil { + return err + } + + return destroyTombstoneIfNeed(clusterName, metadata, gOpt, skipConfirm) + }, + } + + return cmd +} + +func confirmTombstone(tombs []spec.InstanceSpec) error { + printTable := [][]string{} + printTable = append(printTable, []string{ + "ID", "Role", "Host", "Main Port", + }) + + for _, inst := range tombs { + host, _ := inst.SSH() + role := inst.Role() + port := inst.GetMainPort() + id := fmt.Sprintf("%s:%d", host, port) + + printTable = append(printTable, []string{ + id, role, host, strconv.Itoa(port), + }) + } + + fmt.Println(color.HiYellowString("These instances will be destroyed:")) + cliutil.PrintTable(printTable, true) + return cliutil.PromptForConfirmOrAbortError(color.HiYellowString("Do you confirm this action? 
[y/N]:")) +} + +func destroyTombstoneIfNeed(clusterName string, metadata *spec.ClusterMeta, opt operator.Options, skipConfirm bool) error { + topo := metadata.Topology + + tombs := operator.NeedCheckTomebsome(topo) + if len(tombs) == 0 { + fmt.Println("There is no instance in tombstone state") + return nil + } + + if !skipConfirm { + if err := confirmTombstone(tombs); err != nil { + return err + } + } + + tlsCfg, err := topo.TLSConfig(tidbSpec.Path(clusterName, spec.TLSCertKeyDir)) + if err != nil { + return perrs.AddStack(err) + } + + ctx := task.NewContext() + err = ctx.SetSSHKeySet(spec.ClusterPath(clusterName, "ssh", "id_rsa"), + spec.ClusterPath(clusterName, "ssh", "id_rsa.pub")) + if err != nil { + return perrs.AddStack(err) + } + + err = ctx.SetClusterSSH(topo, metadata.User, gOpt.SSHTimeout, gOpt.SSHType, topo.BaseTopo().GlobalOptions.SSHType) + if err != nil { + return perrs.AddStack(err) + } + + nodes, err := operator.DestroyTombstone(ctx, topo, true /* returnNodesOnly */, opt, tlsCfg) + if err != nil { + return perrs.AddStack(err) + } + + if len(nodes) == 0 { + return nil + } + + log.Infof("Start destroy Tombstone nodes: %v ...", nodes) + + _, err = operator.DestroyTombstone(ctx, topo, false /* returnNodesOnly */, opt, tlsCfg) + if err != nil { + return perrs.AddStack(err) + } + + log.Infof("Destroy success") + + return spec.SaveClusterMeta(clusterName, metadata) +} diff --git a/components/cluster/command/root.go b/components/cluster/command/root.go index 502bb51995..41a1377466 100644 --- a/components/cluster/command/root.go +++ b/components/cluster/command/root.go @@ -153,6 +153,7 @@ func init() { newUpgradeCmd(), newExecCmd(), newDisplayCmd(), + newPruneCmd(), newListCmd(), newAuditCmd(), newImportCmd(), diff --git a/pkg/cluster/operation/action.go b/pkg/cluster/operation/action.go index 4a9be8facf..77da667afd 100644 --- a/pkg/cluster/operation/action.go +++ b/pkg/cluster/operation/action.go @@ -136,28 +136,31 @@ func Stop( } // NeedCheckTomebsome 
return true if we need to check and destroy some node. -func NeedCheckTomebsome(spec *spec.Specification) bool { - for _, s := range spec.TiKVServers { +func NeedCheckTomebsome(topo *spec.Specification) []spec.InstanceSpec { + tombs := []spec.InstanceSpec{} + + for _, s := range topo.TiKVServers { if s.Offline { - return true + tombs = append(tombs, s) } } - for _, s := range spec.TiFlashServers { + for _, s := range topo.TiFlashServers { if s.Offline { - return true + tombs = append(tombs, s) } } - for _, s := range spec.PumpServers { + for _, s := range topo.PumpServers { if s.Offline { - return true + tombs = append(tombs, s) } } - for _, s := range spec.Drainers { + for _, s := range topo.Drainers { if s.Offline { - return true + tombs = append(tombs, s) } } - return false + + return tombs } // DestroyTombstone remove the tombstone node in spec and destroy them. diff --git a/pkg/cluster/operation/scale_in.go b/pkg/cluster/operation/scale_in.go index 0ddbacccd7..bce515dd38 100644 --- a/pkg/cluster/operation/scale_in.go +++ b/pkg/cluster/operation/scale_in.go @@ -237,7 +237,7 @@ func ScaleInCluster( return errors.Annotatef(err, "failed to destroy %s", component.Name()) } } else { - log.Warnf(color.YellowString("The component `%s` will be destroyed when display cluster info when it become tombstone, maybe exists in several minutes or hours", + log.Warnf(color.YellowString("The component `%s` will become tombstone, maybe exists in several minutes or hours, after that you can use the prune command to clean it", component.Name())) } } From 5cbf69d5b5cf540ae94d31fa5b0bbf36ed4261eb Mon Sep 17 00:00:00 2001 From: lucklove Date: Thu, 22 Oct 2020 13:06:31 +0800 Subject: [PATCH 2/8] Fix test --- tests/tiup-cluster/script/util.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/tiup-cluster/script/util.sh b/tests/tiup-cluster/script/util.sh index 470ad687c0..8766161686 100755 --- a/tests/tiup-cluster/script/util.sh +++ b/tests/tiup-cluster/script/util.sh @@ -49,5 
+49,6 @@ function wait_instance_num_reach() { echo "fail to wait instance number reach $target_num, retry num: $i" tiup-cluster $client display $name + tiup-cluster $client prune $name --yes exit -1 } From e8ede969fe143342bbd1df505ea75fb38ca0d274 Mon Sep 17 00:00:00 2001 From: lucklove Date: Thu, 22 Oct 2020 15:57:07 +0800 Subject: [PATCH 3/8] Fix test Signed-off-by: lucklove --- components/cluster/command/prune.go | 40 ++++++----------------------- go.sum | 1 + pkg/cluster/manager.go | 8 +++++- pkg/cluster/operation/action.go | 14 +++++----- tests/tiup-cluster/script/util.sh | 2 +- 5 files changed, 23 insertions(+), 42 deletions(-) diff --git a/components/cluster/command/prune.go b/components/cluster/command/prune.go index c501e75ce4..e25e3932bf 100644 --- a/components/cluster/command/prune.go +++ b/components/cluster/command/prune.go @@ -15,7 +15,6 @@ package command import ( "fmt" - "strconv" "github.com/fatih/color" perrs "github.com/pingcap/errors" @@ -50,43 +49,13 @@ func newPruneCmd() *cobra.Command { return cmd } -func confirmTombstone(tombs []spec.InstanceSpec) error { - printTable := [][]string{} - printTable = append(printTable, []string{ - "ID", "Role", "Host", "Main Port", - }) - - for _, inst := range tombs { - host, _ := inst.SSH() - role := inst.Role() - port := inst.GetMainPort() - id := fmt.Sprintf("%s:%d", host, port) - - printTable = append(printTable, []string{ - id, role, host, strconv.Itoa(port), - }) - } - - fmt.Println(color.HiYellowString("These instances will be destroyed:")) - cliutil.PrintTable(printTable, true) - return cliutil.PromptForConfirmOrAbortError(color.HiYellowString("Do you confirm this action? 
[y/N]:")) -} - func destroyTombstoneIfNeed(clusterName string, metadata *spec.ClusterMeta, opt operator.Options, skipConfirm bool) error { topo := metadata.Topology - tombs := operator.NeedCheckTomebsome(topo) - if len(tombs) == 0 { - fmt.Println("There is no instance in tombstone state") + if !operator.NeedCheckTomebsome(topo) { return nil } - if !skipConfirm { - if err := confirmTombstone(tombs); err != nil { - return err - } - } - tlsCfg, err := topo.TLSConfig(tidbSpec.Path(clusterName, spec.TLSCertKeyDir)) if err != nil { return perrs.AddStack(err) @@ -113,6 +82,13 @@ func destroyTombstoneIfNeed(clusterName string, metadata *spec.ClusterMeta, opt return nil } + err = cliutil.PromptForConfirmOrAbortError( + color.HiYellowString(fmt.Sprintf("Will destroy these nodes: %v\nDo you confirm this action? [y/N]:", nodes)), + ) + if err != nil { + return err + } + log.Infof("Start destroy Tombstone nodes: %v ...", nodes) _, err = operator.DestroyTombstone(ctx, topo, false /* returnNodesOnly */, opt, tlsCfg) diff --git a/go.sum b/go.sum index 88a657275c..9764f649d4 100644 --- a/go.sum +++ b/go.sum @@ -639,6 +639,7 @@ github.com/pingcap/parser v0.0.0-20190506092653-e336082eb825/go.mod h1:1FNvfp9+J github.com/pingcap/parser v0.0.0-20200422082501-7329d80eaf2c/go.mod h1:9v0Edh8IbgjGYW2ArJr19E+bvL8zKahsFp+ixWeId+4= github.com/pingcap/pd v2.1.5+incompatible h1:vOLV2tSQdRjjmxaTXtJULoC94dYQOd+6fzn2yChODHc= github.com/pingcap/pd v2.1.5+incompatible/go.mod h1:nD3+EoYes4+aNNODO99ES59V83MZSI+dFbhyr667a0E= +github.com/pingcap/pd/v4 v4.0.0-rc.1.0.20200422143320-428acd53eba2 h1:JTzYYukREvxVSKW/ncrzNjFitd8snoQ/Xz32pw8i+s8= github.com/pingcap/pd/v4 v4.0.0-rc.1.0.20200422143320-428acd53eba2/go.mod h1:s+utZtXDznOiL24VK0qGmtoHjjXNsscJx3m1n8cC56s= github.com/pingcap/sysutil v0.0.0-20200206130906-2bfa6dc40bcd/go.mod h1:EB/852NMQ+aRKioCpToQ94Wl7fktV+FNnxf3CX/TTXI= github.com/pingcap/sysutil v0.0.0-20200408114249-ed3bd6f7fdb1/go.mod h1:EB/852NMQ+aRKioCpToQ94Wl7fktV+FNnxf3CX/TTXI= diff --git 
a/pkg/cluster/manager.go b/pkg/cluster/manager.go index b0bdc5cba2..338699e25e 100644 --- a/pkg/cluster/manager.go +++ b/pkg/cluster/manager.go @@ -599,7 +599,7 @@ func (m *Manager) Display(clusterName string, opt operator.Options) error { cliutil.PrintTable(clusterTable, true) fmt.Printf("Total nodes: %d\n", len(clusterTable)-1) - if _, ok := topo.(*spec.Specification); ok { + if t, ok := topo.(*spec.Specification); ok { // Check if TiKV's label set correctly pdClient := api.NewPDClient(pdList, 10*time.Second, tlsCfg) if lbs, err := pdClient.GetLocationLabels(); err != nil { @@ -607,6 +607,12 @@ func (m *Manager) Display(clusterName string, opt operator.Options) error { } else if err := spec.CheckTiKVLocationLabels(lbs, pdClient); err != nil { color.Yellow("\nWARN: there is something wrong with TiKV labels, which may cause data losing:\n%v", err) } + + // Check if there is some instance in tombstone state + nodes, _ := operator.DestroyTombstone(ctx, t, true /* returnNodesOnly */, opt, tlsCfg) + if len(nodes) != 0 { + color.Green("There is some nodes in state Tombstone, you can destroy them with the command `tiup cluster prune %s`", clusterName) + } } return nil diff --git a/pkg/cluster/operation/action.go b/pkg/cluster/operation/action.go index 77da667afd..58a521967b 100644 --- a/pkg/cluster/operation/action.go +++ b/pkg/cluster/operation/action.go @@ -136,31 +136,29 @@ func Stop( } // NeedCheckTomebsome return true if we need to check and destroy some node. 
-func NeedCheckTomebsome(topo *spec.Specification) []spec.InstanceSpec { - tombs := []spec.InstanceSpec{} - +func NeedCheckTomebsome(topo *spec.Specification) bool { for _, s := range topo.TiKVServers { if s.Offline { - tombs = append(tombs, s) + return true } } for _, s := range topo.TiFlashServers { if s.Offline { - tombs = append(tombs, s) + return true } } for _, s := range topo.PumpServers { if s.Offline { - tombs = append(tombs, s) + return true } } for _, s := range topo.Drainers { if s.Offline { - tombs = append(tombs, s) + return true } } - return tombs + return false } // DestroyTombstone remove the tombstone node in spec and destroy them. diff --git a/tests/tiup-cluster/script/util.sh b/tests/tiup-cluster/script/util.sh index 8766161686..2bb569d4c6 100755 --- a/tests/tiup-cluster/script/util.sh +++ b/tests/tiup-cluster/script/util.sh @@ -16,6 +16,7 @@ function instance_num() { client="--native-ssh" fi + tiup-cluster $client prune $name --yes count=$(tiup-cluster $client display $name | grep "Total nodes" | awk -F ' ' '{print $3}') echo $count @@ -49,6 +50,5 @@ function wait_instance_num_reach() { echo "fail to wait instance number reach $target_num, retry num: $i" tiup-cluster $client display $name - tiup-cluster $client prune $name --yes exit -1 } From d3cb350c6bb1f15b84900a0b2a5f721fb41eb8ff Mon Sep 17 00:00:00 2001 From: SIGSEGV Date: Thu, 22 Oct 2020 17:09:27 +0800 Subject: [PATCH 4/8] Update pkg/cluster/manager.go Co-authored-by: Lonng --- pkg/cluster/manager.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/cluster/manager.go b/pkg/cluster/manager.go index 338699e25e..9988f46c45 100644 --- a/pkg/cluster/manager.go +++ b/pkg/cluster/manager.go @@ -611,7 +611,7 @@ func (m *Manager) Display(clusterName string, opt operator.Options) error { // Check if there is some instance in tombstone state nodes, _ := operator.DestroyTombstone(ctx, t, true /* returnNodesOnly */, opt, tlsCfg) if len(nodes) != 0 { - color.Green("There is some 
nodes in state Tombstone, you can destroy them with the command `tiup cluster prune %s`", clusterName) + color.Green("There are some nodes in state: `Tombstone`\n\tNodes: %+v\n\tYou can destroy them with the command: `tiup cluster prune %s`", nodes, clusterName) } } From a953b6721ea8092b6c5f24d2ea1957c050243825 Mon Sep 17 00:00:00 2001 From: lucklove Date: Thu, 22 Oct 2020 17:13:14 +0800 Subject: [PATCH 5/8] Add dm prune Signed-off-by: lucklove --- components/dm/command/display.go | 84 +--------------------- components/dm/command/prune.go | 113 ++++++++++++++++++++++++++++++ components/dm/command/root.go | 1 + tests/tiup-cluster/script/util.sh | 2 +- 4 files changed, 116 insertions(+), 84 deletions(-) create mode 100644 components/dm/command/prune.go diff --git a/components/dm/command/display.go b/components/dm/command/display.go index 812fb8314a..b0a183363c 100644 --- a/components/dm/command/display.go +++ b/components/dm/command/display.go @@ -14,15 +14,7 @@ package command import ( - "sync" - "time" - - perrs "github.com/pingcap/errors" - "github.com/pingcap/tiup/components/dm/spec" - "github.com/pingcap/tiup/pkg/cluster/api" - operator "github.com/pingcap/tiup/pkg/cluster/operation" "github.com/spf13/cobra" - "go.uber.org/zap" ) func newDisplayCmd() *cobra.Command { @@ -39,18 +31,7 @@ func newDisplayCmd() *cobra.Command { clusterName = args[0] - err := manager.Display(clusterName, gOpt) - if err != nil { - return perrs.AddStack(err) - } - - metadata := new(spec.Metadata) - err = dmspec.Metadata(clusterName, metadata) - if err != nil { - return perrs.AddStack(err) - } - - return clearOutDatedEtcdInfo(clusterName, metadata, gOpt) + return manager.Display(clusterName, gOpt) }, } @@ -58,66 +39,3 @@ func newDisplayCmd() *cobra.Command { cmd.Flags().StringSliceVarP(&gOpt.Nodes, "node", "N", nil, "Only display specified nodes") return cmd } - -func clearOutDatedEtcdInfo(clusterName string, metadata *spec.Metadata, opt operator.Options) error { - topo := 
metadata.Topology - - existedMasters := make(map[string]struct{}) - existedWorkers := make(map[string]struct{}) - mastersToDelete := make([]string, 0) - workersToDelete := make([]string, 0) - - for _, masterSpec := range topo.Masters { - existedMasters[masterSpec.Name] = struct{}{} - } - for _, workerSpec := range topo.Workers { - existedWorkers[workerSpec.Name] = struct{}{} - } - - dmMasterClient := api.NewDMMasterClient(topo.GetMasterList(), 10*time.Second, nil) - registeredMasters, registeredWorkers, err := dmMasterClient.GetRegisteredMembers() - if err != nil { - return err - } - - for _, master := range registeredMasters { - if _, ok := existedMasters[master]; !ok { - mastersToDelete = append(mastersToDelete, master) - } - } - for _, worker := range registeredWorkers { - if _, ok := existedWorkers[worker]; !ok { - workersToDelete = append(workersToDelete, worker) - } - } - - zap.L().Info("Outdated components needed to clear etcd info", zap.Strings("masters", mastersToDelete), zap.Strings("workers", workersToDelete)) - - errCh := make(chan error, len(existedMasters)+len(existedWorkers)) - var wg sync.WaitGroup - - for _, master := range mastersToDelete { - master := master - wg.Add(1) - go func() { - errCh <- dmMasterClient.OfflineMaster(master, nil) - wg.Done() - }() - } - for _, worker := range workersToDelete { - worker := worker - wg.Add(1) - go func() { - errCh <- dmMasterClient.OfflineWorker(worker, nil) - wg.Done() - }() - } - - wg.Wait() - if len(errCh) == 0 { - return nil - } - - // return any one error - return <-errCh -} diff --git a/components/dm/command/prune.go b/components/dm/command/prune.go new file mode 100644 index 0000000000..fa01693e64 --- /dev/null +++ b/components/dm/command/prune.go @@ -0,0 +1,113 @@ +// Copyright 2020 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package command + +import ( + "sync" + "time" + + "github.com/pingcap/tiup/components/dm/spec" + "github.com/pingcap/tiup/pkg/cluster/api" + operator "github.com/pingcap/tiup/pkg/cluster/operation" + "github.com/spf13/cobra" + "go.uber.org/zap" +) + +func newPruneCmd() *cobra.Command { + cmd := &cobra.Command{ + Use: "prune ", + Short: "Clear etcd info ", + RunE: func(cmd *cobra.Command, args []string) error { + if len(args) != 1 { + return cmd.Help() + } + + clusterName := args[0] + + metadata := new(spec.Metadata) + err := dmspec.Metadata(clusterName, metadata) + if err != nil { + return err + } + + return clearOutDatedEtcdInfo(clusterName, metadata, gOpt) + }, + } + + return cmd +} + +func clearOutDatedEtcdInfo(clusterName string, metadata *spec.Metadata, opt operator.Options) error { + topo := metadata.Topology + + existedMasters := make(map[string]struct{}) + existedWorkers := make(map[string]struct{}) + mastersToDelete := make([]string, 0) + workersToDelete := make([]string, 0) + + for _, masterSpec := range topo.Masters { + existedMasters[masterSpec.Name] = struct{}{} + } + for _, workerSpec := range topo.Workers { + existedWorkers[workerSpec.Name] = struct{}{} + } + + dmMasterClient := api.NewDMMasterClient(topo.GetMasterList(), 10*time.Second, nil) + registeredMasters, registeredWorkers, err := dmMasterClient.GetRegisteredMembers() + if err != nil { + return err + } + + for _, master := range registeredMasters { + if _, ok := existedMasters[master]; !ok { + mastersToDelete = append(mastersToDelete, master) + } + } + for _, worker := range registeredWorkers { + if _, ok := existedWorkers[worker]; !ok { + 
workersToDelete = append(workersToDelete, worker) + } + } + + // NOTE(review): removed leftover debug panic(len(registeredMasters) + len(registeredWorkers)); it aborted every prune run + zap.L().Info("Outdated components needed to clear etcd info", zap.Strings("masters", mastersToDelete), zap.Strings("workers", workersToDelete)) + + errCh := make(chan error, len(mastersToDelete)+len(workersToDelete)) + var wg sync.WaitGroup + + for _, master := range mastersToDelete { + master := master + wg.Add(1) + go func() { + errCh <- dmMasterClient.OfflineMaster(master, nil) + wg.Done() + }() + } + for _, worker := range workersToDelete { + worker := worker + wg.Add(1) + go func() { + errCh <- dmMasterClient.OfflineWorker(worker, nil) + wg.Done() + }() + } + + wg.Wait() + if len(errCh) == 0 { + return nil + } + + // return any one error + return <-errCh +} diff --git a/components/dm/command/root.go b/components/dm/command/root.go index 2d58eb131a..e994ff432a 100644 --- a/components/dm/command/root.go +++ b/components/dm/command/root.go @@ -124,6 +124,7 @@ func init() { newExecCmd(), newEditConfigCmd(), newDisplayCmd(), + newPruneCmd(), newReloadCmd(), newUpgradeCmd(), newPatchCmd(), diff --git a/tests/tiup-cluster/script/util.sh b/tests/tiup-cluster/script/util.sh index 2bb569d4c6..7f4d0ffc7b 100755 --- a/tests/tiup-cluster/script/util.sh +++ b/tests/tiup-cluster/script/util.sh @@ -48,7 +48,7 @@ function wait_instance_num_reach() { sleep 1 done - echo "fail to wait instance number reach $target_num, retry num: $i" + echo "fail to wait instance number reach $target_num, count $count, retry num: $i" tiup-cluster $client display $name exit -1 } From 7c9533ab44100b6ea083e7f3714fce0ee575d48d Mon Sep 17 00:00:00 2001 From: lucklove Date: Thu, 22 Oct 2020 18:21:06 +0800 Subject: [PATCH 6/8] Add test for dm --- tests/tiup-dm/script/util.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/tiup-dm/script/util.sh b/tests/tiup-dm/script/util.sh index 5bdfda3dba..5e6aeb863a 100755 --- a/tests/tiup-dm/script/util.sh +++ b/tests/tiup-dm/script/util.sh
@@ -9,6 +9,7 @@ set -eu # coverage: 12.7% of statements in github.com/pingcap/tiup/components/dm/... function instance_num() { name=$1 + tiup-dm prune $name --yes count=$(tiup-dm display $name | grep "Total nodes" | awk -F ' ' '{print $3}') echo $count From 0b3efea517b6eaa54621ad5dad6068becaff38d1 Mon Sep 17 00:00:00 2001 From: lucklove Date: Thu, 22 Oct 2020 19:58:41 +0800 Subject: [PATCH 7/8] Skip confirm Signed-off-by: lucklove --- components/cluster/command/prune.go | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/components/cluster/command/prune.go b/components/cluster/command/prune.go index e25e3932bf..c8854fd388 100644 --- a/components/cluster/command/prune.go +++ b/components/cluster/command/prune.go @@ -82,11 +82,13 @@ func destroyTombstoneIfNeed(clusterName string, metadata *spec.ClusterMeta, opt return nil } - err = cliutil.PromptForConfirmOrAbortError( - color.HiYellowString(fmt.Sprintf("Will destroy these nodes: %v\nDo you confirm this action? [y/N]:", nodes)), - ) - if err != nil { - return err + if !skipConfirm { + err = cliutil.PromptForConfirmOrAbortError( + color.HiYellowString(fmt.Sprintf("Will destroy these nodes: %v\nDo you confirm this action? 
[y/N]:", nodes)), + ) + if err != nil { + return err + } } log.Infof("Start destroy Tombstone nodes: %v ...", nodes) From b225d2ab76e77f52349a4deda27638c7b0f6f09b Mon Sep 17 00:00:00 2001 From: lucklove Date: Thu, 22 Oct 2020 21:02:15 +0800 Subject: [PATCH 8/8] Adjust script Signed-off-by: lucklove --- tests/tiup-cluster/script/util.sh | 2 +- tests/tiup-dm/script/util.sh | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/tiup-cluster/script/util.sh b/tests/tiup-cluster/script/util.sh index 7f4d0ffc7b..32013b2077 100755 --- a/tests/tiup-cluster/script/util.sh +++ b/tests/tiup-cluster/script/util.sh @@ -16,7 +16,6 @@ function instance_num() { client="--native-ssh" fi - tiup-cluster $client prune $name --yes count=$(tiup-cluster $client display $name | grep "Total nodes" | awk -F ' ' '{print $3}') echo $count @@ -37,6 +36,7 @@ function wait_instance_num_reach() { for ((i=0;i<120;i++)) do + tiup-cluster $client prune $name --yes count=$(instance_num $name $native_ssh) if [ "$count" == "$target_num" ]; then echo "instance number reach $target_num" diff --git a/tests/tiup-dm/script/util.sh b/tests/tiup-dm/script/util.sh index 5e6aeb863a..9ddf37afa3 100755 --- a/tests/tiup-dm/script/util.sh +++ b/tests/tiup-dm/script/util.sh @@ -9,7 +9,7 @@ set -eu # coverage: 12.7% of statements in github.com/pingcap/tiup/components/dm/... function instance_num() { name=$1 - tiup-dm prune $name --yes + count=$(tiup-dm display $name | grep "Total nodes" | awk -F ' ' '{print $3}') echo $count @@ -24,6 +24,7 @@ function wait_instance_num_reach() { for ((i=0;i<120;i++)) do + tiup-dm prune $name --yes count=$(instance_num $name) if [ "$count" == "$target_num" ]; then echo "instance number reach $target_num" @@ -35,7 +36,7 @@ function wait_instance_num_reach() { sleep 1 done - echo "fail to wait instance number reach $target_num, retry num: $i" + echo "fail to wait instance number reach $target_num, count $count, retry num: $i" tiup-dm display $name exit -1 }