diff --git a/components/cluster/command/display.go b/components/cluster/command/display.go
index d35ba5c9de..dc807c3cb4 100644
--- a/components/cluster/command/display.go
+++ b/components/cluster/command/display.go
@@ -23,11 +23,8 @@ import (
 	"github.com/fatih/color"
 	perrs "github.com/pingcap/errors"
 	"github.com/pingcap/tiup/pkg/cluster/api"
-	operator "github.com/pingcap/tiup/pkg/cluster/operation"
 	"github.com/pingcap/tiup/pkg/cluster/spec"
-	"github.com/pingcap/tiup/pkg/cluster/task"
 	"github.com/pingcap/tiup/pkg/crypto"
-	"github.com/pingcap/tiup/pkg/logger/log"
 	"github.com/pingcap/tiup/pkg/meta"
 	"github.com/spf13/cobra"
 )
@@ -70,12 +67,7 @@ func newDisplayCmd() *cobra.Command {
 				return displayDashboardInfo(clusterName, tlsCfg)
 			}
 
-			err = manager.Display(clusterName, gOpt)
-			if err != nil {
-				return perrs.AddStack(err)
-			}
-
-			return destroyTombstoneIfNeed(clusterName, metadata, gOpt)
+			return manager.Display(clusterName, gOpt)
 		},
 	}
 
@@ -133,48 +125,3 @@ func displayDashboardInfo(clusterName string, tlsCfg *tls.Config) error {
 
 	return nil
 }
-
-func destroyTombstoneIfNeed(clusterName string, metadata *spec.ClusterMeta, opt operator.Options) error {
-	topo := metadata.Topology
-
-	if !operator.NeedCheckTomebsome(topo) {
-		return nil
-	}
-
-	tlsCfg, err := topo.TLSConfig(tidbSpec.Path(clusterName, spec.TLSCertKeyDir))
-	if err != nil {
-		return perrs.AddStack(err)
-	}
-
-	ctx := task.NewContext()
-	err = ctx.SetSSHKeySet(spec.ClusterPath(clusterName, "ssh", "id_rsa"),
-		spec.ClusterPath(clusterName, "ssh", "id_rsa.pub"))
-	if err != nil {
-		return perrs.AddStack(err)
-	}
-
-	err = ctx.SetClusterSSH(topo, metadata.User, gOpt.SSHTimeout, gOpt.SSHType, topo.BaseTopo().GlobalOptions.SSHType)
-	if err != nil {
-		return perrs.AddStack(err)
-	}
-
-	nodes, err := operator.DestroyTombstone(ctx, topo, true /* returnNodesOnly */, opt, tlsCfg)
-	if err != nil {
-		return perrs.AddStack(err)
-	}
-
-	if len(nodes) == 0 {
-		return nil
-	}
-
-	log.Infof("Start destroy Tombstone nodes: %v ...", nodes)
-
-	_, err = operator.DestroyTombstone(ctx, topo, false /* returnNodesOnly */, opt, tlsCfg)
-	if err != nil {
-		return perrs.AddStack(err)
-	}
-
-	log.Infof("Destroy success")
-
-	return spec.SaveClusterMeta(clusterName, metadata)
-}
diff --git a/components/cluster/command/prune.go b/components/cluster/command/prune.go
new file mode 100644
index 0000000000..c8854fd388
--- /dev/null
+++ b/components/cluster/command/prune.go
@@ -0,0 +1,104 @@
+// Copyright 2020 PingCAP, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package command
+
+import (
+	"fmt"
+
+	"github.com/fatih/color"
+	perrs "github.com/pingcap/errors"
+	"github.com/pingcap/tiup/pkg/cliutil"
+	operator "github.com/pingcap/tiup/pkg/cluster/operation"
+	"github.com/pingcap/tiup/pkg/cluster/spec"
+	"github.com/pingcap/tiup/pkg/cluster/task"
+	"github.com/pingcap/tiup/pkg/logger/log"
+	"github.com/spf13/cobra"
+)
+
+func newPruneCmd() *cobra.Command {
+	cmd := &cobra.Command{
+		Use:   "prune <cluster-name>",
+		Short: "Destroy and remove instances that are in tombstone state",
+		RunE: func(cmd *cobra.Command, args []string) error {
+			if len(args) != 1 {
+				return cmd.Help()
+			}
+
+			clusterName := args[0]
+
+			metadata, err := spec.ClusterMetadata(clusterName)
+			if err != nil {
+				return err
+			}
+
+			return destroyTombstoneIfNeed(clusterName, metadata, gOpt, skipConfirm)
+		},
+	}
+
+	return cmd
+}
+
+func destroyTombstoneIfNeed(clusterName string, metadata *spec.ClusterMeta, opt operator.Options, skipConfirm bool) error {
+	topo := metadata.Topology
+
+	if !operator.NeedCheckTomebsome(topo) {
+		return nil
+	}
+
+	tlsCfg, err := topo.TLSConfig(tidbSpec.Path(clusterName, spec.TLSCertKeyDir))
+	if err != nil {
+		return perrs.AddStack(err)
+	}
+
+	ctx := task.NewContext()
+	err = ctx.SetSSHKeySet(spec.ClusterPath(clusterName, "ssh", "id_rsa"),
+		spec.ClusterPath(clusterName, "ssh", "id_rsa.pub"))
+	if err != nil {
+		return perrs.AddStack(err)
+	}
+
+	err = ctx.SetClusterSSH(topo, metadata.User, gOpt.SSHTimeout, gOpt.SSHType, topo.BaseTopo().GlobalOptions.SSHType)
+	if err != nil {
+		return perrs.AddStack(err)
+	}
+
+	nodes, err := operator.DestroyTombstone(ctx, topo, true /* returnNodesOnly */, opt, tlsCfg)
+	if err != nil {
+		return perrs.AddStack(err)
+	}
+
+	if len(nodes) == 0 {
+		return nil
+	}
+
+	if !skipConfirm {
+		err = cliutil.PromptForConfirmOrAbortError(
+			color.HiYellowString(fmt.Sprintf("Will destroy these nodes: %v\nDo you confirm this action? [y/N]:", nodes)),
+		)
+		if err != nil {
+			return err
+		}
+	}
+
+	log.Infof("Start destroy Tombstone nodes: %v ...", nodes)
+
+	_, err = operator.DestroyTombstone(ctx, topo, false /* returnNodesOnly */, opt, tlsCfg)
+	if err != nil {
+		return perrs.AddStack(err)
+	}
+
+	log.Infof("Destroy success")
+
+	return spec.SaveClusterMeta(clusterName, metadata)
+}
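Note: in `destroyTombstoneIfNeed` above, `operator.DestroyTombstone` doubles as its own dry run. The first call, with `returnNodesOnly` set to true, only reports which nodes are in Tombstone state; the prompt then gives the operator a chance to abort; only the second call destroys anything. A minimal, self-contained sketch of that dry-run / confirm / apply shape follows (`listTargets`, `destroyTargets`, and `promptYesNo` are hypothetical stand-ins, not TiUP APIs):

```go
package main

import (
	"bufio"
	"errors"
	"fmt"
	"os"
	"strings"
)

// listTargets stands in for operator.DestroyTombstone with
// returnNodesOnly=true: it only reports candidates and touches nothing.
func listTargets() ([]string, error) {
	return []string{"172.16.5.1:20160"}, nil
}

// destroyTargets stands in for the second, destructive call
// (returnNodesOnly=false).
func destroyTargets(nodes []string) error {
	fmt.Println("destroying:", nodes)
	return nil
}

// promptYesNo returns an error unless the user answers "y",
// in the spirit of cliutil.PromptForConfirmOrAbortError.
func promptYesNo(msg string) error {
	fmt.Printf("%s [y/N]: ", msg)
	line, _ := bufio.NewReader(os.Stdin).ReadString('\n')
	if strings.TrimSpace(strings.ToLower(line)) != "y" {
		return errors.New("operation aborted by user")
	}
	return nil
}

func pruneWithConfirm(skipConfirm bool) error {
	// Dry run first: find out what would be destroyed.
	nodes, err := listTargets()
	if err != nil {
		return err
	}
	// Nothing in Tombstone state: succeed silently.
	if len(nodes) == 0 {
		return nil
	}
	if !skipConfirm {
		if err := promptYesNo(fmt.Sprintf("Will destroy these nodes: %v\nDo you confirm this action?", nodes)); err != nil {
			return err
		}
	}
	// Apply: the same operation, destructive this time.
	return destroyTargets(nodes)
}

func main() {
	if err := pruneWithConfirm(false); err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
}
```

Returning early on an empty candidate list keeps the command silent and idempotent, which is what makes it safe to run repeatedly, as the test scripts later in this diff do.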
[y/N]:", nodes)), + ) + if err != nil { + return err + } + } + + log.Infof("Start destroy Tombstone nodes: %v ...", nodes) + + _, err = operator.DestroyTombstone(ctx, topo, false /* returnNodesOnly */, opt, tlsCfg) + if err != nil { + return perrs.AddStack(err) + } + + log.Infof("Destroy success") + + return spec.SaveClusterMeta(clusterName, metadata) +} diff --git a/components/cluster/command/root.go b/components/cluster/command/root.go index 502bb51995..41a1377466 100644 --- a/components/cluster/command/root.go +++ b/components/cluster/command/root.go @@ -153,6 +153,7 @@ func init() { newUpgradeCmd(), newExecCmd(), newDisplayCmd(), + newPruneCmd(), newListCmd(), newAuditCmd(), newImportCmd(), diff --git a/components/dm/command/display.go b/components/dm/command/display.go index 812fb8314a..b0a183363c 100644 --- a/components/dm/command/display.go +++ b/components/dm/command/display.go @@ -14,15 +14,7 @@ package command import ( - "sync" - "time" - - perrs "github.com/pingcap/errors" - "github.com/pingcap/tiup/components/dm/spec" - "github.com/pingcap/tiup/pkg/cluster/api" - operator "github.com/pingcap/tiup/pkg/cluster/operation" "github.com/spf13/cobra" - "go.uber.org/zap" ) func newDisplayCmd() *cobra.Command { @@ -39,18 +31,7 @@ func newDisplayCmd() *cobra.Command { clusterName = args[0] - err := manager.Display(clusterName, gOpt) - if err != nil { - return perrs.AddStack(err) - } - - metadata := new(spec.Metadata) - err = dmspec.Metadata(clusterName, metadata) - if err != nil { - return perrs.AddStack(err) - } - - return clearOutDatedEtcdInfo(clusterName, metadata, gOpt) + return manager.Display(clusterName, gOpt) }, } @@ -58,66 +39,3 @@ func newDisplayCmd() *cobra.Command { cmd.Flags().StringSliceVarP(&gOpt.Nodes, "node", "N", nil, "Only display specified nodes") return cmd } - -func clearOutDatedEtcdInfo(clusterName string, metadata *spec.Metadata, opt operator.Options) error { - topo := metadata.Topology - - existedMasters := make(map[string]struct{}) - existedWorkers := make(map[string]struct{}) - mastersToDelete := make([]string, 0) - workersToDelete := make([]string, 0) - - for _, masterSpec := range topo.Masters { - existedMasters[masterSpec.Name] = struct{}{} - } - for _, workerSpec := range topo.Workers { - existedWorkers[workerSpec.Name] = struct{}{} - } - - dmMasterClient := api.NewDMMasterClient(topo.GetMasterList(), 10*time.Second, nil) - registeredMasters, registeredWorkers, err := dmMasterClient.GetRegisteredMembers() - if err != nil { - return err - } - - for _, master := range registeredMasters { - if _, ok := existedMasters[master]; !ok { - mastersToDelete = append(mastersToDelete, master) - } - } - for _, worker := range registeredWorkers { - if _, ok := existedWorkers[worker]; !ok { - workersToDelete = append(workersToDelete, worker) - } - } - - zap.L().Info("Outdated components needed to clear etcd info", zap.Strings("masters", mastersToDelete), zap.Strings("workers", workersToDelete)) - - errCh := make(chan error, len(existedMasters)+len(existedWorkers)) - var wg sync.WaitGroup - - for _, master := range mastersToDelete { - master := master - wg.Add(1) - go func() { - errCh <- dmMasterClient.OfflineMaster(master, nil) - wg.Done() - }() - } - for _, worker := range workersToDelete { - worker := worker - wg.Add(1) - go func() { - errCh <- dmMasterClient.OfflineWorker(worker, nil) - wg.Done() - }() - } - - wg.Wait() - if len(errCh) == 0 { - return nil - } - - // return any one error - return <-errCh -} diff --git a/components/dm/command/prune.go 
diff --git a/components/dm/command/prune.go b/components/dm/command/prune.go
new file mode 100644
index 0000000000..96db6e8762
--- /dev/null
+++ b/components/dm/command/prune.go
@@ -0,0 +1,112 @@
+// Copyright 2020 PingCAP, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package command
+
+import (
+	"sync"
+	"time"
+
+	"github.com/pingcap/tiup/components/dm/spec"
+	"github.com/pingcap/tiup/pkg/cluster/api"
+	operator "github.com/pingcap/tiup/pkg/cluster/operation"
+	"github.com/spf13/cobra"
+	"go.uber.org/zap"
+)
+
+func newPruneCmd() *cobra.Command {
+	cmd := &cobra.Command{
+		Use:   "prune <cluster-name>",
+		Short: "Clear stale etcd info of removed masters and workers",
+		RunE: func(cmd *cobra.Command, args []string) error {
+			if len(args) != 1 {
+				return cmd.Help()
+			}
+
+			clusterName := args[0]
+
+			metadata := new(spec.Metadata)
+			err := dmspec.Metadata(clusterName, metadata)
+			if err != nil {
+				return err
+			}
+
+			return clearOutDatedEtcdInfo(clusterName, metadata, gOpt)
+		},
+	}
+
+	return cmd
+}
+
+func clearOutDatedEtcdInfo(clusterName string, metadata *spec.Metadata, opt operator.Options) error {
+	topo := metadata.Topology
+
+	existedMasters := make(map[string]struct{})
+	existedWorkers := make(map[string]struct{})
+	mastersToDelete := make([]string, 0)
+	workersToDelete := make([]string, 0)
+
+	for _, masterSpec := range topo.Masters {
+		existedMasters[masterSpec.Name] = struct{}{}
+	}
+	for _, workerSpec := range topo.Workers {
+		existedWorkers[workerSpec.Name] = struct{}{}
+	}
+
+	dmMasterClient := api.NewDMMasterClient(topo.GetMasterList(), 10*time.Second, nil)
+	registeredMasters, registeredWorkers, err := dmMasterClient.GetRegisteredMembers()
+	if err != nil {
+		return err
+	}
+
+	for _, master := range registeredMasters {
+		if _, ok := existedMasters[master]; !ok {
+			mastersToDelete = append(mastersToDelete, master)
+		}
+	}
+	for _, worker := range registeredWorkers {
+		if _, ok := existedWorkers[worker]; !ok {
+			workersToDelete = append(workersToDelete, worker)
+		}
+	}
+
+	zap.L().Info("Outdated components needed to clear etcd info", zap.Strings("masters", mastersToDelete), zap.Strings("workers", workersToDelete))
+
+	errCh := make(chan error, len(mastersToDelete)+len(workersToDelete))
+	var wg sync.WaitGroup
+
+	for _, master := range mastersToDelete {
+		master := master
+		wg.Add(1)
+		go func() {
+			errCh <- dmMasterClient.OfflineMaster(master, nil)
+			wg.Done()
+		}()
+	}
+	for _, worker := range workersToDelete {
+		worker := worker
+		wg.Add(1)
+		go func() {
+			errCh <- dmMasterClient.OfflineWorker(worker, nil)
+			wg.Done()
+		}()
+	}
+
+	wg.Wait()
+	if len(errCh) == 0 {
+		return nil
+	}
+
+	// return any one error
+	return <-errCh
+}
diff --git a/components/dm/command/root.go b/components/dm/command/root.go
index 2d58eb131a..e994ff432a 100644
--- a/components/dm/command/root.go
+++ b/components/dm/command/root.go
@@ -124,6 +124,7 @@ func init() {
 		newExecCmd(),
 		newEditConfigCmd(),
 		newDisplayCmd(),
+		newPruneCmd(),
 		newReloadCmd(),
 		newUpgradeCmd(),
 		newPatchCmd(),
diff --git a/go.sum b/go.sum
index 88a657275c..9764f649d4 100644
--- a/go.sum
+++ b/go.sum
@@ -639,6 +639,7 @@
 github.com/pingcap/parser v0.0.0-20190506092653-e336082eb825/go.mod h1:1FNvfp9+J
 github.com/pingcap/parser v0.0.0-20200422082501-7329d80eaf2c/go.mod h1:9v0Edh8IbgjGYW2ArJr19E+bvL8zKahsFp+ixWeId+4=
 github.com/pingcap/pd v2.1.5+incompatible h1:vOLV2tSQdRjjmxaTXtJULoC94dYQOd+6fzn2yChODHc=
 github.com/pingcap/pd v2.1.5+incompatible/go.mod h1:nD3+EoYes4+aNNODO99ES59V83MZSI+dFbhyr667a0E=
+github.com/pingcap/pd/v4 v4.0.0-rc.1.0.20200422143320-428acd53eba2 h1:JTzYYukREvxVSKW/ncrzNjFitd8snoQ/Xz32pw8i+s8=
 github.com/pingcap/pd/v4 v4.0.0-rc.1.0.20200422143320-428acd53eba2/go.mod h1:s+utZtXDznOiL24VK0qGmtoHjjXNsscJx3m1n8cC56s=
 github.com/pingcap/sysutil v0.0.0-20200206130906-2bfa6dc40bcd/go.mod h1:EB/852NMQ+aRKioCpToQ94Wl7fktV+FNnxf3CX/TTXI=
 github.com/pingcap/sysutil v0.0.0-20200408114249-ed3bd6f7fdb1/go.mod h1:EB/852NMQ+aRKioCpToQ94Wl7fktV+FNnxf3CX/TTXI=
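Note: `clearOutDatedEtcdInfo` above diffs the members registered in dm-master's embedded etcd against the topology on disk, then offlines every member that is no longer in the topology, one goroutine per member. The WaitGroup is the join point, and the buffered error channel collects results without blocking any sender. A self-contained sketch of that fan-out-and-join shape follows (the `offline` function is a hypothetical stand-in for `OfflineMaster`/`OfflineWorker`):

```go
package main

import (
	"fmt"
	"sync"
)

// offline stands in for the per-member RPC
// (OfflineMaster/OfflineWorker in the file above).
func offline(member string) error {
	fmt.Println("offlining", member)
	return nil
}

// offlineAll offlines every member concurrently and reports one failure,
// if any. The channel is buffered with exactly one slot per send, so no
// goroutine can ever block on it and wg.Wait cannot deadlock.
func offlineAll(members []string) error {
	errCh := make(chan error, len(members))
	var wg sync.WaitGroup
	for _, m := range members {
		m := m // capture the loop variable (required before Go 1.22)
		wg.Add(1)
		go func() {
			defer wg.Done()
			errCh <- offline(m)
		}()
	}
	wg.Wait()
	close(errCh)
	for err := range errCh {
		if err != nil {
			return err // surface any one of the failures
		}
	}
	return nil
}

func main() {
	if err := offlineAll([]string{"master-1", "worker-7"}); err != nil {
		fmt.Println("error:", err)
	}
}
```

Sizing the buffer to exactly the number of sends is the invariant that keeps `wg.Wait` from deadlocking, which is why the channel in the new file is sized by the to-delete lists rather than by the existing members.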
diff --git a/pkg/cluster/manager.go b/pkg/cluster/manager.go
index b0bdc5cba2..9988f46c45 100644
--- a/pkg/cluster/manager.go
+++ b/pkg/cluster/manager.go
@@ -599,7 +599,7 @@ func (m *Manager) Display(clusterName string, opt operator.Options) error {
 	cliutil.PrintTable(clusterTable, true)
 	fmt.Printf("Total nodes: %d\n", len(clusterTable)-1)
 
-	if _, ok := topo.(*spec.Specification); ok {
+	if t, ok := topo.(*spec.Specification); ok {
 		// Check if TiKV's label set correctly
 		pdClient := api.NewPDClient(pdList, 10*time.Second, tlsCfg)
 		if lbs, err := pdClient.GetLocationLabels(); err != nil {
@@ -607,6 +607,12 @@
 		} else if err := spec.CheckTiKVLocationLabels(lbs, pdClient); err != nil {
 			color.Yellow("\nWARN: there is something wrong with TiKV labels, which may cause data losing:\n%v", err)
 		}
+
+		// Check if any instance is in tombstone state
+		nodes, _ := operator.DestroyTombstone(ctx, t, true /* returnNodesOnly */, opt, tlsCfg)
+		if len(nodes) != 0 {
+			color.Green("There are some nodes in state: `Tombstone`\n\tNodes: %+v\n\tYou can destroy them with the command: `tiup cluster prune %s`", nodes, clusterName)
+		}
 	}
 
 	return nil
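Note: the `Display` hunk above hinges on a comma-ok type assertion. The Tombstone probe only makes sense for a TiDB cluster, so it runs only when the generic topology value is concretely a `*spec.Specification`, and the assertion now binds that concrete value to `t` so it can be passed to `operator.DestroyTombstone`. A small self-contained sketch of the pattern, with hypothetical `Topology` and `Specification` types standing in for TiUP's:

```go
package main

import "fmt"

// Topology stands in for the generic topology interface that Display
// receives; Specification stands in for the TiDB-specific concrete type.
type Topology interface {
	ComponentCount() int
}

type Specification struct {
	TiKVServers []string
}

func (s *Specification) ComponentCount() int { return len(s.TiKVServers) }

// display runs the generic part unconditionally; the TiDB-only probe runs
// only when the comma-ok assertion yields the concrete *Specification.
func display(topo Topology) {
	fmt.Println("total components:", topo.ComponentCount())

	if t, ok := topo.(*Specification); ok {
		// Inside this branch t is statically a *Specification, so
		// TiDB-specific fields and operations are available.
		fmt.Println("tikv servers:", t.TiKVServers)
	}
}

func main() {
	display(&Specification{TiKVServers: []string{"172.16.5.1:20160"}})
}
```

Because the probe reuses the `returnNodesOnly` dry-run mode, `display` never destroys anything itself; it only points the operator at `tiup cluster prune`.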
diff --git a/pkg/cluster/operation/action.go b/pkg/cluster/operation/action.go
index 4a9be8facf..58a521967b 100644
--- a/pkg/cluster/operation/action.go
+++ b/pkg/cluster/operation/action.go
@@ -136,27 +136,28 @@ func Stop(
 	}
 }
 
 // NeedCheckTomebsome return true if we need to check and destroy some node.
-func NeedCheckTomebsome(spec *spec.Specification) bool {
-	for _, s := range spec.TiKVServers {
+func NeedCheckTomebsome(topo *spec.Specification) bool {
+	for _, s := range topo.TiKVServers {
 		if s.Offline {
 			return true
 		}
 	}
-	for _, s := range spec.TiFlashServers {
+	for _, s := range topo.TiFlashServers {
 		if s.Offline {
 			return true
 		}
 	}
-	for _, s := range spec.PumpServers {
+	for _, s := range topo.PumpServers {
 		if s.Offline {
 			return true
 		}
 	}
-	for _, s := range spec.Drainers {
+	for _, s := range topo.Drainers {
 		if s.Offline {
 			return true
 		}
 	}
+
 	return false
 }
diff --git a/pkg/cluster/operation/scale_in.go b/pkg/cluster/operation/scale_in.go
index 0ddbacccd7..bce515dd38 100644
--- a/pkg/cluster/operation/scale_in.go
+++ b/pkg/cluster/operation/scale_in.go
@@ -237,7 +237,7 @@ func ScaleInCluster(
 				return errors.Annotatef(err, "failed to destroy %s", component.Name())
 			}
 		} else {
-			log.Warnf(color.YellowString("The component `%s` will be destroyed when display cluster info when it become tombstone, maybe exists in several minutes or hours",
+			log.Warnf(color.YellowString("The component `%s` will become a Tombstone and may remain for several minutes or hours; you can use the prune command to clean it up afterwards",
 				component.Name()))
 		}
 	}
diff --git a/tests/tiup-cluster/script/util.sh b/tests/tiup-cluster/script/util.sh
index 470ad687c0..32013b2077 100755
--- a/tests/tiup-cluster/script/util.sh
+++ b/tests/tiup-cluster/script/util.sh
@@ -36,6 +36,7 @@ function wait_instance_num_reach() {
 
     for ((i=0;i<120;i++))
     do
+        tiup-cluster $client prune $name --yes
         count=$(instance_num $name $native_ssh)
         if [ "$count" == "$target_num" ]; then
            echo "instance number reach $target_num"
@@ -47,7 +48,7 @@ function wait_instance_num_reach() {
         sleep 1
     done
 
-    echo "fail to wait instance number reach $target_num, retry num: $i"
+    echo "fail to wait instance number reach $target_num, count $count, retry num: $i"
     tiup-cluster $client display $name
     exit -1
 }
diff --git a/tests/tiup-dm/script/util.sh b/tests/tiup-dm/script/util.sh
index 5bdfda3dba..9ddf37afa3 100755
--- a/tests/tiup-dm/script/util.sh
+++ b/tests/tiup-dm/script/util.sh
@@ -9,6 +9,7 @@ set -eu
 # coverage: 12.7% of statements in github.com/pingcap/tiup/components/dm/...
 function instance_num() {
     name=$1
+
     count=$(tiup-dm display $name | grep "Total nodes" | awk -F ' ' '{print $3}')
 
     echo $count
@@ -23,6 +24,7 @@ function wait_instance_num_reach() {
 
     for ((i=0;i<120;i++))
    do
+        tiup-dm prune $name --yes
        count=$(instance_num $name)
        if [ "$count" == "$target_num" ]; then
            echo "instance number reach $target_num"
@@ -34,7 +36,7 @@ function wait_instance_num_reach() {
        sleep 1
    done
 
-    echo "fail to wait instance number reach $target_num, retry num: $i"
+    echo "fail to wait instance number reach $target_num, count $count, retry num: $i"
    tiup-dm display $name
    exit -1
 }
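Note: the two `util.sh` helpers above poll the display output for the instance count inside a bounded retry loop, and they now run `prune --yes` on every iteration because a scaled-in node only leaves the count once its Tombstone entry is destroyed. A rough Go rendering of that poll-prune-check loop, with hypothetical `prune` and `countInstances` helpers standing in for the CLI calls:

```go
package main

import (
	"fmt"
	"time"
)

// prune stands in for `tiup-cluster prune <name> --yes`; it must run on
// every iteration because tombstone nodes only leave the count once pruned.
func prune() {
	fmt.Println("pruning tombstone nodes")
}

// countInstances stands in for parsing the "Total nodes" line
// out of the display output.
func countInstances() int {
	return 3
}

// waitInstanceNum polls until the instance count reaches target,
// giving up after maxRetries iterations like the shell helper.
func waitInstanceNum(target, maxRetries int) error {
	count := -1
	for i := 0; i < maxRetries; i++ {
		prune()
		if count = countInstances(); count == target {
			return nil
		}
		time.Sleep(time.Second)
	}
	return fmt.Errorf("instance number still %d, want %d", count, target)
}

func main() {
	if err := waitInstanceNum(3, 120); err != nil {
		fmt.Println(err)
	}
}
```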