Skip to content

Commit

Permalink
More safe way to cleanup tombstone nodes (#858)
Browse files Browse the repository at this point in the history
  • Loading branch information
lucklove authored Oct 23, 2020
1 parent d1b803c commit 2471475
Show file tree
Hide file tree
Showing 12 changed files with 240 additions and 146 deletions.
55 changes: 1 addition & 54 deletions components/cluster/command/display.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,8 @@ import (
"github.com/fatih/color"
perrs "github.com/pingcap/errors"
"github.com/pingcap/tiup/pkg/cluster/api"
operator "github.com/pingcap/tiup/pkg/cluster/operation"
"github.com/pingcap/tiup/pkg/cluster/spec"
"github.com/pingcap/tiup/pkg/cluster/task"
"github.com/pingcap/tiup/pkg/crypto"
"github.com/pingcap/tiup/pkg/logger/log"
"github.com/pingcap/tiup/pkg/meta"
"github.com/spf13/cobra"
)
Expand Down Expand Up @@ -70,12 +67,7 @@ func newDisplayCmd() *cobra.Command {
return displayDashboardInfo(clusterName, tlsCfg)
}

err = manager.Display(clusterName, gOpt)
if err != nil {
return perrs.AddStack(err)
}

return destroyTombstoneIfNeed(clusterName, metadata, gOpt)
return manager.Display(clusterName, gOpt)
},
}

Expand Down Expand Up @@ -133,48 +125,3 @@ func displayDashboardInfo(clusterName string, tlsCfg *tls.Config) error {

return nil
}

// destroyTombstoneIfNeed destroys the tombstone nodes of the given cluster and
// persists the updated metadata.
//
// It returns nil without doing anything when operator.NeedCheckTomebsome
// reports false for the topology, or when the first DestroyTombstone call
// reports no nodes. Errors from the TLS/SSH setup and from DestroyTombstone
// are returned wrapped with a stack trace.
//
// SSH settings are taken from opt (not from the package-level gOpt) so that
// the options used to connect always match the options handed to
// operator.DestroyTombstone.
func destroyTombstoneIfNeed(clusterName string, metadata *spec.ClusterMeta, opt operator.Options) error {
	topo := metadata.Topology

	if !operator.NeedCheckTomebsome(topo) {
		return nil
	}

	tlsCfg, err := topo.TLSConfig(tidbSpec.Path(clusterName, spec.TLSCertKeyDir))
	if err != nil {
		return perrs.AddStack(err)
	}

	ctx := task.NewContext()
	err = ctx.SetSSHKeySet(spec.ClusterPath(clusterName, "ssh", "id_rsa"),
		spec.ClusterPath(clusterName, "ssh", "id_rsa.pub"))
	if err != nil {
		return perrs.AddStack(err)
	}

	err = ctx.SetClusterSSH(topo, metadata.User, opt.SSHTimeout, opt.SSHType, topo.BaseTopo().GlobalOptions.SSHType)
	if err != nil {
		return perrs.AddStack(err)
	}

	// First pass only asks which nodes would be affected.
	// NOTE(review): presumably returnNodesOnly=true performs no destruction — confirm in operator.DestroyTombstone.
	nodes, err := operator.DestroyTombstone(ctx, topo, true /* returnNodesOnly */, opt, tlsCfg)
	if err != nil {
		return perrs.AddStack(err)
	}

	if len(nodes) == 0 {
		return nil
	}

	log.Infof("Start destroy Tombstone nodes: %v ...", nodes)

	// Second pass performs the actual destroy.
	_, err = operator.DestroyTombstone(ctx, topo, false /* returnNodesOnly */, opt, tlsCfg)
	if err != nil {
		return perrs.AddStack(err)
	}

	log.Infof("Destroy success")

	return spec.SaveClusterMeta(clusterName, metadata)
}
104 changes: 104 additions & 0 deletions components/cluster/command/prune.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
// Copyright 2020 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

package command

import (
"fmt"

"github.com/fatih/color"
perrs "github.com/pingcap/errors"
"github.com/pingcap/tiup/pkg/cliutil"
operator "github.com/pingcap/tiup/pkg/cluster/operation"
"github.com/pingcap/tiup/pkg/cluster/spec"
"github.com/pingcap/tiup/pkg/cluster/task"
"github.com/pingcap/tiup/pkg/logger/log"
"github.com/spf13/cobra"
)

// newPruneCmd builds the `prune <cluster-name>` sub-command. The command
// expects exactly one argument (the cluster name); with any other argument
// count it prints the help text instead. Otherwise it loads the cluster
// metadata and hands it to destroyTombstoneIfNeed.
func newPruneCmd() *cobra.Command {
	return &cobra.Command{
		Use:   "prune <cluster-name>",
		Short: "Destroy and remove instances that is in tombstone state",
		RunE: func(c *cobra.Command, args []string) error {
			// Exactly one positional argument is accepted.
			if len(args) != 1 {
				return c.Help()
			}

			name := args[0]
			meta, loadErr := spec.ClusterMetadata(name)
			if loadErr != nil {
				return loadErr
			}

			return destroyTombstoneIfNeed(name, meta, gOpt, skipConfirm)
		},
	}
}

// destroyTombstoneIfNeed destroys the tombstone nodes of the given cluster,
// asking the user for confirmation first unless skipConfirm is true, and then
// persists the updated metadata.
//
// It returns nil without doing anything when operator.NeedCheckTomebsome
// reports false for the topology, or when the first DestroyTombstone call
// reports no nodes. The error from the confirmation prompt is returned as-is;
// other errors are returned wrapped with a stack trace.
//
// SSH settings are taken from opt (not from the package-level gOpt) so that
// the options used to connect always match the options handed to
// operator.DestroyTombstone.
func destroyTombstoneIfNeed(clusterName string, metadata *spec.ClusterMeta, opt operator.Options, skipConfirm bool) error {
	topo := metadata.Topology

	if !operator.NeedCheckTomebsome(topo) {
		return nil
	}

	tlsCfg, err := topo.TLSConfig(tidbSpec.Path(clusterName, spec.TLSCertKeyDir))
	if err != nil {
		return perrs.AddStack(err)
	}

	ctx := task.NewContext()
	err = ctx.SetSSHKeySet(spec.ClusterPath(clusterName, "ssh", "id_rsa"),
		spec.ClusterPath(clusterName, "ssh", "id_rsa.pub"))
	if err != nil {
		return perrs.AddStack(err)
	}

	err = ctx.SetClusterSSH(topo, metadata.User, opt.SSHTimeout, opt.SSHType, topo.BaseTopo().GlobalOptions.SSHType)
	if err != nil {
		return perrs.AddStack(err)
	}

	// First pass only asks which nodes would be affected, so they can be shown
	// in the confirmation prompt.
	// NOTE(review): presumably returnNodesOnly=true performs no destruction — confirm in operator.DestroyTombstone.
	nodes, err := operator.DestroyTombstone(ctx, topo, true /* returnNodesOnly */, opt, tlsCfg)
	if err != nil {
		return perrs.AddStack(err)
	}

	if len(nodes) == 0 {
		return nil
	}

	if !skipConfirm {
		err = cliutil.PromptForConfirmOrAbortError(
			color.HiYellowString(fmt.Sprintf("Will destroy these nodes: %v\nDo you confirm this action? [y/N]:", nodes)),
		)
		if err != nil {
			return err
		}
	}

	log.Infof("Start destroy Tombstone nodes: %v ...", nodes)

	// Second pass performs the actual destroy.
	_, err = operator.DestroyTombstone(ctx, topo, false /* returnNodesOnly */, opt, tlsCfg)
	if err != nil {
		return perrs.AddStack(err)
	}

	log.Infof("Destroy success")

	return spec.SaveClusterMeta(clusterName, metadata)
}
1 change: 1 addition & 0 deletions components/cluster/command/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,7 @@ func init() {
newUpgradeCmd(),
newExecCmd(),
newDisplayCmd(),
newPruneCmd(),
newListCmd(),
newAuditCmd(),
newImportCmd(),
Expand Down
84 changes: 1 addition & 83 deletions components/dm/command/display.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,7 @@
package command

import (
"sync"
"time"

perrs "github.com/pingcap/errors"
"github.com/pingcap/tiup/components/dm/spec"
"github.com/pingcap/tiup/pkg/cluster/api"
operator "github.com/pingcap/tiup/pkg/cluster/operation"
"github.com/spf13/cobra"
"go.uber.org/zap"
)

func newDisplayCmd() *cobra.Command {
Expand All @@ -39,85 +31,11 @@ func newDisplayCmd() *cobra.Command {

clusterName = args[0]

err := manager.Display(clusterName, gOpt)
if err != nil {
return perrs.AddStack(err)
}

metadata := new(spec.Metadata)
err = dmspec.Metadata(clusterName, metadata)
if err != nil {
return perrs.AddStack(err)
}

return clearOutDatedEtcdInfo(clusterName, metadata, gOpt)
return manager.Display(clusterName, gOpt)
},
}

cmd.Flags().StringSliceVarP(&gOpt.Roles, "role", "R", nil, "Only display specified roles")
cmd.Flags().StringSliceVarP(&gOpt.Nodes, "node", "N", nil, "Only display specified nodes")
return cmd
}

// clearOutDatedEtcdInfo offlines every master and worker that is registered
// with dm-master but no longer present in the cluster topology.
//
// It compares the names returned by GetRegisteredMembers against the names in
// metadata.Topology, issues the OfflineMaster/OfflineWorker calls
// concurrently, waits for all of them to finish, and returns one of the
// non-nil errors (or nil if every call succeeded). An error from
// GetRegisteredMembers is returned directly.
//
// clusterName and opt are currently unused.
func clearOutDatedEtcdInfo(clusterName string, metadata *spec.Metadata, opt operator.Options) error {
	topo := metadata.Topology

	existedMasters := make(map[string]struct{})
	existedWorkers := make(map[string]struct{})
	mastersToDelete := make([]string, 0)
	workersToDelete := make([]string, 0)

	for _, masterSpec := range topo.Masters {
		existedMasters[masterSpec.Name] = struct{}{}
	}
	for _, workerSpec := range topo.Workers {
		existedWorkers[workerSpec.Name] = struct{}{}
	}

	dmMasterClient := api.NewDMMasterClient(topo.GetMasterList(), 10*time.Second, nil)
	registeredMasters, registeredWorkers, err := dmMasterClient.GetRegisteredMembers()
	if err != nil {
		return err
	}

	for _, master := range registeredMasters {
		if _, ok := existedMasters[master]; !ok {
			mastersToDelete = append(mastersToDelete, master)
		}
	}
	for _, worker := range registeredWorkers {
		if _, ok := existedWorkers[worker]; !ok {
			workersToDelete = append(workersToDelete, worker)
		}
	}

	zap.L().Info("Outdated components needed to clear etcd info", zap.Strings("masters", mastersToDelete), zap.Strings("workers", workersToDelete))

	// One result is sent per offline call, so the buffer must be sized by the
	// number of members to delete (not by the number of members in the
	// topology); otherwise senders could block forever and wg.Wait would
	// never return.
	pending := len(mastersToDelete) + len(workersToDelete)
	errCh := make(chan error, pending)
	var wg sync.WaitGroup
	wg.Add(pending)

	for _, master := range mastersToDelete {
		master := master
		go func() {
			defer wg.Done()
			errCh <- dmMasterClient.OfflineMaster(master, nil)
		}()
	}
	for _, worker := range workersToDelete {
		worker := worker
		go func() {
			defer wg.Done()
			errCh <- dmMasterClient.OfflineWorker(worker, nil)
		}()
	}

	wg.Wait()
	close(errCh)

	// Successful calls send nil; skip those so a success cannot mask a
	// failure. Return any one real error.
	for opErr := range errCh {
		if opErr != nil {
			return opErr
		}
	}

	return nil
}
Loading

0 comments on commit 2471475

Please sign in to comment.