diff --git a/components/dm/command/scale_in.go b/components/dm/command/scale_in.go index cf66c65845..9e3a935f13 100644 --- a/components/dm/command/scale_in.go +++ b/components/dm/command/scale_in.go @@ -80,11 +80,13 @@ func ScaleInDMCluster( ) error { // instances by uuid instances := map[string]dm.Instance{} + instCount := map[string]int{} // make sure all nodeIds exists in topology for _, component := range topo.ComponentsByStartOrder() { for _, instance := range component.Instances() { instances[instance.ID()] = instance + instCount[instance.GetHost()]++ } } @@ -110,7 +112,8 @@ func ScaleInDMCluster( if !deletedNodes.Exist(instance.ID()) { continue } - if err := operator.StopAndDestroyInstance(getter, topo, instance, options, false); err != nil { + instCount[instance.GetHost()]-- + if err := operator.StopAndDestroyInstance(getter, topo, instance, options, instCount[instance.GetHost()] == 0); err != nil { log.Warnf("failed to stop/destroy %s: %v", component.Name(), err) } } @@ -162,6 +165,14 @@ func ScaleInDMCluster( if err := operator.DestroyComponent(getter, []dm.Instance{instance}, topo, options); err != nil { return errors.Annotatef(err, "failed to destroy %s", component.Name()) } + + instCount[instance.GetHost()]-- + if instCount[instance.GetHost()] == 0 { + if err := operator.DeletePublicKey(getter, instance.GetHost()); err != nil { + return errors.Annotatef(err, "failed to delete public key") + } + } + } } diff --git a/pkg/cluster/executor/executor.go b/pkg/cluster/executor/executor.go index 4815afa58f..9042767a1f 100644 --- a/pkg/cluster/executor/executor.go +++ b/pkg/cluster/executor/executor.go @@ -47,6 +47,9 @@ var ( // It's used to predict if the connection can establish success in the future. // Its main purpose is to avoid sshpass hang when user speficied a wrong prompt. connectionTestCommand = "echo connection test, if killed, check the password prompt" + + // SSH authorized_keys file + defaultSSHAuthorizedKeys = "~/.ssh/authorized_keys" ) // Executor is the executor interface for TiOps, all tasks will in the end @@ -155,3 +158,30 @@ func checkLocalIP(ip string) error { return fmt.Errorf("address %s not found in all interfaces, found ips: %s", ip, strings.Join(foundIps, ",")) } + +// FindSSHAuthorizedKeysFile finds the correct path of SSH authorized keys file +func FindSSHAuthorizedKeysFile(exec Executor) string { + // detect if custom path of authorized keys file is set + // NOTE: we do not yet support: + // - custom config for user (~/.ssh/config) + // - sshd started with custom config (other than /etc/ssh/sshd_config) + // - ssh server implementations other than OpenSSH (such as dropbear) + sshAuthorizedKeys := defaultSSHAuthorizedKeys + cmd := "grep -Ev '^\\s*#|^\\s*$' /etc/ssh/sshd_config" + stdout, _, _ := exec.Execute(cmd, true) // error ignored as we have default value + for _, line := range strings.Split(string(stdout), "\n") { + if !strings.Contains(line, "AuthorizedKeysFile") { + continue + } + fields := strings.Fields(line) + if len(fields) >= 2 { + sshAuthorizedKeys = fields[1] + break + } + } + + if !strings.HasPrefix(sshAuthorizedKeys, "/") && !strings.HasPrefix(sshAuthorizedKeys, "~") { + sshAuthorizedKeys = fmt.Sprintf("~/%s", sshAuthorizedKeys) + } + return sshAuthorizedKeys +} diff --git a/pkg/cluster/operation/action.go b/pkg/cluster/operation/action.go index 4bb0c4182f..622a6f198a 100644 --- a/pkg/cluster/operation/action.go +++ b/pkg/cluster/operation/action.go @@ -114,7 +114,7 @@ func Stop( instCount := map[string]int{} cluster.IterInstance(func(inst spec.Instance) { - instCount[inst.GetHost()] = instCount[inst.GetHost()] + 1 + instCount[inst.GetHost()]++ }) for _, comp := range components { diff --git a/pkg/cluster/operation/destroy.go b/pkg/cluster/operation/destroy.go index 0387b86df3..f09b9581d3 100644 --- a/pkg/cluster/operation/destroy.go +++ b/pkg/cluster/operation/destroy.go @@ -14,8 +14,10 @@ package operator import ( + "bytes" "crypto/tls" "fmt" + "io/ioutil" "path" "path/filepath" "strconv" @@ -23,7 +25,9 @@ import ( "time" "github.com/pingcap/errors" + perrs "github.com/pingcap/errors" "github.com/pingcap/tiup/pkg/cluster/api" + "github.com/pingcap/tiup/pkg/cluster/executor" "github.com/pingcap/tiup/pkg/cluster/module" "github.com/pingcap/tiup/pkg/cluster/spec" "github.com/pingcap/tiup/pkg/logger/log" @@ -54,12 +58,11 @@ func Destroy( cluster spec.Topology, options Options, ) error { - uniqueHosts := set.NewStringSet() coms := cluster.ComponentsByStopOrder() instCount := map[string]int{} cluster.IterInstance(func(inst spec.Instance) { - instCount[inst.GetHost()] = instCount[inst.GetHost()] + 1 + instCount[inst.GetHost()]++ }) for _, com := range coms { @@ -80,9 +83,18 @@ func Destroy( } } + gOpts := cluster.BaseTopo().GlobalOptions + // Delete all global deploy directory - for host := range uniqueHosts { - if err := DeleteGlobalDirs(getter, host, cluster.BaseTopo().GlobalOptions); err != nil { + for host := range instCount { + if err := DeleteGlobalDirs(getter, host, gOpts); err != nil { + return nil + } + } + + // after all things done, try to remove SSH public key + for host := range instCount { + if err := DeletePublicKey(getter, host); err != nil { return nil } } @@ -93,7 +105,7 @@ func Destroy( // StopAndDestroyInstance stop and destroy the instance, // if this instance is the host's last one, and the host has monitor deployed, // we need to destroy the monitor, either -func StopAndDestroyInstance(getter ExecutorGetter, cluster spec.Topology, instance spec.Instance, options Options, destroyMonitor bool) error { +func StopAndDestroyInstance(getter ExecutorGetter, cluster spec.Topology, instance spec.Instance, options Options, destroyNode bool) error { ignoreErr := options.Force compName := instance.ComponentName() @@ -111,22 +123,32 @@ func StopAndDestroyInstance(getter ExecutorGetter, cluster spec.Topology, instan log.Warnf("failed to destroy %s: %v", compName, err) } - // monitoredOptions for dm cluster is nil - monitoredOptions := cluster.GetMonitoredOptions() + if destroyNode { + // monitoredOptions for dm cluster is nil + monitoredOptions := cluster.GetMonitoredOptions() - if destroyMonitor && monitoredOptions != nil { - if err := StopMonitored(getter, instance, monitoredOptions, options.OptTimeout); err != nil { - if !ignoreErr { - return errors.Annotatef(err, "failed to stop monitor") + if monitoredOptions != nil { + if err := StopMonitored(getter, instance, monitoredOptions, options.OptTimeout); err != nil { + if !ignoreErr { + return errors.Annotatef(err, "failed to stop monitor") + } + log.Warnf("failed to stop %s: %v", "monitor", err) + } + if err := DestroyMonitored(getter, instance, monitoredOptions, options.OptTimeout); err != nil { + if !ignoreErr { + return errors.Annotatef(err, "failed to destroy monitor") + } + log.Warnf("failed to destroy %s: %v", "monitor", err) } - log.Warnf("failed to stop %s: %v", "monitor", err) } - if err := DestroyMonitored(getter, instance, monitoredOptions, options.OptTimeout); err != nil { + + if err := DeletePublicKey(getter, instance.GetHost()); err != nil { if !ignoreErr { - return errors.Annotatef(err, "failed to destroy monitor") + return errors.Annotatef(err, "failed to delete public key") } - log.Warnf("failed to destroy %s: %v", "monitor", err) + log.Warnf("failed to delete public key") } + } return nil } @@ -171,12 +193,48 @@ func DeleteGlobalDirs(getter ExecutorGetter, host string, options *spec.GlobalOp return nil } +// DeletePublicKey deletes the SSH public key from host +func DeletePublicKey(getter ExecutorGetter, host string) error { + e := getter.Get(host) + log.Infof("Delete public key %s", host) + _, pubKeyPath := getter.GetSSHKeySet() + publicKey, err := ioutil.ReadFile(pubKeyPath) + if err != nil { + return perrs.Trace(err) + } + + pubKey := string(bytes.TrimSpace(publicKey)) + pubKey = strings.ReplaceAll(pubKey, "/", "\\/") + pubKeysFile := executor.FindSSHAuthorizedKeysFile(e) + + // delete the public key with Linux `sed` toolkit + c := module.ShellModuleConfig{ + Command: fmt.Sprintf("sed -i '/%s/d' %s", pubKey, pubKeysFile), + UseShell: false, + } + shell := module.NewShellModule(c) + stdout, stderr, err := shell.Execute(e) + + if len(stdout) > 0 { + fmt.Println(string(stdout)) + } + if len(stderr) > 0 { + log.Errorf(string(stderr)) + } + + if err != nil { + return errors.Annotatef(err, "failed to delete pulblic key on: %s", host) + } + + log.Infof("Delete public key %s success", host) + return nil +} + // DestroyMonitored destroy the monitored service. func DestroyMonitored(getter ExecutorGetter, inst spec.Instance, options *spec.MonitoredOptions, timeout uint64) error { e := getter.Get(inst.GetHost()) log.Infof("Destroying monitored %s", inst.GetHost()) - log.Infof("Destroying monitored") log.Infof("\tDestroying instance %s", inst.GetHost()) // Stop by systemd. @@ -433,7 +491,7 @@ func DestroyClusterTombstone( instCount := map[string]int{} for _, component := range cluster.ComponentsByStartOrder() { for _, instance := range component.Instances() { - instCount[instance.GetHost()] = instCount[instance.GetHost()] + 1 + instCount[instance.GetHost()]++ } } diff --git a/pkg/cluster/operation/operation.go b/pkg/cluster/operation/operation.go index 89735a5a10..30fed2481e 100644 --- a/pkg/cluster/operation/operation.go +++ b/pkg/cluster/operation/operation.go @@ -120,4 +120,6 @@ func FilterInstance(instances []spec.Instance, nodes set.StringSet) (res []spec. // ExecutorGetter get the executor by host. type ExecutorGetter interface { Get(host string) (e executor.Executor) + // GetSSHKeySet gets the SSH private and public key path + GetSSHKeySet() (privateKeyPath, publicKeyPath string) } diff --git a/pkg/cluster/operation/scale_in.go b/pkg/cluster/operation/scale_in.go index 3b8ee1a171..4b14c0ca1a 100644 --- a/pkg/cluster/operation/scale_in.go +++ b/pkg/cluster/operation/scale_in.go @@ -92,7 +92,7 @@ func ScaleInCluster( for _, component := range cluster.ComponentsByStartOrder() { for _, instance := range component.Instances() { instances[instance.ID()] = instance - instCount[instance.GetHost()] = instCount[instance.GetHost()] + 1 + instCount[instance.GetHost()]++ } } diff --git a/pkg/cluster/task/env_init.go b/pkg/cluster/task/env_init.go index 74c6e41e31..6c70f97d47 100644 --- a/pkg/cluster/task/env_init.go +++ b/pkg/cluster/task/env_init.go @@ -19,6 +19,7 @@ import ( "strings" "github.com/joomcode/errorx" + "github.com/pingcap/tiup/pkg/cluster/executor" "github.com/pingcap/tiup/pkg/cluster/module" ) @@ -27,8 +28,6 @@ var ( errEnvInitSubCommandFailed = errNSEnvInit.NewType("sub_command_failed") // ErrEnvInitFailed is ErrEnvInitFailed ErrEnvInitFailed = errNSEnvInit.NewType("failed") - // SSH authorized_keys file - defaultSSHAuthorizedKeys = "~/.ssh/authorized_keys" ) // EnvInit is used to initialize the remote environment, e.g: @@ -76,36 +75,15 @@ func (e *EnvInit) execute(ctx *Context) error { } // Authorize - cmd := `su - ` + e.deployUser + ` -c 'test -d ~/.ssh || mkdir -p ~/.ssh && chmod 700 ~/.ssh'` + cmd := `su - ` + e.deployUser + ` -c 'mkdir -p ~/.ssh && chmod 700 ~/.ssh'` _, _, err = exec.Execute(cmd, true) if err != nil { return wrapError(errEnvInitSubCommandFailed. Wrap(err, "Failed to create '~/.ssh' directory for user '%s'", e.deployUser)) } - // detect if custom path of authorized keys file is set - // NOTE: we do not yet support: - // - custom config for user (~/.ssh/config) - // - sshd started with custom config (other than /etc/ssh/sshd_config) - // - ssh server implementations other than OpenSSH (such as dropbear) - sshAuthorizedKeys := defaultSSHAuthorizedKeys - cmd = "grep -Ev '^\\s*#|^\\s*$' /etc/ssh/sshd_config" - stdout, _, _ := exec.Execute(cmd, true) // error ignored as we have default value - for _, line := range strings.Split(string(stdout), "\n") { - if !strings.Contains(line, "AuthorizedKeysFile") { - continue - } - fields := strings.Fields(line) - if len(fields) >= 2 { - sshAuthorizedKeys = fields[1] - } - } - - if !strings.HasPrefix(sshAuthorizedKeys, "/") && !strings.HasPrefix(sshAuthorizedKeys, "~") { - sshAuthorizedKeys = fmt.Sprintf("~/%s", sshAuthorizedKeys) - } - pk := strings.TrimSpace(string(pubKey)) + sshAuthorizedKeys := executor.FindSSHAuthorizedKeysFile(exec) cmd = fmt.Sprintf(`su - %[1]s -c 'grep $(echo %[2]s) %[3]s || echo %[2]s >> %[3]s && chmod 600 %[3]s'`, e.deployUser, pk, sshAuthorizedKeys) _, _, err = exec.Execute(cmd, true) diff --git a/pkg/cluster/task/task.go b/pkg/cluster/task/task.go index 11f8da2ab9..abea809da8 100644 --- a/pkg/cluster/task/task.go +++ b/pkg/cluster/task/task.go @@ -56,7 +56,7 @@ type ( checkResults map[string][]*operator.CheckResult } - // The public/private key is used to access remote server via the user `tidb` + // The private/public key is used to access remote server via the user `tidb` PrivateKeyPath string PublicKeyPath string } @@ -95,7 +95,7 @@ func NewContext() *Context { } } -// Get implements operation ExecutorGetter interface. +// Get implements the operation.ExecutorGetter interface. func (ctx *Context) Get(host string) (e executor.Executor) { ctx.exec.Lock() e, ok := ctx.exec.executors[host] @@ -107,6 +107,11 @@ func (ctx *Context) Get(host string) (e executor.Executor) { return } +// GetSSHKeySet implements the operation.ExecutorGetter interface. +func (ctx *Context) GetSSHKeySet() (privateKeyPath, publicKeyPath string) { + return ctx.PrivateKeyPath, ctx.PublicKeyPath +} + // GetExecutor get the executor. func (ctx *Context) GetExecutor(host string) (e executor.Executor, ok bool) { // Mock point for unit test diff --git a/tests/tiup-cluster/script/cmd_subtest.sh b/tests/tiup-cluster/script/cmd_subtest.sh index cc1ff0e929..5089efa5cf 100755 --- a/tests/tiup-cluster/script/cmd_subtest.sh +++ b/tests/tiup-cluster/script/cmd_subtest.sh @@ -104,5 +104,10 @@ function cmd_subtest() { ! tiup-cluster $client _test $name data + cp ~/.tiup/storage/cluster/clusters/$name/ssh/id_rsa "/tmp/$name.id_rsa" tiup-cluster $client --yes destroy $name + + # after destroy the cluster, the public key should be deleted + ! ssh -o "StrictHostKeyChecking=no" -o "PasswordAuthentication=no" -i "/tmp/$name.id_rsa" tidb@$ipprefix.101 "ls" + unlink "/tmp/$name.id_rsa" } diff --git a/tests/tiup-cluster/script/scale_core.sh b/tests/tiup-cluster/script/scale_core.sh index 8936aaccf8..f18dfaef6c 100755 --- a/tests/tiup-cluster/script/scale_core.sh +++ b/tests/tiup-cluster/script/scale_core.sh @@ -78,6 +78,8 @@ function scale_core() { ! tiup-cluster $client exec $name -N $ipprefix.102 --command "ls /home/tidb/deploy/monitor-9100/deploy/monitor-9100" ! tiup-cluster $client exec $name -N $ipprefix.102 --command "ps aux | grep node_exporter | grep -qv grep" ! tiup-cluster $client exec $name -N $ipprefix.102 --command "ps aux | grep blackbox_exporter | grep -qv grep" + # after all components on the node were scale-ined, the SSH public is automatically deleted + ! ssh -o "StrictHostKeyChecking=no "-o "PasswordAuthentication=no" -i ~/.tiup/storage/cluster/$name/ssh/id_rsa tidb@$ipprefix.102 "ls" echo "start scale out tidb" topo=./topo/full_scale_in_tidb.yaml diff --git a/tests/tiup-dm/test_cmd.sh b/tests/tiup-dm/test_cmd.sh index 050bb47a5d..d8bf0c9161 100755 --- a/tests/tiup-dm/test_cmd.sh +++ b/tests/tiup-dm/test_cmd.sh @@ -84,8 +84,13 @@ tiup-dm exec $name -N $ipprefix.101 --command "ls /home/tidb/deploy/grafana-3000 # test create a task and can replicate data ./script/task/run.sh -tiup-dm --yes destroy $name - # test dm log dir tiup-dm notfound-command 2>&1 | grep $HOME/.tiup/logs/tiup-dm-debug TIUP_LOG_PATH=/tmp/a/b tiup-dm notfound-command 2>&1 | grep /tmp/a/b/tiup-dm-debug + +cp ~/.tiup/storage/dm/clusters/$name/ssh/id_rsa "/tmp/$name.id_rsa" +tiup-dm --yes destroy $name + +# after destroy the cluster, the public key should be deleted +! ssh -o "StrictHostKeyChecking=no" -o "PasswordAuthentication=no" -i "/tmp/$name.id_rsa" tidb@$ipprefix.102 "ls" +unlink "/tmp/$name.id_rsa"