From 45c5017424059037e2e26d95fe397bec0e389ebd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Qi=CE=BC=24hi=D0=AFu=C3=AD?= <39378935+srstack@users.noreply.github.com> Date: Thu, 30 Dec 2021 11:25:50 +0800 Subject: [PATCH] cluster: check support check before scale-out the instance (#1659) --- components/cluster/command/check.go | 20 ++++- pkg/cluster/manager/check.go | 127 ++++++++++++++++++++++------ pkg/cluster/manager/scale_out.go | 9 +- pkg/cluster/operation/check.go | 27 ++++++ pkg/cluster/task/check.go | 8 ++ 5 files changed, 152 insertions(+), 39 deletions(-) diff --git a/components/cluster/command/check.go b/components/cluster/command/check.go index 37b1b4dde9..63a9615bba 100644 --- a/components/cluster/command/check.go +++ b/components/cluster/command/check.go @@ -28,22 +28,34 @@ func newCheckCmd() *cobra.Command { IdentityFile: path.Join(utils.UserHome(), ".ssh", "id_rsa"), } cmd := &cobra.Command{ - Use: "check ", + Use: "check [scale-out.yml]", Short: "Perform preflight checks for the cluster.", Long: `Perform preflight checks for the cluster. By default, it checks deploy servers before a cluster is deployed, the input is the topology.yaml for the cluster. If '--cluster' is set, it will perform checks for an existing cluster, the input is the cluster name. 
Some checks are ignore in this mode, such as port and dir -conflict checks with other clusters`, +conflict checks with other clusters +If you want to check the scale-out topology, please use execute the following command +' check --cluster ' +it will the new instances `, RunE: func(cmd *cobra.Command, args []string) error { - if len(args) != 1 { + if len(args) != 1 && len(args) != 2 { return cmd.Help() } + scaleOutTopo := "" if opt.ExistCluster { clusterReport.ID = scrubClusterName(args[0]) } - return cm.CheckCluster(args[0], opt, gOpt) + + if len(args) == 2 { + if !opt.ExistCluster { + return cmd.Help() + } + scaleOutTopo = args[1] + } + + return cm.CheckCluster(args[0], scaleOutTopo, opt, gOpt) }, } diff --git a/pkg/cluster/manager/check.go b/pkg/cluster/manager/check.go index 47ba547b28..896c746d4d 100644 --- a/pkg/cluster/manager/check.go +++ b/pkg/cluster/manager/check.go @@ -45,13 +45,14 @@ type CheckOptions struct { } // CheckCluster check cluster before deploying or upgrading -func (m *Manager) CheckCluster(clusterOrTopoName string, opt CheckOptions, gOpt operator.Options) error { +func (m *Manager) CheckCluster(clusterOrTopoName, scaleoutTopo string, opt CheckOptions, gOpt operator.Options) error { var topo spec.Specification ctx := ctxt.New( context.Background(), gOpt.Concurrency, m.logger, ) + var currTopo *spec.Specification if opt.ExistCluster { // check for existing cluster clusterName := clusterOrTopoName @@ -69,10 +70,28 @@ func (m *Manager) CheckCluster(clusterOrTopoName string, opt CheckOptions, gOpt if err != nil { return err } - opt.User = metadata.User - opt.IdentityFile = m.specManager.Path(clusterName, "ssh", "id_rsa") - topo = *metadata.Topology + if scaleoutTopo != "" { + currTopo = metadata.Topology + // complete global configuration + topo.GlobalOptions = currTopo.GlobalOptions + topo.MonitoredOptions = currTopo.MonitoredOptions + topo.ServerConfigs = currTopo.ServerConfigs + + if err := spec.ParseTopologyYaml(scaleoutTopo, &topo); err != 
nil { + return err + } + spec.ExpandRelativeDir(&topo) + + // checkConflict after fillHostArch + // the instances in the scale-out topology do not exist yet + opt.ExistCluster = false + } else { + opt.IdentityFile = m.specManager.Path(clusterName, "ssh", "id_rsa") + topo = *metadata.Topology + opt.User = metadata.User + } + topo.AdjustByVersion(metadata.Version) } else { // check before cluster is deployed topoFileName := clusterOrTopoName @@ -82,15 +101,7 @@ func (m *Manager) CheckCluster(clusterOrTopoName string, opt CheckOptions, gOpt } spec.ExpandRelativeDir(&topo) - clusterList, err := m.specManager.GetAllClusters() - if err != nil { - return err - } - // use a dummy cluster name, the real cluster name is set during deploy - if err := spec.CheckClusterPortConflict(clusterList, "nonexist-dummy-tidb-cluster", &topo); err != nil { - return err - } - if err := spec.CheckClusterDirConflict(clusterList, "nonexist-dummy-tidb-cluster", &topo); err != nil { + if err := checkConflict(m, "nonexist-dummy-tidb-cluster", &topo); err != nil { return err } } @@ -115,6 +126,17 @@ func (m *Manager) CheckCluster(clusterOrTopoName string, opt CheckOptions, gOpt return err } + // Abort scale out operation if the merged topology is invalid + if currTopo != nil && scaleoutTopo != "" { + mergedTopo := currTopo.MergeTopo(&topo) + if err := mergedTopo.Validate(); err != nil { + return err + } + if err := checkConflict(m, clusterOrTopoName, mergedTopo); err != nil { + return err + } + } + if err := checkSystemInfo(ctx, sshConnProps, sshProxyProps, &topo, &gOpt, &opt); err != nil { return err } @@ -186,6 +208,38 @@ func checkSystemInfo( opt.Opr, ) } + + if !opt.ExistCluster { + t1 = t1. + CheckSys( + inst.GetHost(), + inst.DeployDir(), + task.ChecktypeIsExist, + topo, + opt.Opr, + ). + CheckSys( + inst.GetHost(), + inst.DataDir(), + task.ChecktypeIsExist, + topo, + opt.Opr, + ). + CheckSys( + inst.GetHost(), + inst.LogDir(), + task.ChecktypeIsExist, + topo, + opt.Opr, + ). 
+ CheckSys( + inst.GetHost(), + fmt.Sprintf("/etc/systemd/system/%s-%d.service", inst.ComponentName(), inst.GetPort()), + task.ChecktypeIsExist, + topo, + opt.Opr, + ) + } // if the data dir set in topology is relative, and the home dir of deploy user // and the user run the check command is on different partitions, the disk detection // may be using incorrect partition for validations. @@ -199,6 +253,7 @@ func checkSystemInfo( topo, opt.Opr, ) + if opt.ExistCluster { t1 = t1.CheckSys( inst.GetHost(), @@ -270,20 +325,6 @@ func checkSystemInfo( topo, opt.Opr, ). - // check for listening port - Shell( - inst.GetHost(), - "ss -lnt", - "", - false, - ). - CheckSys( - inst.GetHost(), - "", - task.CheckTypePort, - topo, - opt.Opr, - ). // check for system limits Shell( inst.GetHost(), @@ -328,6 +369,24 @@ func checkSystemInfo( topo, opt.Opr, ) + + if !opt.ExistCluster { + t1 = t1. + // check for listening port + Shell( + inst.GetHost(), + "ss -lnt", + "", + false, + ). + CheckSys( + inst.GetHost(), + "", + task.CheckTypePort, + topo, + opt.Opr, + ) + } } checkSysTasks = append( @@ -596,3 +655,17 @@ func (m *Manager) checkRegionsInfo(clusterName string, topo *spec.Specification, } return nil } + +// checkConflict checks topo for port and directory conflicts against all known clusters +func checkConflict(m *Manager, clusterName string, topo spec.Topology) error { + clusterList, err := m.specManager.GetAllClusters() + if err != nil { + return err + } + // clusterName is a dummy name for a not-yet-deployed cluster, or the real name when scaling out + if err := spec.CheckClusterPortConflict(clusterList, clusterName, topo); err != nil { + return err + } + err = spec.CheckClusterDirConflict(clusterList, clusterName, topo) + return err +} diff --git a/pkg/cluster/manager/scale_out.go b/pkg/cluster/manager/scale_out.go index 7058a04de6..f1f132a998 100644 --- a/pkg/cluster/manager/scale_out.go +++ b/pkg/cluster/manager/scale_out.go @@ -166,14 +166,7 @@ func (m *Manager) ScaleOut( } } - clusterList, err := m.specManager.GetAllClusters() - if err != nil 
{ - return err - } - if err := spec.CheckClusterPortConflict(clusterList, name, mergedTopo); err != nil { - return err - } - if err := spec.CheckClusterDirConflict(clusterList, name, mergedTopo); err != nil { + if err := checkConflict(m, name, mergedTopo); err != nil { return err } } diff --git a/pkg/cluster/operation/check.go b/pkg/cluster/operation/check.go index 903c540563..a306752231 100644 --- a/pkg/cluster/operation/check.go +++ b/pkg/cluster/operation/check.go @@ -62,6 +62,7 @@ var ( CheckNameFio = "fio" CheckNameTHP = "thp" CheckNameDirPermission = "permission" + CheckNameDirExist = "exist" ) // CheckResult is the result of a check @@ -866,3 +867,29 @@ func CheckDirPermission(ctx context.Context, e ctxt.Executor, user, path string) return results } + +// CheckDirIsExist check if the directory exists +func CheckDirIsExist(ctx context.Context, e ctxt.Executor, path string) []*CheckResult { + var results []*CheckResult + + if path == "" { + return results + } + + req, _, _ := e.Execute(ctx, + fmt.Sprintf( + "[ -e %s ] && echo 1", + path, + ), + false) + + if strings.ReplaceAll(string(req), "\n", "") == "1" { + results = append(results, &CheckResult{ + Name: CheckNameDirExist, + Err: fmt.Errorf("%s already exists", path), + Msg: fmt.Sprintf("%s already exists", path), + }) + } + + return results +} diff --git a/pkg/cluster/task/check.go b/pkg/cluster/task/check.go index 4a835346f2..5d26aae9b1 100644 --- a/pkg/cluster/task/check.go +++ b/pkg/cluster/task/check.go @@ -36,6 +36,7 @@ var ( CheckTypePartitions = "partitions" CheckTypeFIO = "fio" CheckTypePermission = "permission" + ChecktypeIsExist = "exist" ) // place the check utilities are stored @@ -147,6 +148,13 @@ func (c *CheckSys) Execute(ctx context.Context) error { return ErrNoExecutor } storeResults(ctx, c.host, operator.CheckDirPermission(ctx, e, c.topo.GlobalOptions.User, c.checkDir)) + case ChecktypeIsExist: + e, ok := ctxt.GetInner(ctx).GetExecutor(c.host) + if !ok { + return ErrNoExecutor + } + // 
report an error if the checked path already exists on the host + storeResults(ctx, c.host, operator.CheckDirIsExist(ctx, e, c.checkDir)) } return nil