From 10da90538325fb80cffe0326e3c4969ff4f4bd8d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Qi=CE=BC=24hi=D0=AFu=C3=AD?= <39378935+srstack@users.noreply.github.com> Date: Tue, 15 Feb 2022 15:11:39 +0800 Subject: [PATCH] cluster: completing the os type of the host (#1753) --- components/cluster/command/check.go | 2 +- components/dm/spec/topology_dm.go | 16 +---- components/dm/spec/topology_dm_test.go | 6 ++ pkg/cluster/ansible/inventory.go | 9 +++ pkg/cluster/manager/builder.go | 2 - pkg/cluster/manager/check.go | 2 +- pkg/cluster/manager/deploy.go | 12 +--- pkg/cluster/manager/manager.go | 92 ++++++++++++++++---------- pkg/cluster/manager/scale_out.go | 2 +- pkg/cluster/spec/spec.go | 51 ++++++++------ pkg/cluster/spec/spec_manager_test.go | 2 +- pkg/cluster/spec/validate_test.go | 5 ++ 12 files changed, 119 insertions(+), 82 deletions(-) diff --git a/components/cluster/command/check.go b/components/cluster/command/check.go index 63a9615bba..d30f85e765 100644 --- a/components/cluster/command/check.go +++ b/components/cluster/command/check.go @@ -37,7 +37,7 @@ is the cluster name. Some checks are ignore in this mode, such as port and dir conflict checks with other clusters If you want to check the scale-out topology, please use execute the following command ' check --cluster ' -it will the new instances `, +it will check the new instances `, RunE: func(cmd *cobra.Command, args []string) error { if len(args) != 1 && len(args) != 2 { return cmd.Help() diff --git a/components/dm/spec/topology_dm.go b/components/dm/spec/topology_dm.go index 1b06f4c11f..8959f7bac0 100644 --- a/components/dm/spec/topology_dm.go +++ b/components/dm/spec/topology_dm.go @@ -687,9 +687,9 @@ func (s *Specification) GetMasterList() []string { return masterList } -// FillHostArch fills the topology with the given host->arch -func (s *Specification) FillHostArch(hostArch map[string]string) error { - return spec.FillHostArch(s, hostArch) +// FillHostArchOrOS fills the topology with the given host->arch +func (s *Specification) FillHostArchOrOS(hostArch map[string]string, fullType spec.FullHostType) error { + return spec.FillHostArchOrOS(s, hostArch, fullType) } // Merge returns a new Topology which sum old ones @@ -796,12 +796,6 @@ func setDMCustomDefaults(globalOptions *GlobalOptions, field reflect.Value) erro field.Field(j).Set(reflect.ValueOf(globalOptions.LogDir)) } case "Arch": - // default values of globalOptions are set before fillCustomDefaults in Unmarshal - // so the globalOptions.Arch already has its default value set, no need to check again - if field.Field(j).String() == "" { - field.Field(j).Set(reflect.ValueOf(globalOptions.Arch)) - } - switch strings.ToLower(field.Field(j).String()) { // replace "x86_64" with amd64, they are the same in our repo case "x86_64": @@ -816,10 +810,6 @@ func setDMCustomDefaults(globalOptions *GlobalOptions, field reflect.Value) erro field.Field(j).Set(reflect.ValueOf(strings.ToLower(field.Field(j).String()))) } case "OS": - // default value of globalOptions.OS is already set, same as "Arch" - if field.Field(j).String() == "" { - field.Field(j).Set(reflect.ValueOf(globalOptions.OS)) - } // convert to lower case if field.Field(j).String() != "" { field.Field(j).Set(reflect.ValueOf(strings.ToLower(field.Field(j).String()))) diff --git a/components/dm/spec/topology_dm_test.go b/components/dm/spec/topology_dm_test.go index 02f8d758f1..c12cc0e9e7 100644 --- a/components/dm/spec/topology_dm_test.go +++ b/components/dm/spec/topology_dm_test.go @@ -168,6 +168,7 @@ master_servers: arch: "arm64" worker_servers: - host: 172.16.5.138 + arch: "aarch64" `), &topo) assert.Nil(t, err) @@ -179,9 +180,11 @@ global: master_servers: - host: 172.16.5.138 arch: "aarch64" + os: "linux" worker_servers: - host: 172.16.5.138 arch: "amd64" + os: "linux" `), &topo) assert.NotNil(t, err) assert.Equal(t, "platform mismatch for '172.16.5.138' between 'master_servers:linux/arm64' and 'worker_servers:linux/amd64'", err.Error()) @@ -195,8 +198,11 @@ global: master_servers: - host: 172.16.5.138 os: "darwin" + arch: "aarch64" worker_servers: - host: 172.16.5.138 + os: "linux" + arch: "aarch64" `), &topo) assert.NotNil(t, err) assert.Equal(t, "platform mismatch for '172.16.5.138' between 'master_servers:darwin/arm64' and 'worker_servers:linux/arm64'", err.Error()) diff --git a/pkg/cluster/ansible/inventory.go b/pkg/cluster/ansible/inventory.go index bc88227000..02ba1c8ada 100644 --- a/pkg/cluster/ansible/inventory.go +++ b/pkg/cluster/ansible/inventory.go @@ -180,6 +180,7 @@ func parseGroupVars(ctx context.Context, dir, ansCfgFile string, clsMeta *spec.C SSHPort: getHostPort(srv, ansCfg), Imported: true, Arch: "amd64", + OS: "linux", } if port, ok := grpVars["tidb_port"]; ok { @@ -223,6 +224,7 @@ func parseGroupVars(ctx context.Context, dir, ansCfgFile string, clsMeta *spec.C SSHPort: getHostPort(srv, ansCfg), Imported: true, Arch: "amd64", + OS: "linux", } if port, ok := grpVars["tikv_port"]; ok { @@ -269,6 +271,7 @@ func parseGroupVars(ctx context.Context, dir, ansCfgFile string, clsMeta *spec.C SSHPort: getHostPort(srv, ansCfg), Imported: true, Arch: "amd64", + OS: "linux", } if tmpIns.Host != srv.Name { tmpIns.Name = srv.Name // use alias as the name of PD @@ -318,6 +321,7 @@ func parseGroupVars(ctx context.Context, dir, ansCfgFile string, clsMeta *spec.C SSHPort: getHostPort(srv, ansCfg), Imported: true, Arch: "amd64", + OS: "linux", } if tcpPort, ok := grpVars["tcp_port"]; ok { @@ -396,6 +400,7 @@ func parseGroupVars(ctx context.Context, dir, ansCfgFile string, clsMeta *spec.C SSHPort: getHostPort(srv, ansCfg), Imported: true, Arch: "amd64", + OS: "linux", } if port, ok := grpVars["prometheus_port"]; ok { @@ -442,6 +447,7 @@ func parseGroupVars(ctx context.Context, dir, ansCfgFile string, clsMeta *spec.C SSHPort: getHostPort(srv, ansCfg), Imported: true, Arch: "amd64", + OS: "linux", } if port, ok := grpVars["alertmanager_port"]; ok { @@ -482,6 +488,7 @@ func parseGroupVars(ctx context.Context, dir, ansCfgFile string, clsMeta *spec.C SSHPort: getHostPort(srv, ansCfg), Imported: true, Arch: "amd64", + OS: "linux", } if port, ok := grpVars["grafana_port"]; ok { @@ -527,6 +534,7 @@ func parseGroupVars(ctx context.Context, dir, ansCfgFile string, clsMeta *spec.C SSHPort: getHostPort(srv, ansCfg), Imported: true, Arch: "amd64", + OS: "linux", } // nothing in pump_servers.yml @@ -569,6 +577,7 @@ func parseGroupVars(ctx context.Context, dir, ansCfgFile string, clsMeta *spec.C SSHPort: getHostPort(srv, ansCfg), Imported: true, Arch: "amd64", + OS: "linux", } // nothing in drainer_servers.yml diff --git a/pkg/cluster/manager/builder.go b/pkg/cluster/manager/builder.go index 1b9c84970a..cdf0b0d843 100644 --- a/pkg/cluster/manager/builder.go +++ b/pkg/cluster/manager/builder.go @@ -258,7 +258,6 @@ func buildScaleOutTask( // Deploy monitor relevant components to remote dlTasks, dpTasks, err := buildMonitoredDeployTask( m, - name, uninitializedHosts, noAgentHosts, topo.BaseTopo().GlobalOptions, @@ -407,7 +406,6 @@ type hostInfo struct { func buildMonitoredDeployTask( m *Manager, - name string, uniqueHosts map[string]hostInfo, // host -> ssh-port, os, arch noAgentHosts set.StringSet, // hosts that do not deploy monitor agents globalOptions *spec.GlobalOptions, diff --git a/pkg/cluster/manager/check.go b/pkg/cluster/manager/check.go index 633de511ec..5050595048 100644 --- a/pkg/cluster/manager/check.go +++ b/pkg/cluster/manager/check.go @@ -123,7 +123,7 @@ func (m *Manager) CheckCluster(clusterOrTopoName, scaleoutTopo string, opt Check } } - if err := m.fillHostArch(sshConnProps, sshProxyProps, &topo, &gOpt, opt.User); err != nil { + if err := m.fillHost(sshConnProps, sshProxyProps, &topo, &gOpt, opt.User); err != nil { return err } diff --git a/pkg/cluster/manager/deploy.go b/pkg/cluster/manager/deploy.go index 60e825c652..04381c5d8a 100644 --- a/pkg/cluster/manager/deploy.go +++ b/pkg/cluster/manager/deploy.go @@ -130,14 +130,7 @@ func (m *Manager) Deploy( } } - clusterList, err := m.specManager.GetAllClusters() - if err != nil { - return err - } - if err := spec.CheckClusterPortConflict(clusterList, name, topo); err != nil { - return err - } - if err := spec.CheckClusterDirConflict(clusterList, name, topo); err != nil { + if err := checkConflict(m, name, topo); err != nil { return err } @@ -157,7 +150,7 @@ func (m *Manager) Deploy( } } - if err := m.fillHostArch(sshConnProps, sshProxyProps, topo, &gOpt, opt.User); err != nil { + if err := m.fillHost(sshConnProps, sshProxyProps, topo, &gOpt, opt.User); err != nil { return err } @@ -325,7 +318,6 @@ func (m *Manager) Deploy( // Deploy monitor relevant components to remote dlTasks, dpTasks, err := buildMonitoredDeployTask( m, - name, uniqueHosts, noAgentHosts, globalOptions, diff --git a/pkg/cluster/manager/manager.go b/pkg/cluster/manager/manager.go index 569ac05ce4..3740a30ea0 100644 --- a/pkg/cluster/manager/manager.go +++ b/pkg/cluster/manager/manager.go @@ -166,42 +166,66 @@ func (m *Manager) sshTaskBuilder(name string, topo spec.Topology, user string, g ), nil } -func (m *Manager) fillHostArch(s, p *tui.SSHConnectionProps, topo spec.Topology, gOpt *operator.Options, user string) error { +// fillHost full host cpu-arch and kernel-name +func (m *Manager) fillHost(s, p *tui.SSHConnectionProps, topo spec.Topology, gOpt *operator.Options, user string) error { + if err := m.fillHostArchOrOS(s, p, topo, gOpt, user, spec.FullArchType); err != nil { + return err + } + + return m.fillHostArchOrOS(s, p, topo, gOpt, user, spec.FullOSType) +} + +// fillHostArchOrOS full host cpu-arch or kernel-name +func (m *Manager) fillHostArchOrOS(s, p *tui.SSHConnectionProps, topo spec.Topology, gOpt *operator.Options, user string, fullType spec.FullHostType) error { globalSSHType := topo.BaseTopo().GlobalOptions.SSHType - hostArch := map[string]string{} + hostArchOrOS := map[string]string{} var detectTasks []*task.StepDisplay + topo.IterInstance(func(inst spec.Instance) { - if inst.Arch() != "" { + if fullType == spec.FullOSType { + if inst.OS() != "" { + return + } + } else if inst.Arch() != "" { return } - if _, ok := hostArch[inst.GetHost()]; ok { + + if _, ok := hostArchOrOS[inst.GetHost()]; ok { return } - hostArch[inst.GetHost()] = "" - - tf := task.NewBuilder(m.logger). - RootSSH( - inst.GetHost(), - inst.GetSSHPort(), - user, - s.Password, - s.IdentityFile, - s.IdentityFilePassphrase, - gOpt.SSHTimeout, - gOpt.OptTimeout, - gOpt.SSHProxyHost, - gOpt.SSHProxyPort, - gOpt.SSHProxyUser, - p.Password, - p.IdentityFile, - p.IdentityFilePassphrase, - gOpt.SSHProxyTimeout, - gOpt.SSHType, - globalSSHType, - ). - Shell(inst.GetHost(), "uname -m", "", false). - BuildAsStep(fmt.Sprintf(" - Detecting node %s", inst.GetHost())) - detectTasks = append(detectTasks, tf) + hostArchOrOS[inst.GetHost()] = "" + + tf := task.NewSimpleUerSSH(m.logger, inst.GetHost(), inst.GetSSHPort(), user, *gOpt, p, globalSSHType) + if s.Password != "" || user == "root" { + tf = task.NewBuilder(m.logger). + RootSSH( + inst.GetHost(), + inst.GetSSHPort(), + user, + s.Password, + s.IdentityFile, + s.IdentityFilePassphrase, + gOpt.SSHTimeout, + gOpt.OptTimeout, + gOpt.SSHProxyHost, + gOpt.SSHProxyPort, + gOpt.SSHProxyUser, + p.Password, + p.IdentityFile, + p.IdentityFilePassphrase, + gOpt.SSHProxyTimeout, + gOpt.SSHType, + globalSSHType, + ) + } + + switch fullType { + case spec.FullOSType: + tf = tf.Shell(inst.GetHost(), "uname -s", "", false) + default: + tf = tf.Shell(inst.GetHost(), "uname -m", "", false) + } + detectTasks = append(detectTasks, tf.BuildAsStep(fmt.Sprintf(" - Detecting node %s %s info", inst.GetHost(), string(fullType)))) }) if len(detectTasks) == 0 { return nil @@ -213,19 +237,19 @@ func (m *Manager) fillHostArch(s, p *tui.SSHConnectionProps, topo spec.Topology, m.logger, ) t := task.NewBuilder(m.logger). - ParallelStep("+ Detect CPU Arch", false, detectTasks...). + ParallelStep(fmt.Sprintf("+ Detect CPU %s Name", string(fullType)), false, detectTasks...). Build() if err := t.Execute(ctx); err != nil { - return perrs.Annotate(err, "failed to fetch cpu arch") + return perrs.Annotate(err, "failed to fetch cpu-arch or kernel-name") } - for host := range hostArch { + for host := range hostArchOrOS { stdout, _, ok := ctxt.GetInner(ctx).GetOutputs(host) if !ok { return fmt.Errorf("no check results found for %s", host) } - hostArch[host] = strings.Trim(string(stdout), "\n") + hostArchOrOS[host] = strings.Trim(string(stdout), "\n") } - return topo.FillHostArch(hostArch) + return topo.FillHostArchOrOS(hostArchOrOS, fullType) } diff --git a/pkg/cluster/manager/scale_out.go b/pkg/cluster/manager/scale_out.go index fde9b118f4..6f8e224ca9 100644 --- a/pkg/cluster/manager/scale_out.go +++ b/pkg/cluster/manager/scale_out.go @@ -121,7 +121,7 @@ func (m *Manager) ScaleOut( } } - if err := m.fillHostArch(sshConnProps, sshProxyProps, newPart, &gOpt, opt.User); err != nil { + if err := m.fillHost(sshConnProps, sshProxyProps, newPart, &gOpt, opt.User); err != nil { return err } diff --git a/pkg/cluster/spec/spec.go b/pkg/cluster/spec/spec.go index f8b6370e87..9407dcde30 100644 --- a/pkg/cluster/spec/spec.go +++ b/pkg/cluster/spec/spec.go @@ -44,6 +44,16 @@ const ( promMetricStartTimeSeconds = "process_start_time_seconds" ) +// FullHostType is the type of fullhost operations +type FullHostType string + +const ( + // FullArchType cpu-arch type + FullArchType FullHostType = "Arch" + // FullOSType kernel-name + FullOSType FullHostType = "OS" +) + // general role names var ( RoleMonitor = "monitor" @@ -153,7 +163,7 @@ type Topology interface { CountDir(host string, dir string) int TLSConfig(dir string) (*tls.Config, error) Merge(that Topology) Topology - FillHostArch(hostArchmap map[string]string) error + FillHostArchOrOS(hostArchmap map[string]string, fullType FullHostType) error GetGrafanaConfig() map[string]string ScaleOutTopology @@ -628,10 +638,6 @@ func setCustomDefaults(globalOptions *GlobalOptions, field reflect.Value) error field.Field(j).Set(reflect.ValueOf(strings.ToLower(field.Field(j).String()))) } case "OS": - // default value of globalOptions.OS is already set, same as "Arch" - if field.Field(j).String() == "" { - field.Field(j).Set(reflect.ValueOf(globalOptions.OS)) - } // convert to lower case if field.Field(j).String() != "" { field.Field(j).Set(reflect.ValueOf(strings.ToLower(field.Field(j).String()))) @@ -806,25 +812,25 @@ func AlertManagerEndpoints(alertmanager []*AlertmanagerSpec, user string, enable return ends } -// FillHostArch fills the topology with the given host->arch -func (s *Specification) FillHostArch(hostArch map[string]string) error { - if err := FillHostArch(s, hostArch); err != nil { +// FillHostArchOrOS fills the topology with the given host->arch +func (s *Specification) FillHostArchOrOS(hostArch map[string]string, fullType FullHostType) error { + if err := FillHostArchOrOS(s, hostArch, fullType); err != nil { return err } return s.platformConflictsDetect() } -// FillHostArch fills the topology with the given host->arch -func FillHostArch(s interface{}, hostArch map[string]string) error { - for host, arch := range hostArch { +// FillHostArchOrOS fills the topology with the given host->arch +func FillHostArchOrOS(s interface{}, hostArchOrOS map[string]string, fullType FullHostType) error { + for host, arch := range hostArchOrOS { switch arch { case "x86_64": - hostArch[host] = "amd64" + hostArchOrOS[host] = "amd64" case "aarch64": - hostArch[host] = "arm64" + hostArchOrOS[host] = "arm64" default: - hostArch[host] = strings.ToLower(arch) + hostArchOrOS[host] = strings.ToLower(arch) } } @@ -837,7 +843,7 @@ func FillHostArch(s interface{}, hostArch map[string]string) error { continue } for j := 0; j < field.Len(); j++ { - if err := setHostArch(field.Index(j), hostArch); err != nil { + if err := setHostArchOrOS(field.Index(j), hostArchOrOS, fullType); err != nil { return err } } @@ -845,13 +851,13 @@ func FillHostArch(s interface{}, hostArch map[string]string) error { return nil } -func setHostArch(field reflect.Value, hostArch map[string]string) error { +func setHostArchOrOS(field reflect.Value, hostArchOrOS map[string]string, fullType FullHostType) error { if !field.CanSet() || isSkipField(field) { return nil } if field.Kind() == reflect.Ptr { - return setHostArch(field.Elem(), hostArch) + return setHostArchOrOS(field.Elem(), hostArchOrOS, fullType) } if field.Kind() != reflect.Struct { @@ -860,10 +866,17 @@ func setHostArch(field reflect.Value, hostArch map[string]string) error { host := field.FieldByName("Host") arch := field.FieldByName("Arch") + os := field.FieldByName("OS") // set arch only if not set before - if !host.IsZero() && arch.CanSet() && len(arch.String()) == 0 { - arch.Set(reflect.ValueOf(hostArch[host.String()])) + if fullType == FullOSType { + if !host.IsZero() && os.CanSet() && len(os.String()) == 0 { + os.Set(reflect.ValueOf(hostArchOrOS[host.String()])) + } + } else { + if !host.IsZero() && arch.CanSet() && len(arch.String()) == 0 { + arch.Set(reflect.ValueOf(hostArchOrOS[host.String()])) + } } return nil diff --git a/pkg/cluster/spec/spec_manager_test.go b/pkg/cluster/spec/spec_manager_test.go index f9f48cb392..254fc39afd 100644 --- a/pkg/cluster/spec/spec_manager_test.go +++ b/pkg/cluster/spec/spec_manager_test.go @@ -48,7 +48,7 @@ func (t *TestTopology) Merge(topo Topology) Topology { panic("not support") } -func (t *TestTopology) FillHostArch(hostArch map[string]string) error { +func (t *TestTopology) FillHostArchOrOS(hostArchOrOS map[string]string, fullType FullHostType) error { panic("not support") } diff --git a/pkg/cluster/spec/validate_test.go b/pkg/cluster/spec/validate_test.go index cdb41a5ab9..cc6b4db841 100644 --- a/pkg/cluster/spec/validate_test.go +++ b/pkg/cluster/spec/validate_test.go @@ -155,9 +155,11 @@ global: tidb_servers: - host: 172.16.5.138 arch: "arm64" + os: "linux" tikv_servers: - host: 172.16.5.138 arch: "arm64" + os: "linux" `), &topo) c.Assert(err, IsNil) @@ -169,9 +171,11 @@ global: tidb_servers: - host: 172.16.5.138 arch: "aarch64" + os: "linux" tikv_servers: - host: 172.16.5.138 arch: "amd64" + os: "linux" `), &topo) c.Assert(err, NotNil) c.Assert(err.Error(), Equals, "platform mismatch for '172.16.5.138' between 'tidb_servers:linux/arm64' and 'tikv_servers:linux/amd64'") @@ -189,6 +193,7 @@ tidb_servers: tikv_servers: - host: 172.16.5.138 arch: "arm64" + os: "linux" `), &topo) c.Assert(err, NotNil) c.Assert(err.Error(), Equals, "platform mismatch for '172.16.5.138' between 'tidb_servers:darwin/arm64' and 'tikv_servers:linux/arm64'")