diff --git a/pkg/cluster/operation/action.go b/pkg/cluster/operation/action.go index 95f033e26d..6591529bae 100644 --- a/pkg/cluster/operation/action.go +++ b/pkg/cluster/operation/action.go @@ -255,6 +255,16 @@ func StopMonitored(ctx context.Context, hosts []string, noAgentHosts set.StringS return systemctlMonitor(ctx, hosts, noAgentHosts, options, "stop", timeout) } +// RestartMonitored restarts BlackboxExporter and NodeExporter by calling StopMonitored and then StartMonitored; a stop error is returned without attempting the start. +func RestartMonitored(ctx context.Context, hosts []string, noAgentHosts set.StringSet, options *spec.MonitoredOptions, timeout uint64) error { + err := StopMonitored(ctx, hosts, noAgentHosts, options, timeout) + if err != nil { + return err + } + + return StartMonitored(ctx, hosts, noAgentHosts, options, timeout) +} + // EnableMonitored enable/disable monitor service in a cluster func EnableMonitored(ctx context.Context, hosts []string, noAgentHosts set.StringSet, options *spec.MonitoredOptions, timeout uint64, isEnable bool) error { action := "disable" diff --git a/pkg/cluster/operation/upgrade.go b/pkg/cluster/operation/upgrade.go index cf6cf20d6e..1f0c962fe7 100644 --- a/pkg/cluster/operation/upgrade.go +++ b/pkg/cluster/operation/upgrade.go @@ -51,12 +51,14 @@ func Upgrade( components = FilterComponent(components, roleFilter) logger := ctx.Value(logprinter.ContextKeyLogger).(*logprinter.Logger) + noAgentHosts := set.NewStringSet() + uniqueHosts := set.NewStringSet() + for _, component := range components { instances := FilterInstance(component.Instances(), nodeFilter) if len(instances) < 1 { continue } - logger.Infof("Upgrading component %s", component.Name()) // perform pre-upgrade actions of component @@ -102,6 +104,11 @@ func Upgrade( deferInstances := make([]spec.Instance, 0) for _, instance := range instances { + // record each instance's host (and whether it ignores the monitor agent) for the RestartMonitored call at the end of Upgrade +uniqueHosts.Insert(instance.GetHost()) + if instance.IgnoreMonitorAgent() { + noAgentHosts.Insert(instance.GetHost()) + } switch component.Name() { case spec.ComponentPD: // defer PD leader to be upgraded 
after others @@ -132,7 +139,11 @@ func Upgrade( } } - return nil + if topo.GetMonitoredOptions() == nil { + return nil + } + + return RestartMonitored(ctx, uniqueHosts.Slice(), noAgentHosts, topo.GetMonitoredOptions(), options.OptTimeout) } func upgradeInstance(ctx context.Context, topo spec.Topology, instance spec.Instance, options Options, tlsCfg *tls.Config) (err error) {