Skip to content

Commit

Permalink
cluster: start pd, dm-master sequentially (#1262)
Browse files Browse the repository at this point in the history
  • Loading branch information
jsvisa authored and AstroProfundis committed Apr 6, 2021
1 parent 969476b commit dd953aa
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 1 deletion.
2 changes: 1 addition & 1 deletion pkg/cluster/manager/builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -306,7 +306,7 @@ func buildScaleOutTask(
return m.specManager.SaveMeta(name, metadata)
}).
Func("StartCluster", func(ctx context.Context) error {
return operator.Start(ctx, newPart, operator.Options{OptTimeout: gOpt.OptTimeout}, tlsCfg)
return operator.Start(ctx, newPart, operator.Options{OptTimeout: gOpt.OptTimeout, Operation: operator.ScaleOutOperation}, tlsCfg)
}).
Parallel(false, refreshConfigTasks...).
Parallel(false, buildReloadPromTasks(metadata.GetTopology())...)
Expand Down
20 changes: 20 additions & 0 deletions pkg/cluster/operation/action.go
Original file line number Diff line number Diff line change
Expand Up @@ -460,6 +460,14 @@ func StartComponent(ctx context.Context, instances []spec.Instance, options Opti
name := instances[0].ComponentName()
log.Infof("Starting component %s", name)

// Start instances serially for Raft-related components.
// e.g. PD places stricter restrictions on the scale-out process:
// at most one node may be in the peer-join stage at any time.
// ref https://github.com/tikv/pd/blob/d38b36714ccee70480c39e07126e3456b5fb292d/server/join/join.go#L179-L191
if options.Operation == ScaleOutOperation && (name == spec.ComponentPD || name == spec.ComponentDMMaster) {
return serialStartInstances(ctx, instances, options, tlsCfg)
}

errg, _ := errgroup.WithContext(ctx)

for _, ins := range instances {
Expand All @@ -484,6 +492,18 @@ func StartComponent(ctx context.Context, instances []spec.Instance, options Opti
return errg.Wait()
}

// serialStartInstances brings up the given instances one at a time, in slice
// order. For each instance it calls PrepareStart and then startInstance with
// options.OptTimeout, and moves on to the next instance only after both calls
// have returned without error.
//
// The first error encountered is returned unchanged and the remaining
// instances are left untouched. It returns nil once every instance has been
// processed, which includes the case of an empty slice.
func serialStartInstances(ctx context.Context, instances []spec.Instance, options Options, tlsCfg *tls.Config) error {
	for i := range instances {
		inst := instances[i]

		// Prepare first; only attempt the actual start when preparation succeeded.
		err := inst.PrepareStart(ctx, tlsCfg)
		if err == nil {
			err = startInstance(ctx, inst, options.OptTimeout)
		}
		if err != nil {
			return err
		}
	}
	return nil
}

// StopMonitored stops BlackboxExporter and NodeExporter
func StopMonitored(ctx context.Context, instance spec.Instance, options *spec.MonitoredOptions, timeout uint64) error {
ports := map[string]int{
Expand Down
1 change: 1 addition & 0 deletions pkg/cluster/operation/operation.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ type Options struct {
// Some data will be retained when destroying instances
RetainDataRoles []string
RetainDataNodes []string
Operation Operation
}

// Operation represents the type of cluster operation
Expand Down

0 comments on commit dd953aa

Please sign in to comment.