From 6cfc1ddac8fa636016dc1dd23b2911536873308a Mon Sep 17 00:00:00 2001
From: "bert.li"
Date: Thu, 14 Oct 2021 15:10:25 +0800
Subject: [PATCH 1/2] [fix]fix set scheduler error

Signed-off-by: bert.li
---
 pkg/controller.v1/tensorflow/tfjob_controller.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pkg/controller.v1/tensorflow/tfjob_controller.go b/pkg/controller.v1/tensorflow/tfjob_controller.go
index fa3fce0c6e..614d275a2c 100644
--- a/pkg/controller.v1/tensorflow/tfjob_controller.go
+++ b/pkg/controller.v1/tensorflow/tfjob_controller.go
@@ -824,7 +824,7 @@ func (r *TFJobReconciler) createNewPod(tfjob *tfv1.TFJob, rt, index string, spec
 	// 1. if user has specified other scheduler, we report a warning without overriding any fields.
 	// 2. if no SchedulerName is set for pods, then we set the SchedulerName to "volcano".
 	if r.Config.EnableGangScheduling {
-		if !util.IsGangSchedulerSet(replicas, gangSchedulerName) {
+		if util.IsGangSchedulerSet(replicas, gangSchedulerName) {
 			errMsg := "Another scheduler is specified when gang-scheduling is enabled and it will not be overwritten"
 			logger.Warning(errMsg)
 			r.Recorder.Event(tfjob, v1.EventTypeWarning, podTemplateSchedulerNameReason, errMsg)

From 7fbe431b040869615976dcc36dc25afefe5cbdc7 Mon Sep 17 00:00:00 2001
From: "bert.li"
Date: Thu, 4 Nov 2021 20:27:11 +0800
Subject: [PATCH 2/2] refactor gangScheduler set

Signed-off-by: bert.li
---
 pkg/common/util/util.go | 8 ++++++++
 pkg/controller.v1/tensorflow/tfjob_controller.go | 7 ++++---
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/pkg/common/util/util.go b/pkg/common/util/util.go
index 28150c5f90..f635f48f4b 100644
--- a/pkg/common/util/util.go
+++ b/pkg/common/util/util.go
@@ -50,3 +50,11 @@ func GetReplicaTypes(specs map[commonv1.ReplicaType]*commonv1.ReplicaSpec) []com
 	}
 	return keys
 }
+func GetSchedulerName(replicas map[commonv1.ReplicaType]*commonv1.ReplicaSpec) string {
+	for _, spec := range replicas {
+		if len(spec.Template.Spec.SchedulerName) > 0 {
+			return spec.Template.Spec.SchedulerName
+		}
+	}
+	return ""
+}
diff --git a/pkg/controller.v1/tensorflow/tfjob_controller.go b/pkg/controller.v1/tensorflow/tfjob_controller.go
index 614d275a2c..45f5055311 100644
--- a/pkg/controller.v1/tensorflow/tfjob_controller.go
+++ b/pkg/controller.v1/tensorflow/tfjob_controller.go
@@ -824,12 +824,13 @@ func (r *TFJobReconciler) createNewPod(tfjob *tfv1.TFJob, rt, index string, spec
 	// 1. if user has specified other scheduler, we report a warning without overriding any fields.
 	// 2. if no SchedulerName is set for pods, then we set the SchedulerName to "volcano".
 	if r.Config.EnableGangScheduling {
-		if util.IsGangSchedulerSet(replicas, gangSchedulerName) {
+		podSchedulerName := util.GetSchedulerName(replicas)
+		if len(podSchedulerName) == 0 {
+			podTemplate.Spec.SchedulerName = gangSchedulerName
+		} else if strings.Compare(podSchedulerName, gangSchedulerName) != 0 {
 			errMsg := "Another scheduler is specified when gang-scheduling is enabled and it will not be overwritten"
 			logger.Warning(errMsg)
 			r.Recorder.Event(tfjob, v1.EventTypeWarning, podTemplateSchedulerNameReason, errMsg)
-		} else {
-			podTemplate.Spec.SchedulerName = gangSchedulerName
 		}
 
 		if podTemplate.Annotations == nil {