diff --git a/pkg/common/jobcontroller/jobcontroller.go b/pkg/common/jobcontroller/jobcontroller.go index e39d3c6b79..414b9bc921 100644 --- a/pkg/common/jobcontroller/jobcontroller.go +++ b/pkg/common/jobcontroller/jobcontroller.go @@ -212,7 +212,8 @@ func (jc *JobController) SyncPodGroup(job metav1.Object, minAvailableReplicas in kubeBatchClientInterface := jc.KubeBatchClientSet // Check whether podGroup exists or not - podGroup, err := kubeBatchClientInterface.SchedulingV1alpha1().PodGroups(job.GetNamespace()).Get(job.GetName(), metav1.GetOptions{}) + podGroupName := GenPodGroupName(job.GetName()) + podGroup, err := kubeBatchClientInterface.SchedulingV1alpha1().PodGroups(job.GetNamespace()).Get(podGroupName, metav1.GetOptions{}) if err == nil { return podGroup, nil } @@ -221,7 +222,7 @@ func (jc *JobController) SyncPodGroup(job metav1.Object, minAvailableReplicas in minAvailable := intstr.FromInt(int(minAvailableReplicas)) createPodGroup := &v1alpha1.PodGroup{ ObjectMeta: metav1.ObjectMeta{ - Name: job.GetName(), + Name: podGroupName, OwnerReferences: []metav1.OwnerReference{ *jc.GenOwnerReference(job), }, diff --git a/pkg/common/jobcontroller/util.go b/pkg/common/jobcontroller/util.go index 511660d07c..4c87a1db49 100644 --- a/pkg/common/jobcontroller/util.go +++ b/pkg/common/jobcontroller/util.go @@ -50,3 +50,8 @@ func GenExpectationPodsKey(jobKey, replicaType string) string { func GenExpectationServicesKey(jobKey, replicaType string) string { return jobKey + "/" + strings.ToLower(replicaType) + "/services" } + +// GenPodGroupName returns the kube-batch PodGroup name for the given job; the same name is used for the PodGroup CRD object and for the pod's group-name annotation. +func GenPodGroupName(jobName string) string { + return jobName +} diff --git a/pkg/controller.v1/tensorflow/pod.go b/pkg/controller.v1/tensorflow/pod.go index 219774633c..448322c0ab 100644 --- a/pkg/controller.v1/tensorflow/pod.go +++ b/pkg/controller.v1/tensorflow/pod.go @@ -196,8 +196,7 @@ func (tc *TFController) createNewPod(tfjob *tfv1.TFJob, rt, index string, 
spec * if podTemplate.Annotations == nil { podTemplate.Annotations = map[string]string{} } - // we create the podGroup with the same name as the tfjob - podTemplate.Annotations["scheduling.k8s.io/group-name"] = tfjob.Name + podTemplate.Annotations["scheduling.k8s.io/group-name"] = jobcontroller.GenPodGroupName(tfjob.Name) } err = tc.PodControl.CreatePodsWithControllerRef(tfjob.Namespace, podTemplate, tfjob, controllerRef)