From d26d39ddec284e6b7e4e80116aaf5068bbf1e29e Mon Sep 17 00:00:00 2001 From: Alex Wang Date: Thu, 11 Nov 2021 16:50:48 +0800 Subject: [PATCH] support queuing jobs before they are scheduled --- pkg/apis/types/submit.go | 3 +++ pkg/apis/types/training.go | 2 ++ pkg/argsbuilder/const.go | 2 ++ pkg/argsbuilder/submit.go | 16 ++++++++++++++++ 4 files changed, 23 insertions(+) diff --git a/pkg/apis/types/submit.go b/pkg/apis/types/submit.go index 1b8aa9c1b..b5300dc04 100644 --- a/pkg/apis/types/submit.go +++ b/pkg/apis/types/submit.go @@ -73,6 +73,9 @@ type CommonSubmitArgs struct { // EnableRDMA enable rdma or not,match option --rdma EnableRDMA bool `yaml:"enableRDMA"` + // EnableQueue adds the scheduling.x-k8s.io/suspend annotation so the job is queued before it is scheduled, match option --queue + EnableQueue bool `yaml:"enableQueue"` + // UseENI defines using eni or not UseENI bool `yaml:"useENI"` diff --git a/pkg/apis/types/training.go b/pkg/apis/types/training.go index a68fbe2d9..48802c215 100644 --- a/pkg/apis/types/training.go +++ b/pkg/apis/types/training.go @@ -131,6 +131,8 @@ type TrainingJobInfo struct { type TrainingJobStatus string const ( + // TrainingJobQueuing means the job is queuing + TrainingJobQueuing TrainingJobStatus = "QUEUING" // TrainingJobPending means the job is pending TrainingJobPending TrainingJobStatus = "PENDING" // TrainingJobRunning means the job is running diff --git a/pkg/argsbuilder/const.go b/pkg/argsbuilder/const.go index 113765eac..7c7e5a3c7 100644 --- a/pkg/argsbuilder/const.go +++ b/pkg/argsbuilder/const.go @@ -7,4 +7,6 @@ const ( gangSchdName = "kube-batch" aliyunENIAnnotation = "k8s.aliyun.com/eni" + + jobSuspend = "scheduling.x-k8s.io/suspend" ) diff --git a/pkg/argsbuilder/submit.go b/pkg/argsbuilder/submit.go index b980b2535..ac38b7e7d 100644 --- a/pkg/argsbuilder/submit.go +++ b/pkg/argsbuilder/submit.go @@ -120,6 +120,8 @@ func (s *SubmitArgsBuilder) AddCommandFlags(command *cobra.Command) { command.Flags().BoolVar(&s.args.Coscheduling, "gang", false, "enable gang scheduling") // use
priority command.Flags().StringVarP(&s.args.PriorityClassName, "priority", "p", "", "priority class name") + // enable Queue + command.Flags().BoolVar(&s.args.EnableQueue, "queue", false, "enables the feature to queue jobs after they are scheduled (Kube-queue needs to be pre-installed https://github.com/kube-queue/kube-queue)") // add option --toleration,its' value will be get from viper command.Flags().StringArrayVar(&tolerations, "toleration", []string{}, `tolerate some k8s nodes with taints,usage: "--toleration taint-key" or "--toleration all" `) // add option --selector,its' value will be get from viper @@ -174,6 +176,9 @@ func (s *SubmitArgsBuilder) Build() error { if err := s.setAnnotations(); err != nil { return err } + if err := s.setQueue(); err != nil { + return err + } if err := s.setLabels(); err != nil { return err } @@ -323,6 +328,17 @@ func (s *SubmitArgsBuilder) setAnnotations() error { return nil } +// setQueue is used to add annotation for suspend status +func (s *SubmitArgsBuilder) setQueue() error { + if s.args.EnableQueue { + if s.args.Annotations == nil { + s.args.Annotations = map[string]string{} + } + s.args.Annotations[jobSuspend] = "true" + } + return nil +} + // setAnnotations is used to handle option --annotation func (s *SubmitArgsBuilder) setLabels() error { if s.args.Labels == nil {