From 036c760536ac067c1c30310dc93510e2e6f362f0 Mon Sep 17 00:00:00 2001 From: terrytangyuan Date: Sun, 20 Jan 2019 00:40:53 -0500 Subject: [PATCH 1/2] Move processing unit specific flags to MPIJobSpec --- cmd/mpi-operator/main.go | 6 ++--- deploy/0-crd.yaml | 26 ++++++++++++++++++- pkg/apis/kubeflow/v1alpha1/types.go | 19 +++++++++++++- .../v1alpha1/zz_generated.deepcopy.go | 10 +++++++ pkg/controllers/mpi_job_controller.go | 12 +++++++++ 5 files changed, 68 insertions(+), 5 deletions(-) diff --git a/cmd/mpi-operator/main.go b/cmd/mpi-operator/main.go index 07aaed40..604277b7 100644 --- a/cmd/mpi-operator/main.go +++ b/cmd/mpi-operator/main.go @@ -99,13 +99,13 @@ func init() { &gpusPerNode, "gpus-per-node", 1, - "The maximum number of GPUs available per node. Note that this will be ignored if the GPU resources are explicitly specified in the MPIJob pod spec.") + "(Deprecated. This will be overwritten by MPIJobSpec) The maximum number of GPUs available per node. Note that this will be ignored if the GPU resources are explicitly specified in the MPIJob pod spec.") flag.StringVar(&kubectlDeliveryImage, "kubectl-delivery-image", "", "The container image used to deliver the kubectl binary.") flag.StringVar(&namespace, "namespace", "", "The namespace used to obtain the listers.") flag.IntVar( &processingUnitsPerNode, "processing-units-per-node", 1, - "The maximum number of processing units available per node. Note that this will be ignored if the processing resources are explicitly specified in the MPIJob pod spec.") - flag.StringVar(&processingResourceType, "processing-resource-type", "nvidia.com/gpu", "The compute resource name, e.g. 'nvidia.com/gpu' or 'cpu'.") + "(Deprecated. This will be overwritten by MPIJobSpec) The maximum number of processing units available per node. Note that this will be ignored if the processing resources are explicitly specified in the MPIJob pod spec.") + flag.StringVar(&processingResourceType, "processing-resource-type", "nvidia.com/gpu", "(Deprecated. This will be overwritten by MPIJobSpec) The compute resource name, e.g. 'nvidia.com/gpu' or 'cpu'.") } diff --git a/deploy/0-crd.yaml b/deploy/0-crd.yaml index b0b79943..aed8bc61 100644 --- a/deploy/0-crd.yaml +++ b/deploy/0-crd.yaml @@ -38,6 +38,11 @@ spec: description: Defaults to the number of processing units per worker type: integer minimum: 1 + gpusPerNode: + title: The maximum number of GPUs available per node + description: Defaults to the number of GPUs per worker + type: integer + minimum: 1 required: - gpus - properties: @@ -58,12 +63,24 @@ spec: description: Defaults to the number of processing units per worker type: integer minimum: 1 + processingUnitsPerNode: + title: The maximum number of processing units available per node + description: Defaults to the number of processing units per worker + type: integer + minimum: 1 + processingResourceType: + title: The processing resource type, e.g. 'nvidia.com/gpu' or 'cpu' + description: Defaults to 'nvidia.com/gpu' + type: string + enum: + - nvidia.com/gpu + - cpu required: - processingUnits - properties: replicas: title: Total number of replicas - description: The GPU resource limit should be specified for each replica + description: The processing resource limit should be specified for each replica type: integer minimum: 1 slotsPerWorker: @@ -71,5 +88,12 @@ spec: description: Defaults to the number of processing units per worker type: integer minimum: 1 + processingResourceType: + title: The processing resource type, e.g. 'nvidia.com/gpu' or 'cpu' + description: Defaults to 'nvidia.com/gpu' + type: string + enum: + - nvidia.com/gpu + - cpu required: - replicas diff --git a/pkg/apis/kubeflow/v1alpha1/types.go b/pkg/apis/kubeflow/v1alpha1/types.go index e90f14e4..b3f9ad7d 100644 --- a/pkg/apis/kubeflow/v1alpha1/types.go +++ b/pkg/apis/kubeflow/v1alpha1/types.go @@ -44,11 +44,28 @@ type MPIJobSpec struct { // +optional GPUs *int32 `json:"gpus,omitempty"` + // The maximum number of GPUs available per node. + // Note that this will be ignored if the GPU resources are explicitly + // specified in the MPIJob pod spec. + // This is deprecated in favor of `ProcessingUnitsPerNode` field. + GPUsPerNode *int32 `json:"gpusPerNode,omitempty"` + // Specifies the desired number of processing units the MPIJob should run on. // Mutually exclusive with the `Replicas` field. // +optional ProcessingUnits *int32 `json:"processingUnits,omitempty"` + // The maximum number of processing units available per node. + // Note that this will be ignored if the processing resources are explicitly + // specified in the MPIJob pod spec. + // +optional + ProcessingUnitsPerNode *int32 `json:"processingUnitsPerNode,omitempty"` + + // The processing resource type, e.g. 'nvidia.com/gpu' or 'cpu'. + // Defaults to 'nvidia.com/gpu' + // +optional + ProcessingResourceType string `json:"processingResourceType,omitempty"` + // Specifies the number of slots per worker used in hostfile. // Defaults to the number of processing units per worker. // +optional @@ -71,7 +88,7 @@ type MPIJobSpec struct { ActiveDeadlineSeconds *int64 `json:"activeDeadlineSeconds,omitempty"` // Specifies the desired number of replicas the MPIJob should run on. - // The `PodSpec` should specify the number of GPUs. + // The `PodSpec` should specify the number of processing units. // Mutually exclusive with the `GPUs` or `ProcessingUnits` fields. // +optional Replicas *int32 `json:"replicas,omitempty"` diff --git a/pkg/apis/kubeflow/v1alpha1/zz_generated.deepcopy.go b/pkg/apis/kubeflow/v1alpha1/zz_generated.deepcopy.go index d37ad64d..7d90c550 100644 --- a/pkg/apis/kubeflow/v1alpha1/zz_generated.deepcopy.go +++ b/pkg/apis/kubeflow/v1alpha1/zz_generated.deepcopy.go @@ -91,11 +91,21 @@ func (in *MPIJobSpec) DeepCopyInto(out *MPIJobSpec) { *out = new(int32) **out = **in } + if in.GPUsPerNode != nil { + in, out := &in.GPUsPerNode, &out.GPUsPerNode + *out = new(int32) + **out = **in + } if in.ProcessingUnits != nil { in, out := &in.ProcessingUnits, &out.ProcessingUnits *out = new(int32) **out = **in } + if in.ProcessingUnitsPerNode != nil { + in, out := &in.ProcessingUnitsPerNode, &out.ProcessingUnitsPerNode + *out = new(int32) + **out = **in + } if in.SlotsPerWorker != nil { in, out := &in.SlotsPerWorker, &out.SlotsPerWorker *out = new(int32) diff --git a/pkg/controllers/mpi_job_controller.go b/pkg/controllers/mpi_job_controller.go index 7b06688e..b27310fd 100644 --- a/pkg/controllers/mpi_job_controller.go +++ b/pkg/controllers/mpi_job_controller.go @@ -415,6 +415,18 @@ func (c *MPIJobController) syncHandler(key string) error { // We're done if the launcher either succeeded or failed. done := launcher != nil && (launcher.Status.Succeeded == 1 || launcher.Status.Failed == 1) + // TODO (terrytangyuan): Remove these flags from main.go for next major release + // and update deploy/*.yaml + if mpiJob.Spec.GPUsPerNode != nil { + c.gpusPerNode = int(*mpiJob.Spec.GPUsPerNode) + } + if mpiJob.Spec.ProcessingUnitsPerNode != nil { + c.processingUnitsPerNode = int(*mpiJob.Spec.ProcessingUnitsPerNode) + } + if mpiJob.Spec.ProcessingResourceType != "" { + c.processingResourceType = mpiJob.Spec.ProcessingResourceType + } + workerReplicas, processingUnitsPerWorker, err := allocateProcessingUnits(mpiJob, c.gpusPerNode, c.processingUnitsPerNode, c.processingResourceType, done) if err != nil { runtime.HandleError(err) From e7f376a0c89960ae8020316e58a09b0c0bbe9d28 Mon Sep 17 00:00:00 2001 From: terrytangyuan Date: Sun, 20 Jan 2019 22:03:05 -0500 Subject: [PATCH 2/2] Use variable to hold fields instead of modifying controller state --- pkg/controllers/mpi_job_controller.go | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/pkg/controllers/mpi_job_controller.go b/pkg/controllers/mpi_job_controller.go index b27310fd..d12fd3f8 100644 --- a/pkg/controllers/mpi_job_controller.go +++ b/pkg/controllers/mpi_job_controller.go @@ -417,17 +417,20 @@ func (c *MPIJobController) syncHandler(key string) error { // TODO (terrytangyuan): Remove these flags from main.go for next major release // and update deploy/*.yaml + var gpusPerNode = c.gpusPerNode + var processingUnitsPerNode = c.processingUnitsPerNode + var processingResourceType = c.processingResourceType if mpiJob.Spec.GPUsPerNode != nil { - c.gpusPerNode = int(*mpiJob.Spec.GPUsPerNode) + gpusPerNode = int(*mpiJob.Spec.GPUsPerNode) } if mpiJob.Spec.ProcessingUnitsPerNode != nil { - c.processingUnitsPerNode = int(*mpiJob.Spec.ProcessingUnitsPerNode) + processingUnitsPerNode = int(*mpiJob.Spec.ProcessingUnitsPerNode) } if mpiJob.Spec.ProcessingResourceType != "" { - c.processingResourceType = mpiJob.Spec.ProcessingResourceType + processingResourceType = mpiJob.Spec.ProcessingResourceType } - workerReplicas, processingUnitsPerWorker, err := allocateProcessingUnits(mpiJob, c.gpusPerNode, c.processingUnitsPerNode, c.processingResourceType, done) + workerReplicas, processingUnitsPerWorker, err := allocateProcessingUnits(mpiJob, gpusPerNode, processingUnitsPerNode, processingResourceType, done) if err != nil { runtime.HandleError(err) return nil @@ -455,7 +458,7 @@ func (c *MPIJobController) syncHandler(key string) error { } } - worker, err := c.getOrCreateWorkerStatefulSet(mpiJob, workerReplicas, processingUnitsPerWorker, c.processingResourceType) + worker, err := c.getOrCreateWorkerStatefulSet(mpiJob, workerReplicas, processingUnitsPerWorker, processingResourceType) if err != nil { return err }