diff --git a/pkg/apis/kubeflow.org/v1/mxnet_validation.go b/pkg/apis/kubeflow.org/v1/mxnet_validation.go index b88e3e2dac..943678af40 100644 --- a/pkg/apis/kubeflow.org/v1/mxnet_validation.go +++ b/pkg/apis/kubeflow.org/v1/mxnet_validation.go @@ -16,14 +16,21 @@ package v1 import ( "fmt" - commonv1 "github.com/kubeflow/common/pkg/apis/common/v1" + commonv1 "github.com/kubeflow/common/pkg/apis/common/v1" log "github.com/sirupsen/logrus" + apimachineryvalidation "k8s.io/apimachinery/pkg/api/validation" ) -// ValidateV1MXJobSpec checks that the kubeflowv1.MXJobSpec is valid. -func ValidateV1MXJobSpec(c *MXJobSpec) error { - return validateMXNetReplicaSpecs(c.MXReplicaSpecs) +// ValidateV1MXJob checks that the kubeflowv1.MXJobSpec is valid. +func ValidateV1MXJob(mxJob *MXJob) error { + if errors := apimachineryvalidation.NameIsDNS1035Label(mxJob.ObjectMeta.Name, false); errors != nil { + return fmt.Errorf("MXJob name is invalid: %v", errors) + } + if err := validateMXReplicaSpecs(mxJob.Spec.MXReplicaSpecs); err != nil { + return err + } + return nil } // IsScheduler returns true if the type is Scheduler. @@ -31,7 +38,7 @@ func IsScheduler(typ commonv1.ReplicaType) bool { return typ == MXJobReplicaTypeScheduler } -func validateMXNetReplicaSpecs(specs map[commonv1.ReplicaType]*commonv1.ReplicaSpec) error { +func validateMXReplicaSpecs(specs map[commonv1.ReplicaType]*commonv1.ReplicaSpec) error { if specs == nil { return fmt.Errorf("MXJobSpec is not valid") } diff --git a/pkg/apis/kubeflow.org/v1/mxnet_validation_test.go b/pkg/apis/kubeflow.org/v1/mxnet_validation_test.go index 94ea5244cd..addfd9273a 100644 --- a/pkg/apis/kubeflow.org/v1/mxnet_validation_test.go +++ b/pkg/apis/kubeflow.org/v1/mxnet_validation_test.go @@ -18,72 +18,176 @@ import ( "testing" commonv1 "github.com/kubeflow/common/pkg/apis/common/v1" - v1 "k8s.io/api/core/v1" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/utils/pointer" ) -func TestValidateV1MXJobSpec(t *testing.T) { - testCases := []MXJobSpec{ - { - MXReplicaSpecs: nil, +func TestValidateV1MXJob(t *testing.T) { + validMXReplicaSpecs := map[commonv1.ReplicaType]*commonv1.ReplicaSpec{ + MXJobReplicaTypeScheduler: { + Replicas: pointer.Int32(1), + RestartPolicy: commonv1.RestartPolicyNever, + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{{ + Name: "mxnet", + Image: "mxjob/mxnet", + }}, + }, + }, + }, + MXJobReplicaTypeServer: { + Replicas: pointer.Int32(1), + RestartPolicy: commonv1.RestartPolicyNever, + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{{ + Name: "mxnet", + Image: "mxjob/mxnet", + }}, + }, + }, }, - { - MXReplicaSpecs: map[commonv1.ReplicaType]*commonv1.ReplicaSpec{ - MXJobReplicaTypeWorker: &commonv1.ReplicaSpec{ - Template: v1.PodTemplateSpec{ - Spec: v1.PodSpec{ - Containers: []v1.Container{}, + MXJobReplicaTypeWorker: { + Replicas: pointer.Int32(1), + RestartPolicy: commonv1.RestartPolicyNever, + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{{ + Name: "mxnet", + Image: "mxjob/mxnet", + Command: []string{"python"}, + Args: []string{ + "/incubator-mxnet/example/image-classification/train_mnist.py", + "--num-epochs=10", + "--num-layers=2", + "--kv-store=dist_device_sync", }, - }, + }}, + }, + }, + }, + } + + testCases := map[string]struct { + MXJob *MXJob + wantErr bool + }{ + "valid mxJob": { + MXJob: &MXJob{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test", + }, + Spec: MXJobSpec{ + MXReplicaSpecs: validMXReplicaSpecs, + }, + }, + wantErr: false, + }, + "mxReplicaSpecs is nil": { + MXJob: &MXJob{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test", }, }, + wantErr: true, }, - { - MXReplicaSpecs: map[commonv1.ReplicaType]*commonv1.ReplicaSpec{ - MXJobReplicaTypeWorker: &commonv1.ReplicaSpec{ - Template: v1.PodTemplateSpec{ - Spec: v1.PodSpec{ - Containers: []v1.Container{ - v1.Container{ - Image: "", + "mxJob name does not meet DNS1035": { + MXJob: &MXJob{ + ObjectMeta: metav1.ObjectMeta{ + Name: "10test", + }, + Spec: MXJobSpec{ + MXReplicaSpecs: validMXReplicaSpecs, + }, + }, + wantErr: true, + }, + "no containers": { + MXJob: &MXJob{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test", + }, + Spec: MXJobSpec{ + MXReplicaSpecs: map[commonv1.ReplicaType]*commonv1.ReplicaSpec{ + MXJobReplicaTypeWorker: { + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{}, }, }, }, }, }, }, + wantErr: true, }, - { - MXReplicaSpecs: map[commonv1.ReplicaType]*commonv1.ReplicaSpec{ - MXJobReplicaTypeWorker: &commonv1.ReplicaSpec{ - Template: v1.PodTemplateSpec{ - Spec: v1.PodSpec{ - Containers: []v1.Container{ - v1.Container{ - Name: "", - Image: "mxjob/mxnet:gpu", + "image is empty": { + MXJob: &MXJob{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test", + }, + Spec: MXJobSpec{ + MXReplicaSpecs: map[commonv1.ReplicaType]*commonv1.ReplicaSpec{ + MXJobReplicaTypeWorker: { + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{{ + Name: "mxnet", + Image: "", + }}, }, }, }, }, }, }, + wantErr: true, }, - { - MXReplicaSpecs: map[commonv1.ReplicaType]*commonv1.ReplicaSpec{ - MXJobReplicaTypeScheduler: &commonv1.ReplicaSpec{ - Template: v1.PodTemplateSpec{ - Spec: v1.PodSpec{ - Containers: []v1.Container{}, + "mxnet default container name doesn't find": { + MXJob: &MXJob{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test", + }, + Spec: MXJobSpec{ + MXReplicaSpecs: map[commonv1.ReplicaType]*commonv1.ReplicaSpec{ + MXJobReplicaTypeWorker: { + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{{ + Name: "", + Image: "mxjob/mxnet:gpu", + }}, + }, + }, }, }, }, }, + wantErr: true, + }, + "replicaSpec is nil": { + MXJob: &MXJob{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test", + }, + Spec: MXJobSpec{ + MXReplicaSpecs: map[commonv1.ReplicaType]*commonv1.ReplicaSpec{ + MXJobReplicaTypeScheduler: nil, + }, + }, + }, + wantErr: true, }, } - for _, c := range testCases { - err := ValidateV1MXJobSpec(&c) - if err.Error() != "MXJobSpec is not valid" { - t.Error("Failed validate the alpha2.MXJobSpec") - } + + for name, tc := range testCases { + t.Run(name, func(t *testing.T) { + got := ValidateV1MXJob(tc.MXJob) + if (got != nil) != tc.wantErr { + t.Fatalf("ValidateV1MXJob() error = %v, wantErr %v", got, tc.wantErr) + } + }) } } diff --git a/pkg/apis/kubeflow.org/v1/paddlepaddle_validation.go b/pkg/apis/kubeflow.org/v1/paddlepaddle_validation.go index 8969bda032..57310710ce 100644 --- a/pkg/apis/kubeflow.org/v1/paddlepaddle_validation.go +++ b/pkg/apis/kubeflow.org/v1/paddlepaddle_validation.go @@ -18,13 +18,24 @@ import ( "fmt" commonv1 "github.com/kubeflow/common/pkg/apis/common/v1" + apimachineryvalidation "k8s.io/apimachinery/pkg/api/validation" ) -func ValidateV1PaddleJobSpec(c *PaddleJobSpec) error { - if c.PaddleReplicaSpecs == nil { +func ValidateV1PaddleJob(paddleJob *PaddleJob) error { + if errors := apimachineryvalidation.NameIsDNS1035Label(paddleJob.ObjectMeta.Name, false); errors != nil { + return fmt.Errorf("PaddleJob name is invalid: %v", errors) + } + if err := validatePaddleReplicaSpecs(paddleJob.Spec.PaddleReplicaSpecs); err != nil { + return err + } + return nil +} + +func validatePaddleReplicaSpecs(specs map[commonv1.ReplicaType]*commonv1.ReplicaSpec) error { + if specs == nil { return fmt.Errorf("PaddleJobSpec is not valid") } - for rType, value := range c.PaddleReplicaSpecs { + for rType, value := range specs { if value == nil || len(value.Template.Spec.Containers) == 0 { return fmt.Errorf("PaddleJobSpec is not valid: containers definition expected in %v", rType) } @@ -63,5 +74,4 @@ func ValidateV1PaddleJobSpec(c *PaddleJobSpec) error { } return nil - } diff --git a/pkg/apis/kubeflow.org/v1/paddlepaddle_validation_test.go b/pkg/apis/kubeflow.org/v1/paddlepaddle_validation_test.go index a09002582e..cdc06d8bb9 100644 --- a/pkg/apis/kubeflow.org/v1/paddlepaddle_validation_test.go +++ b/pkg/apis/kubeflow.org/v1/paddlepaddle_validation_test.go @@ -18,61 +18,150 @@ import ( "testing" commonv1 "github.com/kubeflow/common/pkg/apis/common/v1" - v1 "k8s.io/api/core/v1" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/utils/pointer" ) -func TestValidateV1PaddleJobSpec(t *testing.T) { - testCases := []PaddleJobSpec{ - { - PaddleReplicaSpecs: nil, +func TestValidateV1PaddleJob(t *testing.T) { + validPaddleReplicaSpecs := map[commonv1.ReplicaType]*commonv1.ReplicaSpec{ + PaddleJobReplicaTypeWorker: { + Replicas: pointer.Int32(2), + RestartPolicy: commonv1.RestartPolicyNever, + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{{ + Name: "paddle", + Image: "registry.baidubce.com/paddlepaddle/paddle:2.4.0rc0-cpu", + Command: []string{"python"}, + Args: []string{ + "-m", + "paddle.distributed.launch", + "run_check", + }, + Ports: []corev1.ContainerPort{{ + Name: "master", + ContainerPort: int32(37777), + }}, + ImagePullPolicy: corev1.PullAlways, + }}, + }, + }, + }, + } + + testCases := map[string]struct { + paddleJob *PaddleJob + wantErr bool + }{ + "valid paddleJob": { + paddleJob: &PaddleJob{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test", + }, + Spec: PaddleJobSpec{ + PaddleReplicaSpecs: validPaddleReplicaSpecs, + }, + }, + wantErr: false, }, - { - PaddleReplicaSpecs: map[commonv1.ReplicaType]*commonv1.ReplicaSpec{ - PaddleJobReplicaTypeWorker: { - Template: v1.PodTemplateSpec{ - Spec: v1.PodSpec{ - Containers: []v1.Container{}, + "paddleJob name does not meet DNS1035": { + paddleJob: &PaddleJob{ + ObjectMeta: metav1.ObjectMeta{ + Name: "__test", + }, + Spec: PaddleJobSpec{ + PaddleReplicaSpecs: validPaddleReplicaSpecs, + }, + }, + wantErr: true, + }, + "no containers": { + paddleJob: &PaddleJob{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test", + }, + Spec: PaddleJobSpec{ + PaddleReplicaSpecs: map[commonv1.ReplicaType]*commonv1.ReplicaSpec{ + PaddleJobReplicaTypeWorker: { + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{}, + }, + }, }, }, }, }, + wantErr: true, }, - { - PaddleReplicaSpecs: map[commonv1.ReplicaType]*commonv1.ReplicaSpec{ - PaddleJobReplicaTypeWorker: { - Template: v1.PodTemplateSpec{ - Spec: v1.PodSpec{ - Containers: []v1.Container{ - { - Image: "", + "image is empty": { + paddleJob: &PaddleJob{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test", + }, + Spec: PaddleJobSpec{ + PaddleReplicaSpecs: map[commonv1.ReplicaType]*commonv1.ReplicaSpec{ + PaddleJobReplicaTypeWorker: { + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "paddle", + Image: "", + }, + }, }, }, }, }, }, }, + wantErr: true, }, - { - PaddleReplicaSpecs: map[commonv1.ReplicaType]*commonv1.ReplicaSpec{ - PaddleJobReplicaTypeWorker: { - Template: v1.PodTemplateSpec{ - Spec: v1.PodSpec{ - Containers: []v1.Container{ - { - Name: "", - Image: "gcr.io/kubeflow-ci/paddle-dist-mnist_test:1.0", + "paddle default container name doesn't find": { + paddleJob: &PaddleJob{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test", + }, + Spec: PaddleJobSpec{ + PaddleReplicaSpecs: map[commonv1.ReplicaType]*commonv1.ReplicaSpec{ + PaddleJobReplicaTypeWorker: { + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "", + Image: "gcr.io/kubeflow-ci/paddle-dist-mnist_test:1.0", + }, + }, }, }, }, }, }, }, + wantErr: true, + }, + "replicaSpec is nil": { + paddleJob: &PaddleJob{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test", + }, + Spec: PaddleJobSpec{ + PaddleReplicaSpecs: nil, + }, + }, + wantErr: true, }, } - for _, c := range testCases { - err := ValidateV1PaddleJobSpec(&c) - if err == nil { - t.Error("Failed validate the v1.PaddleJobSpec") - } + + for name, tc := range testCases { + t.Run(name, func(t *testing.T) { + got := ValidateV1PaddleJob(tc.paddleJob) + if (got != nil) != tc.wantErr { + t.Fatalf("ValidateV1PaddleJob() error = %v, wantErr %v", got, tc.wantErr) + } + }) } } diff --git a/pkg/apis/kubeflow.org/v1/pytorch_validation.go b/pkg/apis/kubeflow.org/v1/pytorch_validation.go index 2528c3ecd9..e9869ece1c 100644 --- a/pkg/apis/kubeflow.org/v1/pytorch_validation.go +++ b/pkg/apis/kubeflow.org/v1/pytorch_validation.go @@ -18,13 +18,24 @@ import ( "fmt" commonv1 "github.com/kubeflow/common/pkg/apis/common/v1" + apimachineryvalidation "k8s.io/apimachinery/pkg/api/validation" ) -func ValidateV1PyTorchJobSpec(c *PyTorchJobSpec) error { - if c.PyTorchReplicaSpecs == nil { +func ValidateV1PyTorchJob(pytorchJob *PyTorchJob) error { + if errors := apimachineryvalidation.NameIsDNS1035Label(pytorchJob.ObjectMeta.Name, false); errors != nil { + return fmt.Errorf("PyTorchJob name is invalid: %v", errors) + } + if err := validatePyTorchReplicaSpecs(pytorchJob.Spec.PyTorchReplicaSpecs); err != nil { + return err + } + return nil +} + +func validatePyTorchReplicaSpecs(specs map[commonv1.ReplicaType]*commonv1.ReplicaSpec) error { + if specs == nil { return fmt.Errorf("PyTorchJobSpec is not valid") } - for rType, value := range c.PyTorchReplicaSpecs { + for rType, value := range specs { if value == nil || len(value.Template.Spec.Containers) == 0 { return fmt.Errorf("PyTorchJobSpec is not valid: containers definition expected in %v", rType) } diff --git a/pkg/apis/kubeflow.org/v1/pytorch_validation_test.go b/pkg/apis/kubeflow.org/v1/pytorch_validation_test.go index d8a1b20570..6428215d9b 100644 --- a/pkg/apis/kubeflow.org/v1/pytorch_validation_test.go +++ b/pkg/apis/kubeflow.org/v1/pytorch_validation_test.go @@ -15,82 +15,180 @@ package v1 import ( - "k8s.io/utils/pointer" "testing" commonv1 "github.com/kubeflow/common/pkg/apis/common/v1" - v1 "k8s.io/api/core/v1" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/utils/pointer" ) -func TestValidateV1PyTorchJobSpec(t *testing.T) { - testCases := []PyTorchJobSpec{ - { - PyTorchReplicaSpecs: nil, +func TestValidateV1PyTorchJob(t *testing.T) { + validPyTorchReplicaSpecs := map[commonv1.ReplicaType]*commonv1.ReplicaSpec{ + PyTorchJobReplicaTypeMaster: { + Replicas: pointer.Int32(1), + RestartPolicy: commonv1.RestartPolicyOnFailure, + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{{ + Name: "pytorch", + Image: "docker.io/kubeflowkatib/pytorch-mnist:v1beta1-45c5727", + ImagePullPolicy: corev1.PullAlways, + Command: []string{ + "python3", + "/opt/pytorch-mnist/mnist.py", + "--epochs=1", + }, + }}, + }, + }, + }, + PyTorchJobReplicaTypeWorker: { + Replicas: pointer.Int32(1), + RestartPolicy: commonv1.RestartPolicyOnFailure, + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{{ + Name: "pytorch", + Image: "docker.io/kubeflowkatib/pytorch-mnist:v1beta1-45c5727", + ImagePullPolicy: corev1.PullAlways, + Command: []string{ + "python3", + "/opt/pytorch-mnist/mnist.py", + "--epochs=1", + }, + }}, + }, + }, + }, + } + + testCases := map[string]struct { + pytorchJob *PyTorchJob + wantErr bool + }{ + "valid PyTorchJob": { + pytorchJob: &PyTorchJob{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test", + }, + Spec: PyTorchJobSpec{ + PyTorchReplicaSpecs: validPyTorchReplicaSpecs, + }, + }, + wantErr: false, }, - { - PyTorchReplicaSpecs: map[commonv1.ReplicaType]*commonv1.ReplicaSpec{ - PyTorchJobReplicaTypeWorker: { - Template: v1.PodTemplateSpec{ - Spec: v1.PodSpec{ - Containers: []v1.Container{}, + "pytorchJob name does not meet DNS1035": { + pytorchJob: &PyTorchJob{ + ObjectMeta: metav1.ObjectMeta{ + Name: "0-test", + }, + Spec: PyTorchJobSpec{ + PyTorchReplicaSpecs: validPyTorchReplicaSpecs, + }, + }, + wantErr: true, + }, + "no containers": { + pytorchJob: &PyTorchJob{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test", + }, + Spec: PyTorchJobSpec{ + PyTorchReplicaSpecs: map[commonv1.ReplicaType]*commonv1.ReplicaSpec{ + PyTorchJobReplicaTypeWorker: { + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{}, + }, + }, }, }, }, }, + wantErr: true, }, - { - PyTorchReplicaSpecs: map[commonv1.ReplicaType]*commonv1.ReplicaSpec{ - PyTorchJobReplicaTypeWorker: { - Template: v1.PodTemplateSpec{ - Spec: v1.PodSpec{ - Containers: []v1.Container{ - { - Image: "", + "image is empty": { + pytorchJob: &PyTorchJob{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test", + }, + Spec: PyTorchJobSpec{ + PyTorchReplicaSpecs: map[commonv1.ReplicaType]*commonv1.ReplicaSpec{ + PyTorchJobReplicaTypeWorker: { + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "pytorch", + Image: "", + }, + }, }, }, }, }, }, }, + wantErr: true, }, - { - PyTorchReplicaSpecs: map[commonv1.ReplicaType]*commonv1.ReplicaSpec{ - PyTorchJobReplicaTypeWorker: { - Template: v1.PodTemplateSpec{ - Spec: v1.PodSpec{ - Containers: []v1.Container{ - { - Name: "", - Image: "gcr.io/kubeflow-ci/pytorch-dist-mnist_test:1.0", + "pytorchJob default container name doesn't present": { + pytorchJob: &PyTorchJob{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test", + }, + Spec: PyTorchJobSpec{ + PyTorchReplicaSpecs: map[commonv1.ReplicaType]*commonv1.ReplicaSpec{ + PyTorchJobReplicaTypeWorker: { + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "", + Image: "gcr.io/kubeflow-ci/pytorch-dist-mnist_test:1.0", + }, + }, }, }, }, }, }, }, + wantErr: true, }, - { - PyTorchReplicaSpecs: map[commonv1.ReplicaType]*commonv1.ReplicaSpec{ - PyTorchJobReplicaTypeMaster: { - Replicas: pointer.Int32(2), - Template: v1.PodTemplateSpec{ - Spec: v1.PodSpec{ - Containers: []v1.Container{ - { - Name: "pytorch", - Image: "gcr.io/kubeflow-ci/pytorch-dist-mnist_test:1.0", + "the number of replicas in masterReplica is other than 1": { + pytorchJob: &PyTorchJob{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test", + }, + Spec: PyTorchJobSpec{ + PyTorchReplicaSpecs: map[commonv1.ReplicaType]*commonv1.ReplicaSpec{ + PyTorchJobReplicaTypeMaster: { + Replicas: pointer.Int32(2), + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "pytorch", + Image: "gcr.io/kubeflow-ci/pytorch-dist-mnist_test:1.0", + }, + }, }, }, }, }, }, }, + wantErr: true, }, } - for _, c := range testCases { - err := ValidateV1PyTorchJobSpec(&c) - if err == nil { - t.Error("Failed validate the v1.PyTorchJobSpec") - } + + for name, tc := range testCases { + t.Run(name, func(t *testing.T) { + got := ValidateV1PyTorchJob(tc.pytorchJob) + if (got != nil) != tc.wantErr { + t.Fatalf("ValidateV1PyTorchJob() error = %v, wantErr %v", got, tc.wantErr) + } + }) } } diff --git a/pkg/apis/kubeflow.org/v1/tensorflow_validation.go b/pkg/apis/kubeflow.org/v1/tensorflow_validation.go index 32a9c25463..9e06b31e44 100644 --- a/pkg/apis/kubeflow.org/v1/tensorflow_validation.go +++ b/pkg/apis/kubeflow.org/v1/tensorflow_validation.go @@ -17,11 +17,21 @@ package v1 import ( "fmt" - log "github.com/sirupsen/logrus" - commonv1 "github.com/kubeflow/common/pkg/apis/common/v1" + log "github.com/sirupsen/logrus" + apimachineryvalidation "k8s.io/apimachinery/pkg/api/validation" ) +func ValidateV1TFJob(tfjob *TFJob) error { + if errors := apimachineryvalidation.NameIsDNS1035Label(tfjob.ObjectMeta.Name, false); errors != nil { + return fmt.Errorf("TFJob name is invalid: %v", errors) + } + if err := validateV1TFReplicaSpecs(tfjob.Spec.TFReplicaSpecs); err != nil { + return err + } + return nil +} + // IsChieforMaster returns true if the type is Master or Chief. func IsChieforMaster(typ commonv1.ReplicaType) bool { return typ == TFJobReplicaTypeChief || typ == TFJobReplicaTypeMaster @@ -37,12 +47,7 @@ func IsEvaluator(typ commonv1.ReplicaType) bool { return typ == TFJobReplicaTypeEval } -// ValidateV1TFJobSpec checks that the v1.TFJobSpec is valid. -func ValidateV1TFJobSpec(c *TFJobSpec) error { - return validateV1ReplicaSpecs(c.TFReplicaSpecs) -} - -func validateV1ReplicaSpecs(specs map[commonv1.ReplicaType]*commonv1.ReplicaSpec) error { +func validateV1TFReplicaSpecs(specs map[commonv1.ReplicaType]*commonv1.ReplicaSpec) error { if specs == nil { return fmt.Errorf("TFJobSpec is not valid") } diff --git a/pkg/apis/kubeflow.org/v1/tensorflow_validation_test.go b/pkg/apis/kubeflow.org/v1/tensorflow_validation_test.go index 4b070881e3..216256a343 100644 --- a/pkg/apis/kubeflow.org/v1/tensorflow_validation_test.go +++ b/pkg/apis/kubeflow.org/v1/tensorflow_validation_test.go @@ -19,79 +19,154 @@ import ( commonv1 "github.com/kubeflow/common/pkg/apis/common/v1" corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/utils/pointer" ) -func TestValidateV1TFJobSpec(t *testing.T) { - testCases := []TFJobSpec{ - { - TFReplicaSpecs: nil, - }, - { - TFReplicaSpecs: map[commonv1.ReplicaType]*commonv1.ReplicaSpec{ - TFJobReplicaTypeWorker: &commonv1.ReplicaSpec{ - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{}, +func TestValidateV1TFJob(t *testing.T) { + validTFReplicaSpecs := map[commonv1.ReplicaType]*commonv1.ReplicaSpec{ + TFJobReplicaTypeWorker: { + Replicas: pointer.Int32(2), + RestartPolicy: commonv1.RestartPolicyOnFailure, + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{{ + Name: "tensorflow", + Image: "kubeflow/tf-mnist-with-summaries:latest", + Command: []string{ + "python", + "/var/tf_mnist/mnist_with_summaries.py", }, - }, + }}, }, }, }, - { - TFReplicaSpecs: map[commonv1.ReplicaType]*commonv1.ReplicaSpec{ - TFJobReplicaTypeWorker: &commonv1.ReplicaSpec{ - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - corev1.Container{ - Image: "", + } + + testCases := map[string]struct { + tfJob *TFJob + wantErr bool + }{ + "valid tfJob": { + tfJob: &TFJob{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test", + }, + Spec: TFJobSpec{ + TFReplicaSpecs: validTFReplicaSpecs, + }, + }, + wantErr: false, + }, + "TFJob name does not meet DNS1035": { + tfJob: &TFJob{ + ObjectMeta: metav1.ObjectMeta{ + Name: "00test", + }, + Spec: TFJobSpec{ + TFReplicaSpecs: validTFReplicaSpecs, + }, + }, + wantErr: true, + }, + "no containers": { + tfJob: &TFJob{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test", + }, + Spec: TFJobSpec{ + TFReplicaSpecs: map[commonv1.ReplicaType]*commonv1.ReplicaSpec{ + TFJobReplicaTypeWorker: { + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{}, }, }, }, }, }, }, + wantErr: true, }, - { - TFReplicaSpecs: map[commonv1.ReplicaType]*commonv1.ReplicaSpec{ - TFJobReplicaTypeWorker: &commonv1.ReplicaSpec{ - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - corev1.Container{ - Name: "", - Image: "kubeflow/tf-dist-mnist-test:1.0", + "empty image": { + tfJob: &TFJob{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test", + }, + Spec: TFJobSpec{ + TFReplicaSpecs: map[commonv1.ReplicaType]*commonv1.ReplicaSpec{ + TFJobReplicaTypeWorker: { + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{{ + Name: "tensorflow", + Image: "", + }}, }, }, }, }, }, }, + wantErr: true, }, - { - TFReplicaSpecs: map[commonv1.ReplicaType]*commonv1.ReplicaSpec{ - TFJobReplicaTypeChief: &commonv1.ReplicaSpec{ - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{}, + "tfJob default container name doesn't present": { + tfJob: &TFJob{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test", + }, + Spec: TFJobSpec{ + TFReplicaSpecs: map[commonv1.ReplicaType]*commonv1.ReplicaSpec{ + TFJobReplicaTypeWorker: { + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{{ + Name: "", + Image: "kubeflow/tf-dist-mnist-test:1.0", + }}, + }, + }, }, }, }, - TFJobReplicaTypeMaster: &commonv1.ReplicaSpec{ - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{}, + }, + wantErr: true, + }, + "there are more than 2 masterReplica's or ChiefReplica's": { + tfJob: &TFJob{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test", + }, + Spec: TFJobSpec{ + TFReplicaSpecs: map[commonv1.ReplicaType]*commonv1.ReplicaSpec{ + TFJobReplicaTypeChief: { + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{}, + }, + }, + }, + TFJobReplicaTypeMaster: { + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{}, + }, + }, }, }, }, }, + wantErr: true, }, } - for _, c := range testCases { - err := ValidateV1TFJobSpec(&c) - if err == nil { - t.Error("Expected error got nil") - } + + for name, tc := range testCases { + t.Run(name, func(t *testing.T) { + got := ValidateV1TFJob(tc.tfJob) + if (got != nil) != tc.wantErr { + t.Fatalf("ValidateV1TFJob() error = %v, wantErr %v", got, tc.wantErr) + } + }) } } diff --git a/pkg/apis/kubeflow.org/v1/xgboost_validation.go b/pkg/apis/kubeflow.org/v1/xgboost_validation.go index 0091c25582..c7b6186d20 100644 --- a/pkg/apis/kubeflow.org/v1/xgboost_validation.go +++ b/pkg/apis/kubeflow.org/v1/xgboost_validation.go @@ -18,14 +18,25 @@ import ( "fmt" commonv1 "github.com/kubeflow/common/pkg/apis/common/v1" + apimachineryvalidation "k8s.io/apimachinery/pkg/api/validation" ) -func ValidateXGBoostJobSpec(c *XGBoostJobSpec) error { - if c.XGBReplicaSpecs == nil { +func ValidateV1XGBoostJob(xgboostJob *XGBoostJob) error { + if errors := apimachineryvalidation.NameIsDNS1035Label(xgboostJob.ObjectMeta.Name, false); errors != nil { + return fmt.Errorf("XGBoostJob name is invalid: %v", errors) + } + if err := validateXGBoostReplicaSpecs(xgboostJob.Spec.XGBReplicaSpecs); err != nil { + return err + } + return nil +} + +func validateXGBoostReplicaSpecs(specs map[commonv1.ReplicaType]*commonv1.ReplicaSpec) error { + if specs == nil { return fmt.Errorf("XGBoostJobSpec is not valid") } masterExists := false - for rType, value := range c.XGBReplicaSpecs { + for rType, value := range specs { if value == nil || len(value.Template.Spec.Containers) == 0 { return fmt.Errorf("XGBoostJobSpec is not valid: containers definition expected in %v", rType) } diff --git a/pkg/apis/kubeflow.org/v1/xgboost_validation_test.go b/pkg/apis/kubeflow.org/v1/xgboost_validation_test.go index 0cb3a82469..b5564b9553 100644 --- a/pkg/apis/kubeflow.org/v1/xgboost_validation_test.go +++ b/pkg/apis/kubeflow.org/v1/xgboost_validation_test.go @@ -15,99 +15,209 @@ package v1 import ( - "k8s.io/utils/pointer" "testing" commonv1 "github.com/kubeflow/common/pkg/apis/common/v1" corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/utils/pointer" ) -func TestValidateXGBoostJobSpec(t *testing.T) { - testCases := []XGBoostJobSpec{ - { - XGBReplicaSpecs: nil, +func TestValidateV1XGBoostJob(t *testing.T) { + validXGBoostReplicaSpecs := map[commonv1.ReplicaType]*commonv1.ReplicaSpec{ + XGBoostJobReplicaTypeMaster: { + Replicas: pointer.Int32(1), + RestartPolicy: commonv1.RestartPolicyNever, + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{{ + Name: "xgboost", + Image: "docker.io/merlintang/xgboost-dist-iris:1.1", + Ports: []corev1.ContainerPort{{ + Name: "xgboostjob-port", + ContainerPort: 9991, + }}, + ImagePullPolicy: corev1.PullAlways, + Args: []string{ + "--job_type=Train", + "--xgboost_parameter=objective:multi:softprob,num_class:3", + "--n_estimators=10", + "--learning_rate=0.1", + "--model_path=/tmp/xgboost-model", + "--model_storage_type=local", + }, + }}, + }, + }, }, - { - XGBReplicaSpecs: map[commonv1.ReplicaType]*commonv1.ReplicaSpec{ - XGBoostJobReplicaTypeWorker: &commonv1.ReplicaSpec{ - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{}, + XGBoostJobReplicaTypeWorker: { + Replicas: pointer.Int32(2), + RestartPolicy: commonv1.RestartPolicyExitCode, + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{{ + Name: "xgboost", + Image: "docker.io/merlintang/xgboost-dist-iris:1.", + Ports: []corev1.ContainerPort{{ + Name: "xgboostjob-port", + ContainerPort: 9991, + }}, + ImagePullPolicy: corev1.PullAlways, + Args: []string{ + "--job_type=Train", + "--xgboost_parameter=objective:multi:softprob,num_class:3", + "--n_estimators=10", + "--learning_rate=0.1", + }, + }}, + }, + }, + }, + } + + testCases := map[string]struct { + xgboostJob *XGBoostJob + wantErr bool + }{ + "valid XGBoostJob": { + xgboostJob: &XGBoostJob{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test", + }, + Spec: XGBoostJobSpec{ + XGBReplicaSpecs: validXGBoostReplicaSpecs, + }, + }, + wantErr: false, + }, + "XGBoostJob name does not meet DNS1035": { + xgboostJob: &XGBoostJob{ + ObjectMeta: metav1.ObjectMeta{ + Name: "-test", + }, + Spec: XGBoostJobSpec{ + XGBReplicaSpecs: validXGBoostReplicaSpecs, + }, + }, + wantErr: true, + }, + "empty containers": { + xgboostJob: &XGBoostJob{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test", + }, + Spec: XGBoostJobSpec{ + XGBReplicaSpecs: map[commonv1.ReplicaType]*commonv1.ReplicaSpec{ + XGBoostJobReplicaTypeWorker: { + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{}, + }, + }, }, }, }, }, + wantErr: true, }, - { - XGBReplicaSpecs: map[commonv1.ReplicaType]*commonv1.ReplicaSpec{ - XGBoostJobReplicaTypeWorker: &commonv1.ReplicaSpec{ - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - corev1.Container{ - Image: "", + "image is empty": { + xgboostJob: &XGBoostJob{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test", + }, + Spec: XGBoostJobSpec{ + XGBReplicaSpecs: map[commonv1.ReplicaType]*commonv1.ReplicaSpec{ + XGBoostJobReplicaTypeWorker: { + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{{ + Name: "xgboost", + Image: "", + }}, }, }, }, }, }, }, + wantErr: true, }, - { - XGBReplicaSpecs: map[commonv1.ReplicaType]*commonv1.ReplicaSpec{ - XGBoostJobReplicaTypeWorker: &commonv1.ReplicaSpec{ - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - corev1.Container{ - Name: "", - Image: "gcr.io/kubeflow-ci/xgboost-dist-mnist_test:1.0", + "xgboostJob default container name doesn't present": { + xgboostJob: &XGBoostJob{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test", + }, + Spec: XGBoostJobSpec{ + XGBReplicaSpecs: map[commonv1.ReplicaType]*commonv1.ReplicaSpec{ + XGBoostJobReplicaTypeWorker: { + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{{ + Name: "", + Image: "gcr.io/kubeflow-ci/xgboost-dist-mnist_test:1.0", + }}, }, }, }, }, }, }, + wantErr: true, }, - { - XGBReplicaSpecs: map[commonv1.ReplicaType]*commonv1.ReplicaSpec{ - XGBoostJobReplicaTypeMaster: &commonv1.ReplicaSpec{ - Replicas: pointer.Int32(2), - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - corev1.Container{ - Name: "xgboost", - Image: "gcr.io/kubeflow-ci/xgboost-dist-mnist_test:1.0", + "the number of replicas in masterReplica is other than 1": { + xgboostJob: &XGBoostJob{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test", + }, + Spec: XGBoostJobSpec{ + XGBReplicaSpecs: map[commonv1.ReplicaType]*commonv1.ReplicaSpec{ + XGBoostJobReplicaTypeMaster: { + Replicas: pointer.Int32(2), + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{{ + Name: "xgboost", + Image: "gcr.io/kubeflow-ci/xgboost-dist-mnist_test:1.0", + }}, }, }, }, }, }, }, + wantErr: true, }, - { - XGBReplicaSpecs: map[commonv1.ReplicaType]*commonv1.ReplicaSpec{ - XGBoostJobReplicaTypeWorker: &commonv1.ReplicaSpec{ - Replicas: pointer.Int32(1), - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - corev1.Container{ - Name: "xgboost", - Image: "gcr.io/kubeflow-ci/xgboost-dist-mnist_test:1.0", + "masterReplica does not exist": { + xgboostJob: &XGBoostJob{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test", + }, + Spec: XGBoostJobSpec{ + XGBReplicaSpecs: map[commonv1.ReplicaType]*commonv1.ReplicaSpec{ + XGBoostJobReplicaTypeWorker: { + Replicas: pointer.Int32(1), + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{{ + Name: "xgboost", + Image: "gcr.io/kubeflow-ci/xgboost-dist-mnist_test:1.0", + }}, }, }, }, }, }, }, + wantErr: true, }, } - for _, c := range testCases { - err := ValidateXGBoostJobSpec(&c) - if err == nil { - t.Error("Failed validate the corev1.XGBoostJobSpec") - } + + for name, tc := range testCases { + t.Run(name, func(t *testing.T) { + got := ValidateV1XGBoostJob(tc.xgboostJob) + if (got != nil) != tc.wantErr { + t.Fatalf("ValidateV1XGBoostJob() error = %v, wantErr %v", got, tc.wantErr) + } + }) } } diff --git a/pkg/controller.v1/mxnet/mxjob_controller.go b/pkg/controller.v1/mxnet/mxjob_controller.go index b4ca986048..470e4cbf7a 100644 --- a/pkg/controller.v1/mxnet/mxjob_controller.go +++ b/pkg/controller.v1/mxnet/mxjob_controller.go @@ -128,7 +128,7 @@ func (r *MXJobReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl return ctrl.Result{}, client.IgnoreNotFound(err) } - if err = kubeflowv1.ValidateV1MXJobSpec(&mxjob.Spec); err != nil { + if err = kubeflowv1.ValidateV1MXJob(mxjob); err != nil { logger.Error(err, "MXJob failed validation") r.Recorder.Eventf(mxjob, corev1.EventTypeWarning, commonutil.JobFailedValidationReason, "MXJob failed validation because %s", err) return ctrl.Result{}, err diff --git a/pkg/controller.v1/paddlepaddle/paddlepaddle_controller.go b/pkg/controller.v1/paddlepaddle/paddlepaddle_controller.go index b0d051e74e..bc8ac7feaf 100644 --- a/pkg/controller.v1/paddlepaddle/paddlepaddle_controller.go +++ b/pkg/controller.v1/paddlepaddle/paddlepaddle_controller.go @@ -129,7 +129,7 @@ func (r *PaddleJobReconciler) Reconcile(ctx context.Context, req ctrl.Request) ( return ctrl.Result{}, client.IgnoreNotFound(err) } - if err = kubeflowv1.ValidateV1PaddleJobSpec(&paddlejob.Spec); err != nil { + if err = kubeflowv1.ValidateV1PaddleJob(paddlejob); err != nil { logger.Error(err, "PaddleJob failed validation") r.Recorder.Eventf(paddlejob, corev1.EventTypeWarning, commonutil.JobFailedValidationReason, "PaddleJob failed validation because %s", err) return ctrl.Result{}, err diff --git a/pkg/controller.v1/pytorch/pytorchjob_controller.go b/pkg/controller.v1/pytorch/pytorchjob_controller.go index dcd929381b..dc6da77441 100644 --- a/pkg/controller.v1/pytorch/pytorchjob_controller.go +++ b/pkg/controller.v1/pytorch/pytorchjob_controller.go @@ -129,7 +129,7 @@ func (r *PyTorchJobReconciler) Reconcile(ctx context.Context, req ctrl.Request) return ctrl.Result{}, client.IgnoreNotFound(err) } - if err = kubeflowv1.ValidateV1PyTorchJobSpec(&pytorchjob.Spec); err != nil { + if err = kubeflowv1.ValidateV1PyTorchJob(pytorchjob); err != nil { logger.Error(err, "PyTorchJob failed validation") r.Recorder.Eventf(pytorchjob, corev1.EventTypeWarning, commonutil.JobFailedValidationReason, "PyTorchJob failed validation because %s", err) return ctrl.Result{}, err diff --git a/pkg/controller.v1/tensorflow/tfjob_controller.go b/pkg/controller.v1/tensorflow/tfjob_controller.go index b3b9c38b69..da06a0ce7d 100644 --- a/pkg/controller.v1/tensorflow/tfjob_controller.go +++ b/pkg/controller.v1/tensorflow/tfjob_controller.go @@ -147,7 +147,7 @@ func (r *TFJobReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl return ctrl.Result{}, client.IgnoreNotFound(err) } - if err = kubeflowv1.ValidateV1TFJobSpec(&tfjob.Spec); err != nil { + if err = kubeflowv1.ValidateV1TFJob(tfjob); err != nil { logger.Error(err, "TFJob failed validation") r.Recorder.Eventf(tfjob, corev1.EventTypeWarning, commonutil.JobFailedValidationReason, "TFJob failed validation because %s", err) return ctrl.Result{}, err diff --git a/pkg/controller.v1/xgboost/xgboostjob_controller.go b/pkg/controller.v1/xgboost/xgboostjob_controller.go index 659527ecc5..b78c91e9b1 100644 --- a/pkg/controller.v1/xgboost/xgboostjob_controller.go +++ b/pkg/controller.v1/xgboost/xgboostjob_controller.go @@ -139,7 +139,7 @@ func (r *XGBoostJobReconciler) Reconcile(ctx context.Context, req ctrl.Request) return ctrl.Result{}, client.IgnoreNotFound(err) } - if err = kubeflowv1.ValidateXGBoostJobSpec(&xgboostjob.Spec); err != nil { + if err = kubeflowv1.ValidateV1XGBoostJob(xgboostjob); err != nil { logger.Error(err, "XGBoostJob failed validation") r.Recorder.Eventf(xgboostjob, corev1.EventTypeWarning, commonutil.JobFailedValidationReason, "XGBoostJob failed validation because %s", err) return ctrl.Result{}, err