Example CRD for katib operator

I think the operator pattern would fit very nicely into Katib.
kubeflow · Jun 15, 2018 · e6b1347 · e6b1347
1 parent c0801d5
commit e6b1347
Showing 1 changed file with 72 additions and 0 deletions.
diff --git a/examples/CRD/example_study.yaml b/examples/CRD/example_study.yaml
@@ -0,0 +1,72 @@
+---
+apiVersion: "kubeflow.org/v1alpha1"
+kind: Study
+metadata:
+    name: mnist-demo
+    owner: katib  # NOTE(review): not a standard Kubernetes metadata field - confirm it is preserved
+spec:
+    # This is the equivalent of studyconf.
+    # Each trial config will land in an env variable according to its name.
+    # On top of that, each trial will also have a unique id built from these configs.
+    # The id will be required to, for example, save the model at the end of the trial
+    # to S3 under a unique, but descriptive, name.
+    optimizationType: "max"
+    optimizationGoal: 0.9
+    parameters:
+        - name: optimizer
+          values:
+              - "sgd"
+              - "adam"
+        - name: learning_rate
+          min: 0.1
+          max: 0.9
+          step: 0.1
+    tfJobSpec:
+        # A set of tf-jobs will be created from this spec.
+        # This will allow using all the features of tf-job.
+        # Names will be autogenerated from the study name and params.
+        replicaSpecs:
+            - replicas: 1
+              tfReplicaType: MASTER
+              template:
+                spec:
+                  containers:
+                    - image: gcr.io/tf-on-k8s-dogfood/tf_sample:dc944ff
+                      name: tensorflow
+                  restartPolicy: OnFailure
+            - replicas: 1
+              tfReplicaType: WORKER
+              template:
+                spec:
+                  containers:
+                    - image: gcr.io/tf-on-k8s-dogfood/tf_sample:dc944ff
+                      name: tensorflow
+                  restartPolicy: OnFailure
+            - replicas: 2
+              tfReplicaType: PS
+
+# The above YAML will be enough to run the whole set of optimizations, but each
+# optimization will create its own CRDs, which can be manually manipulated,
+# monitored, etc.
+# For example
+#
+# kubectl get trial -> mnist-demo-ldwpw
+# kubectl get trial mnist-demo-ldwpw
+
+---
+apiVersion: "kubeflow.org/v1alpha1"
+# The reason I'd like this to be called Model rather than Trial is that, in the end, we will
+# want it to be used by, for example, tf-serving to spawn a serving cluster.
+kind: Model
+metadata:
+    name: mnist-demo-ldwpw
+    tfJob: mnist-demo-ldwpw  # filled in after the operator successfully creates the tfJob; can be used to get logs, pod status, etc.
+    optimizationScore: 0.2  # filled in by the operator after the study is complete
+    location: s3://mybucket/mnist-demo-sgd-lr01  # optional path to where the model is saved, ideally generated by us. NOTE(review): these three are non-standard metadata fields - confirm they are preserved, or move them to status
+spec:
+    parameters:
+    - name: optimizer
+      value: "sgd"
+    - name: learning_rate
+      value: 0.1
+