From e6b13471a6f57a4f86ca33ad9d2e9661b25edd20 Mon Sep 17 00:00:00 2001
From: inc0
Date: Fri, 15 Jun 2018 13:08:23 -0700
Subject: [PATCH 1/2] Example CRD for katib operator

I think the operator pattern would fit very nicely into Katib.
---
 examples/CRD/example_study.yaml | 72 +++++++++++++++++++++++++++++++++
 1 file changed, 72 insertions(+)
 create mode 100644 examples/CRD/example_study.yaml

diff --git a/examples/CRD/example_study.yaml b/examples/CRD/example_study.yaml
new file mode 100644
index 00000000000..9447954bd6c
--- /dev/null
+++ b/examples/CRD/example_study.yaml
@@ -0,0 +1,72 @@
+---
+apiVersion: "kubeflow.org/v1alpha1"
+kind: Study
+metadata:
+  name: mnist-demo
+  owner: katib
+spec:
+  # This is equivalent to studyconf.
+  # Each trial parameter will land in an env variable named after it.
+  # On top of that, each trial will also have a unique id built from these configs.
+  # This will be required to, for example, save the model at the end of the trial
+  # to S3 under a unique, but descriptive, name.
+  optimizationType: "max"
+  optimizationGoal: 0.9
+  parameters:
+    - name: optimizer
+      values:
+        - "sgd"
+        - "adam"
+    - name: learning_rate
+      min: 0.1
+      max: 0.9
+      step: 0.1
+  tfJobSpec:
+    # A set of tf-jobs will be created with this spec.
+    # This allows using all the features of tf-job.
+    # Names will be autogenerated from the study name and params.
+    replicaSpecs:
+      - replicas: 1
+        tfReplicaType: MASTER
+        template:
+          spec:
+            containers:
+              - image: gcr.io/tf-on-k8s-dogfood/tf_sample:dc944ff
+                name: tensorflow
+            restartPolicy: OnFailure
+      - replicas: 1
+        tfReplicaType: WORKER
+        template:
+          spec:
+            containers:
+              - image: gcr.io/tf-on-k8s-dogfood/tf_sample:dc944ff
+                name: tensorflow
+            restartPolicy: OnFailure
+      - replicas: 2
+        tfReplicaType: PS
+
+# The above yaml is enough to run the whole optimization, but each
+# optimization will create its own CRDs, which can be manually manipulated,
+# monitored, etc.
+# For example:
+#
+# kubectl get trial -> mnist-demo-ldwpw
+# kubectl get trial mnist-demo-ldwpw
+
+---
+apiVersion: "kubeflow.org/v1alpha1"
+# The reason I'd like this to be called Model, not Trial, is that, at the end,
+# we will want to use it with, for example, tf-serving to spawn a serving cluster.
+kind: Model
+metadata:
+  name: mnist-demo-ldwpw
+  tfJob: mnist-demo-ldwpw # filled in after the operator successfully creates the tfJob; can be used to get logs, pod status, etc.
+  optimizationScore: 0.2 # filled in by the operator after the study is complete
+  location: s3://mybucket/mnist-demo-sgd-lr01 # optional path telling where the model is saved. Ideally generated by us.
+spec:
+  parameters:
+    - name: optimizer
+      value: "sgd"
+    - name: learning_rate
+      value: 0.1
+

From 3a88119467c47175be1dab24a9af4adb74ebe6c6 Mon Sep 17 00:00:00 2001
From: inc0
Date: Fri, 15 Jun 2018 13:28:21 -0700
Subject: [PATCH 2/2] add note about storage

---
 examples/CRD/example_study.yaml | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/examples/CRD/example_study.yaml b/examples/CRD/example_study.yaml
index 9447954bd6c..6204e76b2e2 100644
--- a/examples/CRD/example_study.yaml
+++ b/examples/CRD/example_study.yaml
@@ -21,6 +21,13 @@ spec:
       min: 0.1
       max: 0.9
       step: 0.1
+  # Prefix of the desired storage path. This way we can generate modelLocation and logsLocation
+  # and ask users to save their artifacts there. We'll expose these paths as env variables.
+  # This way the model will later be easily reusable by tf-serving etc.
+  # Also, tensorboard will be easy to spawn.
+  # For local storage (hopefully over a pvc) this can be just a root path, but it will
+  # require specifying the correct pvc mounts in tfJobSpec.
+  studyLocation: s3://mybucket
   tfJobSpec:
     # A set of tf-jobs will be created with this spec.
     # This allows using all the features of tf-job.
@@ -62,7 +69,8 @@ metadata:
   name: mnist-demo-ldwpw
   tfJob: mnist-demo-ldwpw # filled in after the operator successfully creates the tfJob; can be used to get logs, pod status, etc.
   optimizationScore: 0.2 # filled in by the operator after the study is complete
-  location: s3://mybucket/mnist-demo-sgd-lr01 # optional path telling where the model is saved. Ideally generated by us.
+  modelLocation: s3://mybucket/mnist-demo-sgd-lr01/model # optional path telling where the model is saved. Ideally generated by us.
+  logsLocation: s3://mybucket/mnist-demo-sgd-lr01/logs # tensorboard-style logs. Scalars there can be used for ModelDB, early stopping, etc.
 spec:
   parameters:
     - name: optimizer
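
Note (not part of the patches): a minimal sketch of what a trial container's entrypoint could look like under this proposal, assuming the operator injects each parameter as an env variable named after it plus the generated storage paths. The variable names OPTIMIZER, LEARNING_RATE, MODEL_LOCATION and LOGS_LOCATION are placeholders for illustration only, not defined by the spec above.

import os

def main():
    # Each parameter from spec.parameters is expected to land in an env variable
    # named after it (names and defaults here are placeholders, not part of the CRD).
    optimizer = os.environ.get("OPTIMIZER", "sgd")
    learning_rate = float(os.environ.get("LEARNING_RATE", "0.1"))

    # Paths the operator would derive from studyLocation plus the unique trial id,
    # e.g. s3://mybucket/mnist-demo-sgd-lr01/{model,logs}.
    model_location = os.environ.get("MODEL_LOCATION", "s3://mybucket/mnist-demo-sgd-lr01/model")
    logs_location = os.environ.get("LOGS_LOCATION", "s3://mybucket/mnist-demo-sgd-lr01/logs")

    # ... build and train the model with (optimizer, learning_rate), writing
    # tensorboard-style logs under logs_location as training progresses ...

    # Save the final model under model_location so tf-serving (or any other
    # consumer of the resulting Model object) can pick it up later.
    print("optimizer=%s lr=%s -> model: %s, logs: %s"
          % (optimizer, learning_rate, model_location, logs_location))

if __name__ == "__main__":
    main()

With the paths exposed this way, spawning tensorboard against logsLocation or pointing tf-serving at modelLocation after the study completes only requires reading the Model object.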
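
Similarly, a sketch of how the operator might build the unique-but-descriptive trial id and the model/logs locations from the study name, the parameter assignment, and studyLocation. The naming scheme is only an assumption inferred from the s3://mybucket/mnist-demo-sgd-lr01 example; the actual CRD object name (mnist-demo-ldwpw) would still carry a generated suffix.

def trial_id(study_name, params):
    # Build a unique but descriptive suffix from the parameter assignment,
    # e.g. {"optimizer": "sgd", "learning_rate": 0.1} -> "sgd-lr01".
    parts = []
    for name, value in params.items():
        if isinstance(value, float):
            abbrev = "".join(word[0] for word in name.split("_"))
            parts.append(abbrev + str(value).replace(".", ""))
        else:
            parts.append(str(value))
    return study_name + "-" + "-".join(parts)

def locations(study_location, trial):
    # studyLocation acts as a prefix; model and logs live under well-known subpaths.
    return study_location + "/" + trial + "/model", study_location + "/" + trial + "/logs"

name = trial_id("mnist-demo", {"optimizer": "sgd", "learning_rate": 0.1})
print(name)                              # mnist-demo-sgd-lr01
print(locations("s3://mybucket", name))  # (.../mnist-demo-sgd-lr01/model, .../mnist-demo-sgd-lr01/logs)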