From 04c819a5109cba18474b530d75ea66cce7cc7ebb Mon Sep 17 00:00:00 2001
From: Wang Jian <61138589+hzjane@users.noreply.github.com>
Date: Fri, 2 Sep 2022 11:25:54 +0800
Subject: [PATCH] [PPML] Refine occlum readme and config (#5625)

* Refine occlum readme and config

* Update docker sql config

* Reduce instance to 1
---
 .../scala/docker-occlum/README.md             | 21 ++++++++++-
 .../scala/docker-occlum/kubernetes/README.md  | 37 ++++++++++---------
 .../docker-occlum/kubernetes/driver.yaml      |  6 +--
 .../docker-occlum/kubernetes/executor.yaml    |  2 +-
 .../docker-occlum/kubernetes/run_spark_gbt.sh |  1 +
 .../docker-occlum/kubernetes/run_spark_lr.sh  |  1 +
 .../docker-occlum/kubernetes/run_spark_sql.sh |  1 +
 .../kubernetes/run_spark_xgboost.sh           |  6 +--
 8 files changed, 50 insertions(+), 25 deletions(-)

diff --git a/ppml/trusted-big-data-ml/scala/docker-occlum/README.md b/ppml/trusted-big-data-ml/scala/docker-occlum/README.md
index d0f8e97f2a1..f994b8d0f0a 100644
--- a/ppml/trusted-big-data-ml/scala/docker-occlum/README.md
+++ b/ppml/trusted-big-data-ml/scala/docker-occlum/README.md
@@ -100,6 +100,15 @@ The examples are run in the docker container. Attach it and see the results (`do
 
 Download the Cifar-10 dataset (CIFAR-10 binary version) from [here](https://www.cs.toronto.edu/~kriz/cifar.html). The dataset contains 6 files, i.e., `data_batch_1.bin`, `data_batch_2.bin`, `data_batch_3.bin`, `data_batch_4.bin`, `data_batch_5.bin`, and `test_batch.bin`. Put all the files in the `data` directory.
 
+You can increase the SGX resource configuration in [start-spark-local.sh](https://github.com/intel-analytics/BigDL/blob/main/ppml/trusted-big-data-ml/scala/docker-occlum/start-spark-local.sh)
+``` bash
+#start-spark-local.sh
+-e SGX_MEM_SIZE=30GB \
+-e SGX_THREAD=1024 \
+-e SGX_HEAP=1GB \
+-e SGX_KERNEL_HEAP=4GB \
+```
+
 To run the BigDL ResNet CIFAR-10 example, start the docker container with:
 
 ``` bash
@@ -121,7 +130,7 @@ The examples are run in the docker container. Attach it and see the results (`do
 You can change the configuration in [start-spark-local.sh](https://github.com/intel-analytics/BigDL/blob/main/ppml/trusted-big-data-ml/scala/docker-occlum/start-spark-local.sh)
 ``` bash
 #start-spark-local.sh
--e SGX_MEM_SIZE=16GB \
+-e SGX_MEM_SIZE=24GB \
 -e SGX_THREAD=1024 \
 -e SGX_HEAP=1GB \
 -e SGX_KERNEL_HEAP=1GB \
@@ -155,6 +164,16 @@ You will find `output` folder under `/path/to/zoo-tutorials/tpch-spark/dbgen` wh
 ## Spark SQL Scala Unit Tests
 
 ### Run Spark SQL Scala Unit Tests
+
+You can increase the SGX resource configuration in [start-spark-local.sh](https://github.com/intel-analytics/BigDL/blob/main/ppml/trusted-big-data-ml/scala/docker-occlum/start-spark-local.sh)
+``` bash
+#start-spark-local.sh
+-e SGX_MEM_SIZE=60GB \
+-e SGX_THREAD=1024 \
+-e SGX_HEAP=1GB \
+-e SGX_KERNEL_HEAP=1GB \
+```
+
 To run Spark SQL Scala Unit Tests, start the docker container with:
 ``` bash
 start-spark-local.sh ut
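The `-e` fragments the hunks above add to start-spark-local.sh are `docker run` environment flags consumed by the Occlum image. Below is a minimal sketch of the invocation they slot into; the image name, the mount paths, and the SGX device nodes are placeholders and assumptions, since the actual script in the repo is the authority.

```bash
# Sketch only: <occlum-spark-image> and the mount path are placeholders, and
# the SGX device node names vary with the SGX driver version (newer in-kernel
# drivers expose /dev/sgx_enclave and /dev/sgx_provision instead).
# The -e values mirror the README hunk above.
docker run -it \
    --device=/dev/sgx/enclave --device=/dev/sgx/provision \
    -v "$PWD/data":/data \
    -e SGX_MEM_SIZE=30GB \
    -e SGX_THREAD=1024 \
    -e SGX_HEAP=1GB \
    -e SGX_KERNEL_HEAP=4GB \
    <occlum-spark-image> bash
```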
diff --git a/ppml/trusted-big-data-ml/scala/docker-occlum/kubernetes/README.md b/ppml/trusted-big-data-ml/scala/docker-occlum/kubernetes/README.md
index b9610429964..e0e6f2f5b71 100644
--- a/ppml/trusted-big-data-ml/scala/docker-occlum/kubernetes/README.md
+++ b/ppml/trusted-big-data-ml/scala/docker-occlum/kubernetes/README.md
@@ -73,18 +73,18 @@ bash build-docker-image.sh
   - name: DRIVER_MEMORY
     value: "2g"
   - name: SGX_MEM_SIZE
-    value: "4GB"
+    value: "20GB"
   - name: SGX_THREAD
-    value: "128"
+    value: "512"
 ```
 
 ```yaml
 #executor.yaml
 env:
   - name: SGX_MEM_SIZE
-    value: "4GB"
+    value: "10GB"
   - name: SGX_THREAD
-    value: "128"
+    value: "512"
 ```
 
 ### Spark ML GradientBoostedTreeClassifier example
@@ -122,7 +122,7 @@ After training, you can find xgboost model in folder `/tmp/path_to_model_to_be_s
 
 #### Criteo 1TB Click Logs [dataset](https://ailab.criteo.com/download-criteo-1tb-click-logs-dataset/)
 
-Split 50G data from this dataset and put it into `/tmp/xgboost_data`.
+Split 1G of data from this dataset and put it into `/tmp/xgboost_data`.
 
 Then change the `class` in [script](https://github.com/intel-analytics/BigDL/blob/main/ppml/trusted-big-data-ml/scala/docker-occlum/kubernetes/run_spark_xgboost.sh#L7) to `com.intel.analytics.bigdl.dllib.examples.nnframes.xgboost.xgbClassifierTrainingExampleOnCriteoClickLogsDataset`.
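The "Split 1G of data" step has no recipe in the README; one plausible way to carve it out with GNU coreutils is sketched below. The decompressed input name `day_0` is an assumption (the Criteo dataset ships as `day_0` through `day_23`), and the trailing `sed '$d'` drops the final, possibly truncated record.

```bash
# Carve roughly 1G off a decompressed Criteo day file into the directory the
# README expects. GNU head accepts binary size suffixes for -c; the last line
# may be cut mid-record, so delete it.
mkdir -p /tmp/xgboost_data
head -c 1G day_0 | sed '$d' > /tmp/xgboost_data/day_0_1g
```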
@@ -131,19 +131,22 @@ Add these configurations to [script](https://github.com/intel-analytics/BigDL/bl
 ```bash
     --conf spark.driver.extraClassPath=local:///opt/spark/jars/* \
     --conf spark.executor.extraClassPath=local:///opt/spark/jars/* \
-    --conf spark.cores.max=64 \
-    --conf spark.task.cpus=32 \
-    --conf spark.kubernetes.driverEnv.DRIVER_MEMORY=10g \
-    --conf spark.kubernetes.driverEnv.SGX_MEM_SIZE="40GB" \
+    --conf spark.task.cpus=6 \
+    --conf spark.cores.max=12 \
+    --conf spark.executor.instances=2 \
+    --conf spark.kubernetes.driverEnv.DRIVER_MEMORY=1g \
+    --conf spark.kubernetes.driverEnv.SGX_MEM_SIZE="12GB" \
     --conf spark.kubernetes.driverEnv.META_SPACE=1024m \
-    --conf spark.kubernetes.driverEnv.SGX_HEAP="10GB" \
-    --conf spark.kubernetes.driverEnv.SGX_KERNEL_HEAP="4GB" \
-    --conf spark.executorEnv.SGX_MEM_SIZE="178GB" \
-    --conf spark.executorEnv.SGX_KERNEL_HEAP="4GB" \
-    --conf spark.executorEnv.SGX_HEAP="150GB" \
-    --executor-cores 32 \
-    --executor-memory 10g \
-    --driver-memory 10g
+    --conf spark.kubernetes.driverEnv.SGX_HEAP="1GB" \
+    --conf spark.kubernetes.driverEnv.SGX_KERNEL_HEAP="2GB" \
+    --conf spark.executorEnv.SGX_MEM_SIZE="10GB" \
+    --conf spark.executorEnv.SGX_KERNEL_HEAP="1GB" \
+    --conf spark.executorEnv.SGX_HEAP="1GB" \
+    --executor-cores 6 \
+    --executor-memory 3g \
+    --driver-memory 1g \
+    --conf spark.executorEnv.SGX_EXECUTOR_JVM_MEM_SIZE_NO="3G" \
+    --conf spark.kubernetes.driverEnv.SGX_DRIVER_JVM_MEM_SIZE="1G"
 ```
 
 Change the `parameters` to:
diff --git a/ppml/trusted-big-data-ml/scala/docker-occlum/kubernetes/driver.yaml b/ppml/trusted-big-data-ml/scala/docker-occlum/kubernetes/driver.yaml
index 45c4b4d8284..cb3781f3814 100644
--- a/ppml/trusted-big-data-ml/scala/docker-occlum/kubernetes/driver.yaml
+++ b/ppml/trusted-big-data-ml/scala/docker-occlum/kubernetes/driver.yaml
@@ -20,11 +20,11 @@ spec:
       privileged: true
     env:
       - name: DRIVER_MEMORY
-        value: "5g"
+        value: "2g"
      - name: SGX_MEM_SIZE
-        value: "12GB"
+        value: "20GB"
      - name: SGX_THREAD
-        value: "128"
+        value: "512"
      - name: SGX_HEAP
        value: "512MB"
      - name: SGX_KERNEL_HEAP
diff --git a/ppml/trusted-big-data-ml/scala/docker-occlum/kubernetes/executor.yaml b/ppml/trusted-big-data-ml/scala/docker-occlum/kubernetes/executor.yaml
index 081a882edce..bfe3e508a7b 100644
--- a/ppml/trusted-big-data-ml/scala/docker-occlum/kubernetes/executor.yaml
+++ b/ppml/trusted-big-data-ml/scala/docker-occlum/kubernetes/executor.yaml
@@ -22,7 +22,7 @@ spec:
      - name: NETTY_THREAD
        value: "32"
      - name: SGX_MEM_SIZE
-        value: "20GB"
+        value: "10GB"
      - name: SGX_THREAD
        value: "512"
      - name: SGX_HEAP
diff --git a/ppml/trusted-big-data-ml/scala/docker-occlum/kubernetes/run_spark_gbt.sh b/ppml/trusted-big-data-ml/scala/docker-occlum/kubernetes/run_spark_gbt.sh
index 434c56d64f3..1708cec950e 100644
--- a/ppml/trusted-big-data-ml/scala/docker-occlum/kubernetes/run_spark_gbt.sh
+++ b/ppml/trusted-big-data-ml/scala/docker-occlum/kubernetes/run_spark_gbt.sh
@@ -15,6 +15,7 @@ ${SPARK_HOME}/bin/spark-submit \
     --conf spark.kubernetes.executor.podTemplateFile=./executor.yaml \
     --conf spark.kubernetes.sgx.log.level=off \
     --executor-memory 1024m \
+    --executor-cores 6 \
     --conf spark.kubernetes.driverEnv.SGX_DRIVER_JVM_MEM_SIZE="2G" \
     --conf spark.executorEnv.SGX_EXECUTOR_JVM_MEM_SIZE="1G" \
     --jars local:/opt/spark/examples/jars/scopt_2.12-3.7.1.jar \
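With the pod templates and the run script edited as above, a typical submit-and-watch sequence looks like the sketch below. `spark-role=driver` is the label Spark on Kubernetes itself applies to driver pods; the generated pod name varies per run, and the sketch assumes `kubectl` already points at the target cluster.

```bash
# Submit the GBT example, then follow the driver pod's log until it finishes.
bash run_spark_gbt.sh
kubectl get pods -l spark-role=driver
kubectl logs -f "$(kubectl get pods -l spark-role=driver -o name | head -n 1)"
```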
diff --git a/ppml/trusted-big-data-ml/scala/docker-occlum/kubernetes/run_spark_lr.sh b/ppml/trusted-big-data-ml/scala/docker-occlum/kubernetes/run_spark_lr.sh
index 6ad0d543fb1..5cc584ecfcb 100644
--- a/ppml/trusted-big-data-ml/scala/docker-occlum/kubernetes/run_spark_lr.sh
+++ b/ppml/trusted-big-data-ml/scala/docker-occlum/kubernetes/run_spark_lr.sh
@@ -15,6 +15,7 @@ ${SPARK_HOME}/bin/spark-submit \
     --conf spark.kubernetes.executor.podTemplateFile=./executor.yaml \
     --conf spark.kubernetes.sgx.log.level=off \
     --executor-memory 1024m \
+    --executor-cores 6 \
     --conf spark.kubernetes.driverEnv.SGX_DRIVER_JVM_MEM_SIZE="2G" \
     --conf spark.executorEnv.SGX_EXECUTOR_JVM_MEM_SIZE="1G" \
     --jars local:/opt/spark/examples/jars/scopt_2.12-3.7.1.jar \
diff --git a/ppml/trusted-big-data-ml/scala/docker-occlum/kubernetes/run_spark_sql.sh b/ppml/trusted-big-data-ml/scala/docker-occlum/kubernetes/run_spark_sql.sh
index 2d20d13e53e..4e9400278e2 100644
--- a/ppml/trusted-big-data-ml/scala/docker-occlum/kubernetes/run_spark_sql.sh
+++ b/ppml/trusted-big-data-ml/scala/docker-occlum/kubernetes/run_spark_sql.sh
@@ -15,6 +15,7 @@ ${SPARK_HOME}/bin/spark-submit \
     --conf spark.kubernetes.executor.podTemplateFile=./executor.yaml \
     --conf spark.kubernetes.sgx.log.level=off \
     --executor-memory 1024m \
+    --executor-cores 6 \
     --conf spark.kubernetes.driverEnv.SGX_DRIVER_JVM_MEM_SIZE="2G" \
     --conf spark.executorEnv.SGX_EXECUTOR_JVM_MEM_SIZE="1G" \
     --jars local:/opt/spark/examples/jars/scopt_2.12-3.7.1.jar \
diff --git a/ppml/trusted-big-data-ml/scala/docker-occlum/kubernetes/run_spark_xgboost.sh b/ppml/trusted-big-data-ml/scala/docker-occlum/kubernetes/run_spark_xgboost.sh
index 61ff1333739..078079972a5 100644
--- a/ppml/trusted-big-data-ml/scala/docker-occlum/kubernetes/run_spark_xgboost.sh
+++ b/ppml/trusted-big-data-ml/scala/docker-occlum/kubernetes/run_spark_xgboost.sh
@@ -15,10 +15,10 @@ ${SPARK_HOME}/bin/spark-submit \
     --conf spark.kubernetes.file.upload.path=file:///tmp \
     --conf spark.kubernetes.sgx.log.level=off \
     --conf spark.task.cpus=2 \
-    --executor-cores 2 \
-    --executor-memory 2g \
+    --executor-cores 6 \
+    --executor-memory 3g \
     --driver-memory 2g \
     --conf spark.kubernetes.driverEnv.SGX_DRIVER_JVM_MEM_SIZE="2G" \
-    --conf spark.executorEnv.SGX_EXECUTOR_JVM_MEM_SIZE="2G" \
+    --conf spark.executorEnv.SGX_EXECUTOR_JVM_MEM_SIZE="3G" \
     local:/bin/jars/bigdl-dllib-spark_3.1.2-2.1.0-SNAPSHOT.jar \
     /host/data/xgboost_data 2 100 /host/data/xgboost_model_to_be_saved
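As a sanity check on the sizing this patch settles on (a rule of thumb, not something the patch states): the executor JVM plus Occlum's user and kernel heaps should fit inside the executor enclave with headroom left for Occlum itself. The numbers below come straight from the hunks above.

```bash
# Rule-of-thumb check; "leave headroom" is an assumption, not a documented
# Occlum requirement. All values are taken from this patch.
jvm_gb=3              # --executor-memory 3g / SGX_EXECUTOR_JVM_MEM_SIZE="3G"
sgx_heap_gb=1         # spark.executorEnv.SGX_HEAP="1GB"
sgx_kernel_heap_gb=1  # spark.executorEnv.SGX_KERNEL_HEAP="1GB"
enclave_gb=10         # executor.yaml SGX_MEM_SIZE "10GB"
echo "executor headroom: $(( enclave_gb - jvm_gb - sgx_heap_gb - sgx_kernel_heap_gb ))GB"
```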