From 04c819a5109cba18474b530d75ea66cce7cc7ebb Mon Sep 17 00:00:00 2001
From: Wang Jian <61138589+hzjane@users.noreply.github.com>
Date: Fri, 2 Sep 2022 11:25:54 +0800
Subject: [PATCH] [PPML] Refine occlum readme and config (#5625)

* Refine occlum readme and config

* Update docker sql config

* Reduce instance to 1
---
 .../scala/docker-occlum/README.md             | 21 ++++++++++-
 .../scala/docker-occlum/kubernetes/README.md  | 37 ++++++++++---------
 .../docker-occlum/kubernetes/driver.yaml      |  6 +--
 .../docker-occlum/kubernetes/executor.yaml    |  2 +-
 .../docker-occlum/kubernetes/run_spark_gbt.sh |  1 +
 .../docker-occlum/kubernetes/run_spark_lr.sh  |  1 +
 .../docker-occlum/kubernetes/run_spark_sql.sh |  1 +
 .../kubernetes/run_spark_xgboost.sh           |  6 +--
 8 files changed, 50 insertions(+), 25 deletions(-)

diff --git a/ppml/trusted-big-data-ml/scala/docker-occlum/README.md b/ppml/trusted-big-data-ml/scala/docker-occlum/README.md
index d0f8e97f2a1..f994b8d0f0a 100644
--- a/ppml/trusted-big-data-ml/scala/docker-occlum/README.md
+++ b/ppml/trusted-big-data-ml/scala/docker-occlum/README.md
@@ -100,6 +100,15 @@ The examples are run in the docker container. Attach it and see the results (`do
 
 Download the Cifar-10 dataset (CIFAR-10 binary version) from [here](https://www.cs.toronto.edu/~kriz/cifar.html). The dataset contains 6 files, i.e., `data_batch_1.bin`, `data_batch_2.bin`, `data_batch_3.bin`, `data_batch_4.bin`, `data_batch_5.bin`, and `test_batch.bin`. Put all the files in the `data` directory.
 
+You can increase the SGX resource configuration in [start-spark-local.sh](https://github.com/intel-analytics/BigDL/blob/main/ppml/trusted-big-data-ml/scala/docker-occlum/start-spark-local.sh)
+``` bash
+#start-spark-local.sh
+-e SGX_MEM_SIZE=30GB \
+-e SGX_THREAD=1024 \
+-e SGX_HEAP=1GB \
+-e SGX_KERNEL_HEAP=4GB \
+```
+
 To run the BigDL ResNet CIFAR-10 example, start the docker container with:
 
 ``` bash
@@ -121,7 +130,7 @@ The examples are run in the docker container. Attach it and see the results (`do
 You can change the configuration in [start-spark-local.sh](https://github.com/intel-analytics/BigDL/blob/main/ppml/trusted-big-data-ml/scala/docker-occlum/start-spark-local.sh)
 ``` bash
 #start-spark-local.sh
--e SGX_MEM_SIZE=16GB \
+-e SGX_MEM_SIZE=24GB \
 -e SGX_THREAD=1024 \
 -e SGX_HEAP=1GB \
 -e SGX_KERNEL_HEAP=1GB \
@@ -155,6 +164,16 @@ You will find `output` folder under `/path/to/zoo-tutorials/tpch-spark/dbgen` wh
 ## Spark SQL Scala Unit Tests
 
 ### Run Spark SQL Scala Unit Tests
+
+You can increase the SGX resource configuration in [start-spark-local.sh](https://github.com/intel-analytics/BigDL/blob/main/ppml/trusted-big-data-ml/scala/docker-occlum/start-spark-local.sh)
+``` bash
+#start-spark-local.sh
+-e SGX_MEM_SIZE=60GB \
+-e SGX_THREAD=1024 \
+-e SGX_HEAP=1GB \
+-e SGX_KERNEL_HEAP=1GB \
+```
+
 To run Spark SQL Scala Unit Tests, start the docker container with:
 ``` bash
 start-spark-local.sh ut
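The `-e` fragments the hunks above add to start-spark-local.sh are `docker run` environment flags consumed by the Occlum image. Below is a minimal sketch of the invocation they slot into; the image name, the mount paths, and the SGX device nodes are placeholders and assumptions, since the actual script in the repo is the authority.

```bash
# Sketch only: <occlum-spark-image> and the mount path are placeholders, and
# the SGX device node names vary with the SGX driver version (newer in-kernel
# drivers expose /dev/sgx_enclave and /dev/sgx_provision instead).
# The -e values mirror the README hunk above.
docker run -it \
    --device=/dev/sgx/enclave --device=/dev/sgx/provision \
    -v "$PWD/data":/data \
    -e SGX_MEM_SIZE=30GB \
    -e SGX_THREAD=1024 \
    -e SGX_HEAP=1GB \
    -e SGX_KERNEL_HEAP=4GB \
    <occlum-spark-image> bash
```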
diff --git a/ppml/trusted-big-data-ml/scala/docker-occlum/kubernetes/README.md b/ppml/trusted-big-data-ml/scala/docker-occlum/kubernetes/README.md
index b9610429964..e0e6f2f5b71 100644
--- a/ppml/trusted-big-data-ml/scala/docker-occlum/kubernetes/README.md
+++ b/ppml/trusted-big-data-ml/scala/docker-occlum/kubernetes/README.md
@@ -73,18 +73,18 @@ bash build-docker-image.sh
   - name: DRIVER_MEMORY
     value: "2g"
   - name: SGX_MEM_SIZE
-    value: "4GB"
+    value: "20GB"
   - name: SGX_THREAD
-    value: "128"
+    value: "512"
 ```
 
 ```yaml
 #executor.yaml
 env:
   - name: SGX_MEM_SIZE
-    value: "4GB"
+    value: "10GB"
   - name: SGX_THREAD
-    value: "128"
+    value: "512"
 ```
 
 ### Spark ML GradientBoostedTreeClassifier example
@@ -122,7 +122,7 @@ After training, you can find xgboost model in folder `/tmp/path_to_model_to_be_s
 
 #### Criteo 1TB Click Logs [dataset](https://ailab.criteo.com/download-criteo-1tb-click-logs-dataset/)
 
-Split 50G data from this dataset and put it into `/tmp/xgboost_data`.
+Split 1G of data from this dataset and put it into `/tmp/xgboost_data`.
 
 Then change the `class` in [script](https://github.com/intel-analytics/BigDL/blob/main/ppml/trusted-big-data-ml/scala/docker-occlum/kubernetes/run_spark_xgboost.sh#L7) to `com.intel.analytics.bigdl.dllib.examples.nnframes.xgboost.xgbClassifierTrainingExampleOnCriteoClickLogsDataset`.
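The "Split 1G of data" step has no recipe in the README; one plausible way to carve it out with GNU coreutils is sketched below. The decompressed input name `day_0` is an assumption (the Criteo dataset ships as `day_0` through `day_23`), and the trailing `sed '$d'` drops the final, possibly truncated record.

```bash
# Carve roughly 1G off a decompressed Criteo day file into the directory the
# README expects. GNU head accepts binary size suffixes for -c; the last line
# may be cut mid-record, so delete it.
mkdir -p /tmp/xgboost_data
head -c 1G day_0 | sed '$d' > /tmp/xgboost_data/day_0_1g
```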
@@ -131,19 +131,22 @@ Add these configurations to [script](https://github.com/intel-analytics/BigDL/bl
 ```bash
     --conf spark.driver.extraClassPath=local:///opt/spark/jars/* \
     --conf spark.executor.extraClassPath=local:///opt/spark/jars/* \
-    --conf spark.cores.max=64 \
-    --conf spark.task.cpus=32 \
-    --conf spark.kubernetes.driverEnv.DRIVER_MEMORY=10g \
-    --conf spark.kubernetes.driverEnv.SGX_MEM_SIZE="40GB" \
+    --conf spark.task.cpus=6 \
+    --conf spark.cores.max=12 \
+    --conf spark.executor.instances=2 \
+    --conf spark.kubernetes.driverEnv.DRIVER_MEMORY=1g \
+    --conf spark.kubernetes.driverEnv.SGX_MEM_SIZE="12GB" \
     --conf spark.kubernetes.driverEnv.META_SPACE=1024m \
-    --conf spark.kubernetes.driverEnv.SGX_HEAP="10GB" \
-    --conf spark.kubernetes.driverEnv.SGX_KERNEL_HEAP="4GB" \
-    --conf spark.executorEnv.SGX_MEM_SIZE="178GB" \
-    --conf spark.executorEnv.SGX_KERNEL_HEAP="4GB" \
-    --conf spark.executorEnv.SGX_HEAP="150GB" \
-    --executor-cores 32 \
-    --executor-memory 10g \
-    --driver-memory 10g
+    --conf spark.kubernetes.driverEnv.SGX_HEAP="1GB" \
+    --conf spark.kubernetes.driverEnv.SGX_KERNEL_HEAP="2GB" \
+    --conf spark.executorEnv.SGX_MEM_SIZE="10GB" \
+    --conf spark.executorEnv.SGX_KERNEL_HEAP="1GB" \
+    --conf spark.executorEnv.SGX_HEAP="1GB" \
+    --executor-cores 6 \
+    --executor-memory 3g \
+    --driver-memory 1g \
+    --conf spark.executorEnv.SGX_EXECUTOR_JVM_MEM_SIZE_NO="3G" \
+    --conf spark.kubernetes.driverEnv.SGX_DRIVER_JVM_MEM_SIZE="1G"
 ```
 
 Change the `parameters` to:
diff --git a/ppml/trusted-big-data-ml/scala/docker-occlum/kubernetes/driver.yaml b/ppml/trusted-big-data-ml/scala/docker-occlum/kubernetes/driver.yaml
index 45c4b4d8284..cb3781f3814 100644
--- a/ppml/trusted-big-data-ml/scala/docker-occlum/kubernetes/driver.yaml
+++ b/ppml/trusted-big-data-ml/scala/docker-occlum/kubernetes/driver.yaml
@@ -20,11 +20,11 @@ spec:
       privileged: true
     env:
       - name: DRIVER_MEMORY
-        value: "5g"
+        value: "2g"
      - name: SGX_MEM_SIZE
-        value: "12GB"
+        value: "20GB"
      - name: SGX_THREAD
-        value: "128"
+        value: "512"
      - name: SGX_HEAP
        value: "512MB"
      - name: SGX_KERNEL_HEAP
diff --git a/ppml/trusted-big-data-ml/scala/docker-occlum/kubernetes/executor.yaml b/ppml/trusted-big-data-ml/scala/docker-occlum/kubernetes/executor.yaml
index 081a882edce..bfe3e508a7b 100644
--- a/ppml/trusted-big-data-ml/scala/docker-occlum/kubernetes/executor.yaml
+++ b/ppml/trusted-big-data-ml/scala/docker-occlum/kubernetes/executor.yaml
@@ -22,7 +22,7 @@ spec:
      - name: NETTY_THREAD
        value: "32"
      - name: SGX_MEM_SIZE
-        value: "20GB"
+        value: "10GB"
      - name: SGX_THREAD
        value: "512"
      - name: SGX_HEAP
diff --git a/ppml/trusted-big-data-ml/scala/docker-occlum/kubernetes/run_spark_gbt.sh b/ppml/trusted-big-data-ml/scala/docker-occlum/kubernetes/run_spark_gbt.sh
index 434c56d64f3..1708cec950e 100644
--- a/ppml/trusted-big-data-ml/scala/docker-occlum/kubernetes/run_spark_gbt.sh
+++ b/ppml/trusted-big-data-ml/scala/docker-occlum/kubernetes/run_spark_gbt.sh
@@ -15,6 +15,7 @@ ${SPARK_HOME}/bin/spark-submit \
     --conf spark.kubernetes.executor.podTemplateFile=./executor.yaml \
     --conf spark.kubernetes.sgx.log.level=off \
     --executor-memory 1024m \
+    --executor-cores 6 \
     --conf spark.kubernetes.driverEnv.SGX_DRIVER_JVM_MEM_SIZE="2G" \
     --conf spark.executorEnv.SGX_EXECUTOR_JVM_MEM_SIZE="1G" \
     --jars local:/opt/spark/examples/jars/scopt_2.12-3.7.1.jar \
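With the pod templates and the run script edited as above, a typical submit-and-watch sequence looks like the sketch below. `spark-role=driver` is the label Spark on Kubernetes itself applies to driver pods; the generated pod name varies per run, and the sketch assumes `kubectl` already points at the target cluster.

```bash
# Submit the GBT example, then follow the driver pod's log until it finishes.
bash run_spark_gbt.sh
kubectl get pods -l spark-role=driver
kubectl logs -f "$(kubectl get pods -l spark-role=driver -o name | head -n 1)"
```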
diff --git a/ppml/trusted-big-data-ml/scala/docker-occlum/kubernetes/run_spark_lr.sh b/ppml/trusted-big-data-ml/scala/docker-occlum/kubernetes/run_spark_lr.sh
index 6ad0d543fb1..5cc584ecfcb 100644
--- a/ppml/trusted-big-data-ml/scala/docker-occlum/kubernetes/run_spark_lr.sh
+++ b/ppml/trusted-big-data-ml/scala/docker-occlum/kubernetes/run_spark_lr.sh
@@ -15,6 +15,7 @@ ${SPARK_HOME}/bin/spark-submit \
     --conf spark.kubernetes.executor.podTemplateFile=./executor.yaml \
     --conf spark.kubernetes.sgx.log.level=off \
     --executor-memory 1024m \
+    --executor-cores 6 \
     --conf spark.kubernetes.driverEnv.SGX_DRIVER_JVM_MEM_SIZE="2G" \
     --conf spark.executorEnv.SGX_EXECUTOR_JVM_MEM_SIZE="1G" \
     --jars local:/opt/spark/examples/jars/scopt_2.12-3.7.1.jar \
diff --git a/ppml/trusted-big-data-ml/scala/docker-occlum/kubernetes/run_spark_sql.sh b/ppml/trusted-big-data-ml/scala/docker-occlum/kubernetes/run_spark_sql.sh
index 2d20d13e53e..4e9400278e2 100644
--- a/ppml/trusted-big-data-ml/scala/docker-occlum/kubernetes/run_spark_sql.sh
+++ b/ppml/trusted-big-data-ml/scala/docker-occlum/kubernetes/run_spark_sql.sh
@@ -15,6 +15,7 @@ ${SPARK_HOME}/bin/spark-submit \
     --conf spark.kubernetes.executor.podTemplateFile=./executor.yaml \
     --conf spark.kubernetes.sgx.log.level=off \
     --executor-memory 1024m \
+    --executor-cores 6 \
     --conf spark.kubernetes.driverEnv.SGX_DRIVER_JVM_MEM_SIZE="2G" \
     --conf spark.executorEnv.SGX_EXECUTOR_JVM_MEM_SIZE="1G" \
     --jars local:/opt/spark/examples/jars/scopt_2.12-3.7.1.jar \
diff --git a/ppml/trusted-big-data-ml/scala/docker-occlum/kubernetes/run_spark_xgboost.sh b/ppml/trusted-big-data-ml/scala/docker-occlum/kubernetes/run_spark_xgboost.sh
index 61ff1333739..078079972a5 100644
--- a/ppml/trusted-big-data-ml/scala/docker-occlum/kubernetes/run_spark_xgboost.sh
+++ b/ppml/trusted-big-data-ml/scala/docker-occlum/kubernetes/run_spark_xgboost.sh
@@ -15,10 +15,10 @@ ${SPARK_HOME}/bin/spark-submit \
     --conf spark.kubernetes.file.upload.path=file:///tmp \
     --conf spark.kubernetes.sgx.log.level=off \
     --conf spark.task.cpus=2 \
-    --executor-cores 2 \
-    --executor-memory 2g \
+    --executor-cores 6 \
+    --executor-memory 3g \
     --driver-memory 2g \
     --conf spark.kubernetes.driverEnv.SGX_DRIVER_JVM_MEM_SIZE="2G" \
-    --conf spark.executorEnv.SGX_EXECUTOR_JVM_MEM_SIZE="2G" \
+    --conf spark.executorEnv.SGX_EXECUTOR_JVM_MEM_SIZE="3G" \
     local:/bin/jars/bigdl-dllib-spark_3.1.2-2.1.0-SNAPSHOT.jar \
     /host/data/xgboost_data 2 100 /host/data/xgboost_model_to_be_saved
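As a sanity check on the sizing this patch settles on (a rule of thumb, not something the patch states): the executor JVM plus Occlum's user and kernel heaps should fit inside the executor enclave with headroom left for Occlum itself. The numbers below come straight from the hunks above.

```bash
# Rule-of-thumb check; "leave headroom" is an assumption, not a documented
# Occlum requirement. All values are taken from this patch.
jvm_gb=3              # --executor-memory 3g / SGX_EXECUTOR_JVM_MEM_SIZE="3G"
sgx_heap_gb=1         # spark.executorEnv.SGX_HEAP="1GB"
sgx_kernel_heap_gb=1  # spark.executorEnv.SGX_KERNEL_HEAP="1GB"
enclave_gb=10         # executor.yaml SGX_MEM_SIZE "10GB"
echo "executor headroom: $(( enclave_gb - jvm_gb - sgx_heap_gb - sgx_kernel_heap_gb ))GB"
```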