diff --git a/ppml/trusted-big-data-ml/scala/docker-occlum/entrypoint.sh b/ppml/trusted-big-data-ml/scala/docker-occlum/entrypoint.sh index f3528bdb0a5..f007acdcbe8 100644 --- a/ppml/trusted-big-data-ml/scala/docker-occlum/entrypoint.sh +++ b/ppml/trusted-big-data-ml/scala/docker-occlum/entrypoint.sh @@ -52,6 +52,11 @@ else echo "META_SPACE=$META_SPACE" fi +echo "SGX_LOG_LEVEL $SGX_LOG_LEVEL" +echo "SGX_DRIVER_JVM_MEM_SIZE $SGX_DRIVER_JVM_MEM_SIZE" +echo "SGX_EXECUTOR_JVM_MEM_SIZE $SGX_EXECUTOR_JVM_MEM_SIZE" +echo "SPARK_DRIVER_MEMORY $DRIVER_MEMORY" +echo "SPARK_EXECUTOR_MEMORY $SPARK_EXECUTOR_MEMORY" case "$SPARK_K8S_CMD" in driver) @@ -65,50 +70,97 @@ case "$SPARK_K8S_CMD" in /opt/run_spark_on_occlum_glibc.sh init cd /opt/occlum_spark DMLC_TRACKER_URI=$SPARK_DRIVER_BIND_ADDRESS - CMD=( - /usr/lib/jvm/java-8-openjdk-amd64/bin/java \ - -Divy.home="/tmp/.ivy" \ - -Dos.name="Linux" \ - -XX:-UseCompressedOops \ - -XX:MaxMetaspaceSize=$META_SPACE \ - -Djdk.lang.Process.launchMechanism=posix_spawn \ - -cp "$SPARK_CLASSPATH" \ - -Xmx$DRIVER_MEMORY \ - -XX:ActiveProcessorCount=4 \ - -Dio.netty.availableProcessors=$NETTY_THREAD \ - org.apache.spark.deploy.SparkSubmit \ - --conf "spark.driver.bindAddress=$SPARK_DRIVER_BIND_ADDRESS" \ - --deploy-mode client \ - "$@" + + if [[ -z "$SGX_DRIVER_JVM_MEM_SIZE" ]]; then + echo "SGX_DRIVER_JVM_MEM_SIZE not set, using default DRIVER_MEMORY" + CMD=( + /usr/lib/jvm/java-8-openjdk-amd64/bin/java \ + -Divy.home="/tmp/.ivy" \ + -Dos.name="Linux" \ + -XX:-UseCompressedOops \ + -XX:MaxMetaspaceSize=$META_SPACE \ + -Djdk.lang.Process.launchMechanism=posix_spawn \ + -cp "$SPARK_CLASSPATH" \ + -Xmx$DRIVER_MEMORY \ + -XX:ActiveProcessorCount=4 \ + -Dio.netty.availableProcessors=$NETTY_THREAD \ + org.apache.spark.deploy.SparkSubmit \ + --conf "spark.driver.bindAddress=$SPARK_DRIVER_BIND_ADDRESS" \ + --deploy-mode client \ + "$@" + ) + else + echo "use SGX_DRIVER_JVM_MEM_SIZE=$SGX_DRIVER_JVM_MEM_SIZE" + CMD=( + 
/usr/lib/jvm/java-8-openjdk-amd64/bin/java \ + -Divy.home="/tmp/.ivy" \ + -Dos.name="Linux" \ + -XX:-UseCompressedOops \ + -XX:MaxMetaspaceSize=$META_SPACE \ + -Djdk.lang.Process.launchMechanism=posix_spawn \ + -cp "$SPARK_CLASSPATH" \ + -Xmx$SGX_DRIVER_JVM_MEM_SIZE \ + -XX:ActiveProcessorCount=4 \ + -Dio.netty.availableProcessors=$NETTY_THREAD \ + org.apache.spark.deploy.SparkSubmit \ + --conf "spark.driver.bindAddress=$SPARK_DRIVER_BIND_ADDRESS" \ + --deploy-mode client \ + "$@" ) + fi ;; executor) echo "SGX Mem $SGX_MEM_SIZE" /opt/run_spark_on_occlum_glibc.sh init cd /opt/occlum_spark DMLC_TRACKER_URI=$SPARK_DRIVER_BIND_ADDRESS - CMD=( - /usr/lib/jvm/java-8-openjdk-amd64/bin/java \ - "${SPARK_EXECUTOR_JAVA_OPTS[@]}" \ - -XX:-UseCompressedOops \ - -XX:MaxMetaspaceSize=$META_SPACE \ - -XX:ActiveProcessorCount=$SPARK_EXECUTOR_CORES \ - -Divy.home=/tmp/.ivy \ - -Xms$SPARK_EXECUTOR_MEMORY \ - -Xmx$SPARK_EXECUTOR_MEMORY \ - -Dos.name=Linux \ - -Dio.netty.availableProcessors=$NETTY_THREAD \ - -Djdk.lang.Process.launchMechanism=posix_spawn \ - -cp "$SPARK_CLASSPATH" \ - org.apache.spark.executor.CoarseGrainedExecutorBackend \ - --driver-url $SPARK_DRIVER_URL \ - --executor-id $SPARK_EXECUTOR_ID \ - --cores $SPARK_EXECUTOR_CORES \ - --app-id $SPARK_APPLICATION_ID \ - --hostname $SPARK_EXECUTOR_POD_IP + + if [[ -z "$SGX_EXECUTOR_JVM_MEM_SIZE" ]]; then + echo "SGX_EXECUTOR_JVM_MEM_SIZE not set, using default SPARK_EXECUTOR_MEMORY" + CMD=( + /usr/lib/jvm/java-8-openjdk-amd64/bin/java \ + "${SPARK_EXECUTOR_JAVA_OPTS[@]}" \ + -XX:-UseCompressedOops \ + -XX:MaxMetaspaceSize=$META_SPACE \ + -XX:ActiveProcessorCount=$SPARK_EXECUTOR_CORES \ + -Divy.home=/tmp/.ivy \ + -Xms$SPARK_EXECUTOR_MEMORY \ + -Xmx$SPARK_EXECUTOR_MEMORY \ + -Dos.name=Linux \ + -Dio.netty.availableProcessors=$NETTY_THREAD \ + -Djdk.lang.Process.launchMechanism=posix_spawn \ + -cp "$SPARK_CLASSPATH" \ + org.apache.spark.executor.CoarseGrainedExecutorBackend \ + --driver-url $SPARK_DRIVER_URL \ + --executor-id 
$SPARK_EXECUTOR_ID \ + --cores $SPARK_EXECUTOR_CORES \ + --app-id $SPARK_APPLICATION_ID \ + --hostname $SPARK_EXECUTOR_POD_IP + ) + else + echo "use SGX_EXECUTOR_JVM_MEM_SIZE=$SGX_EXECUTOR_JVM_MEM_SIZE" + CMD=( + /usr/lib/jvm/java-8-openjdk-amd64/bin/java \ + "${SPARK_EXECUTOR_JAVA_OPTS[@]}" \ + -XX:-UseCompressedOops \ + -XX:MaxMetaspaceSize=$META_SPACE \ + -XX:ActiveProcessorCount=$SPARK_EXECUTOR_CORES \ + -Divy.home=/tmp/.ivy \ + -Xms$SGX_EXECUTOR_JVM_MEM_SIZE \ + -Xmx$SGX_EXECUTOR_JVM_MEM_SIZE \ + -Dos.name=Linux \ + -Dio.netty.availableProcessors=$NETTY_THREAD \ + -Djdk.lang.Process.launchMechanism=posix_spawn \ + -cp "$SPARK_CLASSPATH" \ + org.apache.spark.executor.CoarseGrainedExecutorBackend \ + --driver-url $SPARK_DRIVER_URL \ + --executor-id $SPARK_EXECUTOR_ID \ + --cores $SPARK_EXECUTOR_CORES \ + --app-id $SPARK_APPLICATION_ID \ + --hostname $SPARK_EXECUTOR_POD_IP ) + fi ;; - *) echo "Unknown command: $SPARK_K8S_CMD" 1>&2 exit 1 diff --git a/ppml/trusted-big-data-ml/scala/docker-occlum/kubernetes/README.md b/ppml/trusted-big-data-ml/scala/docker-occlum/kubernetes/README.md index a38df5186db..b9610429964 100644 --- a/ppml/trusted-big-data-ml/scala/docker-occlum/kubernetes/README.md +++ b/ppml/trusted-big-data-ml/scala/docker-occlum/kubernetes/README.md @@ -2,7 +2,7 @@ ## Resource Configuration Guide some configuration in *.yaml is introduced in [here](https://github.com/intel-analytics/BigDL/blob/main/ppml/trusted-big-data-ml/scala/docker-occlum/README.md), you can refer to it for more information. - +The two new configs 'spark.kubernetes.driverEnv.SGX_DRIVER_JVM_MEM_SIZE' and 'spark.executorEnv.SGX_EXECUTOR_JVM_MEM_SIZE' serve the same purpose as driver-memory and executor-memory in Spark. The original driver-memory and executor-memory settings are instead used to allocate extra common memory for the libOS. You can refer to [this](https://github.com/intel-analytics/BigDL/tree/main/ppml/trusted-big-data-ml/python/docker-graphene#configuration-explainations) for more information. 
## Prerequisite * Check Kubernetes env or Install Kubernetes from [wiki](https://kubernetes.io/zh/docs/setup/production-environment) diff --git a/ppml/trusted-big-data-ml/scala/docker-occlum/kubernetes/run_spark_gbt.sh b/ppml/trusted-big-data-ml/scala/docker-occlum/kubernetes/run_spark_gbt.sh index 52039e36909..434c56d64f3 100644 --- a/ppml/trusted-big-data-ml/scala/docker-occlum/kubernetes/run_spark_gbt.sh +++ b/ppml/trusted-big-data-ml/scala/docker-occlum/kubernetes/run_spark_gbt.sh @@ -14,6 +14,9 @@ ${SPARK_HOME}/bin/spark-submit \ --conf spark.kubernetes.driver.podTemplateFile=./driver.yaml \ --conf spark.kubernetes.executor.podTemplateFile=./executor.yaml \ --conf spark.kubernetes.sgx.log.level=off \ + --executor-memory 1024m \ + --conf spark.kubernetes.driverEnv.SGX_DRIVER_JVM_MEM_SIZE="2G" \ + --conf spark.executorEnv.SGX_EXECUTOR_JVM_MEM_SIZE="1G" \ --jars local:/opt/spark/examples/jars/scopt_2.12-3.7.1.jar \ local:/opt/spark/examples/jars/spark-examples_2.12-3.1.2.jar \ /opt/spark/data/mllib/sample_lda_libsvm_data.txt --algo regression diff --git a/ppml/trusted-big-data-ml/scala/docker-occlum/kubernetes/run_spark_lr.sh b/ppml/trusted-big-data-ml/scala/docker-occlum/kubernetes/run_spark_lr.sh index 617045020ce..6ad0d543fb1 100644 --- a/ppml/trusted-big-data-ml/scala/docker-occlum/kubernetes/run_spark_lr.sh +++ b/ppml/trusted-big-data-ml/scala/docker-occlum/kubernetes/run_spark_lr.sh @@ -14,6 +14,9 @@ ${SPARK_HOME}/bin/spark-submit \ --conf spark.kubernetes.driver.podTemplateFile=./driver.yaml \ --conf spark.kubernetes.executor.podTemplateFile=./executor.yaml \ --conf spark.kubernetes.sgx.log.level=off \ + --executor-memory 1024m \ + --conf spark.kubernetes.driverEnv.SGX_DRIVER_JVM_MEM_SIZE="2G" \ + --conf spark.executorEnv.SGX_EXECUTOR_JVM_MEM_SIZE="1G" \ --jars local:/opt/spark/examples/jars/scopt_2.12-3.7.1.jar \ local:/opt/spark/examples/jars/spark-examples_2.12-3.1.2.jar \ --regParam 0.3 --elasticNetParam 0.8 \ diff --git 
a/ppml/trusted-big-data-ml/scala/docker-occlum/kubernetes/run_spark_pi.sh b/ppml/trusted-big-data-ml/scala/docker-occlum/kubernetes/run_spark_pi.sh index 359927f037b..d4e143e9c54 100644 --- a/ppml/trusted-big-data-ml/scala/docker-occlum/kubernetes/run_spark_pi.sh +++ b/ppml/trusted-big-data-ml/scala/docker-occlum/kubernetes/run_spark_pi.sh @@ -14,4 +14,6 @@ ${SPARK_HOME}/bin/spark-submit \ --conf spark.kubernetes.executor.podTemplateFile=./executor.yaml \ --conf spark.kubernetes.sgx.log.level=off \ --executor-memory 512m \ + --conf spark.kubernetes.driverEnv.SGX_DRIVER_JVM_MEM_SIZE="512m" \ + --conf spark.executorEnv.SGX_EXECUTOR_JVM_MEM_SIZE="512m" \ local:/opt/spark/examples/jars/spark-examples_2.12-3.1.2.jar diff --git a/ppml/trusted-big-data-ml/scala/docker-occlum/kubernetes/run_spark_sql.sh b/ppml/trusted-big-data-ml/scala/docker-occlum/kubernetes/run_spark_sql.sh index 0c7857c845a..2d20d13e53e 100644 --- a/ppml/trusted-big-data-ml/scala/docker-occlum/kubernetes/run_spark_sql.sh +++ b/ppml/trusted-big-data-ml/scala/docker-occlum/kubernetes/run_spark_sql.sh @@ -14,6 +14,9 @@ ${SPARK_HOME}/bin/spark-submit \ --conf spark.kubernetes.driver.podTemplateFile=./driver.yaml \ --conf spark.kubernetes.executor.podTemplateFile=./executor.yaml \ --conf spark.kubernetes.sgx.log.level=off \ + --executor-memory 1024m \ + --conf spark.kubernetes.driverEnv.SGX_DRIVER_JVM_MEM_SIZE="2G" \ + --conf spark.executorEnv.SGX_EXECUTOR_JVM_MEM_SIZE="1G" \ --jars local:/opt/spark/examples/jars/scopt_2.12-3.7.1.jar \ local:/opt/spark/examples/jars/spark-examples_2.12-3.1.2.jar diff --git a/ppml/trusted-big-data-ml/scala/docker-occlum/kubernetes/run_spark_tpch.sh b/ppml/trusted-big-data-ml/scala/docker-occlum/kubernetes/run_spark_tpch.sh index bd9bba8efb2..88df7517be8 100644 --- a/ppml/trusted-big-data-ml/scala/docker-occlum/kubernetes/run_spark_tpch.sh +++ b/ppml/trusted-big-data-ml/scala/docker-occlum/kubernetes/run_spark_tpch.sh @@ -15,10 +15,12 @@ ${SPARK_HOME}/bin/spark-submit \ 
--conf spark.kubernetes.file.upload.path=file:///tmp \ --conf spark.kubernetes.executor.podNamePrefix="sparktpch" \ --conf spark.kubernetes.sgx.log.level=off \ - --num-executors 1 \ - --executor-cores 8 \ - --executor-memory 16g \ - --driver-memory 16g \ + --num-executors 2 \ + --executor-cores 4 \ + --executor-memory 4g \ + --driver-memory 1g \ + --conf spark.kubernetes.driverEnv.SGX_DRIVER_JVM_MEM_SIZE="1G" \ + --conf spark.executorEnv.SGX_EXECUTOR_JVM_MEM_SIZE="4G" \ --verbose \ local:/opt/spark/jars/spark-tpc-h-queries_2.12-1.0.jar \ /host/data/tpch_data/ /host/data/tpch_output/ diff --git a/ppml/trusted-big-data-ml/scala/docker-occlum/kubernetes/run_spark_xgboost.sh b/ppml/trusted-big-data-ml/scala/docker-occlum/kubernetes/run_spark_xgboost.sh index 0904f8a8b2b..61ff1333739 100644 --- a/ppml/trusted-big-data-ml/scala/docker-occlum/kubernetes/run_spark_xgboost.sh +++ b/ppml/trusted-big-data-ml/scala/docker-occlum/kubernetes/run_spark_xgboost.sh @@ -18,5 +18,7 @@ ${SPARK_HOME}/bin/spark-submit \ --executor-cores 2 \ --executor-memory 2g \ --driver-memory 2g \ + --conf spark.kubernetes.driverEnv.SGX_DRIVER_JVM_MEM_SIZE="2G" \ + --conf spark.executorEnv.SGX_EXECUTOR_JVM_MEM_SIZE="2G" \ local:/bin/jars/bigdl-dllib-spark_3.1.2-2.1.0-SNAPSHOT.jar \ /host/data/xgboost_data 2 100 /host/data/xgboost_model_to_be_saved