From 2ce5c1010f62157cb73de5cfe7b76f00c7218a04 Mon Sep 17 00:00:00 2001 From: ShanSimu Date: Wed, 17 Aug 2022 19:37:12 +0800 Subject: [PATCH 01/17] feat: add k8s related files --- .../docker-gramine/bigdl-ppml-submit.sh | 147 +++++++++++++ .../python/docker-gramine/entrypoint.sh | 204 ++++++++++++++++++ .../docker-gramine/spark-driver-template.yaml | 66 ++++++ .../spark-executor-template.yaml | 61 ++++++ 4 files changed, 478 insertions(+) create mode 100644 ppml/trusted-big-data-ml/python/docker-gramine/bigdl-ppml-submit.sh create mode 100644 ppml/trusted-big-data-ml/python/docker-gramine/entrypoint.sh create mode 100644 ppml/trusted-big-data-ml/python/docker-gramine/spark-driver-template.yaml create mode 100644 ppml/trusted-big-data-ml/python/docker-gramine/spark-executor-template.yaml diff --git a/ppml/trusted-big-data-ml/python/docker-gramine/bigdl-ppml-submit.sh b/ppml/trusted-big-data-ml/python/docker-gramine/bigdl-ppml-submit.sh new file mode 100644 index 00000000000..e939f0da9fb --- /dev/null +++ b/ppml/trusted-big-data-ml/python/docker-gramine/bigdl-ppml-submit.sh @@ -0,0 +1,147 @@ +#!/bin/bash +SGX_ENABLED=false +application_args="" +input_args="" + +while [[ $# -gt 0 ]]; do + case $1 in + --master) + MASTER="$2" + input_args="$input_args $1 $2" + shift # past argument + shift # past value + ;; + --deploy-mode) + DEPLOY_MODE="$2" + input_args="$input_args $1 $2" + shift # past argument + shift # past value + ;; + --sgx-enabled) + SGX_ENABLED="$2" + shift # past argument + shift # past value + ;; + --sgx-log-level) + SGX_LOG_LEVEL="$2" + shift # past argument + shift # past value + ;; + --sgx-driver-memory) + SGX_DRIVER_MEM="$2" + shift # past argument + shift # past value + ;; + --sgx-driver-jvm-memory) + SGX_DRIVER_JVM_MEM="$2" + shift # past argument + shift # past value + ;; + --sgx-executor-memory) + SGX_EXECUTOR_MEM="$2" + shift # past argument + shift # past value + ;; + --sgx-executor-jvm-memory) + SGX_EXECUTOR_JVM_MEM="$2" + shift # past argument + shift # past value + ;; + --verbose) + input_args="$input_args $1" + shift # past argument + ;; + -*|--*) + input_args="$input_args $1 $2" + shift + shift + ;; + *) + application_args="${@}" # save positional arg + break + ;; + esac +done + +echo "input_args $input_args" +echo "app_args $application_args" +echo $MASTER +if [ "$MASTER" == k8s* ] && [ "$DEPLOY_MODE" = "" ]; then + echo "--deploy-mode should be specified for k8s cluster" + exit 1 +fi + + +if [ "$SGX_ENABLED" = "true" ]; then + if [ "$SGX_DRIVER_MEM" = "" ] || [ "$SGX_DRIVER_JVM_MEM" = "" ] || [ "$SGX_EXECUTOR_MEM" = "" ] || [ "$SGX_EXECUTOR_JVM_MEM" = "" ] || [ "$SGX_LOG_LEVEL" = "" ]; then + echo "--sgx-driver-memory, --sgx-driver-jvm-memory, --sgx-executor-memory, --sgx-executor-jvm-memory, --sgx-log-level must be specified when sgx is enabled" + exit 1 + else + sgx_commands="--conf spark.kubernetes.sgx.enabled=$SGX_ENABLED \ + --conf spark.kubernetes.sgx.driver.mem=$SGX_DRIVER_MEM \ + --conf spark.kubernetes.sgx.driver.jvm.mem=$SGX_DRIVER_JVM_MEM \ + --conf spark.kubernetes.sgx.executor.mem=$SGX_EXECUTOR_MEM \ + --conf spark.kubernetes.sgx.executor.jvm.mem=$SGX_EXECUTOR_JVM_MEM \ + --conf spark.kubernetes.sgx.log.level=$SGX_LOG_LEVEL" + fi +else + sgx_commands="" +fi + +default_config="--conf spark.driver.host=$LOCAL_IP \ + --conf spark.driver.port=$RUNTIME_DRIVER_PORT \ + --conf spark.network.timeout=10000000 \ + --conf spark.executor.heartbeatInterval=10000000 \ + --conf spark.python.use.daemon=false \ + --conf spark.python.worker.reuse=false \ + --conf 
spark.kubernetes.authenticate.driver.serviceAccountName=spark \ + --conf spark.kubernetes.driver.podTemplateFile=/ppml/trusted-big-data-ml/spark-driver-template.yaml \ + --conf spark.kubernetes.executor.podTemplateFile=/ppml/trusted-big-data-ml/spark-executor-template.yaml \ + --conf spark.kubernetes.executor.deleteOnTermination=false" + +if [ $secure_password ]; then + SSL="--conf spark.authenticate=true \ + --conf spark.authenticate.secret=$secure_password \ + --conf spark.kubernetes.executor.secretKeyRef.SPARK_AUTHENTICATE_SECRET="spark-secret:secret" \ + --conf spark.kubernetes.driver.secretKeyRef.SPARK_AUTHENTICATE_SECRET="spark-secret:secret" \ + --conf spark.authenticate.enableSaslEncryption=true \ + --conf spark.network.crypto.enabled=true \ + --conf spark.network.crypto.keyLength=128 \ + --conf spark.network.crypto.keyFactoryAlgorithm=PBKDF2WithHmacSHA1 \ + --conf spark.io.encryption.enabled=true \ + --conf spark.io.encryption.keySizeBits=128 \ + --conf spark.io.encryption.keygen.algorithm=HmacSHA1 \ + --conf spark.ssl.enabled=true \ + --conf spark.ssl.port=8043 \ + --conf spark.ssl.keyPassword=$secure_password \ + --conf spark.ssl.keyStore=/ppml/trusted-big-data-ml/work/keys/keystore.jks \ + --conf spark.ssl.keyStorePassword=$secure_password \ + --conf spark.ssl.keyStoreType=JKS \ + --conf spark.ssl.trustStore=/ppml/trusted-big-data-ml/work/keys/keystore.jks \ + --conf spark.ssl.trustStorePassword=$secure_password \ + --conf spark.ssl.trustStoreType=JKS" +else + SSL="" +fi + +spark_submit_command="${JAVA_HOME}/bin/java \ + -cp ${SPARK_HOME}/conf/:${SPARK_HOME}/jars/* \ + -Xmx${RUNTIME_DRIVER_MEMORY} \ + org.apache.spark.deploy.SparkSubmit \ + $SSL \ + $default_config \ + $sgx_commands" + +set -x + +spark_submit_command="${spark_submit_command} ${input_args} ${application_args}" +echo "spark_submit_command $spark_submit_command" +if [ "$SGX_ENABLED" == "true" ] && [ "$DEPLOY_MODE" != "cluster" ]; then + ./clean.sh + gramine-argv-serializer bash -c "$spark_submit_command" > /ppml/trusted-big-data-ml/secured-argvs + + ./init.sh + gramine-sgx bash 2>&1 | tee bigdl-ppml-submit.log +else + $spark_submit_command 2>&1 | tee bigdl-ppml-submit.log +fi \ No newline at end of file diff --git a/ppml/trusted-big-data-ml/python/docker-gramine/entrypoint.sh b/ppml/trusted-big-data-ml/python/docker-gramine/entrypoint.sh new file mode 100644 index 00000000000..22791876673 --- /dev/null +++ b/ppml/trusted-big-data-ml/python/docker-gramine/entrypoint.sh @@ -0,0 +1,204 @@ +#!/bin/bash +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +# echo commands to the terminal output +set -ex + +# Check whether there is a passwd entry for the container UID +myuid=$(id -u) +mygid=$(id -g) +# turn off -e for getent because it will return error code in anonymous uid case +set +e +uidentry=$(getent passwd $myuid) +set -e + +# If there is no passwd entry for the container UID, attempt to create one +if [ -z "$uidentry" ] ; then + if [ -w /etc/passwd ] ; then + echo "$myuid:x:$myuid:$mygid:anonymous uid:$SPARK_HOME:/bin/false" >> /etc/passwd + else + echo "Container ENTRYPOINT failed to add passwd entry for anonymous UID" + fi +fi + +SPARK_K8S_CMD="$1" +echo "###################################### $SPARK_K8S_CMD" +case "$SPARK_K8S_CMD" in + driver | driver-py | driver-r | executor) + shift 1 + ;; + "") + ;; + *) + echo "Non-spark-on-k8s command provided, proceeding in pass-through mode..." + exec /usr/bin/tini -s -- "$@" + ;; +esac + +SPARK_CLASSPATH="$SPARK_CLASSPATH:${SPARK_HOME}/jars/*" +env | grep SPARK_JAVA_OPT_ | sort -t_ -k4 -n | sed 's/[^=]*=\(.*\)/\1/g' > /tmp/java_opts.txt +readarray -t SPARK_EXECUTOR_JAVA_OPTS < /tmp/java_opts.txt + +if [ -n "$SPARK_EXTRA_CLASSPATH" ]; then + SPARK_CLASSPATH="$SPARK_CLASSPATH:$SPARK_EXTRA_CLASSPATH" +fi + +if [ -n "$PYSPARK_FILES" ]; then + PYTHONPATH="$PYTHONPATH:$PYSPARK_FILES" +fi + +PYSPARK_ARGS="" +if [ -n "$PYSPARK_APP_ARGS" ]; then + PYSPARK_ARGS="$PYSPARK_APP_ARGS" +fi + +R_ARGS="" +if [ -n "$R_APP_ARGS" ]; then + R_ARGS="$R_APP_ARGS" +fi + +# Attestation +if [ -z "$ATTESTATION" ]; then + echo "[INFO] Attestation is disabled!" + ATTESTATION="false" +elif [ "$ATTESTATION" = "true" ]; then + echo "[INFO] Attestation is enabled!" + # Build ATTESTATION_COMMAND + if [ -z "$ATTESTATION_URL" ]; then + echo "[ERROR] Attestation is enabled, but ATTESTATION_URL is empty!" + echo "[INFO] PPML Application Exit!" + exit 1 + fi + if [ -z "$ATTESTATION_ID" ]; then + echo "[ERROR] Attestation is enabled, but ATTESTATION_ID is empty!" + echo "[INFO] PPML Application Exit!" + exit 1 + fi + if [ -z "$ATTESTATION_KEY" ]; then + echo "[ERROR] Attestation is enabled, but ATTESTATION_KEY is empty!" + echo "[INFO] PPML Application Exit!" 
+ exit 1 + fi + ATTESTATION_COMMAND="/opt/jdk8/bin/java -Xmx1g -cp $SPARK_CLASSPATH:$BIGDL_HOME/jars/* com.intel.analytics.bigdl.ppml.attestation.AttestationCLI -u ${ATTESTATION_URL} -i ${ATTESTATION_ID} -k ${ATTESTATION_KEY}" +fi + + +if [ "$PYSPARK_MAJOR_PYTHON_VERSION" == "2" ]; then + pyv="$(python -V 2>&1)" + export PYTHON_VERSION="${pyv:7}" + export PYSPARK_PYTHON="python" + export PYSPARK_DRIVER_PYTHON="python" +elif [ "$PYSPARK_MAJOR_PYTHON_VERSION" == "3" ]; then + pyv3="$(python3 -V 2>&1)" + export PYTHON_VERSION="${pyv3:7}" + export PYSPARK_PYTHON="python3" + export PYSPARK_DRIVER_PYTHON="python3" +fi + +case "$SPARK_K8S_CMD" in + driver) + CMD=( + "$SPARK_HOME/bin/spark-submit" + --conf "spark.driver.bindAddress=$SPARK_DRIVER_BIND_ADDRESS" + --deploy-mode client + "$@" + ) + echo $SGX_ENABLED && \ + echo $SGX_DRIVER_MEM_SIZE && \ + echo $SGX_DRIVER_JVM_MEM_SIZE && \ + echo $SGX_EXECUTOR_MEM_SIZE && \ + echo $SGX_EXECUTOR_JVM_MEM_SIZE && \ + echo $SGX_LOG_LEVEL && \ + echo $SPARK_DRIVER_MEMORY && \ + unset PYTHONHOME && \ + unset PYTHONPATH && \ + if [ "$SGX_ENABLED" == "false" ]; then + $SPARK_HOME/bin/spark-submit --conf spark.driver.bindAddress=$SPARK_DRIVER_BIND_ADDRESS --deploy-mode client "$@" + elif [ "$SGX_ENABLED" == "true" ]; then + export driverExtraClassPath=`cat /opt/spark/conf/spark.properties | grep -P -o "(?<=spark.driver.extraClassPath=).*"` && \ + echo $driverExtraClassPath && \ + export SGX_MEM_SIZE=$SGX_DRIVER_MEM_SIZE && \ + export spark_commnd="/opt/jdk8/bin/java -Dlog4j.configurationFile=/ppml/trusted-big-data-ml/work/spark-3.1.2/conf/log4j2.xml -Xms1G -Xmx$SGX_DRIVER_JVM_MEM_SIZE -cp "$SPARK_CLASSPATH:$driverExtraClassPath" org.apache.spark.deploy.SparkSubmit --conf spark.driver.bindAddress=$SPARK_DRIVER_BIND_ADDRESS --deploy-mode client "$@"" && \ + if [ "$ATTESTATION" = "true" ]; then + spark_commnd=$ATTESTATION_COMMAND" && "$spark_commnd + fi + echo $spark_commnd && \ + gramine-argv-serializer bash -c "export TF_MKL_ALLOC_MAX_BYTES=10737418240 && export _SPARK_AUTH_SECRET=$_SPARK_AUTH_SECRET && $spark_commnd" > /ppml/trusted-big-data-ml/secured-argvs && \ + ./init.sh && \ + gramine-sgx bash 1>&2 + fi + ;; + driver-py) + CMD=( + "$SPARK_HOME/bin/spark-submit" + --conf "spark.driver.bindAddress=$SPARK_DRIVER_BIND_ADDRESS" + --deploy-mode client + "$@" $PYSPARK_PRIMARY $PYSPARK_ARGS + ) + ;; + driver-r) + CMD=( + "$SPARK_HOME/bin/spark-submit" + --conf "spark.driver.bindAddress=$SPARK_DRIVER_BIND_ADDRESS" + --deploy-mode client + "$@" $R_PRIMARY $R_ARGS + ) + ;; + executor) + echo $SGX_ENABLED && \ + echo $SGX_DRIVER_MEM_SIZE && \ + echo $SGX_DRIVER_JVM_MEM_SIZE && \ + echo $SGX_EXECUTOR_MEM_SIZE && \ + echo $SGX_EXECUTOR_JVM_MEM_SIZE && \ + echo $SGX_LOG_LEVEL && \ + echo $SPARK_EXECUTOR_MEMORY && \ + unset PYTHONHOME && \ + unset PYTHONPATH && \ + if [ "$SGX_ENABLED" == "false" ]; then + /opt/jdk8/bin/java \ + -Xms$SPARK_EXECUTOR_MEMORY \ + -Xmx$SPARK_EXECUTOR_MEMORY \ + "${SPARK_EXECUTOR_JAVA_OPTS[@]}" \ + -cp "$SPARK_CLASSPATH" \ + org.apache.spark.executor.CoarseGrainedExecutorBackend \ + --driver-url $SPARK_DRIVER_URL \ + --executor-id $SPARK_EXECUTOR_ID \ + --cores $SPARK_EXECUTOR_CORES \ + --app-id $SPARK_APPLICATION_ID \ + --hostname $SPARK_EXECUTOR_POD_IP \ + --resourceProfileId $SPARK_RESOURCE_PROFILE_ID + elif [ "$SGX_ENABLED" == "true" ]; then + export SGX_MEM_SIZE=$SGX_EXECUTOR_MEM_SIZE && \ + export spark_commnd="/opt/jdk8/bin/java -Dlog4j.configurationFile=/ppml/trusted-big-data-ml/work/spark-3.1.2/conf/log4j2.xml -Xms1G 
-Xmx$SGX_EXECUTOR_JVM_MEM_SIZE "${SPARK_EXECUTOR_JAVA_OPTS[@]}" -cp "$SPARK_CLASSPATH" org.apache.spark.executor.CoarseGrainedExecutorBackend --driver-url $SPARK_DRIVER_URL --executor-id $SPARK_EXECUTOR_ID --cores $SPARK_EXECUTOR_CORES --app-id $SPARK_APPLICATION_ID --hostname $SPARK_EXECUTOR_POD_IP --resourceProfileId $SPARK_RESOURCE_PROFILE_ID" && \ + if [ "$ATTESTATION" = "true" ]; then + spark_commnd=$ATTESTATION_COMMAND" && "$spark_commnd + fi + echo $spark_commnd && \ + gramine-argv-serializer bash -c "export TF_MKL_ALLOC_MAX_BYTES=10737418240 && export _SPARK_AUTH_SECRET=$_SPARK_AUTH_SECRET && $spark_commnd" > /ppml/trusted-big-data-ml/secured-argvs && \ + ./init.sh && \ + gramine-sgx bash 1>&2 + fi + ;; + + *) + echo "Unknown command: $SPARK_K8S_CMD" 1>&2 + exit 1 +esac + +# Execute the container CMD under tini for better hygiene +#exec /usr/bin/tini -s -- "${CMD[@]}" \ No newline at end of file diff --git a/ppml/trusted-big-data-ml/python/docker-gramine/spark-driver-template.yaml b/ppml/trusted-big-data-ml/python/docker-gramine/spark-driver-template.yaml new file mode 100644 index 00000000000..8efebd50572 --- /dev/null +++ b/ppml/trusted-big-data-ml/python/docker-gramine/spark-driver-template.yaml @@ -0,0 +1,66 @@ +apiVersion: v1 +kind: Pod +spec: + containers: + - name: spark-driver + securityContext: + privileged: true + env: + - name: ATTESTATION + value: false + - name: ATTESTATION_URL + value: your_attestation_url + #- name: ATTESTATION_ID + # valueFrom: + # secretKeyRef: + # name: kms-secret + # key: app_id + #- name: ATTESTATION_KEY + # valueFrom: + # secretKeyRef: + # name: kms-secret + # key: app_key + volumeMounts: + - name: enclave-key + mountPath: ~/.config/gramine/enclave-key.pem + subPath: enclave-key.pem + - name: device-plugin + mountPath: /var/lib/kubelet/device-plugins + - name: aesm-socket + mountPath: /var/run/aesmd/aesm.socket + - name: nfs-storage + mountPath: /ppml/trusted-big-data-ml/work/data + - name: secure-keys + mountPath: /ppml/trusted-big-data-ml/work/keys + - name: nfs-storage + mountPath: /root/.kube/config + subPath: kubeconfig + #resources: + #requests: + #cpu: 16 + #memory: 128Gi + #sgx.intel.com/epc: 133258905600 + #sgx.intel.com/enclave: 10 + #sgx.intel.com/provision: 10 + #limits: + #cpu: 16 + #memory: 128Gi + #sgx.intel.com/epc: 133258905600 + #sgx.intel.com/enclave: 10 + #sgx.intel.com/provision: 10 + volumes: + - name: enclave-key + secret: + secretName: enclave-key + - name: device-plugin + hostPath: + path: /var/lib/kubelet/device-plugins + - name: aesm-socket + hostPath: + path: /var/run/aesmd/aesm.socket + - name: secure-keys + secret: + secretName: ssl-keys + - name: nfs-storage + persistentVolumeClaim: + claimName: nfsvolumeclaim \ No newline at end of file diff --git a/ppml/trusted-big-data-ml/python/docker-gramine/spark-executor-template.yaml b/ppml/trusted-big-data-ml/python/docker-gramine/spark-executor-template.yaml new file mode 100644 index 00000000000..d4a83817542 --- /dev/null +++ b/ppml/trusted-big-data-ml/python/docker-gramine/spark-executor-template.yaml @@ -0,0 +1,61 @@ +apiVersion: v1 +kind: Pod +spec: + containers: + - name: spark-executor + securityContext: + privileged: true + env: + - name: ATTESTATION + value: false + - name: ATTESTATION_URL + value: your_attestation_url + #- name: ATTESTATION_ID + # valueFrom: + # secretKeyRef: + # name: kms-secret + # key: app_id + #- name: ATTESTATION_KEY + # valueFrom: + # secretKeyRef: + # name: kms-secret + # key: app_key + volumeMounts: + - name: enclave-key + mountPath: 
~/.config/gramine/enclave-key.pem + subPath: enclave-key.pem + - name: device-plugin + mountPath: /var/lib/kubelet/device-plugins + - name: aesm-socket + mountPath: /var/run/aesmd/aesm.socket + - name: nfs-storage + mountPath: /ppml/trusted-big-data-ml/work/data + - name: nfs-storage + mountPath: /root/.kube/config + subPath: kubeconfig + #resources: + #requests: + #cpu: 16 + #memory: 128Gi + #sgx.intel.com/epc: 133258905600 + #sgx.intel.com/enclave: 10 + #sgx.intel.com/provision: 10 + #limits: + #cpu: 16 + #memory: 128Gi + #sgx.intel.com/epc: 133258905600 + #sgx.intel.com/enclave: 10 + #sgx.intel.com/provision: 10 + volumes: + - name: enclave-key + secret: + secretName: enclave-key + - name: device-plugin + hostPath: + path: /var/lib/kubelet/device-plugins + - name: aesm-socket + hostPath: + path: /var/run/aesmd/aesm.socket + - name: nfs-storage + persistentVolumeClaim: + claimName: nfsvolumeclaim \ No newline at end of file From 5dd958f0778fb5998096d5ba19ceb4d78fbe03d2 Mon Sep 17 00:00:00 2001 From: ShanSimu Date: Thu, 18 Aug 2022 13:22:08 +0800 Subject: [PATCH 02/17] fix: change secure-argv to secure_argv in Gramine --- .../python/docker-gramine/bigdl-ppml-submit.sh | 2 +- .../trusted-big-data-ml/python/docker-gramine/entrypoint.sh | 4 ++-- ppml/trusted-big-data-ml/python/docker-gramine/init.sh | 6 +++--- .../python/docker-gramine/java-manifest/init.sh | 6 +++--- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/ppml/trusted-big-data-ml/python/docker-gramine/bigdl-ppml-submit.sh b/ppml/trusted-big-data-ml/python/docker-gramine/bigdl-ppml-submit.sh index e939f0da9fb..6c9fc2fce0e 100644 --- a/ppml/trusted-big-data-ml/python/docker-gramine/bigdl-ppml-submit.sh +++ b/ppml/trusted-big-data-ml/python/docker-gramine/bigdl-ppml-submit.sh @@ -138,7 +138,7 @@ spark_submit_command="${spark_submit_command} ${input_args} ${application_args}" echo "spark_submit_command $spark_submit_command" if [ "$SGX_ENABLED" == "true" ] && [ "$DEPLOY_MODE" != "cluster" ]; then ./clean.sh - gramine-argv-serializer bash -c "$spark_submit_command" > /ppml/trusted-big-data-ml/secured-argvs + gramine-argv-serializer bash -c "$spark_submit_command" > /ppml/trusted-big-data-ml/secured_argvs ./init.sh gramine-sgx bash 2>&1 | tee bigdl-ppml-submit.log diff --git a/ppml/trusted-big-data-ml/python/docker-gramine/entrypoint.sh b/ppml/trusted-big-data-ml/python/docker-gramine/entrypoint.sh index 22791876673..f6c6002fc72 100644 --- a/ppml/trusted-big-data-ml/python/docker-gramine/entrypoint.sh +++ b/ppml/trusted-big-data-ml/python/docker-gramine/entrypoint.sh @@ -138,7 +138,7 @@ case "$SPARK_K8S_CMD" in spark_commnd=$ATTESTATION_COMMAND" && "$spark_commnd fi echo $spark_commnd && \ - gramine-argv-serializer bash -c "export TF_MKL_ALLOC_MAX_BYTES=10737418240 && export _SPARK_AUTH_SECRET=$_SPARK_AUTH_SECRET && $spark_commnd" > /ppml/trusted-big-data-ml/secured-argvs && \ + gramine-argv-serializer bash -c "export TF_MKL_ALLOC_MAX_BYTES=10737418240 && export _SPARK_AUTH_SECRET=$_SPARK_AUTH_SECRET && $spark_commnd" > /ppml/trusted-big-data-ml/secured_argvs && \ ./init.sh && \ gramine-sgx bash 1>&2 fi @@ -189,7 +189,7 @@ case "$SPARK_K8S_CMD" in spark_commnd=$ATTESTATION_COMMAND" && "$spark_commnd fi echo $spark_commnd && \ - gramine-argv-serializer bash -c "export TF_MKL_ALLOC_MAX_BYTES=10737418240 && export _SPARK_AUTH_SECRET=$_SPARK_AUTH_SECRET && $spark_commnd" > /ppml/trusted-big-data-ml/secured-argvs && \ + gramine-argv-serializer bash -c "export TF_MKL_ALLOC_MAX_BYTES=10737418240 && export 
_SPARK_AUTH_SECRET=$_SPARK_AUTH_SECRET && $spark_commnd" > /ppml/trusted-big-data-ml/secured_argvs && \ ./init.sh && \ gramine-sgx bash 1>&2 fi diff --git a/ppml/trusted-big-data-ml/python/docker-gramine/init.sh b/ppml/trusted-big-data-ml/python/docker-gramine/init.sh index 8d6c59c74d1..7cc12028b02 100644 --- a/ppml/trusted-big-data-ml/python/docker-gramine/init.sh +++ b/ppml/trusted-big-data-ml/python/docker-gramine/init.sh @@ -26,10 +26,10 @@ else echo "both /dev/sgx/provision /dev/sgx_provision are not ready, please check the kernel and driver" fi -if [ -c "/ppml/trusted-big-data-ml/secured-argvs" ]; then - echo "/ppml/trusted-big-data-ml/secured-argvs is ready" +if [ -c "/ppml/trusted-big-data-ml/secured_argvs" ]; then + echo "/ppml/trusted-big-data-ml/secured_argvs is ready" else - echo "/ppml/trusted-big-data-ml/secured-argvs is not ready, please generate it before init.sh" + echo "/ppml/trusted-big-data-ml/secured_argvs is not ready, please generate it before init.sh" fi ls -al /dev/sgx diff --git a/ppml/trusted-big-data-ml/python/docker-gramine/java-manifest/init.sh b/ppml/trusted-big-data-ml/python/docker-gramine/java-manifest/init.sh index 8d6c59c74d1..7cc12028b02 100644 --- a/ppml/trusted-big-data-ml/python/docker-gramine/java-manifest/init.sh +++ b/ppml/trusted-big-data-ml/python/docker-gramine/java-manifest/init.sh @@ -26,10 +26,10 @@ else echo "both /dev/sgx/provision /dev/sgx_provision are not ready, please check the kernel and driver" fi -if [ -c "/ppml/trusted-big-data-ml/secured-argvs" ]; then - echo "/ppml/trusted-big-data-ml/secured-argvs is ready" +if [ -c "/ppml/trusted-big-data-ml/secured_argvs" ]; then + echo "/ppml/trusted-big-data-ml/secured_argvs is ready" else - echo "/ppml/trusted-big-data-ml/secured-argvs is not ready, please generate it before init.sh" + echo "/ppml/trusted-big-data-ml/secured_argvs is not ready, please generate it before init.sh" fi ls -al /dev/sgx From aa124157049e2d486cc96510e5d981507f599b89 Mon Sep 17 00:00:00 2001 From: ShanSimu Date: Thu, 18 Aug 2022 15:33:17 +0800 Subject: [PATCH 03/17] fix: check if secured_argv exists with -f --- ppml/trusted-big-data-ml/python/docker-gramine/init.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ppml/trusted-big-data-ml/python/docker-gramine/init.sh b/ppml/trusted-big-data-ml/python/docker-gramine/init.sh index 7cc12028b02..ed40fb5cad5 100644 --- a/ppml/trusted-big-data-ml/python/docker-gramine/init.sh +++ b/ppml/trusted-big-data-ml/python/docker-gramine/init.sh @@ -26,7 +26,7 @@ else echo "both /dev/sgx/provision /dev/sgx_provision are not ready, please check the kernel and driver" fi -if [ -c "/ppml/trusted-big-data-ml/secured_argvs" ]; then +if [ -f "/ppml/trusted-big-data-ml/secured_argvs" ]; then echo "/ppml/trusted-big-data-ml/secured_argvs is ready" else echo "/ppml/trusted-big-data-ml/secured_argvs is not ready, please generate it before init.sh" From 1ab54da2a6828dad0bf1bb80f959e1ebbdaf8b6f Mon Sep 17 00:00:00 2001 From: ShanSimu Date: Thu, 18 Aug 2022 20:04:29 +0800 Subject: [PATCH 04/17] exit when not ready in init.sh --- ppml/trusted-big-data-ml/python/docker-gramine/init.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ppml/trusted-big-data-ml/python/docker-gramine/init.sh b/ppml/trusted-big-data-ml/python/docker-gramine/init.sh index ed40fb5cad5..5aed7eafb4b 100644 --- a/ppml/trusted-big-data-ml/python/docker-gramine/init.sh +++ b/ppml/trusted-big-data-ml/python/docker-gramine/init.sh @@ -14,6 +14,7 @@ elif [ -c "/dev/sgx_enclave" ]; then ln -s 
/dev/sgx_enclave /dev/sgx/enclave else echo "both /dev/sgx/enclave /dev/sgx_enclave are not ready, please check the kernel and driver" + exit 1 fi if [ -c "/dev/sgx/provision" ]; then @@ -24,12 +25,14 @@ elif [ -c "/dev/sgx_provision" ]; then ln -s /dev/sgx_provision /dev/sgx/provision else echo "both /dev/sgx/provision /dev/sgx_provision are not ready, please check the kernel and driver" + exit 1 fi if [ -f "/ppml/trusted-big-data-ml/secured_argvs" ]; then echo "/ppml/trusted-big-data-ml/secured_argvs is ready" else echo "/ppml/trusted-big-data-ml/secured_argvs is not ready, please generate it before init.sh" + exit 1 fi ls -al /dev/sgx From 2df2efc9193a0919b761a516a7a36904b909838c Mon Sep 17 00:00:00 2001 From: ShanSimu Date: Mon, 22 Aug 2022 10:44:08 +0800 Subject: [PATCH 05/17] fix: change path of encalve-key in k8s template --- .../python/docker-gramine/spark-driver-template.yaml | 2 +- .../python/docker-gramine/spark-executor-template.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ppml/trusted-big-data-ml/python/docker-gramine/spark-driver-template.yaml b/ppml/trusted-big-data-ml/python/docker-gramine/spark-driver-template.yaml index 8efebd50572..55ee3037b1b 100644 --- a/ppml/trusted-big-data-ml/python/docker-gramine/spark-driver-template.yaml +++ b/ppml/trusted-big-data-ml/python/docker-gramine/spark-driver-template.yaml @@ -22,7 +22,7 @@ spec: # key: app_key volumeMounts: - name: enclave-key - mountPath: ~/.config/gramine/enclave-key.pem + mountPath: /root/.config/gramine/enclave-key.pem subPath: enclave-key.pem - name: device-plugin mountPath: /var/lib/kubelet/device-plugins diff --git a/ppml/trusted-big-data-ml/python/docker-gramine/spark-executor-template.yaml b/ppml/trusted-big-data-ml/python/docker-gramine/spark-executor-template.yaml index d4a83817542..9d530fa3b31 100644 --- a/ppml/trusted-big-data-ml/python/docker-gramine/spark-executor-template.yaml +++ b/ppml/trusted-big-data-ml/python/docker-gramine/spark-executor-template.yaml @@ -22,7 +22,7 @@ spec: # key: app_key volumeMounts: - name: enclave-key - mountPath: ~/.config/gramine/enclave-key.pem + mountPath: /root/.config/gramine/enclave-key.pem subPath: enclave-key.pem - name: device-plugin mountPath: /var/lib/kubelet/device-plugins From 45577857ef704572ec3bc801c07ebd35ca1f577e Mon Sep 17 00:00:00 2001 From: ShanSimu Date: Mon, 22 Aug 2022 13:42:41 +0800 Subject: [PATCH 06/17] tmp action --- .github/workflows/docker-publish-ppml.yaml | 175 +++++++++++++++++++++ 1 file changed, 175 insertions(+) create mode 100644 .github/workflows/docker-publish-ppml.yaml diff --git a/.github/workflows/docker-publish-ppml.yaml b/.github/workflows/docker-publish-ppml.yaml new file mode 100644 index 00000000000..05f29bb04c5 --- /dev/null +++ b/.github/workflows/docker-publish-ppml.yaml @@ -0,0 +1,175 @@ +name: Nightly Build Docker Publish BigDL-PPML + +on: + pull_request: + branches: [main] + paths: + - 'ppml/trusted-big-data-ml/python/docker-gramine/**' + workflow_dispatch: + inputs: + artifact: + description: 'select which job to run("ALL" will make all jobs run)' + required: true + default: 'latest' + type: choice + options: + - all + - bigdl-ppml-trusted-big-data-ml-python-gramine + - bigdl-ppml-trusted-big-data-ml-python-graphene + - bigdl-ppml-trusted-realtime-ml-scala-graphene + - bigdl-ppml-trusted-big-data-ml-scala-occlum + - bigdl-ppml-trusted-realtime-ml-scala-occlum + tag: + description: 'e.g. 
2.1.0-SNAPSHOT' + required: true + default: 'latest' + type: string + + +jobs: + docker-publish-ppml: + runs-on: [self-hosted, Shire] + permissions: + contents: read + packages: write + + steps: + - uses: actions/checkout@v3 + - name: docker login + run: | + docker login -u ${DOCKERHUB_USERNAME} -p ${DOCKERHUB_PASSWORD} + - name: Set the variable + env: + DEFAULT_TAG: 'devel' + DEFAULT_ARTIFACT: 'bigdl-ppml-trusted-big-data-ml-python-gramine' + run: | + echo "TAG=${{ github.event.inputs.tag || env.DEFAULT_TAG }} " >> $GITHUB_ENV + echo "ARTIFACT=${{ github.event.inputs.artifact || env.DEFAULT_ARTIFACT }}" >> $GITHUB_ENV + - name: bigdl-ppml-trusted-big-data-ml-python-gramine + run: | + if [ "$ARTIFACT" = "bigdl-ppml-trusted-big-data-ml-python-gramine" ] || [ "$ARTIFACT" = "ALL" ] ; then + echo "########################################" + echo "####### big-data-ml-python-gramine ####" + echo "########################################" + cd ppml/trusted-big-data-ml/python/docker-gramine + export image=intelanalytics/bigdl-ppml-trusted-big-data-ml-python-gramine + sudo docker build \ + --no-cache=true \ + --build-arg http_proxy=${HTTP_PROXY} \ + --build-arg https_proxy=${HTTPS_PROXY} \ + --build-arg HTTP_PROXY_HOST=${HTTP_PROXY_HOST_2} \ + --build-arg HTTP_PROXY_PORT=${HTTP_PROXY_PORT_2} \ + --build-arg HTTPS_PROXY_HOST=${HTTP_PROXY_HOST_2} \ + --build-arg HTTPS_PROXY_PORT=${HTTP_PROXY_PORT_3} \ + --build-arg JDK_VERSION=8u192 \ + --build-arg JDK_URL=${JDK_URL} \ + --build-arg no_proxy=${NO_PROXY} \ + --build-arg SPARK_JAR_REPO_URL=${SPARK_JAR_REPO_URL} \ + -t ${image}:${TAG} -f ./Dockerfile . + sudo docker tag ${image}:${TAG} 10.239.45.10/arda/${image}:${TAG} + sudo docker push 10.239.45.10/arda/${image}:${TAG} + sudo docker rmi -f ${image}:${TAG} + fi + - name: bigdl-ppml-trusted-big-data-ml-python-graphene + run: | + if [[ "$ARTIFACT" == bigdl-ppml-trusted-big-data-ml-python-graphene || "$ARTIFACT" == all ]]; then + echo "########################################" + echo "####### big-data-ml-python-graphene ####" + echo "########################################" + cd ppml/trusted-big-data-ml/python/docker-graphene + export image=intelanalytics/bigdl-ppml-trusted-big-data-ml-python-graphene + sudo docker build \ + --no-cache=true \ + --build-arg http_proxy=${HTTP_PROXY} \ + --build-arg https_proxy=${HTTPS_PROXY} \ + --build-arg HTTP_PROXY_HOST=${HTTP_PROXY_HOST_2} \ + --build-arg HTTP_PROXY_PORT=${HTTP_PROXY_PORT_2} \ + --build-arg HTTPS_PROXY_HOST=${HTTP_PROXY_HOST_2} \ + --build-arg HTTPS_PROXY_PORT=${HTTP_PROXY_PORT_3} \ + --build-arg JDK_VERSION=8u192 \ + --build-arg JDK_URL=${JDK_URL} \ + --build-arg no_proxy=${NO_PROXY} \ + --build-arg SPARK_JAR_REPO_URL=${SPARK_JAR_REPO_URL} \ + -t ${image}:${TAG} -f ./Dockerfile . 
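+          # Push the freshly built image to the internal registry (10.239.45.10/arda)
+          # under the requested tag, then delete the local copy to reclaim disk space
+          # on the self-hosted runner.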
+ sudo docker tag ${image}:${TAG} 10.239.45.10/arda/${image}:${TAG} + sudo docker push 10.239.45.10/arda/${image}:${TAG} + sudo docker rmi -f ${image}:${TAG} + fi + - name: bigdl-ppml-trusted-realtime-ml-scala-graphene + run: | + if [[ "$ARTIFACT" == bigdl-ppml-trusted-realtime-ml-scala-graphene || "$ARTIFACT" == all ]]; then + echo "########################################" + echo "####### realtime-ml-scala-graphene #####" + echo "########################################" + cd ppml/trusted-realtime-ml/scala/docker-graphene/ + export image=intelanalytics/bigdl-ppml-trusted-realtime-ml-scala-graphene + pwd + docker build \ + --no-cache=true \ + --build-arg http_proxy=${HTTP_PROXY} \ + --build-arg https_proxy=${HTTPS_PROXY} \ + --build-arg HTTP_PROXY_HOST=${HTTP_PROXY_HOST_2} \ + --build-arg HTTP_PROXY_PORT=${HTTP_PROXY_PORT_2} \ + --build-arg HTTPS_PROXY_HOST=${HTTP_PROXY_HOST_2} \ + --build-arg HTTPS_PROXY_PORT=${HTTP_PROXY_PORT_3} \ + --build-arg JDK_VERSION=8u192 \ + --build-arg JDK_URL=${JDK_URL} \ + --build-arg no_proxy=${NO_PROXY} \ + --build-arg SPARK_JAR_REPO_URL=${SPARK_JAR_REPO_URL} \ + -t ${image}:${TAG} -f ./Dockerfile . + docker tag ${image}:${TAG} 10.239.45.10/arda/${image}:${TAG} + docker push 10.239.45.10/arda/${image}:${TAG} + docker rmi -f ${image}:${TAG} + fi + - name: bigdl-ppml-trusted-big-data-ml-scala-occlum + run: | + if [[ "$ARTIFACT" == bigdl-ppml-trusted-big-data-ml-scala-occlum || "$ARTIFACT" == all ]]; then + echo "########################################" + echo "####### big-data-ml-scala-occlum ######" + echo "########################################" + cd ppml/trusted-big-data-ml/scala/docker-occlum/ + export image=intelanalytics/bigdl-ppml-trusted-big-data-ml-scala-occlum + pwd + docker build \ + --no-cache=true \ + --build-arg http_proxy=${HTTP_PROXY} \ + --build-arg https_proxy=${HTTPS_PROXY} \ + --build-arg HTTP_PROXY_HOST=${HTTP_PROXY_HOST_2} \ + --build-arg HTTP_PROXY_PORT=${HTTP_PROXY_PORT_2} \ + --build-arg HTTPS_PROXY_HOST=${HTTP_PROXY_HOST_2} \ + --build-arg HTTPS_PROXY_PORT=${HTTP_PROXY_PORT_3} \ + --build-arg JDK_VERSION=8u192 \ + --build-arg JDK_URL=${JDK_URL} \ + --build-arg no_proxy=${NO_PROXY} \ + --build-arg SPARK_JAR_REPO_URL=${SPARK_JAR_REPO_URL} \ + -t ${image}:${TAG} -f ./Dockerfile . + docker tag ${image}:${TAG} 10.239.45.10/arda/${image}:${TAG} + docker push 10.239.45.10/arda/${image}:${TAG} + docker rmi -f ${image}:${TAG} + fi + - name: bigdl-ppml-trusted-realtime-ml-scala-occlum + run: | + if [[ "$ARTIFACT" == bigdl-ppml-trusted-realtime-ml-scala-occlum || "$ARTIFACT" == all ]]; then + echo "########################################" + echo "####### realtime-ml-scala-occlum ######" + echo "########################################" + cd ppml/trusted-realtime-ml/scala/docker-occlum/ + export image=intelanalytics/bigdl-ppml-trusted-realtime-ml-scala-occlum + pwd + docker build \ + --no-cache=true \ + --build-arg http_proxy=${HTTP_PROXY} \ + --build-arg https_proxy=${HTTPS_PROXY} \ + --build-arg HTTP_PROXY_HOST=${HTTP_PROXY_HOST_2} \ + --build-arg HTTP_PROXY_PORT=${HTTP_PROXY_PORT_2} \ + --build-arg HTTPS_PROXY_HOST=${HTTP_PROXY_HOST_2} \ + --build-arg HTTPS_PROXY_PORT=${HTTP_PROXY_PORT_3} \ + --build-arg JDK_VERSION=8u192 \ + --build-arg JDK_URL=${JDK_URL} \ + --build-arg no_proxy=${NO_PROXY} \ + --build-arg SPARK_JAR_REPO_URL=${SPARK_JAR_REPO_URL} \ + -t ${image}:${TAG} -f ./Dockerfile . 
+ docker tag ${image}:${TAG} 10.239.45.10/arda/${image}:${TAG} + docker push 10.239.45.10/arda/${image}:${TAG} + docker rmi -f ${image}:${TAG} + fi From 3c530f8dbb271f9d50682a7238fa5c544eaff132 Mon Sep 17 00:00:00 2001 From: ShanSimu Date: Mon, 22 Aug 2022 13:47:47 +0800 Subject: [PATCH 07/17] fix tmp action --- .github/workflows/docker-publish-ppml.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/docker-publish-ppml.yaml b/.github/workflows/docker-publish-ppml.yaml index 05f29bb04c5..cc4ef1a09bd 100644 --- a/.github/workflows/docker-publish-ppml.yaml +++ b/.github/workflows/docker-publish-ppml.yaml @@ -2,7 +2,7 @@ name: Nightly Build Docker Publish BigDL-PPML on: pull_request: - branches: [main] + branches: [main] paths: - 'ppml/trusted-big-data-ml/python/docker-gramine/**' workflow_dispatch: From 2e359947780d0d5f3da4538d83a6814a4e9c7d1c Mon Sep 17 00:00:00 2001 From: ShanSimu Date: Mon, 22 Aug 2022 15:58:56 +0800 Subject: [PATCH 08/17] copy k8s directory to Gramine directory --- .../docker-gramine/kubernetes/README.md | 111 ++++++++++++++++ .../kubernetes/bigdl-ppml-helm/Chart.yaml | 6 + .../bigdl-ppml-helm/templates/spark-job.yaml | 120 ++++++++++++++++++ .../kubernetes/bigdl-ppml-helm/values.yaml | 42 ++++++ .../kubernetes/enclave-key-secret.yaml | 8 ++ .../kubernetes/enclave-key-to-secret.sh | 7 + .../docker-gramine/kubernetes/kms-secret.yaml | 9 ++ .../kubernetes/submit-spark-k8s.sh | 56 ++++++++ 8 files changed, 359 insertions(+) create mode 100644 ppml/trusted-big-data-ml/python/docker-gramine/kubernetes/README.md create mode 100644 ppml/trusted-big-data-ml/python/docker-gramine/kubernetes/bigdl-ppml-helm/Chart.yaml create mode 100644 ppml/trusted-big-data-ml/python/docker-gramine/kubernetes/bigdl-ppml-helm/templates/spark-job.yaml create mode 100644 ppml/trusted-big-data-ml/python/docker-gramine/kubernetes/bigdl-ppml-helm/values.yaml create mode 100644 ppml/trusted-big-data-ml/python/docker-gramine/kubernetes/enclave-key-secret.yaml create mode 100644 ppml/trusted-big-data-ml/python/docker-gramine/kubernetes/enclave-key-to-secret.sh create mode 100644 ppml/trusted-big-data-ml/python/docker-gramine/kubernetes/kms-secret.yaml create mode 100644 ppml/trusted-big-data-ml/python/docker-gramine/kubernetes/submit-spark-k8s.sh diff --git a/ppml/trusted-big-data-ml/python/docker-gramine/kubernetes/README.md b/ppml/trusted-big-data-ml/python/docker-gramine/kubernetes/README.md new file mode 100644 index 00000000000..668a4c84b48 --- /dev/null +++ b/ppml/trusted-big-data-ml/python/docker-gramine/kubernetes/README.md @@ -0,0 +1,111 @@ +# Trusted big data ML for Kubernetes with Helm Charts + +## 1 Deploy the Intel SGX Device Plugin for Kubenetes + +Please refer to the document [here][devicePluginK8sQuickStart]. + +## 2 Deploy Trusted Realtime ML for Kubernetes + +### 2.1 Configurables + +In `bigdl-ppml-helm/values.yaml`, configure the full values for: +- `image`: The PPML image you want to use. +- `k8sMaster`: Run `kubectl cluster-info`. The output should be like `Kubernetes control plane is running at https://master_ip:master_port`. Fill in the master ip and port. +- `pvc`: The name of the Persistent Volume Claim (PVC) of your Network File System (NFS). We assume you have a working NFS configured for your Kubernetes cluster. +- `jar`: The `jar` file you would like Spark to run, defaulted to `spark-examples_2.12-3.1.2.jar`. 
The path should be the path in the container defined in `bigdl-ppml-helm/templates/spark-job.yaml` +- `class`: The `class` you would like Spark to run, defaulted to `org.apache.spark.examples.SparkPi`. + +Please prepare the following and put them in your NFS directory: +- The data (in a directory called `data`), +- A kubeconfig file. Generate your Kubernetes config file with `kubectl config view --flatten --minify > kubeconfig`, then put it in your NFS. + +The other values have self-explanatory names and can be left alone. + +### 2.2 Secure keys, password, and the enclave key + +You need to [generate secure keys and password][keysNpassword]. Run +``` bash +bash ../../../../scripts/generate-keys.sh +bash ../../../../scripts/generate-password.sh YOUR_PASSWORD +kubectl apply -f keys/keys.yaml +kubectl apply -f password/password.yaml +``` + +Run `bash enclave-key-to-secret.sh` to generate your enclave key and add it to your Kubernetes cluster as a secret. + +### 2.3 Create the RBAC +```bash +sudo kubectl create serviceaccount spark +sudo kubectl create clusterrolebinding spark-role --clusterrole=edit --serviceaccount=default:spark --namespace=default +``` + +### 2.4 Create k8s secret + +``` bash +sudo kubectl create secret generic spark-secret --from-literal secret=YOUR_SECRET +``` + +**The secret created (`YOUR_SECRET`) should be the same as `YOUR_PASSWORD` in section 2.2**. + +### 2.5 Using [Helm][helmsite] to run your Spark job + +You can use Helm to deploy your Spark job. Simply run +``` bash +helm install ./bigdl-ppml-helm +``` +where `` is a name you give for this installation. + +### 2.6 Debugging + +To check the logs of the Kubernetes job, run +``` bash +sudo kubectl logs $( sudo kubectl get pod | grep spark-pi-job | cut -d " " -f1 ) +``` + +To check the logs of the Spark driver, run +``` bash +sudo kubectl logs $( sudo kubectl get pod | grep "spark-pi-sgx.*-driver" -m 1 | cut -d " " -f1 ) +``` + +To check the logs of an Spark executor, run +``` bash +sudo kubectl logs $( sudo kubectl get pod | grep "spark-pi-.*-exec" -m 1 | cut -d " " -f1 ) +``` + +### 2.7 Deleting the Job + +To uninstall the helm chart, run +``` bash +helm uninstall +``` + +Note that the `` must be the same as the one you set in section 2.5. Helm does not delete the driver and executors that are run by the Kubernetes Job, so for now we can only delete them manually: +``` bash +sudo kubectl get pod | grep -o "spark-pi-.*-exec-[0-9]*" | xargs sudo kubectl delete pod +sudo kubectl get pod | grep -o "spark-pi-sgx.*-driver" | xargs sudo kubectl delete pod +``` + +## 3 Attestation + +With attestation, we can verify if any service is replaced or hacked by malicious nodes. This helps us ensure integrity of the our distributed applications. + +### 3.1 Prerequisites + +To enable attestation in BigDL PPML, you need to ensure you have correct access to attestation services (eHSM attestation service, amber or Azure attestation service etc). In this example, we will sue eHSM as attestation service. Please ensure eHSM is correctly configured. + +### 3.2 Attestation Configurations + +1. Set APP_ID and APP_KEY in [kms-secret.yaml](https://github.com/intel-analytics/BigDL/blob/main/ppml/trusted-big-data-ml/python/docker-graphene/kubernetes/kms-secret.yaml). Apply this secret. +2. 
Mount APP_ID and APP_KEY in [spark-driver-template.yaml](https://github.com/intel-analytics/BigDL/blob/main/ppml/trusted-big-data-ml/python/docker-graphene/spark-driver-template.yaml#L13) and [spark-executor-template.yaml](https://github.com/intel-analytics/BigDL/blob/main/ppml/trusted-big-data-ml/python/docker-graphene/spark-executor-template.yaml#L13). +3. Change ATTESTATION to `true` in [spark-driver-template.yaml](https://github.com/intel-analytics/BigDL/blob/main/ppml/trusted-big-data-ml/python/docker-graphene/spark-driver-template.yaml#L10) and [spark-executor-template.yaml](https://github.com/intel-analytics/BigDL/blob/main/ppml/trusted-big-data-ml/python/docker-graphene/spark-executor-template.yaml#L10), and set ATTESTATION_URL, e.g., `http://192.168.0.8:9000`. + +### 3.2 Test with examples + +After updating `spark-driver-template.yaml` and `spark-executor-template.yaml`, attestation will by automatically added to BigDL PPML pipe line. That means PPML applications will be automatically attested by attestation service when they start in Kubernetes Pod. They will prevent malicious Pod from getting sensitive information in applications. + +You can test attestation with [Spark Pi](https://github.com/intel-analytics/BigDL/tree/main/ppml/trusted-big-data-ml/python/docker-graphene#143-spark-pi-example) or other Kubernetes examples. + + +[devicePluginK8sQuickStart]: https://bigdl.readthedocs.io/en/latest/doc/PPML/QuickStart/deploy_intel_sgx_device_plugin_for_kubernetes.html +[keysNpassword]: https://github.com/intel-analytics/BigDL/tree/main/ppml/trusted-big-data-ml/python/docker-graphene#2-prepare-data-key-and-password +[helmsite]: https://helm.sh/ diff --git a/ppml/trusted-big-data-ml/python/docker-gramine/kubernetes/bigdl-ppml-helm/Chart.yaml b/ppml/trusted-big-data-ml/python/docker-gramine/kubernetes/bigdl-ppml-helm/Chart.yaml new file mode 100644 index 00000000000..e95863d2e29 --- /dev/null +++ b/ppml/trusted-big-data-ml/python/docker-gramine/kubernetes/bigdl-ppml-helm/Chart.yaml @@ -0,0 +1,6 @@ +apiVersion: v2 +name: bigdl-ppml-helm-chart +description: A Helm chart for submitting BigDL PPML Spark jobs to Kubernetes +type: application +version: 0.1.0 +appVersion: 2.1.0-SNAPSHOT diff --git a/ppml/trusted-big-data-ml/python/docker-gramine/kubernetes/bigdl-ppml-helm/templates/spark-job.yaml b/ppml/trusted-big-data-ml/python/docker-gramine/kubernetes/bigdl-ppml-helm/templates/spark-job.yaml new file mode 100644 index 00000000000..d333fed3df5 --- /dev/null +++ b/ppml/trusted-big-data-ml/python/docker-gramine/kubernetes/bigdl-ppml-helm/templates/spark-job.yaml @@ -0,0 +1,120 @@ +apiVersion: batch/v1 +kind: Job +metadata: + name: spark-pi-job +spec: + template: + spec: + serviceAccountName: spark + restartPolicy: Never + hostNetwork: true + containers: + - name: spark-local-k8s-client + image: {{ .Values.image }} + imagePullPolicy: Never + command: ["/bin/sh","-c"] + args: [" + sed -i \"s@nfsvolumeclaim@$NFS_PVC_NAME@g\" spark-driver-template.yaml; + sed -i \"s@nfsvolumeclaim@$NFS_PVC_NAME@g\" spark-executor-template.yaml; + export RUNTIME_DRIVER_HOST=$( hostname -I | awk '{print $1}' ); + export LOCAL_IP=$( hostname -I | awk '{print $1}' ); + bash -c \"$ENTRYPOINT\" + "] + securityContext: + privileged: true + env: + - name: ENTRYPOINT + value: {{ tpl .Values.entrypoint . 
}} + - name: SPARK_JAR + value: {{ .Values.jar }} + - name: SPARK_CLASS + value: {{ .Values.class }} + - name: NFS_PVC_NAME + value: {{ .Values.pvc }} + - name: RUNTIME_K8S_SPARK_IMAGE + value: {{ .Values.image }} + - name: RUNTIME_SPARK_MASTER + value: {{ .Values.k8sMaster }} + - name: RUNTIME_DRIVER_PORT + value: "54321" + - name: SPARK_MODE + value: {{ .Values.deployMode }} + - name: RUNTIME_DRIVER_CORES + value: !!str {{ .Values.driverCores }} + - name: RUNTIME_DRIVER_MEMORY + value: {{ .Values.driverMem }} + - name: RUNTIME_EXECUTOR_CORES + value: !!str {{ .Values.executorCores }} + - name: RUNTIME_EXECUTOR_MEMORY + value: {{ .Values.executorMem }} + - name: RUNTIME_EXECUTOR_INSTANCES + value: !!str {{ .Values.executorInstances }} + - name: SGX_ENABLED + value: !!str {{ .Values.sgx }} + - name: SGX_LOG_LEVEL + value: {{ .Values.sgxLog }} + - name: SGX_DRIVER_MEM + value: {{ .Values.sgxDriverMem }} + - name: SGX_DRIVER_JVM_MEM + value: {{ .Values.sgxDriverJvmMem }} + - name: SGX_EXECUTOR_MEM + value: {{ .Values.sgxExecutorMem }} + - name: SGX_EXECUTOR_JVM_MEM + value: {{ .Values.sgxExecutorJvmMem }} + #- name: ATTESTATION_ID + # valueFrom: + # secretKeyRef: + # name: kms-secret + # key: app_id + #- name: ATTESTATION_KEY + # valueFrom: + # secretKeyRef: + # name: kms-secret + # key: app_key + resources: + requests: + cpu: 1 + limits: + cpu: 4 + volumeMounts: + - name: device-plugin + mountPath: /var/lib/kubelet/device-plugins + - name: dev-gsgx + mountPath: /dev/gsgx + - name: aesm-socket + mountPath: /var/run/aesmd/aesm.socket + - name: enclave-key + mountPath: /graphene/Pal/src/host/Linux-SGX/signer/enclave-key.pem + subPath: enclave-key.pem + - name: secure-keys + mountPath: /ppml/trusted-big-data-ml/work/keys + - name: secure-password + mountPath: /ppml/trusted-big-data-ml/work/password +# - name: nfs-storage +# mountPath: "/ppml/trusted-big-data-ml/submit-spark-k8s.sh" +# subPath: submit-spark-k8s.sh + - name: nfs-storage + mountPath: /root/.kube/config + subPath: kubeconfig + volumes: + - name: device-plugin + hostPath: + path: /var/lib/kubelet/device-plugins + - name: dev-gsgx + hostPath: + path: /dev/gsgx + - name: aesm-socket + hostPath: + path: /var/run/aesmd/aesm.socket + - name: enclave-key + secret: + secretName: enclave-key + - name: secure-keys + secret: + secretName: ssl-keys + - name: secure-password + secret: + secretName: ssl-password + - name: nfs-storage + persistentVolumeClaim: + claimName: {{ .Values.pvc }} diff --git a/ppml/trusted-big-data-ml/python/docker-gramine/kubernetes/bigdl-ppml-helm/values.yaml b/ppml/trusted-big-data-ml/python/docker-gramine/kubernetes/bigdl-ppml-helm/values.yaml new file mode 100644 index 00000000000..3517c485783 --- /dev/null +++ b/ppml/trusted-big-data-ml/python/docker-gramine/kubernetes/bigdl-ppml-helm/values.yaml @@ -0,0 +1,42 @@ +image: your_ppml_image +k8sMaster: k8s://https://master_ip:master_port +jar: your_jar +class: your_main_class +name: your_job_name +pvc: your_pvc + +kmsURL: http://your_kms_url:port +deployMode: client +driverCores: "4" +driverMem: "1g" +executorCores: "8" +executorMem: "1g" +executorInstances: "2" +sgx: "true" +sgxLog: "error" +sgxDriverMem: "32g" +sgxDriverJvmMem: "8g" +sgxExecutorMem: "32g" +sgxExecutorJvmMem: "12g" + +entrypoint: > + export secure_password=`openssl rsautl -inkey /ppml/trusted-big-data-ml/work/password/key.txt -decrypt &1 | tee spark-pi-sgx-$SPARK_MODE.log From a5e0cce70294e565088e8a6b8a2b6d460964b3b4 Mon Sep 17 00:00:00 2001 From: ShanSimu Date: Tue, 23 Aug 2022 14:29:35 +0800 
Subject: [PATCH 09/17] version in development --- .../docker-gramine/kubernetes/bigdl-ppml-helm/Chart.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ppml/trusted-big-data-ml/python/docker-gramine/kubernetes/bigdl-ppml-helm/Chart.yaml b/ppml/trusted-big-data-ml/python/docker-gramine/kubernetes/bigdl-ppml-helm/Chart.yaml index e95863d2e29..39938d06400 100644 --- a/ppml/trusted-big-data-ml/python/docker-gramine/kubernetes/bigdl-ppml-helm/Chart.yaml +++ b/ppml/trusted-big-data-ml/python/docker-gramine/kubernetes/bigdl-ppml-helm/Chart.yaml @@ -2,5 +2,5 @@ apiVersion: v2 name: bigdl-ppml-helm-chart description: A Helm chart for submitting BigDL PPML Spark jobs to Kubernetes type: application -version: 0.1.0 -appVersion: 2.1.0-SNAPSHOT +version: 0.0.0 +appVersion: 0.0.0 From 5837c00b3c606038646ca5583c71c570f9779410 Mon Sep 17 00:00:00 2001 From: ShanSimu Date: Tue, 23 Aug 2022 15:30:21 +0800 Subject: [PATCH 10/17] mod: enclave-key path --- .../kubernetes/bigdl-ppml-helm/templates/spark-job.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ppml/trusted-big-data-ml/python/docker-gramine/kubernetes/bigdl-ppml-helm/templates/spark-job.yaml b/ppml/trusted-big-data-ml/python/docker-gramine/kubernetes/bigdl-ppml-helm/templates/spark-job.yaml index d333fed3df5..4455b5ea0da 100644 --- a/ppml/trusted-big-data-ml/python/docker-gramine/kubernetes/bigdl-ppml-helm/templates/spark-job.yaml +++ b/ppml/trusted-big-data-ml/python/docker-gramine/kubernetes/bigdl-ppml-helm/templates/spark-job.yaml @@ -84,7 +84,7 @@ spec: - name: aesm-socket mountPath: /var/run/aesmd/aesm.socket - name: enclave-key - mountPath: /graphene/Pal/src/host/Linux-SGX/signer/enclave-key.pem + mountPath: /root/.config/gramine/enclave-key.pem subPath: enclave-key.pem - name: secure-keys mountPath: /ppml/trusted-big-data-ml/work/keys From 5780332465dd9fb33cfd9dc20b2b7e618199f1e2 Mon Sep 17 00:00:00 2001 From: ShanSimu Date: Tue, 23 Aug 2022 16:09:23 +0800 Subject: [PATCH 11/17] del: remove tmp build action --- .github/workflows/docker-publish-ppml.yaml | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/.github/workflows/docker-publish-ppml.yaml b/.github/workflows/docker-publish-ppml.yaml index cc4ef1a09bd..1407a1866c6 100644 --- a/.github/workflows/docker-publish-ppml.yaml +++ b/.github/workflows/docker-publish-ppml.yaml @@ -1,10 +1,6 @@ name: Nightly Build Docker Publish BigDL-PPML on: - pull_request: - branches: [main] - paths: - - 'ppml/trusted-big-data-ml/python/docker-gramine/**' workflow_dispatch: inputs: artifact: @@ -40,8 +36,8 @@ jobs: docker login -u ${DOCKERHUB_USERNAME} -p ${DOCKERHUB_PASSWORD} - name: Set the variable env: - DEFAULT_TAG: 'devel' - DEFAULT_ARTIFACT: 'bigdl-ppml-trusted-big-data-ml-python-gramine' + DEFAULT_TAG: 'latest' + DEFAULT_ARTIFACT: 'all' run: | echo "TAG=${{ github.event.inputs.tag || env.DEFAULT_TAG }} " >> $GITHUB_ENV echo "ARTIFACT=${{ github.event.inputs.artifact || env.DEFAULT_ARTIFACT }}" >> $GITHUB_ENV @@ -172,4 +168,4 @@ jobs: docker tag ${image}:${TAG} 10.239.45.10/arda/${image}:${TAG} docker push 10.239.45.10/arda/${image}:${TAG} docker rmi -f ${image}:${TAG} - fi + fi \ No newline at end of file From bccdb24ed2d43963c3e486aee3ab9ba1f5711574 Mon Sep 17 00:00:00 2001 From: ShanSimu Date: Wed, 24 Aug 2022 13:10:30 +0800 Subject: [PATCH 12/17] Simplify deployment operations for users --- .../docker-gramine/deploy-local-spark-sgx.sh | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 
ppml/trusted-big-data-ml/python/docker-gramine/deploy-local-spark-sgx.sh diff --git a/ppml/trusted-big-data-ml/python/docker-gramine/deploy-local-spark-sgx.sh b/ppml/trusted-big-data-ml/python/docker-gramine/deploy-local-spark-sgx.sh new file mode 100644 index 00000000000..7431619ff59 --- /dev/null +++ b/ppml/trusted-big-data-ml/python/docker-gramine/deploy-local-spark-sgx.sh @@ -0,0 +1,25 @@ +#!/bin/bash + +# KEYS_PATH means the absolute path to the keys folder +# ENCLAVE_KEY_PATH means the absolute path to the "enclave-key.pem" file +# LOCAL_IP means your local IP address. +export SSL_KEYS_PATH=YOUR_LOCAL_SSL_KEYS_FOLDER_PATH +export ENCLAVE_KEY_PATH=YOUR_LOCAL_ENCLAVE_KEY_PATH +export LOCAL_IP=YOUR_LOCAL_IP +export DOCKER_IMAGE=YOUR_DOCKER_IMAGE + +sudo docker run -itd \ + --privileged \ + --net=host \ + --cpuset-cpus="0-5" \ + --oom-kill-disable \ + --device=/dev/gsgx \ + --device=/dev/sgx/enclave \ + --device=/dev/sgx/provision \ + -v $ENCLAVE_KEY_PATH:/root/.config/gramine/enclave-key.pem \ + -v /var/run/aesmd/aesm.socket:/var/run/aesmd/aesm.socket \ + -v $SSL_KEYS_PATH:/ppml/trusted-big-data-ml/work/keys \ + --name=gramine-test \ + -e LOCAL_IP=$LOCAL_IP \ + -e SGX_MEM_SIZE=64G \ + $DOCKER_IMAGE bash \ No newline at end of file From ca26a60cbc263f0a360cfbb8b962e3c829fb8c71 Mon Sep 17 00:00:00 2001 From: ShanSimu Date: Wed, 24 Aug 2022 13:11:06 +0800 Subject: [PATCH 13/17] doc: update README.md --- .../python/docker-gramine/README.md | 547 ++++++++++++++++-- 1 file changed, 510 insertions(+), 37 deletions(-) diff --git a/ppml/trusted-big-data-ml/python/docker-gramine/README.md b/ppml/trusted-big-data-ml/python/docker-gramine/README.md index c7dc92f1145..4615f67a0e4 100644 --- a/ppml/trusted-big-data-ml/python/docker-gramine/README.md +++ b/ppml/trusted-big-data-ml/python/docker-gramine/README.md @@ -1,68 +1,541 @@ # Gramine +SGX-based Trusted Big Data ML allows the user to run end-to-end big data analytics application and Intel BigDL model training with spark local and distributed cluster on Gramine-SGX. + +*Please mind the IP and file path settings. They should be changed to the IP/path of your own sgx server on which you are running the programs.* ## Before Running code -### 1. Build Docker Image +#### 1. Build Docker Image Before running the following command, please modify the paths in `build-docker-image.sh`. Then build the docker image with the following command. ```bash ./build-docker-image.sh ``` -### 2. Prepare key -- Generate SSL Keys +#### 2. Prepare key +##### Prepare the Key + + The ppml in bigdl needs secured keys to enable spark security such as Authentication, RPC Encryption, Local Storage Encryption and TLS, you need to prepare the secure keys and keystores. In this tutorial, you can generate keys and keystores with root permission (test only, need input security password for keys). + + ```bash + sudo bash ../../../scripts/generate-keys.sh + ``` + + You also need to generate your enclave key using the command below, and keep it safely for future remote attestations and to start SGX enclaves more securely. + + It will generate a file `enclave-key.pem` in your present working directory, which will be your enclave key. To store the key elsewhere, modify the outputted file path. + + ```bash + openssl genrsa -3 -out enclave-key.pem 3072 + ``` + +##### Prepare the Password + + Next, you need to store the password you used for key generation, i.e., `generate-keys.sh`, in a secured file. 
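+
+ (For reference: the password saved by the command below is read back inside the container later in this guide. The snippet here is only an illustrative sketch; the `key.txt` path follows the decryption command in the Helm `values.yaml` added later in this patch series, while the `output.bin` file name is an assumption based on other BigDL PPML guides.)
+
+ ```bash
+ # Illustrative sketch: decrypt the password stored by generate-password.sh
+ # (file names assumed as described above)
+ secure_password=$(openssl rsautl -inkey /ppml/trusted-big-data-ml/work/password/key.txt \
+     -decrypt </ppml/trusted-big-data-ml/work/password/output.bin)
+ ```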
+ + ```bash + sudo bash ../../../scripts/generate-password.sh used_password_when_generate_keys + ``` +## Run Your PySpark Program + +#### 1. Start the container to run native python examples + +Before you run the following commands to start the container, you need to modify the paths in `deploy-local-spark-sgx.sh` and then run the following commands. + +```bash +./deploy-local-spark-sgx.sh +sudo docker exec -it spark-local bash +``` + +#### 2. Run your pyspark program + +To run your pyspark program, first you need to prepare your own pyspark program and put it under the trusted directory in SGX `/ppml/trusted-big-data-ml/work`. Then run with `bigdl-ppml-submit.sh` using the command: + +```bash +./bigdl-ppml-submit.sh work/YOUR_PROMGRAM.py | tee YOUR_PROGRAM-sgx.log +``` + +When the program finishes, check the results with the log `YOUR_PROGRAM-sgx.log`. +## Run Native Python Examples +#### 1. Start the container to run native python examples + +Before you run the following commands to start the container, you need to modify the paths in `deploy-local-spark-sgx.sh` and then run the following commands. + +```bash +./deploy-local-spark-sgx.sh +sudo docker exec -it gramine-test bash ``` -cd BigDL/ppml/ -sudo bash scripts/generate-keys.sh + #### 2. Run native python examples + +##### Example 1: helloworld + +Run the example with SGX with the following command in the terminal. ``` -- Generate enclave-key.pem +sudo docker exec -it gramine-test bash work/start-scripts/start-python-helloworld-sgx.sh ``` -openssl genrsa -3 -out enclave-key.pem 3072 +The result should be: +> Hello World +##### Example 2: numpy + +Run the example with SGX with the following command in the terminal. ``` -### 3. Run container +sudo docker exec -it gramine-test bash work/start-scripts/start-python-numpy-sgx.sh ``` -#!/bin/bash +The result should be like: +> numpy.dot: 0.04753961563110352 sec +## Run as Spark Local Mode -# KEYS_PATH means the absolute path to the keys folder -# ENCLAVE_KEY_PATH means the absolute path to the "enclave-key.pem" file -# LOCAL_IP means your local IP address. -export SSL_KEYS_PATH=YOUR_LOCAL_SSL_KEYS_FOLDER_PATH -export ENCLAVE_KEY_PATH=YOUR_LOCAL_ENCLAVE_KEY_PATH -export LOCAL_IP=YOUR_LOCAL_IP -export DOCKER_IMAGE=YOUR_DOCKER_IMAGE +#### 1. Start the container to run spark applications in spark local mode +Before you run the following commands to start the container, you need to modify the paths in `deploy-local-spark-sgx.sh` and then run the following commands. + +```bash +./deploy-local-spark-sgx.sh +sudo docker exec -it gramine-test bash +``` +#### 2. Run PySpark examples +##### Example : pi + +Run the example with SGX spark local mode with the following command in the terminal. + +```bash +gramine-argv-serializer bash -c "/opt/jdk8/bin/java \ + -cp '/ppml/trusted-big-data-ml/work/spark-3.1.2/conf/:/ppml/trusted-big-data-ml/work/spark-3.1.2/jars/*:/ppml/trusted-big-data-ml/work/spark-3.1.2/examples/jars/*' -Xmx16g \ + org.apache.spark.deploy.SparkSubmit \ + --master local[4] \ + --executor-memory 8g \ + --driver-memory 8g \ + --class org.apache.spark.examples.SparkPi \ + --conf spark.network.timeout=10000000 \ + --conf spark.executor.heartbeatInterval=10000000 \ + --verbose \ + local:///ppml/trusted-big-data-ml/work/spark-3.1.2/examples/jars/spark-examples_2.12-3.1.2.jar 100" > /ppml/trusted-big-data-ml/secured_argvs +./init.sh +gramine-sgx bash 2>&1 | tee local-pi-sgx.log +``` + +Then check the output with the following command. 
+ +```bash +cat local-pi-sgx.log | egrep "roughly" +``` + +The result should be similar to + +>Pi is roughly 3.1418551141855113 + +## Run as Spark on Kubernetes Mode + +Follow the guide below to run Spark on Kubernetes manually. Alternatively, you can also use Helm to set everything up automatically. See [kubernetes/README.md][helmGuide]. + +### 1. Start the spark client as Docker container +### 1.1 Prepare the keys/password/data/enclave-key.pem +Please refer to the previous section about [preparing data, key and password](#prepare-data). + +``` bash +bash ../../../scripts/generate-keys.sh +bash ../../../scripts/generate-password.sh YOUR_PASSWORD +kubectl apply -f keys/keys.yaml +kubectl apply -f password/password.yaml +``` +Run `cd kubernetes && bash enclave-key-to-secret.sh` to generate your enclave key and add it to your Kubernetes cluster as a secret. +### 1.2 Prepare the k8s configurations +#### 1.2.1 Create the RBAC +```bash +kubectl create serviceaccount spark +kubectl create clusterrolebinding spark-role --clusterrole=edit --serviceaccount=default:spark --namespace=default +``` +#### 1.2.2 Generate k8s config file +```bash +kubectl config view --flatten --minify > /YOUR_DIR/kubeconfig +``` +#### 1.2.3 Create k8s secret +```bash +kubectl create secret generic spark-secret --from-literal secret=YOUR_SECRET +``` +**The secret created (`YOUR_SECRET`) should be the same as the password you specified in section 1.1** + +### 1.3 Start the client container +Configure the environment variables in the following script before running it. Check [Bigdl ppml SGX related configurations](#1-bigdl-ppml-sgx-related-configurations) for detailed memory configurations. +```bash +export K8S_MASTER=k8s://$(sudo kubectl cluster-info | grep 'https.*6443' -o -m 1) +echo The k8s master is $K8S_MASTER +export ENCLAVE_KEY=/YOUR_DIR/enclave-key.pem +export NFS_INPUT_PATH=/YOUR_DIR/data +export KEYS_PATH=/YOUR_DIR/keys +export SECURE_PASSWORD_PATH=/YOUR_DIR/password +export KUBECONFIG_PATH=/YOUR_DIR/kubeconfig +export LOCAL_IP=$LOCAL_IP +export DOCKER_IMAGE=YOUR_DOCKER_IMAGE sudo docker run -itd \ --privileged \ --net=host \ - --cpuset-cpus="0-5" \ + --name=spark-local-k8s-client \ + --cpuset-cpus="20-24" \ --oom-kill-disable \ - --device=/dev/gsgx \ --device=/dev/sgx/enclave \ --device=/dev/sgx/provision \ - -v $ENCLAVE_KEY_PATH:/root/.config/gramine/enclave-key.pem \ -v /var/run/aesmd/aesm.socket:/var/run/aesmd/aesm.socket \ - -v $SSL_KEYS_PATH:/ppml/trusted-big-data-ml/work/keys \ - --name=gramine-test \ - -e LOCAL_IP=$LOCAL_IP \ + -v $ENCLAVE_KEY:/root/.config/gramine/enclave-key.pem \ + -v $KEYS_PATH:/ppml/trusted-big-data-ml/work/keys \ + -v $SECURE_PASSWORD_PATH:/ppml/trusted-big-data-ml/work/password \ + -v $KUBECONFIG_PATH:/root/.kube/config \ + -v $NFS_INPUT_PATH:/ppml/trusted-big-data-ml/work/data \ + -e RUNTIME_SPARK_MASTER=$K8S_MASTERK8S_MASTER \ + -e RUNTIME_K8S_SERVICE_ACCOUNT=spark \ + -e RUNTIME_K8S_SPARK_IMAGE=$DOCKER_IMAGE \ + -e RUNTIME_DRIVER_HOST=$LOCAL_IP \ + -e RUNTIME_DRIVER_PORT=54321 \ + -e RUNTIME_EXECUTOR_INSTANCES=2 \ + -e RUNTIME_EXECUTOR_CORES=4 \ + -e RUNTIME_EXECUTOR_MEMORY=20g \ + -e RUNTIME_TOTAL_EXECUTOR_CORES=4 \ + -e RUNTIME_DRIVER_CORES=4 \ + -e RUNTIME_DRIVER_MEMORY=10g \ -e SGX_MEM_SIZE=64G \ + -e SGX_DRIVER_MEM=64g \ + -e SGX_DRIVER_JVM_MEM=12g \ + -e SGX_EXECUTOR_MEM=64g \ + -e SGX_EXECUTOR_JVM_MEM=12g \ + -e SGX_ENABLED=true \ + -e SGX_LOG_LEVEL=error \ + -e LOCAL_IP=$LOCAL_IP \ $DOCKER_IMAGE bash ``` -## Test Examples -### 1. 
Python Examples -#### Example 1:helloworld +run `docker exec -it spark-local-k8s-client bash` to entry the container. + +### 1.4 Init the client and run Spark applications on k8s (1.4 can be skipped if you are using 1.5 to submit jobs) + +#### 1.4.1 Configure `spark-executor-template.yaml` in the container + +We assume you have a working Network File System (NFS) configured for your Kubernetes cluster. Configure the `nfsvolumeclaim` on the last line to the name of the Persistent Volume Claim (PVC) of your NFS. + +Please prepare the following and put them in your NFS directory: + +- The data (in a directory called `data`), +- The kubeconfig file. + +#### 1.4.2 Prepare secured-argvs for client + +Note: If you are running this client in trusted env, please skip this step. Then, directly run this command without `gramine-argv-serializer bash -c`. + +```bash +gramine-argv-serializer bash -c "secure_password=`openssl rsautl -inkey /ppml/trusted-big-data-ml/work/password/key.txt -decrypt /ppml/trusted-big-data-ml/secured_argvs +``` + +Init Graphene command. + +```bash +./init.sh ``` -sudo docker exec -it gramine-test bash work/start-scripts/start-python-helloworld-sgx.sh + +Note that: you can run your own Spark Appliction after changing `--class` and jar path. + +1. `local:///ppml/trusted-big-data-ml/work/spark-3.1.2/examples/jars/spark-examples_2.12-3.1.2.jar` => `your_jar_path` +2. `--class org.apache.spark.examples.SparkPi` => `--class your_class_path` + +#### 1.4.3 Spark-Pi example + +```bash +gramine-sgx bash 2>&1 | tee spark-pi-sgx-$SPARK_MODE.log ``` -The result should be: -> Hello World -#### Example 2:numpy +### 1.5 Use bigdl-ppml-submit.sh to submit ppml jobs +#### 1.5.1 Spark-Pi on local mode +![image2022-6-6_16-18-10](https://user-images.githubusercontent.com/61072813/174703141-63209559-05e1-4c4d-b096-6b862a9bed8a.png) ``` -sudo docker exec -it gramine-test bash work/start-scripts/start-python-numpy-sgx.sh +#!/bin/bash +bash bigdl-ppml-submit.sh \ + --master local[2] \ + --driver-memory 32g \ + --driver-cores 8 \ + --executor-memory 32g \ + --executor-cores 8 \ + --num-executors 2 \ + --class org.apache.spark.examples.SparkPi \ + --name spark-pi \ + --verbose \ + local:///ppml/trusted-big-data-ml/work/spark-3.1.2/examples/jars/spark-examples_2.12-3.1.2.jar 3000 ``` -The result should be like: -> numpy.dot: 0.04753961563110352 sec -### 2. 
Spark Examples -#### Example 1: pyspark pi +#### 1.5.2 Spark-Pi on local sgx mode +![image2022-6-6_16-18-57](https://user-images.githubusercontent.com/61072813/174703165-2afc280d-6a3d-431d-9856-dd5b3659214a.png) ``` -sudo docker exec -it gramine-test bash work/start-scripts/start-spark-local-pi-sgx.sh +#!/bin/bash +bash bigdl-ppml-submit.sh \ + --master local[2] \ + --sgx-enabled true \ + --sgx-log-level error \ + --sgx-driver-memory 64g\ + --sgx-driver-jvm-memory 12g\ + --sgx-executor-memory 64g\ + --sgx-executor-jvm-memory 12g\ + --driver-memory 32g \ + --driver-cores 8 \ + --executor-memory 32g \ + --executor-cores 8 \ + --num-executors 2 \ + --class org.apache.spark.examples.SparkPi \ + --name spark-pi \ + --verbose \ + local:///ppml/trusted-big-data-ml/work/spark-3.1.2/examples/jars/spark-examples_2.12-3.1.2.jar 3000 + ``` -The result should be like: -> pi is roughly 3.135360 +#### 1.5.3 Spark-Pi on client mode +![image2022-6-6_16-19-43](https://user-images.githubusercontent.com/61072813/174703216-70588315-7479-4b6c-9133-095104efc07d.png) + +``` +#!/bin/bash +export secure_password=`openssl rsautl -inkey /ppml/trusted-big-data-ml/work/password/key.txt -decrypt + +The following parameters enable spark executor running on SGX. +`spark.kubernetes.sgx.enabled`: true -> enable spark executor running on sgx, false -> native on k8s without SGX. +`spark.kubernetes.sgx.driver.mem`: Spark driver SGX epc memeory. +`spark.kubernetes.sgx.driver.jvm.mem`: Spark driver JVM memory, Recommended setting is less than half of epc memory. +`spark.kubernetes.sgx.executor.mem`: Spark executor SGX epc memeory. +`spark.kubernetes.sgx.executor.jvm.mem`: Spark executor JVM memory, Recommended setting is less than half of epc memory. +`spark.kubernetes.sgx.log.level`: Spark executor on SGX log level, Supported values are error,all and debug. +The following is a recommended configuration in client mode. +```bash + --conf spark.kubernetes.sgx.enabled=true + --conf spark.kubernetes.sgx.driver.mem=32g + --conf spark.kubernetes.sgx.driver.jvm.mem=10g + --conf spark.kubernetes.sgx.executor.mem=32g + --conf spark.kubernetes.sgx.executor.jvm.mem=12g + --conf spark.kubernetes.sgx.log.level=error + --conf spark.driver.memory=10g + --conf spark.executor.memory=1g +``` +The following is a recommended configuration in cluster mode. +```bash + --conf spark.kubernetes.sgx.enabled=true + --conf spark.kubernetes.sgx.driver.mem=32g + --conf spark.kubernetes.sgx.driver.jvm.mem=10g + --conf spark.kubernetes.sgx.executor.mem=32g + --conf spark.kubernetes.sgx.executor.jvm.mem=12g + --conf spark.kubernetes.sgx.log.level=error + --conf spark.driver.memory=1g + --conf spark.executor.memory=1g +``` +When SGX is not used, the configuration is the same as spark native. +```bash + --conf spark.driver.memory=10g + --conf spark.executor.memory=12g +``` +#### 2. Spark security configurations +Below is an explanation of these security configurations, Please refer to [Spark Security](https://spark.apache.org/docs/3.1.2/security.html) for detail. +##### 2.1 Spark RPC +###### 2.1.1 Authentication +`spark.authenticate`: true -> Spark authenticates its internal connections, default is false. +`spark.authenticate.secret`: The secret key used authentication. +`spark.kubernetes.executor.secretKeyRef.SPARK_AUTHENTICATE_SECRET` and `spark.kubernetes.driver.secretKeyRef.SPARK_AUTHENTICATE_SECRET`: mount `SPARK_AUTHENTICATE_SECRET` environment variable from a secret for both the Driver and Executors. 
+`spark.authenticate.enableSaslEncryption`: true -> enable SASL-based encrypted communication, default is false. +```bash + --conf spark.authenticate=true + --conf spark.authenticate.secret=$secure_password + --conf spark.kubernetes.executor.secretKeyRef.SPARK_AUTHENTICATE_SECRET="spark-secret:secret" + --conf spark.kubernetes.driver.secretKeyRef.SPARK_AUTHENTICATE_SECRET="spark-secret:secret" + --conf spark.authenticate.enableSaslEncryption=true +``` + +###### 2.1.2 Encryption +`spark.network.crypto.enabled`: true -> enable AES-based RPC encryption, default is false. +`spark.network.crypto.keyLength`: The length in bits of the encryption key to generate. +`spark.network.crypto.keyFactoryAlgorithm`: The key factory algorithm to use when generating encryption keys. +```bash + --conf spark.network.crypto.enabled=true + --conf spark.network.crypto.keyLength=128 + --conf spark.network.crypto.keyFactoryAlgorithm=PBKDF2WithHmacSHA1 +``` +###### 2.1.3. Local Storage Encryption +`spark.io.encryption.enabled`: true -> enable local disk I/O encryption, default is false. +`spark.io.encryption.keySizeBits`: IO encryption key size in bits. +`spark.io.encryption.keygen.algorithm`: The algorithm to use when generating the IO encryption key. +```bash + --conf spark.io.encryption.enabled=true + --conf spark.io.encryption.keySizeBits=128 + --conf spark.io.encryption.keygen.algorithm=HmacSHA1 +``` +###### 2.1.4 SSL Configuration +`spark.ssl.enabled`: true -> enable SSL. +`spark.ssl.port`: the port where the SSL service will listen on. +`spark.ssl.keyPassword`: the password to the private key in the key store. +`spark.ssl.keyStore`: path to the key store file. +`spark.ssl.keyStorePassword`: password to the key store. +`spark.ssl.keyStoreType`: the type of the key store. +`spark.ssl.trustStore`: path to the trust store file. +`spark.ssl.trustStorePassword`: password for the trust store. +`spark.ssl.trustStoreType`: the type of the trust store. 
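+
+Before enabling SSL, it can help to verify that the keystore generated during key preparation is readable with the stored password. A minimal check is sketched below, assuming the JKS keystore path used throughout this guide and that `$secure_password` holds the password from `generate-password.sh`.
+```bash
+# List the keystore entries to confirm the path and password are correct.
+# keytool ships with the JDK (the other examples here use a JDK under /opt/jdk8).
+keytool -list -v \
+  -keystore /ppml/trusted-big-data-ml/work/keys/keystore.jks \
+  -storepass $secure_password
+```
+The `--conf` options below then point Spark at this keystore and trust store.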
+```bash + --conf spark.ssl.enabled=true + --conf spark.ssl.port=8043 + --conf spark.ssl.keyPassword=$secure_password + --conf spark.ssl.keyStore=/ppml/trusted-big-data-ml/work/keys/keystore.jks + --conf spark.ssl.keyStorePassword=$secure_password + --conf spark.ssl.keyStoreType=JKS + --conf spark.ssl.trustStore=/ppml/trusted-big-data-ml/work/keys/keystore.jks + --conf spark.ssl.trustStorePassword=$secure_password + --conf spark.ssl.trustStoreType=JKS +``` +[helmGuide]: https://github.com/intel-analytics/BigDL/blob/main/ppml/trusted-big-data-ml/python/docker-gramine/kubernetes/README.md \ No newline at end of file From e3337ffb316eec4d004bf2e0fa84e7d26354356a Mon Sep 17 00:00:00 2001 From: ShanSimu Date: Wed, 24 Aug 2022 13:59:32 +0800 Subject: [PATCH 14/17] merge conflict --- .github/workflows/docker-publish-ppml.yaml | 171 --------------------- 1 file changed, 171 deletions(-) delete mode 100644 .github/workflows/docker-publish-ppml.yaml diff --git a/.github/workflows/docker-publish-ppml.yaml b/.github/workflows/docker-publish-ppml.yaml deleted file mode 100644 index 1407a1866c6..00000000000 --- a/.github/workflows/docker-publish-ppml.yaml +++ /dev/null @@ -1,171 +0,0 @@ -name: Nightly Build Docker Publish BigDL-PPML - -on: - workflow_dispatch: - inputs: - artifact: - description: 'select which job to run("ALL" will make all jobs run)' - required: true - default: 'latest' - type: choice - options: - - all - - bigdl-ppml-trusted-big-data-ml-python-gramine - - bigdl-ppml-trusted-big-data-ml-python-graphene - - bigdl-ppml-trusted-realtime-ml-scala-graphene - - bigdl-ppml-trusted-big-data-ml-scala-occlum - - bigdl-ppml-trusted-realtime-ml-scala-occlum - tag: - description: 'e.g. 2.1.0-SNAPSHOT' - required: true - default: 'latest' - type: string - - -jobs: - docker-publish-ppml: - runs-on: [self-hosted, Shire] - permissions: - contents: read - packages: write - - steps: - - uses: actions/checkout@v3 - - name: docker login - run: | - docker login -u ${DOCKERHUB_USERNAME} -p ${DOCKERHUB_PASSWORD} - - name: Set the variable - env: - DEFAULT_TAG: 'latest' - DEFAULT_ARTIFACT: 'all' - run: | - echo "TAG=${{ github.event.inputs.tag || env.DEFAULT_TAG }} " >> $GITHUB_ENV - echo "ARTIFACT=${{ github.event.inputs.artifact || env.DEFAULT_ARTIFACT }}" >> $GITHUB_ENV - - name: bigdl-ppml-trusted-big-data-ml-python-gramine - run: | - if [ "$ARTIFACT" = "bigdl-ppml-trusted-big-data-ml-python-gramine" ] || [ "$ARTIFACT" = "ALL" ] ; then - echo "########################################" - echo "####### big-data-ml-python-gramine ####" - echo "########################################" - cd ppml/trusted-big-data-ml/python/docker-gramine - export image=intelanalytics/bigdl-ppml-trusted-big-data-ml-python-gramine - sudo docker build \ - --no-cache=true \ - --build-arg http_proxy=${HTTP_PROXY} \ - --build-arg https_proxy=${HTTPS_PROXY} \ - --build-arg HTTP_PROXY_HOST=${HTTP_PROXY_HOST_2} \ - --build-arg HTTP_PROXY_PORT=${HTTP_PROXY_PORT_2} \ - --build-arg HTTPS_PROXY_HOST=${HTTP_PROXY_HOST_2} \ - --build-arg HTTPS_PROXY_PORT=${HTTP_PROXY_PORT_3} \ - --build-arg JDK_VERSION=8u192 \ - --build-arg JDK_URL=${JDK_URL} \ - --build-arg no_proxy=${NO_PROXY} \ - --build-arg SPARK_JAR_REPO_URL=${SPARK_JAR_REPO_URL} \ - -t ${image}:${TAG} -f ./Dockerfile . 
- sudo docker tag ${image}:${TAG} 10.239.45.10/arda/${image}:${TAG} - sudo docker push 10.239.45.10/arda/${image}:${TAG} - sudo docker rmi -f ${image}:${TAG} - fi - - name: bigdl-ppml-trusted-big-data-ml-python-graphene - run: | - if [[ "$ARTIFACT" == bigdl-ppml-trusted-big-data-ml-python-graphene || "$ARTIFACT" == all ]]; then - echo "########################################" - echo "####### big-data-ml-python-graphene ####" - echo "########################################" - cd ppml/trusted-big-data-ml/python/docker-graphene - export image=intelanalytics/bigdl-ppml-trusted-big-data-ml-python-graphene - sudo docker build \ - --no-cache=true \ - --build-arg http_proxy=${HTTP_PROXY} \ - --build-arg https_proxy=${HTTPS_PROXY} \ - --build-arg HTTP_PROXY_HOST=${HTTP_PROXY_HOST_2} \ - --build-arg HTTP_PROXY_PORT=${HTTP_PROXY_PORT_2} \ - --build-arg HTTPS_PROXY_HOST=${HTTP_PROXY_HOST_2} \ - --build-arg HTTPS_PROXY_PORT=${HTTP_PROXY_PORT_3} \ - --build-arg JDK_VERSION=8u192 \ - --build-arg JDK_URL=${JDK_URL} \ - --build-arg no_proxy=${NO_PROXY} \ - --build-arg SPARK_JAR_REPO_URL=${SPARK_JAR_REPO_URL} \ - -t ${image}:${TAG} -f ./Dockerfile . - sudo docker tag ${image}:${TAG} 10.239.45.10/arda/${image}:${TAG} - sudo docker push 10.239.45.10/arda/${image}:${TAG} - sudo docker rmi -f ${image}:${TAG} - fi - - name: bigdl-ppml-trusted-realtime-ml-scala-graphene - run: | - if [[ "$ARTIFACT" == bigdl-ppml-trusted-realtime-ml-scala-graphene || "$ARTIFACT" == all ]]; then - echo "########################################" - echo "####### realtime-ml-scala-graphene #####" - echo "########################################" - cd ppml/trusted-realtime-ml/scala/docker-graphene/ - export image=intelanalytics/bigdl-ppml-trusted-realtime-ml-scala-graphene - pwd - docker build \ - --no-cache=true \ - --build-arg http_proxy=${HTTP_PROXY} \ - --build-arg https_proxy=${HTTPS_PROXY} \ - --build-arg HTTP_PROXY_HOST=${HTTP_PROXY_HOST_2} \ - --build-arg HTTP_PROXY_PORT=${HTTP_PROXY_PORT_2} \ - --build-arg HTTPS_PROXY_HOST=${HTTP_PROXY_HOST_2} \ - --build-arg HTTPS_PROXY_PORT=${HTTP_PROXY_PORT_3} \ - --build-arg JDK_VERSION=8u192 \ - --build-arg JDK_URL=${JDK_URL} \ - --build-arg no_proxy=${NO_PROXY} \ - --build-arg SPARK_JAR_REPO_URL=${SPARK_JAR_REPO_URL} \ - -t ${image}:${TAG} -f ./Dockerfile . - docker tag ${image}:${TAG} 10.239.45.10/arda/${image}:${TAG} - docker push 10.239.45.10/arda/${image}:${TAG} - docker rmi -f ${image}:${TAG} - fi - - name: bigdl-ppml-trusted-big-data-ml-scala-occlum - run: | - if [[ "$ARTIFACT" == bigdl-ppml-trusted-big-data-ml-scala-occlum || "$ARTIFACT" == all ]]; then - echo "########################################" - echo "####### big-data-ml-scala-occlum ######" - echo "########################################" - cd ppml/trusted-big-data-ml/scala/docker-occlum/ - export image=intelanalytics/bigdl-ppml-trusted-big-data-ml-scala-occlum - pwd - docker build \ - --no-cache=true \ - --build-arg http_proxy=${HTTP_PROXY} \ - --build-arg https_proxy=${HTTPS_PROXY} \ - --build-arg HTTP_PROXY_HOST=${HTTP_PROXY_HOST_2} \ - --build-arg HTTP_PROXY_PORT=${HTTP_PROXY_PORT_2} \ - --build-arg HTTPS_PROXY_HOST=${HTTP_PROXY_HOST_2} \ - --build-arg HTTPS_PROXY_PORT=${HTTP_PROXY_PORT_3} \ - --build-arg JDK_VERSION=8u192 \ - --build-arg JDK_URL=${JDK_URL} \ - --build-arg no_proxy=${NO_PROXY} \ - --build-arg SPARK_JAR_REPO_URL=${SPARK_JAR_REPO_URL} \ - -t ${image}:${TAG} -f ./Dockerfile . 
- docker tag ${image}:${TAG} 10.239.45.10/arda/${image}:${TAG} - docker push 10.239.45.10/arda/${image}:${TAG} - docker rmi -f ${image}:${TAG} - fi - - name: bigdl-ppml-trusted-realtime-ml-scala-occlum - run: | - if [[ "$ARTIFACT" == bigdl-ppml-trusted-realtime-ml-scala-occlum || "$ARTIFACT" == all ]]; then - echo "########################################" - echo "####### realtime-ml-scala-occlum ######" - echo "########################################" - cd ppml/trusted-realtime-ml/scala/docker-occlum/ - export image=intelanalytics/bigdl-ppml-trusted-realtime-ml-scala-occlum - pwd - docker build \ - --no-cache=true \ - --build-arg http_proxy=${HTTP_PROXY} \ - --build-arg https_proxy=${HTTPS_PROXY} \ - --build-arg HTTP_PROXY_HOST=${HTTP_PROXY_HOST_2} \ - --build-arg HTTP_PROXY_PORT=${HTTP_PROXY_PORT_2} \ - --build-arg HTTPS_PROXY_HOST=${HTTP_PROXY_HOST_2} \ - --build-arg HTTPS_PROXY_PORT=${HTTP_PROXY_PORT_3} \ - --build-arg JDK_VERSION=8u192 \ - --build-arg JDK_URL=${JDK_URL} \ - --build-arg no_proxy=${NO_PROXY} \ - --build-arg SPARK_JAR_REPO_URL=${SPARK_JAR_REPO_URL} \ - -t ${image}:${TAG} -f ./Dockerfile . - docker tag ${image}:${TAG} 10.239.45.10/arda/${image}:${TAG} - docker push 10.239.45.10/arda/${image}:${TAG} - docker rmi -f ${image}:${TAG} - fi \ No newline at end of file From 281d00c2baf8e0256d106504b69ee1cf3250e335 Mon Sep 17 00:00:00 2001 From: ShanSimu Date: Wed, 24 Aug 2022 14:31:03 +0800 Subject: [PATCH 15/17] doc: correct a typo --- ppml/trusted-big-data-ml/python/docker-gramine/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ppml/trusted-big-data-ml/python/docker-gramine/README.md b/ppml/trusted-big-data-ml/python/docker-gramine/README.md index 4615f67a0e4..0146e28cdeb 100644 --- a/ppml/trusted-big-data-ml/python/docker-gramine/README.md +++ b/ppml/trusted-big-data-ml/python/docker-gramine/README.md @@ -274,7 +274,7 @@ gramine-argv-serializer bash -c "secure_password=`openssl rsautl -inkey /ppml/tr local:///ppml/trusted-big-data-ml/work/spark-3.1.2/examples/jars/spark-examples_2.12-3.1.2.jar" > /ppml/trusted-big-data-ml/secured_argvs ``` -Init Graphene command. +Init Gramine command. ```bash ./init.sh From 76513d260154c7ac7c29e2734991557c008737f6 Mon Sep 17 00:00:00 2001 From: ShanSimu Date: Wed, 24 Aug 2022 15:47:26 +0800 Subject: [PATCH 16/17] doc: correct typo --- .../python/docker-gramine/kubernetes/README.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/ppml/trusted-big-data-ml/python/docker-gramine/kubernetes/README.md b/ppml/trusted-big-data-ml/python/docker-gramine/kubernetes/README.md index 668a4c84b48..2e0f88b399b 100644 --- a/ppml/trusted-big-data-ml/python/docker-gramine/kubernetes/README.md +++ b/ppml/trusted-big-data-ml/python/docker-gramine/kubernetes/README.md @@ -95,17 +95,17 @@ To enable attestation in BigDL PPML, you need to ensure you have correct access ### 3.2 Attestation Configurations -1. Set APP_ID and APP_KEY in [kms-secret.yaml](https://github.com/intel-analytics/BigDL/blob/main/ppml/trusted-big-data-ml/python/docker-graphene/kubernetes/kms-secret.yaml). Apply this secret. -2. Mount APP_ID and APP_KEY in [spark-driver-template.yaml](https://github.com/intel-analytics/BigDL/blob/main/ppml/trusted-big-data-ml/python/docker-graphene/spark-driver-template.yaml#L13) and [spark-executor-template.yaml](https://github.com/intel-analytics/BigDL/blob/main/ppml/trusted-big-data-ml/python/docker-graphene/spark-executor-template.yaml#L13). -3. 
Change ATTESTATION to `true` in [spark-driver-template.yaml](https://github.com/intel-analytics/BigDL/blob/main/ppml/trusted-big-data-ml/python/docker-graphene/spark-driver-template.yaml#L10) and [spark-executor-template.yaml](https://github.com/intel-analytics/BigDL/blob/main/ppml/trusted-big-data-ml/python/docker-graphene/spark-executor-template.yaml#L10), and set ATTESTATION_URL, e.g., `http://192.168.0.8:9000`. +1. Set APP_ID and APP_KEY in [kms-secret.yaml](https://github.com/intel-analytics/BigDL/blob/main/ppml/trusted-big-data-ml/python/docker-gramine/kubernetes/kms-secret.yaml). Apply this secret. +2. Mount APP_ID and APP_KEY in [spark-driver-template.yaml](https://github.com/intel-analytics/BigDL/blob/main/ppml/trusted-big-data-ml/python/docker-gramine/spark-driver-template.yaml#L13) and [spark-executor-template.yaml](https://github.com/intel-analytics/BigDL/blob/main/ppml/trusted-big-data-ml/python/docker-gramine/spark-executor-template.yaml#L13). +3. Change ATTESTATION to `true` in [spark-driver-template.yaml](https://github.com/intel-analytics/BigDL/blob/main/ppml/trusted-big-data-ml/python/docker-gramine/spark-driver-template.yaml#L10) and [spark-executor-template.yaml](https://github.com/intel-analytics/BigDL/blob/main/ppml/trusted-big-data-ml/python/docker-gramine/spark-executor-template.yaml#L10), and set ATTESTATION_URL, e.g., `http://192.168.0.8:9000`. ### 3.2 Test with examples After updating `spark-driver-template.yaml` and `spark-executor-template.yaml`, attestation will by automatically added to BigDL PPML pipe line. That means PPML applications will be automatically attested by attestation service when they start in Kubernetes Pod. They will prevent malicious Pod from getting sensitive information in applications. -You can test attestation with [Spark Pi](https://github.com/intel-analytics/BigDL/tree/main/ppml/trusted-big-data-ml/python/docker-graphene#143-spark-pi-example) or other Kubernetes examples. +You can test attestation with [Spark Pi](https://github.com/intel-analytics/BigDL/tree/main/ppml/trusted-big-data-ml/python/docker-gramine#143-spark-pi-example) or other Kubernetes examples. 
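
A quick way to confirm that the attestation settings actually reached the Spark pods is to inspect the environment of a running driver or executor pod, assuming the templates expose `ATTESTATION`, `ATTESTATION_URL` and `APP_ID` as environment variables as described above. The pod name below is a placeholder; this is only a sanity check, not part of the attestation flow itself.

```bash
# Find the PPML Spark pods, then check that the attestation variables are set.
kubectl get pods | grep spark
kubectl exec -it <your-spark-driver-pod> -- env | grep -E "ATTESTATION|APP_ID"
```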
[devicePluginK8sQuickStart]: https://bigdl.readthedocs.io/en/latest/doc/PPML/QuickStart/deploy_intel_sgx_device_plugin_for_kubernetes.html -[keysNpassword]: https://github.com/intel-analytics/BigDL/tree/main/ppml/trusted-big-data-ml/python/docker-graphene#2-prepare-data-key-and-password +[keysNpassword]: https://github.com/intel-analytics/BigDL/tree/main/ppml/trusted-big-data-ml/python/docker-gramine#2-prepare-data-key-and-password [helmsite]: https://helm.sh/ From cc07d7f83fa0b92c4a052c190fd8787deec2d558 Mon Sep 17 00:00:00 2001 From: ShanSimu Date: Wed, 24 Aug 2022 16:30:41 +0800 Subject: [PATCH 17/17] add spark pi on k8s scripts --- .../start-spark-pi-on-client-sgx.sh | 62 +++++++++++++++++++ .../start-spark-pi-on-cluster-sgx.sh | 57 +++++++++++++++++ .../start-spark-pi-on-local-sgx.sh | 15 +++++ .../start-spark-pi-on-local-without-sgx.sh | 11 ++++ 4 files changed, 145 insertions(+) create mode 100644 ppml/trusted-big-data-ml/python/docker-gramine/start-scripts/start-spark-pi-on-client-sgx.sh create mode 100644 ppml/trusted-big-data-ml/python/docker-gramine/start-scripts/start-spark-pi-on-cluster-sgx.sh create mode 100644 ppml/trusted-big-data-ml/python/docker-gramine/start-scripts/start-spark-pi-on-local-sgx.sh create mode 100644 ppml/trusted-big-data-ml/python/docker-gramine/start-scripts/start-spark-pi-on-local-without-sgx.sh diff --git a/ppml/trusted-big-data-ml/python/docker-gramine/start-scripts/start-spark-pi-on-client-sgx.sh b/ppml/trusted-big-data-ml/python/docker-gramine/start-scripts/start-spark-pi-on-client-sgx.sh new file mode 100644 index 00000000000..46447e4aacc --- /dev/null +++ b/ppml/trusted-big-data-ml/python/docker-gramine/start-scripts/start-spark-pi-on-client-sgx.sh @@ -0,0 +1,62 @@ +#!/bin/bash + +export mode=client && \ +secure_password=`openssl rsautl -inkey /ppml/trusted-big-data-ml/work/password/key.txt -decrypt /ppml/trusted-big-data-ml/secured_argvs + +./init.sh +gramine-sgx bash 2>&1 | tee spark-pi-client-sgx.log \ No newline at end of file diff --git a/ppml/trusted-big-data-ml/python/docker-gramine/start-scripts/start-spark-pi-on-cluster-sgx.sh b/ppml/trusted-big-data-ml/python/docker-gramine/start-scripts/start-spark-pi-on-cluster-sgx.sh new file mode 100644 index 00000000000..2dfa0cf34ea --- /dev/null +++ b/ppml/trusted-big-data-ml/python/docker-gramine/start-scripts/start-spark-pi-on-cluster-sgx.sh @@ -0,0 +1,57 @@ +#!/bin/bash + +export mode=cluster && \ +secure_password=`openssl rsautl -inkey /ppml/trusted-big-data-ml/work/password/key.txt -decrypt &1 | tee spark-pi-cluster-sgx.log \ No newline at end of file diff --git a/ppml/trusted-big-data-ml/python/docker-gramine/start-scripts/start-spark-pi-on-local-sgx.sh b/ppml/trusted-big-data-ml/python/docker-gramine/start-scripts/start-spark-pi-on-local-sgx.sh new file mode 100644 index 00000000000..177d109d124 --- /dev/null +++ b/ppml/trusted-big-data-ml/python/docker-gramine/start-scripts/start-spark-pi-on-local-sgx.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +gramine-argv-serializer bash -c "/opt/jdk8/bin/java \ + -cp '/ppml/trusted-big-data-ml/work/spark-3.1.2/conf/:/ppml/trusted-big-data-ml/work/spark-3.1.2/jars/*:/ppml/trusted-big-data-ml/work/spark-3.1.2/examples/jars/*' -Xmx16g \ + org.apache.spark.deploy.SparkSubmit \ + --master local[4] \ + --executor-memory 8g \ + --driver-memory 8g \ + --class org.apache.spark.examples.SparkPi \ + --conf spark.network.timeout=10000000 \ + --conf spark.executor.heartbeatInterval=10000000 \ + --verbose \ + 
local:///ppml/trusted-big-data-ml/work/spark-3.1.2/examples/jars/spark-examples_2.12-3.1.2.jar 100" > /ppml/trusted-big-data-ml/secured_argvs +./init.sh +gramine-sgx bash 2>&1 | tee spark-pi-local-sgx.log \ No newline at end of file diff --git a/ppml/trusted-big-data-ml/python/docker-gramine/start-scripts/start-spark-pi-on-local-without-sgx.sh b/ppml/trusted-big-data-ml/python/docker-gramine/start-scripts/start-spark-pi-on-local-without-sgx.sh new file mode 100644 index 00000000000..0f6865aad03 --- /dev/null +++ b/ppml/trusted-big-data-ml/python/docker-gramine/start-scripts/start-spark-pi-on-local-without-sgx.sh @@ -0,0 +1,11 @@ +#!/bin/bash + +/opt/jdk8/bin/java \ + -cp '/ppml/trusted-big-data-ml/work/spark-3.1.2/conf/:/ppml/trusted-big-data-ml/work/spark-3.1.2/jars/*:/ppml/trusted-big-data-ml/work/spark-3.1.2/examples/jars/*' -Xmx16g \ + org.apache.spark.deploy.SparkSubmit \ + --master local[2] \ + --executor-memory 8g \ + --driver-memory 8g \ + --class org.apache.spark.examples.SparkPi \ + --verbose \ + local:///ppml/trusted-big-data-ml/work/spark-3.1.2/examples/jars/spark-examples_2.12-3.1.2.jar 100 \ No newline at end of file
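
These new start scripts are meant to be run from inside the PPML container, following the same pattern as the other `start-scripts` shown earlier. A hypothetical invocation is sketched below; the container name and in-container path depend on how you started the container, so adjust them to your setup.

```bash
# Assumes the container was started as in the earlier local-mode examples and that the
# scripts are available under work/start-scripts inside the image.
sudo docker exec -it gramine-test bash work/start-scripts/start-spark-pi-on-local-sgx.sh
```

The script tees its output to `spark-pi-local-sgx.log`, which should contain a line like `Pi is roughly 3.14...`.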