diff --git a/ppml/docs/examples.md b/ppml/docs/examples.md index 1853044fabe..c197fc5a712 100644 --- a/ppml/docs/examples.md +++ b/ppml/docs/examples.md @@ -303,11 +303,14 @@ bash bigdl-ppml-submit.sh \ ## Trusted ML -### Run Trusted Spark XGBoost Regressor + +Please note that the XGBoost examples listed here are **deprecated** because Rabit's network traffic (which carries gradients, splits, and environment data) is not protected. +
+### (Deprecated) Run Trusted Spark XGBoost Regressor
This example shows how to run trusted Spark XGBoost Regressor. -First, make sure that `Boston_Housing.csv` is under `work/data` directory or the same path in the `start-spark-local-xgboost-regressor-sgx.sh`. Replace the value of `RABIT_TRACKER_IP` with your own IP address in the script. +First, make sure that `Boston_Housing.csv` is under the `work/data` directory or at the path used in `start-spark-local-xgboost-regressor-sgx.sh`. Run the script to launch the trusted Spark XGBoost Regressor; it will take some time to produce the final results: @@ -364,11 +367,11 @@ The result should look something like this: > |[7.02259,0.0,18.1...| 14.2| 13.38729190826416|
-### Run Trusted Spark XGBoost Classifier +### (Deprecated) Run Trusted Spark XGBoost Classifier
This example shows how to run trusted Spark XGBoost Classifier. -Before running the example, download the sample dataset from [pima-indians-diabetes](https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv) dataset. After downloading the dataset, make sure that `pima-indians-diabetes.data.csv` is under `work/data` directory or the same path in the `start-spark-local-xgboost-classifier-sgx.sh`. Replace `path_of_pima_indians_diabetes_csv` with your path of `pima-indians-diabetes.data.csv` and the value of `RABIT_TRACKER_IP` with your own IP address in the script. +Before running the example, download the sample [pima-indians-diabetes](https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv) dataset. After downloading the dataset, make sure that `pima-indians-diabetes.data.csv` is under the `work/data` directory or at the path used in `start-spark-local-xgboost-classifier-sgx.sh`. Replace `path_of_pima_indians_diabetes_csv` with the path to your `pima-indians-diabetes.data.csv`. Run the script to launch the trusted Spark XGBoost Classifier; it will take some time to produce the final results: diff --git a/ppml/trusted-big-data-ml/python/docker-gramine/tracker.py b/ppml/trusted-big-data-ml/python/docker-gramine/tracker.py index fdc436c5b11..33e80203e35 100644 --- a/ppml/trusted-big-data-ml/python/docker-gramine/tracker.py +++ b/ppml/trusted-big-data-ml/python/docker-gramine/tracker.py @@ -455,16 +455,12 @@ def start_rabit_tracker(args): def main(): """Main function if tracker is executed in standalone mode.""" - host_ip = os.environ['RABIT_TRACKER_IP'] - if host_ip == None: - sys.stdout.write("###PYTHONWARN### RABIT_TRACKER_IP not set in env") - parser = argparse.ArgumentParser(description='Rabit Tracker start.') parser.add_argument('--num-workers', required=True, type=int, help='Number of worker proccess to be launched.') parser.add_argument('--num-servers', default=0, type=int, help='Number of server process to be launched. Only used in PS jobs.') - parser.add_argument('--host-ip', default=host_ip, type=str, + parser.add_argument('--host-ip', default=None, type=str, help=('Host IP addressed, this is only needed ' + 'if the host IP cannot be automatically guessed.')) parser.add_argument('--log-level', default='INFO', type=str, @@ -473,6 +469,10 @@ def main(): args = parser.parse_args() sys.stdout.write("###PYTHONWARN### args for tracker: " + str(args)) + # Open a file and prepare to set the hostname + with open("/etc/hostname", 'r') as f: + hostname = f.readline().strip() + socket.sethostname(hostname) fmt = '%(asctime)s %(levelname)s %(message)s' if args.log_level == 'INFO': level = logging.INFO diff --git a/ppml/trusted-big-data-ml/python/docker-graphene/README.md b/ppml/trusted-big-data-ml/python/docker-graphene/README.md index 79375f9eedb..37e9f8e1440 100644 --- a/ppml/trusted-big-data-ml/python/docker-graphene/README.md +++ b/ppml/trusted-big-data-ml/python/docker-graphene/README.md @@ -420,11 +420,13 @@ The result should be similar to > >2021-06-18 01:46:20 INFO DistriOptimizer$:180 - [Epoch 2 60032/60000][Iteration 938][Wall Clock 845.747782s] Top1Accuracy is Accuracy(correct: 9696, count: 10000, accuracy: 0.9696) -##### Example 5: XGBoost Regressor +##### (Deprecated) Example 5: XGBoost Regressor + +Please note that the XGBoost example listed here is **deprecated** because Rabit's network traffic (which carries gradients, splits, and environment data) is not protected. 
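The `tracker.py` hunk above removes the `RABIT_TRACKER_IP` environment variable and instead reads the hostname from `/etc/hostname` and registers it with `socket.sethostname()`, so the tracker's host IP can be guessed automatically (see the `--host-ip` help text). Below is a minimal, standalone sketch of that pattern; it is not part of the patch, and the helper name `resolve_tracker_ip` and the fallback behaviour are illustrative assumptions.

```python
# Minimal sketch (illustrative, not part of the patch): resolve the tracker's
# host IP without RABIT_TRACKER_IP by reading /etc/hostname and registering it
# via socket.sethostname() (Unix only, Python 3.3+, usually needs privileges).
import socket


def resolve_tracker_ip(hostname_file="/etc/hostname"):
    with open(hostname_file, "r") as f:
        hostname = f.readline().strip()
    try:
        # Mirror the added tracker.py code; this can fail if the process
        # is not allowed to change the hostname.
        socket.sethostname(hostname)
    except OSError:
        # Fall back to whatever hostname the container already reports.
        hostname = socket.gethostname()
    # Resolve the hostname to an IP that Rabit workers can connect to.
    return socket.gethostbyname(hostname)


if __name__ == "__main__":
    print(resolve_tracker_ip())
```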
The data source `Boston_Housing.csv` can be found at [here](https://github.com/selva86/datasets/blob/master/BostonHousing.csv). -Before running the example, make sure that `Boston_Housing.csv` is under `work/data` directory or the same path in the command. Run the example with SGX spark local mode with the following command in the terminal. Replace `your_IP_address` with your IP address and `path_of_boston_housing_csv` with your path of `Boston_Housing.csv`. +Before running the example, make sure that `Boston_Housing.csv` is under the `work/data` directory or at the path used in the command. Run the example in SGX Spark local mode with the following command in the terminal. Replace `path_of_boston_housing_csv` with the path to your `Boston_Housing.csv`. Note that data in `Boston_Housing.csv` needs to be pre-processed, before training with `xgboost_example.py`. @@ -438,7 +440,7 @@ After changing: > 0.00632,18,2.31,**0**,0.538,6.575,65.2,4.09,1,296,15.3,396.9,4.98,24 ```bash -/graphene/Tools/argv_serializer bash -c "export RABIT_TRACKER_IP=your_IP_address && /opt/jdk8/bin/java -cp \ +/graphene/Tools/argv_serializer bash -c "/opt/jdk8/bin/java -cp \ '/ppml/trusted-big-data-ml/work/bigdl-2.1.0-SNAPSHOT/jars/*:/ppml/trusted-big-data-ml/work/spark-3.1.2/conf/:/ppml/trusted-big-data-ml/work/spark-3.1.2/jars/*' \ -Xmx2g \ org.apache.spark.deploy.SparkSubmit \ @@ -503,7 +505,9 @@ The result should be similar to > >|[7.02259,0.0,18.1...| 14.2| 13.38729190826416| -##### Example 6: XGBoost Classifier +##### (Deprecated) Example 6: XGBoost Classifier + +Please note that the XGBoost example listed here is **deprecated** because Rabit's network traffic (which carries gradients, splits, and environment data) is not protected. Before running the example, download the sample dataset from [pima-indians-diabetes](https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv) dataset manually or with following command. @@ -511,10 +515,10 @@ Before running the example, download the sample dataset from [pima-indians-diabe ```bash wget https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv ``` -After downloading the dataset, make sure that `pima-indians-diabetes.data.csv` is under `work/data` directory or the same path in the command. Run the example with SGX spark local mode with the following command in the terminal. Replace `your_IP_address` with your IP address and `path_of_pima_indians_diabetes_csv` with your path of `pima-indians-diabetes.data.csv`. +After downloading the dataset, make sure that `pima-indians-diabetes.data.csv` is under the `work/data` directory or at the path used in the command. Run the example in SGX Spark local mode with the following command in the terminal. Replace `path_of_pima_indians_diabetes_csv` with the path to your `pima-indians-diabetes.data.csv`. 
```bash -/graphene/Tools/argv_serializer bash -c "export RABIT_TRACKER_IP=your_IP_address && /opt/jdk8/bin/java -cp \ +/graphene/Tools/argv_serializer bash -c "/opt/jdk8/bin/java -cp \ '/ppml/trusted-big-data-ml/work/bigdl-2.1.0-SNAPSHOT/jars/*:/ppml/trusted-big-data-ml/work/spark-3.1.2/conf/:/ppml/trusted-big-data-ml/work/spark-3.1.2/jars/*' \ -Xmx2g \ org.apache.spark.deploy.SparkSubmit \ diff --git a/ppml/trusted-big-data-ml/python/docker-graphene/start-scripts/start-spark-local-xgboost-classifier-sgx.sh b/ppml/trusted-big-data-ml/python/docker-graphene/start-scripts/start-spark-local-xgboost-classifier-sgx.sh index 75e132cca6c..80ec6463dc5 100644 --- a/ppml/trusted-big-data-ml/python/docker-graphene/start-scripts/start-spark-local-xgboost-classifier-sgx.sh +++ b/ppml/trusted-big-data-ml/python/docker-graphene/start-scripts/start-spark-local-xgboost-classifier-sgx.sh @@ -1,5 +1,5 @@ #!/bin/bash -SGX=1 ./pal_loader bash -c "export RABIT_TRACKER_IP=your_IP_address && /opt/jdk8/bin/java -cp \ +SGX=1 ./pal_loader bash -c "/opt/jdk8/bin/java -cp \ '/ppml/trusted-big-data-ml/work/bigdl-2.1.0-SNAPSHOT/jars/*:/ppml/trusted-big-data-ml/work/spark-3.1.2/conf/:/ppml/trusted-big-data-ml/work/spark-3.1.2/jars/*' \ -Xmx2g \ org.apache.spark.deploy.SparkSubmit \ diff --git a/ppml/trusted-big-data-ml/python/docker-graphene/start-scripts/start-spark-local-xgboost-regressor-sgx.sh b/ppml/trusted-big-data-ml/python/docker-graphene/start-scripts/start-spark-local-xgboost-regressor-sgx.sh index f500547bbc5..8951603740c 100644 --- a/ppml/trusted-big-data-ml/python/docker-graphene/start-scripts/start-spark-local-xgboost-regressor-sgx.sh +++ b/ppml/trusted-big-data-ml/python/docker-graphene/start-scripts/start-spark-local-xgboost-regressor-sgx.sh @@ -1,5 +1,5 @@ #!/bin/bash -SGX=1 ./pal_loader bash -c "export RABIT_TRACKER_IP=your_IP_address && /opt/jdk8/bin/java -cp \ +SGX=1 ./pal_loader bash -c "/opt/jdk8/bin/java -cp \ '/ppml/trusted-big-data-ml/work/bigdl-2.1.0-SNAPSHOT/jars/*:/ppml/trusted-big-data-ml/work/spark-3.1.2/conf/:/ppml/trusted-big-data-ml/work/spark-3.1.2/jars/*' \ -Xmx2g \ org.apache.spark.deploy.SparkSubmit \ diff --git a/ppml/trusted-big-data-ml/python/docker-graphene/tracker.py b/ppml/trusted-big-data-ml/python/docker-graphene/tracker.py index e2ead8b13c2..33e80203e35 100644 --- a/ppml/trusted-big-data-ml/python/docker-graphene/tracker.py +++ b/ppml/trusted-big-data-ml/python/docker-graphene/tracker.py @@ -455,16 +455,12 @@ def start_rabit_tracker(args): def main(): """Main function if tracker is executed in standalone mode.""" - host_ip = os.environ.get("RABIT_TRACKER_IP") - if host_ip == None: - sys.stdout.write("###PYTHONWARN### RABIT_TRACKER_IP not set in env") - parser = argparse.ArgumentParser(description='Rabit Tracker start.') parser.add_argument('--num-workers', required=True, type=int, help='Number of worker proccess to be launched.') parser.add_argument('--num-servers', default=0, type=int, help='Number of server process to be launched. 
Only used in PS jobs.') - parser.add_argument('--host-ip', default=host_ip, type=str, + parser.add_argument('--host-ip', default=None, type=str, help=('Host IP addressed, this is only needed ' + 'if the host IP cannot be automatically guessed.')) parser.add_argument('--log-level', default='INFO', type=str, @@ -473,6 +469,10 @@ def main(): args = parser.parse_args() sys.stdout.write("###PYTHONWARN### args for tracker: " + str(args)) + # Open a file and prepare to set the hostname + with open("/etc/hostname", 'r') as f: + hostname = f.readline().strip() + socket.sethostname(hostname) fmt = '%(asctime)s %(levelname)s %(message)s' if args.log_level == 'INFO': level = logging.INFO diff --git a/python/dllib/examples/nnframes/xgboost/xgboost_classifier.py b/python/dllib/examples/nnframes/xgboost/xgboost_classifier.py index 438f09f7e18..59dd22dd1b2 100644 --- a/python/dllib/examples/nnframes/xgboost/xgboost_classifier.py +++ b/python/dllib/examples/nnframes/xgboost/xgboost_classifier.py @@ -38,6 +38,7 @@ def process(filepath, demo): sparkConf = init_spark_conf().setAppName("testXGBClassifier") + sparkConf = sparkConf.set("xgboost.spark.ignoreSsl", True) sc = init_nncontext(sparkConf) sqlContext = SQLContext(sc) if demo: diff --git a/python/dllib/examples/nnframes/xgboost/xgboost_example.py b/python/dllib/examples/nnframes/xgboost/xgboost_example.py index f97718b38f9..a31774ee133 100644 --- a/python/dllib/examples/nnframes/xgboost/xgboost_example.py +++ b/python/dllib/examples/nnframes/xgboost/xgboost_example.py @@ -39,11 +39,12 @@ def Processdata(filepath, demo): ''' - preProcess the data read from filepath + preProcess the data read from filepath :param filepath: :return: assembledf: ''' sparkConf = init_spark_conf().setAppName("testNNClassifer") + sparkConf = sparkConf.set("xgboost.spark.ignoreSsl", True) sc = init_nncontext(sparkConf) sqlContext = SQLContext(sc) if demo: diff --git a/python/dllib/examples/nnframes/xgboost/xgboost_regressor.py b/python/dllib/examples/nnframes/xgboost/xgboost_regressor.py index b835ee26246..58a748867b8 100644 --- a/python/dllib/examples/nnframes/xgboost/xgboost_regressor.py +++ b/python/dllib/examples/nnframes/xgboost/xgboost_regressor.py @@ -38,11 +38,12 @@ def Processdata(filepath, demo): ''' - preProcess the data read from filepath + preProcess the data read from filepath :param filepath: :return: assembledf: ''' sparkConf = init_spark_conf().setAppName("testNNClassifer") + sparkConf = sparkConf.set("xgboost.spark.ignoreSsl", True) sc = init_nncontext(sparkConf) sqlContext = SQLContext(sc) if demo:
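All three `nnframes` XGBoost example scripts receive the same one-line change: the `xgboost.spark.ignoreSsl` property is set on the Spark conf before the NNContext is created. A minimal sketch of the resulting setup is shown below; the `bigdl.dllib.nncontext` import path is an assumption based on the BigDL 2.x package layout and is not part of this diff.

```python
# Minimal sketch of how the updated nnframes XGBoost examples build their
# Spark context after this patch: "xgboost.spark.ignoreSsl" is set on the
# conf before init_nncontext() is called. The import path below assumes a
# BigDL 2.x layout (bigdl.dllib.nncontext); adjust it to your installation.
from bigdl.dllib.nncontext import init_spark_conf, init_nncontext

sparkConf = init_spark_conf().setAppName("testXGBClassifier")
# The one-line addition applied to xgboost_classifier.py, xgboost_example.py
# and xgboost_regressor.py in this patch.
sparkConf = sparkConf.set("xgboost.spark.ignoreSsl", True)
sc = init_nncontext(sparkConf)
```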