diff --git a/ppml/docs/examples.md b/ppml/docs/examples.md
index 1853044fabe..c197fc5a712 100644
--- a/ppml/docs/examples.md
+++ b/ppml/docs/examples.md
@@ -303,11 +303,14 @@ bash bigdl-ppml-submit.sh \
## Trusted ML
-### Run Trusted Spark XGBoost Regressor
+
+Please note that the XGBoost examples listed here are **deprecated** because Rabit's network traffic (which carries gradients, split information, and environment data) is not protected.
+
+### (Deprecated) Run Trusted Spark XGBoost Regressor
This example shows how to run trusted Spark XGBoost Regressor.
-First, make sure that `Boston_Housing.csv` is under `work/data` directory or the same path in the `start-spark-local-xgboost-regressor-sgx.sh`. Replace the value of `RABIT_TRACKER_IP` with your own IP address in the script.
+First, make sure that `Boston_Housing.csv` is under the `work/data` directory or at the path referenced in `start-spark-local-xgboost-regressor-sgx.sh`.
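+
+For instance, a minimal sketch of placing the dataset (the source path below is just a placeholder for wherever you saved the file):
+
+```bash
+mkdir -p work/data
+cp /path/to/Boston_Housing.csv work/data/
+```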
Run the script to run trusted Spark XGBoost Regressor and it would take some time to show the final results:
@@ -364,11 +367,11 @@ The result should look something like this:
> |[7.02259,0.0,18.1...| 14.2| 13.38729190826416|
-### Run Trusted Spark XGBoost Classifier
+### (Deprecated) Run Trusted Spark XGBoost Classifier
This example shows how to run trusted Spark XGBoost Classifier.
-Before running the example, download the sample dataset from [pima-indians-diabetes](https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv) dataset. After downloading the dataset, make sure that `pima-indians-diabetes.data.csv` is under `work/data` directory or the same path in the `start-spark-local-xgboost-classifier-sgx.sh`. Replace `path_of_pima_indians_diabetes_csv` with your path of `pima-indians-diabetes.data.csv` and the value of `RABIT_TRACKER_IP` with your own IP address in the script.
+Before running the example, download the sample [pima-indians-diabetes](https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv) dataset. After downloading it, make sure that `pima-indians-diabetes.data.csv` is under the `work/data` directory or at the path referenced in `start-spark-local-xgboost-classifier-sgx.sh`. Replace `path_of_pima_indians_diabetes_csv` with the path to your `pima-indians-diabetes.data.csv`.
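+
+For example, a minimal sketch of fetching and placing the dataset (adjust the target directory if your script expects a different path):
+
+```bash
+wget https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv
+mkdir -p work/data
+mv pima-indians-diabetes.data.csv work/data/
+```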
Run the script to run trusted Spark XGBoost Classifier and it would take some time to show the final results:
diff --git a/ppml/trusted-big-data-ml/python/docker-gramine/tracker.py b/ppml/trusted-big-data-ml/python/docker-gramine/tracker.py
index fdc436c5b11..33e80203e35 100644
--- a/ppml/trusted-big-data-ml/python/docker-gramine/tracker.py
+++ b/ppml/trusted-big-data-ml/python/docker-gramine/tracker.py
@@ -455,16 +455,12 @@ def start_rabit_tracker(args):
def main():
"""Main function if tracker is executed in standalone mode."""
- host_ip = os.environ['RABIT_TRACKER_IP']
- if host_ip == None:
- sys.stdout.write("###PYTHONWARN### RABIT_TRACKER_IP not set in env")
-
parser = argparse.ArgumentParser(description='Rabit Tracker start.')
parser.add_argument('--num-workers', required=True, type=int,
help='Number of worker proccess to be launched.')
parser.add_argument('--num-servers', default=0, type=int,
help='Number of server process to be launched. Only used in PS jobs.')
- parser.add_argument('--host-ip', default=host_ip, type=str,
+ parser.add_argument('--host-ip', default=None, type=str,
help=('Host IP addressed, this is only needed ' +
'if the host IP cannot be automatically guessed.'))
parser.add_argument('--log-level', default='INFO', type=str,
@@ -473,6 +469,10 @@ def main():
args = parser.parse_args()
sys.stdout.write("###PYTHONWARN### args for tracker: " + str(args))
+ # Read the hostname from /etc/hostname and set it so the tracker can resolve the host IP automatically
+ with open("/etc/hostname", 'r') as f:
+ hostname = f.readline().strip()
+ socket.sethostname(hostname)
fmt = '%(asctime)s %(levelname)s %(message)s'
if args.log_level == 'INFO':
level = logging.INFO
diff --git a/ppml/trusted-big-data-ml/python/docker-graphene/README.md b/ppml/trusted-big-data-ml/python/docker-graphene/README.md
index 79375f9eedb..37e9f8e1440 100644
--- a/ppml/trusted-big-data-ml/python/docker-graphene/README.md
+++ b/ppml/trusted-big-data-ml/python/docker-graphene/README.md
@@ -420,11 +420,13 @@ The result should be similar to
>
>2021-06-18 01:46:20 INFO DistriOptimizer$:180 - [Epoch 2 60032/60000][Iteration 938][Wall Clock 845.747782s] Top1Accuracy is Accuracy(correct: 9696, count: 10000, accuracy: 0.9696)
-##### Example 5: XGBoost Regressor
+##### (Deprecated) Example 5: XGBoost Regressor
+
+Please note that the XGBoost example listed here is **deprecated** because Rabit's network traffic (which carries gradients, split information, and environment data) is not protected.
The data source `Boston_Housing.csv` can be found at [here](https://github.com/selva86/datasets/blob/master/BostonHousing.csv).
-Before running the example, make sure that `Boston_Housing.csv` is under `work/data` directory or the same path in the command. Run the example with SGX spark local mode with the following command in the terminal. Replace `your_IP_address` with your IP address and `path_of_boston_housing_csv` with your path of `Boston_Housing.csv`.
+Before running the example, make sure that `Boston_Housing.csv` is under the `work/data` directory or at the path used in the command. Run the example in SGX Spark local mode with the following command in the terminal. Replace `path_of_boston_housing_csv` with the path to your `Boston_Housing.csv`.
Note that data in `Boston_Housing.csv` needs to be pre-processed, before training with `xgboost_example.py`.
@@ -438,7 +440,7 @@ After changing:
> 0.00632,18,2.31,**0**,0.538,6.575,65.2,4.09,1,296,15.3,396.9,4.98,24
```bash
-/graphene/Tools/argv_serializer bash -c "export RABIT_TRACKER_IP=your_IP_address && /opt/jdk8/bin/java -cp \
+/graphene/Tools/argv_serializer bash -c "/opt/jdk8/bin/java -cp \
'/ppml/trusted-big-data-ml/work/bigdl-2.1.0-SNAPSHOT/jars/*:/ppml/trusted-big-data-ml/work/spark-3.1.2/conf/:/ppml/trusted-big-data-ml/work/spark-3.1.2/jars/*' \
-Xmx2g \
org.apache.spark.deploy.SparkSubmit \
@@ -503,7 +505,9 @@ The result should be similar to
>
>|[7.02259,0.0,18.1...| 14.2| 13.38729190826416|
-##### Example 6: XGBoost Classifier
+##### (Deprecated) Example 6: XGBoost Classifier
+
+Please note that the XGBoost example listed here is **deprecated** because Rabit's network traffic (which carries gradients, split information, and environment data) is not protected.
Before running the example, download the sample dataset from [pima-indians-diabetes](https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv) dataset manually or with following command.
@@ -511,10 +515,10 @@ Before running the example, download the sample dataset from [pima-indians-diabe
wget https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv
```
-After downloading the dataset, make sure that `pima-indians-diabetes.data.csv` is under `work/data` directory or the same path in the command. Run the example with SGX spark local mode with the following command in the terminal. Replace `your_IP_address` with your IP address and `path_of_pima_indians_diabetes_csv` with your path of `pima-indians-diabetes.data.csv`.
+After downloading the dataset, make sure that `pima-indians-diabetes.data.csv` is under the `work/data` directory or at the path used in the command. Run the example in SGX Spark local mode with the following command in the terminal. Replace `path_of_pima_indians_diabetes_csv` with the path to your `pima-indians-diabetes.data.csv`.
```bash
-/graphene/Tools/argv_serializer bash -c "export RABIT_TRACKER_IP=your_IP_address && /opt/jdk8/bin/java -cp \
+/graphene/Tools/argv_serializer bash -c "/opt/jdk8/bin/java -cp \
'/ppml/trusted-big-data-ml/work/bigdl-2.1.0-SNAPSHOT/jars/*:/ppml/trusted-big-data-ml/work/spark-3.1.2/conf/:/ppml/trusted-big-data-ml/work/spark-3.1.2/jars/*' \
-Xmx2g \
org.apache.spark.deploy.SparkSubmit \
diff --git a/ppml/trusted-big-data-ml/python/docker-graphene/start-scripts/start-spark-local-xgboost-classifier-sgx.sh b/ppml/trusted-big-data-ml/python/docker-graphene/start-scripts/start-spark-local-xgboost-classifier-sgx.sh
index 75e132cca6c..80ec6463dc5 100644
--- a/ppml/trusted-big-data-ml/python/docker-graphene/start-scripts/start-spark-local-xgboost-classifier-sgx.sh
+++ b/ppml/trusted-big-data-ml/python/docker-graphene/start-scripts/start-spark-local-xgboost-classifier-sgx.sh
@@ -1,5 +1,5 @@
#!/bin/bash
-SGX=1 ./pal_loader bash -c "export RABIT_TRACKER_IP=your_IP_address && /opt/jdk8/bin/java -cp \
+SGX=1 ./pal_loader bash -c "/opt/jdk8/bin/java -cp \
'/ppml/trusted-big-data-ml/work/bigdl-2.1.0-SNAPSHOT/jars/*:/ppml/trusted-big-data-ml/work/spark-3.1.2/conf/:/ppml/trusted-big-data-ml/work/spark-3.1.2/jars/*' \
-Xmx2g \
org.apache.spark.deploy.SparkSubmit \
diff --git a/ppml/trusted-big-data-ml/python/docker-graphene/start-scripts/start-spark-local-xgboost-regressor-sgx.sh b/ppml/trusted-big-data-ml/python/docker-graphene/start-scripts/start-spark-local-xgboost-regressor-sgx.sh
index f500547bbc5..8951603740c 100644
--- a/ppml/trusted-big-data-ml/python/docker-graphene/start-scripts/start-spark-local-xgboost-regressor-sgx.sh
+++ b/ppml/trusted-big-data-ml/python/docker-graphene/start-scripts/start-spark-local-xgboost-regressor-sgx.sh
@@ -1,5 +1,5 @@
#!/bin/bash
-SGX=1 ./pal_loader bash -c "export RABIT_TRACKER_IP=your_IP_address && /opt/jdk8/bin/java -cp \
+SGX=1 ./pal_loader bash -c "/opt/jdk8/bin/java -cp \
'/ppml/trusted-big-data-ml/work/bigdl-2.1.0-SNAPSHOT/jars/*:/ppml/trusted-big-data-ml/work/spark-3.1.2/conf/:/ppml/trusted-big-data-ml/work/spark-3.1.2/jars/*' \
-Xmx2g \
org.apache.spark.deploy.SparkSubmit \
diff --git a/ppml/trusted-big-data-ml/python/docker-graphene/tracker.py b/ppml/trusted-big-data-ml/python/docker-graphene/tracker.py
index e2ead8b13c2..33e80203e35 100644
--- a/ppml/trusted-big-data-ml/python/docker-graphene/tracker.py
+++ b/ppml/trusted-big-data-ml/python/docker-graphene/tracker.py
@@ -455,16 +455,12 @@ def start_rabit_tracker(args):
def main():
"""Main function if tracker is executed in standalone mode."""
- host_ip = os.environ.get("RABIT_TRACKER_IP")
- if host_ip == None:
- sys.stdout.write("###PYTHONWARN### RABIT_TRACKER_IP not set in env")
-
parser = argparse.ArgumentParser(description='Rabit Tracker start.')
parser.add_argument('--num-workers', required=True, type=int,
help='Number of worker proccess to be launched.')
parser.add_argument('--num-servers', default=0, type=int,
help='Number of server process to be launched. Only used in PS jobs.')
- parser.add_argument('--host-ip', default=host_ip, type=str,
+ parser.add_argument('--host-ip', default=None, type=str,
help=('Host IP addressed, this is only needed ' +
'if the host IP cannot be automatically guessed.'))
parser.add_argument('--log-level', default='INFO', type=str,
@@ -473,6 +469,10 @@ def main():
args = parser.parse_args()
sys.stdout.write("###PYTHONWARN### args for tracker: " + str(args))
+ # Read the hostname from /etc/hostname and set it so the tracker can resolve the host IP automatically
+ with open("/etc/hostname", 'r') as f:
+ hostname = f.readline().strip()
+ socket.sethostname(hostname)
fmt = '%(asctime)s %(levelname)s %(message)s'
if args.log_level == 'INFO':
level = logging.INFO
diff --git a/python/dllib/examples/nnframes/xgboost/xgboost_classifier.py b/python/dllib/examples/nnframes/xgboost/xgboost_classifier.py
index 438f09f7e18..59dd22dd1b2 100644
--- a/python/dllib/examples/nnframes/xgboost/xgboost_classifier.py
+++ b/python/dllib/examples/nnframes/xgboost/xgboost_classifier.py
@@ -38,6 +38,7 @@
def process(filepath, demo):
sparkConf = init_spark_conf().setAppName("testXGBClassifier")
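+ # This example does not encrypt Rabit traffic (see the deprecation note in the PPML docs), so opt out of XGBoost's SSL check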
+ sparkConf = sparkConf.set("xgboost.spark.ignoreSsl", True)
sc = init_nncontext(sparkConf)
sqlContext = SQLContext(sc)
if demo:
diff --git a/python/dllib/examples/nnframes/xgboost/xgboost_example.py b/python/dllib/examples/nnframes/xgboost/xgboost_example.py
index f97718b38f9..a31774ee133 100644
--- a/python/dllib/examples/nnframes/xgboost/xgboost_example.py
+++ b/python/dllib/examples/nnframes/xgboost/xgboost_example.py
@@ -39,11 +39,12 @@
def Processdata(filepath, demo):
'''
- preProcess the data read from filepath
+ preProcess the data read from filepath
:param filepath:
:return: assembledf:
'''
sparkConf = init_spark_conf().setAppName("testNNClassifer")
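+ # This example does not encrypt Rabit traffic (see the deprecation note in the PPML docs), so opt out of XGBoost's SSL check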
+ sparkConf = sparkConf.set("xgboost.spark.ignoreSsl", True)
sc = init_nncontext(sparkConf)
sqlContext = SQLContext(sc)
if demo:
diff --git a/python/dllib/examples/nnframes/xgboost/xgboost_regressor.py b/python/dllib/examples/nnframes/xgboost/xgboost_regressor.py
index b835ee26246..58a748867b8 100644
--- a/python/dllib/examples/nnframes/xgboost/xgboost_regressor.py
+++ b/python/dllib/examples/nnframes/xgboost/xgboost_regressor.py
@@ -38,11 +38,12 @@
def Processdata(filepath, demo):
'''
- preProcess the data read from filepath
+ preProcess the data read from filepath
:param filepath:
:return: assembledf:
'''
sparkConf = init_spark_conf().setAppName("testNNClassifer")
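+ # This example does not encrypt Rabit traffic (see the deprecation note in the PPML docs), so opt out of XGBoost's SSL check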
+ sparkConf = sparkConf.set("xgboost.spark.ignoreSsl", True)
sc = init_nncontext(sparkConf)
sqlContext = SQLContext(sc)
if demo: