From ea8b838e16aea5af3bacc9bf5dbe18edb2ae7abe Mon Sep 17 00:00:00 2001
From: gc-fu <guancheng.fu@intel.com>
Date: Tue, 23 Aug 2022 11:11:40 +0800
Subject: [PATCH 1/4] Fix the error in docker-graphene/README.md

When a Python file is specified as the main job for spark-submit,
dependencies can only be provided through --py-files.

Using --jars makes Spark assume that we are trying to execute a Java
job, so it then tries to find the Java main class.
---
 ppml/trusted-big-data-ml/python/docker-graphene/README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ppml/trusted-big-data-ml/python/docker-graphene/README.md b/ppml/trusted-big-data-ml/python/docker-graphene/README.md
index 34d7b1f0832..ef688070a1a 100644
--- a/ppml/trusted-big-data-ml/python/docker-graphene/README.md
+++ b/ppml/trusted-big-data-ml/python/docker-graphene/README.md
@@ -283,6 +283,8 @@ The result should be similar to
 
 ##### Example 5: XGBoost Regressor
 
+The data source `Boston_Housing.csv` can be found [here](https://github.com/selva86/datasets/blob/master/BostonHousing.csv).
+
 Before running the example, make sure that `Boston_Housing.csv` is under `work/data` directory or the same path in the command. Run the example with SGX spark local mode with the following command in the terminal. Replace `your_IP_address` with your IP address and `path_of_boston_housing_csv` with your path of `Boston_Housing.csv`.
 
 ```bash
@@ -295,7 +297,6 @@ Before running the example, make sure that `Boston_Housing.csv` is under `work/d
   --conf spark.executor.extraClassPath=/ppml/trusted-big-data-ml/work/bigdl-2.1.0-SNAPSHOT/jars/* \
   --conf spark.driver.extraClassPath=/ppml/trusted-big-data-ml/work/bigdl-2.1.0-SNAPSHOT/jars/* \
   --properties-file /ppml/trusted-big-data-ml/work/bigdl-2.1.0-SNAPSHOT/conf/spark-bigdl.conf \
-  --jars /ppml/trusted-big-data-ml/work/bigdl-2.1.0-SNAPSHOT/jars/* \
   --py-files /ppml/trusted-big-data-ml/work/bigdl-2.1.0-SNAPSHOT/python/bigdl-orca-spark_3.1.2-2.1.0-SNAPSHOT-python-api.zip \
   --executor-memory 2g \
   /ppml/trusted-big-data-ml/work/examples/pyzoo/xgboost/xgboost_example.py \
@@ -372,7 +373,6 @@ After downloading the dataset, make sure that `pima-indians-diabetes.data.csv` i
   --conf spark.executor.extraClassPath=/ppml/trusted-big-data-ml/work/bigdl-2.1.0-SNAPSHOT/jars/* \
   --conf spark.driver.extraClassPath=/ppml/trusted-big-data-ml/work/bigdl-2.1.0-SNAPSHOT/jars/* \
   --properties-file /ppml/trusted-big-data-ml/work/bigdl-2.1.0-SNAPSHOT/conf/spark-bigdl.conf \
-  --jars /ppml/trusted-big-data-ml/work/bigdl-2.1.0-SNAPSHOT/jars/* \
   --py-files /ppml/trusted-big-data-ml/work/bigdl-2.1.0-SNAPSHOT/python/bigdl-orca-spark_3.1.2-2.1.0-SNAPSHOT-python-api.zip \
   --executor-memory 2g \
   /ppml/trusted-big-data-ml/work/examples/pyzoo/xgboost/xgboost_classifier.py \

From 86d43ce3d478dad763a0318ce32518ed8be0034e Mon Sep 17 00:00:00 2001
From: gc-fu <guancheng.fu@intel.com>
Date: Tue, 23 Aug 2022 17:52:19 +0800
Subject: [PATCH 2/4] Add instructions on how to process Boston_Housing.csv

---
 .../python/docker-graphene/README.md                  | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/ppml/trusted-big-data-ml/python/docker-graphene/README.md b/ppml/trusted-big-data-ml/python/docker-graphene/README.md
index ef688070a1a..7c95cb8c7e3 100644
--- a/ppml/trusted-big-data-ml/python/docker-graphene/README.md
+++ b/ppml/trusted-big-data-ml/python/docker-graphene/README.md
@@ -287,6 +287,17 @@ The data source `Boston_Housing.csv` can be found at [here](https://github.com/s
 
 Before running the example, make sure that `Boston_Housing.csv` is under `work/data` directory or the same path in the command. Run the example with SGX spark local mode with the following command in the terminal. Replace `your_IP_address` with your IP address and `path_of_boston_housing_csv` with your path of `Boston_Housing.csv`.
 
+
+Also, please be noted that the data within Boston_Housing.csv needs to be processed beforing handing to xgboost_exmaple.py.
+
+The data for column "chas" is in type "string" and we need to delete all the quotation marks so that the xgboost_example.py can successfully load the data.
+
+Before changing:
+> 0.00632,18,2.31,**"0"**,0.538,6.575,65.2,4.09,1,296,15.3,396.9,4.98,24
+
+After changing:
+> 0.00632,18,2.31,**0**,0.538,6.575,65.2,4.09,1,296,15.3,396.9,4.98,24
+
 ```bash
 /graphene/Tools/argv_serializer bash -c "export RABIT_TRACKER_IP=your_IP_address && /opt/jdk8/bin/java -cp \
     '/ppml/trusted-big-data-ml/work/bigdl-2.1.0-SNAPSHOT/jars/*:/ppml/trusted-big-data-ml/work/spark-3.1.2/conf/:/ppml/trusted-big-data-ml/work/spark-3.1.2/jars/*' \

From 0bb3a8d79318e4b3ee8470773f6ee7a810ad272f Mon Sep 17 00:00:00 2001
From: gc-fu <guancheng.fu@intel.com>
Date: Wed, 24 Aug 2022 09:46:38 +0800
Subject: [PATCH 3/4] Change to use the correct path for xgboost examples

---
 ppml/trusted-big-data-ml/python/docker-graphene/README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ppml/trusted-big-data-ml/python/docker-graphene/README.md b/ppml/trusted-big-data-ml/python/docker-graphene/README.md
index 7c95cb8c7e3..9db9a0a5c84 100644
--- a/ppml/trusted-big-data-ml/python/docker-graphene/README.md
+++ b/ppml/trusted-big-data-ml/python/docker-graphene/README.md
@@ -310,7 +310,7 @@ After changing:
   --properties-file /ppml/trusted-big-data-ml/work/bigdl-2.1.0-SNAPSHOT/conf/spark-bigdl.conf \
   --py-files /ppml/trusted-big-data-ml/work/bigdl-2.1.0-SNAPSHOT/python/bigdl-orca-spark_3.1.2-2.1.0-SNAPSHOT-python-api.zip \
   --executor-memory 2g \
-  /ppml/trusted-big-data-ml/work/examples/pyzoo/xgboost/xgboost_example.py \
+  /ppml/trusted-big-data-ml/work/bigdl-2.1.0-SNAPSHOT/examples/dllib/nnframes/xgboost/xgboost_example.py \
   --file-path path_of_boston_housing_csv" > /ppml/trusted-big-data-ml/secured-argvs
 ./init.sh
 SGX=1 ./pal_loader bash 2>&1 | tee test-zoo-xgboost-regressor-sgx.log
@@ -386,7 +386,7 @@ After downloading the dataset, make sure that `pima-indians-diabetes.data.csv` i
   --properties-file /ppml/trusted-big-data-ml/work/bigdl-2.1.0-SNAPSHOT/conf/spark-bigdl.conf \
   --py-files /ppml/trusted-big-data-ml/work/bigdl-2.1.0-SNAPSHOT/python/bigdl-orca-spark_3.1.2-2.1.0-SNAPSHOT-python-api.zip \
   --executor-memory 2g \
-  /ppml/trusted-big-data-ml/work/examples/pyzoo/xgboost/xgboost_classifier.py \
+  /ppml/trusted-big-data-ml/work/bigdl-2.1.0-SNAPSHOT/examples/dllib/nnframes/xgboost/xgboost_classifier.py \
   -f path_of_pima_indians_diabetes_csv" > /ppml/trusted-big-data-ml/secured-argvs
 ./init.sh
 SGX=1 ./pal_loader bash 2>&1 | tee test-xgboost-classifier-sgx.log

From 32ae255fa66b60fa885eeb657c4ad750fd9f1692 Mon Sep 17 00:00:00 2001
From: gc-fu <guancheng.fu@intel.com>
Date: Fri, 26 Aug 2022 09:05:59 +0800
Subject: [PATCH 4/4] Fix README typos

---
 ppml/trusted-big-data-ml/python/docker-graphene/README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ppml/trusted-big-data-ml/python/docker-graphene/README.md b/ppml/trusted-big-data-ml/python/docker-graphene/README.md
index 9db9a0a5c84..0c28dee1a23 100644
--- a/ppml/trusted-big-data-ml/python/docker-graphene/README.md
+++ b/ppml/trusted-big-data-ml/python/docker-graphene/README.md
@@ -288,9 +288,9 @@ The data source `Boston_Housing.csv` can be found at [here](https://github.com/s
 Before running the example, make sure that `Boston_Housing.csv` is under `work/data` directory or the same path in the command. Run the example with SGX spark local mode with the following command in the terminal. Replace `your_IP_address` with your IP address and `path_of_boston_housing_csv` with your path of `Boston_Housing.csv`.
 
 
-Also, please be noted that the data within Boston_Housing.csv needs to be processed beforing handing to xgboost_exmaple.py.
+Note that the data in `Boston_Housing.csv` needs to be pre-processed before training with `xgboost_example.py`.
 
-The data for column "chas" is in type "string" and we need to delete all the quotation marks so that the xgboost_example.py can successfully load the data.
+The values in the "chas" column are stored as quoted strings, so we need to delete all the **quotation marks (")** so that `xgboost_example.py` can successfully load the data.
 
 Before changing:
 > 0.00632,18,2.31,**"0"**,0.538,6.575,65.2,4.09,1,296,15.3,396.9,4.98,24