From 50d6f0d7b31d0529da01bddd17c7b0694186285e Mon Sep 17 00:00:00 2001
From: dding3 <ding.ding@intel.com>
Date: Tue, 30 Aug 2022 10:05:57 -0700
Subject: [PATCH] Fix processing for several orca tutorials so they can use
 the original datasets downloaded from the website (#5572)

---
 python/orca/dev/test/run-tutorial-xshards.sh | 12 ++++++------
 python/orca/tutorial/xshards/diabetes.py     | 10 +++++-----
 python/orca/tutorial/xshards/ionosphere.py   |  6 +++---
 3 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/python/orca/dev/test/run-tutorial-xshards.sh b/python/orca/dev/test/run-tutorial-xshards.sh
index 0bc380cd257..82ced626b54 100755
--- a/python/orca/dev/test/run-tutorial-xshards.sh
+++ b/python/orca/dev/test/run-tutorial-xshards.sh
@@ -64,11 +64,11 @@ echo "#3 Running diabetes"
 #timer
 start=$(date "+%s")
 
-if [ -f ${BIGDL_ROOT}/python/orca/tutorial/xshards/pima-indians-diabetes-test.csv ]
+if [ -f ${BIGDL_ROOT}/python/orca/tutorial/xshards/pima-indians-diabetes.csv ]
 then
-    echo "pima-indians-diabetes-test.csv already exists"
+    echo "pima-indians-diabetes.csv already exists"
 else
-    wget -nv $FTP_URI/analytics-zoo-data/xshards/pima-indians-diabetes-test.csv -P ${BIGDL_ROOT}/python/orca/tutorial/xshards/
+    wget -nv $FTP_URI/analytics-zoo-data/xshards/pima-indians-diabetes.csv -P ${BIGDL_ROOT}/python/orca/tutorial/xshards/
 fi
 
 sed -i "s/epochs=150/epochs=2/g" diabetes.py
@@ -82,11 +82,11 @@ echo "#4 Running ionosphere"
 #timer
 start=$(date "+%s")
 
-if [ -f ${BIGDL_ROOT}/python/orca/tutorial/xshards/new_ionosphere.csv ]
+if [ -f ${BIGDL_ROOT}/python/orca/tutorial/xshards/ionosphere.csv ]
 then
-    echo "new_ionosphere.csv already exists"
+    echo "ionosphere.csv already exists"
 else
-    wget -nv $FTP_URI/analytics-zoo-data/xshards/new_ionosphere.csv -P ${BIGDL_ROOT}/python/orca/tutorial/xshards/
+    wget -nv $FTP_URI/analytics-zoo-data/xshards/ionosphere.csv -P ${BIGDL_ROOT}/python/orca/tutorial/xshards/
 fi
 
 sed -i "s/epochs=100/epochs=2/g" ionosphere.py
diff --git a/python/orca/tutorial/xshards/diabetes.py b/python/orca/tutorial/xshards/diabetes.py
index c4e0b2828ae..04eac4dc487 100644
--- a/python/orca/tutorial/xshards/diabetes.py
+++ b/python/orca/tutorial/xshards/diabetes.py
@@ -25,8 +25,9 @@
 
 init_orca_context(cluster_mode="local", cores=4, memory="3g")
 
-path = 'pima-indians-diabetes-test.csv'
-data_shard = bigdl.orca.data.pandas.read_csv(path)
+path = 'pima-indians-diabetes.csv'
+data_shard = bigdl.orca.data.pandas.read_csv(path, header=None)
+column = list(data_shard.get_schema()['columns'])
 
 model = Sequential()
 model.add(Dense(12, input_shape=(8,), activation='relu'))
@@ -35,9 +36,8 @@
 
 model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
 
-data_shard = data_shard.assembleFeatureLabelCols(featureCols=['f1', 'f2', 'f3',
-                                                              'f4', 'f5', 'f6', 'f7', 'f8'],
-                                                 labelCols=['label'])
+data_shard = data_shard.assembleFeatureLabelCols(featureCols=column[:-1],
+                                                 labelCols=[column[-1]])  # one label column
 
 est = Estimator.from_keras(keras_model=model)
 est.fit(data=data_shard,
diff --git a/python/orca/tutorial/xshards/ionosphere.py b/python/orca/tutorial/xshards/ionosphere.py
index 135983c58ba..80ec753056e 100644
--- a/python/orca/tutorial/xshards/ionosphere.py
+++ b/python/orca/tutorial/xshards/ionosphere.py
@@ -64,8 +64,8 @@ def forward(self, X):
 
 init_orca_context(memory="4g")
 
-path = 'new_ionosphere.csv'
-data_shard = bigdl.orca.data.pandas.read_csv(path)
+path = 'ionosphere.csv'
+data_shard = bigdl.orca.data.pandas.read_csv(path, header=None)
 
 column = data_shard.get_schema()['columns']
 
@@ -74,7 +74,7 @@ def forward(self, X):
 
 
 def update_label_to_zero_base(df):
-    df['_c34'] = df['_c34'] - 1
+    df['34'] = df['34'] - 1
     return df
 data_shard = data_shard.transform_shard(update_label_to_zero_base)