From 50d6f0d7b31d0529da01bddd17c7b0694186285e Mon Sep 17 00:00:00 2001 From: dding3 Date: Tue, 30 Aug 2022 10:05:57 -0700 Subject: [PATCH] Fix processing for several orca tutorials so it can use the original dataset downloaded from website (#5572) --- python/orca/dev/test/run-tutorial-xshards.sh | 12 ++++++------ python/orca/tutorial/xshards/diabetes.py | 10 +++++----- python/orca/tutorial/xshards/ionosphere.py | 6 +++--- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/python/orca/dev/test/run-tutorial-xshards.sh b/python/orca/dev/test/run-tutorial-xshards.sh index 0bc380cd257..82ced626b54 100755 --- a/python/orca/dev/test/run-tutorial-xshards.sh +++ b/python/orca/dev/test/run-tutorial-xshards.sh @@ -64,11 +64,11 @@ echo "#3 Running diabetes" #timer start=$(date "+%s") -if [ -f ${BIGDL_ROOT}/python/orca/tutorial/xshards/pima-indians-diabetes-test.csv ] +if [ -f ${BIGDL_ROOT}/python/orca/tutorial/xshards/pima-indians-diabetes.csv ] then - echo "pima-indians-diabetes-test.csv already exists" + echo "pima-indians-diabetes.csv already exists" else - wget -nv $FTP_URI/analytics-zoo-data/xshards/pima-indians-diabetes-test.csv -P ${BIGDL_ROOT}/python/orca/tutorial/xshards/ + wget -nv $FTP_URI/analytics-zoo-data/xshards/pima-indians-diabetes.csv -P ${BIGDL_ROOT}/python/orca/tutorial/xshards/ fi sed -i "s/epochs=150/epochs=2/g" diabetes.py @@ -82,11 +82,11 @@ echo "#4 Running ionosphere" #timer start=$(date "+%s") -if [ -f ${BIGDL_ROOT}/python/orca/tutorial/xshards/new_ionosphere.csv ] +if [ -f ${BIGDL_ROOT}/python/orca/tutorial/xshards/ionosphere.csv ] then - echo "new_ionosphere.csv already exists" + echo "ionosphere.csv already exists" else - wget -nv $FTP_URI/analytics-zoo-data/xshards/new_ionosphere.csv -P ${BIGDL_ROOT}/python/orca/tutorial/xshards/ + wget -nv $FTP_URI/analytics-zoo-data/xshards/ionosphere.csv -P ${BIGDL_ROOT}/python/orca/tutorial/xshards/ fi sed -i "s/epochs=100/epochs=2/g" ionosphere.py diff --git 
a/python/orca/tutorial/xshards/diabetes.py b/python/orca/tutorial/xshards/diabetes.py index c4e0b2828ae..04eac4dc487 100644 --- a/python/orca/tutorial/xshards/diabetes.py +++ b/python/orca/tutorial/xshards/diabetes.py @@ -25,8 +25,9 @@ init_orca_context(cluster_mode="local", cores=4, memory="3g") -path = 'pima-indians-diabetes-test.csv' -data_shard = bigdl.orca.data.pandas.read_csv(path) +path = 'pima-indians-diabetes.csv' +data_shard = bigdl.orca.data.pandas.read_csv(path, header=None) +column = list(data_shard.get_schema()['columns']) model = Sequential() model.add(Dense(12, input_shape=(8,), activation='relu')) @@ -35,9 +36,8 @@ model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy']) -data_shard = data_shard.assembleFeatureLabelCols(featureCols=['f1', 'f2', 'f3', - 'f4', 'f5', 'f6', 'f7', 'f8'], - labelCols=['label']) +data_shard = data_shard.assembleFeatureLabelCols(featureCols=column[:-1], + labelCols=[column[-1]]) est = Estimator.from_keras(keras_model=model) est.fit(data=data_shard, diff --git a/python/orca/tutorial/xshards/ionosphere.py b/python/orca/tutorial/xshards/ionosphere.py index 135983c58ba..80ec753056e 100644 --- a/python/orca/tutorial/xshards/ionosphere.py +++ b/python/orca/tutorial/xshards/ionosphere.py @@ -64,8 +64,8 @@ def forward(self, X): init_orca_context(memory="4g") -path = 'new_ionosphere.csv' -data_shard = bigdl.orca.data.pandas.read_csv(path) +path = 'ionosphere.csv' +data_shard = bigdl.orca.data.pandas.read_csv(path, header=None) column = data_shard.get_schema()['columns'] @@ -74,7 +74,7 @@ def forward(self, X): def update_label_to_zero_base(df): - df['_c34'] = df['_c34'] - 1 + df['34'] = df['34'] - 1 return df data_shard = data_shard.transform_shard(update_label_to_zero_base)