From 7e46dad20e6906f728df77a374ba9b7decb91588 Mon Sep 17 00:00:00 2001 From: dding3 Date: Mon, 29 Aug 2022 10:43:33 -0700 Subject: [PATCH 1/3] update processing --- python/orca/tutorial/xshards/ionosphere.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/orca/tutorial/xshards/ionosphere.py b/python/orca/tutorial/xshards/ionosphere.py index 135983c58ba..80ec753056e 100644 --- a/python/orca/tutorial/xshards/ionosphere.py +++ b/python/orca/tutorial/xshards/ionosphere.py @@ -64,8 +64,8 @@ def forward(self, X): init_orca_context(memory="4g") -path = 'new_ionosphere.csv' -data_shard = bigdl.orca.data.pandas.read_csv(path) +path = 'ionosphere.csv' +data_shard = bigdl.orca.data.pandas.read_csv(path, header=None) column = data_shard.get_schema()['columns'] @@ -74,7 +74,7 @@ def forward(self, X): def update_label_to_zero_base(df): - df['_c34'] = df['_c34'] - 1 + df['34'] = df['34'] - 1 return df data_shard = data_shard.transform_shard(update_label_to_zero_base) From 7969c570cc8e0fbf63e57bb9d1acbec150b52c3c Mon Sep 17 00:00:00 2001 From: dding3 Date: Mon, 29 Aug 2022 11:17:54 -0700 Subject: [PATCH 2/3] fix processing for diabetes --- python/orca/tutorial/xshards/diabetes.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/python/orca/tutorial/xshards/diabetes.py b/python/orca/tutorial/xshards/diabetes.py index c4e0b2828ae..04eac4dc487 100644 --- a/python/orca/tutorial/xshards/diabetes.py +++ b/python/orca/tutorial/xshards/diabetes.py @@ -25,8 +25,9 @@ init_orca_context(cluster_mode="local", cores=4, memory="3g") -path = 'pima-indians-diabetes-test.csv' -data_shard = bigdl.orca.data.pandas.read_csv(path) +path = 'pima-indians-diabetes.csv' +data_shard = bigdl.orca.data.pandas.read_csv(path, header=None) +column = list(data_shard.get_schema()['columns']) model = Sequential() model.add(Dense(12, input_shape=(8,), activation='relu')) @@ -35,9 +36,8 @@ model.compile(loss='binary_crossentropy', optimizer='adam', 
metrics=['accuracy']) -data_shard = data_shard.assembleFeatureLabelCols(featureCols=['f1', 'f2', 'f3', - 'f4', 'f5', 'f6', 'f7', 'f8'], - labelCols=['label']) +data_shard = data_shard.assembleFeatureLabelCols(featureCols=column[:-1], + labelCols=[column[-1]]) est = Estimator.from_keras(keras_model=model) est.fit(data=data_shard, From 4abbf38c8ce50856b91548cee52bf708aff26678 Mon Sep 17 00:00:00 2001 From: dding3 Date: Tue, 30 Aug 2022 09:41:25 -0700 Subject: [PATCH 3/3] update file name --- python/orca/dev/test/run-tutorial-xshards.sh | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/python/orca/dev/test/run-tutorial-xshards.sh b/python/orca/dev/test/run-tutorial-xshards.sh index 0bc380cd257..82ced626b54 100755 --- a/python/orca/dev/test/run-tutorial-xshards.sh +++ b/python/orca/dev/test/run-tutorial-xshards.sh @@ -64,11 +64,11 @@ echo "#3 Running diabetes" #timer start=$(date "+%s") -if [ -f ${BIGDL_ROOT}/python/orca/tutorial/xshards/pima-indians-diabetes-test.csv ] +if [ -f ${BIGDL_ROOT}/python/orca/tutorial/xshards/pima-indians-diabetes.csv ] then - echo "pima-indians-diabetes-test.csv already exists" + echo "pima-indians-diabetes.csv already exists" else - wget -nv $FTP_URI/analytics-zoo-data/xshards/pima-indians-diabetes-test.csv -P ${BIGDL_ROOT}/python/orca/tutorial/xshards/ + wget -nv $FTP_URI/analytics-zoo-data/xshards/pima-indians-diabetes.csv -P ${BIGDL_ROOT}/python/orca/tutorial/xshards/ fi sed -i "s/epochs=150/epochs=2/g" diabetes.py @@ -82,11 +82,11 @@ echo "#4 Running ionosphere" #timer start=$(date "+%s") -if [ -f ${BIGDL_ROOT}/python/orca/tutorial/xshards/new_ionosphere.csv ] +if [ -f ${BIGDL_ROOT}/python/orca/tutorial/xshards/ionosphere.csv ] then - echo "new_ionosphere.csv already exists" + echo "ionosphere.csv already exists" else - wget -nv $FTP_URI/analytics-zoo-data/xshards/new_ionosphere.csv -P 
${BIGDL_ROOT}/python/orca/tutorial/xshards/ fi sed -i "s/epochs=100/epochs=2/g" ionosphere.py