Skip to content

Commit

Permalink
Fix processing for several orca tutorials so it can use the original d…
Browse files Browse the repository at this point in the history
…ataset download from website (intel-analytics#5572)
  • Loading branch information
dding3 authored and ForJadeForest committed Sep 20, 2022
1 parent eda0a35 commit ea09210
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 14 deletions.
12 changes: 6 additions & 6 deletions python/orca/dev/test/run-tutorial-xshards.sh
Original file line number Diff line number Diff line change
Expand Up @@ -64,11 +64,11 @@ echo "#3 Running diabetes"
#timer
start=$(date "+%s")

if [ -f ${BIGDL_ROOT}/python/orca/tutorial/xshards/pima-indians-diabetes-test.csv ]
if [ -f ${BIGDL_ROOT}/python/orca/tutorial/xshards/pima-indians-diabetes.csv ]
then
echo "pima-indians-diabetes-test.csv already exists"
echo "pima-indians-diabetes.csv already exists"
else
wget -nv $FTP_URI/analytics-zoo-data/xshards/pima-indians-diabetes-test.csv -P ${BIGDL_ROOT}/python/orca/tutorial/xshards/
wget -nv $FTP_URI/analytics-zoo-data/xshards/pima-indians-diabetes.csv -P ${BIGDL_ROOT}/python/orca/tutorial/xshards/
fi

sed -i "s/epochs=150/epochs=2/g" diabetes.py
Expand All @@ -82,11 +82,11 @@ echo "#4 Running ionosphere"
#timer
start=$(date "+%s")

if [ -f ${BIGDL_ROOT}/python/orca/tutorial/xshards/new_ionosphere.csv ]
if [ -f ${BIGDL_ROOT}/python/orca/tutorial/xshards/ionosphere.csv ]
then
echo "new_ionosphere.csv already exists"
echo "ionosphere.csv already exists"
else
wget -nv $FTP_URI/analytics-zoo-data/xshards/new_ionosphere.csv -P ${BIGDL_ROOT}/python/orca/tutorial/xshards/
wget -nv $FTP_URI/analytics-zoo-data/xshards/ionosphere.csv -P ${BIGDL_ROOT}/python/orca/tutorial/xshards/
fi

sed -i "s/epochs=100/epochs=2/g" ionosphere.py
Expand Down
10 changes: 5 additions & 5 deletions python/orca/tutorial/xshards/diabetes.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,9 @@

init_orca_context(cluster_mode="local", cores=4, memory="3g")

path = 'pima-indians-diabetes-test.csv'
data_shard = bigdl.orca.data.pandas.read_csv(path)
path = 'pima-indians-diabetes.csv'
data_shard = bigdl.orca.data.pandas.read_csv(path, header=None)
column = list(data_shard.get_schema()['columns'])

model = Sequential()
model.add(Dense(12, input_shape=(8,), activation='relu'))
Expand All @@ -35,9 +36,8 @@

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

data_shard = data_shard.assembleFeatureLabelCols(featureCols=['f1', 'f2', 'f3',
'f4', 'f5', 'f6', 'f7', 'f8'],
labelCols=['label'])
data_shard = data_shard.assembleFeatureLabelCols(featureCols=column[:-1],
labelCols=list(column[-1]))

est = Estimator.from_keras(keras_model=model)
est.fit(data=data_shard,
Expand Down
6 changes: 3 additions & 3 deletions python/orca/tutorial/xshards/ionosphere.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,8 @@ def forward(self, X):

init_orca_context(memory="4g")

path = 'new_ionosphere.csv'
data_shard = bigdl.orca.data.pandas.read_csv(path)
path = 'ionosphere.csv'
data_shard = bigdl.orca.data.pandas.read_csv(path, header=None)

column = data_shard.get_schema()['columns']

Expand All @@ -74,7 +74,7 @@ def forward(self, X):


def update_label_to_zero_base(df):
df['_c34'] = df['_c34'] - 1
df['34'] = df['34'] - 1
return df
data_shard = data_shard.transform_shard(update_label_to_zero_base)

Expand Down

0 comments on commit ea09210

Please sign in to comment.