forked from Oneflow-Inc/OneFlow-Benchmark
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtrain.sh
executable file
·40 lines (35 loc) · 906 Bytes
/
train.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
# Launch ResNet-50 training on ImageNet via of_cnn_train_val.py.
#
# Usage: train.sh [NUM_EPOCH] [DATA_ROOT]
#   NUM_EPOCH  value passed as --num_epoch (default: 50)
#   DATA_ROOT  directory holding the train/ and validation/ data
#              (default: /data/imagenet/ofrecord)
#
# The combined stdout/stderr of the training run is shown on the terminal
# and saved to ../logs/resnet_training.log.
#
# Exit status: the status of the training pipeline. When the shell supports
# `pipefail`, a failing python3 run yields a non-zero exit status instead of
# being hidden behind `tee`.

# Make a failure of python3 visible through the `| tee` pipeline. The probe
# runs in a subshell so shells without this option are left untouched.
if (set -o pipefail) 2>/dev/null; then
  set -o pipefail
fi

# Clean up leftovers from earlier runs: core.* files (presumably core dumps)
# and previously saved snapshots.
rm -rf -- core.*
rm -rf ./output/snapshots/*

# First argument: number of epochs; falls back to 50 when missing or empty.
NUM_EPOCH=${1:-50}
echo "NUM_EPOCH=${NUM_EPOCH}"

# training with imagenet
# Second argument: dataset root; falls back to the default location when
# missing or empty.
DATA_ROOT=${2:-/data/imagenet/ofrecord}
echo "DATA_ROOT=${DATA_ROOT}"

LOG_FOLDER=../logs
mkdir -p "${LOG_FOLDER}"
LOGFILE=${LOG_FOLDER}/resnet_training.log

# Expansions are quoted so paths containing spaces or glob characters are
# passed to the trainer as single arguments.
python3 of_cnn_train_val.py \
  --train_data_dir="${DATA_ROOT}/train" \
  --train_data_part_num=256 \
  --val_data_dir="${DATA_ROOT}/validation" \
  --val_data_part_num=256 \
  --num_nodes=1 \
  --gpu_num_per_node=8 \
  --optimizer="sgd" \
  --momentum=0.875 \
  --label_smoothing=0.1 \
  --learning_rate=1.024 \
  --loss_print_every_n_iter=100 \
  --batch_size_per_device=128 \
  --val_batch_size_per_device=50 \
  --num_epoch="${NUM_EPOCH}" \
  --model="resnet50" 2>&1 | tee "${LOGFILE}"
# Remember the pipeline status before `echo` overwrites $?.
train_status=$?

echo "Writing log to ${LOGFILE}"
exit "${train_status}"