-
Notifications
You must be signed in to change notification settings - Fork 91
/
Copy pathtrain_classifier.sh
executable file
·103 lines (95 loc) · 2.6 KB
/
train_classifier.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
#!/bin/bash
# The first argument must be an existing directory; otherwise print the
# usage line and stop with exit status 1.
if [[ ! -d "$1" ]]; then
  # Usage text is a diagnostic, so it goes to stderr rather than stdout.
  echo "Usage: train_classifier.sh data_dir [dataset_name=ag_news]" >&2
  exit 1
fi
# Positional arguments: $1 is the data directory, $2 the optional dataset
# name (ag_news when omitted or empty).
DATADIR="$1"
DATASET="${2:-ag_news}"

# The model directory and the export directory are the same path.
OUTPUT="${DATADIR}/models/${DATASET}"
EXPORT_DIR="${OUTPUT}"

# Raw train/test input files.
INPUT_TRAIN_FILE="${DATADIR}/${DATASET}.train"
INPUT_TEST_FILE="${DATADIR}/${DATASET}.test"

# Glob patterns (stored unexpanded) for the train/test tfrecords files.
TRAIN_FILE="${DATADIR}/${DATASET}.train.tfrecords-*"
TEST_FILE="${DATADIR}/${DATASET}.test.tfrecords-*"
#######################################
# Makes sure at least one file matching a glob pattern exists, running
# process_input.py on the raw input file when none is found.
# Arguments:
#   $1 - glob pattern of the expected record files
#   $2 - raw input file passed to process_input.py via --facebook_input
#   $3 - split name used in the progress message ("training" or "test")
# Outputs:
#   Progress messages on stdout; the failure message on stderr.
# Exits:
#   With status 1 if no matching file exists after processing.
#######################################
ensure_records() {
  local pattern=$1
  local input=$2
  local split=$3

  echo "Looking for $pattern"
  # compgen -G expands the pattern itself, so the variable can stay quoted
  # and a data directory containing spaces does not get word-split.
  if compgen -G "$pattern" > /dev/null; then
    echo "Found"
    return 0
  fi

  echo "Not Found $pattern"
  echo "Processing $split dataset file"
  python process_input.py "--facebook_input=${input}" --ngrams=2,3,4

  # Success is judged by the presence of the output files, not by the
  # exit status of process_input.py.
  if compgen -G "$pattern" > /dev/null; then
    echo "$pattern created"
  else
    echo "Failed to create $pattern" >&2
    exit 1
  fi
}

ensure_records "$TRAIN_FILE" "$INPUT_TRAIN_FILE" "training"
ensure_records "$TEST_FILE" "$INPUT_TEST_FILE" "test"
# Paths of the label and vocabulary files handed to classifier.py.
LABELS="${DATADIR}/${DATASET}.train.labels"
VOCAB="${DATADIR}/${DATASET}.train.vocab"

# Stop here if the vocabulary file is missing: counting its lines would
# otherwise yield 0 and training would start with --vocab_size=0.
if [[ ! -r "$VOCAB" ]]; then
  echo "Vocabulary file not found or unreadable: $VOCAB" >&2
  exit 1
fi

# Vocabulary size is the number of lines in the vocabulary file. Some wc
# implementations pad the count with blanks, so strip all whitespace.
VOCAB_SIZE=$(wc -l < "$VOCAB")
VOCAB_SIZE=${VOCAB_SIZE//[[:space:]]/}
echo "$VOCAB"
echo "$VOCAB_SIZE"
# Uncomment the command below (and comment out the mpirun command that follows) if you don't have horovod installed.
# python classifier.py \
# --train_records=$TRAIN_FILE \
# --eval_records=$TEST_FILE \
# --label_file=$LABELS \
# --vocab_file=$VOCAB \
# --vocab_size=$VOCAB_SIZE \
# --num_oov_vocab_buckets=100 \
# --model_dir=$OUTPUT \
# --export_dir=$EXPORT_DIR \
# --embedding_dimension=10 \
# --num_ngram_buckets=100000 \
# --ngram_embedding_dimension=10 \
# --learning_rate=0.01 \
# --batch_size=32 \
# --train_steps=5000 \
# --eval_steps=100 \
# --num_epochs=1 \
# --num_threads=1 \
# --nouse_ngrams \
# --nolog_device_placement \
# --debug
#######################################
# Launches classifier.py under mpirun with 2 processes and --horovod set.
# Globals (read):
#   TRAIN_FILE, TEST_FILE, LABELS, VOCAB, VOCAB_SIZE, OUTPUT, EXPORT_DIR
# Returns:
#   The exit status of mpirun.
#######################################
run_training() {
  # Every expansion is quoted so each flag stays a single argument even when
  # the data directory contains spaces, and the glob patterns in TRAIN_FILE
  # and TEST_FILE are passed through unexpanded by the shell (presumably
  # classifier.py resolves them itself -- confirm against classifier.py).
  mpirun -np 2 python classifier.py \
    "--train_records=${TRAIN_FILE}" \
    "--eval_records=${TEST_FILE}" \
    "--label_file=${LABELS}" \
    "--vocab_file=${VOCAB}" \
    "--vocab_size=${VOCAB_SIZE}" \
    --num_oov_vocab_buckets=100 \
    "--model_dir=${OUTPUT}" \
    "--export_dir=${EXPORT_DIR}" \
    --embedding_dimension=10 \
    --num_ngram_buckets=100000 \
    --ngram_embedding_dimension=10 \
    --learning_rate=0.01 \
    --batch_size=32 \
    --train_steps=5000 \
    --eval_steps=100 \
    --num_epochs=1 \
    --num_threads=1 \
    --nouse_ngrams \
    --nolog_device_placement \
    --horovod \
    --debug
}

run_training