-
Notifications
You must be signed in to change notification settings - Fork 2
/
run_prep.sh
42 lines (38 loc) · 1.67 KB
/
run_prep.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
# Stage 21: prepare data/lang_chain, a copy of data/lang_train whose topo file
# is replaced by the output of steps/nnet3/chain/gen_topo.py.
if [ "$stage" -le 21 ]; then
  # Create a version of the lang/ directory that has one state per phone in the
  # topo file. [note, it really has two states.. the first one is only repeated
  # once, the second one has zero or more repeats.]
  if [ -d data/lang_chain ]; then
    # Reuse an existing directory only when its L.fst is newer than the one in
    # data/lang_train; otherwise stop rather than guess which one is correct.
    if [ data/lang_chain/L.fst -nt data/lang_train/L.fst ]; then
      echo "$0: data/lang_chain already exists, not overwriting it; continuing"
    else
      echo "$0: data/lang_chain already exists and seems to be older than data/lang_train..."
      echo " ... not sure what to do. Exiting."
      exit 1;
    fi
  else
    cp -r data/lang_train data/lang_chain || exit 1;
    silphonelist=$(cat data/lang_chain/phones/silence.csl) || exit 1;
    nonsilphonelist=$(cat data/lang_chain/phones/nonsilence.csl) || exit 1;
    # Use our special topology... note that later on may have to tune this
    # topology.
    # The redirection truncates the copied topo before gen_topo.py starts, so
    # a failure leaves a directory that a re-run would accept as up to date
    # (its L.fst is freshly copied). Report that explicitly and stop.
    if ! steps/nnet3/chain/gen_topo.py "$nonsilphonelist" "$silphonelist" \
        >data/lang_chain/topo; then
      echo "$0: gen_topo.py failed; remove data/lang_chain before re-running" >&2
      exit 1;
    fi
  fi
fi
# Stage 22: run the project-local tree-building script on exp/chain/tree.
# The shard-creation stage reads exp/chain/tree/ali.*.gz and
# exp/chain/tree/final.mdl, so a failure here must stop the recipe instead of
# letting later stages run against a missing or partial tree directory.
if [ "$stage" -le 22 ]; then
  local/chain/build_new_tree.sh exp/chain/tree || exit 1;
fi
# Stage 23: create data shards for the training and development sets by
# running local/chain/make_shards.py under Slurm (srun).
# NOTE(review): "JOB" in the paths below is presumably a placeholder that
# make_shards.py replaces with the split index (1..100 / 1..8) -- confirm
# against make_shards.py.
if [ "$stage" -le 23 ]; then
  # Training set: 100 splits written to shards/train_960, with 8 CPUs
  # requested (-c8) to match --num-proc 8. The --aliark value is a piped
  # command that decompresses the alignments and passes them through
  # ali-to-pdf using exp/chain/tree/final.mdl.
  # Abort on failure so an incomplete train shard set is not silently accepted.
  srun --mem 24G --time 1-12:0:0 -c8 \
    local/chain/make_shards.py 100 shards/train_960 \
    --num-proc 8 \
    --wavscp data/train_960/split100/JOB/wav.scp \
    --text data/train_960/split100/JOB/text \
    --aliark "gunzip -c exp/chain/tree/ali.JOB.gz | ali-to-pdf exp/chain/tree/final.mdl ark:- ark:- |" \
    || exit 1;
  # Development set: 8 splits written to shards/dev_all, with 2 CPUs requested
  # (-c2) to match --num-proc 2; alignments come from ali.valid.JOB.gz.
  srun --mem 6G --time 12:0:0 -c2 \
    local/chain/make_shards.py 8 shards/dev_all \
    --num-proc 2 \
    --wavscp data/dev_all/split8/JOB/wav.scp \
    --text data/dev_all/split8/JOB/text \
    --aliark "gunzip -c exp/chain/tree/ali.valid.JOB.gz | ali-to-pdf exp/chain/tree/final.mdl ark:- ark:- |" \
    || exit 1;
fi