forked from mravanelli/SincNet
-
Notifications
You must be signed in to change notification settings - Fork 0
/
TIMIT_preparation.py
80 lines (55 loc) · 1.81 KB
/
TIMIT_preparation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
#!/usr/bin/env python3
# TIMIT_preparation
# Mirco Ravanelli
# Mila - University of Montreal
# July 2018
# Description:
# This code prepares TIMIT for the following speaker identification experiments.
# It removes start and end silences according to the information reported in the *.wrd files and normalizes the amplitude of each sentence.
# How to run it:
# python TIMIT_preparation.py $TIMIT_FOLDER $OUTPUT_FOLDER data_lists/TIMIT_all.scp
# NOTE: This script expects filenames in lowercase (e.g., "train/dr1/fcjf0/si1027.wav" rather than "TRAIN/DR1/FCJF0/SI1027.WAV").
import shutil
import os
import soundfile as sf
import numpy as np
import sys
def ReadList(list_file):
    """Read a text file and return its lines with trailing whitespace removed.

    Args:
        list_file: Path of the text file to read.

    Returns:
        A list with one string per line of the file, in file order. Trailing
        whitespace (including the newline) is stripped from every line; blank
        lines are kept as empty strings.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # The context manager guarantees the handle is closed even if reading
    # fails part-way through.
    with open(list_file, "r") as f:
        return [line.rstrip() for line in f]
def copy_folder(in_folder, out_folder):
    """Replicate the directory tree of `in_folder` under `out_folder`.

    Only directories are created; no files are copied. The operation is
    idempotent: directories that already exist in `out_folder` are left
    alone and any missing ones are created, so a pre-existing or partially
    populated output folder still ends up with the full structure.

    Args:
        in_folder: Root of the directory tree to mirror.
        out_folder: Root under which the same sub-directory layout is created.

    Raises:
        OSError: If `in_folder` (or one of its sub-directories) cannot be
            listed, or a destination directory cannot be created.
    """

    def _reraise(err):
        # os.walk swallows listing errors by default; surface them instead so
        # a missing/unreadable source folder is reported immediately.
        raise err

    # followlinks=True: symlinked directories are descended into and mirrored
    # as real directories.
    for cur_dir, _subdirs, _files in os.walk(in_folder, followlinks=True,
                                             onerror=_reraise):
        rel_dir = os.path.relpath(cur_dir, in_folder)
        target = os.path.normpath(os.path.join(out_folder, rel_dir))
        os.makedirs(target, exist_ok=True)
def ig_f(dir, files):
    """Return the entries of `files` that are regular files inside `dir`.

    The signature matches what `shutil.copytree` expects for its `ignore`
    callback: it receives a directory path and the names it contains, and
    returns the names to skip. Returning every regular file leaves only the
    directories to be processed.

    Args:
        dir: Path of the directory being visited.
        files: Names of the entries found in `dir`.

    Returns:
        A list with the names from `files` for which
        `os.path.isfile(os.path.join(dir, name))` is true, in input order.
    """
    ignored = []
    for entry in files:
        candidate = os.path.join(dir, entry)
        if os.path.isfile(candidate):
            ignored.append(entry)
    return ignored
# Command-line arguments: <TIMIT folder> <output folder> <list of wav files>
if len(sys.argv) < 4:
    sys.exit("Usage: python TIMIT_preparation.py $TIMIT_FOLDER $OUTPUT_FOLDER data_lists/TIMIT_all.scp")

in_folder = sys.argv[1]
out_folder = sys.argv[2]
list_file = sys.argv[3]

# Read List file (one wav path per line, relative to in_folder)
list_sig = ReadList(list_file)

# Replicate input folder structure to output folder
copy_folder(in_folder, out_folder)

# Amplitude normalization and silence removal loop
for rel_path in list_sig:

    # Skip blank lines in the list file
    if not rel_path:
        continue

    # Open the wav file
    wav_file = in_folder + '/' + rel_path
    [signal, fs] = sf.read(wav_file)
    signal = signal.astype(np.float64)

    # Signal normalization: scale by the peak *magnitude* so the result lies
    # in [-1, 1] even when the largest excursion is negative. An all-zero
    # (or empty) signal is left untouched to avoid dividing by zero.
    peak = np.max(np.abs(signal)) if signal.size else 0.0
    if peak > 0:
        signal = signal / peak

    # Read wrd file: same path as the wav file with the extension swapped.
    # Only the extension is replaced, so ".wav" appearing elsewhere in the
    # path is not altered.
    wrd_file = os.path.splitext(wav_file)[0] + ".wrd"
    wrd_sig = [line for line in ReadList(wrd_file) if line.strip()]
    if not wrd_sig:
        raise ValueError("No word boundaries found in %s" % (wrd_file))

    # First field of the first line = first sample of speech,
    # second field of the last line = last sample of speech.
    beg_sig = int(wrd_sig[0].split()[0])
    end_sig = int(wrd_sig[-1].split()[1])

    # Remove silences
    signal = signal[beg_sig:end_sig]

    # Save normalized speech (make sure the destination folder exists)
    file_out = out_folder + '/' + rel_path
    out_dir = os.path.dirname(file_out)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    sf.write(file_out, signal, fs)

    print("Done %s" % (file_out))