-
Notifications
You must be signed in to change notification settings - Fork 667
/
Copy pathlibrispeech_test.py
119 lines (100 loc) · 3.73 KB
/
librispeech_test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
import os
from pathlib import Path
from torchaudio.datasets import librispeech
from torchaudio_unittest.common_utils import (
TempDirMixin,
TorchaudioTestCase,
get_whitenoise,
save_wav,
normalize_wav,
)
# Digit words indexed by value (0-9). The test fixture joins the words for the
# speaker, chapter and utterance ids to give every dummy audio file a unique
# transcript.
NUMBERS = [
    'ZERO', 'ONE', 'TWO', 'THREE', 'FOUR',
    'FIVE', 'SIX', 'SEVEN', 'EIGHT', 'NINE',
]
class TestLibriSpeech(TempDirMixin, TorchaudioTestCase):
    """Check ``librispeech.LIBRISPEECH`` against a synthetic on-disk dataset.

    ``setUpClass`` writes 5 speakers x 3 chapters x 10 utterances of short
    white-noise ``.wav`` files plus one ``.trans.txt`` transcript per chapter
    into a temporary directory, and records the sample tuple expected for each
    file. The tests temporarily switch ``LIBRISPEECH._ext_audio`` to ``'.wav'``
    so the dataset picks up those files, then compare every item it yields
    with the recorded expectation.
    """

    backend = 'default'

    # Root of the temporary directory holding the mock dataset.
    root_dir = None
    # Expected (waveform, sample_rate, utterance, speaker_id, chapter_id,
    # utterance_id) tuples, in the order the tests expect the dataset to yield
    # them. Rebound to a fresh list by ``setUpClass``.
    samples = []

    @classmethod
    def setUpClass(cls):
        """Create the mock dataset on disk and record the expected samples."""
        # Chain up so class-level fixtures of the base classes still run.
        super().setUpClass()
        cls.root_dir = cls.get_base_temp_dir()
        # Bind a fresh list on this class instead of appending to the shared
        # class-level default, so expectations never leak between classes or
        # accumulate across repeated fixture runs.
        cls.samples = []

        dataset_dir = os.path.join(
            cls.root_dir, librispeech.FOLDER_IN_ARCHIVE, librispeech.URL
        )
        os.makedirs(dataset_dir, exist_ok=True)
        sample_rate = 16000  # 16kHz
        seed = 0  # incremented per file so every waveform is different

        for speaker_id in range(5):
            speaker_path = os.path.join(dataset_dir, str(speaker_id))
            os.makedirs(speaker_path, exist_ok=True)

            for chapter_id in range(3):
                chapter_path = os.path.join(speaker_path, str(chapter_id))
                os.makedirs(chapter_path, exist_ok=True)
                trans_content = []

                for utterance_id in range(10):
                    file_id = f'{speaker_id}-{chapter_id}-{utterance_id:04d}'
                    path = os.path.join(chapter_path, f'{file_id}.wav')
                    # Unique transcript built from the three ids, e.g.
                    # "ZERO TWO FIVE" for speaker 0, chapter 2, utterance 5.
                    utterance = ' '.join(
                        NUMBERS[x]
                        for x in (speaker_id, chapter_id, utterance_id)
                    )
                    trans_content.append(f'{file_id} {utterance}')

                    data = get_whitenoise(
                        sample_rate=sample_rate,
                        duration=0.01,
                        n_channels=1,
                        dtype='float32',
                        seed=seed,
                    )
                    save_wav(path, data, sample_rate)
                    cls.samples.append((
                        normalize_wav(data),
                        sample_rate,
                        utterance,
                        speaker_id,
                        chapter_id,
                        utterance_id,
                    ))
                    seed += 1

                # One transcript file per chapter, one "<file id> <text>"
                # line per utterance.
                trans_path = os.path.join(
                    chapter_path, f'{speaker_id}-{chapter_id}.trans.txt'
                )
                with open(trans_path, 'w') as f:
                    f.write('\n'.join(trans_content))

    @classmethod
    def tearDownClass(cls):
        """Restore the patched audio extension and run base-class teardown."""
        # Backstop in case a test failed before it could restore the value.
        librispeech.LIBRISPEECH._ext_audio = '.flac'
        # Chain up so class-level teardown defined by the base classes is not
        # silently skipped by this override.
        super().tearDownClass()

    def _test_librispeech(self, dataset):
        """Compare every item yielded by ``dataset`` with ``self.samples``.

        Always resets ``LIBRISPEECH._ext_audio`` to ``'.flac'`` on exit, even
        when an assertion fails, so the patch applied by the calling test does
        not leak into other tests.
        """
        try:
            num_samples = 0
            for i, (
                data, sample_rate, utterance, speaker_id, chapter_id, utterance_id
            ) in enumerate(dataset):
                expected = self.samples[i]
                self.assertEqual(data, expected[0], atol=5e-5, rtol=1e-8)
                assert sample_rate == expected[1]
                assert utterance == expected[2]
                assert speaker_id == expected[3]
                assert chapter_id == expected[4]
                assert utterance_id == expected[5]
                num_samples += 1

            # The dataset must yield exactly the files that were generated.
            assert num_samples == len(self.samples)
        finally:
            librispeech.LIBRISPEECH._ext_audio = '.flac'

    def test_librispeech_str(self):
        """The dataset loads correctly when the root is given as a ``str``."""
        librispeech.LIBRISPEECH._ext_audio = '.wav'
        dataset = librispeech.LIBRISPEECH(self.root_dir)
        self._test_librispeech(dataset)

    def test_librispeech_path(self):
        """The dataset loads correctly when the root is a ``pathlib.Path``."""
        librispeech.LIBRISPEECH._ext_audio = '.wav'
        dataset = librispeech.LIBRISPEECH(Path(self.root_dir))
        self._test_librispeech(dataset)