-
Notifications
You must be signed in to change notification settings - Fork 3
/
silero-VAD.py
87 lines (71 loc) · 3.26 KB
/
silero-VAD.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
SAMPLING_RATE = 16000
import torch
torch.set_num_threads(1)
from IPython.display import Audio
from pprint import pprint
import glob
import os
import librosa
import argparse
import csv
# Load the Silero VAD model together with its helper functions via torch.hub.
# force_reload=True asks torch.hub for a fresh copy of the repository rather
# than reusing a previously downloaded one.
model, utils = torch.hub.load(repo_or_dir='snakers4/silero-vad',
                              model='silero_vad',
                              force_reload=True)
(get_speech_timestamps, save_audio, read_audio, VADIterator, collect_chunks) = utils

# Command-line interface.  Every path is required: the rest of the script
# concatenates these values into file paths, so a missing option would only
# surface later as an obscure TypeError on ``None``.
parser = argparse.ArgumentParser(
    description='Split WAV files into speech-only chunks using Silero VAD.')
parser.add_argument('--path_folder_file_wav', type=str, required=True,
                    help='Folder containing the input *.wav files.')
parser.add_argument('--save_dir', type=str, required=True,
                    help='Folder under which one sub-folder of chunks is '
                         'written per input file.')
parser.add_argument('--path_file_csv', type=str, required=True,
                    help='CSV file to write with one row per saved chunk.')
args = parser.parse_args()
def _group_speech_segments(speech_timestamps, min_chunk_samples):
    """Split VAD segments into consecutive groups of a minimum total length.

    Segments are taken in order and accumulated; a group is closed as soon as
    the summed ``end - start`` of its members reaches ``min_chunk_samples``.
    Segments left over after the last one form a final, possibly shorter,
    group.  An empty input yields an empty list.
    """
    groups = []
    current = []
    accumulated = 0
    for segment in speech_timestamps:
        current.append(segment)
        accumulated += segment['end'] - segment['start']
        if accumulated >= min_chunk_samples:
            groups.append(current)
            current = []
            accumulated = 0
    if current:
        groups.append(current)
    return groups


def vad(min_chunk_samples=48000):
    """Cut every WAV in the input folder into speech-only chunks.

    For each ``*.wav`` under ``args.path_folder_file_wav`` the speech segments
    reported by ``get_speech_timestamps`` are grouped, each group is
    concatenated with ``collect_chunks`` and saved as
    ``<save_dir>/<input stem>/<index>.wav``.  One CSV row per chunk
    (``path``, ``timestamps``) is written to ``args.path_file_csv``.

    Args:
        min_chunk_samples: Minimum summed segment length a group must reach
            before it is closed.  The default of 48000 corresponds to 3 s at
            the module's SAMPLING_RATE of 16000, assuming the timestamps are
            sample offsets -- TODO confirm against the silero-vad version
            in use.

    Files in which no speech is detected are skipped: no folder is created
    and no CSV row is written for them.
    """
    with open(args.path_file_csv, 'w', encoding='UTF8', newline='') as f:
        writer = csv.writer(f)
        # writerow() takes ONE iterable holding the row's fields.
        writer.writerow(['path', 'timestamps'])
        for name in glob.glob(args.path_folder_file_wav + '/*.wav'):
            wav = read_audio(name, sampling_rate=SAMPLING_RATE)
            speech_timestamps = get_speech_timestamps(
                wav, model, threshold=0.5, sampling_rate=SAMPLING_RATE)
            groups = _group_speech_segments(speech_timestamps, min_chunk_samples)
            if not groups:
                # Nothing detected in this file; avoid indexing an empty list.
                continue

            out_dir = args.save_dir + '/' + os.path.splitext(os.path.basename(name))[0]
            # Also creates save_dir itself when it does not exist yet.
            os.makedirs(out_dir, exist_ok=True)

            last_index = len(groups) - 1
            for index, group in enumerate(groups):
                timestamps = ', '.join(str(item) for item in group)
                if index == last_index:
                    # Kept from the original script: echo the final chunk's
                    # timestamps to stdout.
                    print(timestamps)
                # The CSV path deliberately mirrors the original format, which
                # omits the '.wav' suffix used for the saved file below.
                writer.writerow([out_dir + '/' + str(index), timestamps])
                save_audio(out_dir + '/' + str(index) + '.wav',
                           collect_chunks(group, wav),
                           sampling_rate=SAMPLING_RATE)
def remove_and_rename(min_duration=3, max_duration=15):
    """Drop chunks outside the allowed duration and renumber the rest.

    Walks every entry directly under ``args.save_dir`` and, inside each one,
    every file.  A file whose duration (as reported by
    ``librosa.get_duration``) is below ``min_duration`` or above
    ``max_duration`` is deleted; every other file is renamed to
    ``audio_<k>.wav`` where ``k`` counts the kept files of that folder from 0.

    Args:
        min_duration: Shortest duration to keep (same unit as
            ``librosa.get_duration`` returns).
        max_duration: Longest duration to keep.

    NOTE(review): files are visited in ``glob`` order, and ``os.rename`` is
    applied without checking whether the target name already exists -- verify
    this is acceptable if the function may be re-run on the same folder.
    """
    for folder in glob.glob(args.save_dir + '/*'):
        kept = 0
        for path in glob.glob(folder + '/*'):
            # Measure once; the original queried the file twice per check.
            duration = librosa.get_duration(filename=path)
            if duration < min_duration or duration > max_duration:
                os.remove(path)
            else:
                os.rename(path, folder + '/audio_' + str(kept) + '.wav')
                kept += 1
# Script entry point: split the recordings into chunks first, then filter the
# chunks by duration and renumber them.
if __name__ == "__main__":
    vad()
    remove_and_rename()