forked from CUDeepLearningFall2019/final_sorting_hat
-
Notifications
You must be signed in to change notification settings - Fork 0
/
wav_formater.py
158 lines (128 loc) · 4.13 KB
/
wav_formater.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# audio editing libs
import librosa
import librosa.display
from pydub import AudioSegment
# ----- input recording ------------------------------------------------------
path = "data/Audio/"
aud = "1.wav"

# ----- output locations and number of cuts ----------------------------------
# Three configurations have been used with this script; only the last one is
# active:
#   full    : "frames1/"       "npframes1/"        8135
#   half    : "frames1-half/"  "npframes1-half/"   8135 // 2
#   quarter : "frames1-forth/" "npframes1-forth/"  8135 // 4
frame_file_path, np_frame_file_path = "frames1-forth/", "npframes1-forth/"
num_cuts = 8135 // 4

# ----- mel-spectrogram parameters (passed to librosa further down) ----------
n_mels, n_fft, hop_length = 320, 2048, 100

# Decode the recording once; the slicing code further down indexes into it.
sound = AudioSegment.from_file("{}{}".format(path, aud))
# Make sure both output directories exist before anything is written to them.
# os.makedirs(..., exist_ok=True) is a no-op when the directory is already
# present and still raises on genuine failures (e.g. missing permissions)
# instead of hiding them.
for out_dir in (frame_file_path, np_frame_file_path):
    os.makedirs(out_dir, exist_ok=True)
# Cut the recording into `num_cuts` overlapping windows; the number of
# cuts/frames should match the number of video frames.
total_len = len(sound)
size_frame = total_len // num_cuts  # integer half-width of every window
step_size = total_len / num_cuts    # exact (fractional) distance between centres

sound_set = []
center_true = step_size             # running centre, kept as a float
center = size_frame                 # integer centre actually used for slicing
for _ in range(num_cuts):
    # Window of +/- size_frame around the centre, clamped to the recording.
    start = max(center - size_frame, 0)
    stop = min(center + size_frame, total_len)
    sound_set.append(sound[start:stop])
    # Advance in float space and truncate afterwards, so the rounding error
    # of the integer centre does not accumulate from window to window.
    center_true += step_size
    center = int(center_true)
# Write every slice to disk as "<index>.wav" and keep the list of indices so
# the feature-extraction loop can find the files again.
f_num = list(range(len(sound_set)))
for idx in f_num:
    out_name = "{}.wav".format(idx)
    sound_set[idx].export(frame_file_path + out_name, format="wav")
# ---------------------------------------------------------------------------
# Feature extraction: one dB-scaled mel spectrogram per exported frame,
# saved as "<np_frame_file_path><index>.npy".
# ---------------------------------------------------------------------------
for i in f_num:
    wav = "{}.wav".format(i)
    # here kaiser_fast is a technique used for faster extraction
    audio, sample_rate = librosa.load(frame_file_path + wav,
                                      res_type='kaiser_fast')
    # Earlier experiments, kept for reference only:
    #   mel_spec = librosa.feature.melspectrogram(y=audio, sr=sample_rate, n_mels= n_mels)
    #   mel_db = (librosa.power_to_db(mel_spec, ref=np.max) + 40)/40
    #   mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T,axis=0)
    # The signal is passed as `y=`: recent librosa releases accept it only
    # as a keyword, and the keyword form also works on older releases.
    S = librosa.feature.melspectrogram(y=audio, sr=sample_rate, n_fft=n_fft,
                                       hop_length=hop_length,
                                       n_mels=n_mels)
    S_DB = librosa.power_to_db(S, ref=np.max)
    S_nb = np.asmatrix(S_DB)
    # np.save appends the ".npy" extension itself.
    np.save(np_frame_file_path + "{}".format(i), S_nb)

##########################
#  DATA PROCESSING TIME  #
##########################
# Stack the per-frame spectrograms into a single array whose first axis
# indexes the frames, and store it as "data/Audio/audio1.npy".
# NOTE(review): this section reads from "data/Audio/npframes1/", which is not
# the directory the loop above writes to (np_frame_file_path) - confirm the
# intended input location.
npy_dir = "data/Audio/npframes1/"

# NOTE(review): one less than the number of directory entries - presumably to
# leave out a non-frame entry or the final frame; confirm.
count = len(os.listdir(npy_dir)) - 1

# load the image
img = np.load(npy_dir + "1.npy")
# Give a 2-D spectrogram a trailing channel axis so its shape can be used as
# the per-frame shape below. Done with plain NumPy because no
# `img_to_array` helper is imported or defined in this file.
img = np.asarray(img)
if img.ndim == 2:
    img = img[..., np.newaxis]

# save shape to pass to convolution
img_shape = [count] + list(img.shape)
data = np.zeros(img_shape)

for i in range(count):
    img = np.load(npy_dir + "{}.npy".format(i))
    if img.shape == (320, 8):
        data[i, :, :, :] = np.reshape(img, (320, 8, 1))
    else:
        # Frames of any other shape are reported and left as zeros in `data`.
        print("skiped ", i)

np.save("data/Audio/audio1.npy", data)
# Reload what was just written (kept from interactive inspection).
data2 = np.load("data/Audio/audio1.npy")

# trash # zone # trash # zone # trash # zone # trash # zone # trash # zone # trash # zone
# NOTE(review): scratch statements, apparently left over from interactive use.
# `mel_spec` and `mel_db` are only assigned in commented-out code earlier in
# this file, so these two prints raise NameError if they are reached -
# confirm whether they can be removed.
print(mel_spec)
print(mel_db)
# Display S_DB (assigned earlier in the file) with a time x-axis and a mel
# y-axis, add a colour bar labelled in dB, and show the figure.
librosa.display.specshow(S_DB, sr=sample_rate, hop_length=hop_length,
x_axis='time', y_axis='mel');
plt.colorbar(format='%+2.0f dB');
plt.show()
# Bare expression: the sample array of the first slice is fetched and then
# discarded.
sound_set[0].get_array_of_samples()
# NOTE(review): `train` is not defined anywhere in the visible file, and
# `parser` is only defined after this line, so this raises NameError when the
# file is run top to bottom - confirm where `train` is meant to come from.
temp = train.apply(parser, axis=1)
temp.columns = ['feature', 'label']
def parser(row):
    """Load one training clip and return its MFCC feature vector and label.

    The clip is read from ``<data_dir>/Train/<row.ID>.wav``.
    NOTE(review): ``data_dir`` is not defined in the visible part of this
    file; it must exist at module level before this function is called.

    Args:
        row: Object exposing ``ID`` and ``Class`` attributes, e.g. a row
            handed over by ``DataFrame.apply(parser, axis=1)``.

    Returns:
        ``[feature, label]`` on success, where ``feature`` holds the mean of
        each of the 40 MFCC coefficients (averaged over axis 0 of the
        transposed MFCC matrix) and ``label`` is ``row.Class``.
        ``(None, None)`` when the file could not be loaded or processed.
    """
    # function to load files and extract features
    file_name = os.path.join(os.path.abspath(data_dir), 'Train', str(row.ID) + '.wav')

    # handle exception to check if there isn't a file which is corrupted
    try:
        # here kaiser_fast is a technique used for faster extraction
        X, sample_rate = librosa.load(file_name, res_type='kaiser_fast')
        # we extract mfcc feature from data
        mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
    except Exception:
        # Deliberately best-effort: report the offending path and let the
        # caller drop the row. The path variable is `file_name`; the name
        # `file` does not exist in Python 3 and made this handler itself fail.
        print("Error encountered while parsing file: ", file_name)
        return None, None

    feature = mfccs
    label = row.Class
    return [feature, label]