audio_data.py

import os
import shutil
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from tqdm import tqdm

def convertPNGtoArray(img):
    # Flatten an image (e.g. a saved spectrogram PNG) into a 1D numpy array
    return np.ndarray.flatten(np.asarray(img))

def getFeatureSpectrogram(x, genre, number):
    # Render a spectrogram of the signal and save it to images/<genre>/<number>.png
    image_name = str(number)
    plt.figure(figsize=(6.4, 4.8))
    cmap = plt.get_cmap('inferno')
    plt.specgram(x, NFFT=2048, Fs=2, Fc=0, noverlap=128, cmap=cmap,
                 sides='default', mode='default', scale='dB')
    plt.axis('off')
    if not os.path.exists("images/" + genre):
        os.mkdir("images/" + genre)
    plt.savefig("images/" + genre + "/" + image_name + ".png", bbox_inches='tight', pad_inches=0)
    plt.close()

def getFeaturesMean(x, sr):
    # Mean spectral centroid, rolloff, bandwidth, chroma, zero-crossing rate,
    # the 20 MFCC means, tempo and mean beat frame
    features = []
    features.append(np.mean(get_spectral_centroid(x, sr)))
    features.append(np.mean(get_spectral_rolloff(x, sr)))
    features.append(np.mean(get_spectral_bandwidth(x, sr)))
    features.append(np.mean(librosa.feature.chroma_stft(y=x, sr=sr)))
    features.append(get_mean_zero_crossing(x))
    mfccs = librosa.feature.mfcc(y=x, sr=sr)
    for array in mfccs:
        features.append(np.mean(array))
    # tempo and beat extraction on the percussive component
    x_harmonic, x_percussive = librosa.effects.hpss(x)
    tempo, beats = librosa.beat.beat_track(y=x_percussive, sr=sr)
    features.append(np.mean(tempo))
    features.append(np.mean(beats))
    return features

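# Illustrative addition, not part of the original script: column labels for the vector
# returned by getFeaturesMean(), in the exact order the values are appended above. The
# names are made up here; only the order and count come from getFeaturesMean().
FEATURE_MEAN_COLUMNS = (
    ["spectral_centroid_mean", "spectral_rolloff_mean", "spectral_bandwidth_mean",
     "chroma_stft_mean", "zero_crossing_mean"]
    + ["mfcc_%d_mean" % i for i in range(1, 21)]  # librosa.feature.mfcc defaults to 20 coefficients
    + ["tempo", "beat_frame_mean"]
)
# Hypothetical usage:
#   pd.DataFrame([f for f, _ in getAudioDataMeans()], columns=FEATURE_MEAN_COLUMNS)
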
def get_mean_zero_crossing(x):
    zero_crossing = librosa.zero_crossings(x, pad=False)
    converted_array = [int(elem) for elem in zero_crossing]  # convert T/F to 1/0
    bumped_array = [elem * 10000 for elem in converted_array]
    mean_zero_crossing = np.mean(bumped_array)
    return mean_zero_crossing

def get_spectral_centroid(x, sr):
    # Truncate to a fixed 1290 frames so every clip yields the same feature length
    return librosa.feature.spectral_centroid(y=x, sr=sr)[0][:1290]

def get_spectral_rolloff(x, sr):
    return librosa.feature.spectral_rolloff(y=x, sr=sr)[0][:1290]

def get_spectral_bandwidth(x, sr):
    return librosa.feature.spectral_bandwidth(y=x, sr=sr)[0][:1290]

def get_zero_crossing_rate(x, sr):
    zerocrossing_temp = librosa.zero_crossings(x, pad=False)[:660000]
    # goal: about 30k elements
    converted_array = [int(elem) for elem in zerocrossing_temp]  # convert T/F to 1/0
    bumped_array = [elem * 10000 for elem in converted_array]
    # average the first 5 samples of every block of 20
    flatten_zerocrossing = [np.mean(bumped_array[j:j + 5]) for j in range(0, len(bumped_array), 20)]
    return flatten_zerocrossing

def get_MFCCs(x, sr):
    # Flatten the 20 MFCC rows into one vector and truncate to a fixed length (20 * 1290)
    mfccs_temp = librosa.feature.mfcc(y=x, sr=sr)
    flatten_mfccs = [coeff for row in mfccs_temp for coeff in row]
    return flatten_mfccs[:25800]

def get_chroma_features(x, sr):
    # 12 chroma bins, truncated to the first 1290 frames
    return librosa.feature.chroma_stft(y=x, sr=sr)[:, :1290]

# Features
def getFeatures(x, sr):
    feature_dict = {
        "spectralcentroid": get_spectral_centroid(x, sr),
        "spectralrolloff": get_spectral_rolloff(x, sr),
        "spectralbandwidth": get_spectral_bandwidth(x, sr),
        "zerocrossingrate": get_zero_crossing_rate(x, sr),
        "mfcc": get_MFCCs(x, sr),
        "chroma": get_chroma_features(x, sr)
    }
    return feature_dict

def save_to_csv(data):
    panda_df = pd.DataFrame(data)
    # Forward slash avoids "\a" being read as an escape sequence in the path
    panda_df.to_csv(path_or_buf="./audio_data_means.csv")

# Creates a huge amount of data if saved to CSV
def getAudioData():
    # Adapt to path
    rootdir = "genres"
    return_array = []
    for subdir, dirs, files in os.walk(rootdir):
        for i in tqdm(range(len(files))):
            # file names are expected to look like "<genre>.xxxxx.wav"
            genre = files[i].split(".")[0]
            audio_file = os.path.join("genres", genre, files[i])
            audio_data, sr = librosa.load(audio_file, sr=None, dtype=np.float64)
            audio_tuple = (getFeatures(audio_data, sr), genre)
            return_array.append(audio_tuple)
    return return_array

def getAudioDataMeans():
    # Adapt to path
    rootdir = "genres"
    result = []
    for subdir, dirs, files in os.walk(rootdir):
        for i in tqdm(range(len(files))):
            genre = files[i].split(".")[0]
            audio_file = os.path.join("genres", genre, files[i])
            audio_data, sr = librosa.load(audio_file, sr=None, dtype=np.float64)
            features = getFeaturesMean(audio_data, sr)
            audio_tuple = (features, genre)
            result.append(audio_tuple)
    return result

def generateImages():
    # WARNING: THIS WILL DELETE THE IMAGES FOLDER AND RECREATE THE PLOTS
    if os.path.exists("images/"):
        print("Deleting old images folder")
        shutil.rmtree('images/', ignore_errors=True)
        print("Recreating images folder")
        os.mkdir("images")
    else:
        print("Creating images folder")
        os.mkdir("images")
    # Adapt to path
    for root, dirs, files in os.walk("genres"):
        for i in tqdm(range(len(files))):
            genre = files[i].split(".")[0]
            audio_file = os.path.join("genres", genre, files[i])
            audio_data, sr = librosa.load(audio_file, sr=None, dtype=np.float64)
            getFeatureSpectrogram(audio_data, genre, i)
    print("Done")

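# Illustrative sketch, not part of the original script: read the spectrogram PNGs written
# by generateImages() back in and flatten them with convertPNGtoArray(). Pillow is an
# assumed extra dependency here, and the helper name is made up.
def load_spectrogram_vectors():
    from PIL import Image  # assumed dependency, imported lazily so the rest of the file runs without it
    data = []
    for genre in os.listdir("images"):
        genre_dir = os.path.join("images", genre)
        for file_name in os.listdir(genre_dir):
            img = Image.open(os.path.join(genre_dir, file_name)).convert("L")
            data.append((convertPNGtoArray(img), genre))
    return data
# Hypothetical usage:
#   image_data = load_spectrogram_vectors()
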
generateImages()
#result = getAudioDataMeans()
#save_to_csv(result)