Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add DL features to documents #49

Merged
merged 8 commits into from
Jul 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions bioacoustics/classifier/data_preparation_dl.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Script to prepare train and test data to feed into a model"""

import glob
import os
import numpy as np
Expand Down
1 change: 1 addition & 0 deletions bioacoustics/classifier/data_preparation_svm.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Data preparation for SVM classifier."""

import pandas as pd
import numpy as np
import glob
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
""" Module that uses scikit-learn for grid search on the dropout rate """

import tensorflow as tf
from sklearn.model_selection import GridSearchCV
from scikeras.wrappers import KerasClassifier
Expand Down Expand Up @@ -62,6 +63,7 @@ def create_model(init_mode, dropout_rate, weight_constraint):

return model


if __name__ == "__main__":
parser = parse_arguments()
args = parser.parse_args()
Expand Down Expand Up @@ -125,4 +127,3 @@ def create_model(init_mode, dropout_rate, weight_constraint):
# save model
estimator = grid_result.best_estimator_
dump(estimator, args.output_dir + "best_estimator.joblib")

Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
""" Module that uses scikit-learn for grid search on the dropout rate """

import tensorflow as tf
from sklearn.model_selection import GridSearchCV
from scikeras.wrappers import KerasClassifier
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
""" Module that uses scikit-learn for grid search on the dropout rate """

import tensorflow as tf
from sklearn.model_selection import GridSearchCV
from scikeras.wrappers import KerasClassifier
Expand Down
27 changes: 13 additions & 14 deletions bioacoustics/classifier/model/acoustic_model.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Script of a base class for acoustic models"""

import os
from abc import ABC
import pickle
Expand All @@ -7,7 +8,6 @@
import matplotlib.pyplot as plt
from tensorflow.keras.models import load_model
from tensorflow import keras
from tensorflow.keras.metrics import Recall


class AcousticModel(ABC):
Expand All @@ -28,13 +28,13 @@ def _compile(self, learning_rate):
learning_rate: float
Learning rate for adam optimizer
"""
optimizer = keras.optimizers.Adam(learning_rate=learning_rate) #, decay=0.001
optimizer = keras.optimizers.Adam(learning_rate=learning_rate) # , decay=0.001

# Compile the model
self.acoustic_model.compile(
loss="binary_crossentropy", #"categorical_crossentropy"
metrics=['accuracy'], #Recall()
optimizer=optimizer
loss="binary_crossentropy", # "categorical_crossentropy"
metrics=["accuracy"], # Recall()
optimizer=optimizer,
)

# Display model architecture summary
Expand Down Expand Up @@ -151,7 +151,6 @@ def apply_model(
self._predict(x_test)

def predict_model(self, x_test, file_path, dl_model):

"""Load a trained model and make a prediction

Parameters
Expand Down Expand Up @@ -203,17 +202,17 @@ def plot_measures(self, history, file_path, title=""):
Title of the graph
"""
# summarize history for loss
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'val'], loc='upper left')
fp_loss = os.path.join(file_path, 'loss.png')
plt.plot(history.history["loss"])
plt.plot(history.history["val_loss"])
plt.title("model loss")
plt.ylabel("loss")
plt.xlabel("epoch")
plt.legend(["train", "val"], loc="upper left")
fp_loss = os.path.join(file_path, "loss.png")
plt.savefig(fp_loss)

# convert the history.history dict to a pandas DataFrame:
hist_df = pd.DataFrame(history.history)
hist_csv_file = os.path.join(file_path, "history.csv")
with open(hist_csv_file, mode='w') as f:
with open(hist_csv_file, mode="w") as f:
hist_df.to_csv(f)
3 changes: 2 additions & 1 deletion bioacoustics/classifier/model/cnn10_model.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""A class for acoustic model with 10 nn blocks"""

from acoustic_model import AcousticModel

import tensorflow as tf
Expand All @@ -10,6 +11,7 @@
from tensorflow.keras.layers import Conv2D, AveragePooling2D
from tensorflow.keras.layers import GlobalAveragePooling2D
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras import regularizers
from tensorflow.keras.constraints import MaxNorm


Expand Down Expand Up @@ -59,7 +61,6 @@ def _make_cnn_model(self, init_mode, dropout_rate, weight_constraint):
input_shape=input_shape,
data_format=data_format,
padding="same",

kernel_regularizer=regularizers.l2(l=0.01),
kernel_initializer=init_mode,
kernel_constraint=MaxNorm(weight_constraint),
Expand Down
43 changes: 35 additions & 8 deletions bioacoustics/feature_extraction/README.md
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
# Feature extraction

The modules in this directory are used to extract acoustic and/or deep learning features from '.wav' files. The features are used as input for the classifier ([step 3](../3_classifier)).
The modules in this directory are used to extract acoustic and/or deep learning features from '.wav' files. The features are used as input for the [classifiers](../classifier), i.e. the SVM and the CNN.

## Instructions

[Installation instructions](https://github.com/UtrechtUniversity/animal-sounds#getting-started)
[Installation instructions](https://github.com/UtrechtUniversity/animal-sounds/tree/documenation_svm#getting-started)

## Feature extraction for Support Vector Machines
We extract several feature sets from using:

### Feature extraction for Support Vector Machines
We extract several feature sets using:
- a [python version](https://github.com/mystlee/rasta_py) of the [rasta-mat](https://www.ee.columbia.edu/~dpwe/resources/matlab/rastamat/) library.
- an [Automatic Analysis Architecture](https://doi.org/10.5281/zenodo.1216028)

Expand All @@ -19,8 +19,8 @@ We extend the feature set with the features from an [Automatic Analysis Architec

The script results in a feature set of 1140 features per audio frame.

#### Running the script
Use shell script `run.sh` to start `main.py` from the command line. The following arguments should be specified:
### Running the script
Use shell script `run_svm.sh` to start `extract_features_svm.py` from the command line. The following arguments should be specified:
- `--input_dir`; directory where the '.wav' files are located.
- `--output_dir`; directory where the feature files ('.csv') should be stored.
- `--frame_length`; subdivide '.wav' files in frames of this length (in number of samples, if the sample rate is 48000 samples per second, choose e.g. 24000 for 0.5 second frames)
Expand All @@ -29,8 +29,35 @@ Use shell script `run.sh` to start `main.py` from the command line. The followin

In `./config` the user can specify which features to extract.

## sndfile library
### sndfile library
If you get an error saying something about a 'snd_file' dependency on an Ubuntu machine, this can be fixed by installing the following C library:
```
sudo apt-get install libsndfile-dev
```
## Feature extraction for Convolutional Neural Network (CNN)
To extract audio features for the CNN classifier, '.wav' files are converted to log-mel spectrograms using [librosa](https://zenodo.org/badge/latestdoi/6309729).
Log-mel spectrograms gave the best results in [[1]](#ref). As future work, other representations such as log-spectrograms and gammatone spectrograms could be tried.

In this study, we first apply a [Butterworth (bandpass) filter](https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.butter.html) that keeps frequencies between 100 and 2000 Hz for further processing. Then the short-time Fourier transform (STFT) is applied to create spectrograms.
Then we convert the spectrograms to an MFCC (Mel-Frequency Cepstral Coefficient) representation, which is often done for speech processing (find more info [here](https://speechprocessingbook.aalto.fi/Representations/Melcepstrum.html)).

| <img src="../../img/melspectrogram.png" width="400" /> |

### Running the script
Open a command line and run the following command:
```
sh run_dl.sh
```

This command applies `extract_features_dl.py` on the whole dataset. The following arguments should be specified:
- `--input_dir`; directory where the '.wav' files are located.
- `--output_dir`; directory where the feature files ('.pkl') should be stored.
- `--label`; the label of the '.wav' files, i.e. `chimpanze` or `background`
- `--window_length`; subdivide '.wav' files in frames of this length (in number of samples; in our case the sample rate is 48000 samples per second and we chose 750, i.e. frames of roughly 15 milliseconds)
- `--hop_length`; overlap between frames in number of samples per hop (in our case, the sample rate is 48000 samples per second, we chose 376)
- `--n_mel`; number of mel features, i.e. horizontal bars in the spectrogram, which in our case is 64.
- `--new_img_size`; the number of rows and columns of the log-mel spectrogram that is fed to the CNN as an image. In our case it is 64 x 64.

## <a name="ref"></a>References
1. K. Palanisamy, D. Singhania, and A. Yao, "Rethinking CNN Models for Audio Classification", 2020
[arXiv preprint](https://arxiv.org/abs/2007.11154), [github](https://github.com/kamalesh0406/Audio-Classification)
22 changes: 11 additions & 11 deletions bioacoustics/feature_extraction/acoustic_features/features.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,9 +178,9 @@ def _readFeaturesFunctions(self):
featuresRefUnique[i] = str(i_feature)
# -----> Then extend to all domains
for i, domain in enumerate(self.domains):
self.featuresFunctions[
i * self.n_features : (i + 1) * self.n_features
] = featuresFunctionsUnique
self.featuresFunctions[i * self.n_features : (i + 1) * self.n_features] = (
featuresFunctionsUnique
)
self.featuresOptArguments[
i * self.n_features : (i + 1) * self.n_features
] = featuresOptArgumentsUnique
Expand Down Expand Up @@ -240,15 +240,15 @@ def _computation(self, signals, fs):
new_dictionary.update(
self.featuresOptArguments[i * self.n_features + j]
)
self.featuresValues[
i * self.n_features + j
] = self.featuresFunctions[i * self.n_features + j](
signals[i], new_dictionary
self.featuresValues[i * self.n_features + j] = (
self.featuresFunctions[i * self.n_features + j](
signals[i], new_dictionary
)
)
# Otherwise directly compute feature value.
else:
self.featuresValues[
i * self.n_features + j
] = self.featuresFunctions[i * self.n_features + j](
signals[i], self.intermValues
self.featuresValues[i * self.n_features + j] = (
self.featuresFunctions[i * self.n_features + j](
signals[i], self.intermValues
)
)
Original file line number Diff line number Diff line change
Expand Up @@ -241,9 +241,7 @@ def energy_kurtosis(signal, arg_dict):
E_kur = 0
else:
E_kur = (
(1 / len(signal) / 2)
* np.sum((E_u / len(signal) - E_bar) ** 4)
/ E_bar**4
(1 / len(signal) / 2) * np.sum((E_u / len(signal) - E_bar) ** 4) / E_bar**4
)
if np.isfinite(E_kur):
return E_kur
Expand Down
16 changes: 16 additions & 0 deletions bioacoustics/feature_extraction/run_dl.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#!/bin/bash
# Extract deep-learning features with extract_features_dl.py for every recorder.
#
# For each recorder the extraction script is run twice: once on the
# 'chimps' recordings (label 'chimpanze') and once on the 'background'
# recordings (label 'background'). Each run writes one '.pkl' file to
# $OUTPUTDIR<recorder>/.

DATADIR='/Volumes/science.data.uu.nl/research-zwerts/data/sanaga_test_chunks/'
# Kept as a space-separated string (not a bash array) so the script also
# works when it is started with plain `sh`.
RECORDERS='A1 A3 A4 A5 A21 A22 A26 A38'

OUTPUTDIR='../../output/features/'
echo "$DATADIR"
# $RECORDERS is intentionally left unquoted: word splitting yields one
# recorder name per iteration.
for RECORDER in $RECORDERS
do
    echo "$DATADIR"
    echo "$OUTPUTDIR"
    # The wildcards stay inside the quotes, so the pattern is passed to the
    # Python script unexpanded rather than being globbed by the shell.
    python3 extract_features_dl.py \
        --input_dir "${DATADIR}chimps/${RECORDER}/*/*.wav" \
        --output_dir "${OUTPUTDIR}${RECORDER}/${RECORDER}_chimpanze.pkl" \
        --label 'chimpanze' \
        --window_length 750 --hop_length 376 --n_mel 64 --new_img_size 64 64
    python3 extract_features_dl.py \
        --input_dir "${DATADIR}background/${RECORDER}/*/*.wav" \
        --output_dir "${OUTPUTDIR}${RECORDER}/${RECORDER}_background.pkl" \
        --label 'background' \
        --window_length 750 --hop_length 376 --n_mel 64 --new_img_size 64 64
done


1 change: 1 addition & 0 deletions bioacoustics/wav_processing/chunk_wav/make_chunks.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Script to make .wav files of the same length."""

import os
import glob
import argparse
Expand Down
2 changes: 1 addition & 1 deletion bioacoustics/wav_processing/condensation/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ def detect_vocalizations(

# get all indexes of dbs rows of every band that we're
# interested in
for (low, high) in freqs:
for low, high in freqs:
idx_low = (np.abs(f - low)).argmin() - 1
idx_low = 0 if idx_low < 0 else idx_low
idx_high = (np.abs(f - high)).argmin() + 1
Expand Down
Loading