UtrechtUniversity · jelletreep · Jul 1, 2024 · Jun 26, 2024 · Jun 26, 2024 · Jun 26, 2024
diff --git a/bioacoustics/classifier/data_preparation_dl.py b/bioacoustics/classifier/data_preparation_dl.py
@@ -1,4 +1,5 @@
 """Script to prepare train and test data to feed into a model"""
+
 import glob
 import os
 import numpy as np

diff --git a/bioacoustics/classifier/data_preparation_svm.py b/bioacoustics/classifier/data_preparation_svm.py
@@ -1,4 +1,5 @@
 """Data preparation for SVM classifier."""
+
 import pandas as pd
 import numpy as np
 import glob

diff --git a/bioacoustics/classifier/hyperparameter_optimization/hpo_dropout_weight.py b/bioacoustics/classifier/hyperparameter_optimization/hpo_dropout_weight.py
@@ -1,4 +1,5 @@
 """ Module that uses scikit-learn for grid search on the dropout rate """
+
 import tensorflow as tf
 from sklearn.model_selection import GridSearchCV
 from scikeras.wrappers import KerasClassifier
@@ -62,6 +63,7 @@ def create_model(init_mode, dropout_rate, weight_constraint):
 
     return model
 
+
 if __name__ == "__main__":
     parser = parse_arguments()
     args = parser.parse_args()
@@ -125,4 +127,3 @@ def create_model(init_mode, dropout_rate, weight_constraint):
     # save model
     estimator = grid_result.best_estimator_
     dump(estimator, args.output_dir + "best_estimator.joblib")
-
diff --git a/bioacoustics/classifier/hyperparameter_optimization/hpo_epoch_batch.py b/bioacoustics/classifier/hyperparameter_optimization/hpo_epoch_batch.py
@@ -1,4 +1,5 @@
 """ Module that uses scikit-learn for grid search on the dropout rate """
+
 import tensorflow as tf
 from sklearn.model_selection import GridSearchCV
 from scikeras.wrappers import KerasClassifier

diff --git a/bioacoustics/classifier/hyperparameter_optimization/hpo_learing_rate.py b/bioacoustics/classifier/hyperparameter_optimization/hpo_learing_rate.py
@@ -1,4 +1,5 @@
 """ Module that uses scikit-learn for grid search on the dropout rate """
+
 import tensorflow as tf
 from sklearn.model_selection import GridSearchCV
 from scikeras.wrappers import KerasClassifier

diff --git a/bioacoustics/classifier/model/acoustic_model.py b/bioacoustics/classifier/model/acoustic_model.py
@@ -1,4 +1,5 @@
 """Script of a base class for acoustic models"""
+
 import os
 from abc import ABC
 import pickle
@@ -7,7 +8,6 @@
 import matplotlib.pyplot as plt
 from tensorflow.keras.models import load_model
 from tensorflow import keras
-from tensorflow.keras.metrics import Recall
 
 
 class AcousticModel(ABC):
@@ -28,13 +28,13 @@ def _compile(self, learning_rate):
         learning_rate: float
             Learning rate for adam optimizer
         """
-        optimizer = keras.optimizers.Adam(learning_rate=learning_rate) #, decay=0.001
+        optimizer = keras.optimizers.Adam(learning_rate=learning_rate)  # , decay=0.001
 
         # Compile the model
         self.acoustic_model.compile(
-            loss="binary_crossentropy", #"categorical_crossentropy"
-            metrics=['accuracy'], #Recall()
-            optimizer=optimizer
+            loss="binary_crossentropy",  # "categorical_crossentropy"
+            metrics=["accuracy"],  # Recall()
+            optimizer=optimizer,
         )
 
         # Display model architecture summary
@@ -151,7 +151,6 @@ def apply_model(
         self._predict(x_test)
 
     def predict_model(self, x_test, file_path, dl_model):
-
         """Load a trained model and make a prediction
 
         Parameters
@@ -203,17 +202,17 @@ def plot_measures(self, history, file_path, title=""):
             Title of the graph
         """
         # summarize history for loss
-        plt.plot(history.history['loss'])
-        plt.plot(history.history['val_loss'])
-        plt.title('model loss')
-        plt.ylabel('loss')
-        plt.xlabel('epoch')
-        plt.legend(['train', 'val'], loc='upper left')
-        fp_loss = os.path.join(file_path, 'loss.png')
+        plt.plot(history.history["loss"])
+        plt.plot(history.history["val_loss"])
+        plt.title("model loss")
+        plt.ylabel("loss")
+        plt.xlabel("epoch")
+        plt.legend(["train", "val"], loc="upper left")
+        fp_loss = os.path.join(file_path, "loss.png")
         plt.savefig(fp_loss)
 
         # convert the history.history dict to a pandas DataFrame:
         hist_df = pd.DataFrame(history.history)
         hist_csv_file = os.path.join(file_path, "history.csv")
-        with open(hist_csv_file, mode='w') as f:
+        with open(hist_csv_file, mode="w") as f:
             hist_df.to_csv(f)
diff --git a/bioacoustics/classifier/model/cnn10_model.py b/bioacoustics/classifier/model/cnn10_model.py
@@ -1,4 +1,5 @@
 """A class for acoustic model with 10 nn blocks"""
+
 from acoustic_model import AcousticModel
 
 import tensorflow as tf
@@ -10,6 +11,7 @@
 from tensorflow.keras.layers import Conv2D, AveragePooling2D
 from tensorflow.keras.layers import GlobalAveragePooling2D
 from tensorflow.keras.callbacks import ModelCheckpoint
+from tensorflow.keras import regularizers
 from tensorflow.keras.constraints import MaxNorm
 
 
@@ -59,7 +61,6 @@ def _make_cnn_model(self, init_mode, dropout_rate, weight_constraint):
                 input_shape=input_shape,
                 data_format=data_format,
                 padding="same",
-
                 kernel_regularizer=regularizers.l2(l=0.01),
                 kernel_initializer=init_mode,
                 kernel_constraint=MaxNorm(weight_constraint),

diff --git a/bioacoustics/feature_extraction/README.md b/bioacoustics/feature_extraction/README.md
@@ -1,13 +1,13 @@
 # Feature extraction
-
-The modules in this directory are used to extract acoustic and/or deep learning features from '.wav' files. The features are used as input for the classifier ([step 3](../3_classifier)).
+The modules in this directory are used to extract acoustic and/or deep learning features from '.wav' files. The features are used as input for the [classifiers](../classifier), i.e. SVM and CNN.
 
 ## Instructions
 
-[Installation instructions](https://github.com/UtrechtUniversity/animal-sounds#getting-started)
+[Installation instructions](https://github.com/UtrechtUniversity/animal-sounds/tree/documenation_svm#getting-started)
+
+## Feature extraction for Support Vector Machines
+We extract several feature sets using:
 
-### Feature extraction for Support Vector Machines
-We extract several feature sets using:
 - a [python version](https://github.com/mystlee/rasta_py) of the [rasta-mat](https://www.ee.columbia.edu/~dpwe/resources/matlab/rastamat/) library.
 - an [Automatic Analysis Architecture](https://doi.org/10.5281/zenodo.1216028)
 
@@ -19,8 +19,8 @@ We extend the feature set with the features from an [Automatic Analysis Architec
 
 The script results in a feature set of 1140 features per audio frame.
 
-#### Running the script
-Use shell script `run.sh` to start `main.py` from the command line. The following arguments should be specified:
+### Running the script
+Use shell script `run_svm.sh` to start `extract_features_svm.py` from the command line. The following arguments should be specified:
 - `--input_dir`; directory where the '.wav' files are located.
 - `--output_dir`; directory where the feature files ('.csv') should be stored.
 - `--frame_length`; subdivide '.wav' files in frames of this length (in number of samples, if the sample rate is 48000 samples per second, choose e.g. 24000 for 0.5 second frames)
@@ -29,8 +29,35 @@ Use shell script `run.sh` to start `main.py` from the command line. The followin
 
 In `./config` the user can specify which features to extract.
 
-## sndfile library
+### sndfile library
 If you get an error saying something about a 'snd_file' dependency on an ubuntu machine, this can be fixed by installing the following C library:
 ```
 sudo apt-get install libsndfile-dev
 ```
+## Feature extraction for Convolutional Neural Network (CNN)
+To extract audio features for the CNN classifier, .wav files are converted to log-mel spectrograms using [librosa](https://zenodo.org/badge/latestdoi/6309729).
+Log-mel spectrograms had the best results in [[1]](#ref). As future work, we can try other representations such as log-spectrograms and gammatone spectrograms.
+
+In this study, we first apply a [Butterworth (bandpass) filter](https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.butter.html) to retain frequencies between 100 and 2000 Hz for further processing. Then the short-time Fourier transform (STFT) is applied to create spectrograms.
+Then we convert the spectrograms to an MFCC (Mel-Frequency Cepstral Coefficient) representation, which is often done in speech processing (find more info [here](https://speechprocessingbook.aalto.fi/Representations/Melcepstrum.html)).
+
+| <img src="../../img/melspectrogram.png" width="400" /> | 
+
+### Running the script
+Open a command line and run the following command:
+```
+sh run_dl.sh
+```
+
+This command applies `extract_features_dl.py` on the whole dataset. The following arguments should be specified:
+- `--input_dir`; directory where the '.wav' files are located.
+- `--output_dir`; directory where the feature files ('.pkl') should be stored.
+- `--label`; the label of the wav file, i.e. `chimpanze` or `background`
+- `--window_length`; subdivide '.wav' files in frames of this length (in number of samples; in our case the sample rate is 48000 samples per second and we chose 750, i.e. roughly 15-millisecond frames)
+- `--hop_length`; overlap between frames in number of samples per hop (in our case the sample rate is 48000 samples per second and we chose 376)
+- `--n_mel`; number of mel features, i.e. horizontal bars in the spectrogram; in our case it is 64.
+- `--new_img_size`; the number of rows and columns of the log-mel spectrogram that is fed as an image to the CNN; in our case it is 64 * 64.
+
+## <a name="ref"></a>References
+1. K. Palanisamy, D. Singhania, and A. Yao, "Rethinking CNN Models for Audio Classification", 2020
+   [arXiv preprint](https://arxiv.org/abs/2007.11154), [github](https://github.com/kamalesh0406/Audio-Classification)
diff --git a/bioacoustics/feature_extraction/acoustic_features/features.py b/bioacoustics/feature_extraction/acoustic_features/features.py
@@ -178,9 +178,9 @@ def _readFeaturesFunctions(self):
             featuresRefUnique[i] = str(i_feature)
         # -----> Then extend to all domains
         for i, domain in enumerate(self.domains):
-            self.featuresFunctions[
-                i * self.n_features : (i + 1) * self.n_features
-            ] = featuresFunctionsUnique
+            self.featuresFunctions[i * self.n_features : (i + 1) * self.n_features] = (
+                featuresFunctionsUnique
+            )
             self.featuresOptArguments[
                 i * self.n_features : (i + 1) * self.n_features
             ] = featuresOptArgumentsUnique
@@ -240,15 +240,15 @@ def _computation(self, signals, fs):
                     new_dictionary.update(
                         self.featuresOptArguments[i * self.n_features + j]
                     )
-                    self.featuresValues[
-                        i * self.n_features + j
-                    ] = self.featuresFunctions[i * self.n_features + j](
-                        signals[i], new_dictionary
+                    self.featuresValues[i * self.n_features + j] = (
+                        self.featuresFunctions[i * self.n_features + j](
+                            signals[i], new_dictionary
+                        )
                     )
                 # Otherwise directly compute feature value.
                 else:
-                    self.featuresValues[
-                        i * self.n_features + j
-                    ] = self.featuresFunctions[i * self.n_features + j](
-                        signals[i], self.intermValues
+                    self.featuresValues[i * self.n_features + j] = (
+                        self.featuresFunctions[i * self.n_features + j](
+                            signals[i], self.intermValues
+                        )
                     )
diff --git a/bioacoustics/feature_extraction/acoustic_features/featuresFunctions.py b/bioacoustics/feature_extraction/acoustic_features/featuresFunctions.py
@@ -241,9 +241,7 @@ def energy_kurtosis(signal, arg_dict):
         E_kur = 0
     else:
         E_kur = (
-            (1 / len(signal) / 2)
-            * np.sum((E_u / len(signal) - E_bar) ** 4)
-            / E_bar**4
+            (1 / len(signal) / 2) * np.sum((E_u / len(signal) - E_bar) ** 4) / E_bar**4
         )
     if np.isfinite(E_kur):
         return E_kur

diff --git a/bioacoustics/feature_extraction/run_dl.sh b/bioacoustics/feature_extraction/run_dl.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+
+DATADIR='/Volumes/science.data.uu.nl/research-zwerts/data/sanaga_test_chunks/'
+RECORDERS='A1 A3 A4 A5 A21 A22 A26 A38'
+
+OUTPUTDIR='../../output/features/'
+echo $DATADIR
+for RECORDER in $RECORDERS
+do
+  echo $DATADIR
+  echo $OUTPUTDIR
+  python3 extract_features_dl.py --input_dir $DATADIR'chimps/'$RECORDER'/*/*.wav' --output_dir $OUTPUTDIR$RECORDER'/'$RECORDER'_chimpanze.pkl' --label 'chimpanze' --window_length 750  --hop_length 376 --n_mel 64  --new_img_size 64 64
+  python3 extract_features_dl.py --input_dir $DATADIR'background/'$RECORDER'/*/*.wav' --output_dir $OUTPUTDIR$RECORDER'/'$RECORDER'_background.pkl' --label 'background' --window_length 750  --hop_length 376 --n_mel 64  --new_img_size 64 64
+done
+
+
diff --git a/bioacoustics/feature_extraction/run.sh → bioacoustics/feature_extraction/run_svm.sh b/bioacoustics/feature_extraction/run.sh → bioacoustics/feature_extraction/run_svm.sh
diff --git a/bioacoustics/wav_processing/chunk_wav/make_chunks.py b/bioacoustics/wav_processing/chunk_wav/make_chunks.py
@@ -1,4 +1,5 @@
 """Script to make .wav files of the same length."""
+
 import os
 import glob
 import argparse

diff --git a/bioacoustics/wav_processing/condensation/extractor.py b/bioacoustics/wav_processing/condensation/extractor.py
@@ -83,7 +83,7 @@ def detect_vocalizations(
 
         # get all indexes of dbs rows of every band that we're
         # interested in
-        for (low, high) in freqs:
+        for low, high in freqs:
             idx_low = (np.abs(f - low)).argmin() - 1
             idx_low = 0 if idx_low < 0 else idx_low
             idx_high = (np.abs(f - high)).argmin() + 1