diff --git a/src/algorithms/extractor/tonalextractor.cpp b/src/algorithms/extractor/tonalextractor.cpp index e15becb49..ebb9f74cf 100644 --- a/src/algorithms/extractor/tonalextractor.cpp +++ b/src/algorithms/extractor/tonalextractor.cpp @@ -128,7 +128,7 @@ void TonalExtractor::configure() { _hpcpChord->configure("referenceFrequency", tuningFrequency, "minFrequency", 40.0, "nonLinear", true, - "splitFrequency", 500.0, + "bandSplitFrequency", 500.0, "maxFrequency", 5000.0, "bandPreset", true, "windowSize", 0.5, @@ -139,7 +139,7 @@ void TonalExtractor::configure() { _hpcpTuning->configure("referenceFrequency", tuningFrequency, "minFrequency", 40.0, "nonLinear", true, - "splitFrequency", 500.0, + "bandSplitFrequency", 500.0, "maxFrequency", 5000.0, "bandPreset", true, "windowSize", 0.5, diff --git a/src/algorithms/spectral/hpcp.cpp b/src/algorithms/spectral/hpcp.cpp index 04ac1d9e1..b254804cd 100644 --- a/src/algorithms/spectral/hpcp.cpp +++ b/src/algorithms/spectral/hpcp.cpp @@ -28,7 +28,7 @@ const char* HPCP::name = "HPCP"; const char* HPCP::category = "Tonal"; const char* HPCP::description = DOC("Computes a Harmonic Pitch Class Profile (HPCP) from the spectral peaks of a signal. HPCP is a k*12 dimensional vector which represents the intensities of the twelve (k==1) semitone pitch classes (corresponsing to notes from A to G#), or subdivisions of these (k>1).\n" "\n" -"Exceptions are thrown if \"minFrequency\", \"splitFrequency\" and \"maxFrequency\" are not separated by at least 200Hz from each other, requiring that \"maxFrequency\" be greater than \"splitFrequency\" and \"splitFrequency\" be greater than \"minFrequency\". 
Other exceptions are thrown if input vectors have different size, if parameter \"size\" is not a positive non-zero multiple of 12 or if \"windowSize\" is less than one hpcp bin (12/size).\n" +"Exceptions are thrown if \"minFrequency\", \"bandSplitFrequency\" and \"maxFrequency\" are not separated by at least 200Hz from each other, requiring that \"maxFrequency\" be greater than \"bandSplitFrequency\" and \"bandSplitFrequency\" be greater than \"minFrequency\". Other exceptions are thrown if input vectors have different size, if parameter \"size\" is not a positive non-zero multiple of 12 or if \"windowSize\" is less than one hpcp bin (12/size).\n" "\n" "References:\n" " [1] T. Fujishima, \"Realtime Chord Recognition of Musical Sound: A System\n" @@ -66,7 +66,7 @@ void HPCP::configure() { throw EssentiaException("HPCP: Minimum and maximum frequencies are too close"); } - _splitFrequency = parameter("splitFrequency").toReal(); + _splitFrequency = parameter("bandSplitFrequency").toReal(); _bandPreset = parameter("bandPreset").toBool(); if (_bandPreset) { @@ -86,10 +86,15 @@ void HPCP::configure() { _nonLinear = parameter("nonLinear").toBool(); _maxShifted = parameter("maxShifted").toBool(); - _normalized = parameter("normalized").toBool(); + + string normalized = toLower(parameter("normalized").toString()); + if (normalized == "none") _normalized = N_NONE; + if (normalized == "unitsum") _normalized = N_UNIT_SUM; + if (normalized == "unitmax") _normalized = N_UNIT_MAX; - if (_nonLinear && !_normalized) { - throw EssentiaException("HPCP: Cannot apply non-linear filter when HPCP vector is not normalized"); + + if (_nonLinear && _normalized != N_UNIT_MAX) { + throw EssentiaException("HPCP: Cannot apply non-linear filter when HPCP vector is not normalized to unit max."); } initHarmonicContributionTable(); @@ -254,24 +259,34 @@ void HPCP::compute() { } // Normalize the HPCP vector - if (_normalized) { - if (_bandPreset) { + + if (_bandPreset) { + if (_normalized == 
N_UNIT_MAX) { normalize(hpcp_LO); normalize(hpcp_HI); - for (int i=0; i<(int)hpcp.size(); i++) { - hpcp[i] = hpcp_LO[i] + hpcp_HI[i]; - } } - normalize(hpcp); - } else { - if (_bandPreset) { - for (int i=0; i<(int)hpcp.size(); i++) { - hpcp[i] = hpcp_LO[i] + hpcp_HI[i]; - } + else if (_normalized == N_UNIT_SUM) { + // TODO does it make sense to apply band preset together with unit sum normalization? + E_WARNING("HPCP: applying band preset together with unit sum normalization was not tested."); + normalizeSum(hpcp_LO); + normalizeSum(hpcp_HI); } + + for (int i=0; i<(int)hpcp.size(); i++) { + hpcp[i] = hpcp_LO[i] + hpcp_HI[i]; + } + } + + if (_normalized == N_UNIT_MAX) { + normalize(hpcp); + } + else if (_normalized == N_UNIT_SUM) { + normalizeSum(hpcp); } // Perform the Jordi non-linear post-processing step + // This makes small values (below 0.6) even smaller + // while further boosting values close to 1. if (_nonLinear) { for (int i=0; i<(int)hpcp.size(); i++) { hpcp[i] = sin(hpcp[i] * M_PI * 0.5); diff --git a/src/algorithms/spectral/hpcp.h b/src/algorithms/spectral/hpcp.h index aab870c23..92b4c3236 100644 --- a/src/algorithms/spectral/hpcp.h +++ b/src/algorithms/spectral/hpcp.h @@ -52,15 +52,15 @@ class HPCP : public Algorithm { declareParameter("referenceFrequency", "the reference frequency for semitone index calculation, corresponding to A3 [Hz]", "(0,inf)", 440.0); declareParameter("harmonics", "number of harmonics for frequency contribution, 0 indicates exclusive fundamental frequency contribution", "[0,inf)", 0); // 8 for chord estimation declareParameter("bandPreset", "enables whether to use a band preset", "{true,false}", true); + declareParameter("bandSplitFrequency", "the split frequency for low and high bands, not used if bandPreset is false [Hz]", "(0,inf)", 500.0); declareParameter("minFrequency", "the minimum frequency that contributes to the HPCP [Hz] (the difference between the min and split frequencies must not be less than 200.0 Hz)", 
"(0,inf)", 40.0); declareParameter("maxFrequency", "the maximum frequency that contributes to the HPCP [Hz] (the difference between the max and split frequencies must not be less than 200.0 Hz)", "(0,inf)", 5000.0); - declareParameter("splitFrequency", "the split frequency for low and high bands, not used if bandPreset is false [Hz]", "(0,inf)", 500.0); declareParameter("weightType", "type of weighting function for determining frequency contribution", "{none,cosine,squaredCosine}", "squaredCosine"); - declareParameter("nonLinear", "enables whether to apply a Jordi non-linear post-processing function to the output", "{true,false}", false); + declareParameter("nonLinear", "apply non-linear post-processing to the output (use with normalized='unitMax'). Boosts values close to 1, decreases values close to 0.", "{true,false}", false); declareParameter("windowSize", "the size, in semitones, of the window used for the weighting", "(0,12]", 1.0); declareParameter("sampleRate", "the sampling rate of the audio signal [Hz]", "(0,inf)", 44100.); declareParameter("maxShifted", "whether to shift the HPCP vector so that the maximum peak is at index 0", "{true,false}", false); - declareParameter("normalized", "whether to normalize the HPCP vector", "{true,false}", true); + declareParameter("normalized", "whether to normalize the HPCP vector", "{none,unitSum,unitMax}", "unitMax"); } void configure(); @@ -90,9 +90,14 @@ class HPCP : public Algorithm { NONE, COSINE, SQUARED_COSINE }; WeightType _weightType; + + enum NormalizeType { + N_NONE, N_UNIT_MAX, N_UNIT_SUM + }; + NormalizeType _normalized; + bool _nonLinear; bool _maxShifted; - bool _normalized; std::vector _harmonicPeaks; }; diff --git a/src/examples/extractor_music/MusicTonalDescriptors.cpp b/src/examples/extractor_music/MusicTonalDescriptors.cpp index e56ff3f7f..ddb65ebf4 100644 --- a/src/examples/extractor_music/MusicTonalDescriptors.cpp +++ b/src/examples/extractor_music/MusicTonalDescriptors.cpp @@ -109,7 +109,7 @@ void 
MusicTonalDescriptors::createNetwork(SourceBase& source, Pool& pool){ "bandPreset", true, "minFrequency", 40.0, "maxFrequency", 5000.0, - "splitFrequency", 500.0, + "bandSplitFrequency", 500.0, "weightType", "cosine", "nonLinear", true, "windowSize", 0.5); @@ -160,7 +160,7 @@ void MusicTonalDescriptors::createNetwork(SourceBase& source, Pool& pool){ "bandPreset", true, "minFrequency", 40.0, "maxFrequency", 5000.0, - "splitFrequency", 500.0, + "bandSplitFrequency", 500.0, "weightType", "cosine", "nonLinear", true, "windowSize", 0.5); diff --git a/src/examples/freesound/FreesoundTonalDescriptors.cpp b/src/examples/freesound/FreesoundTonalDescriptors.cpp index 5f6255480..24d214787 100644 --- a/src/examples/freesound/FreesoundTonalDescriptors.cpp +++ b/src/examples/freesound/FreesoundTonalDescriptors.cpp @@ -157,7 +157,7 @@ void FreesoundTonalDescriptors ::createTuningFrequencyNetwork(SourceBase& source "bandPreset", true, "minFrequency", 40.0, "maxFrequency", 5000.0, - "splitFrequency", 500.0, + "bandSplitFrequency", 500.0, "weightType", "cosine", "nonLinear", true, "windowSize", 0.5); diff --git a/src/examples/outdated/streaming_extractortonal.cpp b/src/examples/outdated/streaming_extractortonal.cpp index e67055f49..f2c737698 100644 --- a/src/examples/outdated/streaming_extractortonal.cpp +++ b/src/examples/outdated/streaming_extractortonal.cpp @@ -205,7 +205,7 @@ void TonalDescriptors(SourceBase& input, Pool& pool, const Pool& options, const "bandPreset", true, "minFrequency", 40.0, "maxFrequency", 5000.0, - "splitFrequency", 500.0, + "bandSplitFrequency", 500.0, "weightType", "cosine", "nonLinear", true, "windowSize", 0.5); @@ -238,7 +238,7 @@ void TonalDescriptors(SourceBase& input, Pool& pool, const Pool& options, const "bandPreset", true, "minFrequency", 40.0, "maxFrequency", 5000.0, - "splitFrequency", 500.0, + "bandSplitFrequency", 500.0, "weightType", "cosine", "nonLinear", true, "windowSize", 0.5);