Skip to content

Commit

Permalink
Add filter bank figures (#1891)
Browse files Browse the repository at this point in the history
  • Loading branch information
mthrok authored Oct 16, 2021
1 parent 9e3778d commit 89aeb68
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 2 deletions.
15 changes: 15 additions & 0 deletions torchaudio/functional/functional.py
Original file line number Diff line number Diff line change
Expand Up @@ -543,6 +543,13 @@ def melscale_fbanks(
) -> Tensor:
r"""Create a frequency bin conversion matrix.
Note:
For the sake of numerical compatibility with librosa, not all the coefficients
in the resulting filter bank have a magnitude of 1.
.. image:: https://download.pytorch.org/torchaudio/doc-assets/mel_fbanks.png
:alt: Visualization of generated filter bank
Args:
n_freqs (int): Number of frequencies to highlight/apply
f_min (float): Minimum frequency (Hz)
Expand All @@ -559,6 +566,7 @@ def melscale_fbanks(
Each column is a filterbank so that assuming there is a matrix A of
size (..., ``n_freqs``), the applied result would be
``A * melscale_fbanks(A.size(-1), ...)``.
"""

if norm is not None and norm != "slaney":
Expand Down Expand Up @@ -601,6 +609,13 @@ def linear_fbanks(
) -> Tensor:
r"""Creates a linear triangular filterbank.
Note:
For the sake of numerical compatibility with librosa, not all the coefficients
in the resulting filter bank have a magnitude of 1.
.. image:: https://download.pytorch.org/torchaudio/doc-assets/lin_fbanks.png
:alt: Visualization of generated filter bank
Args:
n_freqs (int): Number of frequencies to highlight/apply
f_min (float): Minimum frequency (Hz)
Expand Down
23 changes: 21 additions & 2 deletions torchaudio/transforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -344,6 +344,10 @@ class MelScale(torch.nn.Module):
norm (str or None, optional): If 'slaney', divide the triangular mel weights by the width of the mel band
(area normalization). (Default: ``None``)
mel_scale (str, optional): Scale to use: ``htk`` or ``slaney``. (Default: ``htk``)
See also:
:py:func:`torchaudio.functional.melscale_fbanks` - The function used to
generate the filter banks.
"""
__constants__ = ['n_mels', 'sample_rate', 'f_min', 'f_max']

Expand Down Expand Up @@ -483,8 +487,10 @@ def forward(self, melspec: Tensor) -> Tensor:


class MelSpectrogram(torch.nn.Module):
r"""Create MelSpectrogram for a raw audio signal. This is a composition of Spectrogram
and MelScale.
r"""Create MelSpectrogram for a raw audio signal.
This is a composition of :py:func:`torchaudio.transforms.Spectrogram` and
:py:func:`torchaudio.transforms.MelScale`.
Sources
* https://gist.github.com/kastnerkyle/179d6e9a88202ab0a2fe
Expand Down Expand Up @@ -521,6 +527,10 @@ class MelSpectrogram(torch.nn.Module):
>>> waveform, sample_rate = torchaudio.load('test.wav', normalize=True)
>>> transform = transforms.MelSpectrogram(sample_rate)
>>> mel_specgram = transform(waveform) # (channel, n_mels, time)
See also:
:py:func:`torchaudio.functional.melscale_fbanks` - The function used to
generate the filter banks.
"""
__constants__ = ['sample_rate', 'n_fft', 'win_length', 'hop_length', 'pad', 'n_mels', 'f_min']

Expand Down Expand Up @@ -599,6 +609,10 @@ class MFCC(torch.nn.Module):
norm (str, optional): norm to use. (Default: ``'ortho'``)
log_mels (bool, optional): whether to use log-mel spectrograms instead of db-scaled. (Default: ``False``)
melkwargs (dict or None, optional): arguments for MelSpectrogram. (Default: ``None``)
See also:
:py:func:`torchaudio.functional.melscale_fbanks` - The function used to
generate the filter banks.
"""
__constants__ = ['sample_rate', 'n_mfcc', 'dct_type', 'top_db', 'log_mels']

Expand Down Expand Up @@ -670,6 +684,11 @@ class LFCC(torch.nn.Module):
norm (str, optional): norm to use. (Default: ``'ortho'``)
log_lf (bool, optional): whether to use log-lf spectrograms instead of db-scaled. (Default: ``False``)
speckwargs (dict or None, optional): arguments for Spectrogram. (Default: ``None``)
See also:
:py:func:`torchaudio.functional.linear_fbanks` - The function used to
generate the filter banks.
"""
__constants__ = ['sample_rate', 'n_filter', 'n_lfcc', 'dct_type', 'top_db', 'log_lf']

Expand Down

0 comments on commit 89aeb68

Please sign in to comment.