Skip to content

Commit

Permalink
Import torchaudio #1639 37dbf29
Browse files Browse the repository at this point in the history
Summary: Import torchaudio #1639 37dbf29

Reviewed By: carolineechen, mthrok

Differential Revision: D29920658

fbshipit-source-id: 94ba8c04edcfb50e355b1ca8e937f612917ecf38
  • Loading branch information
yangarbiter authored and facebook-github-bot committed Jul 27, 2021
1 parent 1f1bd18 commit 3702055
Show file tree
Hide file tree
Showing 9 changed files with 211 additions and 64 deletions.
Empty file.
95 changes: 95 additions & 0 deletions examples/pipeline_tacotron2/text/numbers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
# *****************************************************************************
# Copyright (c) 2017 Keith Ito
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#
# *****************************************************************************
"""
Modified from https://github.com/keithito/tacotron
"""

import inflect
import re


# Shared inflect engine; the helpers below call its ``number_to_words`` method.
_inflect = inflect.engine()
# A digit, then one or more digits/commas, then a digit (e.g. "1,000").
_comma_number_re = re.compile(r'([0-9][0-9\,]+[0-9])')
# Digits, a literal period, then digits (e.g. "3.14").
_decimal_number_re = re.compile(r'([0-9]+\.[0-9]+)')
# A pound sign followed by digits/commas ending in a digit; group 1 excludes the sign.
_pounds_re = re.compile(r'£([0-9\,]*[0-9]+)')
# A dollar sign followed by digits/periods/commas ending in a digit; group 1 excludes the sign.
_dollars_re = re.compile(r'\$([0-9\.\,]*[0-9]+)')
# Digits immediately followed by an ordinal suffix (st, nd, rd or th).
_ordinal_re = re.compile(r'[0-9]+(st|nd|rd|th)')
# Any run of digits.
_number_re = re.compile(r'[0-9]+')


def _remove_commas(m: re.Match) -> str:
    """Return group 1 of the match with every comma deleted.

    Args:
        m (re.Match): Match whose first group holds the text to clean.

    Returns:
        str: The captured text without commas.
    """
    captured = m.group(1)
    return ''.join(captured.split(','))


def _expand_decimal_point(m: re.Match) -> str:
    """Return group 1 of the match with each period spelled out as " point ".

    Args:
        m (re.Match): Match whose first group holds the decimal number text.

    Returns:
        str: The captured text with every ``.`` replaced by `` point ``.
    """
    number_text = m.group(1)
    return ' point '.join(number_text.split('.'))


def _expand_dollars(m: re.Match) -> str:
    """Rewrite a matched dollar amount as dollar and cent counts with units.

    Args:
        m (re.Match): Match whose first group is the amount without the
            leading ``$`` (digits, commas and periods).

    Returns:
        str: ``"<d> dollar(s), <c> cent(s)"``, only the non-zero part when
        one of the two is zero, ``"zero dollars"`` when both are zero, or the
        raw amount followed by ``" dollars"`` when it contains more than one
        period.
    """
    raw = m.group(1)
    raw_parts = raw.split('.')
    if len(raw_parts) > 2:
        return raw + ' dollars'  # Unexpected format

    # The dollars pattern admits commas, and stray ones (e.g. "$,5" or
    # "$5,.5") are not removed by the comma-number pass; int() would raise
    # ValueError on them, so strip commas before converting.
    parts = [part.replace(',', '') for part in raw_parts]
    dollars = int(parts[0]) if parts[0] else 0
    # NOTE(review): the fractional digits are read as a plain integer, so
    # "$1.5" yields 5 cents rather than 50 -- confirm this is intended.
    cents = int(parts[1]) if len(parts) > 1 and parts[1] else 0

    pieces = []
    if dollars:
        pieces.append('%s %s' % (dollars, 'dollar' if dollars == 1 else 'dollars'))
    if cents:
        pieces.append('%s %s' % (cents, 'cent' if cents == 1 else 'cents'))
    return ', '.join(pieces) if pieces else 'zero dollars'


def _expand_ordinal(m: re.Match) -> str:
    """Return the words produced by the inflect engine for the whole match.

    Args:
        m (re.Match): Match whose full text (digits plus ordinal suffix) is
            passed unchanged to ``_inflect.number_to_words``.

    Returns:
        str: The value returned by ``_inflect.number_to_words``.
    """
    ordinal_text = m.group(0)
    return _inflect.number_to_words(ordinal_text)


def _expand_number(m: re.Match) -> str:
    """Spell out a matched run of digits.

    Values strictly between 1000 and 3000 get special handling: 2000 becomes
    "two thousand", 2001-2009 become "two thousand <n>", exact hundreds
    become "<n> hundred", and everything else is converted in two-digit
    groups with "oh" for zero. All other values are converted directly with
    the "and" word suppressed.

    Args:
        m (re.Match): Match whose full text is a decimal integer.

    Returns:
        str: The number written in words.
    """
    value = int(m.group(0))

    # Outside the (1000, 3000) window there is no special treatment.
    if not 1000 < value < 3000:
        return _inflect.number_to_words(value, andword='')

    if value == 2000:
        return 'two thousand'
    if 2000 < value < 2010:
        return 'two thousand ' + _inflect.number_to_words(value % 100)
    if value % 100 == 0:
        return _inflect.number_to_words(value // 100) + ' hundred'

    grouped = _inflect.number_to_words(value, andword='', zero='oh', group=2)
    return grouped.replace(', ', ' ')


def normalize_numbers(text: str) -> str:
    """Apply the number-rewriting passes to ``text`` in a fixed order.

    The passes run as: remove commas inside numbers, append " pounds" to
    pound amounts, expand dollar amounts, spell out decimal points, expand
    ordinals, and finally expand every remaining run of digits.

    Args:
        text (str): The input text.

    Returns:
        str: The text after all passes have been applied.
    """
    # Order matters: each pass operates on the output of the previous one.
    rewrite_passes = (
        (_comma_number_re, _remove_commas),
        (_pounds_re, r'\1 pounds'),
        (_dollars_re, _expand_dollars),
        (_decimal_number_re, _expand_decimal_point),
        (_ordinal_re, _expand_ordinal),
        (_number_re, _expand_number),
    )
    for pattern, replacement in rewrite_passes:
        text = pattern.sub(replacement, text)
    return text
22 changes: 22 additions & 0 deletions examples/pipeline_tacotron2/text/test_text.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import unittest

from parameterized import parameterized

from .text_preprocessing import text_to_sequence


class TestTextPreprocessor(unittest.TestCase):
    """Checks ``text_to_sequence`` against hand-computed symbol ID sequences."""

    @parameterized.expand(
        [
            ["dr. Strange?", [15, 26, 14, 31, 26, 29, 11, 30, 31, 29, 12, 25, 18, 16, 10]],
            ["ML, is fun.", [24, 23, 6, 11, 20, 30, 11, 17, 32, 25, 7]],
            ["I love torchaudio!", [20, 11, 23, 26, 33, 16, 11, 31, 26, 29, 14, 19, 12, 32, 15, 20, 26, 2]],
            # 'one thousand dollars, twenty cents'
            ["$1,000.20", [26, 25, 16, 11, 31, 19, 26, 32, 30, 12, 25, 15, 11, 15, 26, 23, 23,
                           12, 29, 30, 6, 11, 31, 34, 16, 25, 31, 36, 11, 14, 16, 25, 31, 30]],
        ]
    )
    def test_text_to_sequence(self, sent, seq):
        """Each sentence must map to exactly the expected list of IDs."""
        # assertEqual is not stripped under ``python -O`` and reports a
        # list diff on failure, unlike a bare ``assert``.
        self.assertEqual(text_to_sequence(sent), seq)
85 changes: 85 additions & 0 deletions examples/pipeline_tacotron2/text/text_preprocessing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
# *****************************************************************************
# Copyright (c) 2017 Keith Ito
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#
# *****************************************************************************
"""
Modified from https://github.com/keithito/tacotron
"""

from typing import List
import re

from unidecode import unidecode

from .numbers import normalize_numbers


# Matches one or more consecutive whitespace characters.
_whitespace_re = re.compile(r'\s+')

# (compiled pattern, expansion) pairs. Each pattern matches the abbreviation
# at a word boundary, followed by a literal period, ignoring case.
_abbreviations = [
    (re.compile('\\b%s\\.' % abbreviation, re.IGNORECASE), expansion)
    for abbreviation, expansion in (
        ('mrs', 'misess'),
        ('mr', 'mister'),
        ('dr', 'doctor'),
        ('st', 'saint'),
        ('co', 'company'),
        ('jr', 'junior'),
        ('maj', 'major'),
        ('gen', 'general'),
        ('drs', 'doctors'),
        ('rev', 'reverend'),
        ('lt', 'lieutenant'),
        ('hon', 'honorable'),
        ('sgt', 'sergeant'),
        ('capt', 'captain'),
        ('esq', 'esquire'),
        ('ltd', 'limited'),
        ('col', 'colonel'),
        ('ft', 'fort'),
    )
]

# Symbol inventory: the pad symbol first, then the special characters,
# punctuation and lowercase letters. A symbol's ID is its index in ``symbols``.
_pad = '_'
_special = '-'
_punctuation = '!\'(),.:;? '
_letters = 'abcdefghijklmnopqrstuvwxyz'

symbols = [_pad, *_special, *_punctuation, *_letters]
_symbol_to_id = dict(zip(symbols, range(len(symbols))))


def text_to_sequence(sent: str) -> List[int]:
    r'''Map a sentence to the list of symbol IDs for its characters.

    The sentence is passed through ``unidecode``, lowercased, run through
    ``normalize_numbers``, has its abbreviations expanded and its whitespace
    runs collapsed to single spaces. Characters that have no entry in the
    symbol table are dropped.

    Args:
        sent (str): The input sentence to convert to a sequence.

    Returns:
        List of integers corresponding to the symbols in the sentence.
    '''
    normalized = unidecode(sent)  # convert to ascii
    normalized = normalize_numbers(normalized.lower())  # lower case, expand numbers
    for pattern, expansion in _abbreviations:  # expand abbreviations
        normalized = pattern.sub(expansion, normalized)
    normalized = _whitespace_re.sub(' ', normalized)  # collapse whitespace

    ids = []
    for char in normalized:
        # Characters outside the symbol table are skipped silently.
        if char in _symbol_to_id:
            ids.append(_symbol_to_id[char])
    return ids
4 changes: 2 additions & 2 deletions test/torchaudio_unittest/transforms/batch_consistency_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,15 +33,15 @@ def test_batch_Resample(self):
self.assertEqual(computed, expected)

def test_batch_MelScale(self):
specgram = torch.randn(2, 31, 2786)
specgram = torch.randn(2, 201, 256)

# Single then transform then batch
expected = torchaudio.transforms.MelScale()(specgram).repeat(3, 1, 1, 1)

# Batch then transform
computed = torchaudio.transforms.MelScale()(specgram.repeat(3, 1, 1, 1))

# shape = (3, 2, 201, 1394)
# shape = (3, 2, 128, 256)
self.assertEqual(computed, expected)

def test_batch_InverseMelScale(self):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,10 +59,6 @@ def test_AmplitudeToDB(self):
spec = torch.rand((6, 201))
self._assert_consistency(T.AmplitudeToDB(), spec)

def test_MelScale_invalid(self):
with self.assertRaises(ValueError):
torch.jit.script(T.MelScale())

def test_MelScale(self):
spec_f = torch.rand((1, 201, 6))
self._assert_consistency(T.MelScale(n_stft=201), spec_f)
Expand Down
6 changes: 3 additions & 3 deletions test/torchaudio_unittest/transforms/transforms_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,17 +55,17 @@ def test_AmplitudeToDB(self):
self.assertEqual(mag_to_db_torch, power_to_db_torch)

def test_melscale_load_save(self):
specgram = torch.ones(1, 1000, 100)
specgram = torch.ones(1, 201, 100)
melscale_transform = transforms.MelScale()
melscale_transform(specgram)

melscale_transform_copy = transforms.MelScale(n_stft=1000)
melscale_transform_copy = transforms.MelScale()
melscale_transform_copy.load_state_dict(melscale_transform.state_dict())

fb = melscale_transform.fb
fb_copy = melscale_transform_copy.fb

self.assertEqual(fb_copy.size(), (1000, 128))
self.assertEqual(fb_copy.size(), (201, 128))
self.assertEqual(fb, fb_copy)

def test_melspectrogram_load_save(self):
Expand Down
18 changes: 0 additions & 18 deletions test/torchaudio_unittest/transforms/transforms_test_impl.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
import warnings

import torch
import torchaudio.transforms as T

Expand Down Expand Up @@ -63,22 +61,6 @@ def test_InverseMelScale(self):
assert _get_ratio(relative_diff < 1e-3) > 5e-3
assert _get_ratio(relative_diff < 1e-5) > 1e-5

def test_melscale_unset_weight_warning(self):
"""Issue a warning if MelScale initialized without a weight
As part of the deprecation of lazy initialization behavior (#1510),
issue a warning if `n_stft` is not set.
"""
with warnings.catch_warnings(record=True) as caught_warnings:
warnings.simplefilter("always")
T.MelScale(n_mels=64, sample_rate=8000)
assert len(caught_warnings) == 1

with warnings.catch_warnings(record=True) as caught_warnings:
warnings.simplefilter("always")
T.MelScale(n_mels=64, sample_rate=8000, n_stft=201)
assert len(caught_warnings) == 0

@nested_params(
["sinc_interpolation", "kaiser_window"],
[16000, 44100],
Expand Down
41 changes: 4 additions & 37 deletions torchaudio/transforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,9 +244,8 @@ class MelScale(torch.nn.Module):
sample_rate (int, optional): Sample rate of audio signal. (Default: ``16000``)
f_min (float, optional): Minimum frequency. (Default: ``0.``)
f_max (float or None, optional): Maximum frequency. (Default: ``sample_rate // 2``)
n_stft (int, optional): Number of bins in STFT. Calculated from first input
if None is given. See ``n_fft`` in :class:`Spectrogram`. (Default: ``None``)
norm (Optional[str]): If 'slaney', divide the triangular mel weights by the width of the mel band
n_stft (int, optional): Number of bins in STFT. See ``n_fft`` in :class:`Spectrogram`. (Default: ``201``)
norm (str or None, optional): If 'slaney', divide the triangular mel weights by the width of the mel band
(area normalization). (Default: ``None``)
mel_scale (str, optional): Scale to use: ``htk`` or ``slaney``. (Default: ``htk``)
"""
Expand All @@ -257,7 +256,7 @@ def __init__(self,
sample_rate: int = 16000,
f_min: float = 0.,
f_max: Optional[float] = None,
n_stft: Optional[int] = None,
n_stft: int = 201,
norm: Optional[str] = None,
mel_scale: str = "htk") -> None:
super(MelScale, self).__init__()
Expand All @@ -269,35 +268,11 @@ def __init__(self,
self.mel_scale = mel_scale

assert f_min <= self.f_max, 'Require f_min: {} < f_max: {}'.format(f_min, self.f_max)

if n_stft is None or n_stft == 0:
warnings.warn(
'Initialization of torchaudio.transforms.MelScale with an unset weight '
'`n_stft=None` is deprecated and will be removed in release 0.10. '
'Please set a proper `n_stft` value. Typically this is `n_fft // 2 + 1`. '
'Refer to https://github.com/pytorch/audio/issues/1510 '
'for more details.'
)

fb = torch.empty(0) if n_stft is None else F.create_fb_matrix(
fb = F.create_fb_matrix(
n_stft, self.f_min, self.f_max, self.n_mels, self.sample_rate, self.norm,
self.mel_scale)
self.register_buffer('fb', fb)

def __prepare_scriptable__(self):
r"""If `self.fb` is empty, the `forward` method will try to resize the parameter,
which does not work once the transform is scripted. However, this error does not happen
until the transform is executed. This is inconvenient especially if the resulting
TorchScript object is executed in other environments. Therefore, we check the
validity of `self.fb` here and fail if the resulting TS does not work.
Returns:
MelScale: self
"""
if self.fb.numel() == 0:
raise ValueError("n_stft must be provided at construction")
return self

def forward(self, specgram: Tensor) -> Tensor:
r"""
Args:
Expand All @@ -311,14 +286,6 @@ def forward(self, specgram: Tensor) -> Tensor:
shape = specgram.size()
specgram = specgram.reshape(-1, shape[-2], shape[-1])

if self.fb.numel() == 0:
tmp_fb = F.create_fb_matrix(specgram.size(1), self.f_min, self.f_max,
self.n_mels, self.sample_rate, self.norm,
self.mel_scale)
# Attributes cannot be reassigned outside __init__ so workaround
self.fb.resize_(tmp_fb.size())
self.fb.copy_(tmp_fb)

# (channel, frequency, time).transpose(...) dot (frequency, n_mels)
# -> (channel, time, n_mels).transpose(...)
mel_specgram = torch.matmul(specgram.transpose(1, 2), self.fb).transpose(1, 2)
Expand Down

0 comments on commit 3702055

Please sign in to comment.