Skip to content

Commit

Permalink
Remove istft (#841)
Browse files Browse the repository at this point in the history
* `istft` has been migrated to `pytorch`, and `torchaudio.functional.istft` was deprecated in the 0.6.0 release. This PR removes it.

Co-authored-by: Jeremy Chen <[email protected]>
  • Loading branch information
j3remych3n and Jeremy Chen authored Jul 30, 2020
1 parent 870811c commit dab7f64
Show file tree
Hide file tree
Showing 4 changed files with 12 additions and 360 deletions.
5 changes: 0 additions & 5 deletions docs/source/functional.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,6 @@ torchaudio.functional

Functions to perform common audio operations.

:hidden:`istft`
~~~~~~~~~~~~~~~

.. autofunction:: istft

:hidden:`spectrogram`
~~~~~~~~~~~~~~~~~~~~~

Expand Down
261 changes: 0 additions & 261 deletions test/functional_cpu_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,31 +11,6 @@
from .functional_impl import Lfilter


def random_float_tensor(seed, size, a=22695477, c=1, m=2 ** 32):
    """Generate a deterministic pseudo-random float tensor from a seed.

    Uses a linear congruential generator
    (https://en.wikipedia.org/wiki/Linear_congruential_generator):

        X_{n + 1} = (a * X_n + c) % m

    The default constants are the Borland C/C++ values. Each generated
    integer lies in [0, m) and is divided by ``m``, so the values are
    nominally in [0, 1) (up to float32 rounding of the integers).

    Args:
        seed (int): initial state X_0 of the generator (not itself emitted)
        size (Tuple[int]): the size of the output tensor; may contain zero
            dimensions, in which case an empty tensor of that size is returned
        a (int): the multiplier constant to the generator
        c (int): the additive constant to the generator
        m (int): the modulus constant to the generator

    Returns:
        Tensor: float tensor of shape ``size``
    """
    num_elements = 1
    for dim in size:
        num_elements *= dim

    # Emit exactly ``num_elements`` states. Seeding the list with one value
    # up front would leave a stray element when ``size`` has a zero
    # dimension, which makes the ``view`` below fail.
    values = []
    state = seed
    for _ in range(num_elements):
        state = (a * state + c) % m
        values.append(state)

    return torch.tensor(values).float().view(size) / m


class TestLFilterFloat32(Lfilter, common_utils.PytorchTestCase):
dtype = torch.float32
device = torch.device('cpu')
Expand Down Expand Up @@ -63,242 +38,6 @@ def test_two_channels(self):
torch.testing.assert_allclose(computed, expected)


def _compare_estimate(sound, estimate, atol=1e-6, rtol=1e-8):
# trim sound for case when constructed signal is shorter than original
sound = sound[..., :estimate.size(-1)]
torch.testing.assert_allclose(estimate, sound, atol=atol, rtol=rtol)


def _test_istft_is_inverse_of_stft(kwargs):
    """Check that ``istft(stft(x))`` reconstructs ``x`` for the given settings.

    For each of three signal shapes, 100 deterministic pseudo-random signals
    are generated (one per trial index), transformed with ``torch.stft`` and
    inverted with ``torchaudio.functional.istft`` using the same ``kwargs``;
    the result is compared against the original signal.
    """
    shapes = [(2, 20), (3, 15), (4, 10)]
    for shape in shapes:
        for trial in range(100):
            sound = random_float_tensor(trial, shape)

            spectrum = torch.stft(sound, **kwargs)
            # Ask for the original number of samples back.
            n_samples = sound.size(1)
            estimate = torchaudio.functional.istft(spectrum, length=n_samples, **kwargs)

            _compare_estimate(sound, estimate)


class TestIstft(common_utils.TorchaudioTestCase):
    """Correctness checks for ``torchaudio.functional.istft`` on assorted inputs."""
    # Number of random trials run by the linearity checks.
    number_of_trials = 100

    # --- istft(stft(x)) == x ------------------------------------------------

    def test_istft_is_inverse_of_stft1(self):
        # Hann window, centered, normalized, onesided.
        stft_kwargs = dict(
            n_fft=12,
            hop_length=4,
            win_length=12,
            window=torch.hann_window(12),
            center=True,
            pad_mode='reflect',
            normalized=True,
            onesided=True,
        )
        _test_istft_is_inverse_of_stft(stft_kwargs)

    def test_istft_is_inverse_of_stft2(self):
        # Hann window, centered, not normalized, not onesided.
        stft_kwargs = dict(
            n_fft=12,
            hop_length=2,
            win_length=8,
            window=torch.hann_window(8),
            center=True,
            pad_mode='reflect',
            normalized=False,
            onesided=False,
        )
        _test_istft_is_inverse_of_stft(stft_kwargs)

    def test_istft_is_inverse_of_stft3(self):
        # Hamming window, centered, normalized, not onesided.
        stft_kwargs = dict(
            n_fft=15,
            hop_length=3,
            win_length=11,
            window=torch.hamming_window(11),
            center=True,
            pad_mode='constant',
            normalized=True,
            onesided=False,
        )
        _test_istft_is_inverse_of_stft(stft_kwargs)

    def test_istft_is_inverse_of_stft4(self):
        # Hamming window, not centered, not normalized, onesided;
        # the window has the same size as n_fft.
        stft_kwargs = dict(
            n_fft=5,
            hop_length=2,
            win_length=5,
            window=torch.hamming_window(5),
            center=False,
            pad_mode='constant',
            normalized=False,
            onesided=True,
        )
        _test_istft_is_inverse_of_stft(stft_kwargs)

    def test_istft_is_inverse_of_stft5(self):
        # Hamming window, not centered, not normalized, not onesided;
        # the window has the same size as n_fft.
        stft_kwargs = dict(
            n_fft=3,
            hop_length=2,
            win_length=3,
            window=torch.hamming_window(3),
            center=False,
            pad_mode='reflect',
            normalized=False,
            onesided=False,
        )
        _test_istft_is_inverse_of_stft(stft_kwargs)

    # --- known transforms ---------------------------------------------------

    def test_istft_of_ones(self):
        # Stands in for torch.stft(torch.ones(4), 4): the real part of every
        # frame in bin 0 is 4, everything else is 0.
        stft = torch.zeros((3, 5, 2))
        stft[0, :, 0] = 4.

        estimate = torchaudio.functional.istft(stft, n_fft=4, length=4)
        _compare_estimate(torch.ones(4), estimate)

    def test_istft_of_zeros(self):
        # Stands in for torch.stft(torch.zeros(4), 4).
        stft = torch.zeros((3, 5, 2))

        estimate = torchaudio.functional.istft(stft, n_fft=4, length=4)
        _compare_estimate(torch.zeros(4), estimate)

    # --- argument validation ------------------------------------------------

    def test_istft_requires_overlap_windows(self):
        # A window of size 1 with a hop of 20 leaves gaps between frames,
        # which is expected to raise.
        stft = torch.zeros((3, 5, 2))
        window = torch.ones(1)
        with self.assertRaises(RuntimeError):
            torchaudio.functional.istft(
                stft, n_fft=4, hop_length=20, win_length=1, window=window)

    def test_istft_requires_nola(self):
        stft = torch.zeros((3, 5, 2))
        shared = dict(n_fft=4, win_length=4)
        kwargs_ok = dict(shared, window=torch.ones(4))
        kwargs_not_ok = dict(shared, window=torch.zeros(4))

        # An all-ones window satisfies NOLA, so this call must succeed ...
        torchaudio.functional.istft(stft, **kwargs_ok)
        # ... while an all-zeros window does not and must raise.
        with self.assertRaises(RuntimeError):
            torchaudio.functional.istft(stft, **kwargs_not_ok)

    def test_istft_requires_non_empty(self):
        no_frames = torch.zeros((3, 0, 2))
        no_bins = torch.zeros((0, 3, 2))
        with self.assertRaises(RuntimeError):
            torchaudio.functional.istft(no_frames, 2)
        with self.assertRaises(RuntimeError):
            torchaudio.functional.istft(no_bins, 2)

    # --- sine reconstruction ------------------------------------------------

    def _test_istft_of_sine(self, amplitude, L, n):
        # Reference signal amplitude*sin(2*pi/L*n*x); hop length and window
        # size both equal L.
        samples = torch.arange(2 * L + 1, dtype=torch.get_default_dtype())
        sound = amplitude * torch.sin(2 * math.pi / L * samples * n)

        # Hand-written spectrum standing in for
        #   torch.stft(sound, L, hop_length=L, win_length=L,
        #              window=torch.ones(L), center=False, normalized=False)
        n_bins = L // 2 + 1
        stft = torch.zeros((n_bins, 2, 2))
        peak = (amplitude * L) / 2.0
        if n < n_bins:
            stft[n, :, 1] = -peak

        # Mirror bin, symmetric about L // 2.
        mirror = L - n
        if 0 <= mirror < n_bins:
            stft[mirror, :, 1] = peak

        estimate = torchaudio.functional.istft(
            stft, L, hop_length=L, win_length=L,
            window=torch.ones(L), center=False, normalized=False)
        # Looser tolerance: the error grows with the amplitude scaling.
        _compare_estimate(sound, estimate, atol=1e-3)

    def test_istft_of_sine(self):
        cases = [
            (123, 5, 1),
            (150, 5, 2),
            (111, 5, 3),
            (160, 7, 4),
            (145, 8, 5),
            (80, 9, 6),
            (99, 10, 7),
        ]
        for amplitude, L, n in cases:
            self._test_istft_of_sine(amplitude=amplitude, L=L, n=n)

    # --- linearity ----------------------------------------------------------

    def _test_linearity_of_istft(self, data_size, kwargs, atol=1e-6, rtol=1e-8):
        # Checks istft(a*x + b*y) against a*istft(x) + b*istft(y) for
        # pseudo-random inputs x, y and random scalars a, b.
        for trial in range(self.number_of_trials):
            spec_x = random_float_tensor(trial, data_size)
            spec_y = random_float_tensor(trial * 2, data_size)
            a, b = torch.rand(2)
            inverse_x = torchaudio.functional.istft(spec_x, **kwargs)
            inverse_y = torchaudio.functional.istft(spec_y, **kwargs)
            combined = a * inverse_x + b * inverse_y
            estimate = torchaudio.functional.istft(a * spec_x + b * spec_y, **kwargs)
            _compare_estimate(combined, estimate, atol, rtol)

    def test_linearity_of_istft1(self):
        # Hann window, centered, normalized, onesided.
        istft_kwargs = dict(
            n_fft=12,
            window=torch.hann_window(12),
            center=True,
            pad_mode='reflect',
            normalized=True,
            onesided=True,
        )
        self._test_linearity_of_istft((2, 7, 7, 2), istft_kwargs)

    def test_linearity_of_istft2(self):
        # Hann window, centered, not normalized, not onesided.
        istft_kwargs = dict(
            n_fft=12,
            window=torch.hann_window(12),
            center=True,
            pad_mode='reflect',
            normalized=False,
            onesided=False,
        )
        self._test_linearity_of_istft((2, 12, 7, 2), istft_kwargs)

    def test_linearity_of_istft3(self):
        # Hamming window, centered, normalized, not onesided.
        istft_kwargs = dict(
            n_fft=12,
            window=torch.hamming_window(12),
            center=True,
            pad_mode='constant',
            normalized=True,
            onesided=False,
        )
        self._test_linearity_of_istft((2, 12, 7, 2), istft_kwargs)

    def test_linearity_of_istft4(self):
        # Hamming window, not centered, not normalized, onesided.
        istft_kwargs = dict(
            n_fft=12,
            window=torch.hamming_window(12),
            center=False,
            pad_mode='constant',
            normalized=False,
            onesided=True,
        )
        self._test_linearity_of_istft((2, 7, 3, 2), istft_kwargs, atol=1e-5, rtol=1e-8)


class TestDetectPitchFrequency(common_utils.TorchaudioTestCase):
@parameterized.expand([(100,), (440,)])
def test_pitch(self, frequency):
Expand Down
8 changes: 0 additions & 8 deletions test/test_batch_consistency.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,14 +59,6 @@ def test_detect_pitch_frequency(self, frequency, sample_rate, n_channels):
n_channels=n_channels, duration=5)
self.assert_batch_consistencies(F.detect_pitch_frequency, waveform, sample_rate)

def test_istft(self):
    # Spectrum with the real part of every frame in bin 0 set to 4 and
    # everything else zero (3 bins x 5 frames x real/imag).
    stft = torch.zeros((3, 5, 2))
    stft[0, :, 0] = 4.
    self.assert_batch_consistencies(F.istft, stft, n_fft=4, length=4)

def test_contrast(self):
waveform = torch.rand(2, 100) - 0.5
self.assert_batch_consistencies(F.contrast, waveform, enhancement_amount=80.)
Expand Down
Loading

0 comments on commit dab7f64

Please sign in to comment.