Use complex tensors in phase_vocoder #758

Closed · wants to merge 14 commits
37 changes: 37 additions & 0 deletions test/torchaudio_unittest/batch_consistency_test.py
@@ -280,3 +280,40 @@ def test_batch_Vol(self):
# Batch then transform
computed = torchaudio.transforms.Vol(gain=1.1)(waveform.repeat(3, 1, 1))
self.assertEqual(computed, expected)


class TestTransformsWithComplexTensors(common_utils.TorchaudioTestCase):
def test_batch_TimeStretch(self):
test_filepath = common_utils.get_asset_path('steam-train-whistle-daniel_simon.wav')
waveform, _ = common_utils.load_wav(test_filepath) # (2, 278756), 44100

kwargs = {
'n_fft': 2048,
'hop_length': 512,
'win_length': 2048,
'window': torch.hann_window(2048),
'center': True,
'pad_mode': 'reflect',
'normalized': True,
'onesided': True,
}
rate = 2

complex_specgrams = torch.stft(waveform, **kwargs)
complex_specgrams = torch.view_as_complex(complex_specgrams)

# Single then transform then batch
expected = torchaudio.transforms.TimeStretch(
fixed_rate=rate,
n_freq=1025,
hop_length=512,
)(complex_specgrams).repeat(3, 1, 1, 1, 1)

# Batch then transform
computed = torchaudio.transforms.TimeStretch(
fixed_rate=rate,
n_freq=1025,
hop_length=512,
)(complex_specgrams.repeat(3, 1, 1, 1, 1))

self.assertEqual(computed, expected, atol=1e-5, rtol=1e-5)
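
The test above converts the torch.stft output with torch.view_as_complex before calling TimeStretch. As a minimal sketch (not part of the diff), the legacy `(..., complex=2)` layout and the complex dtype are two views of the same data:

    import torch

    real_format = torch.randn(2, 1025, 400, 2)            # legacy layout: (..., freq, time, complex=2)
    complex_format = torch.view_as_complex(real_format)   # complex dtype: (..., freq, time), torch.cfloat
    assert torch.equal(torch.view_as_real(complex_format), real_format)  # same underlying values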
38 changes: 38 additions & 0 deletions test/torchaudio_unittest/librosa_compatibility_test.py
@@ -2,6 +2,7 @@
import os
import unittest
from distutils.version import StrictVersion
from parameterized import parameterized

import torch
import torchaudio
@@ -111,6 +112,43 @@ def test_amplitude_to_DB(self):
self.assertEqual(ta_out, lr_out, atol=5e-5, rtol=1e-5)


@unittest.skipIf(not LIBROSA_AVAILABLE, "Librosa not available")
class TestFunctionalWithComplexTensors(common_utils.TorchaudioTestCase):
"""Test suite for functions in `functional` module using as input tensors with complex dtypes."""
@parameterized.expand([
(0.5,), (1.01,), (1.3,)
])
def test_phase_vocoder(self, rate):
torch.random.manual_seed(48)
complex_specgrams = torch.randn(2, 1025, 400, dtype=torch.cdouble)
hop_length = 256

# Due to the cumulative sum, numerical error when using torch.float32 would
# cause the bottom-right values of the stretched spectrogram to not
# match librosa's output.

phase_advance = torch.linspace(0, np.pi * hop_length,
complex_specgrams.shape[-2], dtype=torch.double)[..., None]

complex_specgrams_stretch = F.phase_vocoder(complex_specgrams, rate=rate, phase_advance=phase_advance)

# == Test shape
expected_size = list(complex_specgrams.size())
expected_size[-1] = int(np.ceil(expected_size[-1] / rate))

assert complex_specgrams.dim() == complex_specgrams_stretch.dim()
assert complex_specgrams_stretch.size() == torch.Size(expected_size)

# == Test values
index = [0] + [slice(None)] * 2
mono_complex_specgram = complex_specgrams[index].numpy()
expected_complex_stretch = librosa.phase_vocoder(mono_complex_specgram,
rate=rate,
hop_length=hop_length)

self.assertEqual(complex_specgrams_stretch[index], torch.from_numpy(expected_complex_stretch))
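
As a hedged aside (not part of the diff), the float32 concern raised in the comment above can be reproduced with a short sketch; the size of the drift depends on the sequence length:

    import torch

    # Cumulative sums accumulate rounding error; float32 drifts away from float64
    # over long sequences, which is why phase_advance above uses torch.double.
    phase = torch.full((100_000,), 0.1)
    drift = (torch.cumsum(phase, 0)[-1].double() - torch.cumsum(phase.double(), 0)[-1]).abs()
    print(drift)  # non-negligible in float32; shrinks as the sequence gets shorter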


@pytest.mark.parametrize('complex_specgrams', [
torch.randn(2, 1025, 400, 2)
])
14 changes: 13 additions & 1 deletion test/torchaudio_unittest/torchscript_consistency_cuda_test.py
@@ -1,7 +1,7 @@
import torch

from torchaudio_unittest import common_utils
from .torchscript_consistency_impl import Functional, Transforms
from .torchscript_consistency_impl import Functional, Transforms, TransformsWithComplexDtypes


@common_utils.skipIfNoCuda
@@ -26,3 +26,15 @@ class TestTransformsFloat32(Transforms, common_utils.PytorchTestCase):
class TestTransformsFloat64(Transforms, common_utils.PytorchTestCase):
dtype = torch.float64
device = torch.device('cuda')


@common_utils.skipIfNoCuda
class TestTransformsCFloat(TransformsWithComplexDtypes, common_utils.PytorchTestCase):
dtype = torch.cfloat
device = torch.device('cuda')


@common_utils.skipIfNoCuda
class TestTransformsCDouble(TransformsWithComplexDtypes, common_utils.PytorchTestCase):
dtype = torch.cdouble
device = torch.device('cuda')
28 changes: 26 additions & 2 deletions test/torchaudio_unittest/torchscript_consistency_impl.py
@@ -529,7 +529,7 @@ def func(tensor):
self._assert_consistency(func, waveform)


class Transforms(common_utils.TestBaseMixin):
class TransformsMixin:
"""Implements test for Transforms that are performed for different devices"""
def _assert_consistency(self, transform, tensor):
tensor = tensor.to(device=self.device, dtype=self.dtype)
@@ -540,6 +540,30 @@ def _assert_consistency(self, transform, tensor):
ts_output = ts_transform(tensor)
self.assertEqual(ts_output, output)


class TransformsWithComplexDtypes(TransformsMixin, common_utils.TestBaseMixin):
"""Implements test for Transforms that are performed for different devices"""
def _assert_consistency(self, transform, tensor):
tensor = tensor.to(device=self.device, dtype=self.dtype)
transform = transform.to(device=self.device, dtype=self.dtype)

ts_transform = torch.jit.script(transform)
output = transform(tensor)
ts_output = ts_transform(tensor)
self.assertEqual(ts_output, output)

def test_TimeStretch(self):
n_freq = 400
hop_length = 512
fixed_rate = 1.3
tensor = torch.rand((10, 2, n_freq, 10))
self._assert_consistency(
T.TimeStretch(n_freq=n_freq, hop_length=hop_length, fixed_rate=fixed_rate),
tensor,
)


class Transforms(TransformsMixin, common_utils.TestBaseMixin):
def test_Spectrogram(self):
tensor = torch.rand((1, 1000))
self._assert_consistency(T.Spectrogram(), tensor)
@@ -585,7 +609,7 @@ def test_TimeStretch(self):
n_freq = 400
hop_length = 512
fixed_rate = 1.3
tensor = torch.rand((10, 2, n_freq, 10, 2))
tensor = torch.rand((10, 2, n_freq, 10, 2), dtype=torch.double)
Collaborator:
Is this change related or necessary?
My understanding is that the _assert_consistency method will change the dtype/device to the appropriate ones, so this change has no effect.

Author:
oh okay removed it!

Collaborator:
I still see dtype=torch.double ...
self._assert_consistency(
T.TimeStretch(n_freq=n_freq, hop_length=hop_length, fixed_rate=fixed_rate),
tensor,
)
97 changes: 66 additions & 31 deletions torchaudio/functional.py
@@ -458,14 +458,28 @@ def phase_vocoder(
factor of ``rate``.

Args:
complex_specgrams (Tensor): Dimension of `(..., freq, time, complex=2)`
Contributor:
How about something like this?

        complex_specgrams (Tensor): Either a real tensor of dimension `(..., freq, time, complex=2)`
            or a tensor of dimension `(..., freq, time)` with complex dtype.

We were using "complex tensor" to mean `(..., complex=2)`. This is now ambiguous. What expression do you recommend to refer to a tensor of complex dtype? "tensor with a complex dtype"?

Author (@anjali411, Aug 6, 2020):
yeah, "tensor with a complex dtype" sounds good. However, this "or" way of documenting could be problematic in cases where the function takes more than one complex tensor. Perhaps in those cases we can add a note stating that either all inputs should be real tensors or all inputs should be of complex dtype.

I think it might be nicer to add a separate example with complex-dtype tensors so that it's also clear that the returned output would also be complex (if applicable), especially since we are planning to switch to using complex-dtype tensors in the release after the upcoming one.

Contributor:
I do find this "or" way of discussing it a little cumbersome, and I agree this will get long if many tensors are involved. We could add a note in each, and just define the args/returns with complex dtype. We can still keep the example for clarity.

"""
    We are migrating to complex-dtype tensors. For backward compatibility reasons,
    this function still supports the legacy convention of ending with a dimension of 2
    to represent a complex tensor.

    Args:
        complex_specgrams (Tensor): A tensor of dimension `(..., freq, time)` with complex dtype.
        rate (float): Speed-up factor
        phase_advance (Tensor): Expected phase advance in each bin. Dimension of (freq, 1)
    Returns:
        Tensor: Complex Specgrams Stretch with dimension of `(..., freq, ceil(time/rate))`
            with a complex dtype.

    Example

    Example - Legacy
"""

Thoughts?

Contributor:
P.S. Good suggestion below for example naming.
complex_specgrams (Tensor): Either a real tensor of dimension `(..., freq, time, complex=2)`
or a tensor of dimension `(..., freq, time)` with complex dtype.
rate (float): Speed-up factor
phase_advance (Tensor): Expected phase advance in each bin. Dimension of (freq, 1)

Returns:
Tensor: Complex Specgrams Stretch with dimension of `(..., freq, ceil(time/rate), complex=2)`
Tensor: Complex Specgrams Stretch with either a real dtype and dimension of
`(..., freq, ceil(time/rate), complex=2)` or
a complex dtype and dimension of `(..., freq, ceil(time/rate))`.

Example
Example - New API (using tensors with complex dtype)
>>> freq, hop_length = 1025, 512
>>> # (channel, freq, time)
>>> complex_specgrams = torch.randn(2, freq, 300, dtype=torch.cfloat)
Contributor:
this is neat!
>>> rate = 1.3 # Speed up by 30%
>>> phase_advance = torch.linspace(
>>> 0, math.pi * hop_length, freq)[..., None]
>>> x = phase_vocoder(complex_specgrams, rate, phase_advance)
>>> x.shape # with 231 == ceil(300 / 1.3)
torch.Size([2, 1025, 231])
Contributor (@vincentqb, Aug 6, 2020):
we might not need to change the example. we could add a second example, or a comment next to each `, 2`. other ideas?

Author (@anjali411, Aug 6, 2020):
I think we should have an example with tensors of complex dtype so that users know how to deal with complex tensors. This is an option:

    (Old API) Example:
    ....

    (New API) Example:
    ...

what do you think?

Contributor (@vincentqb, Aug 7, 2020):
Good suggestion :)

How about standardizing on this?

    Example - New API (using tensors with complex dtype)
    Example - Old API (using tensors with (..., complex=2))
Example - Old API (using real tensors with shape (..., complex=2))
>>> freq, hop_length = 1025, 512
>>> # (channel, freq, time, complex=2)
>>> complex_specgrams = torch.randn(2, freq, 300, 2)
Expand All @@ -476,50 +490,71 @@ def phase_vocoder(
>>> x.shape # with 231 == ceil(300 / 1.3)
torch.Size([2, 1025, 231, 2])
"""

# pack batch
use_complex = complex_specgrams.is_complex()
shape = complex_specgrams.size()
complex_specgrams = complex_specgrams.reshape([-1] + list(shape[-3:]))

time_steps = torch.arange(0,
complex_specgrams.size(-2),
rate,
device=complex_specgrams.device,
dtype=complex_specgrams.dtype)

alphas = time_steps % 1.0
phase_0 = angle(complex_specgrams[..., :1, :])

# Time Padding
complex_specgrams = torch.nn.functional.pad(complex_specgrams, [0, 0, 0, 2])

# (new_bins, freq, 2)
complex_specgrams_0 = complex_specgrams.index_select(-2, time_steps.long())
complex_specgrams_1 = complex_specgrams.index_select(-2, (time_steps + 1).long())

angle_0 = angle(complex_specgrams_0)
angle_1 = angle(complex_specgrams_1)

norm_0 = torch.norm(complex_specgrams_0, p=2, dim=-1)
norm_1 = torch.norm(complex_specgrams_1, p=2, dim=-1)
if use_complex:
Contributor:
As an alternative to duplicating all the logic here, you could have instead taken the real tensor, viewed it as complex, and then used the complex codepath (viewing it back as real in the end). Something to consider?

Author:
I think that's a possibility, however the goal is to be able to remove the code in the `if not use_complex` branch after a deprecation cycle and just use the code in the other branch (which has similar logic, however there are some substantial differences, e.g., padding logic).

Contributor:
well, in my suggestion, you'd delete the real code immediately :) Anyway, this is NBD

Author (@anjali411, Aug 12, 2020):
oh sorry I misread your comment and thought you meant the other way round! yeah that sounds reasonable to me. cc. @mthrok thoughts?

Author:
on more thought, the current lack of autograd support and testing for complex might introduce bc-breaking changes.

Collaborator:
> oh sorry I misread your comment and thought you meant the other way round! yeah that sounds reasonable to me. cc. @mthrok thoughts?
> on more thought, the current lack of autograd support and testing for complex might introduce bc-breaking changes.

I am in favor of not duplicating the logic, however if that introduces BC breakage on real-valued tensor input, then I think we can wait until the autograd support arrives.

Contributor:
If it's R2R with complex insides, the choice of JAX/TF convention doesn't matter, you'll always get the same gradients in the end.
# pack batch
complex_specgrams = complex_specgrams.reshape([-1] + list(shape[-2:]))
time_steps = torch.arange(0,
complex_specgrams.size(-1),
rate,
device=complex_specgrams.device,
dtype=torch.real(complex_specgrams).dtype)
phase_0 = complex_specgrams[..., :1].angle()
alphas = time_steps % 1.0
# Time Padding
complex_specgrams = torch.nn.functional.pad(complex_specgrams, [0, 2])
# (batch, freq, new_bins)
complex_specgrams_0 = complex_specgrams.index_select(-1, time_steps.long())
complex_specgrams_1 = complex_specgrams.index_select(-1, (time_steps + 1).long())

angle_0 = complex_specgrams_0.angle()
angle_1 = complex_specgrams_1.angle()
norm_0 = complex_specgrams_0.abs()
norm_1 = complex_specgrams_1.abs()
else:
complex_specgrams = complex_specgrams.reshape([-1] + list(shape[-3:]))
time_steps = torch.arange(0,
complex_specgrams.size(-2),
rate,
device=complex_specgrams.device,
dtype=complex_specgrams.dtype)
alphas = time_steps % 1.0
phase_0 = angle(complex_specgrams[..., :1, :])
# Time Padding
complex_specgrams = torch.nn.functional.pad(complex_specgrams, [0, 0, 0, 2])
# (new_bins, freq, 2)
complex_specgrams_0 = complex_specgrams.index_select(-2, time_steps.long())
complex_specgrams_1 = complex_specgrams.index_select(-2, (time_steps + 1).long())

angle_0 = angle(complex_specgrams_0)
angle_1 = angle(complex_specgrams_1)
norm_0 = torch.norm(complex_specgrams_0, p=2, dim=-1)
norm_1 = torch.norm(complex_specgrams_1, p=2, dim=-1)

phase = angle_1 - angle_0 - phase_advance
phase = phase - 2 * math.pi * torch.round(phase / (2 * math.pi))

# Compute Phase Accum
phase = phase + phase_advance
phase = torch.cat([phase_0, phase[..., :-1]], dim=-1)

phase_acc = torch.cumsum(phase, -1)

mag = alphas * norm_1 + (1 - alphas) * norm_0

real_stretch = mag * torch.cos(phase_acc)
imag_stretch = mag * torch.sin(phase_acc)

complex_specgrams_stretch = torch.stack([real_stretch, imag_stretch], dim=-1)
if use_complex:
complex_specgrams_stretch = torch.view_as_complex(torch.stack([real_stretch, imag_stretch], dim=-1))

# unpack batch
complex_specgrams_stretch = complex_specgrams_stretch.reshape(shape[:-3] + complex_specgrams_stretch.shape[1:])
# unpack batch
complex_specgrams_stretch = complex_specgrams_stretch.reshape(shape[:-2] + complex_specgrams_stretch.shape[1:])
else:
complex_specgrams_stretch = torch.stack([real_stretch, imag_stretch], dim=-1)
# unpack batch
complex_specgrams_stretch = complex_specgrams_stretch.reshape(shape[:-3] + complex_specgrams_stretch.shape[1:])

return complex_specgrams_stretch

5 changes: 4 additions & 1 deletion torchaudio/transforms.py
@@ -693,7 +693,10 @@ def forward(self, complex_specgrams: Tensor, overriding_rate: Optional[float] =
Returns:
Tensor: Stretched complex spectrogram of dimension (..., freq, ceil(time/rate), complex=2).
"""
assert complex_specgrams.size(-1) == 2, "complex_specgrams should be a complex tensor, shape (..., complex=2)"
use_complex = complex_specgrams.is_complex()
if not use_complex:
assert complex_specgrams.size(-1) == 2, \
"complex_specgrams should be a complex tensor, shape (..., complex=2)"

if overriding_rate is None:
rate = self.fixed_rate
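
Finally, a hedged usage sketch of the transform with a complex-dtype input, assuming the PR as shown above (shapes follow the phase_vocoder docstring):

    import torch
    import torchaudio

    n_freq, hop_length, rate = 1025, 512, 1.3
    specgram = torch.randn(2, n_freq, 300, dtype=torch.cfloat)  # (channel, freq, time)
    stretch = torchaudio.transforms.TimeStretch(hop_length=hop_length, n_freq=n_freq, fixed_rate=rate)
    stretched = stretch(specgram)
    print(stretched.shape, stretched.dtype)  # expected: torch.Size([2, 1025, 231]) torch.cfloat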