Import torchaudio #1782 40f2a08

Summary: Import torchaudio by commit 40f2a08 Reviewed By: carolineechen Differential Revision: D31056614 fbshipit-source-id: b04e83fe5460faad8f5d106da44a6e0f3aa2756b
pytorch · Sep 23, 2021 · 3fef9d5 · 3fef9d5
1 parent 4c659bc
commit 3fef9d5
Show file tree

Hide file tree

Showing 49 changed files with 1,121 additions and 1,142 deletions.
diff --git a/.circleci/config.yml b/.circleci/config.yml
diff --git a/.circleci/config.yml.in b/.circleci/config.yml.in
@@ -105,7 +105,7 @@ smoke_test_common: &smoke_test_common
 jobs:
   circleci_consistency:
     docker:
-      - image: circleci/python:3.8
+      - image: cimg/python:3.8
     steps:
       - checkout
       - run:
@@ -298,7 +298,7 @@ jobs:
         description: "What whl subfolder to upload to, e.g., blank or cu100/ (trailing slash is important)"
         type: string
     docker:
-      - image: circleci/python:3.8
+      - image: cimg/python:3.8
     steps:
       - attach_workspace:
           at: ~/workspace
@@ -633,7 +633,7 @@ jobs:
   docstring_parameters_sync:
     <<: *binary_common
     docker:
-      - image: circleci/python:3.8
+      - image: cimg/python:3.8
     steps:
       - checkout
       - run:

diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -18,13 +18,6 @@ endif()
 
 project(torchaudio)
 
-# Find the HIP package, set the HIP paths, load the HIP CMake.
-if(USE_ROCM)
-  include(cmake/LoadHIP.cmake)
-  if(NOT PYTORCH_FOUND_HIP)
-    set(USE_ROCM OFF)
-  endif()
-endif()
 
 # check and set CMAKE_CXX_STANDARD
 string(FIND "${CMAKE_CXX_FLAGS}" "-std=c++" env_cxx_standard)
@@ -57,9 +50,23 @@ endif()
 option(BUILD_SOX "Build libsox statically" ON)
 option(BUILD_KALDI "Build kaldi statically" ON)
 option(BUILD_RNNT "Enable RNN transducer" ON)
-option(BUILD_LIBTORCHAUDIO "Build C++ Library" ON)
 option(BUILD_TORCHAUDIO_PYTHON_EXTENSION "Build Python extension" OFF)
 option(USE_CUDA "Enable CUDA support" OFF)
+option(USE_ROCM "Enable ROCM support" OFF)
+
+
+# USE_CUDA and USE_ROCM must not both be ON; FATAL_ERROR aborts the configure step.
+if(USE_CUDA AND USE_ROCM)
+  message(FATAL_ERROR "CUDA and ROCm are mutually exclusive")
+endif()
+
+if(USE_ROCM)
+  # Find the HIP package, set the HIP paths, load the HIP CMake.
+  include(cmake/LoadHIP.cmake)
+  if(NOT PYTORCH_FOUND_HIP)
+    set(USE_ROCM OFF)
+  endif()
+endif()
 
 if(USE_CUDA)
   enable_language(CUDA)

diff --git a/build_tools/setup_helpers/extension.py b/build_tools/setup_helpers/extension.py
@@ -37,13 +37,16 @@ def _get_build(var, default=False):
 _BUILD_SOX = False if platform.system() == 'Windows' else _get_build("BUILD_SOX", True)
 _BUILD_KALDI = False if platform.system() == 'Windows' else _get_build("BUILD_KALDI", True)
 _BUILD_RNNT = _get_build("BUILD_RNNT", True)
-_USE_ROCM = _get_build("USE_ROCM")
-_USE_CUDA = _get_build("USE_CUDA", torch.cuda.is_available())
+_USE_ROCM = _get_build("USE_ROCM", torch.cuda.is_available() and torch.version.hip is not None)
+_USE_CUDA = _get_build("USE_CUDA", torch.cuda.is_available() and torch.version.hip is None)
 _TORCH_CUDA_ARCH_LIST = os.environ.get('TORCH_CUDA_ARCH_LIST', None)
 
 
 def get_ext_modules():
-    return [Extension(name='torchaudio._torchaudio', sources=[])]
+    return [
+        Extension(name='torchaudio.lib.libtorchaudio', sources=[]),
+        Extension(name='torchaudio._torchaudio', sources=[]),
+    ]
 
 
 # Based off of
@@ -53,10 +56,19 @@ def run(self):
         try:
             subprocess.check_output(['cmake', '--version'])
         except OSError:
-            raise RuntimeError("CMake is not available.")
+            raise RuntimeError("CMake is not available.") from None
         super().run()
 
     def build_extension(self, ext):
+        # Since two library files (libtorchaudio and _torchaudio) need to be
+        # recognized by setuptools, we instantiate `Extension` twice. (see `get_ext_modules`)
+        # This leads to the situation where this `build_extension` method is called twice.
+        # However, the following `cmake` command will build all of them at the same time,
+        # so, we do not need to perform `cmake` twice.
+        # Therefore we call `cmake` only for `torchaudio._torchaudio`.
+        if ext.name != 'torchaudio._torchaudio':
+            return
+
         extdir = os.path.abspath(
             os.path.dirname(self.get_ext_fullpath(ext.name)))
 
@@ -76,7 +88,6 @@ def build_extension(self, ext):
             f"-DBUILD_KALDI:BOOL={'ON' if _BUILD_KALDI else 'OFF'}",
             f"-DBUILD_RNNT:BOOL={'ON' if _BUILD_RNNT else 'OFF'}",
             "-DBUILD_TORCHAUDIO_PYTHON_EXTENSION:BOOL=ON",
-            "-DBUILD_LIBTORCHAUDIO:BOOL=OFF",
             f"-DUSE_ROCM:BOOL={'ON' if _USE_ROCM else 'OFF'}",
             f"-DUSE_CUDA:BOOL={'ON' if _USE_CUDA else 'OFF'}",
         ]

diff --git a/docs/source/models.rst b/docs/source/models.rst
@@ -28,6 +28,12 @@ DeepSpeech
 Tacotron2
 ~~~~~~~~~
 
+Model
+-----
+
+Tacotron2
+^^^^^^^^^
+
 .. autoclass:: Tacotron2
 
   .. automethod:: forward
@@ -38,7 +44,7 @@ Factory Functions
 -----------------
 
 tacotron2
----------
+^^^^^^^^^
 
 .. autofunction:: tacotron2
 
@@ -55,8 +61,11 @@ Wav2Letter
 Wav2Vec2.0
 ~~~~~~~~~~
 
+Model
+-----
+
 Wav2Vec2Model
--------------
+^^^^^^^^^^^^^
 
 .. autoclass:: Wav2Vec2Model
 
@@ -68,17 +77,17 @@ Factory Functions
 -----------------
 
 wav2vec2_base
--------------
+^^^^^^^^^^^^^
 
 .. autofunction:: wav2vec2_base
 
 wav2vec2_large
---------------
+^^^^^^^^^^^^^^
 
 .. autofunction:: wav2vec2_large
 
 wav2vec2_large_lv60k
---------------------
+^^^^^^^^^^^^^^^^^^^^
 
 .. autofunction:: wav2vec2_large_lv60k
 
@@ -88,12 +97,12 @@ Utility Functions
 -----------------
 
 import_huggingface_model
-------------------------
+^^^^^^^^^^^^^^^^^^^^^^^^
 
 .. autofunction:: import_huggingface_model
 
 import_fairseq_model
---------------------
+^^^^^^^^^^^^^^^^^^^^
 
 .. autofunction:: import_fairseq_model
 
@@ -102,6 +111,12 @@ import_fairseq_model
 WaveRNN
 ~~~~~~~
 
+Model
+-----
+
+WaveRNN
+^^^^^^^
+
 .. autoclass:: WaveRNN
 
   .. automethod:: forward
@@ -112,7 +127,7 @@ Factory Functions
 -----------------
 
 wavernn
--------
+^^^^^^^
 
 .. autofunction:: wavernn
 

diff --git a/docs/source/refs.bib b/docs/source/refs.bib
@@ -95,4 +95,42 @@ @inproceedings{shen2018natural
   pages={4779--4783},
   year={2018},
   organization={IEEE}
-}
+}
+@article{souden2009optimal,
+  title={On optimal frequency-domain multichannel linear filtering for noise reduction},
+  author={Souden, Mehrez and Benesty, Jacob and Affes, Sofiene},
+  journal={IEEE Transactions on audio, speech, and language processing},
+  volume={18},
+  number={2},
+  pages={260--276},
+  year={2009},
+  publisher={IEEE}
+}
+@inproceedings{higuchi2016robust,
+  title={Robust MVDR beamforming using time-frequency masks for online/offline ASR in noise},
+  author={Higuchi, Takuya and Ito, Nobutaka and Yoshioka, Takuya and Nakatani, Tomohiro},
+  booktitle={2016 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
+  pages={5210--5214},
+  year={2016},
+  organization={IEEE}
+}
+@article{mises1929praktische,
+  title={Praktische Verfahren der Gleichungsaufl{\"o}sung.},
+  author={Mises, RV and Pollaczek-Geiringer, Hilda},
+  journal={ZAMM-Journal of Applied Mathematics and Mechanics/Zeitschrift f{\"u}r Angewandte Mathematik und Mechanik},
+  volume={9},
+  number={1},
+  pages={58--77},
+  year={1929},
+  publisher={Wiley Online Library}
+}
+@article{higuchi2017online,
+  title={Online MVDR beamformer based on complex Gaussian mixture model with spatial prior for noise robust ASR},
+  author={Higuchi, Takuya and Ito, Nobutaka and Araki, Shoko and Yoshioka, Takuya and Delcroix, Marc and Nakatani, Tomohiro},
+  journal={IEEE/ACM Transactions on Audio, Speech, and Language Processing},
+  volume={25},
+  number={4},
+  pages={780--793},
+  year={2017},
+  publisher={IEEE}
+}
diff --git a/docs/source/transforms.rst b/docs/source/transforms.rst
@@ -188,6 +188,23 @@ Transforms are common audio transforms. They can be chained together using :clas
 
   .. automethod:: forward
 
+:hidden:`Multi-channel`
+~~~~~~~~~~~~~~~~~~~~~~~
+
+:hidden:`PSD`
+-------------
+
+.. autoclass:: PSD
+
+  .. automethod:: forward
+
+:hidden:`MVDR`
+--------------
+
+.. autoclass:: MVDR
+
+  .. automethod:: forward
+
 References
 ~~~~~~~~~~