Skip to content

Commit

Permalink
Import torchaudio #1782 40f2a08
Browse files Browse the repository at this point in the history
Summary: Import torchaudio by commit 40f2a08

Reviewed By: carolineechen

Differential Revision: D31056614

fbshipit-source-id: b04e83fe5460faad8f5d106da44a6e0f3aa2756b
  • Loading branch information
nateanl authored and facebook-github-bot committed Sep 23, 2021
1 parent 4c659bc commit 3fef9d5
Show file tree
Hide file tree
Showing 49 changed files with 1,121 additions and 1,142 deletions.
6 changes: 3 additions & 3 deletions .circleci/config.yml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions .circleci/config.yml.in
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ smoke_test_common: &smoke_test_common
jobs:
circleci_consistency:
docker:
- image: circleci/python:3.8
- image: cimg/python:3.8
steps:
- checkout
- run:
Expand Down Expand Up @@ -298,7 +298,7 @@ jobs:
description: "What whl subfolder to upload to, e.g., blank or cu100/ (trailing slash is important)"
type: string
docker:
- image: circleci/python:3.8
- image: cimg/python:3.8
steps:
- attach_workspace:
at: ~/workspace
Expand Down Expand Up @@ -633,7 +633,7 @@ jobs:
docstring_parameters_sync:
<<: *binary_common
docker:
- image: circleci/python:3.8
- image: cimg/python:3.8
steps:
- checkout
- run:
Expand Down
23 changes: 15 additions & 8 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,6 @@ endif()

project(torchaudio)

# Find the HIP package, set the HIP paths, load the HIP CMake.
if(USE_ROCM)
include(cmake/LoadHIP.cmake)
if(NOT PYTORCH_FOUND_HIP)
set(USE_ROCM OFF)
endif()
endif()

# check and set CMAKE_CXX_STANDARD
string(FIND "${CMAKE_CXX_FLAGS}" "-std=c++" env_cxx_standard)
Expand Down Expand Up @@ -57,9 +50,23 @@ endif()
option(BUILD_SOX "Build libsox statically" ON)
option(BUILD_KALDI "Build kaldi statically" ON)
option(BUILD_RNNT "Enable RNN transducer" ON)
option(BUILD_LIBTORCHAUDIO "Build C++ Library" ON)
option(BUILD_TORCHAUDIO_PYTHON_EXTENSION "Build Python extension" OFF)
option(USE_CUDA "Enable CUDA support" OFF)
option(USE_ROCM "Enable ROCM support" OFF)


# USE_CUDA and USE_ROCM must not be enabled at the same time.
# FATAL_ERROR is the message mode that actually stops processing; an
# unrecognized mode keyword such as FATAL is treated as message text, so
# configuration would otherwise continue with both options enabled.
if(USE_CUDA AND USE_ROCM)
  message(FATAL_ERROR "CUDA and ROCm are mutually exclusive")
endif()

# When ROCm support is requested, load the project's HIP setup module and
# fall back to a non-ROCm build if HIP is not reported as found.
if(USE_ROCM)
# Find the HIP package, set the HIP paths, load the HIP CMake.
include(cmake/LoadHIP.cmake)
# NOTE(review): PYTORCH_FOUND_HIP is presumably set by cmake/LoadHIP.cmake
# when HIP is located -- confirm against that module.
if(NOT PYTORCH_FOUND_HIP)
# Disable ROCm for the rest of this configure run; no diagnostic is emitted here.
set(USE_ROCM OFF)
endif()
endif()

if(USE_CUDA)
enable_language(CUDA)
Expand Down
21 changes: 16 additions & 5 deletions build_tools/setup_helpers/extension.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,13 +37,16 @@ def _get_build(var, default=False):
_BUILD_SOX = False if platform.system() == 'Windows' else _get_build("BUILD_SOX", True)
_BUILD_KALDI = False if platform.system() == 'Windows' else _get_build("BUILD_KALDI", True)
_BUILD_RNNT = _get_build("BUILD_RNNT", True)
_USE_ROCM = _get_build("USE_ROCM")
_USE_CUDA = _get_build("USE_CUDA", torch.cuda.is_available())
_USE_ROCM = _get_build("USE_ROCM", torch.cuda.is_available() and torch.version.hip is not None)
_USE_CUDA = _get_build("USE_CUDA", torch.cuda.is_available() and torch.version.hip is None)
_TORCH_CUDA_ARCH_LIST = os.environ.get('TORCH_CUDA_ARCH_LIST', None)


def get_ext_modules():
return [Extension(name='torchaudio._torchaudio', sources=[])]
return [
Extension(name='torchaudio.lib.libtorchaudio', sources=[]),
Extension(name='torchaudio._torchaudio', sources=[]),
]


# Based off of
Expand All @@ -53,10 +56,19 @@ def run(self):
try:
subprocess.check_output(['cmake', '--version'])
except OSError:
raise RuntimeError("CMake is not available.")
raise RuntimeError("CMake is not available.") from None
super().run()

def build_extension(self, ext):
# Since two library files (libtorchaudio and _torchaudio) need to be
# recognized by setuptools, we instantiate `Extension` twice. (see `get_ext_modules`)
# This leads to the situation where this `build_extension` method is called twice.
# However, the following `cmake` command will build all of them at the same time,
# so, we do not need to perform `cmake` twice.
# Therefore we call `cmake` only for `torchaudio._torchaudio`.
if ext.name != 'torchaudio._torchaudio':
return

extdir = os.path.abspath(
os.path.dirname(self.get_ext_fullpath(ext.name)))

Expand All @@ -76,7 +88,6 @@ def build_extension(self, ext):
f"-DBUILD_KALDI:BOOL={'ON' if _BUILD_KALDI else 'OFF'}",
f"-DBUILD_RNNT:BOOL={'ON' if _BUILD_RNNT else 'OFF'}",
"-DBUILD_TORCHAUDIO_PYTHON_EXTENSION:BOOL=ON",
"-DBUILD_LIBTORCHAUDIO:BOOL=OFF",
f"-DUSE_ROCM:BOOL={'ON' if _USE_ROCM else 'OFF'}",
f"-DUSE_CUDA:BOOL={'ON' if _USE_CUDA else 'OFF'}",
]
Expand Down
31 changes: 23 additions & 8 deletions docs/source/models.rst
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,12 @@ DeepSpeech
Tacotron2
~~~~~~~~~

Model
-----

Tacotron2
^^^^^^^^^

.. autoclass:: Tacotron2

.. automethod:: forward
Expand All @@ -38,7 +44,7 @@ Factory Functions
-----------------

tacotron2
---------
^^^^^^^^^

.. autofunction:: tacotron2

Expand All @@ -55,8 +61,11 @@ Wav2Letter
Wav2Vec2.0
~~~~~~~~~~

Model
-----

Wav2Vec2Model
-------------
^^^^^^^^^^^^^

.. autoclass:: Wav2Vec2Model

Expand All @@ -68,17 +77,17 @@ Factory Functions
-----------------

wav2vec2_base
-------------
^^^^^^^^^^^^^

.. autofunction:: wav2vec2_base

wav2vec2_large
--------------
^^^^^^^^^^^^^^

.. autofunction:: wav2vec2_large

wav2vec2_large_lv60k
--------------------
^^^^^^^^^^^^^^^^^^^^

.. autofunction:: wav2vec2_large_lv60k

Expand All @@ -88,12 +97,12 @@ Utility Functions
-----------------

import_huggingface_model
------------------------
^^^^^^^^^^^^^^^^^^^^^^^^

.. autofunction:: import_huggingface_model

import_fairseq_model
--------------------
^^^^^^^^^^^^^^^^^^^^

.. autofunction:: import_fairseq_model

Expand All @@ -102,6 +111,12 @@ import_fairseq_model
WaveRNN
~~~~~~~

Model
-----

WaveRNN
^^^^^^^

.. autoclass:: WaveRNN

.. automethod:: forward
Expand All @@ -112,7 +127,7 @@ Factory Functions
-----------------

wavernn
-------
^^^^^^^

.. autofunction:: wavernn

Expand Down
40 changes: 39 additions & 1 deletion docs/source/refs.bib
Original file line number Diff line number Diff line change
Expand Up @@ -95,4 +95,42 @@ @inproceedings{shen2018natural
pages={4779--4783},
year={2018},
organization={IEEE}
}
}
@article{souden2009optimal,
title={On optimal frequency-domain multichannel linear filtering for noise reduction},
author={Souden, Mehrez and Benesty, Jacob and Affes, Sofiene},
journal={IEEE Transactions on Audio, Speech, and Language Processing},
volume={18},
number={2},
pages={260--276},
year={2009},
publisher={IEEE}
}
@inproceedings{higuchi2016robust,
title={Robust MVDR beamforming using time-frequency masks for online/offline ASR in noise},
author={Higuchi, Takuya and Ito, Nobutaka and Yoshioka, Takuya and Nakatani, Tomohiro},
booktitle={2016 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
pages={5210--5214},
year={2016},
organization={IEEE}
}
@article{mises1929praktische,
title={Praktische Verfahren der Gleichungsaufl{\"o}sung.},
author={Mises, RV and Pollaczek-Geiringer, Hilda},
journal={ZAMM-Journal of Applied Mathematics and Mechanics/Zeitschrift f{\"u}r Angewandte Mathematik und Mechanik},
volume={9},
number={1},
pages={58--77},
year={1929},
publisher={Wiley Online Library}
}
@article{higuchi2017online,
title={Online MVDR beamformer based on complex Gaussian mixture model with spatial prior for noise robust ASR},
author={Higuchi, Takuya and Ito, Nobutaka and Araki, Shoko and Yoshioka, Takuya and Delcroix, Marc and Nakatani, Tomohiro},
journal={IEEE/ACM Transactions on Audio, Speech, and Language Processing},
volume={25},
number={4},
pages={780--793},
year={2017},
publisher={IEEE}
}
17 changes: 17 additions & 0 deletions docs/source/transforms.rst
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,23 @@ Transforms are common audio transforms. They can be chained together using :clas

.. automethod:: forward

:hidden:`Multi-channel`
~~~~~~~~~~~~~~~~~~~~~~~

:hidden:`PSD`
-------------

.. autoclass:: PSD

.. automethod:: forward

:hidden:`MVDR`
--------------

.. autoclass:: MVDR

.. automethod:: forward

References
~~~~~~~~~~

Expand Down
Loading

0 comments on commit 3fef9d5

Please sign in to comment.