lhotse-speech · pzelasko · Nov 6, 2024 · Aug 7, 2024 · Aug 7, 2024 · Nov 6, 2024
diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml
@@ -22,14 +22,14 @@ jobs:
           - python-version: "3.9"
             torch-install-cmd: "pip install torch==2.3 torchaudio==2.3 --extra-index-url https://download.pytorch.org/whl/cpu"
             extra_deps: ""
-          - python-version: "3.10"
-            torch-install-cmd: "pip install torch==2.3 torchaudio==2.3 --extra-index-url https://download.pytorch.org/whl/cpu"
+          - python-version: "3.10"  # note: no torchaudio
+            torch-install-cmd: "pip install torch==2.3 --extra-index-url https://download.pytorch.org/whl/cpu"
             extra_deps: ""
-          - python-version: "3.11"
-            torch-install-cmd: "pip install torch==2.3 torchaudio==2.3 --extra-index-url https://download.pytorch.org/whl/cpu"
+          - python-version: "3.11"  # note: no torchaudio
+            torch-install-cmd: "pip install torch==2.3 --extra-index-url https://download.pytorch.org/whl/cpu"
             extra_deps: ""
-          - python-version: "3.12"
-            torch-install-cmd: "pip install torch==2.3 torchaudio==2.3 --extra-index-url https://download.pytorch.org/whl/cpu"
+          - python-version: "3.12"  # note: no torchaudio
+            torch-install-cmd: "pip install torch==2.3 --extra-index-url https://download.pytorch.org/whl/cpu"
             extra_deps: ""
 
       fail-fast: false

diff --git a/README.md b/README.md
@@ -116,7 +116,8 @@ Lhotse uses several environment variables to customize it's behavior. They are a
 
 ### Optional dependencies
 
-**Other pip packages.** You can leverage optional features of Lhotse by installing the relevant supporting package like this: `pip install lhotse[package_name]`. The supported optional packages include:
+**Other pip packages.** You can leverage optional features of Lhotse by installing the relevant supporting package:
+- `torchaudio` used to be a core dependency in Lhotse, but is now optional. For installation instructions, refer to the [official PyTorch documentation for installation](https://pytorch.org/get-started/locally/).
 - `pip install lhotse[kaldi]` for a maximal feature set related to Kaldi compatibility. It includes libraries such as `kaldi_native_io` (a more efficient variant of `kaldi_io`) and `kaldifeat` that port some of Kaldi functionality into Python.
 - `pip install lhotse[orjson]` for up to 50% faster reading of JSONL manifests.
 - `pip install lhotse[webdataset]`. We support "compiling" your data into WebDataset tarball format for more effective IO. You can still interact with the data as if it was a regular lazy CutSet. To learn more, check out the following tutorial: [![Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/lhotse-speech/lhotse/blob/master/examples/02-webdataset-integration.ipynb)

diff --git a/docs/conf.py b/docs/conf.py
@@ -78,4 +78,4 @@
     "exclude-members": "__weakref__",
 }
 
-autodoc_mock_imports = ["torchaudio", "SoundFile", "soundfile"]
+autodoc_mock_imports = ["SoundFile", "soundfile"]
diff --git a/docs/getting-started.rst b/docs/getting-started.rst
@@ -143,7 +143,9 @@ Lhotse uses several environment variables to customize it's behavior. They are a
 Optional dependencies
 *********************
 
-**Other pip packages.** You can leverage optional features of Lhotse by installing the relevant supporting package like this: ``pip install lhotse[package_name]``. The supported optional packages include:
+**Other pip packages.** You can leverage optional features of Lhotse by installing the relevant supporting package:
+
+* ``torchaudio`` used to be a core dependency in Lhotse, but is now optional. For installation instructions, refer to the `official Pytorch documentation for installation`_.
 
 * ``pip install lhotse[kaldi]`` for a maximal feature set related to Kaldi compatibility. It includes libraries such as ``kaldi_native_io`` (a more efficient variant of ``kaldi_io``) and ``kaldifeat`` that port some of Kaldi functionality into Python.
 
@@ -230,3 +232,4 @@ the speech starts roughly at the first second (100 frames):
 .. _Icefall recipes: https://github.com/k2-fsa/icefall
 .. _orjson: https://pypi.org/project/orjson/
 .. _AIStore: https://aiatscale.org
+.. _official Pytorch documentation for installation: https://pytorch.org/get-started/locally/
diff --git a/lhotse/audio/recording.py b/lhotse/audio/recording.py
@@ -8,7 +8,7 @@
 import torch
 from _decimal import ROUND_HALF_UP
 
-from lhotse.audio.backend import info, save_audio, torchaudio_info
+from lhotse.audio.backend import get_current_audio_backend, info, save_audio
 from lhotse.audio.source import AudioSource
 from lhotse.audio.utils import (
     AudioLoadingError,
@@ -260,7 +260,7 @@ def from_bytes(
         :return: a new ``Recording`` instance that owns the byte string data.
         """
         stream = BytesIO(data)
-        audio_info = torchaudio_info(stream)
+        audio_info = get_current_audio_backend().info(stream)
         return Recording(
             id=recording_id,
             sampling_rate=audio_info.samplerate,

diff --git a/setup.py b/setup.py
@@ -58,7 +58,7 @@
 )  # False = public release, True = otherwise
 
 
-LHOTSE_REQUIRE_TORCHAUDIO = os.environ.get("LHOTSE_REQUIRE_TORCHAUDIO", "1") in (
+LHOTSE_REQUIRE_TORCHAUDIO = os.environ.get("LHOTSE_REQUIRE_TORCHAUDIO", "0") in (
     "1",
     "True",
     "true",
@@ -157,6 +157,7 @@ def mark_lhotse_version(version: str) -> None:
     "packaging",
     "pyyaml>=5.3.1",
     "tabulate>=0.8.1",
+    "torch",
     "tqdm",
 ]
 
@@ -167,30 +168,6 @@ def mark_lhotse_version(version: str) -> None:
 else:
     install_requires.append("lilcom>=1.1.0")
 
-try:
-    # If the user already installed PyTorch, make sure he has torchaudio too.
-    # Otherwise, we'll just install the latest versions from PyPI for the user.
-    import torch
-
-    if LHOTSE_REQUIRE_TORCHAUDIO:
-        try:
-            import torchaudio
-        except ImportError:
-            raise ValueError(
-                "We detected that you have already installed PyTorch, but haven't installed torchaudio. "
-                "Unfortunately we can't detect the compatible torchaudio version for you; "
-                "you will have to install it manually. "
-                "For instructions, please refer either to https://pytorch.org/get-started/locally/ "
-                "or https://github.com/pytorch/audio#dependencies "
-                "You can also disable torchaudio dependency by setting the following environment variable: "
-                "LHOTSE_USE_TORCHAUDIO=0"
-            )
-except ImportError:
-    extras = ["torch"]
-    if LHOTSE_REQUIRE_TORCHAUDIO:
-        extras.append("torchaudio")
-    install_requires.extend(extras)
-
 docs_require = (project_root / "docs" / "requirements.txt").read_text().splitlines()
 tests_require = [
     "pytest==7.1.3",
@@ -222,13 +199,10 @@ def mark_lhotse_version(version: str) -> None:
 all_requires = sorted(dev_requires)
 
 if os.environ.get("READTHEDOCS", False):
-    # When building documentation, omit torchaudio installation and mock it instead.
-    # This works around the inability to install libsoundfile1 in read-the-docs env,
-    # which caused the documentation builds to silently crash.
     install_requires = [
         req
         for req in install_requires
-        if not any(req.startswith(dep) for dep in ["torchaudio", "SoundFile"])
+        if not any(req.startswith(dep) for dep in ["SoundFile"])
     ]
 
 setup(