3.2.0 (#835)

* CLI for whisperx and speechbrain transcription * Optimizations for training acoustic models * Switch to using miniforge for gha * Fixing adaptation for older models
MontrealCorpusTools · Oct 1, 2024 · 1f91bff · 1f91bff
1 parent 78e481d
commit 1f91bff
Show file tree

Hide file tree

Showing 62 changed files with 3,999 additions and 1,453 deletions.
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
@@ -19,7 +19,6 @@ jobs:
         include:
           - os: ubuntu-latest
             label: linux-64
-            prefix: /usr/share/miniconda3/envs/my-env
 
           #- os: macos-latest
           #  label: osx-64
@@ -37,13 +36,10 @@ jobs:
           fetch-depth: 0
 
       - name: Install Conda environment with Micromamba
-        uses: mamba-org/setup-micromamba@v1
+        uses: conda-incubator/setup-miniconda@v3
         with:
-          environment-file: environment.yml
-          environment-name: mfa
-          create-args: >-
-            python=3.9
-          cache-environment: true
+          environment-file: github_environment.yml
+          miniforge-version: latest
 
       - name: Configure mfa
         shell: bash -l {0}

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -1,12 +1,24 @@
-default_language_version:
-  python: python3.11
 repos:
+  - repo: local
+    hooks:
+      - id: profile-check
+        name: no profiling
+        entry: '@profile'
+        language: pygrep
+        types: [ python ]
+      - id: print-check
+        name: no print statements
+        entry: '\bprint\('
+        language: pygrep
+        types: [ python ]
+        files: ^montreal_forced_aligner/
+        exclude: ^montreal_forced_aligner/command_line/transcribe.py
   - repo: https://github.com/psf/black
     rev: 23.9.1
     hooks:
       - id: black
   - repo: https://github.com/pycqa/flake8
-    rev: 6.0.0
+    rev: 7.0.0
     hooks:
       - id: flake8
         additional_dependencies:

diff --git a/bin/mfa_update b/bin/mfa_update
@@ -1,18 +1,50 @@
 #!/usr/bin/env python
 
+import argparse
 import os
 import shutil
 import subprocess
+import sys
 from importlib.util import find_spec
 
-anchor_found = find_spec("anchor") is not None
+if __name__ == "__main__":
+    # Update MFA's conda packages; --install_3p also installs the transcription dependencies.
+    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+    parser.add_argument(
+        "--install_3p",
+        action="store_true",
+        help="Install/update third party dependencies (Speechbrain and WhisperX)",
+    )
+    args = parser.parse_args()
+    anchor_found = find_spec("anchor") is not None  # Anchor is only updated if already present
+    whisperx_found = find_spec("whisperx") is not None
 
-conda_path = shutil.which("conda")
-mamba_path = shutil.which("mamba")
-if mamba_path is None:
-    print("No mamba found, installing first...")
-    subprocess.call([conda_path, "install", "-c", "conda-forge", "-y", "mamba"], env=os.environ)
-package_list = ["montreal-forced-aligner", "kalpy", "kaldi=*=cpu*"]
-if anchor_found:
-    package_list.append("anchor-annotator")
-subprocess.call([mamba_path, "update", "-c", "conda-forge", "-y"] + package_list, env=os.environ)
+    conda_path = shutil.which("conda")
+    if conda_path is None:
+        print("Please install conda before running this command.")
+        sys.exit(1)
+    mamba_path = shutil.which("mamba")
+    if mamba_path is None:
+        print("No mamba found, installing first...")
+        subprocess.call(
+            [conda_path, "install", "-c", "conda-forge", "-y", "mamba"], env=os.environ
+        )
+        mamba_path = shutil.which("mamba") or conda_path  # re-resolve; fall back to conda
+    package_list = ["montreal-forced-aligner", "kalpy", "kaldi=*=cpu*"]
+    if anchor_found:
+        package_list.append("anchor-annotator")
+    subprocess.call(
+        [mamba_path, "update", "-c", "conda-forge", "-y"] + package_list, env=os.environ
+    )
+    if args.install_3p:
+        channels = ["conda-forge", "pytorch", "nvidia", "anaconda"]
+        package_list = ["pytorch", "torchaudio"]
+        if not whisperx_found:
+            package_list.extend(["cudnn=8", "transformers"])
+        command = [mamba_path, "install", "-y"]
+        for c in channels:
+            command.extend(["-c", c])
+        subprocess.call(command + package_list, env=os.environ)
+        # Pass the package names to pip; previously the list was built but never used.
+        pip_packages = ["whisperx", "speechbrain", "pygtrie"]
+        subprocess.call(["pip", "install", "-U"] + pip_packages, env=os.environ)
diff --git a/docs/source/changelog/changelog_3.0.rst b/docs/source/changelog/changelog_3.0.rst
@@ -5,39 +5,6 @@
 3.0 Changelog
 *************
 
-3.1.3
------
-
-- Fixed an issue where silence probability being zero was not correctly removing silence
-- Compatibility with kalpy v0.6.5
-- Added API functionality for verifying transcripts with interjection words in alignment
-- Fixed an error in fine tuning that generated nonsensical boundaries
-
-3.1.2
------
-
-- Fixed a bug where hidden files and folders would be parsed as corpus data
-- Fixed a bug where validation would not respect :code:`--no_final_clean`
-- Fixed a rare crash in training when a job would not have utterances assigned to it
-- Fixed a bug where MFA would mistakenly report a dictionary and acoustic model phones did not match for older versions
-
-3.1.1
------
-
-- Fixed an issue with TextGrids missing intervals
-
-3.1.0
------
-
-- Fixed a bug where cutoffs were not properly modelled
-- Added additional filter on create subset to not include utterances with cutoffs in smaller subsets
-- Added the ability to specify HMM topologies for phones
-- Fixed issues caused by validators not cleaning up temporary files and databases
-- Added support for default and nonnative dictionaries generated from other dictionaries
-- Restricted initial training rounds to exclude default and nonnative dictionaries
-- Changed clustering of phones to not mix silence and non-silence phones
-- Optimized textgrid export
-- Added better memory management for collecting alignments
 
 3.0.8
 -----

diff --git a/docs/source/changelog/changelog_3.1.rst b/docs/source/changelog/changelog_3.1.rst
@@ -0,0 +1,48 @@
+
+.. _changelog_3.1:
+
+*************
+3.1 Changelog
+*************
+
+3.1.4
+-----
+
+- Optimized :code:`mfa g2p` to better use multiple processes
+- Added :code:`--export_scores` to :code:`mfa g2p` for adding a column representing the final weights of the generated pronunciations
+- Added :code:`--output_directory` to :code:`mfa validate` to save generated validation files to a specified directory rather than the temporary directory
+- Fixed a bug in cutoff modeling that was preventing them from being properly parsed
+
+3.1.3
+-----
+
+- Fixed an issue where silence probability being zero was not correctly removing silence
+- Compatibility with kalpy v0.6.5
+- Added API functionality for verifying transcripts with interjection words in alignment
+- Fixed an error in fine tuning that generated nonsensical boundaries
+
+3.1.2
+-----
+
+- Fixed a bug where hidden files and folders would be parsed as corpus data
+- Fixed a bug where validation would not respect :code:`--no_final_clean`
+- Fixed a rare crash in training when a job would not have utterances assigned to it
+- Fixed a bug where MFA would mistakenly report a dictionary and acoustic model phones did not match for older versions
+
+3.1.1
+-----
+
+- Fixed an issue with TextGrids missing intervals
+
+3.1.0
+-----
+
+- Fixed a bug where cutoffs were not properly modelled
+- Added additional filter on create subset to not include utterances with cutoffs in smaller subsets
+- Added the ability to specify HMM topologies for phones
+- Fixed issues caused by validators not cleaning up temporary files and databases
+- Added support for default and nonnative dictionaries generated from other dictionaries
+- Restricted initial training rounds to exclude default and nonnative dictionaries
+- Changed clustering of phones to not mix silence and non-silence phones
+- Optimized textgrid export
+- Added better memory management for collecting alignments
diff --git a/docs/source/changelog/changelog_3.2.rst b/docs/source/changelog/changelog_3.2.rst
@@ -0,0 +1,16 @@
+
+.. _changelog_3.2:
+
+*************
+3.2 Changelog
+*************
+
+3.2.0
+-----
+
+- Added :code:`--subset_word_count` parameter to :ref:`train_acoustic_model` to add a minimum word count for an utterance to be included in training subsets
+- Added :code:`--minimum_utterance_length` parameter to :ref:`train_acoustic_model` to add a minimum word count for an utterance to be included in training at all
+- Improved memory usage in compiling training graphs for initial subsets
+- Added support for transcription via whisperx and speechbrain models
+- Updated text normalization to normalize to decomposed forms
+- Compatibility with Kalpy 0.6.7
diff --git a/docs/source/changelog/index.md b/docs/source/changelog/index.md
@@ -53,6 +53,8 @@
 :hidden:
 :maxdepth: 1
 
+changelog_3.2.rst
+changelog_3.1.rst
 news_3.0.rst
 changelog_3.0.rst
 changelog_2.2.rst

diff --git a/environment.yml b/environment.yml
@@ -29,8 +29,6 @@ dependencies:
   - postgresql
   - psycopg2
   - click
-  - pytorch
-  - torchaudio
   - setuptools_scm
   - pytest
   - pytest-mypy
@@ -47,14 +45,15 @@ dependencies:
   - rich
   - rich-click
   - kalpy
+    # Tokenization dependencies
   - spacy
   - sudachipy
   - sudachidict-core
   - spacy-pkuseg
   - pip:
       - build
       - twine
-      - speechbrain
+      # Tokenization dependencies
       - python-mecab-ko
       - jamo
       - pythainlp

diff --git a/github_environment.yml b/github_environment.yml
@@ -0,0 +1,43 @@
+channels:
+  - conda-forge
+dependencies:
+  - python>=3.8
+  - numpy
+  - librosa
+  - pysoundfile
+  - tqdm
+  - requests
+  - pyyaml
+  - dataclassy
+  - kaldi=*=*cpu*
+  - scipy
+  - pynini
+  - openfst=1.8.3
+  - scikit-learn<1.3
+  - hdbscan
+  - baumwelch
+  - ngram
+  - praatio=6.0.0
+  - biopython=1.79
+  - sqlalchemy>=2.0
+  - pgvector
+  - pgvector-python
+  - sqlite
+  - postgresql
+  - psycopg2
+  - click
+  - setuptools_scm
+  - pytest
+  - pytest-mypy
+  - pytest-cov
+  - pytest-timeout
+  - mock
+  - coverage
+  - coveralls
+  - interrogate
+  - kneed
+  - matplotlib
+  - seaborn
+  - rich
+  - rich-click
+  - kalpy
diff --git a/montreal_forced_aligner/abc.py b/montreal_forced_aligner/abc.py
@@ -283,7 +283,8 @@ def initialize_database(self) -> None:
                     )
                 except Exception:
                     raise DatabaseError(
-                        f"There was an error connecting to the {config.CURRENT_PROFILE_NAME} MFA database server. "
+                        f"There was an error connecting to the {config.CURRENT_PROFILE_NAME} MFA database server "
+                        f"at {config.database_socket()}. "
                         "Please ensure the server is initialized (mfa server init) or running (mfa server start)"
                     )
                 exist_check = False
@@ -304,7 +305,7 @@ def initialize_database(self) -> None:
                 conn.execute(sqlalchemy.text("CREATE EXTENSION IF NOT EXISTS vector"))
                 conn.execute(sqlalchemy.text("CREATE EXTENSION IF NOT EXISTS pg_trgm"))
                 conn.execute(sqlalchemy.text("CREATE EXTENSION IF NOT EXISTS pg_stat_statements"))
-                conn.execute(sqlalchemy.text(f"select setseed({config.SEED/32768})"))
+                conn.execute(sqlalchemy.text(f"select setseed({config.SEED / 32768})"))
                 conn.commit()
 
         MfaSqlBase.metadata.create_all(self.db_engine)
@@ -617,6 +618,8 @@ def parse_args(
         dict[str, Any]
             Dictionary of specified configuration parameters
         """
+        from montreal_forced_aligner.data import Language
+
         param_types = cls.get_configuration_parameters()
         params = {}
         unknown_dict = {}
@@ -639,7 +642,10 @@ def parse_args(
             ):
                 continue
             if args is not None and name in args and args[name] is not None:
-                params[name] = param_type(args[name])
+                if param_type == Language:
+                    params[name] = param_type[args[name]]
+                else:
+                    params[name] = param_type(args[name])
             elif name in unknown_dict:
                 params[name] = param_type(unknown_dict[name])
                 if param_type == bool and not isinstance(unknown_dict[name], bool):
@@ -818,7 +824,7 @@ def setup_logger(self) -> None:
                         f"You are currently running an older version of MFA ({current_version}) than the latest available ({latest_version}). "
                         f"To update, please run mfa_update."
                     )
-            except KeyError:
+            except Exception:
                 pass
         if re.search(r"\d+\.\d+\.\d+a", current_version) is not None:
             logger.debug(