From fb42f2dae200d482bd7ba887e8865b8a6463a356 Mon Sep 17 00:00:00 2001
From: moto <855818+mthrok@users.noreply.github.com>
Date: Fri, 2 Jun 2023 23:14:00 -0400
Subject: [PATCH] Clean up ComputeKaldiPitch residue

Follow-up to: https://github.com/pytorch/audio/pull/3368

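Remove the files and lines that are no longer used: the BUILD_KALDI build
option, torchaudio/csrc/kaldi.cpp, the is_kaldi_available helper, and the
compute_kaldi_pitch documentation entry.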
---
 CONTRIBUTING.md                   |  1 -
 docs/source/functional.rst        |  1 -
 tools/setup_helpers/extension.py  |  2 -
 torchaudio/csrc/kaldi.cpp         | 93 -------------------------------
 torchaudio/csrc/pybind/pybind.cpp |  1 -
 torchaudio/csrc/utils.cpp         |  8 ---
 torchaudio/csrc/utils.h           |  1 -
 7 files changed, 107 deletions(-)
 delete mode 100644 torchaudio/csrc/kaldi.cpp

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 125aa83847..3e615a799e 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -74,7 +74,6 @@ python setup.py develop
 Some environmnet variables that change the build behavior
 - `BUILD_SOX`: Deteremines whether build and bind libsox in non-Windows environments. (no effect in Windows as libsox integration is not available) Default value is 1 (build and bind). Use 0 for disabling it.
 - `USE_CUDA`: Determines whether build the custom CUDA kernel. Default to the availability of CUDA-compatible GPUs.
-- `BUILD_KALDI`: Determines whether build Kaldi extension. This is required for `kaldi_pitch` function. Default value is 1 on Linux/macOS and 0 on Windows.
 - `BUILD_RNNT`: Determines whether build RNN-T loss function. Default value is 1.
 - `BUILD_CUDA_CTC_DECODER`: Determines whether build decoder features based on CUDA CTC decoder. Default value is 1. (`USE_CUDA` has to be 1.)
 
diff --git a/docs/source/functional.rst b/docs/source/functional.rst
index fb9bea9cd7..8be4492648 100644
--- a/docs/source/functional.rst
+++ b/docs/source/functional.rst
@@ -80,7 +80,6 @@ Feature Extractions
    compute_deltas
    detect_pitch_frequency
    sliding_window_cmn
-   compute_kaldi_pitch
    spectral_centroid
 
 Multi-channel
diff --git a/tools/setup_helpers/extension.py b/tools/setup_helpers/extension.py
index c72415f0d8..c68de9f4c3 100644
--- a/tools/setup_helpers/extension.py
+++ b/tools/setup_helpers/extension.py
@@ -34,7 +34,6 @@ def _get_build(var, default=False):
 
 
 _BUILD_SOX = False if platform.system() == "Windows" else _get_build("BUILD_SOX", True)
-_BUILD_KALDI = False if platform.system() == "Windows" else _get_build("BUILD_KALDI", True)
 _BUILD_RIR = _get_build("BUILD_RIR", True)
 _BUILD_RNNT = _get_build("BUILD_RNNT", True)
 _USE_FFMPEG = _get_build("USE_FFMPEG", False)
@@ -117,7 +116,6 @@ def build_extension(self, ext):
             "-DCMAKE_VERBOSE_MAKEFILE=ON",
             f"-DPython_INCLUDE_DIR={distutils.sysconfig.get_python_inc()}",
             f"-DBUILD_SOX:BOOL={'ON' if _BUILD_SOX else 'OFF'}",
-            f"-DBUILD_KALDI:BOOL={'ON' if _BUILD_KALDI else 'OFF'}",
             f"-DBUILD_RIR:BOOL={'ON' if _BUILD_RIR else 'OFF'}",
             f"-DBUILD_RNNT:BOOL={'ON' if _BUILD_RNNT else 'OFF'}",
             f"-DBUILD_ALIGN:BOOL={'ON' if _BUILD_ALIGN else 'OFF'}",
diff --git a/torchaudio/csrc/kaldi.cpp b/torchaudio/csrc/kaldi.cpp
deleted file mode 100644
index 6f2b36c28f..0000000000
--- a/torchaudio/csrc/kaldi.cpp
+++ /dev/null
@@ -1,93 +0,0 @@
-#include <torch/script.h>
-#include "feat/pitch-functions.h"
-
-namespace torchaudio {
-namespace kaldi {
-
-namespace {
-
-torch::Tensor denormalize(const torch::Tensor& t) {
-  auto ret = t;
-  auto pos = t > 0, neg = t < 0;
-  ret.index_put({pos}, t.index({pos}) * 32767);
-  ret.index_put({neg}, t.index({neg}) * 32768);
-  return ret;
-}
-
-torch::Tensor compute_kaldi_pitch(
-    const torch::Tensor& wave,
-    const ::kaldi::PitchExtractionOptions& opts) {
-  ::kaldi::VectorBase<::kaldi::BaseFloat> input(wave);
-  ::kaldi::Matrix<::kaldi::BaseFloat> output;
-  ::kaldi::ComputeKaldiPitch(opts, input, &output);
-  return output.tensor_;
-}
-
-} // namespace
-
-torch::Tensor ComputeKaldiPitch(
-    const torch::Tensor& wave,
-    double sample_frequency,
-    double frame_length,
-    double frame_shift,
-    double min_f0,
-    double max_f0,
-    double soft_min_f0,
-    double penalty_factor,
-    double lowpass_cutoff,
-    double resample_frequency,
-    double delta_pitch,
-    double nccf_ballast,
-    int64_t lowpass_filter_width,
-    int64_t upsample_filter_width,
-    int64_t max_frames_latency,
-    int64_t frames_per_chunk,
-    bool simulate_first_pass_online,
-    int64_t recompute_frame,
-    bool snip_edges) {
-  TORCH_CHECK(wave.ndimension() == 2, "Input tensor must be 2 dimentional.");
-  TORCH_CHECK(wave.device().is_cpu(), "Input tensor must be on CPU.");
-  TORCH_CHECK(
-      wave.dtype() == torch::kFloat32, "Input tensor must be float32 type.");
-
-  ::kaldi::PitchExtractionOptions opts;
-  opts.samp_freq = static_cast<::kaldi::BaseFloat>(sample_frequency);
-  opts.frame_shift_ms = static_cast<::kaldi::BaseFloat>(frame_shift);
-  opts.frame_length_ms = static_cast<::kaldi::BaseFloat>(frame_length);
-  opts.min_f0 = static_cast<::kaldi::BaseFloat>(min_f0);
-  opts.max_f0 = static_cast<::kaldi::BaseFloat>(max_f0);
-  opts.soft_min_f0 = static_cast<::kaldi::BaseFloat>(soft_min_f0);
-  opts.penalty_factor = static_cast<::kaldi::BaseFloat>(penalty_factor);
-  opts.lowpass_cutoff = static_cast<::kaldi::BaseFloat>(lowpass_cutoff);
-  opts.resample_freq = static_cast<::kaldi::BaseFloat>(resample_frequency);
-  opts.delta_pitch = static_cast<::kaldi::BaseFloat>(delta_pitch);
-  opts.lowpass_filter_width = static_cast<::kaldi::int32>(lowpass_filter_width);
-  opts.upsample_filter_width =
-      static_cast<::kaldi::int32>(upsample_filter_width);
-  opts.max_frames_latency = static_cast<::kaldi::int32>(max_frames_latency);
-  opts.frames_per_chunk = static_cast<::kaldi::int32>(frames_per_chunk);
-  opts.simulate_first_pass_online = simulate_first_pass_online;
-  opts.recompute_frame = static_cast<::kaldi::int32>(recompute_frame);
-  opts.snip_edges = snip_edges;
-
-  // Kaldi's float type expects value range of int16 expressed as float
-  torch::Tensor wave_ = denormalize(wave);
-
-  auto batch_size = wave_.size(0);
-  std::vector<torch::Tensor> results(batch_size);
-  at::parallel_for(0, batch_size, 1, [&](int64_t begin, int64_t end) {
-    for (auto i = begin; i < end; ++i) {
-      results[i] = compute_kaldi_pitch(wave_.index({i}), opts);
-    }
-  });
-  return torch::stack(results, 0);
-}
-
-TORCH_LIBRARY_FRAGMENT(torchaudio, m) {
-  m.def(
-      "torchaudio::kaldi_ComputeKaldiPitch",
-      &torchaudio::kaldi::ComputeKaldiPitch);
-}
-
-} // namespace kaldi
-} // namespace torchaudio
diff --git a/torchaudio/csrc/pybind/pybind.cpp b/torchaudio/csrc/pybind/pybind.cpp
index 9d2d0e35a2..b956deb0e4 100644
--- a/torchaudio/csrc/pybind/pybind.cpp
+++ b/torchaudio/csrc/pybind/pybind.cpp
@@ -5,7 +5,6 @@ namespace torchaudio {
 namespace {
 
 PYBIND11_MODULE(_torchaudio, m) {
-  m.def("is_kaldi_available", &is_kaldi_available, "");
   m.def("is_rir_available", &is_rir_available, "");
   m.def("is_align_available", &is_align_available, "");
   m.def("cuda_version", &cuda_version, "");
diff --git a/torchaudio/csrc/utils.cpp b/torchaudio/csrc/utils.cpp
index c76a4ffa7a..8c5898cb49 100644
--- a/torchaudio/csrc/utils.cpp
+++ b/torchaudio/csrc/utils.cpp
@@ -7,14 +7,6 @@
 
 namespace torchaudio {
 
-bool is_kaldi_available() {
-#ifdef INCLUDE_KALDI
-  return true;
-#else
-  return false;
-#endif
-}
-
 bool is_rir_available() {
 #ifdef INCLUDE_RIR
   return true;
diff --git a/torchaudio/csrc/utils.h b/torchaudio/csrc/utils.h
index 751cfa1ad2..1d7060efa7 100644
--- a/torchaudio/csrc/utils.h
+++ b/torchaudio/csrc/utils.h
@@ -2,7 +2,6 @@
 #include <torch/torch.h>
 
 namespace torchaudio {
-bool is_kaldi_available();
 bool is_rir_available();
 bool is_align_available();
 c10::optional<int64_t> cuda_version();