Skip to content

Commit

Permalink
[BC-Breaking] Remove compute_kaldi_pitch (#3368)
Browse files Browse the repository at this point in the history
Summary:
This commit removes the compute_kaldi_pitch function and the underlying Kaldi integration from torchaudio.

The Kaldi pitch function was added on a short timeline by integrating the original Kaldi implementation instead of reimplementing it in PyTorch.

The Kaldi integration employed a hack that replaces Kaldi's base vector/matrix implementation with PyTorch Tensor so that there is only one BLAS library within torchaudio.

Recently, we have been making torchaudio leaner, and we have not seen wide adoption of the kaldi_pitch feature, so we decided to remove the function and the underlying Kaldi integration.

See some of the discussion in #1269.

Pull Request resolved: #3368

Differential Revision: D46406176

Pulled By: mthrok

fbshipit-source-id: ee5e24d825188f379979ddccd680c7323b119b1e
  • Loading branch information
mthrok authored and facebook-github-bot committed Jun 2, 2023
1 parent 2ba36b4 commit 5bbbb1d
Show file tree
Hide file tree
Showing 24 changed files with 7 additions and 956 deletions.
2 changes: 1 addition & 1 deletion .circleci/unittest/linux/scripts/run_style_checks.sh
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ fi

printf "\x1b[34mRunning clang-format:\x1b[0m\n"
"${this_dir}"/run_clang_format.py \
-r torchaudio/csrc third_party/kaldi/src \
-r torchaudio/csrc \
--clang-format-executable "${clangformat_path}" \
&& git diff --exit-code
status=$?
Expand Down
4 changes: 0 additions & 4 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -1,4 +0,0 @@
[submodule "kaldi"]
path = third_party/kaldi/submodule
url = https://github.com/kaldi-asr/kaldi
ignore = dirty
1 change: 0 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,6 @@ endif()

# Options
option(BUILD_SOX "Build libsox statically" ON)
option(BUILD_KALDI "Build kaldi statically" ON)
option(BUILD_RIR "Enable RIR simulation" ON)
option(BUILD_RNNT "Enable RNN transducer" ON)
option(BUILD_ALIGN "Enable forced alignment" ON)
Expand Down
51 changes: 0 additions & 51 deletions examples/tutorials/audio_feature_extractions_tutorial.py
Original file line number Diff line number Diff line change
Expand Up @@ -406,54 +406,3 @@ def plot_pitch(waveform, sr, pitch):


plot_pitch(SPEECH_WAVEFORM, SAMPLE_RATE, pitch)

######################################################################
# Kaldi Pitch (beta)
# ------------------
#
# Kaldi Pitch feature [1] is a pitch detection mechanism tuned for automatic
# speech recognition (ASR) applications. This is a beta feature in ``torchaudio``,
# and it is available as :py:func:`torchaudio.functional.compute_kaldi_pitch`.
#
# 1. A pitch extraction algorithm tuned for automatic speech recognition
#
# Ghahremani, B. BabaAli, D. Povey, K. Riedhammer, J. Trmal and S.
# Khudanpur
#
# 2014 IEEE International Conference on Acoustics, Speech and Signal
# Processing (ICASSP), Florence, 2014, pp. 2494-2498, doi:
# 10.1109/ICASSP.2014.6854049.
# [`abstract <https://ieeexplore.ieee.org/document/6854049>`__],
# [`paper <https://danielpovey.com/files/2014_icassp_pitch.pdf>`__]
#

pitch_feature = F.compute_kaldi_pitch(SPEECH_WAVEFORM, SAMPLE_RATE)
pitch, nfcc = pitch_feature[..., 0], pitch_feature[..., 1]

######################################################################
#

def plot_kaldi_pitch(waveform, sr, pitch, nfcc):
_, axis = plt.subplots(1, 1)
axis.set_title("Kaldi Pitch Feature")
axis.grid(True)

end_time = waveform.shape[1] / sr
time_axis = torch.linspace(0, end_time, waveform.shape[1])
axis.plot(time_axis, waveform[0], linewidth=1, color="gray", alpha=0.3)

time_axis = torch.linspace(0, end_time, pitch.shape[1])
ln1 = axis.plot(time_axis, pitch[0], linewidth=2, label="Pitch", color="green")
axis.set_ylim((-1.3, 1.3))

axis2 = axis.twinx()
time_axis = torch.linspace(0, end_time, nfcc.shape[1])
ln2 = axis2.plot(time_axis, nfcc[0], linewidth=2, label="NFCC", color="blue", linestyle="--")

lns = ln1 + ln2
labels = [l.get_label() for l in lns]
axis.legend(lns, labels, loc=0)
plt.show(block=False)


plot_kaldi_pitch(SPEECH_WAVEFORM, SAMPLE_RATE, pitch, nfcc)
3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,8 @@ def _fetch_archives(src):


def _fetch_third_party_libraries():
_init_submodule()
# Revert this when a submodule is added again
# _init_submodule()
if os.name != "nt":
_fetch_archives(_parse_sources())

Expand Down
2 changes: 0 additions & 2 deletions test/torchaudio_unittest/common_utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
skipIfNoExec,
skipIfNoFFmpeg,
skipIfNoHWAccel,
skipIfNoKaldi,
skipIfNoMacOS,
skipIfNoModule,
skipIfNoQengine,
Expand Down Expand Up @@ -52,7 +51,6 @@
"skipIfNoExec",
"skipIfNoMacOS",
"skipIfNoModule",
"skipIfNoKaldi",
"skipIfNoRIR",
"skipIfNoSox",
"skipIfNoSoxBackend",
Expand Down
5 changes: 0 additions & 5 deletions test/torchaudio_unittest/common_utils/case_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,11 +234,6 @@ def skipIfNoModule(module, display_name=None):
reason="Sox features are not available.",
key="NO_SOX",
)
skipIfNoKaldi = _skipIf(
not torchaudio._extension._IS_KALDI_AVAILABLE,
reason="Kaldi features are not available.",
key="NO_KALDI",
)
skipIfNoRIR = _skipIf(
not torchaudio._extension._IS_RIR_AVAILABLE,
reason="RIR features are not available.",
Expand Down
12 changes: 0 additions & 12 deletions test/torchaudio_unittest/functional/batch_consistency_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,18 +257,6 @@ def test_resample_waveform(self, resampling_method):
atol=1e-7,
)

@common_utils.skipIfNoKaldi
def test_compute_kaldi_pitch(self):
sample_rate = 44100
n_channels = 2
waveform = common_utils.get_whitenoise(sample_rate=sample_rate, n_channels=self.batch_size * n_channels)
batch = waveform.view(self.batch_size, n_channels, waveform.size(-1))
kwargs = {
"sample_rate": sample_rate,
}
func = partial(F.compute_kaldi_pitch, **kwargs)
self.assert_batch_consistency(func, inputs=(batch,))

def test_lfilter(self):
signal_length = 2048
x = torch.randn(self.batch_size, signal_length)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,12 +1,7 @@
import torch
from torchaudio_unittest.common_utils import PytorchTestCase

from .kaldi_compatibility_test_impl import Kaldi, KaldiCPUOnly


class TestKaldiCPUOnly(KaldiCPUOnly, PytorchTestCase):
dtype = torch.float32
device = torch.device("cpu")
from .kaldi_compatibility_test_impl import Kaldi


class TestKaldiFloat32(Kaldi, PytorchTestCase):
Expand Down
Original file line number Diff line number Diff line change
@@ -1,14 +1,6 @@
import torch
import torchaudio.functional as F
from parameterized import parameterized
from torchaudio_unittest.common_utils import (
get_sinusoid,
load_params,
save_wav,
skipIfNoExec,
TempDirMixin,
TestBaseMixin,
)
from torchaudio_unittest.common_utils import skipIfNoExec, TempDirMixin, TestBaseMixin
from torchaudio_unittest.common_utils.kaldi_utils import convert_args, run_kaldi


Expand All @@ -32,25 +24,3 @@ def test_sliding_window_cmn(self):
command = ["apply-cmvn-sliding"] + convert_args(**kwargs) + ["ark:-", "ark:-"]
kaldi_result = run_kaldi(command, "ark", tensor)
self.assert_equal(result, expected=kaldi_result)


class KaldiCPUOnly(TempDirMixin, TestBaseMixin):
def assert_equal(self, output, *, expected, rtol=None, atol=None):
expected = expected.to(dtype=self.dtype, device=self.device)
self.assertEqual(output, expected, rtol=rtol, atol=atol)

@parameterized.expand(load_params("kaldi_test_pitch_args.jsonl"))
@skipIfNoExec("compute-kaldi-pitch-feats")
def test_pitch_feats(self, kwargs):
"""compute_kaldi_pitch produces numerically compatible result with compute-kaldi-pitch-feats"""
sample_rate = kwargs["sample_rate"]
waveform = get_sinusoid(dtype="float32", sample_rate=sample_rate)
result = F.compute_kaldi_pitch(waveform[0], **kwargs)

waveform = get_sinusoid(dtype="int16", sample_rate=sample_rate)
wave_file = self.get_temp_path("test.wav")
save_wav(wave_file, waveform, sample_rate)

command = ["compute-kaldi-pitch-feats"] + convert_args(**kwargs) + ["scp:-", "ark:-"]
kaldi_result = run_kaldi(command, "scp", wave_file)
self.assert_equal(result, expected=kaldi_result)
Original file line number Diff line number Diff line change
Expand Up @@ -585,18 +585,6 @@ def func(tensor):
tensor = common_utils.get_whitenoise(sample_rate=44100)
self._assert_consistency(func, (tensor,))

@common_utils.skipIfNoKaldi
def test_compute_kaldi_pitch(self):
if self.dtype != torch.float32 or self.device != torch.device("cpu"):
raise unittest.SkipTest("Only float32, cpu is supported.")

def func(tensor):
sample_rate: float = 44100.0
return F.compute_kaldi_pitch(tensor, sample_rate)

tensor = common_utils.get_whitenoise(sample_rate=44100)
self._assert_consistency(func, (tensor,))

def test_resample_sinc(self):
def func(tensor):
sr1, sr2 = 16000, 8000
Expand Down
7 changes: 0 additions & 7 deletions third_party/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,3 @@ file(MAKE_DIRECTORY install/lib)
if (BUILD_SOX)
add_subdirectory(sox)
endif()

################################################################################
# kaldi
################################################################################
if (BUILD_KALDI)
add_subdirectory(kaldi)
endif()
32 changes: 0 additions & 32 deletions third_party/kaldi/CMakeLists.txt

This file was deleted.

6 changes: 0 additions & 6 deletions third_party/kaldi/README.md

This file was deleted.

76 changes: 0 additions & 76 deletions third_party/kaldi/kaldi.patch

This file was deleted.

39 changes: 0 additions & 39 deletions third_party/kaldi/src/matrix/kaldi-matrix.cc

This file was deleted.

Loading

0 comments on commit 5bbbb1d

Please sign in to comment.