Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[BC-Breaking] Remove compute_kaldi_pitch #3368

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .circleci/unittest/linux/scripts/run_style_checks.sh
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ fi

printf "\x1b[34mRunning clang-format:\x1b[0m\n"
"${this_dir}"/run_clang_format.py \
-r torchaudio/csrc third_party/kaldi/src \
-r torchaudio/csrc \
--clang-format-executable "${clangformat_path}" \
&& git diff --exit-code
status=$?
Expand Down
4 changes: 0 additions & 4 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -1,4 +0,0 @@
[submodule "kaldi"]
path = third_party/kaldi/submodule
url = https://github.com/kaldi-asr/kaldi
ignore = dirty
1 change: 0 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,6 @@ endif()

# Options
option(BUILD_SOX "Build libsox statically" ON)
option(BUILD_KALDI "Build kaldi statically" ON)
option(BUILD_RIR "Enable RIR simulation" ON)
option(BUILD_RNNT "Enable RNN transducer" ON)
option(BUILD_ALIGN "Enable forced alignment" ON)
Expand Down
51 changes: 0 additions & 51 deletions examples/tutorials/audio_feature_extractions_tutorial.py
Original file line number Diff line number Diff line change
Expand Up @@ -406,54 +406,3 @@ def plot_pitch(waveform, sr, pitch):


plot_pitch(SPEECH_WAVEFORM, SAMPLE_RATE, pitch)

######################################################################
# Kaldi Pitch (beta)
# ------------------
#
# Kaldi Pitch feature [1] is a pitch detection mechanism tuned for automatic
# speech recognition (ASR) applications. This is a beta feature in ``torchaudio``,
# and it is available as :py:func:`torchaudio.functional.compute_kaldi_pitch`.
#
# 1. A pitch extraction algorithm tuned for automatic speech recognition
#
# Ghahremani, B. BabaAli, D. Povey, K. Riedhammer, J. Trmal and S.
# Khudanpur
#
# 2014 IEEE International Conference on Acoustics, Speech and Signal
# Processing (ICASSP), Florence, 2014, pp. 2494-2498, doi:
# 10.1109/ICASSP.2014.6854049.
# [`abstract <https://ieeexplore.ieee.org/document/6854049>`__],
# [`paper <https://danielpovey.com/files/2014_icassp_pitch.pdf>`__]
#

pitch_feature = F.compute_kaldi_pitch(SPEECH_WAVEFORM, SAMPLE_RATE)
pitch, nfcc = pitch_feature[..., 0], pitch_feature[..., 1]

######################################################################
#

def plot_kaldi_pitch(waveform, sr, pitch, nfcc):
_, axis = plt.subplots(1, 1)
axis.set_title("Kaldi Pitch Feature")
axis.grid(True)

end_time = waveform.shape[1] / sr
time_axis = torch.linspace(0, end_time, waveform.shape[1])
axis.plot(time_axis, waveform[0], linewidth=1, color="gray", alpha=0.3)

time_axis = torch.linspace(0, end_time, pitch.shape[1])
ln1 = axis.plot(time_axis, pitch[0], linewidth=2, label="Pitch", color="green")
axis.set_ylim((-1.3, 1.3))

axis2 = axis.twinx()
time_axis = torch.linspace(0, end_time, nfcc.shape[1])
ln2 = axis2.plot(time_axis, nfcc[0], linewidth=2, label="NFCC", color="blue", linestyle="--")

lns = ln1 + ln2
labels = [l.get_label() for l in lns]
axis.legend(lns, labels, loc=0)
plt.show(block=False)


plot_kaldi_pitch(SPEECH_WAVEFORM, SAMPLE_RATE, pitch, nfcc)
3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,8 @@ def _fetch_archives(src):


def _fetch_third_party_libraries():
_init_submodule()
# Revert this when a submodule is added again
# _init_submodule()
if os.name != "nt":
_fetch_archives(_parse_sources())

Expand Down
2 changes: 0 additions & 2 deletions test/torchaudio_unittest/common_utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
skipIfNoExec,
skipIfNoFFmpeg,
skipIfNoHWAccel,
skipIfNoKaldi,
skipIfNoMacOS,
skipIfNoModule,
skipIfNoQengine,
Expand Down Expand Up @@ -52,7 +51,6 @@
"skipIfNoExec",
"skipIfNoMacOS",
"skipIfNoModule",
"skipIfNoKaldi",
"skipIfNoRIR",
"skipIfNoSox",
"skipIfNoSoxBackend",
Expand Down
5 changes: 0 additions & 5 deletions test/torchaudio_unittest/common_utils/case_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,11 +234,6 @@ def skipIfNoModule(module, display_name=None):
reason="Sox features are not available.",
key="NO_SOX",
)
skipIfNoKaldi = _skipIf(
not torchaudio._extension._IS_KALDI_AVAILABLE,
reason="Kaldi features are not available.",
key="NO_KALDI",
)
skipIfNoRIR = _skipIf(
not torchaudio._extension._IS_RIR_AVAILABLE,
reason="RIR features are not available.",
Expand Down
12 changes: 0 additions & 12 deletions test/torchaudio_unittest/functional/batch_consistency_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,18 +257,6 @@ def test_resample_waveform(self, resampling_method):
atol=1e-7,
)

@common_utils.skipIfNoKaldi
def test_compute_kaldi_pitch(self):
sample_rate = 44100
n_channels = 2
waveform = common_utils.get_whitenoise(sample_rate=sample_rate, n_channels=self.batch_size * n_channels)
batch = waveform.view(self.batch_size, n_channels, waveform.size(-1))
kwargs = {
"sample_rate": sample_rate,
}
func = partial(F.compute_kaldi_pitch, **kwargs)
self.assert_batch_consistency(func, inputs=(batch,))

def test_lfilter(self):
signal_length = 2048
x = torch.randn(self.batch_size, signal_length)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,12 +1,7 @@
import torch
from torchaudio_unittest.common_utils import PytorchTestCase

from .kaldi_compatibility_test_impl import Kaldi, KaldiCPUOnly


class TestKaldiCPUOnly(KaldiCPUOnly, PytorchTestCase):
dtype = torch.float32
device = torch.device("cpu")
from .kaldi_compatibility_test_impl import Kaldi


class TestKaldiFloat32(Kaldi, PytorchTestCase):
Expand Down
Original file line number Diff line number Diff line change
@@ -1,14 +1,6 @@
import torch
import torchaudio.functional as F
from parameterized import parameterized
from torchaudio_unittest.common_utils import (
get_sinusoid,
load_params,
save_wav,
skipIfNoExec,
TempDirMixin,
TestBaseMixin,
)
from torchaudio_unittest.common_utils import skipIfNoExec, TempDirMixin, TestBaseMixin
from torchaudio_unittest.common_utils.kaldi_utils import convert_args, run_kaldi


Expand All @@ -32,25 +24,3 @@ def test_sliding_window_cmn(self):
command = ["apply-cmvn-sliding"] + convert_args(**kwargs) + ["ark:-", "ark:-"]
kaldi_result = run_kaldi(command, "ark", tensor)
self.assert_equal(result, expected=kaldi_result)


class KaldiCPUOnly(TempDirMixin, TestBaseMixin):
def assert_equal(self, output, *, expected, rtol=None, atol=None):
expected = expected.to(dtype=self.dtype, device=self.device)
self.assertEqual(output, expected, rtol=rtol, atol=atol)

@parameterized.expand(load_params("kaldi_test_pitch_args.jsonl"))
@skipIfNoExec("compute-kaldi-pitch-feats")
def test_pitch_feats(self, kwargs):
"""compute_kaldi_pitch produces numerically compatible result with compute-kaldi-pitch-feats"""
sample_rate = kwargs["sample_rate"]
waveform = get_sinusoid(dtype="float32", sample_rate=sample_rate)
result = F.compute_kaldi_pitch(waveform[0], **kwargs)

waveform = get_sinusoid(dtype="int16", sample_rate=sample_rate)
wave_file = self.get_temp_path("test.wav")
save_wav(wave_file, waveform, sample_rate)

command = ["compute-kaldi-pitch-feats"] + convert_args(**kwargs) + ["scp:-", "ark:-"]
kaldi_result = run_kaldi(command, "scp", wave_file)
self.assert_equal(result, expected=kaldi_result)
Original file line number Diff line number Diff line change
Expand Up @@ -585,18 +585,6 @@ def func(tensor):
tensor = common_utils.get_whitenoise(sample_rate=44100)
self._assert_consistency(func, (tensor,))

@common_utils.skipIfNoKaldi
def test_compute_kaldi_pitch(self):
if self.dtype != torch.float32 or self.device != torch.device("cpu"):
raise unittest.SkipTest("Only float32, cpu is supported.")

def func(tensor):
sample_rate: float = 44100.0
return F.compute_kaldi_pitch(tensor, sample_rate)

tensor = common_utils.get_whitenoise(sample_rate=44100)
self._assert_consistency(func, (tensor,))

def test_resample_sinc(self):
def func(tensor):
sr1, sr2 = 16000, 8000
Expand Down
7 changes: 0 additions & 7 deletions third_party/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,3 @@ file(MAKE_DIRECTORY install/lib)
if (BUILD_SOX)
add_subdirectory(sox)
endif()

################################################################################
# kaldi
################################################################################
if (BUILD_KALDI)
add_subdirectory(kaldi)
endif()
32 changes: 0 additions & 32 deletions third_party/kaldi/CMakeLists.txt

This file was deleted.

6 changes: 0 additions & 6 deletions third_party/kaldi/README.md

This file was deleted.

76 changes: 0 additions & 76 deletions third_party/kaldi/kaldi.patch

This file was deleted.

39 changes: 0 additions & 39 deletions third_party/kaldi/src/matrix/kaldi-matrix.cc

This file was deleted.

Loading