From 0e2b6e7a30548366161a1ed186db16e0ea8e953d Mon Sep 17 00:00:00 2001 From: whoami <48873278+lsrami@users.noreply.github.com> Date: Thu, 21 Mar 2024 00:25:34 +0800 Subject: [PATCH] [torchaudio] Fix torchaudio interface error (#2352) --- tools/compute_cmvn_stats.py | 6 ++---- tools/compute_fbank_feats.py | 8 ++------ tools/wav2dur.py | 1 - 3 files changed, 4 insertions(+), 11 deletions(-) diff --git a/tools/compute_cmvn_stats.py b/tools/compute_cmvn_stats.py index cd3a2e2ef..4dcad825d 100755 --- a/tools/compute_cmvn_stats.py +++ b/tools/compute_cmvn_stats.py @@ -12,8 +12,6 @@ import torchaudio.compliance.kaldi as kaldi from torch.utils.data import Dataset, DataLoader -torchaudio.set_audio_backend("sox_io") - class CollateFunc(object): ''' Collate function for AudioDataset @@ -32,7 +30,7 @@ def __call__(self, batch): value = item[1].strip().split(",") assert len(value) == 3 or len(value) == 1 wav_path = value[0] - sample_rate = torchaudio.backend.sox_io_backend.info( + sample_rate = torchaudio.info( wav_path).sample_rate resample_rate = sample_rate # len(value) == 3 means segmented wav.scp, @@ -40,7 +38,7 @@ def __call__(self, batch): if len(value) == 3: start_frame = int(float(value[1]) * sample_rate) end_frame = int(float(value[2]) * sample_rate) - waveform, sample_rate = torchaudio.backend.sox_io_backend.load( + waveform, sample_rate = torchaudio.load( filepath=wav_path, num_frames=end_frame - start_frame, frame_offset=start_frame) diff --git a/tools/compute_fbank_feats.py b/tools/compute_fbank_feats.py index 4cc7dae54..e900954cb 100644 --- a/tools/compute_fbank_feats.py +++ b/tools/compute_fbank_feats.py @@ -20,10 +20,6 @@ import wenet.dataset.kaldi_io as kaldi_io -# The "sox" backends are deprecated and will be removed in 0.9.0 release. -# So here we use sox_io backend -torchaudio.set_audio_backend("sox_io") - def parse_opts(): parser = argparse.ArgumentParser(description='training your network') @@ -104,14 +100,14 @@ def load_wav_segments(wav_scp_file, segments_file): for item in audio_list: if len(item) == 2: key, wav_path = item - waveform, sample_rate = torchaudio.load_wav(wav_path) + waveform, sample_rate = torchaudio.load(wav_path) else: assert len(item) == 4 key, wav_path, start, end = item sample_rate = torchaudio.info(wav_path).sample_rate frame_offset = int(start * sample_rate) num_frames = int((end - start) * sample_rate) - waveform, sample_rate = torchaudio.load_wav( + waveform, sample_rate = torchaudio.load( wav_path, frame_offset, num_frames) mat = kaldi.fbank(waveform, diff --git a/tools/wav2dur.py b/tools/wav2dur.py index b53a7fe1d..d416b1ad9 100755 --- a/tools/wav2dur.py +++ b/tools/wav2dur.py @@ -5,7 +5,6 @@ import torchaudio -torchaudio.set_audio_backend("sox_io") scp = sys.argv[1] dur_scp = sys.argv[2]