diff --git a/test/torchaudio_unittest/sox_effect/smoke_test.py b/test/torchaudio_unittest/sox_effect/smoke_test.py index 996c62063f..2a8d9e7ff9 100644 --- a/test/torchaudio_unittest/sox_effect/smoke_test.py +++ b/test/torchaudio_unittest/sox_effect/smoke_test.py @@ -43,7 +43,7 @@ def test_apply_effects_tensor(self, args): load_params("sox_effect_test_args.json"), name_func=lambda f, i, p: f'{f.__name__}_{i}_{p.args[0]["effects"][0][0]}', ) - def test_apply_effects(self, args): + def test_apply_effects_file(self, args): """`apply_effects_file` should return identical data as sox command""" dtype = 'int32' channels_first = True @@ -57,3 +57,23 @@ def test_apply_effects(self, args): _found, _sr = sox_effects.apply_effects_file( input_path, effects, normalize=False, channels_first=channels_first) + + @parameterized.expand( + load_params("sox_effect_test_args.json"), + name_func=lambda f, i, p: f'{f.__name__}_{i}_{p.args[0]["effects"][0][0]}', + ) + def test_apply_effects_fileobj(self, args): + """`apply_effects_file` should return identical data as sox command""" + dtype = 'int32' + channels_first = True + effects = args['effects'] + num_channels = args.get("num_channels", 2) + input_sr = args.get("input_sample_rate", 8000) + + input_path = self.get_temp_path('input.wav') + data = get_wav_data(dtype, num_channels, channels_first=channels_first) + save_wav(input_path, data, input_sr, channels_first=channels_first) + + with open(input_path, 'rb') as fileobj: + _found, _sr = sox_effects.apply_effects_file( + fileobj, effects, normalize=False, channels_first=channels_first) diff --git a/test/torchaudio_unittest/sox_effect/sox_effect_test.py b/test/torchaudio_unittest/sox_effect/sox_effect_test.py index c25e376359..ab334613e3 100644 --- a/test/torchaudio_unittest/sox_effect/sox_effect_test.py +++ b/test/torchaudio_unittest/sox_effect/sox_effect_test.py @@ -1,13 +1,19 @@ +import io import itertools from pathlib import Path +import tarfile -from torchaudio import sox_effects from parameterized import parameterized +from torchaudio import sox_effects +from torchaudio._internal import module_utils as _mod_utils from torchaudio_unittest.common_utils import ( TempDirMixin, + HttpServerMixin, PytorchTestCase, skipIfNoExtension, + skipIfNoModule, + skipIfNoExec, get_asset_path, get_sinusoid, get_wav_data, @@ -21,6 +27,10 @@ ) +if _mod_utils.is_module_available("requests"): + import requests + + @skipIfNoExtension class TestSoxEffects(PytorchTestCase): def test_init(self): @@ -262,3 +272,152 @@ def test_mp3(self): path = get_asset_path("mp3_without_ext") _, sr = sox_effects.apply_effects_file(path, effects, format="mp3") assert sr == 16000 + + +@skipIfNoExec('sox') +@skipIfNoExtension +class TestFileObject(TempDirMixin, PytorchTestCase): + @parameterized.expand([ + ('wav', None), + ('mp3', 128), + ('mp3', 320), + ('flac', 0), + ('flac', 5), + ('flac', 8), + ('vorbis', -1), + ('vorbis', 10), + ('amb', None), + ]) + def test_fileobj(self, ext, compression): + """Applying effects via file object works""" + sample_rate = 16000 + channels_first = True + effects = [['band', '300', '10']] + format_ = ext if ext in ['mp3'] else None + input_path = self.get_temp_path(f'input.{ext}') + reference_path = self.get_temp_path('reference.wav') + + sox_utils.gen_audio_file( + input_path, sample_rate, num_channels=2, compression=compression) + sox_utils.run_sox_effect( + input_path, reference_path, effects, output_bitdepth=32) + expected, expected_sr = load_wav(reference_path) + + with open(input_path, 'rb') as fileobj: + found, sr = sox_effects.apply_effects_file( + fileobj, effects, channels_first=channels_first, format=format_) + save_wav(self.get_temp_path('result.wav'), found, sr, channels_first=channels_first) + assert sr == expected_sr + self.assertEqual(found, expected) + + @parameterized.expand([ + ('wav', None), + ('mp3', 128), + ('mp3', 320), + ('flac', 0), + ('flac', 5), + ('flac', 8), + ('vorbis', -1), + ('vorbis', 10), + ('amb', None), + ]) + def test_bytesio(self, ext, compression): + """Applying effects via BytesIO object works""" + sample_rate = 16000 + channels_first = True + effects = [['band', '300', '10']] + format_ = ext if ext in ['mp3'] else None + input_path = self.get_temp_path(f'input.{ext}') + reference_path = self.get_temp_path('reference.wav') + + sox_utils.gen_audio_file( + input_path, sample_rate, num_channels=2, compression=compression) + sox_utils.run_sox_effect( + input_path, reference_path, effects, output_bitdepth=32) + expected, expected_sr = load_wav(reference_path) + + with open(input_path, 'rb') as file_: + fileobj = io.BytesIO(file_.read()) + found, sr = sox_effects.apply_effects_file( + fileobj, effects, channels_first=channels_first, format=format_) + save_wav(self.get_temp_path('result.wav'), found, sr, channels_first=channels_first) + assert sr == expected_sr + self.assertEqual(found, expected) + + @parameterized.expand([ + ('wav', None), + ('mp3', 128), + ('mp3', 320), + ('flac', 0), + ('flac', 5), + ('flac', 8), + ('vorbis', -1), + ('vorbis', 10), + ('amb', None), + ]) + def test_tarfile(self, ext, compression): + """Applying effects to compressed audio via file-like file works""" + sample_rate = 16000 + channels_first = True + effects = [['band', '300', '10']] + format_ = ext if ext in ['mp3'] else None + audio_file = f'input.{ext}' + + input_path = self.get_temp_path(audio_file) + reference_path = self.get_temp_path('reference.wav') + archive_path = self.get_temp_path('archive.tar.gz') + + sox_utils.gen_audio_file( + input_path, sample_rate, num_channels=2, compression=compression) + sox_utils.run_sox_effect( + input_path, reference_path, effects, output_bitdepth=32) + expected, expected_sr = load_wav(reference_path) + + with tarfile.TarFile(archive_path, 'w') as tarobj: + tarobj.add(input_path, arcname=audio_file) + with tarfile.TarFile(archive_path, 'r') as tarobj: + fileobj = tarobj.extractfile(audio_file) + found, sr = sox_effects.apply_effects_file( + fileobj, effects, channels_first=channels_first, format=format_) + save_wav(self.get_temp_path('result.wav'), found, sr, channels_first=channels_first) + assert sr == expected_sr + self.assertEqual(found, expected) + + +@skipIfNoExtension +@skipIfNoExec('sox') +@skipIfNoModule("requests") +class TestFileObjectHttp(HttpServerMixin, PytorchTestCase): + @parameterized.expand([ + ('wav', None), + ('mp3', 128), + ('mp3', 320), + ('flac', 0), + ('flac', 5), + ('flac', 8), + ('vorbis', -1), + ('vorbis', 10), + ('amb', None), + ]) + def test_requests(self, ext, compression): + sample_rate = 16000 + channels_first = True + effects = [['band', '300', '10']] + format_ = ext if ext in ['mp3'] else None + audio_file = f'input.{ext}' + input_path = self.get_temp_path(audio_file) + reference_path = self.get_temp_path('reference.wav') + + sox_utils.gen_audio_file( + input_path, sample_rate, num_channels=2, compression=compression) + sox_utils.run_sox_effect( + input_path, reference_path, effects, output_bitdepth=32) + expected, expected_sr = load_wav(reference_path) + + url = self.get_url(audio_file) + with requests.get(url, stream=True) as resp: + found, sr = sox_effects.apply_effects_file( + resp.raw, effects, channels_first=channels_first, format=format_) + save_wav(self.get_temp_path('result.wav'), found, sr, channels_first=channels_first) + assert sr == expected_sr + self.assertEqual(found, expected) diff --git a/torchaudio/csrc/pybind.cpp b/torchaudio/csrc/pybind.cpp index bb18e75b1e..373b9d0d96 100644 --- a/torchaudio/csrc/pybind.cpp +++ b/torchaudio/csrc/pybind.cpp @@ -1,4 +1,5 @@ #include +#include #include #include @@ -112,4 +113,8 @@ PYBIND11_MODULE(_torchaudio, m) { "save_audio_fileobj", &torchaudio::sox_io::save_audio_fileobj, "Save audio to file obj."); + m.def( + "apply_effects_fileobj", + &torchaudio::sox_effects::apply_effects_fileobj, + "Decode audio data from file-like obj and apply effects."); } diff --git a/torchaudio/sox_effects/sox_effects.py b/torchaudio/sox_effects/sox_effects.py index 7534714965..d94a776cd6 100644 --- a/torchaudio/sox_effects/sox_effects.py +++ b/torchaudio/sox_effects/sox_effects.py @@ -1,7 +1,10 @@ +import os +from pathlib import Path from typing import List, Tuple, Optional import torch +import torchaudio from torchaudio._internal import module_utils as _mod_utils from torchaudio.utils.sox_utils import list_effects @@ -170,8 +173,16 @@ def apply_effects_file( rate and leave samples untouched. Args: - path (str or pathlib.Path): Path to the audio file. This function also handles ``pathlib.Path`` objects, but is - annotated as ``str`` for TorchScript compiler compatibility. + path (path-like object or file-like object): + Source of audio data. When the function is not compiled by TorchScript, + (e.g. ``torch.jit.script``), the following types are accepted; + * ``path-like``: file path + * ``file-like``: Object with ``read(size: int) -> bytes`` method, + which returns byte string of at most ``size`` length. + When the function is compiled by TorchScript, only ``str`` type is allowed. + Note: + * This argument is intentionally annotated as ``str`` only for + TorchScript compiler compatibility. effects (List[List[str]]): List of effects. normalize (bool): When ``True``, this function always return ``float32``, and sample values are @@ -252,8 +263,11 @@ def apply_effects_file( >>> for batch in loader: >>> pass """ - # Get string representation of 'path' in case Path object is passed - path = str(path) + if not torch.jit.is_scripting(): + if hasattr(path, 'read'): + return torchaudio._torchaudio.apply_effects_fileobj( + path, effects, normalize, channels_first, format) + path = os.fspath(path) signal = torch.ops.torchaudio.sox_effects_apply_effects_file( path, effects, normalize, channels_first, format) return signal.get_tensor(), signal.get_sample_rate()