From ba215bf172897b589661b0b302c21626f50fc6eb Mon Sep 17 00:00:00 2001 From: moto <855818+mthrok@users.noreply.github.com> Date: Sun, 5 Feb 2023 22:29:21 -0500 Subject: [PATCH] [BC-Breaking] Remove file-like object support from sox_io backend This commit removes file-like object support so that we can remove the custom patch. The motivation and plan are outlined in https://github.com/pytorch/audio/issues/2950. --- .../backend/sox_io/info_test.py | 268 +----------------- .../backend/sox_io/load_test.py | 253 ----------------- .../backend/sox_io/save_test.py | 90 ++---- .../backend/sox_io/smoke_test.py | 85 ------ .../sox_effect/smoke_test.py | 21 -- .../sox_effect/sox_effect_test.py | 143 ---------- third_party/patches/sox.patch | 16 -- third_party/sox/CMakeLists.txt | 2 +- torchaudio/_extension/utils.py | 2 +- torchaudio/backend/sox_io_backend.py | 122 +------- torchaudio/csrc/sox/CMakeLists.txt | 10 +- torchaudio/csrc/sox/pybind/effects.cpp | 123 -------- torchaudio/csrc/sox/pybind/effects.h | 20 -- torchaudio/csrc/sox/pybind/effects_chain.cpp | 237 ---------------- torchaudio/csrc/sox/pybind/effects_chain.h | 30 -- torchaudio/csrc/sox/pybind/io.cpp | 195 ------------- torchaudio/csrc/sox/pybind/io.h | 37 --- torchaudio/csrc/sox/pybind/pybind.cpp | 50 +++- torchaudio/csrc/sox/pybind/utils.cpp | 33 --- torchaudio/csrc/sox/pybind/utils.h | 14 - torchaudio/csrc/sox/utils.cpp | 25 -- torchaudio/sox_effects/sox_effects.py | 32 +-- torchaudio/utils/sox_utils.py | 17 +- 23 files changed, 84 insertions(+), 1741 deletions(-) delete mode 100644 third_party/patches/sox.patch delete mode 100644 torchaudio/csrc/sox/pybind/effects.cpp delete mode 100644 torchaudio/csrc/sox/pybind/effects.h delete mode 100644 torchaudio/csrc/sox/pybind/effects_chain.cpp delete mode 100644 torchaudio/csrc/sox/pybind/effects_chain.h delete mode 100644 torchaudio/csrc/sox/pybind/io.cpp delete mode 100644 torchaudio/csrc/sox/pybind/io.h delete mode 100644 torchaudio/csrc/sox/pybind/utils.cpp delete mode 
100644 torchaudio/csrc/sox/pybind/utils.h diff --git a/test/torchaudio_unittest/backend/sox_io/info_test.py b/test/torchaudio_unittest/backend/sox_io/info_test.py index 70532f4ba0..c2a4f1c9ab 100644 --- a/test/torchaudio_unittest/backend/sox_io/info_test.py +++ b/test/torchaudio_unittest/backend/sox_io/info_test.py @@ -1,22 +1,14 @@ -import io import itertools -import os -import tarfile -from contextlib import contextmanager from parameterized import parameterized -from torchaudio._internal import module_utils as _mod_utils from torchaudio.backend import sox_io_backend -from torchaudio.utils.sox_utils import get_buffer_size, set_buffer_size -from torchaudio_unittest.backend.common import get_bits_per_sample, get_encoding +from torchaudio_unittest.backend.common import get_encoding from torchaudio_unittest.common_utils import ( get_asset_path, get_wav_data, - HttpServerMixin, PytorchTestCase, save_wav, skipIfNoExec, - skipIfNoModule, skipIfNoSox, sox_utils, TempDirMixin, @@ -25,10 +17,6 @@ from .common import name_func -if _mod_utils.is_module_available("requests"): - import requests - - @skipIfNoExec("sox") @skipIfNoSox class TestInfo(TempDirMixin, PytorchTestCase): @@ -338,260 +326,6 @@ def test_mp3(self): assert sinfo.encoding == "MP3" -class FileObjTestBase(TempDirMixin): - def _gen_file(self, ext, dtype, sample_rate, num_channels, num_frames, *, comments=None): - path = self.get_temp_path(f"test.{ext}") - bit_depth = sox_utils.get_bit_depth(dtype) - duration = num_frames / sample_rate - comment_file = self._gen_comment_file(comments) if comments else None - - sox_utils.gen_audio_file( - path, - sample_rate, - num_channels=num_channels, - encoding=sox_utils.get_encoding(dtype), - bit_depth=bit_depth, - duration=duration, - comment_file=comment_file, - ) - return path - - def _gen_comment_file(self, comments): - comment_path = self.get_temp_path("comment.txt") - with open(comment_path, "w") as file_: - file_.writelines(comments) - return comment_path - - -class 
Unseekable: - def __init__(self, fileobj): - self.fileobj = fileobj - - def read(self, n): - return self.fileobj.read(n) - - -@skipIfNoSox -@skipIfNoExec("sox") -class TestFileObject(FileObjTestBase, PytorchTestCase): - def _query_fileobj(self, ext, dtype, sample_rate, num_channels, num_frames, *, comments=None): - path = self._gen_file(ext, dtype, sample_rate, num_channels, num_frames, comments=comments) - format_ = ext if ext in ["mp3"] else None - with open(path, "rb") as fileobj: - return sox_io_backend.info(fileobj, format_) - - def _query_bytesio(self, ext, dtype, sample_rate, num_channels, num_frames): - path = self._gen_file(ext, dtype, sample_rate, num_channels, num_frames) - format_ = ext if ext in ["mp3"] else None - with open(path, "rb") as file_: - fileobj = io.BytesIO(file_.read()) - return sox_io_backend.info(fileobj, format_) - - def _query_tarfile(self, ext, dtype, sample_rate, num_channels, num_frames): - audio_path = self._gen_file(ext, dtype, sample_rate, num_channels, num_frames) - audio_file = os.path.basename(audio_path) - archive_path = self.get_temp_path("archive.tar.gz") - with tarfile.TarFile(archive_path, "w") as tarobj: - tarobj.add(audio_path, arcname=audio_file) - format_ = ext if ext in ["mp3"] else None - with tarfile.TarFile(archive_path, "r") as tarobj: - fileobj = tarobj.extractfile(audio_file) - return sox_io_backend.info(fileobj, format_) - - @contextmanager - def _set_buffer_size(self, buffer_size): - try: - original_buffer_size = get_buffer_size() - set_buffer_size(buffer_size) - yield - finally: - set_buffer_size(original_buffer_size) - - @parameterized.expand( - [ - ("wav", "float32"), - ("wav", "int32"), - ("wav", "int16"), - ("wav", "uint8"), - ("mp3", "float32"), - ("flac", "float32"), - ("vorbis", "float32"), - ("amb", "int16"), - ] - ) - def test_fileobj(self, ext, dtype): - """Querying audio via file object works""" - sample_rate = 16000 - num_frames = 3 * sample_rate - num_channels = 2 - sinfo = 
self._query_fileobj(ext, dtype, sample_rate, num_channels, num_frames) - - bits_per_sample = get_bits_per_sample(ext, dtype) - num_frames = {"vorbis": 0, "mp3": 49536}.get(ext, num_frames) - - assert sinfo.sample_rate == sample_rate - assert sinfo.num_channels == num_channels - assert sinfo.num_frames == num_frames - assert sinfo.bits_per_sample == bits_per_sample - assert sinfo.encoding == get_encoding(ext, dtype) - - @parameterized.expand( - [ - ("vorbis", "float32"), - ] - ) - def test_fileobj_large_header(self, ext, dtype): - """ - For audio file with header size exceeding default buffer size: - - Querying audio via file object without enlarging buffer size fails. - - Querying audio via file object after enlarging buffer size succeeds. - """ - sample_rate = 16000 - num_frames = 3 * sample_rate - num_channels = 2 - comments = "metadata=" + " ".join(["value" for _ in range(1000)]) - - with self.assertRaises(RuntimeError): - sinfo = self._query_fileobj(ext, dtype, sample_rate, num_channels, num_frames, comments=comments) - - with self._set_buffer_size(16384): - sinfo = self._query_fileobj(ext, dtype, sample_rate, num_channels, num_frames, comments=comments) - bits_per_sample = get_bits_per_sample(ext, dtype) - num_frames = 0 if ext in ["vorbis"] else num_frames - - assert sinfo.sample_rate == sample_rate - assert sinfo.num_channels == num_channels - assert sinfo.num_frames == num_frames - assert sinfo.bits_per_sample == bits_per_sample - assert sinfo.encoding == get_encoding(ext, dtype) - - @parameterized.expand( - [ - ("wav", "float32"), - ("wav", "int32"), - ("wav", "int16"), - ("wav", "uint8"), - ("mp3", "float32"), - ("flac", "float32"), - ("vorbis", "float32"), - ("amb", "int16"), - ] - ) - def test_bytesio(self, ext, dtype): - """Querying audio via ByteIO object works for small data""" - sample_rate = 16000 - num_frames = 3 * sample_rate - num_channels = 2 - sinfo = self._query_bytesio(ext, dtype, sample_rate, num_channels, num_frames) - - bits_per_sample = 
get_bits_per_sample(ext, dtype) - num_frames = {"vorbis": 0, "mp3": 49536}.get(ext, num_frames) - - assert sinfo.sample_rate == sample_rate - assert sinfo.num_channels == num_channels - assert sinfo.num_frames == num_frames - assert sinfo.bits_per_sample == bits_per_sample - assert sinfo.encoding == get_encoding(ext, dtype) - - @parameterized.expand( - [ - ("wav", "float32"), - ("wav", "int32"), - ("wav", "int16"), - ("wav", "uint8"), - ("mp3", "float32"), - ("flac", "float32"), - ("vorbis", "float32"), - ("amb", "int16"), - ] - ) - def test_bytesio_tiny(self, ext, dtype): - """Querying audio via ByteIO object works for small data""" - sample_rate = 8000 - num_frames = 4 - num_channels = 2 - sinfo = self._query_bytesio(ext, dtype, sample_rate, num_channels, num_frames) - - bits_per_sample = get_bits_per_sample(ext, dtype) - num_frames = {"vorbis": 0, "mp3": 1728}.get(ext, num_frames) - - assert sinfo.sample_rate == sample_rate - assert sinfo.num_channels == num_channels - assert sinfo.num_frames == num_frames - assert sinfo.bits_per_sample == bits_per_sample - assert sinfo.encoding == get_encoding(ext, dtype) - - @parameterized.expand( - [ - ("wav", "float32"), - ("wav", "int32"), - ("wav", "int16"), - ("wav", "uint8"), - ("mp3", "float32"), - ("flac", "float32"), - ("vorbis", "float32"), - ("amb", "int16"), - ] - ) - def test_tarfile(self, ext, dtype): - """Querying compressed audio via file-like object works""" - sample_rate = 16000 - num_frames = 3.0 * sample_rate - num_channels = 2 - sinfo = self._query_tarfile(ext, dtype, sample_rate, num_channels, num_frames) - - bits_per_sample = get_bits_per_sample(ext, dtype) - num_frames = {"vorbis": 0, "mp3": 49536}.get(ext, num_frames) - - assert sinfo.sample_rate == sample_rate - assert sinfo.num_channels == num_channels - assert sinfo.num_frames == num_frames - assert sinfo.bits_per_sample == bits_per_sample - assert sinfo.encoding == get_encoding(ext, dtype) - - -@skipIfNoSox -@skipIfNoExec("sox") 
-@skipIfNoModule("requests") -class TestFileObjectHttp(HttpServerMixin, FileObjTestBase, PytorchTestCase): - def _query_http(self, ext, dtype, sample_rate, num_channels, num_frames): - audio_path = self._gen_file(ext, dtype, sample_rate, num_channels, num_frames) - audio_file = os.path.basename(audio_path) - - url = self.get_url(audio_file) - format_ = ext if ext in ["mp3"] else None - with requests.get(url, stream=True) as resp: - return sox_io_backend.info(Unseekable(resp.raw), format=format_) - - @parameterized.expand( - [ - ("wav", "float32"), - ("wav", "int32"), - ("wav", "int16"), - ("wav", "uint8"), - ("mp3", "float32"), - ("flac", "float32"), - ("vorbis", "float32"), - ("amb", "int16"), - ] - ) - def test_requests(self, ext, dtype): - """Querying compressed audio via requests works""" - sample_rate = 16000 - num_frames = 3.0 * sample_rate - num_channels = 2 - sinfo = self._query_http(ext, dtype, sample_rate, num_channels, num_frames) - - bits_per_sample = get_bits_per_sample(ext, dtype) - num_frames = {"vorbis": 0, "mp3": 49536}.get(ext, num_frames) - - assert sinfo.sample_rate == sample_rate - assert sinfo.num_channels == num_channels - assert sinfo.num_frames == num_frames - assert sinfo.bits_per_sample == bits_per_sample - assert sinfo.encoding == get_encoding(ext, dtype) - - @skipIfNoSox class TestInfoNoSuchFile(PytorchTestCase): def test_info_fail(self): diff --git a/test/torchaudio_unittest/backend/sox_io/load_test.py b/test/torchaudio_unittest/backend/sox_io/load_test.py index 54cfd7b7ae..bebd67ae6f 100644 --- a/test/torchaudio_unittest/backend/sox_io/load_test.py +++ b/test/torchaudio_unittest/backend/sox_io/load_test.py @@ -370,259 +370,6 @@ def test_mp3(self): assert sr == 16000 -class CloggedFileObj: - def __init__(self, fileobj): - self.fileobj = fileobj - - def read(self, _): - return self.fileobj.read(2) - - def seek(self, offset, whence): - return self.fileobj.seek(offset, whence) - - -@skipIfNoSox -@skipIfNoExec("sox") -class 
TestFileObject(TempDirMixin, PytorchTestCase): - """ - In this test suite, the result of file-like object input is compared against file path input, - because `load` function is rigrously tested for file path inputs to match libsox's result, - """ - - @parameterized.expand( - [ - ("wav", {"bit_depth": 16}), - ("wav", {"bit_depth": 24}), - ("wav", {"bit_depth": 32}), - ("mp3", {"compression": 128}), - ("mp3", {"compression": 320}), - ("flac", {"compression": 0}), - ("flac", {"compression": 5}), - ("flac", {"compression": 8}), - ("vorbis", {"compression": -1}), - ("vorbis", {"compression": 10}), - ("amb", {}), - ] - ) - def test_fileobj(self, ext, kwargs): - """Loading audio via file object returns the same result as via file path.""" - sample_rate = 16000 - format_ = ext if ext in ["mp3"] else None - path = self.get_temp_path(f"test.{ext}") - - sox_utils.gen_audio_file(path, sample_rate, num_channels=2, **kwargs) - expected, _ = sox_io_backend.load(path) - - with open(path, "rb") as fileobj: - found, sr = sox_io_backend.load(fileobj, format=format_) - - assert sr == sample_rate - self.assertEqual(expected, found) - - @parameterized.expand( - [ - ("wav", {"bit_depth": 16}), - ("wav", {"bit_depth": 24}), - ("wav", {"bit_depth": 32}), - ("mp3", {"compression": 128}), - ("mp3", {"compression": 320}), - ("flac", {"compression": 0}), - ("flac", {"compression": 5}), - ("flac", {"compression": 8}), - ("vorbis", {"compression": -1}), - ("vorbis", {"compression": 10}), - ("amb", {}), - ] - ) - def test_bytesio(self, ext, kwargs): - """Loading audio via BytesIO object returns the same result as via file path.""" - sample_rate = 16000 - format_ = ext if ext in ["mp3"] else None - path = self.get_temp_path(f"test.{ext}") - - sox_utils.gen_audio_file(path, sample_rate, num_channels=2, **kwargs) - expected, _ = sox_io_backend.load(path) - - with open(path, "rb") as file_: - fileobj = io.BytesIO(file_.read()) - found, sr = sox_io_backend.load(fileobj, format=format_) - - assert sr 
== sample_rate - self.assertEqual(expected, found) - - @parameterized.expand( - [ - ("wav", {"bit_depth": 16}), - ("wav", {"bit_depth": 24}), - ("wav", {"bit_depth": 32}), - ("mp3", {"compression": 128}), - ("mp3", {"compression": 320}), - ("flac", {"compression": 0}), - ("flac", {"compression": 5}), - ("flac", {"compression": 8}), - ("vorbis", {"compression": -1}), - ("vorbis", {"compression": 10}), - ("amb", {}), - ] - ) - def test_bytesio_clogged(self, ext, kwargs): - """Loading audio via clogged file object returns the same result as via file path. - - This test case validates the case where fileobject returns shorter bytes than requeted. - """ - sample_rate = 16000 - format_ = ext if ext in ["mp3"] else None - path = self.get_temp_path(f"test.{ext}") - - sox_utils.gen_audio_file(path, sample_rate, num_channels=2, **kwargs) - expected, _ = sox_io_backend.load(path) - - with open(path, "rb") as file_: - fileobj = CloggedFileObj(io.BytesIO(file_.read())) - found, sr = sox_io_backend.load(fileobj, format=format_) - - assert sr == sample_rate - self.assertEqual(expected, found) - - @parameterized.expand( - [ - ("wav", {"bit_depth": 16}), - ("wav", {"bit_depth": 24}), - ("wav", {"bit_depth": 32}), - ("mp3", {"compression": 128}), - ("mp3", {"compression": 320}), - ("flac", {"compression": 0}), - ("flac", {"compression": 5}), - ("flac", {"compression": 8}), - ("vorbis", {"compression": -1}), - ("vorbis", {"compression": 10}), - ("amb", {}), - ] - ) - def test_bytesio_tiny(self, ext, kwargs): - """Loading very small audio via file object returns the same result as via file path.""" - sample_rate = 16000 - format_ = ext if ext in ["mp3"] else None - path = self.get_temp_path(f"test.{ext}") - - sox_utils.gen_audio_file(path, sample_rate, num_channels=2, duration=1 / 1600, **kwargs) - expected, _ = sox_io_backend.load(path) - - with open(path, "rb") as file_: - fileobj = io.BytesIO(file_.read()) - found, sr = sox_io_backend.load(fileobj, format=format_) - - assert sr == 
sample_rate - self.assertEqual(expected, found) - - @parameterized.expand( - [ - ("wav", {"bit_depth": 16}), - ("wav", {"bit_depth": 24}), - ("wav", {"bit_depth": 32}), - ("mp3", {"compression": 128}), - ("mp3", {"compression": 320}), - ("flac", {"compression": 0}), - ("flac", {"compression": 5}), - ("flac", {"compression": 8}), - ("vorbis", {"compression": -1}), - ("vorbis", {"compression": 10}), - ("amb", {}), - ] - ) - def test_tarfile(self, ext, kwargs): - """Loading compressed audio via file-like object returns the same result as via file path.""" - sample_rate = 16000 - format_ = ext if ext in ["mp3"] else None - audio_file = f"test.{ext}" - audio_path = self.get_temp_path(audio_file) - archive_path = self.get_temp_path("archive.tar.gz") - - sox_utils.gen_audio_file(audio_path, sample_rate, num_channels=2, **kwargs) - expected, _ = sox_io_backend.load(audio_path) - - with tarfile.TarFile(archive_path, "w") as tarobj: - tarobj.add(audio_path, arcname=audio_file) - with tarfile.TarFile(archive_path, "r") as tarobj: - fileobj = tarobj.extractfile(audio_file) - found, sr = sox_io_backend.load(fileobj, format=format_) - - assert sr == sample_rate - self.assertEqual(expected, found) - - -class Unseekable: - def __init__(self, fileobj): - self.fileobj = fileobj - - def read(self, n): - return self.fileobj.read(n) - - -@skipIfNoSox -@skipIfNoExec("sox") -@skipIfNoModule("requests") -class TestFileObjectHttp(HttpServerMixin, PytorchTestCase): - @parameterized.expand( - [ - ("wav", {"bit_depth": 16}), - ("wav", {"bit_depth": 24}), - ("wav", {"bit_depth": 32}), - ("mp3", {"compression": 128}), - ("mp3", {"compression": 320}), - ("flac", {"compression": 0}), - ("flac", {"compression": 5}), - ("flac", {"compression": 8}), - ("vorbis", {"compression": -1}), - ("vorbis", {"compression": 10}), - ("amb", {}), - ] - ) - def test_requests(self, ext, kwargs): - sample_rate = 16000 - format_ = ext if ext in ["mp3"] else None - audio_file = f"test.{ext}" - audio_path = 
self.get_temp_path(audio_file) - - sox_utils.gen_audio_file(audio_path, sample_rate, num_channels=2, **kwargs) - expected, _ = sox_io_backend.load(audio_path) - - url = self.get_url(audio_file) - with requests.get(url, stream=True) as resp: - found, sr = sox_io_backend.load(Unseekable(resp.raw), format=format_) - - assert sr == sample_rate - if ext != "mp3": - self.assertEqual(expected, found) - - @parameterized.expand( - list( - itertools.product( - [0, 1, 10, 100, 1000], - [-1, 1, 10, 100, 1000], - ) - ), - name_func=name_func, - ) - def test_frame(self, frame_offset, num_frames): - """num_frames and frame_offset correctly specify the region of data""" - sample_rate = 8000 - audio_file = "test.wav" - audio_path = self.get_temp_path(audio_file) - - original = get_wav_data("float32", num_channels=2) - save_wav(audio_path, original, sample_rate) - frame_end = None if num_frames == -1 else frame_offset + num_frames - expected = original[:, frame_offset:frame_end] - - url = self.get_url(audio_file) - with requests.get(url, stream=True) as resp: - found, sr = sox_io_backend.load(resp.raw, frame_offset, num_frames) - - assert sr == sample_rate - self.assertEqual(expected, found) - - @skipIfNoSox class TestLoadNoSuchFile(PytorchTestCase): def test_load_fail(self): diff --git a/test/torchaudio_unittest/backend/sox_io/save_test.py b/test/torchaudio_unittest/backend/sox_io/save_test.py index 5db7a5a9f8..75656d6ed6 100644 --- a/test/torchaudio_unittest/backend/sox_io/save_test.py +++ b/test/torchaudio_unittest/backend/sox_io/save_test.py @@ -43,7 +43,6 @@ def assert_save_consistency( num_channels: int = 2, num_frames: float = 3 * 8000, src_dtype: str = "int32", - test_mode: str = "path", ): """`save` function produces file that is comparable with `sox` command @@ -97,37 +96,9 @@ def assert_save_consistency( # 2.1. 
Convert the original wav to target format with torchaudio data = load_wav(src_path, normalize=False)[0] - if test_mode == "path": - sox_io_backend.save( - tgt_path, data, sample_rate, compression=compression, encoding=encoding, bits_per_sample=bits_per_sample - ) - elif test_mode == "fileobj": - with open(tgt_path, "bw") as file_: - sox_io_backend.save( - file_, - data, - sample_rate, - format=format, - compression=compression, - encoding=encoding, - bits_per_sample=bits_per_sample, - ) - elif test_mode == "bytesio": - file_ = io.BytesIO() - sox_io_backend.save( - file_, - data, - sample_rate, - format=format, - compression=compression, - encoding=encoding, - bits_per_sample=bits_per_sample, - ) - file_.seek(0) - with open(tgt_path, "bw") as f: - f.write(file_.read()) - else: - raise ValueError(f"Unexpected test mode: {test_mode}") + sox_io_backend.save( + tgt_path, data, sample_rate, compression=compression, encoding=encoding, bits_per_sample=bits_per_sample + ) # 2.2. Convert the target format to wav with sox sox_utils.convert_audio_file(tgt_path, tst_path, encoding=cmp_encoding, bit_depth=cmp_bit_depth) # 2.3. 
Load with SciPy @@ -150,7 +121,6 @@ def assert_save_consistency( @skipIfNoSox class SaveTest(SaveTestBase): @nested_params( - ["path", "fileobj", "bytesio"], [ ("PCM_U", 8), ("PCM_S", 16), @@ -161,12 +131,11 @@ class SaveTest(SaveTestBase): ("ALAW", 8), ], ) - def test_save_wav(self, test_mode, enc_params): + def test_save_wav(self, enc_params): encoding, bits_per_sample = enc_params - self.assert_save_consistency("wav", encoding=encoding, bits_per_sample=bits_per_sample, test_mode=test_mode) + self.assert_save_consistency("wav", encoding=encoding, bits_per_sample=bits_per_sample) @nested_params( - ["path", "fileobj", "bytesio"], [ ("float32",), ("int32",), @@ -174,12 +143,11 @@ def test_save_wav(self, test_mode, enc_params): ("uint8",), ], ) - def test_save_wav_dtype(self, test_mode, params): + def test_save_wav_dtype(self, params): (dtype,) = params - self.assert_save_consistency("wav", src_dtype=dtype, test_mode=test_mode) + self.assert_save_consistency("wav", src_dtype=dtype) @nested_params( - ["path", "fileobj", "bytesio"], [8, 16, 24], [ None, @@ -194,19 +162,13 @@ def test_save_wav_dtype(self, test_mode, params): 8, ], ) - def test_save_flac(self, test_mode, bits_per_sample, compression_level): - self.assert_save_consistency( - "flac", compression=compression_level, bits_per_sample=bits_per_sample, test_mode=test_mode - ) + def test_save_flac(self, bits_per_sample, compression_level): + self.assert_save_consistency("flac", compression=compression_level, bits_per_sample=bits_per_sample) - @nested_params( - ["path", "fileobj", "bytesio"], - ) - def test_save_htk(self, test_mode): - self.assert_save_consistency("htk", test_mode=test_mode, num_channels=1) + def test_save_htk(self): + self.assert_save_consistency("htk", num_channels=1) @nested_params( - ["path", "fileobj", "bytesio"], [ None, -1, @@ -219,11 +181,10 @@ def test_save_htk(self, test_mode): 10, ], ) - def test_save_vorbis(self, test_mode, quality_level): - self.assert_save_consistency("vorbis", 
compression=quality_level, test_mode=test_mode) + def test_save_vorbis(self, quality_level): + self.assert_save_consistency("vorbis", compression=quality_level) @nested_params( - ["path", "fileobj", "bytesio"], [ ( "PCM_S", @@ -248,12 +209,11 @@ def test_save_vorbis(self, test_mode, quality_level): ("ALAW", 32), ], ) - def test_save_sphere(self, test_mode, enc_params): + def test_save_sphere(self, enc_params): encoding, bits_per_sample = enc_params - self.assert_save_consistency("sph", encoding=encoding, bits_per_sample=bits_per_sample, test_mode=test_mode) + self.assert_save_consistency("sph", encoding=encoding, bits_per_sample=bits_per_sample) @nested_params( - ["path", "fileobj", "bytesio"], [ ( "PCM_U", @@ -289,12 +249,11 @@ def test_save_sphere(self, test_mode, enc_params): ), ], ) - def test_save_amb(self, test_mode, enc_params): + def test_save_amb(self, enc_params): encoding, bits_per_sample = enc_params - self.assert_save_consistency("amb", encoding=encoding, bits_per_sample=bits_per_sample, test_mode=test_mode) + self.assert_save_consistency("amb", encoding=encoding, bits_per_sample=bits_per_sample) @nested_params( - ["path", "fileobj", "bytesio"], [ None, 0, @@ -307,18 +266,15 @@ def test_save_amb(self, test_mode, enc_params): 7, ], ) - def test_save_amr_nb(self, test_mode, bit_rate): - self.assert_save_consistency("amr-nb", compression=bit_rate, num_channels=1, test_mode=test_mode) + def test_save_amr_nb(self, bit_rate): + self.assert_save_consistency("amr-nb", compression=bit_rate, num_channels=1) - @nested_params( - ["path", "fileobj", "bytesio"], - ) - def test_save_gsm(self, test_mode): - self.assert_save_consistency("gsm", num_channels=1, test_mode=test_mode) + def test_save_gsm(self): + self.assert_save_consistency("gsm", num_channels=1) with self.assertRaises(RuntimeError, msg="gsm format only supports single channel audio."): - self.assert_save_consistency("gsm", num_channels=2, test_mode=test_mode) + self.assert_save_consistency("gsm", 
num_channels=2) with self.assertRaises(RuntimeError, msg="gsm format only supports a sampling rate of 8kHz."): - self.assert_save_consistency("gsm", sample_rate=16000, test_mode=test_mode) + self.assert_save_consistency("gsm", sample_rate=16000) @parameterized.expand( [ diff --git a/test/torchaudio_unittest/backend/sox_io/smoke_test.py b/test/torchaudio_unittest/backend/sox_io/smoke_test.py index e394161044..55dc0cb8ff 100644 --- a/test/torchaudio_unittest/backend/sox_io/smoke_test.py +++ b/test/torchaudio_unittest/backend/sox_io/smoke_test.py @@ -89,88 +89,3 @@ def test_vorbis(self, sample_rate, num_channels, quality_level): def test_flac(self, sample_rate, num_channels, compression_level): """Run smoke test on flac format""" self.run_smoke_test("flac", sample_rate, num_channels, compression=compression_level) - - -@skipIfNoSox -class SmokeTestFileObj(TorchaudioTestCase): - """Run smoke test on various audio format - - The purpose of this test suite is to verify that sox_io_backend functionalities do not exhibit - abnormal behaviors. - - This test suite should be able to run without any additional tools (such as sox command), - however without such tools, the correctness of each function cannot be verified. - """ - - def run_smoke_test(self, ext, sample_rate, num_channels, *, compression=None, dtype="float32"): - duration = 1 - num_frames = sample_rate * duration - original = get_wav_data(dtype, num_channels, normalize=False, num_frames=num_frames) - - fileobj = io.BytesIO() - # 1. run save - sox_io_backend.save(fileobj, original, sample_rate, compression=compression, format=ext) - # 2. run info - fileobj.seek(0) - info = sox_io_backend.info(fileobj, format=ext) - assert info.sample_rate == sample_rate - assert info.num_channels == num_channels - # 3. 
run load - fileobj.seek(0) - loaded, sr = sox_io_backend.load(fileobj, normalize=False, format=ext) - assert sr == sample_rate - assert loaded.shape[0] == num_channels - - @parameterized.expand( - list( - itertools.product( - ["float32", "int32", "int16", "uint8"], - [8000, 16000], - [1, 2], - ) - ), - name_func=name_func, - ) - def test_wav(self, dtype, sample_rate, num_channels): - """Run smoke test on wav format""" - self.run_smoke_test("wav", sample_rate, num_channels, dtype=dtype) - - @parameterized.expand( - list( - itertools.product( - [8000, 16000], - [1, 2], - [-4.2, -0.2, 0, 0.2, 96, 128, 160, 192, 224, 256, 320], - ) - ) - ) - def test_mp3(self, sample_rate, num_channels, bit_rate): - """Run smoke test on mp3 format""" - self.run_smoke_test("mp3", sample_rate, num_channels, compression=bit_rate) - - @parameterized.expand( - list( - itertools.product( - [8000, 16000], - [1, 2], - [-1, 0, 1, 2, 3, 3.6, 5, 10], - ) - ) - ) - def test_vorbis(self, sample_rate, num_channels, quality_level): - """Run smoke test on vorbis format""" - self.run_smoke_test("vorbis", sample_rate, num_channels, compression=quality_level) - - @parameterized.expand( - list( - itertools.product( - [8000, 16000], - [1, 2], - list(range(9)), - ) - ), - name_func=name_func, - ) - def test_flac(self, sample_rate, num_channels, compression_level): - """Run smoke test on flac format""" - self.run_smoke_test("flac", sample_rate, num_channels, compression=compression_level) diff --git a/test/torchaudio_unittest/sox_effect/smoke_test.py b/test/torchaudio_unittest/sox_effect/smoke_test.py index a5de940a50..30befd54ab 100644 --- a/test/torchaudio_unittest/sox_effect/smoke_test.py +++ b/test/torchaudio_unittest/sox_effect/smoke_test.py @@ -54,24 +54,3 @@ def test_apply_effects_file(self, args): _found, _sr = sox_effects.apply_effects_file( input_path, effects, normalize=False, channels_first=channels_first ) - - @parameterized.expand( - load_params("sox_effect_test_args.jsonl"), - name_func=lambda 
f, i, p: f'{f.__name__}_{i}_{p.args[0]["effects"][0][0]}', - ) - def test_apply_effects_fileobj(self, args): - """`apply_effects_file` should return identical data as sox command""" - dtype = "int32" - channels_first = True - effects = args["effects"] - num_channels = args.get("num_channels", 2) - input_sr = args.get("input_sample_rate", 8000) - - input_path = self.get_temp_path("input.wav") - data = get_wav_data(dtype, num_channels, channels_first=channels_first) - save_wav(input_path, data, input_sr, channels_first=channels_first) - - with open(input_path, "rb") as fileobj: - _found, _sr = sox_effects.apply_effects_file( - fileobj, effects, normalize=False, channels_first=channels_first - ) diff --git a/test/torchaudio_unittest/sox_effect/sox_effect_test.py b/test/torchaudio_unittest/sox_effect/sox_effect_test.py index be6b646617..2099505502 100644 --- a/test/torchaudio_unittest/sox_effect/sox_effect_test.py +++ b/test/torchaudio_unittest/sox_effect/sox_effect_test.py @@ -1,20 +1,14 @@ -import io import itertools -import tarfile from pathlib import Path from parameterized import parameterized from torchaudio import sox_effects -from torchaudio._internal import module_utils as _mod_utils from torchaudio_unittest.common_utils import ( get_sinusoid, get_wav_data, - HttpServerMixin, load_wav, PytorchTestCase, save_wav, - skipIfNoExec, - skipIfNoModule, skipIfNoSox, sox_utils, TempDirMixin, @@ -23,10 +17,6 @@ from .common import load_params, name_func -if _mod_utils.is_module_available("requests"): - import requests - - @skipIfNoSox class TestSoxEffects(PytorchTestCase): def test_init(self): @@ -241,136 +231,3 @@ def test_vorbis(self, sample_rate, num_channels): assert sr == expected_sr self.assertEqual(found, expected) - - -@skipIfNoExec("sox") -@skipIfNoSox -class TestFileObject(TempDirMixin, PytorchTestCase): - @parameterized.expand( - [ - ("wav", None), - ("flac", 0), - ("flac", 5), - ("flac", 8), - ("vorbis", -1), - ("vorbis", 10), - ("amb", None), - ] - ) - def 
test_fileobj(self, ext, compression): - """Applying effects via file object works""" - sample_rate = 16000 - channels_first = True - effects = [["band", "300", "10"]] - input_path = self.get_temp_path(f"input.{ext}") - reference_path = self.get_temp_path("reference.wav") - - sox_utils.gen_audio_file(input_path, sample_rate, num_channels=2, compression=compression) - sox_utils.run_sox_effect(input_path, reference_path, effects, output_bitdepth=32) - expected, expected_sr = load_wav(reference_path) - - with open(input_path, "rb") as fileobj: - found, sr = sox_effects.apply_effects_file(fileobj, effects, channels_first=channels_first) - save_wav(self.get_temp_path("result.wav"), found, sr, channels_first=channels_first) - assert sr == expected_sr - self.assertEqual(found, expected) - - @parameterized.expand( - [ - ("wav", None), - ("flac", 0), - ("flac", 5), - ("flac", 8), - ("vorbis", -1), - ("vorbis", 10), - ("amb", None), - ] - ) - def test_bytesio(self, ext, compression): - """Applying effects via BytesIO object works""" - sample_rate = 16000 - channels_first = True - effects = [["band", "300", "10"]] - input_path = self.get_temp_path(f"input.{ext}") - reference_path = self.get_temp_path("reference.wav") - - sox_utils.gen_audio_file(input_path, sample_rate, num_channels=2, compression=compression) - sox_utils.run_sox_effect(input_path, reference_path, effects, output_bitdepth=32) - expected, expected_sr = load_wav(reference_path) - - with open(input_path, "rb") as file_: - fileobj = io.BytesIO(file_.read()) - found, sr = sox_effects.apply_effects_file(fileobj, effects, channels_first=channels_first) - save_wav(self.get_temp_path("result.wav"), found, sr, channels_first=channels_first) - assert sr == expected_sr - self.assertEqual(found, expected) - - @parameterized.expand( - [ - ("wav", None), - ("flac", 0), - ("flac", 5), - ("flac", 8), - ("vorbis", -1), - ("vorbis", 10), - ("amb", None), - ] - ) - def test_tarfile(self, ext, compression): - """Applying effects 
to compressed audio via file-like file works""" - sample_rate = 16000 - channels_first = True - effects = [["band", "300", "10"]] - audio_file = f"input.{ext}" - - input_path = self.get_temp_path(audio_file) - reference_path = self.get_temp_path("reference.wav") - archive_path = self.get_temp_path("archive.tar.gz") - - sox_utils.gen_audio_file(input_path, sample_rate, num_channels=2, compression=compression) - sox_utils.run_sox_effect(input_path, reference_path, effects, output_bitdepth=32) - expected, expected_sr = load_wav(reference_path) - - with tarfile.TarFile(archive_path, "w") as tarobj: - tarobj.add(input_path, arcname=audio_file) - with tarfile.TarFile(archive_path, "r") as tarobj: - fileobj = tarobj.extractfile(audio_file) - found, sr = sox_effects.apply_effects_file(fileobj, effects, channels_first=channels_first) - save_wav(self.get_temp_path("result.wav"), found, sr, channels_first=channels_first) - assert sr == expected_sr - self.assertEqual(found, expected) - - -@skipIfNoSox -@skipIfNoExec("sox") -@skipIfNoModule("requests") -class TestFileObjectHttp(HttpServerMixin, PytorchTestCase): - @parameterized.expand( - [ - ("wav", None), - ("flac", 0), - ("flac", 5), - ("flac", 8), - ("vorbis", -1), - ("vorbis", 10), - ("amb", None), - ] - ) - def test_requests(self, ext, compression): - sample_rate = 16000 - channels_first = True - effects = [["band", "300", "10"]] - audio_file = f"input.{ext}" - input_path = self.get_temp_path(audio_file) - reference_path = self.get_temp_path("reference.wav") - - sox_utils.gen_audio_file(input_path, sample_rate, num_channels=2, compression=compression) - sox_utils.run_sox_effect(input_path, reference_path, effects, output_bitdepth=32) - expected, expected_sr = load_wav(reference_path) - - url = self.get_url(audio_file) - with requests.get(url, stream=True) as resp: - found, sr = sox_effects.apply_effects_file(resp.raw, effects, channels_first=channels_first) - save_wav(self.get_temp_path("result.wav"), found, sr, 
channels_first=channels_first) - assert sr == expected_sr - self.assertEqual(found, expected) diff --git a/third_party/patches/sox.patch b/third_party/patches/sox.patch deleted file mode 100644 index fe8df945c0..0000000000 --- a/third_party/patches/sox.patch +++ /dev/null @@ -1,16 +0,0 @@ -See https://github.com/pytorch/audio/pull/1297 -diff -ru sox/src/formats.c sox/src/formats.c ---- sox/src/formats.c 2014-10-26 19:55:50.000000000 -0700 -+++ sox/src/formats.c 2021-02-22 16:01:02.833144070 -0800 -@@ -333,6 +333,10 @@ - assert(ft); - if (!ft->fp) - return sox_false; -- fstat(fileno((FILE*)ft->fp), &st); -+ int fd = fileno((FILE*)ft->fp); -+ if (fd < 0) -+ return sox_false; -+ if (fstat(fd, &st) < 0) -+ return sox_false; - return ((st.st_mode & S_IFMT) == S_IFREG); - } diff --git a/third_party/sox/CMakeLists.txt b/third_party/sox/CMakeLists.txt index 50e5cc9156..b043362925 100644 --- a/third_party/sox/CMakeLists.txt +++ b/third_party/sox/CMakeLists.txt @@ -193,7 +193,7 @@ ExternalProject_Add(sox DOWNLOAD_DIR ${ARCHIVE_DIR} URL https://downloads.sourceforge.net/project/sox/sox/14.4.2/sox-14.4.2.tar.bz2 URL_HASH SHA256=81a6956d4330e75b5827316e44ae381e6f1e8928003c6aa45896da9041ea149c - PATCH_COMMAND patch -p1 < ${patch_dir}/sox.patch && cp ${patch_dir}/config.guess ${patch_dir}/config.sub ${CMAKE_CURRENT_BINARY_DIR}/src/sox/ + PATCH_COMMAND cp ${patch_dir}/config.guess ${patch_dir}/config.sub ${CMAKE_CURRENT_BINARY_DIR}/src/sox/ CONFIGURE_COMMAND ${CMAKE_COMMAND} -E env ${envs} ${CMAKE_CURRENT_BINARY_DIR}/src/sox/configure ${COMMON_ARGS} ${SOX_OPTIONS} BUILD_BYPRODUCTS ${SOX_LIBRARIES} DOWNLOAD_NO_PROGRESS ON diff --git a/torchaudio/_extension/utils.py b/torchaudio/_extension/utils.py index 5490385d34..30ef2e4a35 100644 --- a/torchaudio/_extension/utils.py +++ b/torchaudio/_extension/utils.py @@ -67,7 +67,7 @@ def _init_sox(): _load_lib("libtorchaudio_sox") import torchaudio.lib._torchaudio_sox # noqa - torch.ops.torchaudio.sox_utils_set_verbosity(0) + 
torchaudio.lib._torchaudio_sox.set_verbosity(0) import atexit diff --git a/torchaudio/backend/sox_io_backend.py b/torchaudio/backend/sox_io_backend.py index 8b540b5954..30b5cecfb0 100644 --- a/torchaudio/backend/sox_io_backend.py +++ b/torchaudio/backend/sox_io_backend.py @@ -1,10 +1,8 @@ import os -import warnings from typing import Optional, Tuple import torch import torchaudio -from torchaudio.utils.sox_utils import get_buffer_size from .common import AudioMetaData @@ -14,10 +12,6 @@ def _fail_info(filepath: str, format: Optional[str]) -> AudioMetaData: raise RuntimeError("Failed to fetch metadata from {}".format(filepath)) -def _fail_info_fileobj(fileobj, format: Optional[str], buffer_size: int) -> AudioMetaData: - raise RuntimeError("Failed to fetch metadata from {}".format(fileobj)) - - # Note: need to comply TorchScript syntax -- need annotation and no f-string def _fail_load( filepath: str, @@ -30,30 +24,14 @@ def _fail_load( raise RuntimeError("Failed to load audio from {}".format(filepath)) -def _fail_load_fileobj(fileobj, *args, **kwargs): - raise RuntimeError(f"Failed to load audio from {fileobj}") - - if torchaudio._extension._FFMPEG_INITIALIZED: import torchaudio.io._compat as _compat _fallback_info = _compat.info_audio - _fallback_info_fileobj = _compat.info_audio_fileobj _fallback_load = _compat.load_audio - _fallback_load_fileobj = _compat.load_audio_fileobj else: _fallback_info = _fail_info - _fallback_info_fileobj = _fail_info_fileobj _fallback_load = _fail_load - _fallback_load_fileobj = _fail_load_fileobj - - -_deprecation_message = ( - "File-like object support in sox_io backend is deprecated, " - "and will be removed in v2.1. " - "See https://github.com/pytorch/audio/issues/2950 for the detail." - "Please migrate to the new dispatcher, or use soundfile backend." -) @torchaudio._extension.fail_if_no_sox @@ -64,24 +42,8 @@ def info( """Get signal information of an audio file. 
Args: - filepath (path-like object or file-like object): - Source of audio data. When the function is not compiled by TorchScript, - (e.g. ``torch.jit.script``), the following types are accepted; - - * ``path-like``: file path - * ``file-like``: Object with ``read(size: int) -> bytes`` method, - which returns byte string of at most ``size`` length. - - When the function is compiled by TorchScript, only ``str`` type is allowed. - - Note: - - * When the input type is file-like object, this function cannot - get the correct length (``num_samples``) for certain formats, - such as ``vorbis``. - In this case, the value of ``num_samples`` is ``0``. - * This argument is intentionally annotated as ``str`` only due to - TorchScript compiler compatibility. + filepath (path-like object): + Source of audio data. format (str or None, optional): Override the format detection with the given format. @@ -93,21 +55,7 @@ def info( """ if not torch.jit.is_scripting(): if hasattr(filepath, "read"): - # Special case for Backward compatibility - # v0.11 -> v0.12, mp3 handling is moved to FFmpeg. - # file-like objects are not necessarily fallback-able - # when they are not seekable. - # The previous libsox-based implementation required `format="mp3"` - # because internally libsox does not auto-detect the format. - # For the special BC for mp3, we handle mp3 differently. 
- buffer_size = get_buffer_size() - if format == "mp3": - return _fallback_info_fileobj(filepath, format, buffer_size) - warnings.warn(_deprecation_message) - sinfo = torchaudio.lib._torchaudio_sox.get_info_fileobj(filepath, format) - if sinfo is not None: - return AudioMetaData(*sinfo) - return _fallback_info_fileobj(filepath, format, buffer_size) + raise RuntimeError("sox_io backend does not support file-like object.") filepath = os.fspath(filepath) sinfo = torch.ops.torchaudio.sox_io_get_info(filepath, format) if sinfo is not None: @@ -171,18 +119,7 @@ def load( For these formats, this function always returns ``float32`` Tensor with values. Args: - filepath (path-like object or file-like object): - Source of audio data. When the function is not compiled by TorchScript, - (e.g. ``torch.jit.script``), the following types are accepted; - - * ``path-like``: file path - * ``file-like``: Object with ``read(size: int) -> bytes`` method, - which returns byte string of at most ``size`` length. - - When the function is compiled by TorchScript, only ``str`` type is allowed. - - Note: This argument is intentionally annotated as ``str`` only due to - TorchScript compiler compatibility. + filepath (path-like object): Source of audio data. frame_offset (int): Number of frames to skip before start reading data. num_frames (int, optional): @@ -214,39 +151,7 @@ def load( """ if not torch.jit.is_scripting(): if hasattr(filepath, "read"): - # Special case for Backward compatibility - # v0.11 -> v0.12, mp3 handling is moved to FFmpeg. - # file-like objects are not necessarily fallback-able - # when they are not seekable. - # The previous libsox-based implementation required `format="mp3"` - # because internally libsox does not auto-detect the format. - # For the special BC for mp3, we handle mp3 differently. 
- buffer_size = get_buffer_size() - if format == "mp3": - return _fallback_load_fileobj( - filepath, - frame_offset, - num_frames, - normalize, - channels_first, - format, - buffer_size, - ) - warnings.warn(_deprecation_message) - ret = torchaudio.lib._torchaudio_sox.load_audio_fileobj( - filepath, frame_offset, num_frames, normalize, channels_first, format - ) - if ret is not None: - return ret - return _fallback_load_fileobj( - filepath, - frame_offset, - num_frames, - normalize, - channels_first, - format, - buffer_size, - ) + raise RuntimeError("sox_io backend does not support file-like object.") filepath = os.fspath(filepath) ret = torch.ops.torchaudio.sox_io_load_audio_file( filepath, frame_offset, num_frames, normalize, channels_first, format @@ -270,9 +175,7 @@ def save( """Save audio data to file. Args: - filepath (str or pathlib.Path): Path to save file. - This function also handles ``pathlib.Path`` objects, but is annotated - as ``str`` for TorchScript compiler compatibility. + filepath (path-like object): Path to save file. src (torch.Tensor): Audio data to save. must be 2D tensor. 
sample_rate (int): sampling rate channels_first (bool, optional): If ``True``, the given tensor is interpreted as `[channel, time]`, @@ -413,18 +316,7 @@ def save( """ if not torch.jit.is_scripting(): if hasattr(filepath, "write"): - warnings.warn(_deprecation_message) - torchaudio.lib._torchaudio_sox.save_audio_fileobj( - filepath, - src, - sample_rate, - channels_first, - compression, - format, - encoding, - bits_per_sample, - ) - return + raise RuntimeError("sox_io backend does not handle file-like object.") filepath = os.fspath(filepath) torch.ops.torchaudio.sox_io_save_audio_file( filepath, diff --git a/torchaudio/csrc/sox/CMakeLists.txt b/torchaudio/csrc/sox/CMakeLists.txt index e369ecf7af..3391a4fc37 100644 --- a/torchaudio/csrc/sox/CMakeLists.txt +++ b/torchaudio/csrc/sox/CMakeLists.txt @@ -15,17 +15,9 @@ torchaudio_library( ) if (BUILD_TORCHAUDIO_PYTHON_EXTENSION) - set( - ext_sources - pybind/pybind.cpp - pybind/effects.cpp - pybind/effects_chain.cpp - pybind/io.cpp - pybind/utils.cpp - ) torchaudio_extension( _torchaudio_sox - "${ext_sources}" + "pybind/pybind.cpp;" "" "libtorchaudio_sox" "" diff --git a/torchaudio/csrc/sox/pybind/effects.cpp b/torchaudio/csrc/sox/pybind/effects.cpp deleted file mode 100644 index db80f98d63..0000000000 --- a/torchaudio/csrc/sox/pybind/effects.cpp +++ /dev/null @@ -1,123 +0,0 @@ -#include -#include -#include - -using namespace torchaudio::sox_utils; - -namespace torchaudio { -namespace sox_effects { - -// Streaming decoding over file-like object is tricky because libsox operates on -// FILE pointer. The folloing is what `sox` and `play` commands do -// - file input -> FILE pointer -// - URL input -> call wget in suprocess and pipe the data -> FILE pointer -// - stdin -> FILE pointer -// -// We want to, instead, fetch byte strings chunk by chunk, consume them, and -// discard. -// -// Here is the approach -// 1. 
Initialize sox_format_t using sox_open_mem_read, providing the initial -// chunk of byte string -// This will perform header-based format detection, if necessary, then fill -// the metadata of sox_format_t. Internally, sox_open_mem_read uses fmemopen, -// which returns FILE* which points the buffer of the provided byte string. -// 2. Each time sox reads a chunk from the FILE*, we update the underlying -// buffer in a way that it -// starts with unseen data, and append the new data read from the given -// fileobj. This will trick libsox as if it keeps reading from the FILE* -// continuously. -// For Step 2. see `fileobj_input_drain` function in effects_chain.cpp -auto apply_effects_fileobj( - py::object fileobj, - const std::vector>& effects, - c10::optional normalize, - c10::optional channels_first, - c10::optional format) - -> c10::optional> { - // Prepare the buffer used throughout the lifecycle of SoxEffectChain. - // - // For certain format (such as FLAC), libsox keeps reading the content at - // the initialization unless it reaches EOF even when the header is properly - // parsed. (Making buffer size 8192, which is way bigger than the header, - // resulted in libsox consuming all the buffer content at the time it opens - // the file.) Therefore buffer has to always contain valid data, except after - // EOF. We default to `sox_get_globals()->bufsiz`* for buffer size and we - // first check if there is enough data to fill the buffer. `read_fileobj` - // repeatedly calls `read` method until it receives the requested length of - // bytes or it reaches EOF. If we get bytes shorter than requested, that means - // the whole audio data are fetched. - // - // * This can be changed with `torchaudio.utils.sox_utils.set_buffer_size`. - const auto capacity = [&]() { - // NOTE: - // Use the abstraction provided by `libtorchaudio` to access the global - // config defined by libsox. 
Directly using `sox_get_globals` function will - // end up retrieving the static variable defined in `_torchaudio`, which is - // not correct. - const auto bufsiz = get_buffer_size(); - const int64_t kDefaultCapacityInBytes = 256; - return (bufsiz > kDefaultCapacityInBytes) ? bufsiz - : kDefaultCapacityInBytes; - }(); - std::string buffer(capacity, '\0'); - auto* in_buf = const_cast(buffer.data()); - auto num_read = read_fileobj(&fileobj, capacity, in_buf); - // If the file is shorter than 256, then libsox cannot read the header. - auto in_buffer_size = (num_read > 256) ? num_read : 256; - - // Open file (this starts reading the header) - // When opening a file there are two functions that can touches FILE*. - // * `auto_detect_format` - // https://github.com/dmkrepo/libsox/blob/b9dd1a86e71bbd62221904e3e59dfaa9e5e72046/src/formats.c#L43 - // * `startread` handler of detected format. - // https://github.com/dmkrepo/libsox/blob/b9dd1a86e71bbd62221904e3e59dfaa9e5e72046/src/formats.c#L574 - // To see the handler of a particular format, go to - // https://github.com/dmkrepo/libsox/blob/b9dd1a86e71bbd62221904e3e59dfaa9e5e72046/src/.c - // For example, voribs can be found - // https://github.com/dmkrepo/libsox/blob/b9dd1a86e71bbd62221904e3e59dfaa9e5e72046/src/vorbis.c#L97-L158 - SoxFormat sf(sox_open_mem_read( - in_buf, - in_buffer_size, - /*signal=*/nullptr, - /*encoding=*/nullptr, - /*filetype=*/format.has_value() ? 
format.value().c_str() : nullptr)); - - // In case of streamed data, length can be 0 - if (static_cast(sf) == nullptr || - sf->encoding.encoding == SOX_ENCODING_UNKNOWN) { - return {}; - } - - // Prepare output buffer - std::vector out_buffer; - out_buffer.reserve(sf->signal.length); - - // Create and run SoxEffectsChain - const auto dtype = get_dtype(sf->encoding.encoding, sf->signal.precision); - torchaudio::sox_effects_chain::SoxEffectsChainPyBind chain( - /*input_encoding=*/sf->encoding, - /*output_encoding=*/get_tensor_encodinginfo(dtype)); - chain.addInputFileObj(sf, in_buf, in_buffer_size, &fileobj); - for (const auto& effect : effects) { - chain.addEffect(effect); - } - chain.addOutputBuffer(&out_buffer); - chain.run(); - - // Create tensor from buffer - bool channels_first_ = channels_first.value_or(true); - auto tensor = convert_to_tensor( - /*buffer=*/out_buffer.data(), - /*num_samples=*/out_buffer.size(), - /*num_channels=*/chain.getOutputNumChannels(), - dtype, - normalize.value_or(true), - channels_first_); - - return std::forward_as_tuple( - tensor, static_cast(chain.getOutputSampleRate())); -} - -} // namespace sox_effects -} // namespace torchaudio diff --git a/torchaudio/csrc/sox/pybind/effects.h b/torchaudio/csrc/sox/pybind/effects.h deleted file mode 100644 index 7f1e653cd5..0000000000 --- a/torchaudio/csrc/sox/pybind/effects.h +++ /dev/null @@ -1,20 +0,0 @@ -#ifndef TORCHAUDIO_PYBIND_SOX_EFFECTS_H -#define TORCHAUDIO_PYBIND_SOX_EFFECTS_H - -#include - -namespace torchaudio { -namespace sox_effects { - -auto apply_effects_fileobj( - py::object fileobj, - const std::vector>& effects, - c10::optional normalize, - c10::optional channels_first, - c10::optional format) - -> c10::optional>; - -} // namespace sox_effects -} // namespace torchaudio - -#endif diff --git a/torchaudio/csrc/sox/pybind/effects_chain.cpp b/torchaudio/csrc/sox/pybind/effects_chain.cpp deleted file mode 100644 index 42128433d6..0000000000 --- 
a/torchaudio/csrc/sox/pybind/effects_chain.cpp +++ /dev/null @@ -1,237 +0,0 @@ -#include -#include -#include - -using namespace torchaudio::sox_utils; - -namespace torchaudio { -namespace sox_effects_chain { - -namespace { - -/// helper classes for passing file-like object to SoxEffectChain -struct FileObjInputPriv { - sox_format_t* sf; - py::object* fileobj; - bool eof_reached; - char* buffer; - uint64_t buffer_size; -}; - -struct FileObjOutputPriv { - sox_format_t* sf; - py::object* fileobj; - char** buffer; - size_t* buffer_size; -}; - -/// Callback function to feed byte string -/// https://github.com/dmkrepo/libsox/blob/b9dd1a86e71bbd62221904e3e59dfaa9e5e72046/src/sox.h#L1268-L1278 -auto fileobj_input_drain(sox_effect_t* effp, sox_sample_t* obuf, size_t* osamp) - -> int { - auto priv = static_cast(effp->priv); - auto sf = priv->sf; - auto buffer = priv->buffer; - - // 1. Refresh the buffer - // - // NOTE: - // Since the underlying FILE* was opened with `fmemopen`, the only way - // libsox detect EOF is reaching the end of the buffer. (null byte won't - // help) Therefore we need to align the content at the end of buffer, - // otherwise, libsox will keep reading the content beyond intended length. - // - // Before: - // - // |<-------consumed------>|<---remaining--->| - // |***********************|-----------------| - // ^ ftell - // - // After: - // - // |<-offset->|<---remaining--->|<-new data->| - // |**********|-----------------|++++++++++++| - // ^ ftell - - // NOTE: - // Do not use `sf->tell_off` here. Presumably, `tell_off` and `fseek` are - // supposed to be in sync, but there are cases (Vorbis) they are not - // in sync and `tell_off` has seemingly uninitialized value, which - // leads num_remain to be negative and cause segmentation fault - // in `memmove`. 
- const auto tell = ftell((FILE*)sf->fp); - if (tell < 0) { - throw std::runtime_error("Internal Error: ftell failed."); - } - const auto num_consumed = static_cast(tell); - if (num_consumed > priv->buffer_size) { - throw std::runtime_error("Internal Error: buffer overrun."); - } - - const auto num_remain = priv->buffer_size - num_consumed; - - // 1.1. Fetch the data to see if there is data to fill the buffer - size_t num_refill = 0; - std::string chunk(num_consumed, '\0'); - if (num_consumed && !priv->eof_reached) { - num_refill = read_fileobj( - priv->fileobj, num_consumed, const_cast(chunk.data())); - if (num_refill < num_consumed) { - priv->eof_reached = true; - } - } - const auto offset = num_consumed - num_refill; - - // 1.2. Move the unconsumed data towards the beginning of buffer. - if (num_remain) { - auto src = static_cast(buffer + num_consumed); - auto dst = static_cast(buffer + offset); - memmove(dst, src, num_remain); - } - - // 1.3. Refill the remaining buffer. - if (num_refill) { - auto src = static_cast(const_cast(chunk.c_str())); - auto dst = buffer + offset + num_remain; - memcpy(dst, src, num_refill); - } - - // 1.4. Set the file pointer to the new offset - sf->tell_off = offset; - fseek((FILE*)sf->fp, offset, SEEK_SET); - - // 2. Perform decoding operation - // The following part is practically same as "input" effect - // https://github.com/dmkrepo/libsox/blob/b9dd1a86e71bbd62221904e3e59dfaa9e5e72046/src/input.c#L30-L48 - - // At this point, osamp represents the buffer size in bytes, - // but sox_read expects the maximum number of samples ready to read. - // Normally, this is fine, but in case when the samples are not 4-byte - // aligned, (e.g. sample is 24bits), the resulting signal is not correct. 
- // https://github.com/pytorch/audio/issues/2083 - if (sf->encoding.bits_per_sample > 0) - *osamp /= (sf->encoding.bits_per_sample / 8); - - // Ensure that it's a multiple of the number of channels - *osamp -= *osamp % effp->out_signal.channels; - - // Read up to *osamp samples into obuf; - // store the actual number read back to *osamp - *osamp = sox_read(sf, obuf, *osamp); - - // Decoding is finished when fileobject is exhausted and sox can no longer - // decode a sample. - return (priv->eof_reached && !*osamp) ? SOX_EOF : SOX_SUCCESS; -} - -auto fileobj_output_flow( - sox_effect_t* effp, - sox_sample_t const* ibuf, - sox_sample_t* obuf LSX_UNUSED, - size_t* isamp, - size_t* osamp) -> int { - *osamp = 0; - if (*isamp) { - auto priv = static_cast(effp->priv); - auto sf = priv->sf; - auto fp = static_cast(sf->fp); - auto fileobj = priv->fileobj; - auto buffer = priv->buffer; - - // Encode chunk - auto num_samples_written = sox_write(sf, ibuf, *isamp); - fflush(fp); - - // Copy the encoded chunk to python object. 
- fileobj->attr("write")(py::bytes(*buffer, ftell(fp))); - - // Reset FILE* - sf->tell_off = 0; - fseek(fp, 0, SEEK_SET); - - if (num_samples_written != *isamp) { - if (sf->sox_errno) { - std::ostringstream stream; - stream << sf->sox_errstr << " " << sox_strerror(sf->sox_errno) << " " - << sf->filename; - throw std::runtime_error(stream.str()); - } - return SOX_EOF; - } - } - return SOX_SUCCESS; -} - -auto get_fileobj_input_handler() -> sox_effect_handler_t* { - static sox_effect_handler_t handler{ - /*name=*/"input_fileobj_object", - /*usage=*/nullptr, - /*flags=*/SOX_EFF_MCHAN, - /*getopts=*/nullptr, - /*start=*/nullptr, - /*flow=*/nullptr, - /*drain=*/fileobj_input_drain, - /*stop=*/nullptr, - /*kill=*/nullptr, - /*priv_size=*/sizeof(FileObjInputPriv)}; - return &handler; -} - -auto get_fileobj_output_handler() -> sox_effect_handler_t* { - static sox_effect_handler_t handler{ - /*name=*/"output_fileobj_object", - /*usage=*/nullptr, - /*flags=*/SOX_EFF_MCHAN, - /*getopts=*/nullptr, - /*start=*/nullptr, - /*flow=*/fileobj_output_flow, - /*drain=*/nullptr, - /*stop=*/nullptr, - /*kill=*/nullptr, - /*priv_size=*/sizeof(FileObjOutputPriv)}; - return &handler; -} - -} // namespace - -void SoxEffectsChainPyBind::addInputFileObj( - sox_format_t* sf, - char* buffer, - uint64_t buffer_size, - py::object* fileobj) { - in_sig_ = sf->signal; - interm_sig_ = in_sig_; - - SoxEffect e(sox_create_effect(get_fileobj_input_handler())); - auto priv = static_cast(e->priv); - priv->sf = sf; - priv->fileobj = fileobj; - priv->eof_reached = false; - priv->buffer = buffer; - priv->buffer_size = buffer_size; - if (sox_add_effect(sec_, e, &interm_sig_, &in_sig_) != SOX_SUCCESS) { - throw std::runtime_error( - "Internal Error: Failed to add effect: input fileobj"); - } -} - -void SoxEffectsChainPyBind::addOutputFileObj( - sox_format_t* sf, - char** buffer, - size_t* buffer_size, - py::object* fileobj) { - out_sig_ = sf->signal; - SoxEffect 
e(sox_create_effect(get_fileobj_output_handler())); - auto priv = static_cast(e->priv); - priv->sf = sf; - priv->fileobj = fileobj; - priv->buffer = buffer; - priv->buffer_size = buffer_size; - if (sox_add_effect(sec_, e, &interm_sig_, &out_sig_) != SOX_SUCCESS) { - throw std::runtime_error( - "Internal Error: Failed to add effect: output fileobj"); - } -} - -} // namespace sox_effects_chain -} // namespace torchaudio diff --git a/torchaudio/csrc/sox/pybind/effects_chain.h b/torchaudio/csrc/sox/pybind/effects_chain.h deleted file mode 100644 index acbacf6013..0000000000 --- a/torchaudio/csrc/sox/pybind/effects_chain.h +++ /dev/null @@ -1,30 +0,0 @@ -#ifndef TORCHAUDIO_PYBIND_SOX_EFFECTS_CHAIN_H -#define TORCHAUDIO_PYBIND_SOX_EFFECTS_CHAIN_H - -#include -#include - -namespace torchaudio { -namespace sox_effects_chain { - -class SoxEffectsChainPyBind : public SoxEffectsChain { - using SoxEffectsChain::SoxEffectsChain; - - public: - void addInputFileObj( - sox_format_t* sf, - char* buffer, - uint64_t buffer_size, - py::object* fileobj); - - void addOutputFileObj( - sox_format_t* sf, - char** buffer, - size_t* buffer_size, - py::object* fileobj); -}; - -} // namespace sox_effects_chain -} // namespace torchaudio - -#endif diff --git a/torchaudio/csrc/sox/pybind/io.cpp b/torchaudio/csrc/sox/pybind/io.cpp deleted file mode 100644 index 5fc6d271b5..0000000000 --- a/torchaudio/csrc/sox/pybind/io.cpp +++ /dev/null @@ -1,195 +0,0 @@ -#include -#include -#include -#include -#include -#include - -#include - -using namespace torchaudio::sox_utils; - -namespace torchaudio { -namespace sox_io { - -auto get_info_fileobj(py::object fileobj, c10::optional format) - -> c10::optional { - // Prepare in-memory file object - // When libsox opens a file, it also reads the header. - // When opening a file there are two functions that might touch FILE* (and the - // underlying buffer). 
- // * `auto_detect_format` - // https://github.com/dmkrepo/libsox/blob/b9dd1a86e71bbd62221904e3e59dfaa9e5e72046/src/formats.c#L43 - // * `startread` handler of detected format. - // https://github.com/dmkrepo/libsox/blob/b9dd1a86e71bbd62221904e3e59dfaa9e5e72046/src/formats.c#L574 - // To see the handler of a particular format, go to - // https://github.com/dmkrepo/libsox/blob/b9dd1a86e71bbd62221904e3e59dfaa9e5e72046/src/.c - // For example, voribs can be found - // https://github.com/dmkrepo/libsox/blob/b9dd1a86e71bbd62221904e3e59dfaa9e5e72046/src/vorbis.c#L97-L158 - // - // `auto_detect_format` function only requires 256 bytes, but format-dependent - // `startread` handler might require more data. In case of vorbis, the size of - // header is unbounded, but typically 4kB maximum. - // - // "The header size is unbounded, although for streaming a rule-of-thumb of - // 4kB or less is recommended (and Xiph.Org's Vorbis encoder follows this - // suggestion)." - // - // See: - // https://xiph.org/vorbis/doc/Vorbis_I_spec.html - const auto capacity = [&]() { - // NOTE: - // Use the abstraction provided by `libtorchaudio` to access the global - // config defined by libsox. Directly using `sox_get_globals` function will - // end up retrieving the static variable defined in `_torchaudio`, which is - // not correct. - const auto bufsiz = get_buffer_size(); - const int64_t kDefaultCapacityInBytes = 4096; - return (bufsiz > kDefaultCapacityInBytes) ? bufsiz - : kDefaultCapacityInBytes; - }(); - std::string buffer(capacity, '\0'); - auto* buf = const_cast(buffer.data()); - auto num_read = read_fileobj(&fileobj, capacity, buf); - // If the file is shorter than 256, then libsox cannot read the header. - auto buf_size = (num_read > 256) ? num_read : 256; - - SoxFormat sf(sox_open_mem_read( - buf, - buf_size, - /*signal=*/nullptr, - /*encoding=*/nullptr, - /*filetype=*/format.has_value() ? 
format.value().c_str() : nullptr)); - - if (static_cast(sf) == nullptr || - sf->encoding.encoding == SOX_ENCODING_UNKNOWN) { - return c10::optional{}; - } - - return std::forward_as_tuple( - static_cast(sf->signal.rate), - static_cast(sf->signal.length / sf->signal.channels), - static_cast(sf->signal.channels), - static_cast(sf->encoding.bits_per_sample), - get_encoding(sf->encoding.encoding)); -} - -auto load_audio_fileobj( - py::object fileobj, - c10::optional frame_offset, - c10::optional num_frames, - c10::optional normalize, - c10::optional channels_first, - c10::optional format) - -> c10::optional> { - auto effects = get_effects(frame_offset, num_frames); - return torchaudio::sox_effects::apply_effects_fileobj( - std::move(fileobj), - effects, - normalize, - channels_first, - std::move(format)); -} - -namespace { - -// helper class to automatically release buffer, to be used by -// save_audio_fileobj -struct AutoReleaseBuffer { - char* ptr; - size_t size; - - AutoReleaseBuffer() : ptr(nullptr), size(0) {} - AutoReleaseBuffer(const AutoReleaseBuffer& other) = delete; - AutoReleaseBuffer(AutoReleaseBuffer&& other) = delete; - auto operator=(const AutoReleaseBuffer& other) -> AutoReleaseBuffer& = delete; - auto operator=(AutoReleaseBuffer&& other) -> AutoReleaseBuffer& = delete; - ~AutoReleaseBuffer() { - if (ptr) { - free(ptr); - } - } -}; - -} // namespace - -void save_audio_fileobj( - py::object fileobj, - torch::Tensor tensor, - int64_t sample_rate, - bool channels_first, - c10::optional compression, - c10::optional format, - c10::optional encoding, - c10::optional bits_per_sample) { - validate_input_tensor(tensor); - - if (!format.has_value()) { - throw std::runtime_error( - "`format` is required when saving to file object."); - } - const auto filetype = format.value(); - - if (filetype == "amr-nb") { - const auto num_channels = tensor.size(channels_first ? 
0 : 1); - if (num_channels != 1) { - throw std::runtime_error( - "amr-nb format only supports single channel audio."); - } - } else if (filetype == "htk") { - const auto num_channels = tensor.size(channels_first ? 0 : 1); - if (num_channels != 1) { - throw std::runtime_error( - "htk format only supports single channel audio."); - } - } else if (filetype == "gsm") { - const auto num_channels = tensor.size(channels_first ? 0 : 1); - if (num_channels != 1) { - throw std::runtime_error( - "gsm format only supports single channel audio."); - } - if (sample_rate != 8000) { - throw std::runtime_error( - "gsm format only supports a sampling rate of 8kHz."); - } - } - const auto signal_info = - get_signalinfo(&tensor, sample_rate, filetype, channels_first); - const auto encoding_info = get_encodinginfo_for_save( - filetype, - tensor.dtype(), - compression, - std::move(encoding), - bits_per_sample); - - AutoReleaseBuffer buffer; - - SoxFormat sf(sox_open_memstream_write( - &buffer.ptr, - &buffer.size, - &signal_info, - &encoding_info, - filetype.c_str(), - /*oob=*/nullptr)); - - if (static_cast(sf) == nullptr) { - throw std::runtime_error( - "Error saving audio file: failed to open memory stream."); - } - - torchaudio::sox_effects_chain::SoxEffectsChainPyBind chain( - /*input_encoding=*/get_tensor_encodinginfo(tensor.dtype()), - /*output_encoding=*/sf->encoding); - chain.addInputTensor(&tensor, sample_rate, channels_first); - chain.addOutputFileObj(sf, &buffer.ptr, &buffer.size, &fileobj); - chain.run(); - - // Closing the sox_format_t is necessary for flushing the last chunk to the - // buffer - sf.close(); - - fileobj.attr("write")(py::bytes(buffer.ptr, buffer.size)); -} - -} // namespace sox_io -} // namespace torchaudio diff --git a/torchaudio/csrc/sox/pybind/io.h b/torchaudio/csrc/sox/pybind/io.h deleted file mode 100644 index db91ad4ace..0000000000 --- a/torchaudio/csrc/sox/pybind/io.h +++ /dev/null @@ -1,37 +0,0 @@ -#ifndef TORCHAUDIO_PYBIND_SOX_IO_H -#define 
TORCHAUDIO_PYBIND_SOX_IO_H - -#include - -namespace torchaudio { -namespace sox_io { - -using MetaDataTuple = - std::tuple; - -auto get_info_fileobj(py::object fileobj, c10::optional format) - -> c10::optional; - -auto load_audio_fileobj( - py::object fileobj, - c10::optional frame_offset, - c10::optional num_frames, - c10::optional normalize, - c10::optional channels_first, - c10::optional format) - -> c10::optional>; - -void save_audio_fileobj( - py::object fileobj, - torch::Tensor tensor, - int64_t sample_rate, - bool channels_first, - c10::optional compression, - c10::optional format, - c10::optional encoding, - c10::optional bits_per_sample); - -} // namespace sox_io -} // namespace torchaudio - -#endif diff --git a/torchaudio/csrc/sox/pybind/pybind.cpp b/torchaudio/csrc/sox/pybind/pybind.cpp index 751471c52e..e7f8a8216c 100644 --- a/torchaudio/csrc/sox/pybind/pybind.cpp +++ b/torchaudio/csrc/sox/pybind/pybind.cpp @@ -1,23 +1,45 @@ #include +#include -#include -#include +namespace torchaudio { +namespace sox { +namespace { PYBIND11_MODULE(_torchaudio_sox, m) { m.def( - "get_info_fileobj", - &torchaudio::sox_io::get_info_fileobj, - "Get metadata of audio in file object."); + "set_seed", + &torchaudio::sox_utils::set_seed, + "Set random seed."); m.def( - "load_audio_fileobj", - &torchaudio::sox_io::load_audio_fileobj, - "Load audio from file object."); + "set_verbosity", + &torchaudio::sox_utils::set_verbosity, + "Set verbosity."); m.def( - "save_audio_fileobj", - &torchaudio::sox_io::save_audio_fileobj, - "Save audio to file obj."); + "set_use_threads", + &torchaudio::sox_utils::set_use_threads, + "Set threading."); m.def( - "apply_effects_fileobj", - &torchaudio::sox_effects::apply_effects_fileobj, - "Decode audio data from file-like obj and apply effects."); + "set_buffer_size", + &torchaudio::sox_utils::set_buffer_size, + "Set buffer size."); + m.def( + "get_buffer_size", + &torchaudio::sox_utils::get_buffer_size, + "Get buffer size."); + m.def( + 
"list_effects", + &torchaudio::sox_utils::list_effects, + "List available effects."); + m.def( + "list_read_formats", + &torchaudio::sox_utils::list_read_formats, + "List supported formats for decoding."); + m.def( + "list_write_formats", + &torchaudio::sox_utils::list_write_formats, + "List supported formats for encoding."); } + +} // namespace +} // namespace sox +} // namespace torchaudio diff --git a/torchaudio/csrc/sox/pybind/utils.cpp b/torchaudio/csrc/sox/pybind/utils.cpp deleted file mode 100644 index 1744be281a..0000000000 --- a/torchaudio/csrc/sox/pybind/utils.cpp +++ /dev/null @@ -1,33 +0,0 @@ -#include - -namespace torchaudio { -namespace sox_utils { - -auto read_fileobj(py::object* fileobj, const uint64_t size, char* buffer) - -> uint64_t { - uint64_t num_read = 0; - while (num_read < size) { - auto request = size - num_read; - auto chunk = static_cast( - static_cast(fileobj->attr("read")(request))); - auto chunk_len = chunk.length(); - if (chunk_len == 0) { - break; - } - if (chunk_len > request) { - std::ostringstream message; - message - << "Requested up to " << request << " bytes but, " - << "received " << chunk_len << " bytes.
" - << "The given object does not confirm to read protocol of file object."; - throw std::runtime_error(message.str()); - } - memcpy(buffer, chunk.data(), chunk_len); - buffer += chunk_len; - num_read += chunk_len; - } - return num_read; -} - -} // namespace sox_utils -} // namespace torchaudio diff --git a/torchaudio/csrc/sox/pybind/utils.h b/torchaudio/csrc/sox/pybind/utils.h deleted file mode 100644 index 21955e255c..0000000000 --- a/torchaudio/csrc/sox/pybind/utils.h +++ /dev/null @@ -1,14 +0,0 @@ -#ifndef TORCHAUDIO_PYBIND_SOX_UTILS_H -#define TORCHAUDIO_PYBIND_SOX_UTILS_H - -#include - -namespace torchaudio { -namespace sox_utils { - -auto read_fileobj(py::object* fileobj, uint64_t size, char* buffer) -> uint64_t; - -} // namespace sox_utils -} // namespace torchaudio - -#endif diff --git a/torchaudio/csrc/sox/utils.cpp b/torchaudio/csrc/sox/utils.cpp index 88c0ec9839..d3f96c33c7 100644 --- a/torchaudio/csrc/sox/utils.cpp +++ b/torchaudio/csrc/sox/utils.cpp @@ -492,30 +492,5 @@ sox_encodinginfo_t get_encodinginfo_for_save( /*opposite_endian=*/sox_false}; } -TORCH_LIBRARY_FRAGMENT(torchaudio, m) { - m.def("torchaudio::sox_utils_set_seed", &torchaudio::sox_utils::set_seed); - m.def( - "torchaudio::sox_utils_set_verbosity", - &torchaudio::sox_utils::set_verbosity); - m.def( - "torchaudio::sox_utils_set_use_threads", - &torchaudio::sox_utils::set_use_threads); - m.def( - "torchaudio::sox_utils_set_buffer_size", - &torchaudio::sox_utils::set_buffer_size); - m.def( - "torchaudio::sox_utils_list_effects", - &torchaudio::sox_utils::list_effects); - m.def( - "torchaudio::sox_utils_list_read_formats", - &torchaudio::sox_utils::list_read_formats); - m.def( - "torchaudio::sox_utils_list_write_formats", - &torchaudio::sox_utils::list_write_formats); - m.def( - "torchaudio::sox_utils_get_buffer_size", - &torchaudio::sox_utils::get_buffer_size); -} - } // namespace sox_utils } // namespace torchaudio diff --git a/torchaudio/sox_effects/sox_effects.py 
b/torchaudio/sox_effects/sox_effects.py index e876788df4..c343680b65 100644 --- a/torchaudio/sox_effects/sox_effects.py +++ b/torchaudio/sox_effects/sox_effects.py @@ -1,5 +1,4 @@ import os -import warnings from typing import List, Optional, Tuple import torch @@ -156,14 +155,6 @@ def apply_effects_tensor( return torch.ops.torchaudio.sox_effects_apply_effects_tensor(tensor, sample_rate, effects, channels_first) -_deprecation_message = ( - "File-like object support in sox_io backend is deprecated, " - "and will be removed in v2.1. " - "See https://github.com/pytorch/audio/issues/2950 for the detail." - "Please migrate to the new dispatcher, or use soundfile backend." -) - - @torchaudio._extension.fail_if_no_sox def apply_effects_file( path: str, @@ -187,18 +178,8 @@ def apply_effects_file( rate and leave samples untouched. Args: - path (path-like object or file-like object): - Source of audio data. When the function is not compiled by TorchScript, - (e.g. ``torch.jit.script``), the following types are accepted: - - * ``path-like``: file path - * ``file-like``: Object with ``read(size: int) -> bytes`` method, - which returns byte string of at most ``size`` length. - - When the function is compiled by TorchScript, only ``str`` type is allowed. - - Note: This argument is intentionally annotated as ``str`` only for - TorchScript compiler compatibility. + path (path-like object): + Source of audio data. effects (List[List[str]]): List of effects. normalize (bool, optional): When ``True``, this function converts the native sample type to ``float32``. 
@@ -283,11 +264,10 @@ def apply_effects_file( """ if not torch.jit.is_scripting(): if hasattr(path, "read"): - warnings.warn(_deprecation_message) - ret = torchaudio.lib._torchaudio_sox.apply_effects_fileobj(path, effects, normalize, channels_first, format) - if ret is None: - raise RuntimeError("Failed to load audio from {}".format(path)) - return ret + raise RuntimeError( + "apply_effects_file function does not support file-like object. " + "Please use torchaudio.io.AudioEffector." + ) path = os.fspath(path) ret = torch.ops.torchaudio.sox_effects_apply_effects_file(path, effects, normalize, channels_first, format) if ret is not None: diff --git a/torchaudio/utils/sox_utils.py b/torchaudio/utils/sox_utils.py index 384c00bf82..a978e8d1db 100644 --- a/torchaudio/utils/sox_utils.py +++ b/torchaudio/utils/sox_utils.py @@ -4,7 +4,6 @@ from typing import Dict, List -import torch import torchaudio @@ -18,7 +17,7 @@ def set_seed(seed: int): See Also: http://sox.sourceforge.net/sox.html """ - torch.ops.torchaudio.sox_utils_set_seed(seed) + torchaudio.lib._torchaudio_sox.set_seed(seed) @torchaudio._extension.fail_if_no_sox @@ -36,7 +35,7 @@ def set_verbosity(verbosity: int): See Also: http://sox.sourceforge.net/sox.html """ - torch.ops.torchaudio.sox_utils_set_verbosity(verbosity) + torchaudio.lib._torchaudio_sox.set_verbosity(verbosity) @torchaudio._extension.fail_if_no_sox @@ -49,7 +48,7 @@ def set_buffer_size(buffer_size: int): See Also: http://sox.sourceforge.net/sox.html """ - torch.ops.torchaudio.sox_utils_set_buffer_size(buffer_size) + torchaudio.lib._torchaudio_sox.set_buffer_size(buffer_size) @torchaudio._extension.fail_if_no_sox @@ -63,7 +62,7 @@ def set_use_threads(use_threads: bool): See Also: http://sox.sourceforge.net/sox.html """ - torch.ops.torchaudio.sox_utils_set_use_threads(use_threads) + torchaudio.lib._torchaudio_sox.set_use_threads(use_threads) @torchaudio._extension.fail_if_no_sox @@ -73,7 +72,7 @@ def list_effects() -> Dict[str, str]: Returns: 
Dict[str, str]: Mapping from ``effect name`` to ``usage`` """ - return dict(torch.ops.torchaudio.sox_utils_list_effects()) + return dict(torchaudio.lib._torchaudio_sox.list_effects()) @torchaudio._extension.fail_if_no_sox @@ -83,7 +82,7 @@ def list_read_formats() -> List[str]: Returns: List[str]: List of supported audio formats """ - return torch.ops.torchaudio.sox_utils_list_read_formats() + return torchaudio.lib._torchaudio_sox.list_read_formats() @torchaudio._extension.fail_if_no_sox @@ -93,7 +92,7 @@ def list_write_formats() -> List[str]: Returns: List[str]: List of supported audio formats """ - return torch.ops.torchaudio.sox_utils_list_write_formats() + return torchaudio.lib._torchaudio_sox.list_write_formats() @torchaudio._extension.fail_if_no_sox @@ -103,4 +102,4 @@ def get_buffer_size() -> int: Returns: int: size in bytes of buffers used for processing audio. """ - return torch.ops.torchaudio.sox_utils_get_buffer_size() + return torchaudio.lib._torchaudio_sox.get_buffer_size()