From ba215bf172897b589661b0b302c21626f50fc6eb Mon Sep 17 00:00:00 2001
From: moto <855818+mthrok@users.noreply.github.com>
Date: Sun, 5 Feb 2023 22:29:21 -0500
Subject: [PATCH] [BC-Breaking] Remove file-like object support from sox_io
 backend

This commit removes file-like object support so that we can remove the custom sox patch.

The motivation and plan are outlined in https://github.com/pytorch/audio/issues/2950.
---
 .../backend/sox_io/info_test.py               | 268 +-----------------
 .../backend/sox_io/load_test.py               | 253 -----------------
 .../backend/sox_io/save_test.py               |  90 ++----
 .../backend/sox_io/smoke_test.py              |  85 ------
 .../sox_effect/smoke_test.py                  |  21 --
 .../sox_effect/sox_effect_test.py             | 143 ----------
 third_party/patches/sox.patch                 |  16 --
 third_party/sox/CMakeLists.txt                |   2 +-
 torchaudio/_extension/utils.py                |   2 +-
 torchaudio/backend/sox_io_backend.py          | 122 +-------
 torchaudio/csrc/sox/CMakeLists.txt            |  10 +-
 torchaudio/csrc/sox/pybind/effects.cpp        | 123 --------
 torchaudio/csrc/sox/pybind/effects.h          |  20 --
 torchaudio/csrc/sox/pybind/effects_chain.cpp  | 237 ----------------
 torchaudio/csrc/sox/pybind/effects_chain.h    |  30 --
 torchaudio/csrc/sox/pybind/io.cpp             | 195 -------------
 torchaudio/csrc/sox/pybind/io.h               |  37 ---
 torchaudio/csrc/sox/pybind/pybind.cpp         |  50 +++-
 torchaudio/csrc/sox/pybind/utils.cpp          |  33 ---
 torchaudio/csrc/sox/pybind/utils.h            |  14 -
 torchaudio/csrc/sox/utils.cpp                 |  25 --
 torchaudio/sox_effects/sox_effects.py         |  32 +--
 torchaudio/utils/sox_utils.py                 |  17 +-
 23 files changed, 84 insertions(+), 1741 deletions(-)
 delete mode 100644 third_party/patches/sox.patch
 delete mode 100644 torchaudio/csrc/sox/pybind/effects.cpp
 delete mode 100644 torchaudio/csrc/sox/pybind/effects.h
 delete mode 100644 torchaudio/csrc/sox/pybind/effects_chain.cpp
 delete mode 100644 torchaudio/csrc/sox/pybind/effects_chain.h
 delete mode 100644 torchaudio/csrc/sox/pybind/io.cpp
 delete mode 100644 torchaudio/csrc/sox/pybind/io.h
 delete mode 100644 torchaudio/csrc/sox/pybind/utils.cpp
 delete mode 100644 torchaudio/csrc/sox/pybind/utils.h

diff --git a/test/torchaudio_unittest/backend/sox_io/info_test.py b/test/torchaudio_unittest/backend/sox_io/info_test.py
index 70532f4ba0..c2a4f1c9ab 100644
--- a/test/torchaudio_unittest/backend/sox_io/info_test.py
+++ b/test/torchaudio_unittest/backend/sox_io/info_test.py
@@ -1,22 +1,14 @@
-import io
 import itertools
-import os
-import tarfile
-from contextlib import contextmanager
 
 from parameterized import parameterized
-from torchaudio._internal import module_utils as _mod_utils
 from torchaudio.backend import sox_io_backend
-from torchaudio.utils.sox_utils import get_buffer_size, set_buffer_size
-from torchaudio_unittest.backend.common import get_bits_per_sample, get_encoding
+from torchaudio_unittest.backend.common import get_encoding
 from torchaudio_unittest.common_utils import (
     get_asset_path,
     get_wav_data,
-    HttpServerMixin,
     PytorchTestCase,
     save_wav,
     skipIfNoExec,
-    skipIfNoModule,
     skipIfNoSox,
     sox_utils,
     TempDirMixin,
@@ -25,10 +17,6 @@
 from .common import name_func
 
 
-if _mod_utils.is_module_available("requests"):
-    import requests
-
-
 @skipIfNoExec("sox")
 @skipIfNoSox
 class TestInfo(TempDirMixin, PytorchTestCase):
@@ -338,260 +326,6 @@ def test_mp3(self):
         assert sinfo.encoding == "MP3"
 
 
-class FileObjTestBase(TempDirMixin):
-    def _gen_file(self, ext, dtype, sample_rate, num_channels, num_frames, *, comments=None):
-        path = self.get_temp_path(f"test.{ext}")
-        bit_depth = sox_utils.get_bit_depth(dtype)
-        duration = num_frames / sample_rate
-        comment_file = self._gen_comment_file(comments) if comments else None
-
-        sox_utils.gen_audio_file(
-            path,
-            sample_rate,
-            num_channels=num_channels,
-            encoding=sox_utils.get_encoding(dtype),
-            bit_depth=bit_depth,
-            duration=duration,
-            comment_file=comment_file,
-        )
-        return path
-
-    def _gen_comment_file(self, comments):
-        comment_path = self.get_temp_path("comment.txt")
-        with open(comment_path, "w") as file_:
-            file_.writelines(comments)
-        return comment_path
-
-
-class Unseekable:
-    def __init__(self, fileobj):
-        self.fileobj = fileobj
-
-    def read(self, n):
-        return self.fileobj.read(n)
-
-
-@skipIfNoSox
-@skipIfNoExec("sox")
-class TestFileObject(FileObjTestBase, PytorchTestCase):
-    def _query_fileobj(self, ext, dtype, sample_rate, num_channels, num_frames, *, comments=None):
-        path = self._gen_file(ext, dtype, sample_rate, num_channels, num_frames, comments=comments)
-        format_ = ext if ext in ["mp3"] else None
-        with open(path, "rb") as fileobj:
-            return sox_io_backend.info(fileobj, format_)
-
-    def _query_bytesio(self, ext, dtype, sample_rate, num_channels, num_frames):
-        path = self._gen_file(ext, dtype, sample_rate, num_channels, num_frames)
-        format_ = ext if ext in ["mp3"] else None
-        with open(path, "rb") as file_:
-            fileobj = io.BytesIO(file_.read())
-        return sox_io_backend.info(fileobj, format_)
-
-    def _query_tarfile(self, ext, dtype, sample_rate, num_channels, num_frames):
-        audio_path = self._gen_file(ext, dtype, sample_rate, num_channels, num_frames)
-        audio_file = os.path.basename(audio_path)
-        archive_path = self.get_temp_path("archive.tar.gz")
-        with tarfile.TarFile(archive_path, "w") as tarobj:
-            tarobj.add(audio_path, arcname=audio_file)
-        format_ = ext if ext in ["mp3"] else None
-        with tarfile.TarFile(archive_path, "r") as tarobj:
-            fileobj = tarobj.extractfile(audio_file)
-            return sox_io_backend.info(fileobj, format_)
-
-    @contextmanager
-    def _set_buffer_size(self, buffer_size):
-        try:
-            original_buffer_size = get_buffer_size()
-            set_buffer_size(buffer_size)
-            yield
-        finally:
-            set_buffer_size(original_buffer_size)
-
-    @parameterized.expand(
-        [
-            ("wav", "float32"),
-            ("wav", "int32"),
-            ("wav", "int16"),
-            ("wav", "uint8"),
-            ("mp3", "float32"),
-            ("flac", "float32"),
-            ("vorbis", "float32"),
-            ("amb", "int16"),
-        ]
-    )
-    def test_fileobj(self, ext, dtype):
-        """Querying audio via file object works"""
-        sample_rate = 16000
-        num_frames = 3 * sample_rate
-        num_channels = 2
-        sinfo = self._query_fileobj(ext, dtype, sample_rate, num_channels, num_frames)
-
-        bits_per_sample = get_bits_per_sample(ext, dtype)
-        num_frames = {"vorbis": 0, "mp3": 49536}.get(ext, num_frames)
-
-        assert sinfo.sample_rate == sample_rate
-        assert sinfo.num_channels == num_channels
-        assert sinfo.num_frames == num_frames
-        assert sinfo.bits_per_sample == bits_per_sample
-        assert sinfo.encoding == get_encoding(ext, dtype)
-
-    @parameterized.expand(
-        [
-            ("vorbis", "float32"),
-        ]
-    )
-    def test_fileobj_large_header(self, ext, dtype):
-        """
-        For audio file with header size exceeding default buffer size:
-        - Querying audio via file object without enlarging buffer size fails.
-        - Querying audio via file object after enlarging buffer size succeeds.
-        """
-        sample_rate = 16000
-        num_frames = 3 * sample_rate
-        num_channels = 2
-        comments = "metadata=" + " ".join(["value" for _ in range(1000)])
-
-        with self.assertRaises(RuntimeError):
-            sinfo = self._query_fileobj(ext, dtype, sample_rate, num_channels, num_frames, comments=comments)
-
-        with self._set_buffer_size(16384):
-            sinfo = self._query_fileobj(ext, dtype, sample_rate, num_channels, num_frames, comments=comments)
-        bits_per_sample = get_bits_per_sample(ext, dtype)
-        num_frames = 0 if ext in ["vorbis"] else num_frames
-
-        assert sinfo.sample_rate == sample_rate
-        assert sinfo.num_channels == num_channels
-        assert sinfo.num_frames == num_frames
-        assert sinfo.bits_per_sample == bits_per_sample
-        assert sinfo.encoding == get_encoding(ext, dtype)
-
-    @parameterized.expand(
-        [
-            ("wav", "float32"),
-            ("wav", "int32"),
-            ("wav", "int16"),
-            ("wav", "uint8"),
-            ("mp3", "float32"),
-            ("flac", "float32"),
-            ("vorbis", "float32"),
-            ("amb", "int16"),
-        ]
-    )
-    def test_bytesio(self, ext, dtype):
-        """Querying audio via ByteIO object works for small data"""
-        sample_rate = 16000
-        num_frames = 3 * sample_rate
-        num_channels = 2
-        sinfo = self._query_bytesio(ext, dtype, sample_rate, num_channels, num_frames)
-
-        bits_per_sample = get_bits_per_sample(ext, dtype)
-        num_frames = {"vorbis": 0, "mp3": 49536}.get(ext, num_frames)
-
-        assert sinfo.sample_rate == sample_rate
-        assert sinfo.num_channels == num_channels
-        assert sinfo.num_frames == num_frames
-        assert sinfo.bits_per_sample == bits_per_sample
-        assert sinfo.encoding == get_encoding(ext, dtype)
-
-    @parameterized.expand(
-        [
-            ("wav", "float32"),
-            ("wav", "int32"),
-            ("wav", "int16"),
-            ("wav", "uint8"),
-            ("mp3", "float32"),
-            ("flac", "float32"),
-            ("vorbis", "float32"),
-            ("amb", "int16"),
-        ]
-    )
-    def test_bytesio_tiny(self, ext, dtype):
-        """Querying audio via ByteIO object works for small data"""
-        sample_rate = 8000
-        num_frames = 4
-        num_channels = 2
-        sinfo = self._query_bytesio(ext, dtype, sample_rate, num_channels, num_frames)
-
-        bits_per_sample = get_bits_per_sample(ext, dtype)
-        num_frames = {"vorbis": 0, "mp3": 1728}.get(ext, num_frames)
-
-        assert sinfo.sample_rate == sample_rate
-        assert sinfo.num_channels == num_channels
-        assert sinfo.num_frames == num_frames
-        assert sinfo.bits_per_sample == bits_per_sample
-        assert sinfo.encoding == get_encoding(ext, dtype)
-
-    @parameterized.expand(
-        [
-            ("wav", "float32"),
-            ("wav", "int32"),
-            ("wav", "int16"),
-            ("wav", "uint8"),
-            ("mp3", "float32"),
-            ("flac", "float32"),
-            ("vorbis", "float32"),
-            ("amb", "int16"),
-        ]
-    )
-    def test_tarfile(self, ext, dtype):
-        """Querying compressed audio via file-like object works"""
-        sample_rate = 16000
-        num_frames = 3.0 * sample_rate
-        num_channels = 2
-        sinfo = self._query_tarfile(ext, dtype, sample_rate, num_channels, num_frames)
-
-        bits_per_sample = get_bits_per_sample(ext, dtype)
-        num_frames = {"vorbis": 0, "mp3": 49536}.get(ext, num_frames)
-
-        assert sinfo.sample_rate == sample_rate
-        assert sinfo.num_channels == num_channels
-        assert sinfo.num_frames == num_frames
-        assert sinfo.bits_per_sample == bits_per_sample
-        assert sinfo.encoding == get_encoding(ext, dtype)
-
-
-@skipIfNoSox
-@skipIfNoExec("sox")
-@skipIfNoModule("requests")
-class TestFileObjectHttp(HttpServerMixin, FileObjTestBase, PytorchTestCase):
-    def _query_http(self, ext, dtype, sample_rate, num_channels, num_frames):
-        audio_path = self._gen_file(ext, dtype, sample_rate, num_channels, num_frames)
-        audio_file = os.path.basename(audio_path)
-
-        url = self.get_url(audio_file)
-        format_ = ext if ext in ["mp3"] else None
-        with requests.get(url, stream=True) as resp:
-            return sox_io_backend.info(Unseekable(resp.raw), format=format_)
-
-    @parameterized.expand(
-        [
-            ("wav", "float32"),
-            ("wav", "int32"),
-            ("wav", "int16"),
-            ("wav", "uint8"),
-            ("mp3", "float32"),
-            ("flac", "float32"),
-            ("vorbis", "float32"),
-            ("amb", "int16"),
-        ]
-    )
-    def test_requests(self, ext, dtype):
-        """Querying compressed audio via requests works"""
-        sample_rate = 16000
-        num_frames = 3.0 * sample_rate
-        num_channels = 2
-        sinfo = self._query_http(ext, dtype, sample_rate, num_channels, num_frames)
-
-        bits_per_sample = get_bits_per_sample(ext, dtype)
-        num_frames = {"vorbis": 0, "mp3": 49536}.get(ext, num_frames)
-
-        assert sinfo.sample_rate == sample_rate
-        assert sinfo.num_channels == num_channels
-        assert sinfo.num_frames == num_frames
-        assert sinfo.bits_per_sample == bits_per_sample
-        assert sinfo.encoding == get_encoding(ext, dtype)
-
-
 @skipIfNoSox
 class TestInfoNoSuchFile(PytorchTestCase):
     def test_info_fail(self):
diff --git a/test/torchaudio_unittest/backend/sox_io/load_test.py b/test/torchaudio_unittest/backend/sox_io/load_test.py
index 54cfd7b7ae..bebd67ae6f 100644
--- a/test/torchaudio_unittest/backend/sox_io/load_test.py
+++ b/test/torchaudio_unittest/backend/sox_io/load_test.py
@@ -370,259 +370,6 @@ def test_mp3(self):
         assert sr == 16000
 
 
-class CloggedFileObj:
-    def __init__(self, fileobj):
-        self.fileobj = fileobj
-
-    def read(self, _):
-        return self.fileobj.read(2)
-
-    def seek(self, offset, whence):
-        return self.fileobj.seek(offset, whence)
-
-
-@skipIfNoSox
-@skipIfNoExec("sox")
-class TestFileObject(TempDirMixin, PytorchTestCase):
-    """
-    In this test suite, the result of file-like object input is compared against file path input,
-    because `load` function is rigrously tested for file path inputs to match libsox's result,
-    """
-
-    @parameterized.expand(
-        [
-            ("wav", {"bit_depth": 16}),
-            ("wav", {"bit_depth": 24}),
-            ("wav", {"bit_depth": 32}),
-            ("mp3", {"compression": 128}),
-            ("mp3", {"compression": 320}),
-            ("flac", {"compression": 0}),
-            ("flac", {"compression": 5}),
-            ("flac", {"compression": 8}),
-            ("vorbis", {"compression": -1}),
-            ("vorbis", {"compression": 10}),
-            ("amb", {}),
-        ]
-    )
-    def test_fileobj(self, ext, kwargs):
-        """Loading audio via file object returns the same result as via file path."""
-        sample_rate = 16000
-        format_ = ext if ext in ["mp3"] else None
-        path = self.get_temp_path(f"test.{ext}")
-
-        sox_utils.gen_audio_file(path, sample_rate, num_channels=2, **kwargs)
-        expected, _ = sox_io_backend.load(path)
-
-        with open(path, "rb") as fileobj:
-            found, sr = sox_io_backend.load(fileobj, format=format_)
-
-        assert sr == sample_rate
-        self.assertEqual(expected, found)
-
-    @parameterized.expand(
-        [
-            ("wav", {"bit_depth": 16}),
-            ("wav", {"bit_depth": 24}),
-            ("wav", {"bit_depth": 32}),
-            ("mp3", {"compression": 128}),
-            ("mp3", {"compression": 320}),
-            ("flac", {"compression": 0}),
-            ("flac", {"compression": 5}),
-            ("flac", {"compression": 8}),
-            ("vorbis", {"compression": -1}),
-            ("vorbis", {"compression": 10}),
-            ("amb", {}),
-        ]
-    )
-    def test_bytesio(self, ext, kwargs):
-        """Loading audio via BytesIO object returns the same result as via file path."""
-        sample_rate = 16000
-        format_ = ext if ext in ["mp3"] else None
-        path = self.get_temp_path(f"test.{ext}")
-
-        sox_utils.gen_audio_file(path, sample_rate, num_channels=2, **kwargs)
-        expected, _ = sox_io_backend.load(path)
-
-        with open(path, "rb") as file_:
-            fileobj = io.BytesIO(file_.read())
-        found, sr = sox_io_backend.load(fileobj, format=format_)
-
-        assert sr == sample_rate
-        self.assertEqual(expected, found)
-
-    @parameterized.expand(
-        [
-            ("wav", {"bit_depth": 16}),
-            ("wav", {"bit_depth": 24}),
-            ("wav", {"bit_depth": 32}),
-            ("mp3", {"compression": 128}),
-            ("mp3", {"compression": 320}),
-            ("flac", {"compression": 0}),
-            ("flac", {"compression": 5}),
-            ("flac", {"compression": 8}),
-            ("vorbis", {"compression": -1}),
-            ("vorbis", {"compression": 10}),
-            ("amb", {}),
-        ]
-    )
-    def test_bytesio_clogged(self, ext, kwargs):
-        """Loading audio via clogged file object returns the same result as via file path.
-
-        This test case validates the case where fileobject returns shorter bytes than requeted.
-        """
-        sample_rate = 16000
-        format_ = ext if ext in ["mp3"] else None
-        path = self.get_temp_path(f"test.{ext}")
-
-        sox_utils.gen_audio_file(path, sample_rate, num_channels=2, **kwargs)
-        expected, _ = sox_io_backend.load(path)
-
-        with open(path, "rb") as file_:
-            fileobj = CloggedFileObj(io.BytesIO(file_.read()))
-        found, sr = sox_io_backend.load(fileobj, format=format_)
-
-        assert sr == sample_rate
-        self.assertEqual(expected, found)
-
-    @parameterized.expand(
-        [
-            ("wav", {"bit_depth": 16}),
-            ("wav", {"bit_depth": 24}),
-            ("wav", {"bit_depth": 32}),
-            ("mp3", {"compression": 128}),
-            ("mp3", {"compression": 320}),
-            ("flac", {"compression": 0}),
-            ("flac", {"compression": 5}),
-            ("flac", {"compression": 8}),
-            ("vorbis", {"compression": -1}),
-            ("vorbis", {"compression": 10}),
-            ("amb", {}),
-        ]
-    )
-    def test_bytesio_tiny(self, ext, kwargs):
-        """Loading very small audio via file object returns the same result as via file path."""
-        sample_rate = 16000
-        format_ = ext if ext in ["mp3"] else None
-        path = self.get_temp_path(f"test.{ext}")
-
-        sox_utils.gen_audio_file(path, sample_rate, num_channels=2, duration=1 / 1600, **kwargs)
-        expected, _ = sox_io_backend.load(path)
-
-        with open(path, "rb") as file_:
-            fileobj = io.BytesIO(file_.read())
-        found, sr = sox_io_backend.load(fileobj, format=format_)
-
-        assert sr == sample_rate
-        self.assertEqual(expected, found)
-
-    @parameterized.expand(
-        [
-            ("wav", {"bit_depth": 16}),
-            ("wav", {"bit_depth": 24}),
-            ("wav", {"bit_depth": 32}),
-            ("mp3", {"compression": 128}),
-            ("mp3", {"compression": 320}),
-            ("flac", {"compression": 0}),
-            ("flac", {"compression": 5}),
-            ("flac", {"compression": 8}),
-            ("vorbis", {"compression": -1}),
-            ("vorbis", {"compression": 10}),
-            ("amb", {}),
-        ]
-    )
-    def test_tarfile(self, ext, kwargs):
-        """Loading compressed audio via file-like object returns the same result as via file path."""
-        sample_rate = 16000
-        format_ = ext if ext in ["mp3"] else None
-        audio_file = f"test.{ext}"
-        audio_path = self.get_temp_path(audio_file)
-        archive_path = self.get_temp_path("archive.tar.gz")
-
-        sox_utils.gen_audio_file(audio_path, sample_rate, num_channels=2, **kwargs)
-        expected, _ = sox_io_backend.load(audio_path)
-
-        with tarfile.TarFile(archive_path, "w") as tarobj:
-            tarobj.add(audio_path, arcname=audio_file)
-        with tarfile.TarFile(archive_path, "r") as tarobj:
-            fileobj = tarobj.extractfile(audio_file)
-            found, sr = sox_io_backend.load(fileobj, format=format_)
-
-        assert sr == sample_rate
-        self.assertEqual(expected, found)
-
-
-class Unseekable:
-    def __init__(self, fileobj):
-        self.fileobj = fileobj
-
-    def read(self, n):
-        return self.fileobj.read(n)
-
-
-@skipIfNoSox
-@skipIfNoExec("sox")
-@skipIfNoModule("requests")
-class TestFileObjectHttp(HttpServerMixin, PytorchTestCase):
-    @parameterized.expand(
-        [
-            ("wav", {"bit_depth": 16}),
-            ("wav", {"bit_depth": 24}),
-            ("wav", {"bit_depth": 32}),
-            ("mp3", {"compression": 128}),
-            ("mp3", {"compression": 320}),
-            ("flac", {"compression": 0}),
-            ("flac", {"compression": 5}),
-            ("flac", {"compression": 8}),
-            ("vorbis", {"compression": -1}),
-            ("vorbis", {"compression": 10}),
-            ("amb", {}),
-        ]
-    )
-    def test_requests(self, ext, kwargs):
-        sample_rate = 16000
-        format_ = ext if ext in ["mp3"] else None
-        audio_file = f"test.{ext}"
-        audio_path = self.get_temp_path(audio_file)
-
-        sox_utils.gen_audio_file(audio_path, sample_rate, num_channels=2, **kwargs)
-        expected, _ = sox_io_backend.load(audio_path)
-
-        url = self.get_url(audio_file)
-        with requests.get(url, stream=True) as resp:
-            found, sr = sox_io_backend.load(Unseekable(resp.raw), format=format_)
-
-        assert sr == sample_rate
-        if ext != "mp3":
-            self.assertEqual(expected, found)
-
-    @parameterized.expand(
-        list(
-            itertools.product(
-                [0, 1, 10, 100, 1000],
-                [-1, 1, 10, 100, 1000],
-            )
-        ),
-        name_func=name_func,
-    )
-    def test_frame(self, frame_offset, num_frames):
-        """num_frames and frame_offset correctly specify the region of data"""
-        sample_rate = 8000
-        audio_file = "test.wav"
-        audio_path = self.get_temp_path(audio_file)
-
-        original = get_wav_data("float32", num_channels=2)
-        save_wav(audio_path, original, sample_rate)
-        frame_end = None if num_frames == -1 else frame_offset + num_frames
-        expected = original[:, frame_offset:frame_end]
-
-        url = self.get_url(audio_file)
-        with requests.get(url, stream=True) as resp:
-            found, sr = sox_io_backend.load(resp.raw, frame_offset, num_frames)
-
-        assert sr == sample_rate
-        self.assertEqual(expected, found)
-
-
 @skipIfNoSox
 class TestLoadNoSuchFile(PytorchTestCase):
     def test_load_fail(self):
diff --git a/test/torchaudio_unittest/backend/sox_io/save_test.py b/test/torchaudio_unittest/backend/sox_io/save_test.py
index 5db7a5a9f8..75656d6ed6 100644
--- a/test/torchaudio_unittest/backend/sox_io/save_test.py
+++ b/test/torchaudio_unittest/backend/sox_io/save_test.py
@@ -43,7 +43,6 @@ def assert_save_consistency(
         num_channels: int = 2,
         num_frames: float = 3 * 8000,
         src_dtype: str = "int32",
-        test_mode: str = "path",
     ):
         """`save` function produces file that is comparable with `sox` command
 
@@ -97,37 +96,9 @@ def assert_save_consistency(
 
         # 2.1. Convert the original wav to target format with torchaudio
         data = load_wav(src_path, normalize=False)[0]
-        if test_mode == "path":
-            sox_io_backend.save(
-                tgt_path, data, sample_rate, compression=compression, encoding=encoding, bits_per_sample=bits_per_sample
-            )
-        elif test_mode == "fileobj":
-            with open(tgt_path, "bw") as file_:
-                sox_io_backend.save(
-                    file_,
-                    data,
-                    sample_rate,
-                    format=format,
-                    compression=compression,
-                    encoding=encoding,
-                    bits_per_sample=bits_per_sample,
-                )
-        elif test_mode == "bytesio":
-            file_ = io.BytesIO()
-            sox_io_backend.save(
-                file_,
-                data,
-                sample_rate,
-                format=format,
-                compression=compression,
-                encoding=encoding,
-                bits_per_sample=bits_per_sample,
-            )
-            file_.seek(0)
-            with open(tgt_path, "bw") as f:
-                f.write(file_.read())
-        else:
-            raise ValueError(f"Unexpected test mode: {test_mode}")
+        sox_io_backend.save(
+            tgt_path, data, sample_rate, compression=compression, encoding=encoding, bits_per_sample=bits_per_sample
+        )
         # 2.2. Convert the target format to wav with sox
         sox_utils.convert_audio_file(tgt_path, tst_path, encoding=cmp_encoding, bit_depth=cmp_bit_depth)
         # 2.3. Load with SciPy
@@ -150,7 +121,6 @@ def assert_save_consistency(
 @skipIfNoSox
 class SaveTest(SaveTestBase):
     @nested_params(
-        ["path", "fileobj", "bytesio"],
         [
             ("PCM_U", 8),
             ("PCM_S", 16),
@@ -161,12 +131,11 @@ class SaveTest(SaveTestBase):
             ("ALAW", 8),
         ],
     )
-    def test_save_wav(self, test_mode, enc_params):
+    def test_save_wav(self, enc_params):
         encoding, bits_per_sample = enc_params
-        self.assert_save_consistency("wav", encoding=encoding, bits_per_sample=bits_per_sample, test_mode=test_mode)
+        self.assert_save_consistency("wav", encoding=encoding, bits_per_sample=bits_per_sample)
 
     @nested_params(
-        ["path", "fileobj", "bytesio"],
         [
             ("float32",),
             ("int32",),
@@ -174,12 +143,11 @@ def test_save_wav(self, test_mode, enc_params):
             ("uint8",),
         ],
     )
-    def test_save_wav_dtype(self, test_mode, params):
+    def test_save_wav_dtype(self, params):
         (dtype,) = params
-        self.assert_save_consistency("wav", src_dtype=dtype, test_mode=test_mode)
+        self.assert_save_consistency("wav", src_dtype=dtype)
 
     @nested_params(
-        ["path", "fileobj", "bytesio"],
         [8, 16, 24],
         [
             None,
@@ -194,19 +162,13 @@ def test_save_wav_dtype(self, test_mode, params):
             8,
         ],
     )
-    def test_save_flac(self, test_mode, bits_per_sample, compression_level):
-        self.assert_save_consistency(
-            "flac", compression=compression_level, bits_per_sample=bits_per_sample, test_mode=test_mode
-        )
+    def test_save_flac(self, bits_per_sample, compression_level):
+        self.assert_save_consistency("flac", compression=compression_level, bits_per_sample=bits_per_sample)
 
-    @nested_params(
-        ["path", "fileobj", "bytesio"],
-    )
-    def test_save_htk(self, test_mode):
-        self.assert_save_consistency("htk", test_mode=test_mode, num_channels=1)
+    def test_save_htk(self):
+        self.assert_save_consistency("htk", num_channels=1)
 
     @nested_params(
-        ["path", "fileobj", "bytesio"],
         [
             None,
             -1,
@@ -219,11 +181,10 @@ def test_save_htk(self, test_mode):
             10,
         ],
     )
-    def test_save_vorbis(self, test_mode, quality_level):
-        self.assert_save_consistency("vorbis", compression=quality_level, test_mode=test_mode)
+    def test_save_vorbis(self, quality_level):
+        self.assert_save_consistency("vorbis", compression=quality_level)
 
     @nested_params(
-        ["path", "fileobj", "bytesio"],
         [
             (
                 "PCM_S",
@@ -248,12 +209,11 @@ def test_save_vorbis(self, test_mode, quality_level):
             ("ALAW", 32),
         ],
     )
-    def test_save_sphere(self, test_mode, enc_params):
+    def test_save_sphere(self, enc_params):
         encoding, bits_per_sample = enc_params
-        self.assert_save_consistency("sph", encoding=encoding, bits_per_sample=bits_per_sample, test_mode=test_mode)
+        self.assert_save_consistency("sph", encoding=encoding, bits_per_sample=bits_per_sample)
 
     @nested_params(
-        ["path", "fileobj", "bytesio"],
         [
             (
                 "PCM_U",
@@ -289,12 +249,11 @@ def test_save_sphere(self, test_mode, enc_params):
             ),
         ],
     )
-    def test_save_amb(self, test_mode, enc_params):
+    def test_save_amb(self, enc_params):
         encoding, bits_per_sample = enc_params
-        self.assert_save_consistency("amb", encoding=encoding, bits_per_sample=bits_per_sample, test_mode=test_mode)
+        self.assert_save_consistency("amb", encoding=encoding, bits_per_sample=bits_per_sample)
 
     @nested_params(
-        ["path", "fileobj", "bytesio"],
         [
             None,
             0,
@@ -307,18 +266,15 @@ def test_save_amb(self, test_mode, enc_params):
             7,
         ],
     )
-    def test_save_amr_nb(self, test_mode, bit_rate):
-        self.assert_save_consistency("amr-nb", compression=bit_rate, num_channels=1, test_mode=test_mode)
+    def test_save_amr_nb(self, bit_rate):
+        self.assert_save_consistency("amr-nb", compression=bit_rate, num_channels=1)
 
-    @nested_params(
-        ["path", "fileobj", "bytesio"],
-    )
-    def test_save_gsm(self, test_mode):
-        self.assert_save_consistency("gsm", num_channels=1, test_mode=test_mode)
+    def test_save_gsm(self):
+        self.assert_save_consistency("gsm", num_channels=1)
         with self.assertRaises(RuntimeError, msg="gsm format only supports single channel audio."):
-            self.assert_save_consistency("gsm", num_channels=2, test_mode=test_mode)
+            self.assert_save_consistency("gsm", num_channels=2)
         with self.assertRaises(RuntimeError, msg="gsm format only supports a sampling rate of 8kHz."):
-            self.assert_save_consistency("gsm", sample_rate=16000, test_mode=test_mode)
+            self.assert_save_consistency("gsm", sample_rate=16000)
 
     @parameterized.expand(
         [
diff --git a/test/torchaudio_unittest/backend/sox_io/smoke_test.py b/test/torchaudio_unittest/backend/sox_io/smoke_test.py
index e394161044..55dc0cb8ff 100644
--- a/test/torchaudio_unittest/backend/sox_io/smoke_test.py
+++ b/test/torchaudio_unittest/backend/sox_io/smoke_test.py
@@ -89,88 +89,3 @@ def test_vorbis(self, sample_rate, num_channels, quality_level):
     def test_flac(self, sample_rate, num_channels, compression_level):
         """Run smoke test on flac format"""
         self.run_smoke_test("flac", sample_rate, num_channels, compression=compression_level)
-
-
-@skipIfNoSox
-class SmokeTestFileObj(TorchaudioTestCase):
-    """Run smoke test on various audio format
-
-    The purpose of this test suite is to verify that sox_io_backend functionalities do not exhibit
-    abnormal behaviors.
-
-    This test suite should be able to run without any additional tools (such as sox command),
-    however without such tools, the correctness of each function cannot be verified.
-    """
-
-    def run_smoke_test(self, ext, sample_rate, num_channels, *, compression=None, dtype="float32"):
-        duration = 1
-        num_frames = sample_rate * duration
-        original = get_wav_data(dtype, num_channels, normalize=False, num_frames=num_frames)
-
-        fileobj = io.BytesIO()
-        # 1. run save
-        sox_io_backend.save(fileobj, original, sample_rate, compression=compression, format=ext)
-        # 2. run info
-        fileobj.seek(0)
-        info = sox_io_backend.info(fileobj, format=ext)
-        assert info.sample_rate == sample_rate
-        assert info.num_channels == num_channels
-        # 3. run load
-        fileobj.seek(0)
-        loaded, sr = sox_io_backend.load(fileobj, normalize=False, format=ext)
-        assert sr == sample_rate
-        assert loaded.shape[0] == num_channels
-
-    @parameterized.expand(
-        list(
-            itertools.product(
-                ["float32", "int32", "int16", "uint8"],
-                [8000, 16000],
-                [1, 2],
-            )
-        ),
-        name_func=name_func,
-    )
-    def test_wav(self, dtype, sample_rate, num_channels):
-        """Run smoke test on wav format"""
-        self.run_smoke_test("wav", sample_rate, num_channels, dtype=dtype)
-
-    @parameterized.expand(
-        list(
-            itertools.product(
-                [8000, 16000],
-                [1, 2],
-                [-4.2, -0.2, 0, 0.2, 96, 128, 160, 192, 224, 256, 320],
-            )
-        )
-    )
-    def test_mp3(self, sample_rate, num_channels, bit_rate):
-        """Run smoke test on mp3 format"""
-        self.run_smoke_test("mp3", sample_rate, num_channels, compression=bit_rate)
-
-    @parameterized.expand(
-        list(
-            itertools.product(
-                [8000, 16000],
-                [1, 2],
-                [-1, 0, 1, 2, 3, 3.6, 5, 10],
-            )
-        )
-    )
-    def test_vorbis(self, sample_rate, num_channels, quality_level):
-        """Run smoke test on vorbis format"""
-        self.run_smoke_test("vorbis", sample_rate, num_channels, compression=quality_level)
-
-    @parameterized.expand(
-        list(
-            itertools.product(
-                [8000, 16000],
-                [1, 2],
-                list(range(9)),
-            )
-        ),
-        name_func=name_func,
-    )
-    def test_flac(self, sample_rate, num_channels, compression_level):
-        """Run smoke test on flac format"""
-        self.run_smoke_test("flac", sample_rate, num_channels, compression=compression_level)
diff --git a/test/torchaudio_unittest/sox_effect/smoke_test.py b/test/torchaudio_unittest/sox_effect/smoke_test.py
index a5de940a50..30befd54ab 100644
--- a/test/torchaudio_unittest/sox_effect/smoke_test.py
+++ b/test/torchaudio_unittest/sox_effect/smoke_test.py
@@ -54,24 +54,3 @@ def test_apply_effects_file(self, args):
         _found, _sr = sox_effects.apply_effects_file(
             input_path, effects, normalize=False, channels_first=channels_first
         )
-
-    @parameterized.expand(
-        load_params("sox_effect_test_args.jsonl"),
-        name_func=lambda f, i, p: f'{f.__name__}_{i}_{p.args[0]["effects"][0][0]}',
-    )
-    def test_apply_effects_fileobj(self, args):
-        """`apply_effects_file` should return identical data as sox command"""
-        dtype = "int32"
-        channels_first = True
-        effects = args["effects"]
-        num_channels = args.get("num_channels", 2)
-        input_sr = args.get("input_sample_rate", 8000)
-
-        input_path = self.get_temp_path("input.wav")
-        data = get_wav_data(dtype, num_channels, channels_first=channels_first)
-        save_wav(input_path, data, input_sr, channels_first=channels_first)
-
-        with open(input_path, "rb") as fileobj:
-            _found, _sr = sox_effects.apply_effects_file(
-                fileobj, effects, normalize=False, channels_first=channels_first
-            )
diff --git a/test/torchaudio_unittest/sox_effect/sox_effect_test.py b/test/torchaudio_unittest/sox_effect/sox_effect_test.py
index be6b646617..2099505502 100644
--- a/test/torchaudio_unittest/sox_effect/sox_effect_test.py
+++ b/test/torchaudio_unittest/sox_effect/sox_effect_test.py
@@ -1,20 +1,14 @@
-import io
 import itertools
-import tarfile
 from pathlib import Path
 
 from parameterized import parameterized
 from torchaudio import sox_effects
-from torchaudio._internal import module_utils as _mod_utils
 from torchaudio_unittest.common_utils import (
     get_sinusoid,
     get_wav_data,
-    HttpServerMixin,
     load_wav,
     PytorchTestCase,
     save_wav,
-    skipIfNoExec,
-    skipIfNoModule,
     skipIfNoSox,
     sox_utils,
     TempDirMixin,
@@ -23,10 +17,6 @@
 from .common import load_params, name_func
 
 
-if _mod_utils.is_module_available("requests"):
-    import requests
-
-
 @skipIfNoSox
 class TestSoxEffects(PytorchTestCase):
     def test_init(self):
@@ -241,136 +231,3 @@ def test_vorbis(self, sample_rate, num_channels):
 
         assert sr == expected_sr
         self.assertEqual(found, expected)
-
-
-@skipIfNoExec("sox")
-@skipIfNoSox
-class TestFileObject(TempDirMixin, PytorchTestCase):
-    @parameterized.expand(
-        [
-            ("wav", None),
-            ("flac", 0),
-            ("flac", 5),
-            ("flac", 8),
-            ("vorbis", -1),
-            ("vorbis", 10),
-            ("amb", None),
-        ]
-    )
-    def test_fileobj(self, ext, compression):
-        """Applying effects via file object works"""
-        sample_rate = 16000
-        channels_first = True
-        effects = [["band", "300", "10"]]
-        input_path = self.get_temp_path(f"input.{ext}")
-        reference_path = self.get_temp_path("reference.wav")
-
-        sox_utils.gen_audio_file(input_path, sample_rate, num_channels=2, compression=compression)
-        sox_utils.run_sox_effect(input_path, reference_path, effects, output_bitdepth=32)
-        expected, expected_sr = load_wav(reference_path)
-
-        with open(input_path, "rb") as fileobj:
-            found, sr = sox_effects.apply_effects_file(fileobj, effects, channels_first=channels_first)
-        save_wav(self.get_temp_path("result.wav"), found, sr, channels_first=channels_first)
-        assert sr == expected_sr
-        self.assertEqual(found, expected)
-
-    @parameterized.expand(
-        [
-            ("wav", None),
-            ("flac", 0),
-            ("flac", 5),
-            ("flac", 8),
-            ("vorbis", -1),
-            ("vorbis", 10),
-            ("amb", None),
-        ]
-    )
-    def test_bytesio(self, ext, compression):
-        """Applying effects via BytesIO object works"""
-        sample_rate = 16000
-        channels_first = True
-        effects = [["band", "300", "10"]]
-        input_path = self.get_temp_path(f"input.{ext}")
-        reference_path = self.get_temp_path("reference.wav")
-
-        sox_utils.gen_audio_file(input_path, sample_rate, num_channels=2, compression=compression)
-        sox_utils.run_sox_effect(input_path, reference_path, effects, output_bitdepth=32)
-        expected, expected_sr = load_wav(reference_path)
-
-        with open(input_path, "rb") as file_:
-            fileobj = io.BytesIO(file_.read())
-        found, sr = sox_effects.apply_effects_file(fileobj, effects, channels_first=channels_first)
-        save_wav(self.get_temp_path("result.wav"), found, sr, channels_first=channels_first)
-        assert sr == expected_sr
-        self.assertEqual(found, expected)
-
-    @parameterized.expand(
-        [
-            ("wav", None),
-            ("flac", 0),
-            ("flac", 5),
-            ("flac", 8),
-            ("vorbis", -1),
-            ("vorbis", 10),
-            ("amb", None),
-        ]
-    )
-    def test_tarfile(self, ext, compression):
-        """Applying effects to compressed audio via file-like file works"""
-        sample_rate = 16000
-        channels_first = True
-        effects = [["band", "300", "10"]]
-        audio_file = f"input.{ext}"
-
-        input_path = self.get_temp_path(audio_file)
-        reference_path = self.get_temp_path("reference.wav")
-        archive_path = self.get_temp_path("archive.tar.gz")
-
-        sox_utils.gen_audio_file(input_path, sample_rate, num_channels=2, compression=compression)
-        sox_utils.run_sox_effect(input_path, reference_path, effects, output_bitdepth=32)
-        expected, expected_sr = load_wav(reference_path)
-
-        with tarfile.TarFile(archive_path, "w") as tarobj:
-            tarobj.add(input_path, arcname=audio_file)
-        with tarfile.TarFile(archive_path, "r") as tarobj:
-            fileobj = tarobj.extractfile(audio_file)
-            found, sr = sox_effects.apply_effects_file(fileobj, effects, channels_first=channels_first)
-        save_wav(self.get_temp_path("result.wav"), found, sr, channels_first=channels_first)
-        assert sr == expected_sr
-        self.assertEqual(found, expected)
-
-
-@skipIfNoSox
-@skipIfNoExec("sox")
-@skipIfNoModule("requests")
-class TestFileObjectHttp(HttpServerMixin, PytorchTestCase):
-    @parameterized.expand(
-        [
-            ("wav", None),
-            ("flac", 0),
-            ("flac", 5),
-            ("flac", 8),
-            ("vorbis", -1),
-            ("vorbis", 10),
-            ("amb", None),
-        ]
-    )
-    def test_requests(self, ext, compression):
-        sample_rate = 16000
-        channels_first = True
-        effects = [["band", "300", "10"]]
-        audio_file = f"input.{ext}"
-        input_path = self.get_temp_path(audio_file)
-        reference_path = self.get_temp_path("reference.wav")
-
-        sox_utils.gen_audio_file(input_path, sample_rate, num_channels=2, compression=compression)
-        sox_utils.run_sox_effect(input_path, reference_path, effects, output_bitdepth=32)
-        expected, expected_sr = load_wav(reference_path)
-
-        url = self.get_url(audio_file)
-        with requests.get(url, stream=True) as resp:
-            found, sr = sox_effects.apply_effects_file(resp.raw, effects, channels_first=channels_first)
-        save_wav(self.get_temp_path("result.wav"), found, sr, channels_first=channels_first)
-        assert sr == expected_sr
-        self.assertEqual(found, expected)
diff --git a/third_party/patches/sox.patch b/third_party/patches/sox.patch
deleted file mode 100644
index fe8df945c0..0000000000
--- a/third_party/patches/sox.patch
+++ /dev/null
@@ -1,16 +0,0 @@
-See https://github.com/pytorch/audio/pull/1297
-diff -ru sox/src/formats.c sox/src/formats.c
---- sox/src/formats.c	2014-10-26 19:55:50.000000000 -0700
-+++ sox/src/formats.c	2021-02-22 16:01:02.833144070 -0800
-@@ -333,6 +333,10 @@
-   assert(ft);
-   if (!ft->fp)
-     return sox_false;
--  fstat(fileno((FILE*)ft->fp), &st);
-+  int fd = fileno((FILE*)ft->fp);
-+  if (fd < 0)
-+    return sox_false;
-+  if (fstat(fd, &st) < 0)
-+    return sox_false;
-   return ((st.st_mode & S_IFMT) == S_IFREG);
- }
diff --git a/third_party/sox/CMakeLists.txt b/third_party/sox/CMakeLists.txt
index 50e5cc9156..b043362925 100644
--- a/third_party/sox/CMakeLists.txt
+++ b/third_party/sox/CMakeLists.txt
@@ -193,7 +193,7 @@ ExternalProject_Add(sox
   DOWNLOAD_DIR ${ARCHIVE_DIR}
   URL https://downloads.sourceforge.net/project/sox/sox/14.4.2/sox-14.4.2.tar.bz2
   URL_HASH SHA256=81a6956d4330e75b5827316e44ae381e6f1e8928003c6aa45896da9041ea149c
-  PATCH_COMMAND patch -p1 < ${patch_dir}/sox.patch && cp ${patch_dir}/config.guess ${patch_dir}/config.sub ${CMAKE_CURRENT_BINARY_DIR}/src/sox/
+  PATCH_COMMAND cp ${patch_dir}/config.guess ${patch_dir}/config.sub ${CMAKE_CURRENT_BINARY_DIR}/src/sox/
   CONFIGURE_COMMAND ${CMAKE_COMMAND} -E env ${envs} ${CMAKE_CURRENT_BINARY_DIR}/src/sox/configure ${COMMON_ARGS} ${SOX_OPTIONS}
   BUILD_BYPRODUCTS ${SOX_LIBRARIES}
   DOWNLOAD_NO_PROGRESS ON
diff --git a/torchaudio/_extension/utils.py b/torchaudio/_extension/utils.py
index 5490385d34..30ef2e4a35 100644
--- a/torchaudio/_extension/utils.py
+++ b/torchaudio/_extension/utils.py
@@ -67,7 +67,7 @@ def _init_sox():
     _load_lib("libtorchaudio_sox")
     import torchaudio.lib._torchaudio_sox  # noqa
 
-    torch.ops.torchaudio.sox_utils_set_verbosity(0)
+    torchaudio.lib._torchaudio_sox.set_verbosity(0)
 
     import atexit
 
diff --git a/torchaudio/backend/sox_io_backend.py b/torchaudio/backend/sox_io_backend.py
index 8b540b5954..30b5cecfb0 100644
--- a/torchaudio/backend/sox_io_backend.py
+++ b/torchaudio/backend/sox_io_backend.py
@@ -1,10 +1,8 @@
 import os
-import warnings
 from typing import Optional, Tuple
 
 import torch
 import torchaudio
-from torchaudio.utils.sox_utils import get_buffer_size
 
 from .common import AudioMetaData
 
@@ -14,10 +12,6 @@ def _fail_info(filepath: str, format: Optional[str]) -> AudioMetaData:
     raise RuntimeError("Failed to fetch metadata from {}".format(filepath))
 
 
-def _fail_info_fileobj(fileobj, format: Optional[str], buffer_size: int) -> AudioMetaData:
-    raise RuntimeError("Failed to fetch metadata from {}".format(fileobj))
-
-
 # Note: need to comply TorchScript syntax -- need annotation and no f-string
 def _fail_load(
     filepath: str,
@@ -30,30 +24,14 @@ def _fail_load(
     raise RuntimeError("Failed to load audio from {}".format(filepath))
 
 
-def _fail_load_fileobj(fileobj, *args, **kwargs):
-    raise RuntimeError(f"Failed to load audio from {fileobj}")
-
-
 if torchaudio._extension._FFMPEG_INITIALIZED:
     import torchaudio.io._compat as _compat
 
     _fallback_info = _compat.info_audio
-    _fallback_info_fileobj = _compat.info_audio_fileobj
     _fallback_load = _compat.load_audio
-    _fallback_load_fileobj = _compat.load_audio_fileobj
 else:
     _fallback_info = _fail_info
-    _fallback_info_fileobj = _fail_info_fileobj
     _fallback_load = _fail_load
-    _fallback_load_fileobj = _fail_load_fileobj
-
-
-_deprecation_message = (
-    "File-like object support in sox_io backend is deprecated, "
-    "and will be removed in v2.1. "
-    "See https://github.com/pytorch/audio/issues/2950 for the detail."
-    "Please migrate to the new dispatcher, or use soundfile backend."
-)
 
 
 @torchaudio._extension.fail_if_no_sox
@@ -64,24 +42,8 @@ def info(
     """Get signal information of an audio file.
 
     Args:
-        filepath (path-like object or file-like object):
-            Source of audio data. When the function is not compiled by TorchScript,
-            (e.g. ``torch.jit.script``), the following types are accepted;
-
-                  * ``path-like``: file path
-                  * ``file-like``: Object with ``read(size: int) -> bytes`` method,
-                    which returns byte string of at most ``size`` length.
-
-            When the function is compiled by TorchScript, only ``str`` type is allowed.
-
-            Note:
-
-                  * When the input type is file-like object, this function cannot
-                    get the correct length (``num_samples``) for certain formats,
-                    such as ``vorbis``.
-                    In this case, the value of ``num_samples`` is ``0``.
-                  * This argument is intentionally annotated as ``str`` only due to
-                    TorchScript compiler compatibility.
+        filepath (path-like object):
+            Source of audio data.
 
         format (str or None, optional):
             Override the format detection with the given format.
@@ -93,21 +55,7 @@ def info(
     """
     if not torch.jit.is_scripting():
         if hasattr(filepath, "read"):
-            # Special case for Backward compatibility
-            # v0.11 -> v0.12, mp3 handling is moved to FFmpeg.
-            # file-like objects are not necessarily fallback-able
-            # when they are not seekable.
-            # The previous libsox-based implementation required `format="mp3"`
-            # because internally libsox does not auto-detect the format.
-            # For the special BC for mp3, we handle mp3 differently.
-            buffer_size = get_buffer_size()
-            if format == "mp3":
-                return _fallback_info_fileobj(filepath, format, buffer_size)
-            warnings.warn(_deprecation_message)
-            sinfo = torchaudio.lib._torchaudio_sox.get_info_fileobj(filepath, format)
-            if sinfo is not None:
-                return AudioMetaData(*sinfo)
-            return _fallback_info_fileobj(filepath, format, buffer_size)
+            raise RuntimeError("sox_io backend does not support file-like object.")
         filepath = os.fspath(filepath)
     sinfo = torch.ops.torchaudio.sox_io_get_info(filepath, format)
     if sinfo is not None:
@@ -171,18 +119,7 @@ def load(
        For these formats, this function always returns ``float32`` Tensor with values.
 
     Args:
-        filepath (path-like object or file-like object):
-            Source of audio data. When the function is not compiled by TorchScript,
-            (e.g. ``torch.jit.script``), the following types are accepted;
-
-                  * ``path-like``: file path
-                  * ``file-like``: Object with ``read(size: int) -> bytes`` method,
-                    which returns byte string of at most ``size`` length.
-
-            When the function is compiled by TorchScript, only ``str`` type is allowed.
-
-            Note: This argument is intentionally annotated as ``str`` only due to
-            TorchScript compiler compatibility.
+        filepath (path-like object): Source of audio data.
         frame_offset (int):
             Number of frames to skip before start reading data.
         num_frames (int, optional):
@@ -214,39 +151,7 @@ def load(
     """
     if not torch.jit.is_scripting():
         if hasattr(filepath, "read"):
-            # Special case for Backward compatibility
-            # v0.11 -> v0.12, mp3 handling is moved to FFmpeg.
-            # file-like objects are not necessarily fallback-able
-            # when they are not seekable.
-            # The previous libsox-based implementation required `format="mp3"`
-            # because internally libsox does not auto-detect the format.
-            # For the special BC for mp3, we handle mp3 differently.
-            buffer_size = get_buffer_size()
-            if format == "mp3":
-                return _fallback_load_fileobj(
-                    filepath,
-                    frame_offset,
-                    num_frames,
-                    normalize,
-                    channels_first,
-                    format,
-                    buffer_size,
-                )
-            warnings.warn(_deprecation_message)
-            ret = torchaudio.lib._torchaudio_sox.load_audio_fileobj(
-                filepath, frame_offset, num_frames, normalize, channels_first, format
-            )
-            if ret is not None:
-                return ret
-            return _fallback_load_fileobj(
-                filepath,
-                frame_offset,
-                num_frames,
-                normalize,
-                channels_first,
-                format,
-                buffer_size,
-            )
+            raise RuntimeError("sox_io backend does not support file-like object.")
         filepath = os.fspath(filepath)
     ret = torch.ops.torchaudio.sox_io_load_audio_file(
         filepath, frame_offset, num_frames, normalize, channels_first, format
@@ -270,9 +175,7 @@ def save(
     """Save audio data to file.
 
     Args:
-        filepath (str or pathlib.Path): Path to save file.
-            This function also handles ``pathlib.Path`` objects, but is annotated
-            as ``str`` for TorchScript compiler compatibility.
+        filepath (path-like object): Path to save file.
         src (torch.Tensor): Audio data to save. must be 2D tensor.
         sample_rate (int): sampling rate
         channels_first (bool, optional): If ``True``, the given tensor is interpreted as `[channel, time]`,
@@ -413,18 +316,7 @@ def save(
     """
     if not torch.jit.is_scripting():
         if hasattr(filepath, "write"):
-            warnings.warn(_deprecation_message)
-            torchaudio.lib._torchaudio_sox.save_audio_fileobj(
-                filepath,
-                src,
-                sample_rate,
-                channels_first,
-                compression,
-                format,
-                encoding,
-                bits_per_sample,
-            )
-            return
+            raise RuntimeError("sox_io backend does not handle file-like object.")
         filepath = os.fspath(filepath)
     torch.ops.torchaudio.sox_io_save_audio_file(
         filepath,
diff --git a/torchaudio/csrc/sox/CMakeLists.txt b/torchaudio/csrc/sox/CMakeLists.txt
index e369ecf7af..3391a4fc37 100644
--- a/torchaudio/csrc/sox/CMakeLists.txt
+++ b/torchaudio/csrc/sox/CMakeLists.txt
@@ -15,17 +15,9 @@ torchaudio_library(
   )
 
 if (BUILD_TORCHAUDIO_PYTHON_EXTENSION)
-  set(
-    ext_sources
-    pybind/pybind.cpp
-    pybind/effects.cpp
-    pybind/effects_chain.cpp
-    pybind/io.cpp
-    pybind/utils.cpp
-    )
   torchaudio_extension(
     _torchaudio_sox
-    "${ext_sources}"
+    "pybind/pybind.cpp;"
     ""
     "libtorchaudio_sox"
     ""
diff --git a/torchaudio/csrc/sox/pybind/effects.cpp b/torchaudio/csrc/sox/pybind/effects.cpp
deleted file mode 100644
index db80f98d63..0000000000
--- a/torchaudio/csrc/sox/pybind/effects.cpp
+++ /dev/null
@@ -1,123 +0,0 @@
-#include <torchaudio/csrc/sox/pybind/effects.h>
-#include <torchaudio/csrc/sox/pybind/effects_chain.h>
-#include <torchaudio/csrc/sox/pybind/utils.h>
-
-using namespace torchaudio::sox_utils;
-
-namespace torchaudio {
-namespace sox_effects {
-
-// Streaming decoding over file-like object is tricky because libsox operates on
-// FILE pointer. The folloing is what `sox` and `play` commands do
-//  - file input -> FILE pointer
-//  - URL input -> call wget in suprocess and pipe the data -> FILE pointer
-//  - stdin -> FILE pointer
-//
-// We want to, instead, fetch byte strings chunk by chunk, consume them, and
-// discard.
-//
-// Here is the approach
-// 1. Initialize sox_format_t using sox_open_mem_read, providing the initial
-// chunk of byte string
-//    This will perform header-based format detection, if necessary, then fill
-//    the metadata of sox_format_t. Internally, sox_open_mem_read uses fmemopen,
-//    which returns FILE* which points the buffer of the provided byte string.
-// 2. Each time sox reads a chunk from the FILE*, we update the underlying
-// buffer in a way that it
-//    starts with unseen data, and append the new data read from the given
-//    fileobj. This will trick libsox as if it keeps reading from the FILE*
-//    continuously.
-// For Step 2. see `fileobj_input_drain` function in effects_chain.cpp
-auto apply_effects_fileobj(
-    py::object fileobj,
-    const std::vector<std::vector<std::string>>& effects,
-    c10::optional<bool> normalize,
-    c10::optional<bool> channels_first,
-    c10::optional<std::string> format)
-    -> c10::optional<std::tuple<torch::Tensor, int64_t>> {
-  // Prepare the buffer used throughout the lifecycle of SoxEffectChain.
-  //
-  // For certain format (such as FLAC), libsox keeps reading the content at
-  // the initialization unless it reaches EOF even when the header is properly
-  // parsed. (Making buffer size 8192, which is way bigger than the header,
-  // resulted in libsox consuming all the buffer content at the time it opens
-  // the file.) Therefore buffer has to always contain valid data, except after
-  // EOF. We default to `sox_get_globals()->bufsiz`* for buffer size and we
-  // first check if there is enough data to fill the buffer. `read_fileobj`
-  // repeatedly calls `read`  method until it receives the requested length of
-  // bytes or it reaches EOF. If we get bytes shorter than requested, that means
-  // the whole audio data are fetched.
-  //
-  // * This can be changed with `torchaudio.utils.sox_utils.set_buffer_size`.
-  const auto capacity = [&]() {
-    // NOTE:
-    // Use the abstraction provided by `libtorchaudio` to access the global
-    // config defined by libsox. Directly using `sox_get_globals` function will
-    // end up retrieving the static variable defined in `_torchaudio`, which is
-    // not correct.
-    const auto bufsiz = get_buffer_size();
-    const int64_t kDefaultCapacityInBytes = 256;
-    return (bufsiz > kDefaultCapacityInBytes) ? bufsiz
-                                              : kDefaultCapacityInBytes;
-  }();
-  std::string buffer(capacity, '\0');
-  auto* in_buf = const_cast<char*>(buffer.data());
-  auto num_read = read_fileobj(&fileobj, capacity, in_buf);
-  // If the file is shorter than 256, then libsox cannot read the header.
-  auto in_buffer_size = (num_read > 256) ? num_read : 256;
-
-  // Open file (this starts reading the header)
-  // When opening a file there are two functions that can touches FILE*.
-  // * `auto_detect_format`
-  //   https://github.com/dmkrepo/libsox/blob/b9dd1a86e71bbd62221904e3e59dfaa9e5e72046/src/formats.c#L43
-  // * `startread` handler of detected format.
-  //   https://github.com/dmkrepo/libsox/blob/b9dd1a86e71bbd62221904e3e59dfaa9e5e72046/src/formats.c#L574
-  // To see the handler of a particular format, go to
-  //   https://github.com/dmkrepo/libsox/blob/b9dd1a86e71bbd62221904e3e59dfaa9e5e72046/src/<FORMAT>.c
-  // For example, voribs can be found
-  //   https://github.com/dmkrepo/libsox/blob/b9dd1a86e71bbd62221904e3e59dfaa9e5e72046/src/vorbis.c#L97-L158
-  SoxFormat sf(sox_open_mem_read(
-      in_buf,
-      in_buffer_size,
-      /*signal=*/nullptr,
-      /*encoding=*/nullptr,
-      /*filetype=*/format.has_value() ? format.value().c_str() : nullptr));
-
-  // In case of streamed data, length can be 0
-  if (static_cast<sox_format_t*>(sf) == nullptr ||
-      sf->encoding.encoding == SOX_ENCODING_UNKNOWN) {
-    return {};
-  }
-
-  // Prepare output buffer
-  std::vector<sox_sample_t> out_buffer;
-  out_buffer.reserve(sf->signal.length);
-
-  // Create and run SoxEffectsChain
-  const auto dtype = get_dtype(sf->encoding.encoding, sf->signal.precision);
-  torchaudio::sox_effects_chain::SoxEffectsChainPyBind chain(
-      /*input_encoding=*/sf->encoding,
-      /*output_encoding=*/get_tensor_encodinginfo(dtype));
-  chain.addInputFileObj(sf, in_buf, in_buffer_size, &fileobj);
-  for (const auto& effect : effects) {
-    chain.addEffect(effect);
-  }
-  chain.addOutputBuffer(&out_buffer);
-  chain.run();
-
-  // Create tensor from buffer
-  bool channels_first_ = channels_first.value_or(true);
-  auto tensor = convert_to_tensor(
-      /*buffer=*/out_buffer.data(),
-      /*num_samples=*/out_buffer.size(),
-      /*num_channels=*/chain.getOutputNumChannels(),
-      dtype,
-      normalize.value_or(true),
-      channels_first_);
-
-  return std::forward_as_tuple(
-      tensor, static_cast<int64_t>(chain.getOutputSampleRate()));
-}
-
-} // namespace sox_effects
-} // namespace torchaudio
diff --git a/torchaudio/csrc/sox/pybind/effects.h b/torchaudio/csrc/sox/pybind/effects.h
deleted file mode 100644
index 7f1e653cd5..0000000000
--- a/torchaudio/csrc/sox/pybind/effects.h
+++ /dev/null
@@ -1,20 +0,0 @@
-#ifndef TORCHAUDIO_PYBIND_SOX_EFFECTS_H
-#define TORCHAUDIO_PYBIND_SOX_EFFECTS_H
-
-#include <torch/extension.h>
-
-namespace torchaudio {
-namespace sox_effects {
-
-auto apply_effects_fileobj(
-    py::object fileobj,
-    const std::vector<std::vector<std::string>>& effects,
-    c10::optional<bool> normalize,
-    c10::optional<bool> channels_first,
-    c10::optional<std::string> format)
-    -> c10::optional<std::tuple<torch::Tensor, int64_t>>;
-
-} // namespace sox_effects
-} // namespace torchaudio
-
-#endif
diff --git a/torchaudio/csrc/sox/pybind/effects_chain.cpp b/torchaudio/csrc/sox/pybind/effects_chain.cpp
deleted file mode 100644
index 42128433d6..0000000000
--- a/torchaudio/csrc/sox/pybind/effects_chain.cpp
+++ /dev/null
@@ -1,237 +0,0 @@
-#include <sox.h>
-#include <torchaudio/csrc/sox/pybind/effects_chain.h>
-#include <torchaudio/csrc/sox/pybind/utils.h>
-
-using namespace torchaudio::sox_utils;
-
-namespace torchaudio {
-namespace sox_effects_chain {
-
-namespace {
-
-/// helper classes for passing file-like object to SoxEffectChain
-struct FileObjInputPriv {
-  sox_format_t* sf;
-  py::object* fileobj;
-  bool eof_reached;
-  char* buffer;
-  uint64_t buffer_size;
-};
-
-struct FileObjOutputPriv {
-  sox_format_t* sf;
-  py::object* fileobj;
-  char** buffer;
-  size_t* buffer_size;
-};
-
-/// Callback function to feed byte string
-/// https://github.com/dmkrepo/libsox/blob/b9dd1a86e71bbd62221904e3e59dfaa9e5e72046/src/sox.h#L1268-L1278
-auto fileobj_input_drain(sox_effect_t* effp, sox_sample_t* obuf, size_t* osamp)
-    -> int {
-  auto priv = static_cast<FileObjInputPriv*>(effp->priv);
-  auto sf = priv->sf;
-  auto buffer = priv->buffer;
-
-  // 1. Refresh the buffer
-  //
-  // NOTE:
-  //   Since the underlying FILE* was opened with `fmemopen`, the only way
-  //   libsox detect EOF is reaching the end of the buffer. (null byte won't
-  //   help) Therefore we need to align the content at the end of buffer,
-  //   otherwise, libsox will keep reading the content beyond intended length.
-  //
-  // Before:
-  //
-  //     |<-------consumed------>|<---remaining--->|
-  //     |***********************|-----------------|
-  //                             ^ ftell
-  //
-  // After:
-  //
-  //     |<-offset->|<---remaining--->|<-new data->|
-  //     |**********|-----------------|++++++++++++|
-  //                ^ ftell
-
-  // NOTE:
-  //   Do not use `sf->tell_off` here. Presumably, `tell_off` and `fseek` are
-  //   supposed to be in sync, but there are cases (Vorbis) they are not
-  //   in sync and `tell_off` has seemingly uninitialized value, which
-  //   leads num_remain to be negative and cause segmentation fault
-  //   in `memmove`.
-  const auto tell = ftell((FILE*)sf->fp);
-  if (tell < 0) {
-    throw std::runtime_error("Internal Error: ftell failed.");
-  }
-  const auto num_consumed = static_cast<size_t>(tell);
-  if (num_consumed > priv->buffer_size) {
-    throw std::runtime_error("Internal Error: buffer overrun.");
-  }
-
-  const auto num_remain = priv->buffer_size - num_consumed;
-
-  // 1.1. Fetch the data to see if there is data to fill the buffer
-  size_t num_refill = 0;
-  std::string chunk(num_consumed, '\0');
-  if (num_consumed && !priv->eof_reached) {
-    num_refill = read_fileobj(
-        priv->fileobj, num_consumed, const_cast<char*>(chunk.data()));
-    if (num_refill < num_consumed) {
-      priv->eof_reached = true;
-    }
-  }
-  const auto offset = num_consumed - num_refill;
-
-  // 1.2. Move the unconsumed data towards the beginning of buffer.
-  if (num_remain) {
-    auto src = static_cast<void*>(buffer + num_consumed);
-    auto dst = static_cast<void*>(buffer + offset);
-    memmove(dst, src, num_remain);
-  }
-
-  // 1.3. Refill the remaining buffer.
-  if (num_refill) {
-    auto src = static_cast<void*>(const_cast<char*>(chunk.c_str()));
-    auto dst = buffer + offset + num_remain;
-    memcpy(dst, src, num_refill);
-  }
-
-  // 1.4. Set the file pointer to the new offset
-  sf->tell_off = offset;
-  fseek((FILE*)sf->fp, offset, SEEK_SET);
-
-  // 2. Perform decoding operation
-  // The following part is practically same as "input" effect
-  // https://github.com/dmkrepo/libsox/blob/b9dd1a86e71bbd62221904e3e59dfaa9e5e72046/src/input.c#L30-L48
-
-  // At this point, osamp represents the buffer size in bytes,
-  // but sox_read expects the maximum number of samples ready to read.
-  // Normally, this is fine, but in case when the samples are not 4-byte
-  // aligned, (e.g. sample is 24bits), the resulting signal is not correct.
-  // https://github.com/pytorch/audio/issues/2083
-  if (sf->encoding.bits_per_sample > 0)
-    *osamp /= (sf->encoding.bits_per_sample / 8);
-
-  // Ensure that it's a multiple of the number of channels
-  *osamp -= *osamp % effp->out_signal.channels;
-
-  // Read up to *osamp samples into obuf;
-  // store the actual number read back to *osamp
-  *osamp = sox_read(sf, obuf, *osamp);
-
-  // Decoding is finished when fileobject is exhausted and sox can no longer
-  // decode a sample.
-  return (priv->eof_reached && !*osamp) ? SOX_EOF : SOX_SUCCESS;
-}
-
-auto fileobj_output_flow(
-    sox_effect_t* effp,
-    sox_sample_t const* ibuf,
-    sox_sample_t* obuf LSX_UNUSED,
-    size_t* isamp,
-    size_t* osamp) -> int {
-  *osamp = 0;
-  if (*isamp) {
-    auto priv = static_cast<FileObjOutputPriv*>(effp->priv);
-    auto sf = priv->sf;
-    auto fp = static_cast<FILE*>(sf->fp);
-    auto fileobj = priv->fileobj;
-    auto buffer = priv->buffer;
-
-    // Encode chunk
-    auto num_samples_written = sox_write(sf, ibuf, *isamp);
-    fflush(fp);
-
-    // Copy the encoded chunk to python object.
-    fileobj->attr("write")(py::bytes(*buffer, ftell(fp)));
-
-    // Reset FILE*
-    sf->tell_off = 0;
-    fseek(fp, 0, SEEK_SET);
-
-    if (num_samples_written != *isamp) {
-      if (sf->sox_errno) {
-        std::ostringstream stream;
-        stream << sf->sox_errstr << " " << sox_strerror(sf->sox_errno) << " "
-               << sf->filename;
-        throw std::runtime_error(stream.str());
-      }
-      return SOX_EOF;
-    }
-  }
-  return SOX_SUCCESS;
-}
-
-auto get_fileobj_input_handler() -> sox_effect_handler_t* {
-  static sox_effect_handler_t handler{
-      /*name=*/"input_fileobj_object",
-      /*usage=*/nullptr,
-      /*flags=*/SOX_EFF_MCHAN,
-      /*getopts=*/nullptr,
-      /*start=*/nullptr,
-      /*flow=*/nullptr,
-      /*drain=*/fileobj_input_drain,
-      /*stop=*/nullptr,
-      /*kill=*/nullptr,
-      /*priv_size=*/sizeof(FileObjInputPriv)};
-  return &handler;
-}
-
-auto get_fileobj_output_handler() -> sox_effect_handler_t* {
-  static sox_effect_handler_t handler{
-      /*name=*/"output_fileobj_object",
-      /*usage=*/nullptr,
-      /*flags=*/SOX_EFF_MCHAN,
-      /*getopts=*/nullptr,
-      /*start=*/nullptr,
-      /*flow=*/fileobj_output_flow,
-      /*drain=*/nullptr,
-      /*stop=*/nullptr,
-      /*kill=*/nullptr,
-      /*priv_size=*/sizeof(FileObjOutputPriv)};
-  return &handler;
-}
-
-} // namespace
-
-void SoxEffectsChainPyBind::addInputFileObj(
-    sox_format_t* sf,
-    char* buffer,
-    uint64_t buffer_size,
-    py::object* fileobj) {
-  in_sig_ = sf->signal;
-  interm_sig_ = in_sig_;
-
-  SoxEffect e(sox_create_effect(get_fileobj_input_handler()));
-  auto priv = static_cast<FileObjInputPriv*>(e->priv);
-  priv->sf = sf;
-  priv->fileobj = fileobj;
-  priv->eof_reached = false;
-  priv->buffer = buffer;
-  priv->buffer_size = buffer_size;
-  if (sox_add_effect(sec_, e, &interm_sig_, &in_sig_) != SOX_SUCCESS) {
-    throw std::runtime_error(
-        "Internal Error: Failed to add effect: input fileobj");
-  }
-}
-
-void SoxEffectsChainPyBind::addOutputFileObj(
-    sox_format_t* sf,
-    char** buffer,
-    size_t* buffer_size,
-    py::object* fileobj) {
-  out_sig_ = sf->signal;
-  SoxEffect e(sox_create_effect(get_fileobj_output_handler()));
-  auto priv = static_cast<FileObjOutputPriv*>(e->priv);
-  priv->sf = sf;
-  priv->fileobj = fileobj;
-  priv->buffer = buffer;
-  priv->buffer_size = buffer_size;
-  if (sox_add_effect(sec_, e, &interm_sig_, &out_sig_) != SOX_SUCCESS) {
-    throw std::runtime_error(
-        "Internal Error: Failed to add effect: output fileobj");
-  }
-}
-
-} // namespace sox_effects_chain
-} // namespace torchaudio
diff --git a/torchaudio/csrc/sox/pybind/effects_chain.h b/torchaudio/csrc/sox/pybind/effects_chain.h
deleted file mode 100644
index acbacf6013..0000000000
--- a/torchaudio/csrc/sox/pybind/effects_chain.h
+++ /dev/null
@@ -1,30 +0,0 @@
-#ifndef TORCHAUDIO_PYBIND_SOX_EFFECTS_CHAIN_H
-#define TORCHAUDIO_PYBIND_SOX_EFFECTS_CHAIN_H
-
-#include <torch/extension.h>
-#include <torchaudio/csrc/sox/effects_chain.h>
-
-namespace torchaudio {
-namespace sox_effects_chain {
-
-class SoxEffectsChainPyBind : public SoxEffectsChain {
-  using SoxEffectsChain::SoxEffectsChain;
-
- public:
-  void addInputFileObj(
-      sox_format_t* sf,
-      char* buffer,
-      uint64_t buffer_size,
-      py::object* fileobj);
-
-  void addOutputFileObj(
-      sox_format_t* sf,
-      char** buffer,
-      size_t* buffer_size,
-      py::object* fileobj);
-};
-
-} // namespace sox_effects_chain
-} // namespace torchaudio
-
-#endif
diff --git a/torchaudio/csrc/sox/pybind/io.cpp b/torchaudio/csrc/sox/pybind/io.cpp
deleted file mode 100644
index 5fc6d271b5..0000000000
--- a/torchaudio/csrc/sox/pybind/io.cpp
+++ /dev/null
@@ -1,195 +0,0 @@
-#include <torchaudio/csrc/sox/io.h>
-#include <torchaudio/csrc/sox/pybind/effects.h>
-#include <torchaudio/csrc/sox/pybind/effects_chain.h>
-#include <torchaudio/csrc/sox/pybind/io.h>
-#include <torchaudio/csrc/sox/pybind/utils.h>
-#include <torchaudio/csrc/sox/types.h>
-
-#include <utility>
-
-using namespace torchaudio::sox_utils;
-
-namespace torchaudio {
-namespace sox_io {
-
-auto get_info_fileobj(py::object fileobj, c10::optional<std::string> format)
-    -> c10::optional<MetaDataTuple> {
-  // Prepare in-memory file object
-  // When libsox opens a file, it also reads the header.
-  // When opening a file there are two functions that might touch FILE* (and the
-  // underlying buffer).
-  // * `auto_detect_format`
-  //   https://github.com/dmkrepo/libsox/blob/b9dd1a86e71bbd62221904e3e59dfaa9e5e72046/src/formats.c#L43
-  // * `startread` handler of detected format.
-  //   https://github.com/dmkrepo/libsox/blob/b9dd1a86e71bbd62221904e3e59dfaa9e5e72046/src/formats.c#L574
-  // To see the handler of a particular format, go to
-  //   https://github.com/dmkrepo/libsox/blob/b9dd1a86e71bbd62221904e3e59dfaa9e5e72046/src/<FORMAT>.c
-  // For example, voribs can be found
-  //   https://github.com/dmkrepo/libsox/blob/b9dd1a86e71bbd62221904e3e59dfaa9e5e72046/src/vorbis.c#L97-L158
-  //
-  // `auto_detect_format` function only requires 256 bytes, but format-dependent
-  // `startread` handler might require more data. In case of vorbis, the size of
-  // header is unbounded, but typically 4kB maximum.
-  //
-  // "The header size is unbounded, although for streaming a rule-of-thumb of
-  // 4kB or less is recommended (and Xiph.Org's Vorbis encoder follows this
-  // suggestion)."
-  //
-  // See:
-  // https://xiph.org/vorbis/doc/Vorbis_I_spec.html
-  const auto capacity = [&]() {
-    // NOTE:
-    // Use the abstraction provided by `libtorchaudio` to access the global
-    // config defined by libsox. Directly using `sox_get_globals` function will
-    // end up retrieving the static variable defined in `_torchaudio`, which is
-    // not correct.
-    const auto bufsiz = get_buffer_size();
-    const int64_t kDefaultCapacityInBytes = 4096;
-    return (bufsiz > kDefaultCapacityInBytes) ? bufsiz
-                                              : kDefaultCapacityInBytes;
-  }();
-  std::string buffer(capacity, '\0');
-  auto* buf = const_cast<char*>(buffer.data());
-  auto num_read = read_fileobj(&fileobj, capacity, buf);
-  // If the file is shorter than 256, then libsox cannot read the header.
-  auto buf_size = (num_read > 256) ? num_read : 256;
-
-  SoxFormat sf(sox_open_mem_read(
-      buf,
-      buf_size,
-      /*signal=*/nullptr,
-      /*encoding=*/nullptr,
-      /*filetype=*/format.has_value() ? format.value().c_str() : nullptr));
-
-  if (static_cast<sox_format_t*>(sf) == nullptr ||
-      sf->encoding.encoding == SOX_ENCODING_UNKNOWN) {
-    return c10::optional<MetaDataTuple>{};
-  }
-
-  return std::forward_as_tuple(
-      static_cast<int64_t>(sf->signal.rate),
-      static_cast<int64_t>(sf->signal.length / sf->signal.channels),
-      static_cast<int64_t>(sf->signal.channels),
-      static_cast<int64_t>(sf->encoding.bits_per_sample),
-      get_encoding(sf->encoding.encoding));
-}
-
-auto load_audio_fileobj(
-    py::object fileobj,
-    c10::optional<int64_t> frame_offset,
-    c10::optional<int64_t> num_frames,
-    c10::optional<bool> normalize,
-    c10::optional<bool> channels_first,
-    c10::optional<std::string> format)
-    -> c10::optional<std::tuple<torch::Tensor, int64_t>> {
-  auto effects = get_effects(frame_offset, num_frames);
-  return torchaudio::sox_effects::apply_effects_fileobj(
-      std::move(fileobj),
-      effects,
-      normalize,
-      channels_first,
-      std::move(format));
-}
-
-namespace {
-
-// helper class to automatically release buffer, to be used by
-// save_audio_fileobj
-struct AutoReleaseBuffer {
-  char* ptr;
-  size_t size;
-
-  AutoReleaseBuffer() : ptr(nullptr), size(0) {}
-  AutoReleaseBuffer(const AutoReleaseBuffer& other) = delete;
-  AutoReleaseBuffer(AutoReleaseBuffer&& other) = delete;
-  auto operator=(const AutoReleaseBuffer& other) -> AutoReleaseBuffer& = delete;
-  auto operator=(AutoReleaseBuffer&& other) -> AutoReleaseBuffer& = delete;
-  ~AutoReleaseBuffer() {
-    if (ptr) {
-      free(ptr);
-    }
-  }
-};
-
-} // namespace
-
-void save_audio_fileobj(
-    py::object fileobj,
-    torch::Tensor tensor,
-    int64_t sample_rate,
-    bool channels_first,
-    c10::optional<double> compression,
-    c10::optional<std::string> format,
-    c10::optional<std::string> encoding,
-    c10::optional<int64_t> bits_per_sample) {
-  validate_input_tensor(tensor);
-
-  if (!format.has_value()) {
-    throw std::runtime_error(
-        "`format` is required when saving to file object.");
-  }
-  const auto filetype = format.value();
-
-  if (filetype == "amr-nb") {
-    const auto num_channels = tensor.size(channels_first ? 0 : 1);
-    if (num_channels != 1) {
-      throw std::runtime_error(
-          "amr-nb format only supports single channel audio.");
-    }
-  } else if (filetype == "htk") {
-    const auto num_channels = tensor.size(channels_first ? 0 : 1);
-    if (num_channels != 1) {
-      throw std::runtime_error(
-          "htk format only supports single channel audio.");
-    }
-  } else if (filetype == "gsm") {
-    const auto num_channels = tensor.size(channels_first ? 0 : 1);
-    if (num_channels != 1) {
-      throw std::runtime_error(
-          "gsm format only supports single channel audio.");
-    }
-    if (sample_rate != 8000) {
-      throw std::runtime_error(
-          "gsm format only supports a sampling rate of 8kHz.");
-    }
-  }
-  const auto signal_info =
-      get_signalinfo(&tensor, sample_rate, filetype, channels_first);
-  const auto encoding_info = get_encodinginfo_for_save(
-      filetype,
-      tensor.dtype(),
-      compression,
-      std::move(encoding),
-      bits_per_sample);
-
-  AutoReleaseBuffer buffer;
-
-  SoxFormat sf(sox_open_memstream_write(
-      &buffer.ptr,
-      &buffer.size,
-      &signal_info,
-      &encoding_info,
-      filetype.c_str(),
-      /*oob=*/nullptr));
-
-  if (static_cast<sox_format_t*>(sf) == nullptr) {
-    throw std::runtime_error(
-        "Error saving audio file: failed to open memory stream.");
-  }
-
-  torchaudio::sox_effects_chain::SoxEffectsChainPyBind chain(
-      /*input_encoding=*/get_tensor_encodinginfo(tensor.dtype()),
-      /*output_encoding=*/sf->encoding);
-  chain.addInputTensor(&tensor, sample_rate, channels_first);
-  chain.addOutputFileObj(sf, &buffer.ptr, &buffer.size, &fileobj);
-  chain.run();
-
-  // Closing the sox_format_t is necessary for flushing the last chunk to the
-  // buffer
-  sf.close();
-
-  fileobj.attr("write")(py::bytes(buffer.ptr, buffer.size));
-}
-
-} // namespace sox_io
-} // namespace torchaudio
diff --git a/torchaudio/csrc/sox/pybind/io.h b/torchaudio/csrc/sox/pybind/io.h
deleted file mode 100644
index db91ad4ace..0000000000
--- a/torchaudio/csrc/sox/pybind/io.h
+++ /dev/null
@@ -1,37 +0,0 @@
-#ifndef TORCHAUDIO_PYBIND_SOX_IO_H
-#define TORCHAUDIO_PYBIND_SOX_IO_H
-
-#include <torch/extension.h>
-
-namespace torchaudio {
-namespace sox_io {
-
-using MetaDataTuple =
-    std::tuple<int64_t, int64_t, int64_t, int64_t, std::string>;
-
-auto get_info_fileobj(py::object fileobj, c10::optional<std::string> format)
-    -> c10::optional<MetaDataTuple>;
-
-auto load_audio_fileobj(
-    py::object fileobj,
-    c10::optional<int64_t> frame_offset,
-    c10::optional<int64_t> num_frames,
-    c10::optional<bool> normalize,
-    c10::optional<bool> channels_first,
-    c10::optional<std::string> format)
-    -> c10::optional<std::tuple<torch::Tensor, int64_t>>;
-
-void save_audio_fileobj(
-    py::object fileobj,
-    torch::Tensor tensor,
-    int64_t sample_rate,
-    bool channels_first,
-    c10::optional<double> compression,
-    c10::optional<std::string> format,
-    c10::optional<std::string> encoding,
-    c10::optional<int64_t> bits_per_sample);
-
-} // namespace sox_io
-} // namespace torchaudio
-
-#endif
diff --git a/torchaudio/csrc/sox/pybind/pybind.cpp b/torchaudio/csrc/sox/pybind/pybind.cpp
index 751471c52e..e7f8a8216c 100644
--- a/torchaudio/csrc/sox/pybind/pybind.cpp
+++ b/torchaudio/csrc/sox/pybind/pybind.cpp
@@ -1,23 +1,45 @@
 #include <torch/extension.h>
+#include <torchaudio/csrc/sox/utils.h>
 
-#include <torchaudio/csrc/sox/pybind/effects.h>
-#include <torchaudio/csrc/sox/pybind/io.h>
+namespace torchaudio {
+namespace sox {
+namespace {
 
 PYBIND11_MODULE(_torchaudio_sox, m) {
   m.def(
-      "get_info_fileobj",
-      &torchaudio::sox_io::get_info_fileobj,
-      "Get metadata of audio in file object.");
+      "set_seed",
+      &torchaudio::sox_utils::set_seed,
+      "Set random seed.");
   m.def(
-      "load_audio_fileobj",
-      &torchaudio::sox_io::load_audio_fileobj,
-      "Load audio from file object.");
+      "set_verbosity",
+      &torchaudio::sox_utils::set_verbosity,
+      "Set verbosity.");
   m.def(
-      "save_audio_fileobj",
-      &torchaudio::sox_io::save_audio_fileobj,
-      "Save audio to file obj.");
+      "set_use_threads",
+      &torchaudio::sox_utils::set_use_threads,
+      "Set threading.");
   m.def(
-      "apply_effects_fileobj",
-      &torchaudio::sox_effects::apply_effects_fileobj,
-      "Decode audio data from file-like obj and apply effects.");
+      "set_buffer_size",
+      &torchaudio::sox_utils::set_buffer_size,
+      "Set buffer size.");
+  m.def(
+      "get_buffer_size",
+      &torchaudio::sox_utils::get_buffer_size,
+      "Get buffer size.");
+  m.def(
+      "list_effects",
+      &torchaudio::sox_utils::list_effects,
+      "List available effects.");
+  m.def(
+      "list_read_formats",
+      &torchaudio::sox_utils::list_read_formats,
+      "List supported formats for decoding.");
+  m.def(
+      "list_write_formats",
+      &torchaudio::sox_utils::list_write_formats,
+      "List supported formats for encoding.");
 }
+
+} // namespace
+} // namespace sox
+} // namespace torchaudio
diff --git a/torchaudio/csrc/sox/pybind/utils.cpp b/torchaudio/csrc/sox/pybind/utils.cpp
deleted file mode 100644
index 1744be281a..0000000000
--- a/torchaudio/csrc/sox/pybind/utils.cpp
+++ /dev/null
@@ -1,33 +0,0 @@
-#include <torchaudio/csrc/sox/pybind/utils.h>
-
-namespace torchaudio {
-namespace sox_utils {
-
-auto read_fileobj(py::object* fileobj, const uint64_t size, char* buffer)
-    -> uint64_t {
-  uint64_t num_read = 0;
-  while (num_read < size) {
-    auto request = size - num_read;
-    auto chunk = static_cast<std::string>(
-        static_cast<py::bytes>(fileobj->attr("read")(request)));
-    auto chunk_len = chunk.length();
-    if (chunk_len == 0) {
-      break;
-    }
-    if (chunk_len > request) {
-      std::ostringstream message;
-      message
-          << "Requested up to " << request << " bytes but, "
-          << "received " << chunk_len << " bytes. "
-          << "The given object does not confirm to read protocol of file object.";
-      throw std::runtime_error(message.str());
-    }
-    memcpy(buffer, chunk.data(), chunk_len);
-    buffer += chunk_len;
-    num_read += chunk_len;
-  }
-  return num_read;
-}
-
-} // namespace sox_utils
-} // namespace torchaudio
diff --git a/torchaudio/csrc/sox/pybind/utils.h b/torchaudio/csrc/sox/pybind/utils.h
deleted file mode 100644
index 21955e255c..0000000000
--- a/torchaudio/csrc/sox/pybind/utils.h
+++ /dev/null
@@ -1,14 +0,0 @@
-#ifndef TORCHAUDIO_PYBIND_SOX_UTILS_H
-#define TORCHAUDIO_PYBIND_SOX_UTILS_H
-
-#include <torch/extension.h>
-
-namespace torchaudio {
-namespace sox_utils {
-
-auto read_fileobj(py::object* fileobj, uint64_t size, char* buffer) -> uint64_t;
-
-} // namespace sox_utils
-} // namespace torchaudio
-
-#endif
diff --git a/torchaudio/csrc/sox/utils.cpp b/torchaudio/csrc/sox/utils.cpp
index 88c0ec9839..d3f96c33c7 100644
--- a/torchaudio/csrc/sox/utils.cpp
+++ b/torchaudio/csrc/sox/utils.cpp
@@ -492,30 +492,5 @@ sox_encodinginfo_t get_encodinginfo_for_save(
       /*opposite_endian=*/sox_false};
 }
 
-TORCH_LIBRARY_FRAGMENT(torchaudio, m) {
-  m.def("torchaudio::sox_utils_set_seed", &torchaudio::sox_utils::set_seed);
-  m.def(
-      "torchaudio::sox_utils_set_verbosity",
-      &torchaudio::sox_utils::set_verbosity);
-  m.def(
-      "torchaudio::sox_utils_set_use_threads",
-      &torchaudio::sox_utils::set_use_threads);
-  m.def(
-      "torchaudio::sox_utils_set_buffer_size",
-      &torchaudio::sox_utils::set_buffer_size);
-  m.def(
-      "torchaudio::sox_utils_list_effects",
-      &torchaudio::sox_utils::list_effects);
-  m.def(
-      "torchaudio::sox_utils_list_read_formats",
-      &torchaudio::sox_utils::list_read_formats);
-  m.def(
-      "torchaudio::sox_utils_list_write_formats",
-      &torchaudio::sox_utils::list_write_formats);
-  m.def(
-      "torchaudio::sox_utils_get_buffer_size",
-      &torchaudio::sox_utils::get_buffer_size);
-}
-
 } // namespace sox_utils
 } // namespace torchaudio
diff --git a/torchaudio/sox_effects/sox_effects.py b/torchaudio/sox_effects/sox_effects.py
index e876788df4..c343680b65 100644
--- a/torchaudio/sox_effects/sox_effects.py
+++ b/torchaudio/sox_effects/sox_effects.py
@@ -1,5 +1,4 @@
 import os
-import warnings
 from typing import List, Optional, Tuple
 
 import torch
@@ -156,14 +155,6 @@ def apply_effects_tensor(
     return torch.ops.torchaudio.sox_effects_apply_effects_tensor(tensor, sample_rate, effects, channels_first)
 
 
-_deprecation_message = (
-    "File-like object support in sox_io backend is deprecated, "
-    "and will be removed in v2.1. "
-    "See https://github.com/pytorch/audio/issues/2950 for the detail."
-    "Please migrate to the new dispatcher, or use soundfile backend."
-)
-
-
 @torchaudio._extension.fail_if_no_sox
 def apply_effects_file(
     path: str,
@@ -187,18 +178,8 @@ def apply_effects_file(
         rate and leave samples untouched.
 
     Args:
-        path (path-like object or file-like object):
-            Source of audio data. When the function is not compiled by TorchScript,
-            (e.g. ``torch.jit.script``), the following types are accepted:
-
-                  * ``path-like``: file path
-                  * ``file-like``: Object with ``read(size: int) -> bytes`` method,
-                    which returns byte string of at most ``size`` length.
-
-            When the function is compiled by TorchScript, only ``str`` type is allowed.
-
-            Note: This argument is intentionally annotated as ``str`` only for
-            TorchScript compiler compatibility.
+        path (path-like object):
+            Source of audio data.
         effects (List[List[str]]): List of effects.
         normalize (bool, optional):
             When ``True``, this function converts the native sample type to ``float32``.
@@ -283,11 +264,10 @@ def apply_effects_file(
     """
     if not torch.jit.is_scripting():
         if hasattr(path, "read"):
-            warnings.warn(_deprecation_message)
-            ret = torchaudio.lib._torchaudio_sox.apply_effects_fileobj(path, effects, normalize, channels_first, format)
-            if ret is None:
-                raise RuntimeError("Failed to load audio from {}".format(path))
-            return ret
+            raise RuntimeError(
+                "apply_effects_file function does not support file-like object. "
+                "Please use torchaudio.io.AudioEffector."
+            )
         path = os.fspath(path)
     ret = torch.ops.torchaudio.sox_effects_apply_effects_file(path, effects, normalize, channels_first, format)
     if ret is not None:
diff --git a/torchaudio/utils/sox_utils.py b/torchaudio/utils/sox_utils.py
index 384c00bf82..a978e8d1db 100644
--- a/torchaudio/utils/sox_utils.py
+++ b/torchaudio/utils/sox_utils.py
@@ -4,7 +4,6 @@
 
 from typing import Dict, List
 
-import torch
 import torchaudio
 
 
@@ -18,7 +17,7 @@ def set_seed(seed: int):
     See Also:
         http://sox.sourceforge.net/sox.html
     """
-    torch.ops.torchaudio.sox_utils_set_seed(seed)
+    torchaudio.lib._torchaudio_sox.set_seed(seed)
 
 
 @torchaudio._extension.fail_if_no_sox
@@ -36,7 +35,7 @@ def set_verbosity(verbosity: int):
     See Also:
         http://sox.sourceforge.net/sox.html
     """
-    torch.ops.torchaudio.sox_utils_set_verbosity(verbosity)
+    torchaudio.lib._torchaudio_sox.set_verbosity(verbosity)
 
 
 @torchaudio._extension.fail_if_no_sox
@@ -49,7 +48,7 @@ def set_buffer_size(buffer_size: int):
     See Also:
         http://sox.sourceforge.net/sox.html
     """
-    torch.ops.torchaudio.sox_utils_set_buffer_size(buffer_size)
+    torchaudio.lib._torchaudio_sox.set_buffer_size(buffer_size)
 
 
 @torchaudio._extension.fail_if_no_sox
@@ -63,7 +62,7 @@ def set_use_threads(use_threads: bool):
     See Also:
         http://sox.sourceforge.net/sox.html
     """
-    torch.ops.torchaudio.sox_utils_set_use_threads(use_threads)
+    torchaudio.lib._torchaudio_sox.set_use_threads(use_threads)
 
 
 @torchaudio._extension.fail_if_no_sox
@@ -73,7 +72,7 @@ def list_effects() -> Dict[str, str]:
     Returns:
         Dict[str, str]: Mapping from ``effect name`` to ``usage``
     """
-    return dict(torch.ops.torchaudio.sox_utils_list_effects())
+    return dict(torchaudio.lib._torchaudio_sox.list_effects())
 
 
 @torchaudio._extension.fail_if_no_sox
@@ -83,7 +82,7 @@ def list_read_formats() -> List[str]:
     Returns:
         List[str]: List of supported audio formats
     """
-    return torch.ops.torchaudio.sox_utils_list_read_formats()
+    return torchaudio.lib._torchaudio_sox.list_read_formats()
 
 
 @torchaudio._extension.fail_if_no_sox
@@ -93,7 +92,7 @@ def list_write_formats() -> List[str]:
     Returns:
         List[str]: List of supported audio formats
     """
-    return torch.ops.torchaudio.sox_utils_list_write_formats()
+    return torchaudio.lib._torchaudio_sox.list_write_formats()
 
 
 @torchaudio._extension.fail_if_no_sox
@@ -103,4 +102,4 @@ def get_buffer_size() -> int:
     Returns:
         int: size in bytes of buffers used for processing audio.
     """
-    return torch.ops.torchaudio.sox_utils_get_buffer_size()
+    return torchaudio.lib._torchaudio_sox.get_buffer_size()