From f46b7c55e836336466ce669f37fb10562015ac5f Mon Sep 17 00:00:00 2001
From: moto <855818+mthrok@users.noreply.github.com>
Date: Sun, 28 May 2023 01:01:01 -0400
Subject: [PATCH] Use dlopen to open sox

---
 setup.py                              | 20 +-----
 third_party/sox/CMakeLists.txt        | 91 ++++-----------------------
 torchaudio/csrc/sox/CMakeLists.txt    |  1 +
 torchaudio/csrc/sox/effects.cpp       | 36 ++++-------
 torchaudio/csrc/sox/effects.h         |  6 +-
 torchaudio/csrc/sox/effects_chain.cpp | 49 +++++++--------
 torchaudio/csrc/sox/effects_chain.h   |  6 +-
 torchaudio/csrc/sox/io.cpp            | 27 +++-----
 torchaudio/csrc/sox/io.h              |  6 +-
 torchaudio/csrc/sox/libsox.cpp        | 73 +++++++++++++++++++++
 torchaudio/csrc/sox/libsox.h          | 64 +++++++++++++++++++
 torchaudio/csrc/sox/pybind/pybind.cpp | 34 ++++------
 torchaudio/csrc/sox/types.cpp         |  6 +-
 torchaudio/csrc/sox/types.h           |  6 +-
 torchaudio/csrc/sox/utils.cpp         | 28 ++++-----
 torchaudio/csrc/sox/utils.h           |  6 +-
 16 files changed, 233 insertions(+), 226 deletions(-)
 create mode 100644 torchaudio/csrc/sox/libsox.cpp
 create mode 100644 torchaudio/csrc/sox/libsox.h

diff --git a/setup.py b/setup.py
index f55a2c3f01f..37490eb9ae1 100644
--- a/setup.py
+++ b/setup.py
@@ -104,18 +104,6 @@ def _parse_url(path):
                 yield url
 
 
-def _parse_sources():
-    third_party_dir = ROOT_DIR / "third_party"
-    libs = ["sox"]
-    archive_dir = third_party_dir / "archives"
-    archive_dir.mkdir(exist_ok=True)
-    for lib in libs:
-        cmake_file = third_party_dir / lib / "CMakeLists.txt"
-        for url in _parse_url(cmake_file):
-            path = archive_dir / os.path.basename(url)
-            yield path, url
-
-
 def _fetch_archives(src):
     for dest, url in src:
         if not dest.exists():
@@ -123,12 +111,6 @@ def _fetch_archives(src):
             torch.hub.download_url_to_file(url, dest, progress=False)
 
 
-def _fetch_third_party_libraries():
-    _init_submodule()
-    if os.name != "nt":
-        _fetch_archives(_parse_sources())
-
-
 def _main():
     sha = _run_cmd(["git", "rev-parse", "HEAD"])
     branch = _run_cmd(["git", "rev-parse", "--abbrev-ref", "HEAD"])
@@ -142,7 +124,7 @@ def _main():
     print("-- Building version", version)
 
     _make_version_file(version, sha)
-    _fetch_third_party_libraries()
+    _init_submodule()
 
     with open("README.md") as f:
         long_description = f.read()
diff --git a/third_party/sox/CMakeLists.txt b/third_party/sox/CMakeLists.txt
index a9b6dd65b12..4fd0aa73769 100644
--- a/third_party/sox/CMakeLists.txt
+++ b/third_party/sox/CMakeLists.txt
@@ -1,85 +1,18 @@
-find_package(PkgConfig REQUIRED)
+include(FetchContent)
 
-include(ExternalProject)
-
-# set(INSTALL_DIR ${CMAKE_BINARY_DIR}/sox)
-set(INSTALL_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../install)
-set(ARCHIVE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../archives)
-set(patch_dir ${PROJECT_SOURCE_DIR}/third_party/patches)
-set(COMPILE_ARGS
-  --quiet
-  --enable-shared
-  --disable-static
-  --prefix=${INSTALL_DIR}
-  --with-pic
-  --disable-dependency-tracking
-  --disable-debug
-  --disable-examples
-  --disable-doc
-  --disable-openmp
-  --without-amrnb
-  --without-amrwb
-  --without-flac
-  --without-lame
-  --without-oggvorbis
-  --without-opus
-  --without-alsa
-  --without-ao
-  --without-coreaudio
-  --without-oss
-  --without-id3tag
-  --without-ladspa
-  --without-mad
-  --without-magic
-  --without-png
-  --without-pulseaudio
-  --without-sndfile
-  --without-sndio
-  --without-sunaudio
-  --without-waveaudio
-  --without-wavpack
-  --without-twolame
-  )
-
-if (APPLE)
-  set(byproduct ${INSTALL_DIR}/lib/libsox.3.dylib)
-  set(sox_library ${INSTALL_DIR}/lib/libsox.dylib)
-else()
-  set(byproduct ${INSTALL_DIR}/lib/libsox.so)
-  set(sox_library ${INSTALL_DIR}/lib/libsox.so)
-endif()
-
-ExternalProject_Add(sox
-  PREFIX ${CMAKE_CURRENT_BINARY_DIR}
-  DOWNLOAD_DIR ${ARCHIVE_DIR}
+FetchContent_Declare(
+  sox
   URL https://downloads.sourceforge.net/project/sox/sox/14.4.2/sox-14.4.2.tar.bz2
   URL_HASH SHA256=81a6956d4330e75b5827316e44ae381e6f1e8928003c6aa45896da9041ea149c
-  PATCH_COMMAND cp ${patch_dir}/config.guess ${patch_dir}/config.sub ${CMAKE_CURRENT_BINARY_DIR}/src/sox/
-  CONFIGURE_COMMAND ${CMAKE_CURRENT_BINARY_DIR}/src/sox/configure ${COMPILE_ARGS}
-  BUILD_BYPRODUCTS ${byproduct}
-  DOWNLOAD_NO_PROGRESS ON
-  LOG_DOWNLOAD ON
-  LOG_UPDATE ON
-  LOG_CONFIGURE ON
-  LOG_BUILD ON
-  LOG_INSTALL ON
-  LOG_MERGED_STDOUTERR ON
-  LOG_OUTPUT_ON_FAILURE ON
-)
-
-if (APPLE)
-  # Modify RPATH, so that they won't be hardcoded
-  add_custom_command(
-    OUTPUT ${sox_library}
-    COMMAND install_name_tool -change ${byproduct} @rpath/libsox.dylib -id @rpath/libsox.dylib ${byproduct}
-    DEPENDS sox
-    )
-  add_custom_target(_libsox DEPENDS ${sox_library})
-else()
-  add_custom_target(_libsox DEPENDS sox)
+  PATCH_COMMAND ""
+  CONFIGURE_COMMAND ""
+  BUILD_COMMAND ""
+  )
+# FetchContent_MakeAvailable will parse the downloaded content and set up the targets.
+# We want to only download and not build, so we run Populate manually.
+if(NOT sox_POPULATED)
+  FetchContent_Populate(sox)
 endif()
 
 add_library(libsox INTERFACE)
-add_dependencies(libsox _libsox)
-target_include_directories(libsox INTERFACE ${INSTALL_DIR}/include)
-target_link_libraries(libsox INTERFACE ${sox_library})
+target_include_directories(libsox INTERFACE ${sox_SOURCE_DIR}/src)
diff --git a/torchaudio/csrc/sox/CMakeLists.txt b/torchaudio/csrc/sox/CMakeLists.txt
index 3391a4fc370..59dbbeefdb9 100644
--- a/torchaudio/csrc/sox/CMakeLists.txt
+++ b/torchaudio/csrc/sox/CMakeLists.txt
@@ -1,5 +1,6 @@
 set(
   sources
+  libsox.cpp
   io.cpp
   utils.cpp
   effects.cpp
diff --git a/torchaudio/csrc/sox/effects.cpp b/torchaudio/csrc/sox/effects.cpp
index 9232d870b1d..3c3ce0d54ac 100644
--- a/torchaudio/csrc/sox/effects.cpp
+++ b/torchaudio/csrc/sox/effects.cpp
@@ -1,13 +1,10 @@
 #include <sox.h>
 #include <torchaudio/csrc/sox/effects.h>
 #include <torchaudio/csrc/sox/effects_chain.h>
+#include <torchaudio/csrc/sox/libsox.h>
 #include <torchaudio/csrc/sox/utils.h>
 
-using namespace torchaudio::sox_utils;
-
-namespace torchaudio {
-namespace sox_effects {
-
+namespace torchaudio::sox {
 namespace {
 
 enum SoxEffectsResourceState { NotInitialized, Initialized, ShutDown };
@@ -22,7 +19,7 @@ void initialize_sox_effects() {
   switch (SOX_RESOURCE_STATE) {
     case NotInitialized:
       TORCH_CHECK(
-          sox_init() == SOX_SUCCESS, "Failed to initialize sox effects.");
+          lsx().sox_init() == SOX_SUCCESS, "Failed to initialize sox effects.");
       SOX_RESOURCE_STATE = Initialized;
       break;
     case Initialized:
@@ -41,7 +38,7 @@ void shutdown_sox_effects() {
       TORCH_CHECK(false, "SoX Effects is not initialized. Cannot shutdown.");
     case Initialized:
       TORCH_CHECK(
-          sox_quit() == SOX_SUCCESS, "Failed to initialize sox effects.");
+          lsx().sox_quit() == SOX_SUCCESS, "Failed to initialize sox effects.");
       SOX_RESOURCE_STATE = ShutDown;
       break;
     case ShutDown:
@@ -58,7 +55,7 @@ auto apply_effects_tensor(
 
   // Create SoxEffectsChain
   const auto dtype = waveform.dtype();
-  torchaudio::sox_effects_chain::SoxEffectsChain chain(
+  SoxEffectsChain chain(
       /*input_encoding=*/get_tensor_encodinginfo(dtype),
       /*output_encoding=*/get_tensor_encodinginfo(dtype));
 
@@ -95,7 +92,7 @@ auto apply_effects_file(
     const c10::optional<std::string>& format)
     -> c10::optional<std::tuple<torch::Tensor, int64_t>> {
   // Open input file
-  SoxFormat sf(sox_open_read(
+  SoxFormat sf(lsx().sox_open_read(
       path.c_str(),
       /*signal=*/nullptr,
       /*encoding=*/nullptr,
@@ -113,7 +110,7 @@ auto apply_effects_file(
   out_buffer.reserve(sf->signal.length);
 
   // Create and run SoxEffectsChain
-  torchaudio::sox_effects_chain::SoxEffectsChain chain(
+  SoxEffectsChain chain(
       /*input_encoding=*/sf->encoding,
       /*output_encoding=*/get_tensor_encodinginfo(dtype));
 
@@ -133,7 +130,6 @@ auto apply_effects_file(
       dtype,
       normalize.value_or(true),
       channels_first_);
-
   return std::tuple<torch::Tensor, int64_t>(
       tensor, chain.getOutputSampleRate());
 }
@@ -141,17 +137,9 @@ auto apply_effects_file(
 TORCH_LIBRARY_FRAGMENT(torchaudio, m) {
   m.def(
       "torchaudio::sox_effects_initialize_sox_effects",
-      &torchaudio::sox_effects::initialize_sox_effects);
-  m.def(
-      "torchaudio::sox_effects_shutdown_sox_effects",
-      &torchaudio::sox_effects::shutdown_sox_effects);
-  m.def(
-      "torchaudio::sox_effects_apply_effects_tensor",
-      &torchaudio::sox_effects::apply_effects_tensor);
-  m.def(
-      "torchaudio::sox_effects_apply_effects_file",
-      &torchaudio::sox_effects::apply_effects_file);
+      &initialize_sox_effects);
+  m.def("torchaudio::sox_effects_shutdown_sox_effects", &shutdown_sox_effects);
+  m.def("torchaudio::sox_effects_apply_effects_tensor", &apply_effects_tensor);
+  m.def("torchaudio::sox_effects_apply_effects_file", &apply_effects_file);
 }
-
-} // namespace sox_effects
-} // namespace torchaudio
+} // namespace torchaudio::sox
diff --git a/torchaudio/csrc/sox/effects.h b/torchaudio/csrc/sox/effects.h
index bac088ee189..70e59f887f0 100644
--- a/torchaudio/csrc/sox/effects.h
+++ b/torchaudio/csrc/sox/effects.h
@@ -4,8 +4,7 @@
 #include <torch/script.h>
 #include <torchaudio/csrc/sox/utils.h>
 
-namespace torchaudio {
-namespace sox_effects {
+namespace torchaudio::sox {
 
 void initialize_sox_effects();
 
@@ -25,7 +24,6 @@ auto apply_effects_file(
     const c10::optional<std::string>& format)
     -> c10::optional<std::tuple<torch::Tensor, int64_t>>;
 
-} // namespace sox_effects
-} // namespace torchaudio
+} // namespace torchaudio::sox
 
 #endif
diff --git a/torchaudio/csrc/sox/effects_chain.cpp b/torchaudio/csrc/sox/effects_chain.cpp
index 8d8fbcc829e..c5d6cc27af3 100644
--- a/torchaudio/csrc/sox/effects_chain.cpp
+++ b/torchaudio/csrc/sox/effects_chain.cpp
@@ -1,13 +1,11 @@
 #include <torchaudio/csrc/sox/effects_chain.h>
+#include <torchaudio/csrc/sox/libsox.h>
 #include <torchaudio/csrc/sox/utils.h>
 #include "c10/util/Exception.h"
 
 using namespace torch::indexing;
-using namespace torchaudio::sox_utils;
-
-namespace torchaudio {
-namespace sox_effects_chain {
 
+namespace torchaudio::sox {
 namespace {
 
 /// helper classes for passing the location of input tensor and output buffer
@@ -114,12 +112,12 @@ int file_output_flow(
   *osamp = 0;
   if (*isamp) {
     auto sf = static_cast<FileOutputPriv*>(effp->priv)->sf;
-    if (sox_write(sf, ibuf, *isamp) != *isamp) {
+    if (lsx().sox_write(sf, ibuf, *isamp) != *isamp) {
       TORCH_CHECK(
           !sf->sox_errno,
           sf->sox_errstr,
           " ",
-          sox_strerror(sf->sox_errno),
+          lsx().sox_strerror(sf->sox_errno),
           " ",
           sf->filename);
       return SOX_EOF;
@@ -199,18 +197,18 @@ SoxEffectsChain::SoxEffectsChain(
       in_sig_(),
       interm_sig_(),
       out_sig_(),
-      sec_(sox_create_effects_chain(&in_enc_, &out_enc_)) {
+      sec_(lsx().sox_create_effects_chain(&in_enc_, &out_enc_)) {
   TORCH_CHECK(sec_, "Failed to create effect chain.");
 }
 
 SoxEffectsChain::~SoxEffectsChain() {
   if (sec_ != nullptr) {
-    sox_delete_effects_chain(sec_);
+    lsx().sox_delete_effects_chain(sec_);
   }
 }
 
 void SoxEffectsChain::run() {
-  sox_flow_effects(sec_, NULL, NULL);
+  lsx().sox_flow_effects(sec_, NULL, NULL);
 }
 
 void SoxEffectsChain::addInputTensor(
@@ -219,44 +217,44 @@ void SoxEffectsChain::addInputTensor(
     bool channels_first) {
   in_sig_ = get_signalinfo(waveform, sample_rate, "wav", channels_first);
   interm_sig_ = in_sig_;
-  SoxEffect e(sox_create_effect(get_tensor_input_handler()));
+  SoxEffect e(lsx().sox_create_effect(get_tensor_input_handler()));
   auto priv = static_cast<TensorInputPriv*>(e->priv);
   priv->index = 0;
   priv->waveform = waveform;
   priv->sample_rate = sample_rate;
   priv->channels_first = channels_first;
   TORCH_CHECK(
-      sox_add_effect(sec_, e, &interm_sig_, &in_sig_) == SOX_SUCCESS,
+      lsx().sox_add_effect(sec_, e, &interm_sig_, &in_sig_) == SOX_SUCCESS,
       "Internal Error: Failed to add effect: input_tensor");
 }
 
 void SoxEffectsChain::addOutputBuffer(
     std::vector<sox_sample_t>* output_buffer) {
-  SoxEffect e(sox_create_effect(get_tensor_output_handler()));
+  SoxEffect e(lsx().sox_create_effect(get_tensor_output_handler()));
   static_cast<TensorOutputPriv*>(e->priv)->buffer = output_buffer;
   TORCH_CHECK(
-      sox_add_effect(sec_, e, &interm_sig_, &in_sig_) == SOX_SUCCESS,
+      lsx().sox_add_effect(sec_, e, &interm_sig_, &in_sig_) == SOX_SUCCESS,
       "Internal Error: Failed to add effect: output_tensor");
 }
 
 void SoxEffectsChain::addInputFile(sox_format_t* sf) {
   in_sig_ = sf->signal;
   interm_sig_ = in_sig_;
-  SoxEffect e(sox_create_effect(sox_find_effect("input")));
+  SoxEffect e(lsx().sox_create_effect(lsx().sox_find_effect("input")));
   char* opts[] = {(char*)sf};
-  sox_effect_options(e, 1, opts);
+  lsx().sox_effect_options(e, 1, opts);
   TORCH_CHECK(
-      sox_add_effect(sec_, e, &interm_sig_, &in_sig_) == SOX_SUCCESS,
+      lsx().sox_add_effect(sec_, e, &interm_sig_, &in_sig_) == SOX_SUCCESS,
       "Internal Error: Failed to add effect: input ",
       sf->filename);
 }
 
 void SoxEffectsChain::addOutputFile(sox_format_t* sf) {
   out_sig_ = sf->signal;
-  SoxEffect e(sox_create_effect(get_file_output_handler()));
+  SoxEffect e(lsx().sox_create_effect(get_file_output_handler()));
   static_cast<FileOutputPriv*>(e->priv)->sf = sf;
   TORCH_CHECK(
-      sox_add_effect(sec_, e, &interm_sig_, &out_sig_) == SOX_SUCCESS,
+      lsx().sox_add_effect(sec_, e, &interm_sig_, &out_sig_) == SOX_SUCCESS,
       "Internal Error: Failed to add effect: output ",
       sf->filename);
 }
@@ -268,12 +266,12 @@ void SoxEffectsChain::addEffect(const std::vector<std::string> effect) {
   TORCH_CHECK(
       UNSUPPORTED_EFFECTS.find(name) == UNSUPPORTED_EFFECTS.end(),
       "Unsupported effect: ",
-      name)
+      name);
 
-  auto returned_effect = sox_find_effect(name.c_str());
+  auto returned_effect = lsx().sox_find_effect(name.c_str());
   TORCH_CHECK(returned_effect, "Unsupported effect: ", name)
 
-  SoxEffect e(sox_create_effect(returned_effect));
+  SoxEffect e(lsx().sox_create_effect(returned_effect));
   const auto num_options = num_args - 1;
 
   std::vector<char*> opts;
@@ -281,12 +279,12 @@ void SoxEffectsChain::addEffect(const std::vector<std::string> effect) {
     opts.push_back((char*)effect[i].c_str());
   }
   TORCH_CHECK(
-      sox_effect_options(e, num_options, num_options ? opts.data() : nullptr) ==
-          SOX_SUCCESS,
+      lsx().sox_effect_options(
+          e, num_options, num_options ? opts.data() : nullptr) == SOX_SUCCESS,
       "Invalid effect option: ",
       c10::Join(" ", effect))
   TORCH_CHECK(
-      sox_add_effect(sec_, e, &interm_sig_, &in_sig_) == SOX_SUCCESS,
+      lsx().sox_add_effect(sec_, e, &interm_sig_, &in_sig_) == SOX_SUCCESS,
       "Internal Error: Failed to add effect: \"",
       c10::Join(" ", effect),
       "\"");
@@ -300,5 +298,4 @@ int64_t SoxEffectsChain::getOutputSampleRate() {
   return interm_sig_.rate;
 }
 
-} // namespace sox_effects_chain
-} // namespace torchaudio
+} // namespace torchaudio::sox
diff --git a/torchaudio/csrc/sox/effects_chain.h b/torchaudio/csrc/sox/effects_chain.h
index c456276ef06..7245447738a 100644
--- a/torchaudio/csrc/sox/effects_chain.h
+++ b/torchaudio/csrc/sox/effects_chain.h
@@ -4,8 +4,7 @@
 #include <sox.h>
 #include <torchaudio/csrc/sox/utils.h>
 
-namespace torchaudio {
-namespace sox_effects_chain {
+namespace torchaudio::sox {
 
 // Helper struct to safely close sox_effect_t* pointer returned by
 // sox_create_effect
@@ -57,7 +56,6 @@ class SoxEffectsChain {
   int64_t getOutputSampleRate();
 };
 
-} // namespace sox_effects_chain
-} // namespace torchaudio
+} // namespace torchaudio::sox
 
 #endif
diff --git a/torchaudio/csrc/sox/io.cpp b/torchaudio/csrc/sox/io.cpp
index dd4951ea7c8..197c3c2bee3 100644
--- a/torchaudio/csrc/sox/io.cpp
+++ b/torchaudio/csrc/sox/io.cpp
@@ -1,19 +1,18 @@
 #include <torchaudio/csrc/sox/effects.h>
 #include <torchaudio/csrc/sox/effects_chain.h>
 #include <torchaudio/csrc/sox/io.h>
+#include <torchaudio/csrc/sox/libsox.h>
 #include <torchaudio/csrc/sox/types.h>
 #include <torchaudio/csrc/sox/utils.h>
 
 using namespace torch::indexing;
-using namespace torchaudio::sox_utils;
 
-namespace torchaudio {
-namespace sox_io {
+namespace torchaudio::sox {
 
 c10::optional<MetaDataTuple> get_info_file(
     const std::string& path,
     const c10::optional<std::string>& format) {
-  SoxFormat sf(sox_open_read(
+  SoxFormat sf(lsx().sox_open_read(
       path.c_str(),
       /*signal=*/nullptr,
       /*encoding=*/nullptr,
@@ -68,8 +67,7 @@ c10::optional<std::tuple<torch::Tensor, int64_t>> load_audio_file(
     c10::optional<bool> channels_first,
     const c10::optional<std::string>& format) {
   auto effects = get_effects(frame_offset, num_frames);
-  return torchaudio::sox_effects::apply_effects_file(
-      path, effects, normalize, channels_first, format);
+  return apply_effects_file(path, effects, normalize, channels_first, format);
 }
 
 void save_audio_file(
@@ -110,7 +108,7 @@ void save_audio_file(
   const auto encoding_info = get_encodinginfo_for_save(
       filetype, tensor.dtype(), compression, encoding, bits_per_sample);
 
-  SoxFormat sf(sox_open_write(
+  SoxFormat sf(lsx().sox_open_write(
       path.c_str(),
       &signal_info,
       &encoding_info,
@@ -123,7 +121,7 @@ void save_audio_file(
       "Error saving audio file: failed to open file ",
       path);
 
-  torchaudio::sox_effects_chain::SoxEffectsChain chain(
+  SoxEffectsChain chain(
       /*input_encoding=*/get_tensor_encodinginfo(tensor.dtype()),
       /*output_encoding=*/sf->encoding);
   chain.addInputTensor(&tensor, sample_rate, channels_first);
@@ -132,14 +130,9 @@ void save_audio_file(
 }
 
 TORCH_LIBRARY_FRAGMENT(torchaudio, m) {
-  m.def("torchaudio::sox_io_get_info", &torchaudio::sox_io::get_info_file);
-  m.def(
-      "torchaudio::sox_io_load_audio_file",
-      &torchaudio::sox_io::load_audio_file);
-  m.def(
-      "torchaudio::sox_io_save_audio_file",
-      &torchaudio::sox_io::save_audio_file);
+  m.def("torchaudio::sox_io_get_info", &get_info_file);
+  m.def("torchaudio::sox_io_load_audio_file", &load_audio_file);
+  m.def("torchaudio::sox_io_save_audio_file", &save_audio_file);
 }
 
-} // namespace sox_io
-} // namespace torchaudio
+} // namespace torchaudio::sox
diff --git a/torchaudio/csrc/sox/io.h b/torchaudio/csrc/sox/io.h
index a1f4c8a5bc7..7ef84e48ad3 100644
--- a/torchaudio/csrc/sox/io.h
+++ b/torchaudio/csrc/sox/io.h
@@ -4,8 +4,7 @@
 #include <torch/script.h>
 #include <torchaudio/csrc/sox/utils.h>
 
-namespace torchaudio {
-namespace sox_io {
+namespace torchaudio::sox {
 
 auto get_effects(
     const c10::optional<int64_t>& frame_offset,
@@ -37,7 +36,6 @@ void save_audio_file(
     c10::optional<std::string> encoding,
     c10::optional<int64_t> bits_per_sample);
 
-} // namespace sox_io
-} // namespace torchaudio
+} // namespace torchaudio::sox
 
 #endif
diff --git a/torchaudio/csrc/sox/libsox.cpp b/torchaudio/csrc/sox/libsox.cpp
new file mode 100644
index 00000000000..899ca6b3633
--- /dev/null
+++ b/torchaudio/csrc/sox/libsox.cpp
@@ -0,0 +1,73 @@
+#include <torchaudio/csrc/sox/libsox.h>
+#include <c10/util/CallOnce.h>
+
+namespace torchaudio::sox {
+namespace {
+
+// Handle to the dlopen-ed libsox
+static std::unique_ptr<at::DynamicLibrary> libsox;
+// LSX class which torchaudio will be using
+static LSX _lsx;
+
+// dlopen libsox and populate the function pointers on _lsx.
+void _init_lsx();
+
+} // namespace
+
+// Returns the shared LSX table, running _init_lsx exactly once beforehand.
+LSX& lsx() {
+  static c10::once_flag loaded;
+  c10::call_once(loaded, [] { _init_lsx(); });
+  return _lsx;
+}
+
+namespace {
+
+// dlopen libsox and populate the function pointers on _lsx.
+void _init_lsx() {
+  libsox = []() {
+#if defined(_WIN32)
+#error Windows is not supported.
+#elif defined(__APPLE__)
+    auto lsx_ =
+        std::make_unique<at::DynamicLibrary>("libsox.3.dylib", "libsox.dylib");
+#else
+    // Linux shared objects put the version after ".so" (i.e. libsox.so.3).
+    auto lsx_ =
+        std::make_unique<at::DynamicLibrary>("libsox.so.3", "libsox.so");
+#endif
+    // Check version: only 14.4.2 is supported.
+    auto fn = (const char* (*)(void))lsx_->sym("sox_version");
+    TORCH_CHECK(
+        strcmp(fn(), "14.4.2") == 0,
+        "Need libsox 14.4.2, but found ", fn());
+    return lsx_;
+  }();
+
+#define set_func(NAME) _lsx.NAME = (decltype(LSX::NAME))libsox->sym(#NAME)
+
+  // Note: if any of the following fails, _lsx is left in an invalid state.
+  // But _lsx cannot be accessed unless this function succeeds, so it's okay.
+  set_func(sox_add_effect);
+  set_func(sox_close);
+  set_func(sox_create_effect);
+  set_func(sox_create_effects_chain);
+  set_func(sox_delete_effect);
+  set_func(sox_delete_effects_chain);
+  set_func(sox_effect_options);
+  set_func(sox_find_effect);
+  set_func(sox_flow_effects);
+  set_func(sox_get_effect_fns);
+  set_func(sox_get_format_fns);
+  set_func(sox_get_globals);
+  set_func(sox_init);
+  set_func(sox_open_read);
+  set_func(sox_open_write);
+  set_func(sox_quit);
+  set_func(sox_strerror);
+  set_func(sox_write);
+#undef set_func
+}
+
+} // namespace
+} // namespace torchaudio::sox
diff --git a/torchaudio/csrc/sox/libsox.h b/torchaudio/csrc/sox/libsox.h
new file mode 100644
index 00000000000..2da01a65edb
--- /dev/null
+++ b/torchaudio/csrc/sox/libsox.h
@@ -0,0 +1,64 @@
+#pragma once
+#include <ATen/DynamicLibrary.h>
+#include <memory.h>
+#include <sox.h>
+
+namespace torchaudio::sox {
+
+// Table of libsox entry points resolved at runtime from the dlopen-ed library.
+// Each member points to the libsox function of the same name and stays null
+// until populated; use lsx() to obtain the populated table.
+struct LSX {
+  int (*sox_add_effect)(
+      sox_effects_chain_t* chain,
+      sox_effect_t* effp,
+      sox_signalinfo_t* in,
+      sox_signalinfo_t const* out) = nullptr;
+  int (*sox_close)(sox_format_t* ft) = nullptr;
+
+  sox_effect_t* (*sox_create_effect)(sox_effect_handler_t const* eh) = nullptr;
+
+  sox_effects_chain_t* (*sox_create_effects_chain)(
+      sox_encodinginfo_t const* in_enc,
+      sox_encodinginfo_t const* out_enc) = nullptr;
+
+  void (*sox_delete_effect)(sox_effect_t* effp) = nullptr;
+  void (*sox_delete_effects_chain)(sox_effects_chain_t* ecp) = nullptr;
+
+  int (*sox_effect_options)(sox_effect_t* effp, int argc, char* const argv[]) =
+      nullptr;
+  const sox_effect_handler_t* (*sox_find_effect)(char const* name) = nullptr;
+  int (*sox_flow_effects)(
+      sox_effects_chain_t* chain,
+      int (*callback)(sox_bool all_done, void* client_data),
+      void* client_data) = nullptr;
+
+  const sox_effect_fn_t* (*sox_get_effect_fns)(void) = nullptr;
+  const sox_format_tab_t* (*sox_get_format_fns)(void) = nullptr;
+  sox_globals_t* (*sox_get_globals)(void) = nullptr;
+
+  int (*sox_init)(void) = nullptr;
+
+  sox_format_t* (*sox_open_read)(
+      char const* path,
+      sox_signalinfo_t const* signal,
+      sox_encodinginfo_t const* encoding,
+      char const* filetype) = nullptr;
+
+  sox_format_t* (*sox_open_write)(
+      char const* path,
+      sox_signalinfo_t const* signal,
+      sox_encodinginfo_t const* encoding,
+      char const* filetype,
+      sox_oob_t const* oob,
+      sox_bool (*overwrite_permitted)(char const* filename)) = nullptr;
+  int (*sox_quit)(void) = nullptr;
+
+  const char* (*sox_strerror)(int sox_errno) = nullptr;
+  size_t (*sox_write)(sox_format_t* ft, const sox_sample_t* buf, size_t len) =
+      nullptr;
+};
+
+LSX& lsx();
+
+} // namespace torchaudio::sox
diff --git a/torchaudio/csrc/sox/pybind/pybind.cpp b/torchaudio/csrc/sox/pybind/pybind.cpp
index e7f8a8216c7..27b32515355 100644
--- a/torchaudio/csrc/sox/pybind/pybind.cpp
+++ b/torchaudio/csrc/sox/pybind/pybind.cpp
@@ -1,4 +1,5 @@
 #include <torch/extension.h>
+#include <torchaudio/csrc/sox/libsox.h>
 #include <torchaudio/csrc/sox/utils.h>
 
 namespace torchaudio {
@@ -6,40 +7,27 @@ namespace sox {
 namespace {
 
 PYBIND11_MODULE(_torchaudio_sox, m) {
+  m.def("set_seed", &torchaudio::sox::set_seed, "Set random seed.");
+  m.def("set_verbosity", &torchaudio::sox::set_verbosity, "Set verbosity.");
+  m.def("set_use_threads", &torchaudio::sox::set_use_threads, "Set threading.");
   m.def(
-      "set_seed",
-      &torchaudio::sox_utils::set_seed,
-      "Set random seed.");
+      "set_buffer_size", &torchaudio::sox::set_buffer_size, "Set buffer size.");
   m.def(
-      "set_verbosity",
-      &torchaudio::sox_utils::set_verbosity,
-      "Set verbosity.");
-  m.def(
-      "set_use_threads",
-      &torchaudio::sox_utils::set_use_threads,
-      "Set threading.");
-  m.def(
-      "set_buffer_size",
-      &torchaudio::sox_utils::set_buffer_size,
-      "Set buffer size.");
-  m.def(
-      "get_buffer_size",
-      &torchaudio::sox_utils::get_buffer_size,
-      "Get buffer size.");
+      "get_buffer_size", &torchaudio::sox::get_buffer_size, "Get buffer size.");
   m.def(
       "list_effects",
-      &torchaudio::sox_utils::list_effects,
+      &torchaudio::sox::list_effects,
       "List available effects.");
   m.def(
       "list_read_formats",
-      &torchaudio::sox_utils::list_read_formats,
+      &torchaudio::sox::list_read_formats,
       "List supported formats for decoding.");
   m.def(
       "list_write_formats",
-      &torchaudio::sox_utils::list_write_formats,
+      &torchaudio::sox::list_write_formats,
       "List supported formats for encoding.");
 }
 
-} // torchaudio
-} // sox
 } // namespace
+} // namespace sox
+} // namespace torchaudio
diff --git a/torchaudio/csrc/sox/types.cpp b/torchaudio/csrc/sox/types.cpp
index 9beaadda409..1b9702d3680 100644
--- a/torchaudio/csrc/sox/types.cpp
+++ b/torchaudio/csrc/sox/types.cpp
@@ -1,7 +1,6 @@
 #include <torchaudio/csrc/sox/types.h>
 
-namespace torchaudio {
-namespace sox_utils {
+namespace torchaudio::sox {
 
 Format get_format_from_string(const std::string& format) {
   if (format == "wav")
@@ -129,5 +128,4 @@ std::string get_encoding(sox_encoding_t encoding) {
   }
 }
 
-} // namespace sox_utils
-} // namespace torchaudio
+} // namespace torchaudio::sox
diff --git a/torchaudio/csrc/sox/types.h b/torchaudio/csrc/sox/types.h
index afd84791a69..6b234c3ce5d 100644
--- a/torchaudio/csrc/sox/types.h
+++ b/torchaudio/csrc/sox/types.h
@@ -4,8 +4,7 @@
 #include <sox.h>
 #include <torch/script.h>
 
-namespace torchaudio {
-namespace sox_utils {
+namespace torchaudio::sox {
 
 enum class Format {
   WAV,
@@ -54,7 +53,6 @@ BitDepth get_bit_depth_from_option(const c10::optional<int64_t> bit_depth);
 
 std::string get_encoding(sox_encoding_t encoding);
 
-} // namespace sox_utils
-} // namespace torchaudio
+} // namespace torchaudio::sox
 
 #endif
diff --git a/torchaudio/csrc/sox/utils.cpp b/torchaudio/csrc/sox/utils.cpp
index f779932da40..51de871d15b 100644
--- a/torchaudio/csrc/sox/utils.cpp
+++ b/torchaudio/csrc/sox/utils.cpp
@@ -1,34 +1,34 @@
 #include <c10/core/ScalarType.h>
 #include <sox.h>
+#include <torchaudio/csrc/sox/libsox.h>
 #include <torchaudio/csrc/sox/types.h>
 #include <torchaudio/csrc/sox/utils.h>
 
-namespace torchaudio {
-namespace sox_utils {
+namespace torchaudio::sox {
 
 void set_seed(const int64_t seed) {
-  sox_get_globals()->ranqd1 = static_cast<sox_int32_t>(seed);
+  lsx().sox_get_globals()->ranqd1 = static_cast<sox_int32_t>(seed);
 }
 
 void set_verbosity(const int64_t verbosity) {
-  sox_get_globals()->verbosity = static_cast<unsigned>(verbosity);
+  lsx().sox_get_globals()->verbosity = static_cast<unsigned>(verbosity);
 }
 
 void set_use_threads(const bool use_threads) {
-  sox_get_globals()->use_threads = static_cast<sox_bool>(use_threads);
+  lsx().sox_get_globals()->use_threads = static_cast<sox_bool>(use_threads);
 }
 
 void set_buffer_size(const int64_t buffer_size) {
-  sox_get_globals()->bufsiz = static_cast<size_t>(buffer_size);
+  lsx().sox_get_globals()->bufsiz = static_cast<size_t>(buffer_size);
 }
 
 int64_t get_buffer_size() {
-  return sox_get_globals()->bufsiz;
+  return lsx().sox_get_globals()->bufsiz;
 }
 
 std::vector<std::vector<std::string>> list_effects() {
   std::vector<std::vector<std::string>> effects;
-  for (const sox_effect_fn_t* fns = sox_get_effect_fns(); *fns; ++fns) {
+  for (const sox_effect_fn_t* fns = lsx().sox_get_effect_fns(); *fns; ++fns) {
     const sox_effect_handler_t* handler = (*fns)();
     if (handler && handler->name) {
       if (UNSUPPORTED_EFFECTS.find(handler->name) ==
@@ -44,7 +44,8 @@ std::vector<std::vector<std::string>> list_effects() {
 
 std::vector<std::string> list_write_formats() {
   std::vector<std::string> formats;
-  for (const sox_format_tab_t* fns = sox_get_format_fns(); fns->fn; ++fns) {
+  for (const sox_format_tab_t* fns = lsx().sox_get_format_fns(); fns->fn;
+       ++fns) {
     const sox_format_handler_t* handler = fns->fn();
     for (const char* const* names = handler->names; *names; ++names) {
       if (!strchr(*names, '/') && handler->write)
@@ -56,7 +57,8 @@ std::vector<std::string> list_write_formats() {
 
 std::vector<std::string> list_read_formats() {
   std::vector<std::string> formats;
-  for (const sox_format_tab_t* fns = sox_get_format_fns(); fns->fn; ++fns) {
+  for (const sox_format_tab_t* fns = lsx().sox_get_format_fns(); fns->fn;
+       ++fns) {
     const sox_format_handler_t* handler = fns->fn();
     for (const char* const* names = handler->names; *names; ++names) {
       if (!strchr(*names, '/') && handler->read)
@@ -80,7 +82,7 @@ SoxFormat::operator sox_format_t*() const noexcept {
 
 void SoxFormat::close() {
   if (fd_ != nullptr) {
-    sox_close(fd_);
+    lsx().sox_close(fd_);
     fd_ = nullptr;
   }
 }
@@ -491,6 +493,4 @@ sox_encodinginfo_t get_encodinginfo_for_save(
       /*reverse_bits=*/sox_option_default,
       /*opposite_endian=*/sox_false};
 }
-
-} // namespace sox_utils
-} // namespace torchaudio
+} // namespace torchaudio::sox
diff --git a/torchaudio/csrc/sox/utils.h b/torchaudio/csrc/sox/utils.h
index ca84b600432..86a018dc3f1 100644
--- a/torchaudio/csrc/sox/utils.h
+++ b/torchaudio/csrc/sox/utils.h
@@ -4,8 +4,7 @@
 #include <sox.h>
 #include <torch/script.h>
 
-namespace torchaudio {
-namespace sox_utils {
+namespace torchaudio::sox {
 
 ////////////////////////////////////////////////////////////////////////////////
 // APIs for Python interaction
@@ -106,6 +105,5 @@ sox_encodinginfo_t get_encodinginfo_for_save(
     const c10::optional<std::string> encoding,
     const c10::optional<int64_t> bits_per_sample);
 
-} // namespace sox_utils
-} // namespace torchaudio
+} // namespace torchaudio::sox
 #endif