Skip to content

Commit

Permalink
[audio][PR] Add option to dlopen FFmpeg libraries (pytorch#3402)
Browse files Browse the repository at this point in the history
Summary:
Pull Request resolved: pytorch#3402

This is a second attempt at pytorch#3353.

The basic logic to enable dlopen for FFmpeg libraries is the same.
It uses `at::DynamicLibrary`, which allows torchaudio to be compiled without
linking FFmpeg libraries.

This time, a build option, DLOPEN_FFMPEG, has been added to control this
feature, so that users have a way to disable it and keep using build-time
linking.

Differential Revision: D46403783

fbshipit-source-id: 4b675e1c1239503f087a0fd50d078340c09d2282
  • Loading branch information
mthrok committed Jun 2, 2023
1 parent 2ba36b4 commit d747762
Show file tree
Hide file tree
Showing 19 changed files with 768 additions and 242 deletions.
2 changes: 2 additions & 0 deletions tools/setup_helpers/extension.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ def _get_build(var, default=False):
_BUILD_RIR = _get_build("BUILD_RIR", True)
_BUILD_RNNT = _get_build("BUILD_RNNT", True)
_USE_FFMPEG = _get_build("USE_FFMPEG", False)
_DLOPEN_FFMPEG = _get_build("DLOPEN_FFMPEG", True)
_USE_ROCM = _get_build("USE_ROCM", torch.backends.cuda.is_built() and torch.version.hip is not None)
_USE_CUDA = _get_build("USE_CUDA", torch.backends.cuda.is_built() and torch.version.hip is None)
_BUILD_ALIGN = _get_build("BUILD_ALIGN", True)
Expand Down Expand Up @@ -126,6 +127,7 @@ def build_extension(self, ext):
f"-DUSE_CUDA:BOOL={'ON' if _USE_CUDA else 'OFF'}",
f"-DUSE_OPENMP:BOOL={'ON' if _USE_OPENMP else 'OFF'}",
f"-DUSE_FFMPEG:BOOL={'ON' if _USE_FFMPEG else 'OFF'}",
f"-DDLOPEN_FFMPEG:BOOL={'ON' if _DLOPEN_FFMPEG else 'OFF'}",
]
build_args = ["--target", "install"]
# Pass CUDA architecture to cmake
Expand Down
18 changes: 10 additions & 8 deletions torchaudio/csrc/ffmpeg/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,13 @@ message(STATUS "FFMPEG_ROOT=$ENV{FFMPEG_ROOT}")
find_package(FFMPEG 4.1 REQUIRED COMPONENTS avdevice avfilter avformat avcodec avutil)
add_library(ffmpeg INTERFACE)
target_include_directories(ffmpeg INTERFACE "${FFMPEG_INCLUDE_DIRS}")
if (NOT DLOPEN_FFMPEG)
target_link_libraries(ffmpeg INTERFACE "${FFMPEG_LIBRARIES}")

endif()

set(
sources
libav.cpp
ffmpeg.cpp
filter_graph.cpp
hw_context.cpp
Expand All @@ -31,24 +33,24 @@ if (USE_CUDA)
cuda_deps)
endif()

if (DLOPEN_FFMPEG)
set(compile_definitions DLOPEN_FFMPEG)
endif()

torchaudio_library(
libtorchaudio_ffmpeg
"${sources}"
""
"torch;ffmpeg;${additional_lib}"
""
"${compile_definitions}"
)

if (BUILD_TORCHAUDIO_PYTHON_EXTENSION)
set(
ext_sources
pybind/pybind.cpp
)
torchaudio_extension(
_torchaudio_ffmpeg
"${ext_sources}"
pybind/pybind.cpp
""
"libtorchaudio_ffmpeg"
""
"${compile_definitions}"
)
endif ()
47 changes: 26 additions & 21 deletions torchaudio/csrc/ffmpeg/ffmpeg.cpp
Original file line number Diff line number Diff line change
@@ -1,12 +1,18 @@
#include <c10/util/Exception.h>
#include <torchaudio/csrc/ffmpeg/ffmpeg.h>
#include <torchaudio/csrc/ffmpeg/libav.h>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>

namespace torchaudio {
namespace io {
namespace torchaudio::io {

// Returns the description of the error code `errnum` as a std::string.
//
// The text is written by av_strerror into a local buffer of
// AV_ERROR_MAX_STRING_SIZE bytes and then copied into the returned string.
// The return value of av_strerror is not checked here.
//
// NOTE(review): the `FFMPEG` prefix is presumably a macro from libav.h that
// routes the call to the (possibly dlopen-ed) FFmpeg library -- confirm
// against libav.h.
std::string av_err2string(int errnum) {
char str[AV_ERROR_MAX_STRING_SIZE];
FFMPEG av_strerror(errnum, str, AV_ERROR_MAX_STRING_SIZE);
return str;
}

////////////////////////////////////////////////////////////////////////////////
// AVDictionary
Expand All @@ -15,7 +21,7 @@ AVDictionary* get_option_dict(const c10::optional<OptionDict>& option) {
AVDictionary* opt = nullptr;
if (option) {
for (auto const& [key, value] : option.value()) {
av_dict_set(&opt, key.c_str(), value.c_str(), 0);
FFMPEG av_dict_set(&opt, key.c_str(), value.c_str(), 0);
}
}
return opt;
Expand All @@ -26,10 +32,10 @@ void clean_up_dict(AVDictionary* p) {
std::vector<std::string> unused_keys;
// Check and copy unused keys, clean up the original dictionary
AVDictionaryEntry* t = nullptr;
while ((t = av_dict_get(p, "", t, AV_DICT_IGNORE_SUFFIX))) {
while ((t = FFMPEG av_dict_get(p, "", t, AV_DICT_IGNORE_SUFFIX))) {
unused_keys.emplace_back(t->key);
}
av_dict_free(&p);
FFMPEG av_dict_free(&p);
TORCH_CHECK(
unused_keys.empty(),
"Unexpected options: ",
Expand All @@ -41,14 +47,14 @@ void clean_up_dict(AVDictionary* p) {
// AVFormatContext
////////////////////////////////////////////////////////////////////////////////
void AVFormatInputContextDeleter::operator()(AVFormatContext* p) {
avformat_close_input(&p);
FFMPEG avformat_close_input(&p);
};

AVFormatInputContextPtr::AVFormatInputContextPtr(AVFormatContext* p)
: Wrapper<AVFormatContext, AVFormatInputContextDeleter>(p) {}

void AVFormatOutputContextDeleter::operator()(AVFormatContext* p) {
avformat_free_context(p);
FFMPEG avformat_free_context(p);
};

AVFormatOutputContextPtr::AVFormatOutputContextPtr(AVFormatContext* p)
Expand All @@ -58,9 +64,9 @@ AVFormatOutputContextPtr::AVFormatOutputContextPtr(AVFormatContext* p)
// AVIO
////////////////////////////////////////////////////////////////////////////////
void AVIOContextDeleter::operator()(AVIOContext* p) {
avio_flush(p);
av_freep(&p->buffer);
av_freep(&p);
FFMPEG avio_flush(p);
FFMPEG av_freep(&p->buffer);
FFMPEG av_freep(&p);
};

AVIOContextPtr::AVIOContextPtr(AVIOContext* p)
Expand All @@ -70,13 +76,13 @@ AVIOContextPtr::AVIOContextPtr(AVIOContext* p)
// AVPacket
////////////////////////////////////////////////////////////////////////////////
void AVPacketDeleter::operator()(AVPacket* p) {
av_packet_free(&p);
FFMPEG av_packet_free(&p);
};

AVPacketPtr::AVPacketPtr(AVPacket* p) : Wrapper<AVPacket, AVPacketDeleter>(p) {}

AVPacketPtr alloc_avpacket() {
AVPacket* p = av_packet_alloc();
AVPacket* p = FFMPEG av_packet_alloc();
TORCH_CHECK(p, "Failed to allocate AVPacket object.");
return AVPacketPtr{p};
}
Expand All @@ -86,7 +92,7 @@ AVPacketPtr alloc_avpacket() {
////////////////////////////////////////////////////////////////////////////////
AutoPacketUnref::AutoPacketUnref(AVPacketPtr& p) : p_(p){};
AutoPacketUnref::~AutoPacketUnref() {
av_packet_unref(p_);
FFMPEG av_packet_unref(p_);
}
AutoPacketUnref::operator AVPacket*() const {
return p_;
Expand All @@ -96,13 +102,13 @@ AutoPacketUnref::operator AVPacket*() const {
// AVFrame
////////////////////////////////////////////////////////////////////////////////
void AVFrameDeleter::operator()(AVFrame* p) {
av_frame_free(&p);
FFMPEG av_frame_free(&p);
};

AVFramePtr::AVFramePtr(AVFrame* p) : Wrapper<AVFrame, AVFrameDeleter>(p) {}

AVFramePtr alloc_avframe() {
AVFrame* p = av_frame_alloc();
AVFrame* p = FFMPEG av_frame_alloc();
TORCH_CHECK(p, "Failed to allocate AVFrame object.");
return AVFramePtr{p};
};
Expand All @@ -111,7 +117,7 @@ AVFramePtr alloc_avframe() {
// AVCodecContext
////////////////////////////////////////////////////////////////////////////////
void AVCodecContextDeleter::operator()(AVCodecContext* p) {
avcodec_free_context(&p);
FFMPEG avcodec_free_context(&p);
};

AVCodecContextPtr::AVCodecContextPtr(AVCodecContext* p)
Expand All @@ -121,7 +127,7 @@ AVCodecContextPtr::AVCodecContextPtr(AVCodecContext* p)
// AVBufferRefPtr
////////////////////////////////////////////////////////////////////////////////
void AutoBufferUnref::operator()(AVBufferRef* p) {
av_buffer_unref(&p);
FFMPEG av_buffer_unref(&p);
}

AVBufferRefPtr::AVBufferRefPtr(AVBufferRef* p)
Expand All @@ -131,7 +137,7 @@ AVBufferRefPtr::AVBufferRefPtr(AVBufferRef* p)
// AVFilterGraph
////////////////////////////////////////////////////////////////////////////////
void AVFilterGraphDeleter::operator()(AVFilterGraph* p) {
avfilter_graph_free(&p);
FFMPEG avfilter_graph_free(&p);
};

AVFilterGraphPtr::AVFilterGraphPtr(AVFilterGraph* p)
Expand All @@ -141,11 +147,10 @@ AVFilterGraphPtr::AVFilterGraphPtr(AVFilterGraph* p)
// AVCodecParameters
////////////////////////////////////////////////////////////////////////////////
void AVCodecParametersDeleter::operator()(AVCodecParameters* codecpar) {
avcodec_parameters_free(&codecpar);
FFMPEG avcodec_parameters_free(&codecpar);
}

AVCodecParametersPtr::AVCodecParametersPtr(AVCodecParameters* p)
: Wrapper<AVCodecParameters, AVCodecParametersDeleter>(p) {}

} // namespace io
} // namespace torchaudio
} // namespace torchaudio::io
5 changes: 1 addition & 4 deletions torchaudio/csrc/ffmpeg/ffmpeg.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,7 @@ using OptionDict = std::map<std::string, std::string>;
// Replacement of av_err2str, which causes
// `error: taking address of temporary array`
// https://github.com/joncampbell123/composite-video-simulator/issues/5
av_always_inline std::string av_err2string(int errnum) {
char str[AV_ERROR_MAX_STRING_SIZE];
return av_make_error_string(str, AV_ERROR_MAX_STRING_SIZE, errnum);
}
std::string av_err2string(int errnum);

// Base structure that handles memory management.
// Resource is freed by the destructor of unique_ptr,
Expand Down
46 changes: 23 additions & 23 deletions torchaudio/csrc/ffmpeg/filter_graph.cpp
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
#include <torchaudio/csrc/ffmpeg/filter_graph.h>
#include <torchaudio/csrc/ffmpeg/libav.h>
#include <stdexcept>

namespace torchaudio {
namespace io {
namespace torchaudio::io {

namespace {
AVFilterGraph* get_filter_graph() {
AVFilterGraph* ptr = avfilter_graph_alloc();
AVFilterGraph* ptr = FFMPEG avfilter_graph_alloc();
TORCH_CHECK(ptr, "Failed to allocate resouce.");
ptr->nb_threads = 1;
return ptr;
Expand All @@ -32,7 +32,7 @@ std::string get_audio_src_args(
time_base.num,
time_base.den,
sample_rate,
av_get_sample_fmt_name(format),
FFMPEG av_get_sample_fmt_name(format),
channel_layout);
return std::string(args);
}
Expand All @@ -51,7 +51,7 @@ std::string get_video_src_args(
"video_size=%dx%d:pix_fmt=%s:time_base=%d/%d:frame_rate=%d/%d:pixel_aspect=%d/%d",
width,
height,
av_get_pix_fmt_name(format),
FFMPEG av_get_pix_fmt_name(format),
time_base.num,
time_base.den,
frame_rate.num,
Expand All @@ -69,7 +69,7 @@ void FilterGraph::add_audio_src(
int sample_rate,
uint64_t channel_layout) {
add_src(
avfilter_get_by_name("abuffer"),
FFMPEG avfilter_get_by_name("abuffer"),
get_audio_src_args(format, time_base, sample_rate, channel_layout));
}

Expand All @@ -81,13 +81,13 @@ void FilterGraph::add_video_src(
int height,
AVRational sample_aspect_ratio) {
add_src(
avfilter_get_by_name("buffer"),
FFMPEG avfilter_get_by_name("buffer"),
get_video_src_args(
format, time_base, frame_rate, width, height, sample_aspect_ratio));
}

void FilterGraph::add_src(const AVFilter* buffersrc, const std::string& args) {
int ret = avfilter_graph_create_filter(
int ret = FFMPEG avfilter_graph_create_filter(
&buffersrc_ctx, buffersrc, "in", args.c_str(), nullptr, graph);
TORCH_CHECK(
ret >= 0,
Expand All @@ -96,11 +96,11 @@ void FilterGraph::add_src(const AVFilter* buffersrc, const std::string& args) {
}

void FilterGraph::add_audio_sink() {
add_sink(avfilter_get_by_name("abuffersink"));
add_sink(FFMPEG avfilter_get_by_name("abuffersink"));
}

void FilterGraph::add_video_sink() {
add_sink(avfilter_get_by_name("buffersink"));
add_sink(FFMPEG avfilter_get_by_name("buffersink"));
}

void FilterGraph::add_sink(const AVFilter* buffersink) {
Expand All @@ -114,7 +114,7 @@ void FilterGraph::add_sink(const AVFilter* buffersink) {
// According to the other example
// https://ffmpeg.org/doxygen/4.1/filter_audio_8c-example.html
// `abuffersink` should not take options, and this resolved issue.
int ret = avfilter_graph_create_filter(
int ret = FFMPEG avfilter_graph_create_filter(
&buffersink_ctx, buffersink, "out", nullptr, nullptr, graph);
TORCH_CHECK(ret >= 0, "Failed to create output filter.");
}
Expand All @@ -131,15 +131,15 @@ class InOuts {

public:
InOuts(const char* name, AVFilterContext* pCtx) {
p = avfilter_inout_alloc();
p = FFMPEG avfilter_inout_alloc();
TORCH_CHECK(p, "Failed to allocate AVFilterInOut.");
p->name = av_strdup(name);
p->name = FFMPEG av_strdup(name);
p->filter_ctx = pCtx;
p->pad_idx = 0;
p->next = nullptr;
}
~InOuts() {
avfilter_inout_free(&p);
FFMPEG avfilter_inout_free(&p);
}
operator AVFilterInOut**() {
return &p;
Expand All @@ -156,7 +156,7 @@ void FilterGraph::add_process(const std::string& filter_description) {
// If you are debugging this part of the code, you might get confused.
InOuts in{"in", buffersrc_ctx}, out{"out", buffersink_ctx};

int ret = avfilter_graph_parse_ptr(
int ret = FFMPEG avfilter_graph_parse_ptr(
graph, filter_description.c_str(), out, in, nullptr);

TORCH_CHECK(
Expand All @@ -167,11 +167,11 @@ void FilterGraph::add_process(const std::string& filter_description) {

void FilterGraph::create_filter(AVBufferRef* hw_frames_ctx) {
buffersrc_ctx->outputs[0]->hw_frames_ctx = hw_frames_ctx;
int ret = avfilter_graph_config(graph, nullptr);
int ret = FFMPEG avfilter_graph_config(graph, nullptr);
TORCH_CHECK(ret >= 0, "Failed to configure the graph: " + av_err2string(ret));
// char* desc = avfilter_graph_dump(graph, NULL);
// char* desc = FFMPEG avfilter_graph_dump(graph, NULL);
// std::cerr << "Filter created:\n" << desc << std::endl;
// av_free(static_cast<void*>(desc));
// FFMPEG av_free(static_cast<void*>(desc));
}

//////////////////////////////////////////////////////////////////////////////
Expand All @@ -191,7 +191,8 @@ FilterGraphOutputInfo FilterGraph::get_output_info() const {
ret.num_channels = l->ch_layout.nb_channels;
#else
// Before FFmpeg 5.1
ret.num_channels = av_get_channel_layout_nb_channels(l->channel_layout);
ret.num_channels =
FFMPEG av_get_channel_layout_nb_channels(l->channel_layout);
#endif
break;
}
Expand All @@ -214,13 +215,12 @@ FilterGraphOutputInfo FilterGraph::get_output_info() const {
// Streaming process
//////////////////////////////////////////////////////////////////////////////
int FilterGraph::add_frame(AVFrame* pInputFrame) {
return av_buffersrc_add_frame_flags(
return FFMPEG av_buffersrc_add_frame_flags(
buffersrc_ctx, pInputFrame, AV_BUFFERSRC_FLAG_KEEP_REF);
}

int FilterGraph::get_frame(AVFrame* pOutputFrame) {
return av_buffersink_get_frame(buffersink_ctx, pOutputFrame);
return FFMPEG av_buffersink_get_frame(buffersink_ctx, pOutputFrame);
}

} // namespace io
} // namespace torchaudio
} // namespace torchaudio::io
3 changes: 2 additions & 1 deletion torchaudio/csrc/ffmpeg/hw_context.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#include <torchaudio/csrc/ffmpeg/hw_context.h>
#include <torchaudio/csrc/ffmpeg/libav.h>

namespace torchaudio::io {
namespace {
Expand All @@ -15,7 +16,7 @@ AVBufferRef* get_cuda_context(int index) {
}
if (CUDA_CONTEXT_CACHE.count(index) == 0) {
AVBufferRef* p = nullptr;
int ret = av_hwdevice_ctx_create(
int ret = FFMPEG av_hwdevice_ctx_create(
&p, AV_HWDEVICE_TYPE_CUDA, std::to_string(index).c_str(), nullptr, 0);
TORCH_CHECK(
ret >= 0,
Expand Down
Loading

0 comments on commit d747762

Please sign in to comment.