Use dlopen for FFmpeg (pytorch#3353)
Summary:
This commit changes the way the FFmpeg extension is built and used.
Instead of linking the (LGPL) FFmpeg libraries to torchaudio at build time,
it now uses dlopen to search for and load them at run time.

For dlopen-ing, we use PyTorch's `at::DynamicLibrary` class, which provides
a portable wrapper.

Pull Request resolved: pytorch#3353

Differential Revision: D46059199

Pulled By: mthrok

fbshipit-source-id: 4493a5fd8a4c802178d20276522f5334d637307d
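
The `libav()` dispatch table referenced throughout the hunks below is defined in the new libav.h/libav.cpp files added by this commit (not shown here). As a rough illustration only, the following is a minimal sketch of how such a table could be assembled on top of `at::DynamicLibrary`; the struct layout, the library sonames, and the lazy-initialization pattern are assumptions made for the example, not the actual torchaudio code.

// Hypothetical sketch: a dlopen-based dispatch table built with
// at::DynamicLibrary. Member names mirror the FFmpeg C API; sonames and
// initialization details are illustrative assumptions.
#include <ATen/DynamicLibrary.h>

extern "C" {
#include <libavcodec/avcodec.h>
#include <libavutil/dict.h>
}

namespace torchaudio::io::detail {

struct LibAV {
  // One function pointer per FFmpeg symbol used by torchaudio, typed to match
  // the real C signature so call sites read like direct calls.
  decltype(&::av_dict_set) av_dict_set = nullptr;
  decltype(&::avcodec_free_context) avcodec_free_context = nullptr;
  // ... remaining members omitted
};

inline LibAV& libav() {
  static LibAV table = [] {
    // at::DynamicLibrary portably wraps dlopen/LoadLibrary; keeping the
    // objects static keeps the libraries loaded for the process lifetime.
    static at::DynamicLibrary avutil("libavutil.so.56");   // assumed soname
    static at::DynamicLibrary avcodec("libavcodec.so.58"); // assumed soname
    LibAV t;
    t.av_dict_set =
        reinterpret_cast<decltype(&::av_dict_set)>(avutil.sym("av_dict_set"));
    t.avcodec_free_context = reinterpret_cast<decltype(&::avcodec_free_context)>(
        avcodec.sym("avcodec_free_context"));
    return t;
  }();
  return table;
}

} // namespace torchaudio::io::detail

With a table like this in place, call sites change only from direct calls such as av_dict_set(...) to libav().av_dict_set(...), which is the pattern visible in the diffs below.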
mthrok authored and facebook-github-bot committed Jun 1, 2023
1 parent bc54ac8 commit b14ced1
Showing 19 changed files with 789 additions and 216 deletions.
3 changes: 1 addition & 2 deletions torchaudio/csrc/ffmpeg/CMakeLists.txt
@@ -2,11 +2,10 @@ message(STATUS "FFMPEG_ROOT=$ENV{FFMPEG_ROOT}")
find_package(FFMPEG 4.1 REQUIRED COMPONENTS avdevice avfilter avformat avcodec avutil)
add_library(ffmpeg INTERFACE)
target_include_directories(ffmpeg INTERFACE "${FFMPEG_INCLUDE_DIRS}")
target_link_libraries(ffmpeg INTERFACE "${FFMPEG_LIBRARIES}")


set(
sources
libav.cpp
ffmpeg.cpp
filter_graph.cpp
hw_context.cpp
37 changes: 20 additions & 17 deletions torchaudio/csrc/ffmpeg/ffmpeg.cpp
@@ -1,5 +1,6 @@
#include <c10/util/Exception.h>
#include <torchaudio/csrc/ffmpeg/ffmpeg.h>
#include <torchaudio/csrc/ffmpeg/libav.h>
#include <sstream>
#include <stdexcept>
#include <string>
@@ -8,14 +9,16 @@
namespace torchaudio {
namespace io {

using torchaudio::io::detail::libav;

////////////////////////////////////////////////////////////////////////////////
// AVDictionary
////////////////////////////////////////////////////////////////////////////////
AVDictionary* get_option_dict(const c10::optional<OptionDict>& option) {
AVDictionary* opt = nullptr;
if (option) {
for (auto const& [key, value] : option.value()) {
av_dict_set(&opt, key.c_str(), value.c_str(), 0);
libav().av_dict_set(&opt, key.c_str(), value.c_str(), 0);
}
}
return opt;
@@ -26,10 +29,10 @@ void clean_up_dict(AVDictionary* p) {
std::vector<std::string> unused_keys;
// Check and copy unused keys, clean up the original dictionary
AVDictionaryEntry* t = nullptr;
while ((t = av_dict_get(p, "", t, AV_DICT_IGNORE_SUFFIX))) {
while ((t = libav().av_dict_get(p, "", t, AV_DICT_IGNORE_SUFFIX))) {
unused_keys.emplace_back(t->key);
}
av_dict_free(&p);
libav().av_dict_free(&p);
TORCH_CHECK(
unused_keys.empty(),
"Unexpected options: ",
@@ -41,14 +44,14 @@ void clean_up_dict(AVDictionary* p) {
// AVFormatContext
////////////////////////////////////////////////////////////////////////////////
void AVFormatInputContextDeleter::operator()(AVFormatContext* p) {
avformat_close_input(&p);
libav().avformat_close_input(&p);
};

AVFormatInputContextPtr::AVFormatInputContextPtr(AVFormatContext* p)
: Wrapper<AVFormatContext, AVFormatInputContextDeleter>(p) {}

void AVFormatOutputContextDeleter::operator()(AVFormatContext* p) {
avformat_free_context(p);
libav().avformat_free_context(p);
};

AVFormatOutputContextPtr::AVFormatOutputContextPtr(AVFormatContext* p)
@@ -58,9 +61,9 @@ AVFormatOutputContextPtr::AVFormatOutputContextPtr(AVFormatContext* p)
// AVIO
////////////////////////////////////////////////////////////////////////////////
void AVIOContextDeleter::operator()(AVIOContext* p) {
avio_flush(p);
av_freep(&p->buffer);
av_freep(&p);
libav().avio_flush(p);
libav().av_freep(&p->buffer);
libav().av_freep(&p);
};

AVIOContextPtr::AVIOContextPtr(AVIOContext* p)
@@ -70,13 +73,13 @@ AVIOContextPtr::AVIOContextPtr(AVIOContext* p)
// AVPacket
////////////////////////////////////////////////////////////////////////////////
void AVPacketDeleter::operator()(AVPacket* p) {
av_packet_free(&p);
libav().av_packet_free(&p);
};

AVPacketPtr::AVPacketPtr(AVPacket* p) : Wrapper<AVPacket, AVPacketDeleter>(p) {}

AVPacketPtr alloc_avpacket() {
AVPacket* p = av_packet_alloc();
AVPacket* p = libav().av_packet_alloc();
TORCH_CHECK(p, "Failed to allocate AVPacket object.");
return AVPacketPtr{p};
}
@@ -86,7 +89,7 @@ AVPacketPtr alloc_avpacket() {
////////////////////////////////////////////////////////////////////////////////
AutoPacketUnref::AutoPacketUnref(AVPacketPtr& p) : p_(p){};
AutoPacketUnref::~AutoPacketUnref() {
av_packet_unref(p_);
libav().av_packet_unref(p_);
}
AutoPacketUnref::operator AVPacket*() const {
return p_;
@@ -96,13 +99,13 @@ AutoPacketUnref::operator AVPacket*() const {
// AVFrame
////////////////////////////////////////////////////////////////////////////////
void AVFrameDeleter::operator()(AVFrame* p) {
av_frame_free(&p);
libav().av_frame_free(&p);
};

AVFramePtr::AVFramePtr(AVFrame* p) : Wrapper<AVFrame, AVFrameDeleter>(p) {}

AVFramePtr alloc_avframe() {
AVFrame* p = av_frame_alloc();
AVFrame* p = libav().av_frame_alloc();
TORCH_CHECK(p, "Failed to allocate AVFrame object.");
return AVFramePtr{p};
};
@@ -111,7 +114,7 @@ AVFramePtr alloc_avframe() {
// AVCodecContext
////////////////////////////////////////////////////////////////////////////////
void AVCodecContextDeleter::operator()(AVCodecContext* p) {
avcodec_free_context(&p);
libav().avcodec_free_context(&p);
};

AVCodecContextPtr::AVCodecContextPtr(AVCodecContext* p)
@@ -121,7 +124,7 @@ AVCodecContextPtr::AVCodecContextPtr(AVCodecContext* p)
// AVBufferRefPtr
////////////////////////////////////////////////////////////////////////////////
void AutoBufferUnref::operator()(AVBufferRef* p) {
av_buffer_unref(&p);
libav().av_buffer_unref(&p);
}

AVBufferRefPtr::AVBufferRefPtr(AVBufferRef* p)
@@ -131,7 +134,7 @@ AVBufferRefPtr::AVBufferRefPtr(AVBufferRef* p)
// AVFilterGraph
////////////////////////////////////////////////////////////////////////////////
void AVFilterGraphDeleter::operator()(AVFilterGraph* p) {
avfilter_graph_free(&p);
libav().avfilter_graph_free(&p);
};

AVFilterGraphPtr::AVFilterGraphPtr(AVFilterGraph* p)
@@ -141,7 +144,7 @@ AVFilterGraphPtr::AVFilterGraphPtr(AVFilterGraph* p)
// AVCodecParameters
////////////////////////////////////////////////////////////////////////////////
void AVCodecParametersDeleter::operator()(AVCodecParameters* codecpar) {
avcodec_parameters_free(&codecpar);
libav().avcodec_parameters_free(&codecpar);
}

AVCodecParametersPtr::AVCodecParametersPtr(AVCodecParameters* p)
15 changes: 5 additions & 10 deletions torchaudio/csrc/ffmpeg/ffmpeg.h
@@ -6,6 +6,9 @@
#include <memory>
#include <string>

#include <torchaudio/csrc/ffmpeg/libav.h>
#include <torchaudio/csrc/ffmpeg/macro.h>

extern "C" {
#include <libavcodec/avcodec.h>
#include <libavdevice/avdevice.h>
@@ -29,21 +32,13 @@ namespace io {

using OptionDict = std::map<std::string, std::string>;

// https://github.com/FFmpeg/FFmpeg/blob/4e6debe1df7d53f3f59b37449b82265d5c08a172/doc/APIchanges#L252-L260
// Starting from libavformat 59 (ffmpeg 5),
// AVInputFormat is const and related functions expect constant.
#if LIBAVFORMAT_VERSION_MAJOR >= 59
#define AVFORMAT_CONST const
#else
#define AVFORMAT_CONST
#endif

// Replacement of av_err2str, which causes
// `error: taking address of temporary array`
// https://github.com/joncampbell123/composite-video-simulator/issues/5
av_always_inline std::string av_err2string(int errnum) {
char str[AV_ERROR_MAX_STRING_SIZE];
return av_make_error_string(str, AV_ERROR_MAX_STRING_SIZE, errnum);
detail::libav().av_strerror(errnum, str, AV_ERROR_MAX_STRING_SIZE);
return str;
}

// Base structure that handles memory management.
42 changes: 23 additions & 19 deletions torchaudio/csrc/ffmpeg/filter_graph.cpp
@@ -1,12 +1,15 @@
#include <torchaudio/csrc/ffmpeg/filter_graph.h>
#include <torchaudio/csrc/ffmpeg/libav.h>
#include <stdexcept>

namespace torchaudio {
namespace io {

using torchaudio::io::detail::libav;

namespace {
AVFilterGraph* get_filter_graph() {
AVFilterGraph* ptr = avfilter_graph_alloc();
AVFilterGraph* ptr = libav().avfilter_graph_alloc();
TORCH_CHECK(ptr, "Failed to allocate resouce.");
ptr->nb_threads = 1;
return ptr;
@@ -32,7 +35,7 @@ std::string get_audio_src_args(
time_base.num,
time_base.den,
sample_rate,
av_get_sample_fmt_name(format),
libav().av_get_sample_fmt_name(format),
channel_layout);
return std::string(args);
}
@@ -51,7 +54,7 @@ std::string get_video_src_args(
"video_size=%dx%d:pix_fmt=%s:time_base=%d/%d:frame_rate=%d/%d:pixel_aspect=%d/%d",
width,
height,
av_get_pix_fmt_name(format),
libav().av_get_pix_fmt_name(format),
time_base.num,
time_base.den,
frame_rate.num,
@@ -69,7 +72,7 @@ void FilterGraph::add_audio_src(
int sample_rate,
uint64_t channel_layout) {
add_src(
avfilter_get_by_name("abuffer"),
libav().avfilter_get_by_name("abuffer"),
get_audio_src_args(format, time_base, sample_rate, channel_layout));
}

@@ -81,13 +84,13 @@ void FilterGraph::add_video_src(
int height,
AVRational sample_aspect_ratio) {
add_src(
avfilter_get_by_name("buffer"),
libav().avfilter_get_by_name("buffer"),
get_video_src_args(
format, time_base, frame_rate, width, height, sample_aspect_ratio));
}

void FilterGraph::add_src(const AVFilter* buffersrc, const std::string& args) {
int ret = avfilter_graph_create_filter(
int ret = libav().avfilter_graph_create_filter(
&buffersrc_ctx, buffersrc, "in", args.c_str(), nullptr, graph);
TORCH_CHECK(
ret >= 0,
@@ -96,11 +99,11 @@ void FilterGraph::add_src(const AVFilter* buffersrc, const std::string& args) {
}

void FilterGraph::add_audio_sink() {
add_sink(avfilter_get_by_name("abuffersink"));
add_sink(libav().avfilter_get_by_name("abuffersink"));
}

void FilterGraph::add_video_sink() {
add_sink(avfilter_get_by_name("buffersink"));
add_sink(libav().avfilter_get_by_name("buffersink"));
}

void FilterGraph::add_sink(const AVFilter* buffersink) {
@@ -114,7 +117,7 @@ void FilterGraph::add_sink(const AVFilter* buffersink) {
// According to the other example
// https://ffmpeg.org/doxygen/4.1/filter_audio_8c-example.html
// `abuffersink` should not take options, and this resolved issue.
int ret = avfilter_graph_create_filter(
int ret = libav().avfilter_graph_create_filter(
&buffersink_ctx, buffersink, "out", nullptr, nullptr, graph);
TORCH_CHECK(ret >= 0, "Failed to create output filter.");
}
@@ -131,15 +134,15 @@ class InOuts {

public:
InOuts(const char* name, AVFilterContext* pCtx) {
p = avfilter_inout_alloc();
p = libav().avfilter_inout_alloc();
TORCH_CHECK(p, "Failed to allocate AVFilterInOut.");
p->name = av_strdup(name);
p->name = libav().av_strdup(name);
p->filter_ctx = pCtx;
p->pad_idx = 0;
p->next = nullptr;
}
~InOuts() {
avfilter_inout_free(&p);
libav().avfilter_inout_free(&p);
}
operator AVFilterInOut**() {
return &p;
@@ -156,7 +159,7 @@ void FilterGraph::add_process(const std::string& filter_description) {
// If you are debugging this part of the code, you might get confused.
InOuts in{"in", buffersrc_ctx}, out{"out", buffersink_ctx};

int ret = avfilter_graph_parse_ptr(
int ret = libav().avfilter_graph_parse_ptr(
graph, filter_description.c_str(), out, in, nullptr);

TORCH_CHECK(
@@ -167,11 +170,11 @@

void FilterGraph::create_filter(AVBufferRef* hw_frames_ctx) {
buffersrc_ctx->outputs[0]->hw_frames_ctx = hw_frames_ctx;
int ret = avfilter_graph_config(graph, nullptr);
int ret = libav().avfilter_graph_config(graph, nullptr);
TORCH_CHECK(ret >= 0, "Failed to configure the graph: " + av_err2string(ret));
// char* desc = avfilter_graph_dump(graph, NULL);
// char* desc = libav().avfilter_graph_dump(graph, NULL);
// std::cerr << "Filter created:\n" << desc << std::endl;
// av_free(static_cast<void*>(desc));
// libav().av_free(static_cast<void*>(desc));
}

//////////////////////////////////////////////////////////////////////////////
@@ -191,7 +194,8 @@ FilterGraphOutputInfo FilterGraph::get_output_info() const {
ret.num_channels = l->ch_layout.nb_channels;
#else
// Before FFmpeg 5.1
ret.num_channels = av_get_channel_layout_nb_channels(l->channel_layout);
ret.num_channels =
libav().av_get_channel_layout_nb_channels(l->channel_layout);
#endif
break;
}
@@ -214,12 +218,12 @@
// Streaming process
//////////////////////////////////////////////////////////////////////////////
int FilterGraph::add_frame(AVFrame* pInputFrame) {
return av_buffersrc_add_frame_flags(
return libav().av_buffersrc_add_frame_flags(
buffersrc_ctx, pInputFrame, AV_BUFFERSRC_FLAG_KEEP_REF);
}

int FilterGraph::get_frame(AVFrame* pOutputFrame) {
return av_buffersink_get_frame(buffersink_ctx, pOutputFrame);
return libav().av_buffersink_get_frame(buffersink_ctx, pOutputFrame);
}

} // namespace io
6 changes: 5 additions & 1 deletion torchaudio/csrc/ffmpeg/hw_context.cpp
@@ -1,6 +1,10 @@
#include <torchaudio/csrc/ffmpeg/hw_context.h>
#include <torchaudio/csrc/ffmpeg/libav.h>

namespace torchaudio::io {

using detail::libav;

namespace {

static std::mutex MUTEX;
@@ -15,7 +19,7 @@ AVBufferRef* get_cuda_context(int index) {
}
if (CUDA_CONTEXT_CACHE.count(index) == 0) {
AVBufferRef* p = nullptr;
int ret = av_hwdevice_ctx_create(
int ret = libav().av_hwdevice_ctx_create(
&p, AV_HWDEVICE_TYPE_CUDA, std::to_string(index).c_str(), nullptr, 0);
TORCH_CHECK(
ret >= 0,