From 246eff3cc41ec41d9a6b1a00d701b20e220723f6 Mon Sep 17 00:00:00 2001
From: wo80 <christian.woltering@tu-dortmund.de>
Date: Mon, 17 Jun 2024 17:04:16 +0200
Subject: [PATCH] Fixes for AudioContext/Loader with FFmpeg v7.

---
 packaging/build-dependencies-msvc.bat | 18 ++++-----
 src/algorithms/io/audioloader.cpp     | 24 +++++++++--
 src/essentia/utils/audiocontext.cpp   | 57 +++++++++++++++++++++------
 3 files changed, 74 insertions(+), 25 deletions(-)

diff --git a/packaging/build-dependencies-msvc.bat b/packaging/build-dependencies-msvc.bat
index 23560f2e6..d06967119 100644
--- a/packaging/build-dependencies-msvc.bat
+++ b/packaging/build-dependencies-msvc.bat
@@ -245,26 +245,26 @@ if not exist "..\include\vamp\" (
 )
 
 ::
-:: Install FFmpeg - https://github.com/GyanD/codexffmpeg
+:: Install FFmpeg - https://github.com/wo80/ffmpeg-audio-only
 ::
 
-if not exist "ffmpeg-6.1.1-win64-shared.zip" (
-  echo Downloading GyanD/codexffmpeg ...
-  curl -L -o "ffmpeg-6.1.1-win64-shared.zip" "https://github.com/wo80/ffmpeg-audio-only/releases/download/v6.1.1/ffmpeg-6.1.1-win64-shared.zip"
+if not exist "ffmpeg-7.0.1-win64-shared.zip" (
+  echo Downloading wo80/ffmpeg-audio-only ...
+  curl -L -o "ffmpeg-7.0.1-win64-shared.zip" "https://github.com/wo80/ffmpeg-audio-only/releases/download/v7.0.1/ffmpeg-7.0.1-win64-shared.zip"
 )
 
 if not exist "..\include\libavcodec\" (
-  if not exist "ffmpeg-6.1.1-win64-shared\" (
-    echo Extracting GyanD/codexffmpeg archive ...
-    tar -xf "ffmpeg-6.1.1-win64-shared.zip"
+  if not exist "ffmpeg-7.0.1-win64-shared\" (
+    echo Extracting wo80/ffmpeg-audio-only archive ...
+    tar -xf "ffmpeg-7.0.1-win64-shared.zip"
   )
-  cd "ffmpeg-6.1.1-win64-shared"
+  cd "ffmpeg-7.0.1-win64-shared"
   xcopy /s /y bin %INSTALL_PREFIX%\bin
   xcopy /s /y lib %INSTALL_PREFIX%\lib
   xcopy /s /y include %INSTALL_PREFIX%\include
   cd ..
 ) else (
-  echo Already installed: GyanD/codexffmpeg
+  echo Already installed: wo80/ffmpeg-audio-only
 )
 
 ::
diff --git a/src/algorithms/io/audioloader.cpp b/src/algorithms/io/audioloader.cpp
index 61977bf29..1540be717 100644
--- a/src/algorithms/io/audioloader.cpp
+++ b/src/algorithms/io/audioloader.cpp
@@ -96,9 +96,6 @@ void AudioLoader::openAudioFile(const string& filename) {
         throw EssentiaException("AudioLoader: Unable to instantiate codec...");
     }
 
-    // Configure format conversion  (no samplerate conversion yet)
-    int64_t layout = av_get_default_channel_layout(_audioCtx->channels);
-
     /*
     const char* fmt = 0;
     get_format_from_sample_fmt(&fmt, _audioCtx->sample_fmt);
@@ -106,6 +103,10 @@ void AudioLoader::openAudioFile(const string& filename) {
     */
 
     E_DEBUG(EAlgorithm, "AudioLoader: using sample format conversion from libswresample");
+#if LIBSWRESAMPLE_VERSION_MAJOR < 5
+    // Configure format conversion  (no samplerate conversion yet)
+    int64_t layout = av_get_default_channel_layout(_audioCtx->channels);
+
     _convertCtxAv = swr_alloc();
         
     av_opt_set_int(_convertCtxAv, "in_channel_layout", layout, 0);
@@ -114,6 +115,19 @@ void AudioLoader::openAudioFile(const string& filename) {
     av_opt_set_int(_convertCtxAv, "out_sample_rate", _audioCtx->sample_rate, 0);
     av_opt_set_int(_convertCtxAv, "in_sample_fmt", _audioCtx->sample_fmt, 0);
     av_opt_set_int(_convertCtxAv, "out_sample_fmt", AV_SAMPLE_FMT_FLT, 0);
+#else
+    int error = swr_alloc_set_opts2(&_convertCtxAv,
+        &_audioCtx->ch_layout,     // output layout: same as input (format conversion only)
+        AV_SAMPLE_FMT_FLT,         // output sample format: interleaved float
+        _audioCtx->sample_rate,    // output rate: unchanged, no resampling here
+        &_audioCtx->ch_layout,     // input layout as reported by the decoder
+        _audioCtx->sample_fmt,     // input sample format as reported by the decoder
+        _audioCtx->sample_rate,    // input rate
+        0, NULL);
+    if (error < 0) {
+        throw EssentiaException("AudioLoader: Could not allocate resample context");
+    }
+#endif
 
     if (swr_init(_convertCtxAv) < 0) {
         throw EssentiaException("AudioLoader: Could not initialize swresample context");
@@ -466,7 +480,11 @@ void AudioLoader::reset() {
     closeAudioFile();
     openAudioFile(filename);
 
+#if LIBAVCODEC_VERSION_INT < AV_VERSION_INT(59, 24, 100) // AVCodecContext::ch_layout exists since lavc 59.24.100 (FFmpeg 5.1)
     pushChannelsSampleRateInfo(_audioCtx->channels, _audioCtx->sample_rate);
+#else
+    pushChannelsSampleRateInfo(_audioCtx->ch_layout.nb_channels, _audioCtx->sample_rate);
+#endif
     pushCodecInfo(_audioCodecName, _audioCtx->bit_rate);
 }
 
diff --git a/src/essentia/utils/audiocontext.cpp b/src/essentia/utils/audiocontext.cpp
index b905522e1..4d9e54045 100644
--- a/src/essentia/utils/audiocontext.cpp
+++ b/src/essentia/utils/audiocontext.cpp
@@ -85,8 +85,12 @@ int AudioContext::create(const std::string& filename,
   _codecCtx->codec_type     = AVMEDIA_TYPE_AUDIO;
   _codecCtx->bit_rate       = bitrate;
   _codecCtx->sample_rate    = sampleRate;
-  _codecCtx->channels       = nChannels;
+#if LIBAVCODEC_VERSION_INT < AV_VERSION_INT(59, 24, 100) // ch_layout API exists since lavc 59.24.100 (FFmpeg 5.1)
+  _codecCtx->channels       = nChannels;
   _codecCtx->channel_layout = av_get_default_channel_layout(nChannels);
+#else
+  av_channel_layout_default(&_codecCtx->ch_layout, nChannels); // sets both layout and nb_channels
+#endif
 
   switch (_codecCtx->codec_id) {
     case AV_CODEC_ID_VORBIS:
@@ -133,7 +137,11 @@ int AudioContext::create(const std::string& filename,
     case AV_CODEC_ID_PCM_U16LE:
     case AV_CODEC_ID_PCM_U16BE:
       // PCM codecs do not provide frame size in samples, use 4096 bytes on input
-      _codecCtx->frame_size = 4096 / _codecCtx->channels / av_get_bytes_per_sample(AV_SAMPLE_FMT_S16);
+#if LIBAVCODEC_VERSION_INT < AV_VERSION_INT(59, 24, 100) // ch_layout API exists since lavc 59.24.100 (FFmpeg 5.1)
+      _codecCtx->frame_size = 4096 / _codecCtx->channels / av_get_bytes_per_sample(AV_SAMPLE_FMT_S16);
+#else
+      _codecCtx->frame_size = 4096 / _codecCtx->ch_layout.nb_channels / av_get_bytes_per_sample(AV_SAMPLE_FMT_S16);
+#endif
       break;
 
     //case AV_CODEC_ID_FLAC:
@@ -151,10 +159,13 @@ int AudioContext::create(const std::string& filename,
   }
 
   // Allocate input audio FLT buffer
-  _inputBufSize = av_samples_get_buffer_size(NULL, 
-                                             _codecCtx->channels, 
-                                             _codecCtx->frame_size, 
-                                             AV_SAMPLE_FMT_FLT, 0);
+  _inputBufSize = av_samples_get_buffer_size(NULL,
+#if LIBAVCODEC_VERSION_INT < AV_VERSION_INT(59, 24, 100) // ch_layout API exists since lavc 59.24.100 (FFmpeg 5.1)
+      _codecCtx->channels,
+#else
+      _codecCtx->ch_layout.nb_channels,
+#endif
+      _codecCtx->frame_size, AV_SAMPLE_FMT_FLT, 0);
 
   _buffer = (float*)av_malloc(_inputBufSize);
 
@@ -261,8 +272,13 @@ void AudioContext::close() {
 
 
 void AudioContext::write(const vector<StereoSample>& stereoData) {
-  if (_codecCtx->channels != 2) {
-    throw EssentiaException("Trying to write stereo audio data to an audio file with ", _codecCtx->channels, " channels");
+#if LIBAVCODEC_VERSION_INT < AV_VERSION_INT(59, 24, 100) // ch_layout API exists since lavc 59.24.100 (FFmpeg 5.1)
+  const int channels = _codecCtx->channels;
+#else
+  const int channels = _codecCtx->ch_layout.nb_channels;
+#endif
+  if (channels != 2) {
+    throw EssentiaException("Trying to write stereo audio data to an audio file with ", channels, " channels");
   }
 
   int dsize = (int)stereoData.size();
@@ -286,8 +302,13 @@ void AudioContext::write(const vector<StereoSample>& stereoData) {
 
 
 void AudioContext::write(const vector<AudioSample>& monoData) {
-  if (_codecCtx->channels != 1) {
-    throw EssentiaException("Trying to write mono audio data to an audio file with ", _codecCtx->channels, " channels");
+#if LIBAVCODEC_VERSION_INT < AV_VERSION_INT(59, 24, 100) // ch_layout API exists since lavc 59.24.100 (FFmpeg 5.1)
+  const int channels = _codecCtx->channels;
+#else
+  const int channels = _codecCtx->ch_layout.nb_channels;
+#endif
+  if (channels != 1) {
+    throw EssentiaException("Trying to write mono audio data to an audio file with ", channels, " channels");
   }
 
   int dsize = (int)monoData.size();
@@ -325,7 +346,12 @@ void AudioContext::encodePacket(int size) {
       if (_convert_buffer)
           av_freep(&_convert_buffer[0]);
       _convert_buffer_size = num_out_samples;
-      if (av_samples_alloc_array_and_samples(&_convert_buffer, &linesize, _codecCtx->channels,
+      if (av_samples_alloc_array_and_samples(&_convert_buffer, &linesize,
+#if LIBAVCODEC_VERSION_INT < AV_VERSION_INT(59, 24, 100) // ch_layout API exists since lavc 59.24.100 (FFmpeg 5.1)
+          _codecCtx->channels,
+#else
+          _codecCtx->ch_layout.nb_channels,
+#endif
           num_out_samples, _codecCtx->sample_fmt, 0) < 0) {
           throw EssentiaException("Could not allocate output buffer for sample format conversion");
       }
@@ -365,9 +391,14 @@ void AudioContext::encodePacket(int size) {
   }
 #endif
 
-  int buffer_size = av_samples_get_buffer_size(NULL, _codecCtx->channels, size, AV_SAMPLE_FMT_FLT, 0);
+#if LIBAVCODEC_VERSION_INT < AV_VERSION_INT(59, 24, 100) // ch_layout API exists since lavc 59.24.100 (FFmpeg 5.1)
+  const int channels = _codecCtx->channels;
+#else
+  const int channels = _codecCtx->ch_layout.nb_channels;
+#endif
+  int buffer_size = av_samples_get_buffer_size(NULL, channels, size, AV_SAMPLE_FMT_FLT, 0);
 
-  int result = avcodec_fill_audio_frame(_frame, _codecCtx->channels, _codecCtx->sample_fmt,
+  int result = avcodec_fill_audio_frame(_frame, channels, _codecCtx->sample_fmt,
                                         _convert_buffer[0], buffer_size, 0);
   if (result < 0) {
     char errstring[1204];