Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Spectrum display improvement #94

Merged
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Spectrum display improvement
– Selection of different frequency mappings for the spectrum display,
from linear (current default) to logarithmic.
– Field added in the preference page to select the frequency mapping
– New preference key to remember this setting:
Audio/Renderer/Spectrum/FreqCurve
– Consistent display with high sampling rates (> 48 kHz)
– Fixed time shift with high quality settings
EleonoreMizo authored and CoffeeFlux committed Dec 17, 2024
commit 5e123f54100700ec997ec95ad5521ae1eaa84c03
10 changes: 10 additions & 0 deletions src/audio_display.cpp
Original file line number Diff line number Diff line change
@@ -759,6 +759,15 @@ void AudioDisplay::ReloadRenderingSettings()
spectrum_width[spectrum_quality],
spectrum_distance[spectrum_quality]);

// Frequency curve
int64_t spectrum_freq_curve = OPT_GET("Audio/Renderer/Spectrum/FreqCurve")->GetInt();
spectrum_freq_curve = mid<int64_t>(0, spectrum_freq_curve, 4);
const float spectrum_fref_pos [] = { 0.001f, 0.125f, 0.333f, 0.425f, 0.999f };

audio_spectrum_renderer->set_reference_frequency_position (
spectrum_fref_pos [spectrum_freq_curve]
);

audio_renderer_provider = std::move(audio_spectrum_renderer);
}
else
@@ -1228,6 +1237,7 @@ void AudioDisplay::OnAudioOpen(agi::AudioProvider *provider)
OPT_SUB("Colour/Audio Display/Spectrum", &AudioDisplay::ReloadRenderingSettings, this),
OPT_SUB("Colour/Audio Display/Waveform", &AudioDisplay::ReloadRenderingSettings, this),
OPT_SUB("Audio/Renderer/Spectrum/Quality", &AudioDisplay::ReloadRenderingSettings, this),
OPT_SUB("Audio/Renderer/Spectrum/FreqCurve", &AudioDisplay::ReloadRenderingSettings, this),
});
OnTimingController();
}
160 changes: 124 additions & 36 deletions src/audio_renderer_spectrum.cpp
Original file line number Diff line number Diff line change
@@ -99,6 +99,8 @@ AudioSpectrumRenderer::~AudioSpectrumRenderer()

void AudioSpectrumRenderer::RecreateCache()
{
update_derivation_values ();

#ifdef WITH_FFTW3
if (dft_plan)
{
@@ -142,20 +144,29 @@ void AudioSpectrumRenderer::OnSetProvider()

void AudioSpectrumRenderer::SetResolution(size_t _derivation_size, size_t _derivation_dist)
{
if (derivation_dist != _derivation_dist)
if (derivation_dist_user != _derivation_dist)
{
derivation_dist = _derivation_dist;
if (cache)
cache->Age(0);
derivation_dist_user = _derivation_dist;
update_derivation_values ();
AgeCache (0);
}

if (derivation_size != _derivation_size)
if (derivation_size_user != _derivation_size)
{
derivation_size = _derivation_size;
derivation_size_user = _derivation_size;
RecreateCache();
}
}

void AudioSpectrumRenderer::set_reference_frequency_position (float pos_fref_)
{
assert (pos_fref_ > 0.f);
assert (pos_fref_ < 1.f);

pos_fref = pos_fref_;
}


template<class T>
void AudioSpectrumRenderer::ConvertToFloat(size_t count, T *dest) {
for (size_t si = 0; si < count; ++si)
@@ -164,6 +175,32 @@ void AudioSpectrumRenderer::ConvertToFloat(size_t count, T *dest) {
}
}

/// @brief Recomputes derivation_size and derivation_dist from their
/// user-provided counterparts, taking the provider sampling rate into account.
void AudioSpectrumRenderer::update_derivation_values ()
{
	// Reference sampling rate, in Hz. At or below it, the user-provided
	// values are used as they are. The 50 kHz threshold keeps standard rates
	// such as 48 kHz untouched, while higher standard rates such as 88.2 or
	// 96 kHz get scaled values.
	constexpr float sample_rate_ref = 50000.f;

	// Start from the user settings.
	derivation_size = derivation_size_user;
	derivation_dist = derivation_dist_user;

	// Without a provider there is no sampling rate to scale against.
	if (provider == nullptr)
		return;

	// Each halving required to bring the rate ratio down to 1 or below adds
	// one to both binary logarithms.
	const int rate_hz = provider->GetSampleRate ();
	for (float ratio = float (rate_hz) / sample_rate_ref; ratio > 1; ratio *= 0.5f)
	{
		++ derivation_dist;
		++ derivation_size;
	}
}

void AudioSpectrumRenderer::FillBlock(size_t block_index, float *block)
{
assert(cache);
@@ -172,12 +209,19 @@ void AudioSpectrumRenderer::FillBlock(size_t block_index, float *block)
int64_t first_sample = (((int64_t)block_index) << derivation_dist) - ((int64_t)1 << derivation_size);
provider->GetAudio(&audio_scratch[0], first_sample, 2 << derivation_size);

// Because the FFTs used here are unnormalized DFTs, we have to compensate
// the possible length difference between derivation_size used in the
// calculations and its user-provided counterpart. Thus, the display is
// kept independent of the sampling rate.
const float scale_fix =
1.f / sqrtf (float (1 << (derivation_size - derivation_size_user)));

#ifdef WITH_FFTW3
ConvertToFloat(2 << derivation_size, dft_input);

fftw_execute(dft_plan);

double scale_factor = 9 / sqrt(2 << (derivation_size + 1));
double scale_factor = scale_fix * 9 / sqrt(2 << (derivation_size + 1));

fftw_complex *o = dft_output;
for (size_t si = (size_t)1<<derivation_size; si > 0; --si)
@@ -195,7 +239,7 @@ void AudioSpectrumRenderer::FillBlock(size_t block_index, float *block)
FFT fft;
fft.Transform(2<<derivation_size, fft_input, fft_real, fft_imag);

float scale_factor = 9 / sqrt(2 * (float)(2<<derivation_size));
float scale_factor = scale_fix * 9 / sqrt(2 * (float)(2<<derivation_size));

for (size_t si = 1<<derivation_size; si > 0; --si)
{
@@ -210,6 +254,10 @@ void AudioSpectrumRenderer::FillBlock(size_t block_index, float *block)

void AudioSpectrumRenderer::Render(wxBitmap &bmp, int start, AudioRenderingStyle style)
{
// Misc. utility functions
auto floor_int = [] (float val) { return int (floorf (val )); };
auto round_int = [] (float val) { return int (floorf (val + 0.5f)); };

if (!cache)
return;

@@ -230,9 +278,34 @@ void AudioSpectrumRenderer::Render(wxBitmap &bmp, int start, AudioRenderingStyle

const AudioColorScheme *pal = &colors[style];

/// @todo Make minband and maxband configurable
int minband = 0;
int maxband = 1 << derivation_size;
// Sampling rate, in Hz.
const float sample_rate = float (provider->GetSampleRate ());

// Number of FFT bins, excluding the "Nyquist" one
const int nbr_bins = 1 << derivation_size;

// minband and maxband define a half-open range.
int minband = 1; // Starts at 1, we don't care about showing the DC.
int maxband = std::min (
round_int (nbr_bins * max_freq / (sample_rate * 0.5f)),
nbr_bins
);
assert (minband < maxband);

// Precomputes this once, this will be useful for the log curve.
const float scale_log = logf (maxband / minband);

// Turns the user-specified 1 kHz position into a ratio between the linear
// and logarithmic curves that we can directly use in the following
// calculations.
assert (pos_fref > 0);
assert (pos_fref < 1);
float b_fref = nbr_bins * freq_ref / (sample_rate * 0.5f);
b_fref = mid (1.f, b_fref, float (maxband - 1));
const float clin = minband + (maxband - minband) * pos_fref;
const float clog = minband * expf (pos_fref * scale_log);
float log_ratio_calc = (b_fref - clin) / (clog - clin);
log_ratio_calc = mid (0.f, log_ratio_calc, 1.f);

// ax = absolute x, absolute to the virtual spectrum bitmap
for (int ax = start; ax < end; ++ax)
@@ -244,36 +317,51 @@ void AudioSpectrumRenderer::Render(wxBitmap &bmp, int start, AudioRenderingStyle
// Prepare bitmap writing
unsigned char *px = imgdata + (imgheight-1) * stride + (ax - start) * 3;

// Scale up or down vertically?
if (imgheight > 1<<derivation_size)
float bin_prv = minband;
float bin_cur = minband;
for (int y = 0; y < imgheight; ++y)
{
// Interpolate
for (int y = 0; y < imgheight; ++y)
assert (bin_cur < float (maxband));

float bin_nxt = maxband;
if (y + 1 < imgheight)
{
assert(px >= imgdata);
assert(px < imgdata + imgheight*stride);
auto ideal = (double)(y+1.)/imgheight * (maxband-minband) + minband;
float sample1 = power[(int)floor(ideal)+minband];
float sample2 = power[(int)ceil(ideal)+minband];
float frac = ideal - floor(ideal);
float val = (1-frac)*sample1 + frac*sample2;
pal->map(val*amplitude_scale, px);
px -= stride;
// Bin index is an interpolation between the linear and log curves.
const float pos_rel = float (y + 1) / float (imgheight);
const float b_lin = minband + pos_rel * (maxband - minband);
const float b_log = minband * expf (pos_rel * scale_log);
bin_nxt = b_lin + log_ratio_calc * (b_log - b_lin);
}
}
else
{
// Pick greatest
for (int y = 0; y < imgheight; ++y)

float val = 0;

// Interpolate between consecutive bins
if (bin_nxt - bin_prv < 2)
{
const int bin_0 = floor_int (bin_cur);
const int bin_1 = std::min (bin_0 + 1, nbr_bins - 1);
const float frac = bin_cur - float (bin_0);
const float v0 = power [bin_0];
const float v1 = power [bin_1];
val = v0 + frac * (v1 - v0);
}

// Pick the greatest bin on the interval
else
{
assert(px >= imgdata);
assert(px < imgdata + imgheight*stride);
int sample1 = std::max(0, maxband * y/imgheight + minband);
int sample2 = std::min((1<<derivation_size)-1, maxband * (y+1)/imgheight + minband);
float maxval = *std::max_element(&power[sample1], &power[sample2 + 1]);
pal->map(maxval*amplitude_scale, px);
px -= stride;
int bin_inf = floor_int ((bin_prv + bin_cur) * 0.5f);
int bin_sup = floor_int ((bin_cur + bin_nxt) * 0.5f);
bin_inf = std::min (bin_inf, nbr_bins - 2);
bin_sup = std::min (bin_sup, nbr_bins - 1);
assert (bin_inf < bin_sup);
val = *std::max_element (&power [bin_inf], &power [bin_sup]);
}

pal->map (val * amplitude_scale, px);

px -= stride;
bin_prv = bin_cur;
bin_cur = bin_nxt;
}
}

33 changes: 33 additions & 0 deletions src/audio_renderer_spectrum.h
Original file line number Diff line number Diff line change
@@ -61,10 +61,34 @@ class AudioSpectrumRenderer final : public AudioRendererBitmapProvider {
/// Colour tables used for rendering
std::vector<AudioColorScheme> colors;

/// User-provided value for derivation_size
size_t derivation_size_user = 0;

/// User-provided value for derivation_dist
size_t derivation_dist_user = 0;

/// Maximum audible, displayed frequency. Avoids wasting the display space
/// with ultrasonic content at sampling rates > 40 kHz.
float max_freq = 20000.f;

/// Relative vertical position of the 1 kHz frequency, in (0 ; 1) open range
/// 0 = bottom of the display zone, 1 = top
/// The actual position, as displayed, is limited by the available mapping
/// curves (linear and log).
/// Values close to 0 will give a linear curve, and close to 1 a log curve.
float pos_fref = 1.0f / 3;

/// Reference frequency whose vertical position is constant, Hz.
const float freq_ref = 1000.0f;

/// Binary logarithm of number of samples to use in deriving frequency-power data
/// This could differ from the user-provided value because the actual value
/// used in computations may be scaled, depending on the sampling rate.
size_t derivation_size = 0;

/// Binary logarithm of number of samples between the start of derivations
/// This could differ from the user-provided value because the actual value
/// used in computations may be scaled, depending on the sampling rate.
size_t derivation_dist = 0;

/// @brief Reset in response to changing audio provider
@@ -90,6 +114,9 @@ class AudioSpectrumRenderer final : public AudioRendererBitmapProvider {
template<class T>
void ConvertToFloat(size_t count, T *dest);

/// @brief Updates the derivation_* after a derivation_*_user change.
void update_derivation_values ();

#ifdef WITH_FFTW3
/// FFTW plan data
fftw_plan dft_plan = nullptr;
@@ -133,6 +160,12 @@ class AudioSpectrumRenderer final : public AudioRendererBitmapProvider {
/// is specified too large, it will be clamped to the size.
void SetResolution(size_t derivation_size, size_t derivation_dist);

/// @brief Set the vertical relative position of the reference frequency (1 kHz)
/// @param pos_fref_ Vertical position of the 1 kHz frequency. Between 0 and 1, boundaries excluded.
///
/// A value close to 0 gives a linear display, and close to 1 a logarithmic display.
void set_reference_frequency_position (float pos_fref_);

/// @brief Cleans up the cache
/// @param max_size Maximum size in bytes for the cache
void AgeCache(size_t max_size) override;
3 changes: 2 additions & 1 deletion src/libresrc/default_config.json
Original file line number Diff line number Diff line change
@@ -71,7 +71,8 @@
"Spectrum" : {
"Cutoff" : 0,
"Memory Max" : 128,
"Quality" : 1
"Quality" : 1,
"FreqCurve" : 0
}
},
"Snap" : {
3 changes: 2 additions & 1 deletion src/libresrc/osx/default_config.json
Original file line number Diff line number Diff line change
@@ -71,7 +71,8 @@
"Spectrum" : {
"Cutoff" : 0,
"Memory Max" : 128,
"Quality" : 1
"Quality" : 1,
"FreqCurve" : 0
}
},
"Snap" : {
4 changes: 4 additions & 0 deletions src/preferences.cpp
Original file line number Diff line number Diff line change
@@ -383,6 +383,10 @@ void Advanced_Audio(wxTreebook *book, Preferences *parent) {
wxArrayString sq_choice(4, sq_arr);
p->OptionChoice(spectrum, _("Quality"), sq_choice, "Audio/Renderer/Spectrum/Quality");

const wxString sc_arr[5] = { _("Linear"), _("Extended"), _("Medium"), _("Compressed"), _("Logarithmic") };
wxArrayString sc_choice(5, sc_arr);
p->OptionChoice(spectrum, _("Frequency mapping"), sc_choice, "Audio/Renderer/Spectrum/FreqCurve");

p->OptionAdd(spectrum, _("Cache memory max (MB)"), "Audio/Renderer/Spectrum/Memory Max", 2, 1024);

#ifdef WITH_AVISYNTH