Synthesis window in lowLatencySpeechEnhancement.ipynb #16

jurihock · 2023-11-14T11:14:23Z

Hi,

is there a reason why the synthesis window is not applied?

See also the attached sketch based on the lowLatencySpeechEnhancement.ipynb example.

With fix = False (your version):

- clearly visible modulation in the OLA output
- output gain != 1

With fix = True:

- adequate hop size
- unity gain
- synthesis window applied after irfft

from matplotlib.pyplot import *
from numpy import *
from numpy.fft import rfft, irfft

# Switch between the original behaviour (False) and the proposed fix (True)
fix = False

# FFT length shared by both windowing schemes
fftSize = 1024

# Asymmetric scheme: analysis window spans the whole FFT, synthesis window is short
analysisWindowSize = fftSize
synthesisWindowSize = 128

# Hop is 1/4 of the synthesis window with the fix, 3/4 of it without
if fix:
    asymmetricHopSize = synthesisWindowSize // 4
else:
    asymmetricHopSize = (synthesisWindowSize * 3) // 4

# Window-shape parameters handed to the asymmetric window builders
k = analysisWindowSize
m = synthesisWindowSize // 2
d = 0

# Symmetric reference scheme; same hop as the asymmetric one so results are comparable
symmetricWindowSize = fftSize
symmetricHopSize = asymmetricHopSize

# Test signal: a single row of ones, ten FFT lengths long
stereoSamples = ones((1, 10 * fftSize))
numChannels, numSamples = stereoSamples.shape

def getAsymmetricAnalysisWindow(k, m, d):
    """Build the asymmetric analysis window of length k.

    Layout of the returned array:
      * samples [0, d)     : zeros
      * samples [d, k-m)   : rising half of a square-root Hann window
      * samples [k-m, k)   : falling half of a (shorter) square-root Hann window

    k -- total window length
    m -- length of the falling part
    d -- number of leading zero samples
    """
    riseLength = k - m - d

    # hanning(2n+1)[:2n] drops the final point of an odd-length Hann window
    longHann = hanning(2 * riseLength + 1)[:2 * riseLength]
    shortHann = hanning(2 * m + 1)[:2 * m]

    # Start from all zeros, so the first d samples need no explicit assignment
    window = zeros(k)
    window[d:d + riseLength] = sqrt(longHann)[:riseLength]
    window[k - m:] = sqrt(shortHann)[-m:]

    return window

def getAsymmetricSynthesisWindow(k, m, d):
    """Build the asymmetric synthesis window of length k.

    Only the last 2*m samples are non-zero:
      * samples [k-2m, k-m) : rising Hann half divided by the matching samples
                              of the rising square-root Hann used by the
                              analysis window
      * samples [k-m, k)    : falling half of a square-root Hann window

    k -- total window length
    m -- half the length of the non-zero synthesis part
    d -- number of leading zero samples of the matching analysis window

    When k == 2*m + d the first rising sample is 0/0; it is set to 0 instead
    of producing NaN (and a RuntimeWarning).
    """
    risingSqrtHannAnalysis = sqrt( hanning(2*(k-m-d)+1)[:2*(k-m-d)] )
    # Part of the analysis rise that overlaps the rising half of the synthesis window
    analysisOverlap = risingSqrtHannAnalysis[k-2*m-d:k-m-d]

    shortHann = hanning(2*m+1)[:2*m]

    # Masked divide: a zero denominator only occurs at the very first Hann
    # sample, where the numerator is zero too, so the result there stays 0.
    risingNormalizedHann = divide(
        shortHann[:m],
        analysisOverlap,
        out=zeros(m),
        where=analysisOverlap > 0,
    )
    fallingSqrtHann = sqrt(shortHann)

    window = zeros(k)
    window[-2*m:-m] = risingNormalizedHann
    window[-m:] = fallingSqrtHann[-m:]

    return window

def performOnlineSpeechEnhancement(analysisWindow, synthesisWindow, hopSize):
    """Run a frame-wise rfft / irfft overlap-add pass over the test signal.

    Each frame is windowed with analysisWindow, transformed with rfft and
    transformed straight back with irfft (the spectrum is not modified), then
    overlap-added into the output.

    Reads the module-level names `stereoSamples`, `numSamples` and `fix`.
    With `fix` set, the synthesis window is applied after irfft and the gain
    is derived from the analysis/synthesis window product; otherwise the
    synthesis window only contributes its length.

    analysisWindow  -- window applied before rfft; its length is the frame length
    synthesisWindow -- window applied after irfft when `fix` is set
    hopSize         -- frame advance in samples

    Returns (inputSpectrogram, outputSpectrogram, targetEstimateSamplesOLA).
    """
    # Take the frame length from the window actually passed in rather than from
    # the global analysisWindowSize, so the slice always matches the window.
    frameSize = len(analysisWindow)

    # Setup variables to save speech enhancement results
    numFrequencies = len(rfft(zeros(frameSize)))
    numFrames = (numSamples-len(synthesisWindow)) // hopSize

    if fix:
        gainFactor = hopSize / sum(analysisWindow * synthesisWindow)
    else:
        gainFactor = hopSize / float(len(synthesisWindow)) * 2

    targetEstimateSamplesOLA = zeros_like(stereoSamples)
    # NOTE(review): the channel dimension is hard-coded to 2; a frame with
    # fewer channels is broadcast on assignment -- confirm this is intended.
    inputSpectrogram = zeros( (2, numFrequencies, numFrames), 'complex64')
    outputSpectrogram = zeros( (2, numFrequencies, numFrames), 'complex64')

    for frameIndex in range(numFrames):
        # compute FFT
        frameStart = frameIndex * hopSize
        frameEnd = frameStart + frameSize
        stereoSTFTFrame = rfft( stereoSamples[:, frameStart:frameEnd] * analysisWindow )
        inputSpectrogram[..., frameIndex] = stereoSTFTFrame
        outputSpectrogram[..., frameIndex] = stereoSTFTFrame

        # reconstruct time domain samples
        recStereoSTFTFrame = irfft(stereoSTFTFrame)

        if fix:
            # apply synthesis window as well
            recStereoSTFTFrame *= synthesisWindow

        # overlap-add to output samples
        targetEstimateSamplesOLA[:, frameStart:frameEnd] += recStereoSTFTFrame

    # Single scaling of the accumulated output
    targetEstimateSamplesOLA *= gainFactor

    return inputSpectrogram, outputSpectrogram, targetEstimateSamplesOLA

# Window pair for the asymmetric scheme
analysisWindow = getAsymmetricAnalysisWindow(k, m, d)
synthesisWindow = getAsymmetricSynthesisWindow(k, m, d)

# Single square-root Hann window used for both analysis and synthesis
symmetricWindow = sqrt(hanning(symmetricWindowSize))

# Process the test signal with both schemes
symmetricResults = performOnlineSpeechEnhancement(
    symmetricWindow, symmetricWindow, symmetricHopSize)
asymmetricResults = performOnlineSpeechEnhancement(
    analysisWindow, synthesisWindow, asymmetricHopSize)

# Overlay the last row of each overlap-add output for comparison
title('fixed' if fix else 'orig')
for _results, _label, _color in (
        (symmetricResults, 'symmetric', 'b'),
        (asymmetricResults, 'asymmetric', 'r')):
    plot(_results[-1][-1], label=_label, color=_color, alpha=0.5)
legend()
show()

The text was updated successfully, but these errors were encountered:

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Synthesis window in lowLatencySpeechEnhancement.ipynb #16

Synthesis window in lowLatencySpeechEnhancement.ipynb #16

jurihock commented Nov 14, 2023

Synthesis window in lowLatencySpeechEnhancement.ipynb #16

Synthesis window in lowLatencySpeechEnhancement.ipynb #16

Comments

jurihock commented Nov 14, 2023