-
Notifications
You must be signed in to change notification settings - Fork 0
/
fft.lua
422 lines (371 loc) · 15.9 KB
/
fft.lua
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
---
--- @module applause
---
local bit = require "bit"
local ffi = require "ffi"
-- FIXME: We return these numbers, so should this be public?
local complex = ffi.typeof('double complex')
-- NOTE: LuaJIT 2.1 does not yet support complex number arithmetics
local function complex_add(a, b)
return complex(a.re + b.re, a.im + b.im)
end
local function complex_sub(a, b)
return complex(a.re - b.re, a.im - b.im)
end
local function complex_mul(a, b)
return complex(a.re*b.re - a.im*b.im, a.re*b.im + a.im*b.re)
end
local function complex_conj(x)
return complex(x.re, -x.im)
end
--[[
-- Direct application of the DFT formula.
-- This has complexity O(n^2) and is here only for reference.
function FFT(samples, invert)
local ret = table.new(#samples, 0)
-- FIXME: For real-valued signals, the spectrum is mirrored,
-- so it would suffice to calculate #samples/2.
for k = 1, #samples do
ret[k] = complex()
for n = 1, #samples do
local sample = complex(samples[n])
-- Euler's formula: exp(I*x) = cos(x) + I*sin(x)
-- exp(-2*pi*I/N * n*k) == cos(-2*pi/N * n*k) + I*sin(-2*pi/N * n*k)
local twiddle = (inverse and 1 or -1)*2*math.pi/#samples * (n-1) * (k-1)
local q = complex(math.cos(twiddle), math.sin(twiddle))
q = complex_mul(sample, q)
ret[k] = complex_add(ret[k], q)
end
end
return ret
end
]]
--[[
-- Naive implementation of a radix-2 decimation-in-time (DIT) FFT
-- via the Cooley-Tukey algorithm.
-- This has complexity O(n*log2(n)), but requires lots of allocations.
-- I didn't even try to optimize it.
-- Samples can be real-values (audio samples), but also complex values
-- (the frequency spectrum).
-- #samples must be a power of 2.
function FFT(samples, inverse)
if #samples == 1 then return {complex(samples[1])} end
local samples_even = table.new(#samples/2, 0)
local samples_odd = table.new(#samples/2, 0)
for i = 1, #samples/2 do
samples_even[i], samples_odd[i] = samples[2*i], samples[2*i-1]
end
samples_even, samples_odd = FFT(samples_even, inverse), FFT(samples_odd, inverse)
for k = 1, #samples/2 do
-- The odd and even sequences are swapped compared to Wikipedia since
-- our arrays have origin 1.
local p = samples_odd[k]
-- Euler's formula: exp(I*x) = cos(x) + I*sin(x)
-- exp(-2*pi*I/N * k) == cos(-2*pi/N * k) + I*sin(-2*pi/N * k)
local twiddle = (inverse and 1 or -1)*2*math.pi/#samples * (k-1)
local q = complex(math.cos(twiddle), math.sin(twiddle))
q = complex_mul(samples_even[k], q)
samples[k] = complex_add(p, q)
samples[k + #samples/2] = complex_sub(p, q)
end
return samples
end
]]
--- Perform radix-2 decimation-in-time (DIT) FFT for internal usage.
-- This is an implementation of the Cooley-Tukey algorithm.
-- It is still an out-of-place algorithm, but requires allocations
-- of auxilliary memory (2*N) only once.
-- On the other hand, this version does not require a potentially costly
-- bit reversal operation.
-- This has complexity O(N*log2(N)).
-- This is used for both directions (FFT and IFFT).
-- For real inputs, the complex output array will conjugate symmetrical.
-- But the second half is actually cheap to calculate.
-- @tparam {sample,...} x Array of real or complex samples
-- @int x_start First index in `x` to analyze.
-- @int N Number of samples in `x` to transform.
-- @int skip Number of entries to skip.
-- The function will convert `x[x_start]`, `x[x_start+skip]`, up tp
-- `x[x_start+skip*(N-1)]`.
-- @tab X Output table. It should be preallocated to a size of `X_start+N-1`.
-- @int X_start Index of first complex ouput sample in `X`.
-- @tab E Temporary storage table. It should be preallocated to a size of `E_start+N-1`.
-- @int E_start Index of first element in `E` to use.
-- @tparam {number,...} twiddles
-- Twiddle factors, precalculated via @{get_twiddles}.
-- @see https://literateprograms.org/cooley-tukey_fft_algorithm__c_.html
local function FFT_internal(x, x_start, N, skip, X, X_start, E, E_start, twiddles)
if N == 1 then
-- NOTE: This is responsible for the mirrorring, important for FFT_backward()
X[X_start] = x_start <= #x and complex(x[x_start]) or complex_conj(x[2*#x - x_start])
return
end
local D, D_start = E, E_start+N/2
-- calculate the odd/even values
FFT_internal(x, x_start, N/2, skip*2, E, E_start, X, X_start, twiddles)
FFT_internal(x, x_start+skip, N/2, skip*2, D, D_start, X, X_start, twiddles)
for k = 0, N/2-1 do
-- FIXME: Is it really important to overwrite D?
D[D_start+k] = complex_mul(twiddles[k*skip+1], D[D_start+k])
X[X_start+k] = complex_add(E[E_start+k], D[D_start+k])
X[X_start+k + N/2] = complex_sub(E[E_start+k], D[D_start+k])
end
end
--- Perform forward-FFT.
-- For real-valued inputs (audio samples), the outputs will be complex (frequency spectrums).
-- The frequency spectrum is always conjugate symmetric, so `out` can be half the size of `samples`,
-- which significantly simplifies manipulations in the frequency domain.
-- @tparam {number,...} samples Audio samples to transform.
-- @tab out The output array. Should be preallocated to `#samples/2+1`.
-- @tab E Temporary storage table. It should be preallocated to a size of `#samples`.
-- @tparam {number,...} twiddles
-- Twiddle factors, precalculated via `get_twiddles(#samples)`.
-- @return The output array `out`.
local function FFT_forward(samples, out, E, twiddles)
local D, D_start = E, 1+#samples/2
FFT_internal(samples, 1, #samples/2, 2, E, 1, out, 1, twiddles)
FFT_internal(samples, 2, #samples/2, 2, D, D_start, out, 1, twiddles)
local t = complex_mul(twiddles[1], D[D_start])
out[1] = complex_add(E[1], t)
out[1 + #samples/2] = complex_sub(E[1], t)
for k = 1, #samples/2-1 do
t = complex_mul(twiddles[k+1], D[D_start+k])
out[1+k] = complex_add(E[1+k], t)
end
return out
end
--- Perform backward-FFT.
-- For complex-valued inputs (frequency spectrum), the outputs will be real-valued (audio samples):
-- The frequency spectrum is made to be mirrored, so `out` will be twice the length of `samples` minus 2.
-- @tparam {complex,...} samples Spectrum samples (`double complex` C type) to transform.
-- @tab out The output array. Should be preallocated to `(#samples-1)*2`.
-- @tab E Temporary storage table. It should be preallocated to a size of `(#samples-1)*2`.
-- @tparam {number,...} twiddles
-- Twiddle factors, precalculated via `get_twiddles(#samples, true)`.
-- @return The output array `out`.
local function FFT_backward(samples, out, E, twiddles)
local size = (#samples-1)*2 -- number of output samples
local D, D_start = E, 1+size/2
FFT_internal(samples, 1, size/2, 2, E, 1, out, 1, twiddles)
FFT_internal(samples, 2, size/2, 2, D, D_start, out, 1, twiddles)
for k = 0, size/2-1 do
-- see complex_mul(): we need only the real part
local t = twiddles[k+1].re*D[D_start+k].re - twiddles[k+1].im*D[D_start+k].im
out[1+k] = (E[1+k].re + t)/size
out[1+k + size/2] = (E[1+k].re - t)/size
end
return out
end
--- Precalculate twiddle factors.
-- @int N Number of output samples to calculcate twiddle factors for.
-- @bool[opt=false] inverse If true, calculcate factors for inverse transformations.
-- @treturn {number,...} `N/2` twiddle factors.
local function get_twiddles(N, inverse)
local ret = table.new(N/2, 0)
for k = 1, N/2 do
-- Euler's formula: exp(I*x) = cos(x) + I*sin(x)
-- exp(-2*pi*I/N * n*k) == cos(-2*pi/N * n*k) + I*sin(-2*pi/N * n*k)
local twiddle = (inverse and 1 or -1)*2*math.pi*(k-1)/N
ret[k] = complex(math.cos(twiddle), math.sin(twiddle))
end
return ret
end
--- Transform fixed number of audio samples to frequency spectrum (FFT).
-- The spectrum is represented by complex numbers (C type: `double complex`).
-- The resulting spectrum is conjuage symmetric and the redundant half is
-- therefore not returned.
-- Each bin `i` in the resulting frequency spectrum represents frequency
-- `(i-1)*samplerate/#samples`.
-- The number of input samples therefore also restricts the frequency resolution.
-- For the human ear, a buffer size of 2048 would be more than enough.
-- @tparam {number,...}|Stream samples
-- The audio samples to transform.
-- This can be a Stream, but only if it is not infinite (it will be automatically
-- converted to a table).
-- The number of samples must be a power of 2.
-- @treturn {complex,...}
-- The complex spectrum of size `#samples/2+1`.
-- @usage FFT(Stream.SinOsc(samplerate*20/1024):sub(1, 1024))
function FFT(samples)
assert(type(samples) == "table")
if samples.is_a_stream then samples = samples:totable() end
assert(bit.band(#samples, #samples-1) == 0, "Array length needs to be a power of 2")
local out = table.new(#samples/2+1, 0)
local scratch = table.new(#samples, 0)
local twiddles = get_twiddles(#samples)
return FFT_forward(samples, out, scratch, twiddles)
end
--- Convert complex frequency spectrum to magnitude/amplitude per frequency.
-- This conversion is **inplace**.
-- @tparam {complex,...} spectrum Frequency spectrum.
-- @treturn {number,...} The magnitudes per frequency (same table as `spectrum`).
-- @see FFT
-- @usage tostream(magnitude(FFT(Stream.SinOsc(samplerate*20/1024):sub(1, 1024)))):gnuplot()
-- @fixme Should this be a Stream method?
-- Would also resolve having to choose between inplace and out-of-place calculation.
function magnitude(spectrum)
for i = 1, #spectrum do
-- This is also the absolute value.
spectrum[i] = math.sqrt(spectrum[i].re^2 + spectrum[i].im^2)
end
return spectrum
end
--- Convert frequency spectrum to phase in value between [0,1].
-- This conversion is **inplace**.
-- @tparam {complex,...} spectrum Frequency spectrum.
-- @number[opt=0.1] threshold
-- The magnitude threshold. Why this is necessary, see
-- [this blog post](https://www.gaussianwaves.com/2015/11/interpreting-fft-results-obtaining-magnitude-and-phase-information/).
-- @treturn {number,...} The phase value between [0,1] per frequency (same table as `spectrum`).
-- @see FFT
-- @see magnitude
-- @usage tostream(phase(FFT(Stream.SinOsc(samplerate*20/1024, 0.7):sub(1, 1024)))):gnuplot()
function phase(spectrum, threshold)
threshold = threshold or 0.1
for i = 1, #spectrum do
local abs = math.sqrt(spectrum[i].re^2 + spectrum[i].im^2)
spectrum[i] = abs > threshold and math.atan2(spectrum[i].im, spectrum[i].re)/math.pi or 0
end
return spectrum
end
--- Transform fixed number of frequency bins to audio samples (inverse FFT).
-- The spectrum is represented by complex numbers (C type: `double complex`).
-- The spectrum is assumed to be conjuage symmetric and the redundant half is
-- not required to be provided.
-- @tparam {complex,...}|Stream spectrum
-- The frequency bins to transform.
-- This can be a Stream, but only if it is not infinite (it will be automatically
-- converted to a table).
-- This spectrum length must be such that the resulting number of audio samples
-- is a power of 2.
-- @treturn {number,...}
-- The audio samples of size `(#spectrum-1)*2`.
-- @see FFT
-- @usage IFFT(FFT(Stream.SinOsc(samplerate*20/1024):sub(1, 1024))):gnuplot()
function IFFT(spectrum)
assert(type(spectrum) == "table")
if spectrum.is_a_stream then spectrum = spectrum:totable() end
local size = (#spectrum-1)*2
assert(bit.band(size, size-1) == 0, "Spectrum length-1 needs to be a power of 2")
local out = table.new(size, 0)
local scratch = table.new(size, 0)
local twiddles = get_twiddles(size, true)
return tostream(FFT_backward(spectrum, out, scratch, twiddles))
end
--- Analyze frequencies of a potentially infinite realtime stream via FFT.
-- This partitions the source stream into buffers of a given `size`,
-- optionally maps a window function and eventually applies the forward FFT,
-- resulting in a stream of frequency spectrums (STFT).
-- @within Class Stream
-- @int size The size of the FFT buffer. This must be a power of 2.
-- @func[opt] window_fnc
-- An optional window function to apply to the audio data before FFT conversion.
-- @treturn Stream
-- Stream of frequency spectrums (arrays of complex numbers, ie.
-- C type `double complex`).
-- These arrays will have size `size/2+1`.
-- @see Hamming
-- @see Hanning
-- @see FFT
-- @usage Stream.SinOsc(440):FFT(1024):IFFT(1024):play()
--
-- @fixme It's unelegant to pass in the window function as an argument.
-- This could only be avoided by moving the partitioning out of this method
-- and repeating the buffer size once again.
-- E.g.: `foo:partition(1024):map(Hamming):FFT(1024)...`.
-- Of course, if we had another FFT implementation like Danielson-Lanczos,
-- that does not require preallocation of a few arrays, this problem also wouldn't
-- exist.
-- These usually represent the complex spectrum as 2 real entries, so they can entirely
-- work inplace.
function Stream:FFT(size, window_fnc)
assert(bit.band(size, size-1) == 0, "Size needs to be a power of 2")
local out = table.new(size/2+1, 0)
local scratch = table.new(size, 0)
local twiddles = get_twiddles(size)
return self:partition(size):map(function(samples)
assert(#samples == size)
if window_fnc then
samples = window_fnc(samples)
end
return FFT_forward(samples, out, scratch, twiddles)
end)
end
--- Synthesize audio samples from a stream of frequency spectrums via inverse FFT.
-- This is the inverse of @{Stream:FFT}.
-- Note that when performing FFT and IFFT on a real-time stream, as is usually done,
-- this will introduce a latency of `size/samplerate` seconds.
-- @within Class Stream
-- @int size
-- The size of the time domain chunks (ie. the same value passed into @{Stream:FFT}).
-- This will be twice the size of the spectrum arrays minus 2.
-- @treturn Stream
-- A stream of real-valued audio samples synthesized from the frequency spectrums.
-- @see Stream:FFT
-- @see IFFT
-- @usage Stream.SinOsc(440):FFT(1024):IFFT(1024):play()
--
-- @fixme A different FFT implementation like Danielson-Lanczos that works entirely
-- inplace, might not need repitition of these sizes.
function Stream:IFFT(size)
-- NOTE: The size could be inferred from the first spectrum array,
-- but we try to avoid allocations at tick() time at all costs.
local out = table.new(size, 0)
local scratch = table.new(size, 0)
local twiddles = get_twiddles(size, true)
return self:map(function(spectrum)
assert(#spectrum == size/2+1)
-- FIXME: Creating a VectorStream is not fully real-time-safe
-- because of the allocations involved.
-- RavelStream also shouldn't really unravel streams of arrays.
-- This could be worked around with a new IFFTStream class optimized
-- for our usecase.
return VectorStream:new(FFT_backward(spectrum, out, scratch, twiddles))
end):ravel()
end
--
-- Windowing functions
-- See also the commented-out code in filters.lua.
--
--- Apply Hamming window to a fixed number of samples.
-- @tparam {number,...}|Stream samples
-- The audio samples to window.
-- This can be a Stream, but only if it is not infinite (it will be automatically
-- converted to a table).
-- @treturn {number,...}
-- The windowed samples.
-- The conversion is **inplace**, so this may be the same table as `samples`.
-- @see Hanning
-- @see Stream:FFT
-- @usage tostream(magnitude(FFT(Hamming(Stream.SinOsc(samplerate*20.5/1024):sub(1, 1024))))):gnuplot()
function Hamming(samples)
assert(type(samples) == "table")
if samples.is_a_stream then samples = samples:totable() end
local cos = math.cos
local pi2 = 2*math.pi
for i = 1, #samples do
local alpha = 0.54
samples[i] = samples[i]*(alpha - (1-alpha)*cos((pi2*i)/(#samples-1)))
end
return samples
end
--- Apply Hann window to a fixed number of samples.
-- @tparam {number,...}|Stream samples
-- The audio samples to window.
-- This can be a Stream, but only if it is not infinite (it will be automatically
-- converted to a table).
-- @treturn {number,...}
-- The windowed samples.
-- The conversion is **inplace**, so this may be the same table as `samples`.
-- @see Hamming
-- @see Stream:FFT
function Hanning(samples)
assert(type(samples) == "table")
if samples.is_a_stream then samples = samples:totable() end
local cos = math.cos
local pi2 = 2*math.pi
for i = 1, #samples do
samples[i] = samples[i]*(1 - cos((pi2*i)/(#samples-1)))/2;
end
return samples
end