From 1be88b58bee96aaffa1a21db7aa531d1ca7b4736 Mon Sep 17 00:00:00 2001
From: wheeheee <104880306+wheeheee@users.noreply.github.com>
Date: Mon, 2 Dec 2024 19:51:50 +0800
Subject: [PATCH] Optimized `_filt_fir!` for `DF2TFilter` (#569)

* tweak `_filt_fir!`, remove block

`VecElement`, (removed multiversioning)

* reuse `_filt_fir!` and `_filt_iir!`

specialized methods should be faster

* rely on constprop/dce

* use `LazyString`

first only

* SMALL_FILT_CUTOFF: 58 -> 66

* Add tests for coverage

* test more

* lazy string macro

* mitigate regressions

* length(b) == n

Co-authored-by: Martin Holters <martin.holters@hsu-hh.de>

---------

Co-authored-by: Martin Holters <martin.holters@hsu-hh.de>
---
 src/Filters/filt.jl | 30 ++++++++++++++---------------
 src/dspbase.jl      | 47 +++++++++++++++++++++++++--------------------
 test/filt.jl        |  5 ++++-
 3 files changed, 45 insertions(+), 37 deletions(-)

diff --git a/src/Filters/filt.jl b/src/Filters/filt.jl
index efad5fe47..6e0cf5771 100644
--- a/src/Filters/filt.jl
+++ b/src/Filters/filt.jl
@@ -3,7 +3,7 @@
 #
 # filt and filt!
 #
-using ..DSP: _filt_iir!
+using ..DSP: _filt_fir!, _filt_iir!
 
 ## PolynomialRatio
 _zerosi(f::PolynomialRatio{:z,T}, ::AbstractArray{S}) where {T,S} =
@@ -171,24 +171,24 @@ function filt!(out::AbstractVector, f::DF2TFilter{<:PolynomialRatio,<:Vector}, x
     si = f.state
     n = length(si) + 1
     b = coefb(f.coef)
-    if length(b) < n
-        append!(b, zeros(n-length(b)))
-    end
-    a = coefa(f.coef)
-    if length(a) < n
-        append!(a, zeros(n-length(a)))
-    end
     if n == 1
         mul!(out, x, b[1])
     else
-        @inbounds for i in eachindex(x, out)
-            xi = x[i]
-            val = muladd(b[1], xi, si[1])
-            for j=2:n-1
-                si[j-1] = muladd(a[j], -val, muladd(b[j], xi, si[j]))
+        a = coefa(f.coef)
+        as = length(a)
+        bs = length(b)
+        if as != 1
+            if as < n
+                append!(a, zero(eltype(a)) for _ in 1:(n-as))
+            elseif bs < n
+                append!(b, zero(eltype(b)) for _ in 1:(n-bs))
             end
-            si[n-1] = muladd(b[n], xi, -a[n] * val)
-            out[i] = val
+            _filt_iir!(out, b, a, x, si, 1)
+        elseif n <= SMALL_FILT_CUTOFF
+            vtup = ntuple(j -> b[j], Val(length(b)))
+            si .= getfield.(_filt_fir!(out, vtup, x, si, Val(:DF2)), :value)
+        else
+            _filt_fir!(out, b, x, si, 1)
         end
     end
     return out
diff --git a/src/dspbase.jl b/src/dspbase.jl
index 23f71579c..1a9f9ddcd 100644
--- a/src/dspbase.jl
+++ b/src/dspbase.jl
@@ -1,6 +1,6 @@
 # This file was formerly a part of Julia. License is MIT: https://julialang.org/license
 
-const SMALL_FILT_CUTOFF = 58
+const SMALL_FILT_CUTOFF = 66
 
 _zerosi(b,a,T) = zeros(promote_type(eltype(b), eltype(a), T), max(length(a), length(b))-1)
 
@@ -35,7 +35,7 @@ function filt!(out::AbstractArray, b::Union{AbstractVector, Number}, a::Union{Ab
     isempty(a) && throw(ArgumentError("filter vector a must be non-empty"))
     a[1] == 0  && throw(ArgumentError("filter vector a[1] must be nonzero"))
     if size(x) != size(out)
-        throw(ArgumentError("output size $(size(out)) must match input size $(size(x))"))
+        throw(ArgumentError(lazy"output size $(size(out)) must match input size $(size(x))"))
     end
 
     as = length(a)
@@ -109,35 +109,39 @@ function _filt_fir!(out, b, x, si, col)
 end
 
 #
-# filt implementation for FIR filters (faster than Base)
+# filt implementation for FIR filters
 #
 
+### NOTE ###
+# Fragile because of the impact of @inbounds and checkbounds
+# on the effects system
+
+const SMALL_FILT_VECT_CUTOFF = 19
+
 # Transposed direct form II
-@generated function _filt_fir!(out, b::NTuple{N,T}, x, siarr, col) where {N,T}
+@generated function _filt_fir!(out, b::NTuple{N,T}, x, siarr, colv) where {N,T}
     silen = N - 1
     si_end = Symbol(:si_, silen)
-    SMALL_FILT_VECT_CUTOFF = 18
-    si_check = N > SMALL_FILT_VECT_CUTOFF ? :(nothing) : :(@assert length(siarr) == $silen)
 
-    q = quote
-        $si_check
+    quote
+        col = colv isa Val{:DF2} ? 1 : colv
+        N <= SMALL_FILT_VECT_CUTOFF && checkbounds(siarr, $silen)
         Base.@nextract $silen si siarr
         for i in axes(x, 1)
             xi = x[i, col]
             val = muladd(xi, b[1], si_1)
-            Base.@nexprs $(silen-1) j -> (si_j = muladd(xi, b[j+1], si_{j+1}))
-            $si_end = b[N] * xi
-            out[i, col] = val
+            Base.@nexprs $(silen - 1) j -> (si_j = muladd(xi, b[j+1], si_{j + 1}))
+            $si_end = xi * b[N]
+            if N > SMALL_FILT_VECT_CUTOFF
+                @inbounds out[i, col] = val
+            else
+                out[i, col] = val
+            end
         end
-    end
-
-    if N > SMALL_FILT_VECT_CUTOFF
-        loop_args = q.args[6].args[2].args
-        for i in (2, 10)
-            loop_args[i] = :(@inbounds $(loop_args[i]))
+        if colv isa Val{:DF2}
+            return Base.@ntuple $silen j -> VecElement(si_j)
         end
     end
-    q
 end
 
 # Convert array filter tap input to tuple for small-filtering
@@ -146,9 +150,10 @@ function _small_filt_fir!(
         si::AbstractArray{S,N}, ::Val{bs}) where {S,N,bs}
 
     bs < 2 && throw(ArgumentError("invalid tuple size"))
-    b = ntuple(j -> @inbounds(h[j]), Val(bs))
+    length(h) != bs && throw(ArgumentError("length(h) does not match bs"))
+    b = ntuple(j -> h[j], Val(bs))
     for col in axes(x, 2)
-        v_si = view(si, :, N > 1 ? col : 1)
+        v_si = N > 1 ? view(si, :, col) : si
         _filt_fir!(out, b, x, v_si, col)
     end
 end
@@ -820,7 +825,7 @@ end
 dsp_reverse(v, ::NTuple{<:Any, Base.OneTo{Int}}) = reverse(v, dims = 1)
 function dsp_reverse(v, vaxes)
     vsize = length(v)
-    reflected_start = - first(vaxes[1]) - vsize + 1
+    reflected_start = - first(only(vaxes)) - vsize + 1
     reflected_axes = (reflected_start : reflected_start + vsize - 1,)
     out = similar(v, reflected_axes)
     copyto!(out, reflected_start, Iterators.reverse(v), 1, vsize)
diff --git a/test/filt.jl b/test/filt.jl
index 48dd04e65..18a0cffac 100644
--- a/test/filt.jl
+++ b/test/filt.jl
@@ -55,6 +55,8 @@ using DSP, Test, Random, FilterTestHelpers
     @test filt([0, 0, 1, 0.8], [1], [1; zeros(9)]) == [0; 0; 1; 0.8; zeros(6)]
     @test filt(DF2TFilter(PolynomialRatio([1], [1, -0.5])), [1; zeros(9)]) ≈ 0.5.^(0:9)
     @test filt([1], [1, -0.5], [1; zeros(9)]) ≈ 0.5.^(0:9)
+    @test_nowarn filt(DF2TFilter(PolynomialRatio([1:5;], [1, -0.5])), ones(10))
+    @test_nowarn filt(DF2TFilter(PolynomialRatio(rand(DSP.SMALL_FILT_CUTOFF + 1), [1])), ones(10))
 
     # DF2TFilter{:z} state type selection for ZeroPoleGain (issue #371)
     s = rand(30) + im * rand(30)
@@ -259,7 +261,8 @@ end
 # fftfilt/filt
 #
 
-@testset "fftfilt $xlen/$blen" for xlen in 2 .^ (7:18) .- 1, blen in 2 .^ (1:6) .- 1
+# make sure to include blen > SMALL_FILT_CUTOFF
+@testset "fftfilt $xlen/$blen" for xlen in 2 .^ (7:18) .- 1, blen in 2 .^ (1:7) .- 1
     b = randn(blen)
     for x in (rand(xlen), rand(xlen, 2))
         out = similar(x)