From 5b5beb18fd28d55aecbb71e6223683b7e92a1c60 Mon Sep 17 00:00:00 2001 From: Zeng Fung Liew Date: Mon, 29 Nov 2021 23:11:52 -0800 Subject: [PATCH 1/3] Split LDB module to multiple files. Energy map functions now compatible with 2D signals. --- src/mod/LDB.jl | 591 +-------------------------------------- src/mod/ldb_energymap.jl | 204 ++++++++++++++ src/mod/ldb_measures.jl | 382 +++++++++++++++++++++++++ 3 files changed, 589 insertions(+), 588 deletions(-) create mode 100644 src/mod/ldb_energymap.jl create mode 100644 src/mod/ldb_measures.jl diff --git a/src/mod/LDB.jl b/src/mod/LDB.jl index 10d944b..cce0de5 100644 --- a/src/mod/LDB.jl +++ b/src/mod/LDB.jl @@ -31,8 +31,7 @@ export inverse_transform, change_nfeatures -using AverageShiftedHistograms, - LinearAlgebra, +using LinearAlgebra, Wavelets, Parameters, Statistics, @@ -42,592 +41,8 @@ using ..Utils, ..DWT, ..BestBasis - - -## ENERGY MAPS -""" -Energy map for Local Discriminant Basis. Current available types are: -- [`TimeFrequency`](@ref) -- [`ProbabilityDensity`](@ref) -""" -abstract type EnergyMap end - -@doc raw""" - TimeFrequency <: EnergyMap - -An energy map based on time frequencies, a measure based on the differences of -derived quantities from projection ``Z_i``, such as mean class energies or -cumulants. - -**See also:** [`EnergyMap`](@ref), [`ProbabilityDensity`](@ref), - [`Signatures`](@ref) -""" -struct TimeFrequency <: EnergyMap end - -@doc raw""" - ProbabilityDensity <: EnergyMap - -An energy map based on probability density, a measure based on the differences -among the pdfs of ``Z_i``. Since we do not know the true density functions of -the coefficients, the PDFs are estimated using the Average Shifted Histogram -(ASH). - -**See also:** [`EnergyMap`](@ref), [`TimeFrequency`](@ref), [`Signatures`](@ref) -""" -struct ProbabilityDensity <: EnergyMap end - -@doc raw""" - Signatures <: EnergyMap - -An energy map based on signatures, a measure that uses the Earth Mover's -Distance (EMD) to compute the discriminating power of a coordinate. Signatures -provide us with a fully data-driven representation, which can be efficiently -used with EMD. This representation is more efficient than a histogram and is -able to represent complex data structure with fewer samples. - -Here, a signature for the coefficients in the ``j``-th level, ``k``-th node, -``l``-th index of class ``c`` is defined as - -``s_{j,k,l}^{(c)} = \{(\alpha_{i;j,k,l}^{(c)}, w_{i;j,k,l}^{(c)})\}_{i=1}^{N_c}`` - -where ``\alpha_{i;j,k,l}^{(c)}`` and ``w_{i;j,k,l}^{(c)}`` are the expansion -coefficients and weights at location ``(j,k,l)`` for signal ``i`` of class ``c`` -respectively. Currently, the two valid types of weights are `:equal` and `:pdf`. - -# Argumemts -- `weight::Symbol`: Type of weight to be used to compute ``w_{i;j,k,l}^{(c)}``. - Available methods are `:equal` and `pdf`. Default is set to `:equal`. - -**See also:** [`EnergyMap`](@ref), [`TimeFrequency`](@ref), - [`ProbabilityDensity`](@ref) -""" -struct Signatures <: EnergyMap - weight::Symbol - Signatures(weight=:equal) = weight ∈ [:equal, :pdf] ? new(weight) : - error("Invalid weight type. Valid weight types are :equal and :pdf.") -end - -""" - energy_map(Xw, y, method) - -Returns the Time Frequency Energy map or the Probability Density Energy map -depending on the input `method` (`TimeFrequency()` or `ProbabilityDensity()`). - -**See also:** [`EnergyMap`](@ref). 
[`TimeFrequency`](@ref), - [`ProbabilityDensity`](@ref) -""" -function energy_map(Xw::AbstractArray{S,3}, y::AbstractVector{T}, - method::TimeFrequency) where {S<:Number, T} - - # basic summary of data - c = unique(y) # unique classes - nc = length(c) # number of classes - Ny = length(y) - n, L, Nx = size(Xw) - - # parameter checking - @assert Nx == Ny - @assert nc > 1 - @assert isdyadic(n) - @assert 1 <= L-1 <= maxtransformlevels(n) - - # construct normalized energy map for each class - Γ = Array{Float64, 3}(undef, (n,L,nc)) - @inbounds begin - for (i,cᵢ) in enumerate(c) - idx = findall(yᵢ -> yᵢ==cᵢ, y) - x = Xw[:,1,idx] - norm_sum = sum(mapslices(xᵢ -> norm(xᵢ,2)^2, x, dims = 1)) - en = sum(Xw[:,:,idx].^2, dims=3) - Γ[:,:,i] = en / norm_sum - end - end - - return Γ -end - -function energy_map(Xw::AbstractArray{S,3}, y::AbstractVector{T}, - method::ProbabilityDensity) where {S<:Number, T} - - # basic summary of data - c = unique(y) # unique classes - nc = length(c) # number of classes - Ny = length(y) - n, L, Nx = size(Xw) - - # parameter checking - @assert Nx == Ny - @assert nc > 1 - @assert isdyadic(n) - @assert 1 <= L-1 <= maxtransformlevels(n) - - # construct empirical probability density for each coefficent of each class - nbins = ceil(Int, (30*Nx)^(1/5)) # number of bins/histogram - mbins = ceil(Int, 100/nbins) # number of histograms M/nbins, M=100 is arbitrary - Γ = Array{Float64,4}(undef, (n, L, (nbins+1)*mbins, nc)) - @inbounds begin - for (i,cᵢ) in enumerate(c) # iterate over each class - idx = findall(yᵢ -> yᵢ==cᵢ, y) - xw = Xw[:,:,idx] # wavelet packet for class cᵢ - for j in axes(xw,1) - for k in axes(xw,2) - z = @view Xw[j,k,:] # coefficients at (j,k) for all signals - zᵢ = xw[j,k,:] # coefficients at (j,k) for cᵢ signals - - # ash parameter setup - σ = std(z) - s = 0.5 - δ = (maximum(z)-minimum(z)+σ)/((nbins+1)*mbins-1) - rng = range(minimum(z)-s*σ, step=δ, length=(nbins+1)*mbins) - - # empirical pdf - epdf = ash(zᵢ, rng=rng, m=mbins, kernel=Kernels.triangular) - _, Γ[j,k,:,i] = xy(epdf) - end - end - end - end - - return Γ -end - -function energy_map(Xw::AbstractArray{S,3}, y::AbstractVector{T}, - method::Signatures) where {S<:Number, T} - - # basic summary of data - c = unique(y) # unique classes - nc = length(c) # number of classes - Ny = length(y) - n, L, Nx = size(Xw) - - # parameter checking - @assert Nx == Ny - @assert nc > 1 - @assert isdyadic(n) - @assert 1 <= L-1 <= maxtransformlevels(n) - - # form signatures in a structure of a named tuple - Γ = method.weight==:equal ? 
- Array{NamedTuple{(:coef, :weight), Tuple{Array{S}, Float64}},1}(undef, nc) : # equal weights - Array{NamedTuple{(:coef, :weight), Tuple{Array{S}, Array{Float64}}},1}(undef, nc) # pdf-based weights - for (i, cᵢ) in enumerate(c) - idx = findall(yᵢ -> yᵢ==cᵢ, y) - xw = Xw[:,:,idx] # wavelet packet for class cᵢ - if method.weight == :equal - Nc = length(idx) - w = 1/Nc - else - Nc = length(idx) - nbins = ceil(Int, (30*Nx)^(1/5)) # number of bins/histogram - mbins = ceil(Int, 100/nbins) # number of histograms M/nbins, M=100 is arbitrary - - # compute weights - w = Array{Float64,3}(undef, (n,L,Nc)) - for j in axes(xw,1) - for k in axes(xw,2) - z = @view xw[j,k,:] # coefficients at (j,k) for cᵢ signals - - # ash parameter setup - σ = std(z) - s = 0.5 - δ = (maximum(z)-minimum(z)+σ)/((nbins+1)*mbins-1) - rng = range(minimum(z)-s*σ, step=δ, length=(nbins+1)*mbins) - - # empirical pdf - epdf = ash(z, rng=rng, m=mbins, kernel=Kernels.triangular) - for l in 1:Nc - w[j,k,l] = AverageShiftedHistograms.pdf(epdf, z[l]) - end - end - end - end - Γ[i] = (coef = xw, weight = w) - end - return Γ -end - -## DISCRIMINANT MEASURES -""" -Discriminant measure for Local Discriminant Basis. Current available subtypes -are: -- [`ProbabilityDensityDM`](@ref) -- [`SignaturesDM`](@ref) -""" -abstract type DiscriminantMeasure end - -""" -Discriminant measure for Probability Density and Time Frequency based energy -maps. Current available measures are: -- [`AsymmetricRelativeEntropy`](@ref) -- [`SymmetricRelativeEntropy`](@ref) -- [`LpDistance`](@ref) -- [`HellingerDistance`](@ref) -""" -abstract type ProbabilityDensityDM <: DiscriminantMeasure end - -""" -Discriminant measure for Signatures based energy maps. Current available -measures are: -- [`EarthMoverDistance`](@ref) -""" -abstract type SignaturesDM <: DiscriminantMeasure end - -@doc raw""" - AsymmetricRelativeEntropy <: ProbabilityDensityDM - -Asymmetric Relative Entropy discriminant measure for the Probability Density and -Time Frequency based energy maps. This measure is also known as cross entropy -and Kullback-Leibler divergence. - -Equation: ``D(p,q) = \sum p(x) \log \frac{p(x)}{q(x)}`` -""" -struct AsymmetricRelativeEntropy <: ProbabilityDensityDM end - -@doc raw""" - SymmetricRelativeEntropy <: ProbabilityDensityDM - -Symmetric Relative Entropy discriminant measure for the Probability Density and -Time Frequency energy maps. Similar idea to the Asymmetric Relative Entropy, but -this aims to make the measure more symmetric. - -Equation: Denote the Asymmetric Relative Entropy as ``D_A(p,q)``, then - -``D(p,q) = D_A(p,q) + D_A(q,p) = \sum p(x) \log \frac{p(x)}{q(x)} + q(x) \log \frac{q(x)}{p(x)}`` -""" -struct SymmetricRelativeEntropy <: ProbabilityDensityDM end - -@doc raw""" - LpDistance <: ProbabilityDensityDM - -``\ell^p`` Distance discriminant measure for the Probability Density and Time -Frequency based energy maps. The default ``p`` value is set to 2. - -Equation: ``W(q,r) = ||q-r||_p^p = \sum_{i=1}^n (q_i - r_i)^p`` -""" -@with_kw struct LpDistance <: ProbabilityDensityDM - p::Number = 2 -end - -@doc raw""" - HellingerDistance <: ProbabilityDensityDM - -Hellinger Distance discriminant measure for the Probability Density energy -map. - -Equation: ``H(p,q) = \sum_{i=1}^n (\sqrt{p_i} - \sqrt{q_i})^2`` -""" -struct HellingerDistance <: ProbabilityDensityDM end - -@doc raw""" - EarthMoverDistance <: SignaturesDM - -Earth Mover Distance discriminant measure for the Signatures energy map. 
- -Equation: -``E(P,Q) = \frac{\sum_{k=1}^{m+n+1} |\hat p_k - \hat q_k| (r_{k+1} - r_k)}{w_\Sigma}`` - -where ``r_1, r_2, \ldots, r_{m+n}`` is the sorted list of ``p_1, \ldots, p_m, -q_1, \ldots, q_n`` and ``\hat p_k = \sum_{p_i \leq r_k} w_{p_i}``, -``\hat q_k = \sum_{q)i \leq r_k} w_{q_i}``. -""" -struct EarthMoverDistance <: SignaturesDM end - -""" - discriminant_measure(Γ, dm) - -Returns the discriminant measure of each node calculated from the energy maps. -""" -function discriminant_measure(Γ::AbstractArray{T}, - dm::ProbabilityDensityDM) where T<:Number - - # basic summary of data - @assert 3 <= ndims(Γ) <= 4 - n = size(Γ,1) - L = size(Γ,2) - C = size(Γ)[end] - @assert C > 1 # ensure more than 1 class - - D = zeros(Float64, (n,L)) - @inbounds begin - for i in 1:(C-1) - for j in (i+1):C - if ndims(Γ) == 3 - D += discriminant_measure(Γ[:,:,i], Γ[:,:,j], dm) - else - D += discriminant_measure(Γ[:,:,:,i], Γ[:,:,:,j], dm) - end - end - end - end - - return D -end - -# discriminant measure for EMD -function discriminant_measure(Γ::AbstractArray{NamedTuple{(:coef, :weight), Tuple{S1,S2}},1}, - dm::SignaturesDM) where - {S1<:Array{<:Number}, - S2<:Union{AbstractFloat,Array{<:AbstractFloat}}} - # Basic summary of data - C = length(Γ) - @assert C > 1 - n = size(Γ[1][:coef], 1) - L = size(Γ[1][:coef], 2) - - D = zeros(Float64, (n,L)) - @inbounds begin - for i in 1:(C-1) - for j in (i+1):C - D += discriminant_measure(Γ[i], Γ[j], dm) - end - end - end - - return D -end - -# discriminant measure between 2 energy maps -function discriminant_measure(Γ₁::AbstractArray{T}, Γ₂::AbstractArray{T}, - dm::ProbabilityDensityDM) where T<:Number - - # parameter checking and basic summary - @assert 2 <= ndims(Γ₁) <= 3 - @assert size(Γ₁) == size(Γ₂) - n = size(Γ₁,1) - L = size(Γ₁,2) - - D = Array{T, 2}(undef, (n,L)) - @inbounds begin - for i in axes(D,1) - for j in axes(D,2) - if ndims(Γ₁) == 2 # time frequency energy map case - D[i,j] = discriminant_measure(Γ₁[i,j], Γ₂[i,j], dm) - else # probability density energy map case - for k in axes(Γ₁,3) - D[i,j] += discriminant_measure(Γ₁[i,j,k], Γ₂[i,j,k], dm) - end - end - end - end - end - - return D -end - -# discriminant measure between 2 nergy maps for EMD -function discriminant_measure(Γ₁::NamedTuple{(:coef, :weight), Tuple{S1,S2}}, - Γ₂::NamedTuple{(:coef, :weight), Tuple{S1,S2}}, - dm::SignaturesDM) where - {S1<:Array{T} where T<:Number, - S2<:Union{AbstractFloat,Array{<:AbstractFloat}}} - - # parameter checking and basic summary - n = size(Γ₁[:coef],1) - L = size(Γ₁[:coef],2) - @assert n == size(Γ₁[:coef],1) == size(Γ₂[:coef],1) - @assert L == size(Γ₁[:coef],2) == size(Γ₂[:coef],2) - - D = Array{Float64,2}(undef, (n,L)) - for i in 1:n - for j in 1:L - # signatures - if typeof(Γ₁[:weight]) <: AbstractFloat # equal weight - P = (coef=Γ₁[:coef][i,j,:], weight=Γ₁[:weight]) - Q = (coef=Γ₂[:coef][i,j,:], weight=Γ₂[:weight]) - else # probability density weight - P = (coef=Γ₁[:coef][i,j,:], weight=Γ₁[:weight][i,j,:]) - Q = (coef=Γ₂[:coef][i,j,:], weight=Γ₂[:weight][i,j,:]) - end - D[i,j] = discriminant_measure(P, Q, dm) - end - end - return D -end - -# Asymmetric Relative Entropy -function discriminant_measure(p::T, q::T, dm::AsymmetricRelativeEntropy) where - T<:Number - - # parameter checking - @assert p >= 0 && q >= 0 - - if p == 0 || q == 0 - return 0 - else - return p * log(p/q) - end -end - -# Symmetric Relative Entropy -function discriminant_measure(p::T, q::T, dm::SymmetricRelativeEntropy) where - T<:Number - - return discriminant_measure(p, q, 
AsymmetricRelativeEntropy()) + - discriminant_measure(q, p, AsymmetricRelativeEntropy()) -end - -# Hellinger Distance -function discriminant_measure(p::T, q::T, dm::HellingerDistance) where T<:Number - return (sqrt(p) - sqrt(q))^2 -end - -# Lᵖ Distance -function discriminant_measure(p::T, q::T, dm::LpDistance) where T<:Number - return (p - q)^dm.p -end - -# Earth Mover Distance -function discriminant_measure(P::NamedTuple{(:coef, :weight), Tuple{S1,S2}}, - Q::NamedTuple{(:coef, :weight), Tuple{S1, S2}}, - dm::EarthMoverDistance) where - {S1<:Vector{T} where T<:Number, - S2<:Union{AbstractFloat,Vector{<:AbstractFloat}}} - - # assigning tuple signatures into coef and weight - p, w_p = P - q, w_q = Q - - # sort signature values - p_order = sortperm(p) - p = p[p_order] - w_p = typeof(w_p)<:AbstractFloat ? repeat([w_p], length(p)) : w_p[p_order] - q_order = sortperm(q) - q = q[q_order] - w_q = typeof(w_q)<:AbstractFloat ? repeat([w_q], length(q)) : w_q[q_order] - - # concatenate p and q, then sort them - r = [p; q] - sort!(r) - - # compute emd - n = length(r) - emd = 0 - for i in 1:(n-1) - # get total weight of p and q less than or equal to r[i] - p_less = p .<= r[i] - ∑w_p = sum(w_p[p_less]) - q_less = q .<= r[i] - ∑w_q = sum(w_q[q_less]) - # add into emd - emd += abs(∑w_p - ∑w_q) * (r[i+1] - r[i]) - end - emd /= (sum(w_p) + sum(w_q)) - return emd -end - -## DISCRIMINATION POWER -""" -Discriminant Power measure for the Local Discriminant Basis. Current available -measures are -- [`BasisDiscriminantMeasure`](@ref) -- [`FishersClassSeparability`](@ref) -- [`RobustFishersClassSeparability`](@ref) -""" -abstract type DiscriminantPower end - -""" - BasisDiscriminantMeasure <: DiscriminantPower - -This is the discriminant measure of a single basis function computed in a -previous step to construct the energy maps. -""" -struct BasisDiscriminantMeasure <: DiscriminantPower end - -@doc raw""" - FishersClassSeparability <: DiscriminantPower - -The Fisher's class separability of the expansion coefficients in the basis -function. - -Equation: ``\frac{\sum_{c=1}^C \pi_c({\rm mean}_i(\alpha_{\lambda,i}^{(c)}) - {\rm mean}_c({\rm mean}_i(\alpha_{\lambda,i}^{(c)})))^2}{\sum_{c=1}^C \pi_c {\rm var}_i(\alpha_{\lambda,i}^{(c)})}`` -""" -struct FishersClassSeparability <: DiscriminantPower end - -@doc raw""" - RobustFishersClassSeparability <: DiscriminantPower - -The robust version of Fisher's class separability of the expansion coefficients -in the basis function. - -Equation: ``\frac{\sum_{c=1}^C \pi_c({\rm med}_i(\alpha_{\lambda,i}^{(c)}) - {\rm med}_c({\rm med}_i(\alpha_{\lambda,i}^{(c)})))^2}{\sum_{c=1}^C \pi_c {\rm mad}_i(\alpha_{\lambda,i}^{(c)})}`` -""" -struct RobustFishersClassSeparability <: DiscriminantPower end - -""" - discriminant_power(D, tree, dp) - -Returns the discriminant power of each leaf from the local discriminant basis -(LDB) tree. 
-""" -function discriminant_power(D::AbstractArray{T,2}, tree::BitVector, - dp::BasisDiscriminantMeasure) where T<:Number - - @assert length(tree) == size(D,1) - 1 - - power = getbasiscoef(D, tree) - order = sortperm(power, rev = true) - - return (power, order) -end - -function discriminant_power(coefs::AbstractArray{T,2}, - y::AbstractVector{S}, - dp::FishersClassSeparability) where {T<:Number, S} - - n = size(coefs,1) # signal length - - classes = unique(y) - C = length(coefs) # number of classes - - Nᵢ = Array{T,1}(undef, C) - Eαᵢ = Array{T,2}(undef, (n,C)) # mean of each entry - Varαᵢ = Array{T,2}(undef, (n,C)) # variance of each entry - @inbounds begin - for (i, c) in enumerate(classes) - idx = findall(yᵢ -> yᵢ == c, y) - Nᵢ[i] = length(idx) - Eαᵢ[:,i] = mean(coefs[:, idx], dims = 2) - Varαᵢ[:,i] = var(coefs[:, idx], dims = 2) - end - end - Eα = mean(Eαᵢ, dims = 2) # overall mean of each entry - pᵢ = Nᵢ / sum(Nᵢ) # proportions of each class - - power = ((Eαᵢ - (Eα .* Eαᵢ)).^2 * pᵢ) ./ (Varαᵢ * pᵢ) - order = sortperm(power, rev = true) - - return (power, order) -end - -function discriminant_power(coefs::AbstractArray{T,2}, - y::AbstractVector{S}, - dp::RobustFishersClassSeparability) where {T<:Number,S} - - n = size(coefs,1) # signal length - - classes = unique(y) - C = length(classes) - - Nᵢ = Array{T,1}(undef, C) - Medαᵢ = Array{T,2}(undef, (n,C)) # mean of each entry - Madαᵢ = Array{T,2}(undef, (n,C)) # variance of each entry - @inbounds begin - for (i, c) in enumerate(classes) - idx = findall(yᵢ -> yᵢ == c, y) - Nᵢ[i] = length(idx) - Medαᵢ[:,i] = median(coefs[:, idx], dims = 2) - Madαᵢ[:,i] = mapslices(x -> mad(x, normalize = false), coefs[:, idx], - dims = 2) - end - end - Medα = median(Medαᵢ, dims = 2) # overall mean of each entry - pᵢ = Nᵢ / sum(Nᵢ) # proportions of each class - - power = ((Medαᵢ - (Medα .* Medαᵢ)).^2 * pᵢ) ./ (Madαᵢ * pᵢ) - order = sortperm(power, rev = true) - - return (power, order) -end +include("ldb_energymap.jl") +include("ldb_measures.jl") ## LOCAL DISCRIMINANT BASIS """ diff --git a/src/mod/ldb_energymap.jl b/src/mod/ldb_energymap.jl new file mode 100644 index 0000000..62507c4 --- /dev/null +++ b/src/mod/ldb_energymap.jl @@ -0,0 +1,204 @@ +import AverageShiftedHistograms: ash, xy, pdf + +## ---------- ENERGY MAPS ---------- +""" +Energy map for Local Discriminant Basis. Current available types are: +- [`TimeFrequency`](@ref) +- [`ProbabilityDensity`](@ref) +""" +abstract type EnergyMap end + +@doc raw""" + TimeFrequency <: EnergyMap + +An energy map based on time frequencies, a measure based on the differences of +derived quantities from projection ``Z_i``, such as mean class energies or +cumulants. + +**See also:** [`EnergyMap`](@ref), [`ProbabilityDensity`](@ref), + [`Signatures`](@ref) +""" +struct TimeFrequency <: EnergyMap end + +@doc raw""" + ProbabilityDensity <: EnergyMap + +An energy map based on probability density, a measure based on the differences +among the pdfs of ``Z_i``. Since we do not know the true density functions of +the coefficients, the PDFs are estimated using the Average Shifted Histogram +(ASH). + +**See also:** [`EnergyMap`](@ref), [`TimeFrequency`](@ref), [`Signatures`](@ref) +""" +struct ProbabilityDensity <: EnergyMap end + +@doc raw""" + Signatures <: EnergyMap + +An energy map based on signatures, a measure that uses the Earth Mover's Distance (EMD) to +compute the discriminating power of a coordinate. Signatures provide us with a fully +data-driven representation, which can be efficiently used with EMD. 
This representation is
+more efficient than a histogram and is able to represent complex data structures with fewer
+samples.
+
+Here, a signature for the coefficients in the ``j``-th level, ``k``-th node, ``l``-th index
+of class ``c`` is defined as
+
+``s_{j,k,l}^{(c)} = \{(\alpha_{i;j,k,l}^{(c)}, w_{i;j,k,l}^{(c)})\}_{i=1}^{N_c}``
+
+where ``\alpha_{i;j,k,l}^{(c)}`` and ``w_{i;j,k,l}^{(c)}`` are the expansion coefficients
+and weights at location ``(j,k,l)`` for signal ``i`` of class ``c`` respectively. Currently,
+the two valid types of weights are `:equal` and `:pdf`.
+
+# Arguments
+- `weight::Symbol`: Type of weight to be used to compute ``w_{i;j,k,l}^{(c)}``. Available
+  methods are `:equal` and `:pdf`. Default is set to `:equal`.
+
+**See also:** [`EnergyMap`](@ref), [`TimeFrequency`](@ref), [`ProbabilityDensity`](@ref)
+"""
+struct Signatures <: EnergyMap
+    weight::Symbol
+    # Constructor
+    Signatures(weight=:equal) = weight ∈ [:equal, :pdf] ? new(weight) :
+        throw(ArgumentError("Invalid weight type. Valid weight types are :equal and :pdf."))
+end
+
+"""
+    energy_map(Xw, y, method)
+
+Returns the Time Frequency, Probability Density, or Signatures energy map depending
+on the input `method` (`TimeFrequency()`, `ProbabilityDensity()`, or `Signatures()`).
+
+**See also:** [`EnergyMap`](@ref), [`TimeFrequency`](@ref),
+    [`ProbabilityDensity`](@ref)
+"""
+function energy_map(Xw::AbstractArray{S}, y::AbstractVector{T}, method::TimeFrequency) where
+    {S<:AbstractFloat, T}
+    # --- Sanity check ---
+    N = ndims(Xw)
+    c = unique(y)       # unique classes
+    nc = length(c)      # number of classes
+    Ny = length(y)
+    sz = size(Xw)[1:(N-2)]
+    L = size(Xw, N-1)
+    Nx = size(Xw, N)
+    # parameter checking
+    @assert 3 ≤ N ≤ 4
+    @assert Nx == Ny
+    @assert nc > 1
+    @assert 1 ≤ L-1 ≤ maxtransformlevels(min(sz...))
+
+    # --- Construct normalized energy map for each class ---
+    Γ = Array{S, N}(undef, (sz...,L,nc))
+    map_size = prod([sz...,L])      # number of elements per class of energy map
+    slice_dim = N==3 ? [1] : [1,2]  # dimension for norm computation of each slice of signal
+    for (i,cᵢ) in enumerate(c)
+        idx = findall(yᵢ -> yᵢ==cᵢ, y)
+        @inbounds x = N==3 ? Xw[:,1,idx] : Xw[:,:,1,idx]    # Set of original signals of class cᵢ
+        @inbounds xw = N==3 ? 
Xw[:,:,idx] : Xw[:,:,:,idx] # Set of decomposed signals of class cᵢ
+        norm_sum = sum(mapslices(xᵢ -> norm(xᵢ,2)^2, x, dims = slice_dim))
+        en = sum(xw.^2, dims=N)
+        start_index = (i-1)*map_size    # start_index for current class in energy map
+        for j in eachindex(en)
+            @inbounds Γ[start_index+j] = en[j] / norm_sum
+        end
+    end
+    return Γ
+end
+
+function energy_map(Xw::AbstractArray{S}, y::AbstractVector{T},
+                    method::ProbabilityDensity) where {S<:AbstractFloat, T}
+    # --- Sanity check ---
+    N = ndims(Xw)
+    c = unique(y)       # unique classes
+    nc = length(c)      # number of classes
+    Ny = length(y)
+    sz = size(Xw)[1:(N-2)]
+    L = size(Xw, N-1)
+    Nx = size(Xw, N)
+    # parameter checking
+    @assert 3 ≤ N ≤ 4
+    @assert Nx == Ny
+    @assert nc > 1
+    @assert 1 ≤ L-1 ≤ maxtransformlevels(min(sz...))
+
+    # --- Construct empirical probability density for each coefficient of each class ---
+    nbins = ceil(Int, (30*Nx)^(1/5))    # number of bins/histogram
+    mbins = ceil(Int, 100/nbins)        # number of histograms M/nbins, M=100 is arbitrary
+    pdf_len = (nbins+1)*mbins           # vector length of empirical pdf
+    slice_size = prod([sz...,L])        # number of elements per class of energy map slice
+    map_size = prod([sz...,L,pdf_len])  # number of elements per class of energy map
+    Γ = Array{Float64,N+1}(undef, (sz..., L, pdf_len, nc))
+    for (i,cᵢ) in enumerate(c)          # iterate over each class
+        idx = findall(yᵢ -> yᵢ==cᵢ, y)
+        @inbounds xw = N==3 ? Xw[:,:,idx] : Xw[:,:,:,idx]   # wavelet packet for class cᵢ
+        for j in 1:slice_size
+            @inbounds z = @view Xw[j:slice_size:end]    # Coefficients at index j for all signals
+            @inbounds zᵢ = @view xw[j:slice_size:end]   # Coefficients at index j for signals in class cᵢ
+            # ASH parameter setup
+            σ = std(z)
+            s = 0.5
+            δ = (maximum(z)-minimum(z)+σ)/((nbins+1)*mbins-1)
+            rng = range(minimum(z)-s*σ, step=δ, length=(nbins+1)*mbins)
+            # Empirical PDF
+            epdf = ash(zᵢ, rng=rng, m=mbins, kernel=Kernels.triangular)
+            start_index = (i-1)*map_size    # start_index for current class in energy map
+            end_index = i*map_size          # end index for current class in energy map
+            @inbounds _, Γ[(start_index+j):slice_size:end_index] = xy(epdf)     # copy distribution into Γ
+        end
+    end
+    return Γ
+end
+
+function energy_map(Xw::AbstractArray{S}, y::AbstractVector{T}, method::Signatures) where
+    {S<:AbstractFloat, T}
+    # --- Sanity check ---
+    N = ndims(Xw)
+    c = unique(y)       # unique classes
+    nc = length(c)      # number of classes
+    Ny = length(y)
+    sz = size(Xw)[1:(N-2)]
+    L = size(Xw, N-1)
+    Nx = size(Xw, N)
+    # parameter checking
+    @assert 3 ≤ N ≤ 4
+    @assert Nx == Ny
+    @assert nc > 1
+    @assert 1 ≤ L-1 ≤ maxtransformlevels(min(sz...))
+    @assert method.weight ∈ [:equal, :pdf]
+
+    # --- Form signatures in a structure of a named tuple ---
+    Γ = method.weight==:equal ?
+        Vector{NamedTuple{(:coef, :weight), Tuple{Array{S}, S}}}(undef, nc) :       # equal weights
+        Vector{NamedTuple{(:coef, :weight), Tuple{Array{S}, Array{S}}}}(undef, nc)  # pdf-based weights
+    slice_size = prod([sz...,L])    # number of elements per class of energy map slice
+    for (i, cᵢ) in enumerate(c)
+        idx = findall(yᵢ -> yᵢ==cᵢ, y)
+        xw = N==3 ? 
Xw[:,:,idx] : Xw[:,:,:,idx] # wavelet packet for class cᵢ + Nc = length(idx) # number of data in class cᵢ + if method.weight == :equal + w = 1/Nc + else + nbins = ceil(Int, (30*Nx)^(1/5)) # number of bins/histogram + mbins = ceil(Int, 100/nbins) # number of histograms M/nbins, M=100 is arbitrary + + # compute weights + w = Array{S,N}(undef, (sz...,L,Nc)) + for j in 1:slice_size + z = @view xw[j:slice_size:end] # coefficients at j for cᵢ signals + # ASH parameter setup + σ = std(z); s = 0.5 + δ = (maximum(z)-minimum(z)+σ)/((nbins+1)*mbins-1) + rng = range(minimum(z)-s*σ, step=δ, length=(nbins+1)*mbins) + # Empirical PDF + epdf = ash(z, rng=rng, m=mbins, kernel=Kernels.triangular) + for k in 1:Nc + start_index = (k-1)*slice_size + w[start_index+j] = pdf(epdf, z[k]) + end + end + end + Γ[i] = (coef = xw, weight = w) + end + return Γ +end diff --git a/src/mod/ldb_measures.jl b/src/mod/ldb_measures.jl new file mode 100644 index 0000000..9b4332e --- /dev/null +++ b/src/mod/ldb_measures.jl @@ -0,0 +1,382 @@ +## DISCRIMINANT MEASURES +""" +Discriminant measure for Local Discriminant Basis. Current available subtypes +are: +- [`ProbabilityDensityDM`](@ref) +- [`SignaturesDM`](@ref) +""" +abstract type DiscriminantMeasure end + +""" +Discriminant measure for Probability Density and Time Frequency based energy +maps. Current available measures are: +- [`AsymmetricRelativeEntropy`](@ref) +- [`SymmetricRelativeEntropy`](@ref) +- [`LpDistance`](@ref) +- [`HellingerDistance`](@ref) +""" +abstract type ProbabilityDensityDM <: DiscriminantMeasure end + +""" +Discriminant measure for Signatures based energy maps. Current available +measures are: +- [`EarthMoverDistance`](@ref) +""" +abstract type SignaturesDM <: DiscriminantMeasure end + +@doc raw""" + AsymmetricRelativeEntropy <: ProbabilityDensityDM + +Asymmetric Relative Entropy discriminant measure for the Probability Density and +Time Frequency based energy maps. This measure is also known as cross entropy +and Kullback-Leibler divergence. + +Equation: ``D(p,q) = \sum p(x) \log \frac{p(x)}{q(x)}`` +""" +struct AsymmetricRelativeEntropy <: ProbabilityDensityDM end + +@doc raw""" + SymmetricRelativeEntropy <: ProbabilityDensityDM + +Symmetric Relative Entropy discriminant measure for the Probability Density and +Time Frequency energy maps. Similar idea to the Asymmetric Relative Entropy, but +this aims to make the measure more symmetric. + +Equation: Denote the Asymmetric Relative Entropy as ``D_A(p,q)``, then + +``D(p,q) = D_A(p,q) + D_A(q,p) = \sum p(x) \log \frac{p(x)}{q(x)} + q(x) \log \frac{q(x)}{p(x)}`` +""" +struct SymmetricRelativeEntropy <: ProbabilityDensityDM end + +@doc raw""" + LpDistance <: ProbabilityDensityDM + +``\ell^p`` Distance discriminant measure for the Probability Density and Time +Frequency based energy maps. The default ``p`` value is set to 2. + +Equation: ``W(q,r) = ||q-r||_p^p = \sum_{i=1}^n (q_i - r_i)^p`` +""" +@with_kw struct LpDistance <: ProbabilityDensityDM + p::Number = 2 +end + +@doc raw""" + HellingerDistance <: ProbabilityDensityDM + +Hellinger Distance discriminant measure for the Probability Density energy +map. + +Equation: ``H(p,q) = \sum_{i=1}^n (\sqrt{p_i} - \sqrt{q_i})^2`` +""" +struct HellingerDistance <: ProbabilityDensityDM end + +@doc raw""" + EarthMoverDistance <: SignaturesDM + +Earth Mover Distance discriminant measure for the Signatures energy map. 
+
+Equation:
+``E(P,Q) = \frac{\sum_{k=1}^{m+n+1} |\hat p_k - \hat q_k| (r_{k+1} - r_k)}{w_\Sigma}``
+
+where ``r_1, r_2, \ldots, r_{m+n}`` is the sorted list of ``p_1, \ldots, p_m,
+q_1, \ldots, q_n`` and ``\hat p_k = \sum_{p_i \leq r_k} w_{p_i}``,
+``\hat q_k = \sum_{q_i \leq r_k} w_{q_i}``.
+"""
+struct EarthMoverDistance <: SignaturesDM end
+
+"""
+    discriminant_measure(Γ, dm)
+
+Returns the discriminant measure of each node calculated from the energy maps.
+"""
+function discriminant_measure(Γ::AbstractArray{T},
+                              dm::ProbabilityDensityDM) where T<:Number
+
+    # basic summary of data
+    @assert 3 <= ndims(Γ) <= 4
+    n = size(Γ,1)
+    L = size(Γ,2)
+    C = size(Γ)[end]
+    @assert C > 1 # ensure more than 1 class
+
+    D = zeros(Float64, (n,L))
+    @inbounds begin
+        for i in 1:(C-1)
+            for j in (i+1):C
+                if ndims(Γ) == 3
+                    D += discriminant_measure(Γ[:,:,i], Γ[:,:,j], dm)
+                else
+                    D += discriminant_measure(Γ[:,:,:,i], Γ[:,:,:,j], dm)
+                end
+            end
+        end
+    end
+
+    return D
+end
+
+# discriminant measure for EMD
+function discriminant_measure(Γ::AbstractArray{NamedTuple{(:coef, :weight), Tuple{S1,S2}},1},
+                              dm::SignaturesDM) where
+                             {S1<:Array{<:Number},
+                              S2<:Union{AbstractFloat,Array{<:AbstractFloat}}}
+    # Basic summary of data
+    C = length(Γ)
+    @assert C > 1
+    n = size(Γ[1][:coef], 1)
+    L = size(Γ[1][:coef], 2)
+
+    D = zeros(Float64, (n,L))
+    @inbounds begin
+        for i in 1:(C-1)
+            for j in (i+1):C
+                D += discriminant_measure(Γ[i], Γ[j], dm)
+            end
+        end
+    end
+
+    return D
+end
+
+# discriminant measure between 2 energy maps
+function discriminant_measure(Γ₁::AbstractArray{T}, Γ₂::AbstractArray{T},
+                              dm::ProbabilityDensityDM) where T<:Number
+
+    # parameter checking and basic summary
+    @assert 2 <= ndims(Γ₁) <= 3
+    @assert size(Γ₁) == size(Γ₂)
+    n = size(Γ₁,1)
+    L = size(Γ₁,2)
+
+    D = zeros(T, (n,L))     # zero-initialized, since the pdf branch accumulates with +=
+    @inbounds begin
+        for i in axes(D,1)
+            for j in axes(D,2)
+                if ndims(Γ₁) == 2 # time frequency energy map case
+                    D[i,j] = discriminant_measure(Γ₁[i,j], Γ₂[i,j], dm)
+                else # probability density energy map case
+                    for k in axes(Γ₁,3)
+                        D[i,j] += discriminant_measure(Γ₁[i,j,k], Γ₂[i,j,k], dm)
+                    end
+                end
+            end
+        end
+    end
+
+    return D
+end
+
+# discriminant measure between 2 energy maps for EMD
+function discriminant_measure(Γ₁::NamedTuple{(:coef, :weight), Tuple{S1,S2}},
+                              Γ₂::NamedTuple{(:coef, :weight), Tuple{S1,S2}},
+                              dm::SignaturesDM) where
+                             {S1<:Array{T} where T<:Number,
+                              S2<:Union{AbstractFloat,Array{<:AbstractFloat}}}
+
+    # parameter checking and basic summary
+    n = size(Γ₁[:coef],1)
+    L = size(Γ₁[:coef],2)
+    @assert n == size(Γ₁[:coef],1) == size(Γ₂[:coef],1)
+    @assert L == size(Γ₁[:coef],2) == size(Γ₂[:coef],2)
+
+    D = Array{Float64,2}(undef, (n,L))
+    for i in 1:n
+        for j in 1:L
+            # signatures
+            if typeof(Γ₁[:weight]) <: AbstractFloat # equal weight
+                P = (coef=Γ₁[:coef][i,j,:], weight=Γ₁[:weight])
+                Q = (coef=Γ₂[:coef][i,j,:], weight=Γ₂[:weight])
+            else # probability density weight
+                P = (coef=Γ₁[:coef][i,j,:], weight=Γ₁[:weight][i,j,:])
+                Q = (coef=Γ₂[:coef][i,j,:], weight=Γ₂[:weight][i,j,:])
+            end
+            D[i,j] = discriminant_measure(P, Q, dm)
+        end
+    end
+    return D
+end
+
+# Asymmetric Relative Entropy
+function discriminant_measure(p::T, q::T, dm::AsymmetricRelativeEntropy) where
+                              T<:Number
+
+    # parameter checking
+    @assert p >= 0 && q >= 0
+
+    if p == 0 || q == 0
+        return 0
+    else
+        return p * log(p/q)
+    end
+end
+
+# Symmetric Relative Entropy
+function discriminant_measure(p::T, q::T, dm::SymmetricRelativeEntropy) where
+                              T<:Number
+
+    return discriminant_measure(p, q, 
AsymmetricRelativeEntropy()) + + discriminant_measure(q, p, AsymmetricRelativeEntropy()) +end + +# Hellinger Distance +function discriminant_measure(p::T, q::T, dm::HellingerDistance) where T<:Number + return (sqrt(p) - sqrt(q))^2 +end + +# Lᵖ Distance +function discriminant_measure(p::T, q::T, dm::LpDistance) where T<:Number + return (p - q)^dm.p +end + +# Earth Mover Distance +function discriminant_measure(P::NamedTuple{(:coef, :weight), Tuple{S1,S2}}, + Q::NamedTuple{(:coef, :weight), Tuple{S1, S2}}, + dm::EarthMoverDistance) where + {S1<:Vector{T} where T<:Number, + S2<:Union{AbstractFloat,Vector{<:AbstractFloat}}} + + # assigning tuple signatures into coef and weight + p, w_p = P + q, w_q = Q + + # sort signature values + p_order = sortperm(p) + p = p[p_order] + w_p = typeof(w_p)<:AbstractFloat ? repeat([w_p], length(p)) : w_p[p_order] + q_order = sortperm(q) + q = q[q_order] + w_q = typeof(w_q)<:AbstractFloat ? repeat([w_q], length(q)) : w_q[q_order] + + # concatenate p and q, then sort them + r = [p; q] + sort!(r) + + # compute emd + n = length(r) + emd = 0 + for i in 1:(n-1) + # get total weight of p and q less than or equal to r[i] + p_less = p .<= r[i] + ∑w_p = sum(w_p[p_less]) + q_less = q .<= r[i] + ∑w_q = sum(w_q[q_less]) + # add into emd + emd += abs(∑w_p - ∑w_q) * (r[i+1] - r[i]) + end + emd /= (sum(w_p) + sum(w_q)) + return emd +end + +## DISCRIMINATION POWER +""" +Discriminant Power measure for the Local Discriminant Basis. Current available +measures are +- [`BasisDiscriminantMeasure`](@ref) +- [`FishersClassSeparability`](@ref) +- [`RobustFishersClassSeparability`](@ref) +""" +abstract type DiscriminantPower end + +""" + BasisDiscriminantMeasure <: DiscriminantPower + +This is the discriminant measure of a single basis function computed in a +previous step to construct the energy maps. +""" +struct BasisDiscriminantMeasure <: DiscriminantPower end + +@doc raw""" + FishersClassSeparability <: DiscriminantPower + +The Fisher's class separability of the expansion coefficients in the basis +function. + +Equation: ``\frac{\sum_{c=1}^C \pi_c({\rm mean}_i(\alpha_{\lambda,i}^{(c)}) - {\rm mean}_c({\rm mean}_i(\alpha_{\lambda,i}^{(c)})))^2}{\sum_{c=1}^C \pi_c {\rm var}_i(\alpha_{\lambda,i}^{(c)})}`` +""" +struct FishersClassSeparability <: DiscriminantPower end + +@doc raw""" + RobustFishersClassSeparability <: DiscriminantPower + +The robust version of Fisher's class separability of the expansion coefficients +in the basis function. + +Equation: ``\frac{\sum_{c=1}^C \pi_c({\rm med}_i(\alpha_{\lambda,i}^{(c)}) - {\rm med}_c({\rm med}_i(\alpha_{\lambda,i}^{(c)})))^2}{\sum_{c=1}^C \pi_c {\rm mad}_i(\alpha_{\lambda,i}^{(c)})}`` +""" +struct RobustFishersClassSeparability <: DiscriminantPower end + +""" + discriminant_power(D, tree, dp) + +Returns the discriminant power of each leaf from the local discriminant basis +(LDB) tree. 
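+
+A sketch of the call pattern for the `BasisDiscriminantMeasure` variant. The sizes and
+values below are illustrative only; `maketree` is used the same way as in the
+`getbasiscoef` example in `src/mod/Utils.jl`:
+
+```julia
+D = abs.(randn(128, 7))         # hypothetical discriminant measure matrix, levels 0-6
+tree = maketree(128, 6, :dwt)   # a valid wavelet packet tree for 128-point signals
+power, order = discriminant_power(D, tree, BasisDiscriminantMeasure())
+```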
+"""
+function discriminant_power(D::AbstractArray{T,2}, tree::BitVector,
+                            dp::BasisDiscriminantMeasure) where T<:Number
+
+    @assert length(tree) == size(D,1) - 1
+
+    power = getbasiscoef(D, tree)
+    order = sortperm(power, rev = true)
+
+    return (power, order)
+end
+
+function discriminant_power(coefs::AbstractArray{T,2},
+                            y::AbstractVector{S},
+                            dp::FishersClassSeparability) where {T<:Number, S}
+
+    n = size(coefs,1)       # signal length
+
+    classes = unique(y)
+    C = length(classes)     # number of classes
+
+    Nᵢ = Array{T,1}(undef, C)
+    Eαᵢ = Array{T,2}(undef, (n,C))      # mean of each entry
+    Varαᵢ = Array{T,2}(undef, (n,C))    # variance of each entry
+    @inbounds begin
+        for (i, c) in enumerate(classes)
+            idx = findall(yᵢ -> yᵢ == c, y)
+            Nᵢ[i] = length(idx)
+            Eαᵢ[:,i] = mean(coefs[:, idx], dims = 2)
+            Varαᵢ[:,i] = var(coefs[:, idx], dims = 2)
+        end
+    end
+    Eα = mean(Eαᵢ, dims = 2)    # overall mean of each entry
+    pᵢ = Nᵢ / sum(Nᵢ)           # proportions of each class
+
+    power = ((Eαᵢ - (Eα .* Eαᵢ)).^2 * pᵢ) ./ (Varαᵢ * pᵢ)
+    order = sortperm(power, rev = true)
+
+    return (power, order)
+end
+
+function discriminant_power(coefs::AbstractArray{T,2},
+                            y::AbstractVector{S},
+                            dp::RobustFishersClassSeparability) where {T<:Number,S}
+
+    n = size(coefs,1)       # signal length
+
+    classes = unique(y)
+    C = length(classes)
+
+    Nᵢ = Array{T,1}(undef, C)
+    Medαᵢ = Array{T,2}(undef, (n,C))    # median of each entry
+    Madαᵢ = Array{T,2}(undef, (n,C))    # median absolute deviation (MAD) of each entry
+    @inbounds begin
+        for (i, c) in enumerate(classes)
+            idx = findall(yᵢ -> yᵢ == c, y)
+            Nᵢ[i] = length(idx)
+            Medαᵢ[:,i] = median(coefs[:, idx], dims = 2)
+            Madαᵢ[:,i] = mapslices(x -> mad(x, normalize = false), coefs[:, idx],
+                dims = 2)
+        end
+    end
+    Medα = median(Medαᵢ, dims = 2)  # overall median of each entry
+    pᵢ = Nᵢ / sum(Nᵢ)               # proportions of each class
+
+    power = ((Medαᵢ - (Medα .* Medαᵢ)).^2 * pᵢ) ./ (Madαᵢ * pᵢ)
+    order = sortperm(power, rev = true)
+
+    return (power, order)
+end

From 8145f68b058c68cc70ecfd61ce643ca5a56c3f92 Mon Sep 17 00:00:00 2001
From: Zeng Fung Liew
Date: Tue, 30 Nov 2021 22:55:36 -0800
Subject: [PATCH 2/3] Generally, LDB is working for 2D cases, but a few more bug fixes are needed.
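
A quick smoke test of the intended 2D workflow (illustrative only, not part of the test
suite; it assumes the package module name `WaveletsExt` and the `@with_kw` keyword
constructor introduced in this commit):

```julia
using Wavelets, WaveletsExt

X = randn(16, 16, 6)            # six hypothetical 16x16 signals
y = [1, 1, 1, 2, 2, 2]          # two classes
f = LocalDiscriminantBasis(wt=wavelet(WT.haar), max_dec_level=2, n_features=10)
Xc = fit_transform(f, X, y)     # extract the 10 most discriminant features
size(Xc)                        # (10, 6)
```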
--- Project.toml | 1 + src/mod/DWT.jl | 2 + src/mod/LDB.jl | 417 ++++++++++++++++++++------------------- src/mod/Utils.jl | 101 ++++++---- src/mod/ldb_energymap.jl | 9 +- src/mod/ldb_measures.jl | 359 ++++++++++++++++++--------------- 6 files changed, 479 insertions(+), 410 deletions(-) diff --git a/Project.toml b/Project.toml index 5f0c149..3ced1ac 100644 --- a/Project.toml +++ b/Project.toml @@ -5,6 +5,7 @@ version = "0.1.12" [deps] AverageShiftedHistograms = "77b51b56-6f8f-5c3a-9cb4-d71f9594ea6e" +Combinatorics = "861a8166-3701-5b0c-9a16-15d98fcdc6aa" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" ImageQualityIndexes = "2996bd0c-7a13-11e9-2da2-2f5ce47296a9" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" diff --git a/src/mod/DWT.jl b/src/mod/DWT.jl index 706cd61..246016c 100644 --- a/src/mod/DWT.jl +++ b/src/mod/DWT.jl @@ -508,6 +508,7 @@ function Wavelets.Transforms.wpt!(y::AbstractArray{T,2}, # ----- Allocation and setup to match Wavelets.jl's function requirements ----- m, n = size(x) g, h = WT.makereverseqmfpair(wt, true) # low & high pass filters + y[:] = copy(x) yₜ = copy(x) # Placeholder of y temp = Array{T,2}(undef, (m,n)) # Temp array # ----- Compute transforms based on tree ----- @@ -669,6 +670,7 @@ function Wavelets.Transforms.iwpt!(x̂::AbstractArray{T,2}, # ----- Setup ----- m, n = size(xw) g, h = WT.makereverseqmfpair(wt, true) # low & high pass filters + x̂[:] = copy(xw) xwₜ = copy(xw) # Placeholder for xw temp = Array{T,2}(undef, (m,n)) # temp array # ----- Compute transforms based on tree ----- diff --git a/src/mod/LDB.jl b/src/mod/LDB.jl index cce0de5..410b8fc 100644 --- a/src/mod/LDB.jl +++ b/src/mod/LDB.jl @@ -33,13 +33,13 @@ export using LinearAlgebra, Wavelets, - Parameters, - Statistics, - StatsBase + Statistics +import Parameters: @with_kw +import StatsBase: mad using ..Utils, - ..DWT, - ..BestBasis + ..DWT +import ..BestBasis: bestbasis_treeselection include("ldb_energymap.jl") include("ldb_measures.jl") @@ -48,115 +48,116 @@ include("ldb_measures.jl") """ LocalDiscriminantBasis -Class type for the Local Discriminant Basis (LDB), a feature selection algorithm -developed by N. Saito and R. Coifman in "Local Discriminant Bases and Their -Applications" in the Journal of Mathematical Imaging and Vision, Vol 5, 337-358 -(1995). This struct contains the following field values: +Class type for the Local Discriminant Basis (LDB), a feature selection algorithm developed +by N. Saito and R. Coifman in "Local Discriminant Bases and Their Applications" in the +Journal of Mathematical Imaging and Vision, Vol 5, 337-358 (1995). This struct contains the +following field values: # Parameters and Attributes: -- `wt::DiscreteWavelet`: a discrete wavelet for transform purposes -- `max_dec_level::Union{Integer, Nothing}`: max level of wavelet packet - decomposition to be computed. -- `dm::DiscriminantMeasure`: the discriminant measure for the LDB algorithm. - Supported measures are the `AsymmetricRelativeEntropy()`, `LpDistance()`, - `SymmetricRelativeEntropy()`, and `HellingerDistance()` -- `en::EnergyMap`: the type of energy map used. Supported maps are - `TimeFrequency()`, `ProbabilityDensity()`, and `Signatures()`. -- `dp::DiscriminantPower()`: the measure of discriminant power among expansion - coefficients. Supported measures are `BasisDiscriminantMeasure()`, - `FishersClassSeparability()`, and `RobustFishersClassSeparability()`. -- `top_k::Union{Integer, Nothing}`: the top-k coefficients used in each node to - determine the discriminant measure. 
-- `n_features::Union{Integer, Nothing}`: the dimension of output after +- `wt::DiscreteWavelet`: (Default: `wavelet(WT.haar)`) A discrete wavelet for transform + purposes. +- `max_dec_level::Union{Integer, Nothing}`: (Default: `nothing`) Max level of wavelet packet + decomposition to be computed. +- `dm::DiscriminantMeasure`: (Default: `AsymmetricRelativeEntropy()`) the discriminant + measure for the LDB algorithm. Supported measures are the `AsymmetricRelativeEntropy()`, + `LpDistance()`, `SymmetricRelativeEntropy()`, and `HellingerDistance()`. +- `en::EnergyMap`: (Default: `TimeFrequency()`) the type of energy map used. Supported maps + are `TimeFrequency()`, `ProbabilityDensity()`, and `Signatures()`. +- `dp::DiscriminantPower()`: (Default: `BasisDiscriminantMeasure()`) the measure of + discriminant power among expansion coefficients. Supported measures are + `BasisDiscriminantMeasure()`, `FishersClassSeparability()`, and + `RobustFishersClassSeparability()`. +- `top_k::Union{Integer, Nothing}`: (Default: `nothing`) the top-k coefficients used in each + node to determine the discriminant measure. +- `n_features::Union{Integer, Nothing}`: (Default: `nothing`) the dimension of output after undergoing feature selection and transformation. -- `n::Union{Integer, Nothing}`: length of signal -- `Γ::Union{AbstractArray{<:AbstractFloat}, -AbstractArray{NamedTuple{(:coef, :weight), Tuple{S1, S2}}} where {S1<:Array{T} - where T<:AbstractFloat, S2<:Union{AbstractFloat, Array{<:AbstractFloat}}}, - Nothing}`: computed energy map -- `DM::Union{AbstractArray{<:AbstractFloat}, Nothing}`: computed discriminant - measure -- `cost::Union{AbstractVector{<:AbstractFloat}, Nothing}`: computed wavelet - packet decomposition (WPD) tree cost based on the discriminant measure `DM`. -- `tree::Union{BitVector, Nothing}`: computed best WPD tree based on the - discriminant measure `DM`. -- `DP::Union{AbstractVector{<:AbstractFloat}, Nothing}`: computed discriminant - power -- `order::Union{AbstractVector{Integer}, Nothing}`: ordering of `DP` by - descending order. +- `sz::Union{Vector{T} where T<:Integer, Nothing}`: (Default: `nothing`) Size of signal +- `Γ::Union{AbstractArray{<:AbstractFloat}, AbstractArray{NamedTuple{(:coef, :weight), + Tuple{S1, S2}}} where {S1<:Array{T} where T<:AbstractFloat, S2<:Union{T, Array{T}} where + T<:AbstractFloat}, Nothing}`: (Default: `nothing`) computed energy map +- `DM::Union{AbstractArray{<:AbstractFloat}, Nothing}`: (Default: `nothing`) computed + discriminant measure +- `cost::Union{AbstractVector{<:AbstractFloat}, Nothing}`: (Default: `nothing`) computed + wavelet packet decomposition (WPD) tree cost based on the discriminant measure `DM`. +- `tree::Union{BitVector, Nothing}`: (Default: `nothing`) computed best WPD tree based on + the discriminant measure `DM`. +- `DP::Union{AbstractVector{<:AbstractFloat}, Nothing}`: (Default: `nothing`) computed + discriminant power +- `order::Union{AbstractVector{Integer}, Nothing}`: (Default: `nothing`) ordering of `DP` by + descending order. 
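+
+# Examples
+A construction sketch using the keyword defaults generated by `@with_kw` (parameter
+choices are illustrative):
+```julia
+f = LocalDiscriminantBasis()                               # all defaults
+f = LocalDiscriminantBasis(wt=wavelet(WT.coif4), top_k=5)  # override selected fields
+```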
""" -mutable struct LocalDiscriminantBasis +@with_kw mutable struct LocalDiscriminantBasis # to be declared by user - wt::DiscreteWavelet - max_dec_level::Union{Integer, Nothing} - dm::DiscriminantMeasure - en::EnergyMap - dp::DiscriminantPower - top_k::Union{Integer, Nothing} - n_features::Union{Integer, Nothing} + wt::DiscreteWavelet = wavelet(WT.haar) + max_dec_level::Union{Integer, Nothing} = nothing + dm::DiscriminantMeasure = AsymmetricRelativeEntropy() + en::EnergyMap = TimeFrequency() + dp::DiscriminantPower = BasisDiscriminantMeasure() + top_k::Union{Integer, Nothing} = nothing + n_features::Union{Integer, Nothing} = nothing # to be computed in fit! - n::Union{Integer, Nothing} + sz::Union{Tuple{Vararg{T,N}} where {T<:Integer,N}, Nothing} = nothing Γ::Union{AbstractArray{<:AbstractFloat}, AbstractArray{NamedTuple{(:coef, :weight), Tuple{S1, S2}}} where {S1<:Array{T} where T<:AbstractFloat, S2<:Union{AbstractFloat, Array{<:AbstractFloat}}}, - Nothing} - DM::Union{AbstractArray{<:AbstractFloat}, Nothing} - cost::Union{AbstractVector{<:AbstractFloat}, Nothing} - tree::Union{BitVector, Nothing} - DP::Union{AbstractVector{<:AbstractFloat}, Nothing} - order::Union{AbstractVector{Integer}, Nothing} + Nothing} = nothing + DM::Union{AbstractArray{<:AbstractFloat}, Nothing} = nothing + cost::Union{AbstractVector{<:AbstractFloat}, Nothing} = nothing + tree::Union{BitVector, Nothing} = nothing + DP::Union{AbstractArray{<:AbstractFloat}, Nothing} = nothing + order::Union{AbstractVector{Integer}, Nothing} = nothing end -""" - LocalDiscriminantBasis([; - wt=wavelet(WT.haar), - max_dec_level=nothing, - dm=AsymmetricRelativeEntropy(), em=TimeFrequency(), - dp=BasisDiscriminantMeasure(), top_k=nothing, - n_features=nothing] - ) - -Class constructor for `LocalDiscriminantBasis`. - -# Arguments: -- `wt::DiscreteWavelet`: Wavelet used for decomposition of signals. Default is - set to be `wavelet(WT.haar)`. -- `max_dec_level::Union{Integer, Nothing}`: max level of wavelet packet - decomposition to be computed. When `max_dec_level=nothing`, the maximum - transform levels will be used. Default is set to be `nothing`. -- `dm::DiscriminantMeasure`: the discriminant measure for the LDB algorithm. - Supported measures are the `AsymmetricRelativeEntropy()`, `LpDistance()`, - `SymmetricRelativeEntropy()`, and `HellingerDistance()`. Default is set to - be `AsymmetricRelativeEntropy()`. -- `en::EnergyMap`: the type of energy map used. Supported maps are - `TimeFrequency()` and `ProbabilityDensity()`. Default is set to be - `TimeFrequency()`. -- `dp::DiscriminantPower=BasisDiscriminantMeasure()`: the measure of - discriminant power among expansion coefficients. Supported measures are - `BasisDiscriminantMeasure()`, `FishersClassSeparability()`, and - `RobustFishersClassSeparability()`. Default is set to be `BasisDiscriminantMeasure()`. -- `top_k::Union{Integer, Nothing}`: the top-k coefficients used in each node to - determine the discriminant measure. When `top_k=nothing`, all coefficients - are used to determine the discriminant measure. Default is set to be - `nothing`. -- `n_features::Union{Integer, Nothing}`: the dimension of output after - undergoing feature selection and transformation. When `n_features=nothing`, - all features will be returned as output. Default is set to be `nothing`. 
-""" -function LocalDiscriminantBasis(; wt::DiscreteWavelet=wavelet(WT.haar), - max_dec_level::Union{Integer, Nothing}=nothing, - dm::DiscriminantMeasure=AsymmetricRelativeEntropy(), - en::EnergyMap=TimeFrequency(), - dp::DiscriminantPower=BasisDiscriminantMeasure(), - top_k::Union{Integer, Nothing}=nothing, - n_features::Union{Integer, Nothing}=nothing) - - return LocalDiscriminantBasis( - wt, max_dec_level, dm, en, dp, top_k, n_features, - nothing, nothing, nothing, nothing, nothing, nothing, nothing - ) -end +# """ +# LocalDiscriminantBasis([; +# wt=wavelet(WT.haar), +# max_dec_level=nothing, +# dm=AsymmetricRelativeEntropy(), em=TimeFrequency(), +# dp=BasisDiscriminantMeasure(), top_k=nothing, +# n_features=nothing] +# ) + +# Class constructor for `LocalDiscriminantBasis`. + +# # Arguments: +# - `wt::DiscreteWavelet`: Wavelet used for decomposition of signals. Default is +# set to be `wavelet(WT.haar)`. +# - `max_dec_level::Union{Integer, Nothing}`: max level of wavelet packet +# decomposition to be computed. When `max_dec_level=nothing`, the maximum +# transform levels will be used. Default is set to be `nothing`. +# - `dm::DiscriminantMeasure`: the discriminant measure for the LDB algorithm. +# Supported measures are the `AsymmetricRelativeEntropy()`, `LpDistance()`, +# `SymmetricRelativeEntropy()`, and `HellingerDistance()`. Default is set to +# be `AsymmetricRelativeEntropy()`. +# - `en::EnergyMap`: the type of energy map used. Supported maps are +# `TimeFrequency()` and `ProbabilityDensity()`. Default is set to be +# `TimeFrequency()`. +# - `dp::DiscriminantPower=BasisDiscriminantMeasure()`: the measure of +# discriminant power among expansion coefficients. Supported measures are +# `BasisDiscriminantMeasure()`, `FishersClassSeparability()`, and +# `RobustFishersClassSeparability()`. Default is set to be `BasisDiscriminantMeasure()`. +# - `top_k::Union{Integer, Nothing}`: the top-k coefficients used in each node to +# determine the discriminant measure. When `top_k=nothing`, all coefficients +# are used to determine the discriminant measure. Default is set to be +# `nothing`. +# - `n_features::Union{Integer, Nothing}`: the dimension of output after +# undergoing feature selection and transformation. When `n_features=nothing`, +# all features will be returned as output. Default is set to be `nothing`. +# """ +# function LocalDiscriminantBasis(; wt::DiscreteWavelet=wavelet(WT.haar), +# max_dec_level::Union{Integer, Nothing}=nothing, +# dm::DiscriminantMeasure=AsymmetricRelativeEntropy(), +# en::EnergyMap=TimeFrequency(), +# dp::DiscriminantPower=BasisDiscriminantMeasure(), +# top_k::Union{Integer, Nothing}=nothing, +# n_features::Union{Integer, Nothing}=nothing) + +# return LocalDiscriminantBasis( +# wt, max_dec_level, dm, en, dp, top_k, n_features, +# nothing, nothing, nothing, nothing, nothing, nothing, nothing +# ) +# end """ fit!(f, X, y) @@ -167,84 +168,90 @@ signals `X` (or the decomposed signals `Xw`) with labels `y`. 
**See also:** [`LocalDiscriminantBasis`](@ref), [`fit_transform`](@ref), [`transform`](@ref), [`inverse_transform`](@ref), [`change_nfeatures`](@ref) """ -function fit!(f::LocalDiscriminantBasis, X::AbstractArray{S,2}, - y::AbstractVector{T}) where {S<:Number, T} +function fit!(f::LocalDiscriminantBasis, X::AbstractArray{S}, y::AbstractVector{T}) where + {S<:AbstractFloat, T} # basic summary of data - n, N = size(X) - + @assert 2 ≤ ndims(X) ≤ 3 + sz = size(X)[1:end-1] + L = maxtransformlevels(min(sz...)) # change LocalDiscriminantBasis parameters if necessary - f.max_dec_level = f.max_dec_level === nothing ? - maxtransformlevels(n) : f.max_dec_level - @assert 1 <= f.max_dec_level <= maxtransformlevels(n) + f.max_dec_level = isnothing(f.max_dec_level) ? L : f.max_dec_level + @assert 1 ≤ f.max_dec_level ≤ L # wavelet packet decomposition - Xw = Array{S, 3}(undef, (n,f.max_dec_level+1,N)) - @inbounds begin - for i in axes(Xw,3) - Xw[:,:,i] = wpd(X[:,i], f.wt, f.max_dec_level) - end - end + Xw = wpdall(X, f.wt, f.max_dec_level) # fit local discriminant basis fit!(f, Xw, y) return nothing end -function fit!(f::LocalDiscriminantBasis, Xw::AbstractArray{S,3}, - y::AbstractVector{T}) where {S<:Number, T} - +# TODO: parameter types are same as the one above +function fit!(f::LocalDiscriminantBasis, Xw::AbstractArray{S}, y::AbstractVector{T}) where + {S<:AbstractFloat, T} # basic summary of data + @assert 3 ≤ ndims(Xw) ≤ 4 c = unique(y) # unique classes nc = length(c) # number of classes Ny = length(y) - f.n, L, Nx = size(Xw) + f.sz = size(Xw)[1:end-2] + L = size(Xw)[end-1] + Nx = size(Xw)[end] + nelem = prod(f.sz) # total number of elements in each signal # change LocalDiscriminantBasis parameters if necessary - f.top_k = f.top_k === nothing ? f.n : f.top_k - f.n_features = f.n_features === nothing ? f.n : f.n_features - f.max_dec_level = f.max_dec_level === nothing ? L-1 : f.max_dec_level + f.top_k = isnothing(f.top_k) ? nelem : f.top_k + f.n_features = isnothing(f.n_features) ? nelem : f.n_features + f.max_dec_level = isnothing(f.max_dec_level) ? L-1 : f.max_dec_level # parameter checking @assert Nx == Ny - @assert 1 <= f.top_k <= f.n - @assert 1 <= f.n_features <= f.n + @assert 1 ≤ f.top_k ≤ nelem + @assert 1 ≤ f.n_features ≤ nelem @assert f.max_dec_level+1 == L - @assert 1 <= f.max_dec_level <= maxtransformlevels(f.n) + @assert 1 ≤ f.max_dec_level ≤ maxtransformlevels(min(f.sz...)) @assert nc > 1 - @assert isdyadic(f.n) - # construct energy map for each class + # --- Construct energy map for each class --- f.Γ = energy_map(Xw, y, f.en) - # compute discriminant measure D and obtain tree cost + # --- Compute discriminant measure D and obtain tree cost --- f.DM = discriminant_measure(f.Γ, f.dm) - f.cost = Vector{Float64}(undef, 1< vec + end + # Compute cost + if f.top_k < nθ + sort!(DMθ, rev=true) + f.cost[i] = sum(DMθ[1:f.top_k]) + else + f.cost[i] = sum(DMθ) end end - # select best tree and best set of expansion coefficients - f.tree = BestBasis.bestbasis_treeselection(f.cost, f.n, :max) + # --- Select best tree and best set of expansion coefficients --- + f.tree = bestbasis_treeselection(f.cost, f.sz..., :max) Xc = getbasiscoefall(Xw, f.tree) - # obtain and order basis functions by power of discrimination + # --- Obtain and order basis functions by power of discrimination --- (f.DP, f.order) = f.dp == BasisDiscriminantMeasure() ? 
- discriminant_power(f.DM, f.tree, f.dp) : - discriminant_power(Xc, y, f.dp) - + discriminant_power(f.DM, f.tree, f.dp) : + discriminant_power(Xc, y, f.dp) return nothing end @@ -256,32 +263,35 @@ Extract the LDB features on signals `X`. **See also:** [`LocalDiscriminantBasis`](@ref), [`fit!`](@ref), [`fit_transform`](@ref), [`inverse_transform`](@ref), [`change_nfeatures`](@ref) """ -function transform(f::LocalDiscriminantBasis, X::AbstractArray{T,2}) where T +function transform(f::LocalDiscriminantBasis, X::AbstractArray{T}) where T # check necessary measurements - n, N = size(X) - @assert f.max_dec_level !== nothing - @assert f.top_k !== nothing - @assert f.n_features !== nothing - @assert f.n !== nothing - @assert f.Γ !== nothing - @assert f.DM !== nothing - @assert f.cost !== nothing - @assert f.tree !== nothing - @assert f.DP !== nothing - @assert f.order !== nothing - @assert n == f.n - @assert 1 <= f.max_dec_level <= maxtransformlevels(f.n) - @assert 1 <= f.top_k <= f.n - @assert 1 <= f.n_features <= f.n + sz = size(X)[1:end-1] + N = size(X)[end] + nelem = prod(f.sz) # total number of elements in each signal + @assert 2 ≤ ndims(X) ≤ 3 + @assert !isnothing(f.max_dec_level) + @assert !isnothing(f.top_k) + @assert !isnothing(f.n_features) + @assert !isnothing(f.sz) + @assert !isnothing(f.Γ) + @assert !isnothing(f.DM) + @assert !isnothing(f.cost) + @assert !isnothing(f.tree) + @assert !isnothing(f.DP) + @assert !isnothing(f.order) + @assert sz == f.sz + @assert 1 ≤ f.max_dec_level ≤ maxtransformlevels(min(f.sz...)) + @assert 1 ≤ f.top_k ≤ nelem + @assert 1 ≤ f.n_features ≤ nelem # wpt on X based on given f.tree - Xc = Array{T, 2}(undef, (n,N)) - @inbounds begin - for i in axes(Xc,2) - Xc[:,i] = wpt(X[:,i], f.wt, f.tree) - end + Xw = wptall(X, f.wt, f.tree) + # Extract best features + Xc = Array{T,2}(undef, f.n_features, N) + for (i, xw) in enumerate(eachslice(Xw, dims=ndims(Xw))) + Xc[:,i] = xw[f.order[1:f.n_features]] end - return Xc[f.order[1:f.n_features],:] + return Xc end """ @@ -292,31 +302,32 @@ Fit and transform the signals `X` with labels `y` based on the LDB class `f`. **See also:** [`LocalDiscriminantBasis`](@ref), [`fit!`](@ref), [`transform`](@ref), [`inverse_transform`](@ref), [`change_nfeatures`](@ref) """ -function fit_transform(f::LocalDiscriminantBasis, X::AbstractArray{S,2}, - y::AbstractVector{T}) where {S<:Number, T} - +function fit_transform(f::LocalDiscriminantBasis, + X::AbstractArray{S}, + y::AbstractVector{T}) where {S<:AbstractFloat, T} # get necessary measurements - n, N = size(X) - f.max_dec_level = f.max_dec_level===nothing ? - maxtransformlevels(n) : f.max_dec_level - @assert 1 <= f.max_dec_level <= maxtransformlevels(n) + @assert 2 ≤ ndims(X) ≤ 3 + sz = size(X)[1:end-1] + N = size(X)[end] + f.max_dec_level = isnothing(f.max_dec_level) ? 
maxtransformlevels(min(sz...)) : f.max_dec_level + @assert 1 ≤ f.max_dec_level ≤ maxtransformlevels(min(sz...)) # wpd on X - Xw = Array{S, 3}(undef, (n,f.max_dec_level+1,N)) - @inbounds begin - for i in axes(Xw,3) - Xw[:,:,i] = wpd(X[:,i], f.wt, f.max_dec_level) - end - end + Xw = wpdall(X, f.wt, f.max_dec_level) # fit LDB and return best features fit!(f, Xw, y) - Xc = getbasiscoefall(Xw, f.tree) - return Xc[f.order[1:f.n_features],:] + Xw = getbasiscoefall(Xw, f.tree) + # Extract best features + Xc = Array{S,2}(undef, f.n_features, N) + for (i, xw) in enumerate(eachslice(Xw, dims=ndims(Xw))) + Xc[:,i] = xw[f.order[1:f.n_features]] + end + return Xc end """ - inverse_transform(f, x) + inverse_transform(f, X) Compute the inverse transform on the feature matrix `x` to form the original signal based on the LDB class `f`. @@ -324,25 +335,23 @@ signal based on the LDB class `f`. **See also:** [`LocalDiscriminantBasis`](@ref), [`fit!`](@ref), [`fit_transform`](@ref), [`transform`](@ref), [`change_nfeatures`](@ref) """ -function inverse_transform(f::LocalDiscriminantBasis, - x::AbstractArray{T,2}) where T<:Number - +function inverse_transform(f::LocalDiscriminantBasis, X::AbstractArray{T,2}) where + T<:AbstractFloat # get necessary measurements - @assert size(x,1) == f.n_features - N = size(x,2) + @assert size(X,1) == f.n_features + N = size(X,2) # insert the features x into the full matrix X padded with 0's - Xc = zeros(f.n, N) - Xc[f.order[1:f.n_features],:] = x - - # iwpt on X - X = Array{T,2}(undef, (f.n, N)) - @inbounds begin - for i in axes(Xc, 2) - X[:,i] = iwpt(Xc[:,i], f.wt, f.tree) + Xc = zeros(T, (f.sz..., N)) + @views for (xc, x) in zip(eachslice(Xc, dims=ndims(Xc)), eachslice(X, dims=2)) + for (i,j) in enumerate(f.order[1:f.n_features]) + xc[j] = x[i] end end - return X + + # iwpt on X + Xₜ = iwptall(Xc, f.wt, f.tree) + return Xₜ end """ @@ -362,13 +371,12 @@ function change_nfeatures(f::LocalDiscriminantBasis, x::AbstractArray{T,2}, n_features::Integer) where T<:Number # check measurements - @assert f.n_features !== nothing - @assert size(x,1) == f.n_features || - throw(ArgumentError("f.n_features and number of rows of x do not match!")) - @assert 1 <= n_features <= f.n + @assert !isnothing(f.n_features) + @assert size(x,1) == f.n_features || throw(ArgumentError("f.n_features and number of rows of x do not match!")) + @assert 1 ≤ n_features ≤ f.n # change number of features - if f.n_features >= n_features + if f.n_features ≥ n_features f.n_features = n_features y = x[1:f.n_features,:] else @@ -377,7 +385,6 @@ function change_nfeatures(f::LocalDiscriminantBasis, x::AbstractArray{T,2}, f.n_features = n_features y = transform(f, X) end - return y end diff --git a/src/mod/Utils.jl b/src/mod/Utils.jl index 174eb45..52f9161 100644 --- a/src/mod/Utils.jl +++ b/src/mod/Utils.jl @@ -98,32 +98,42 @@ tree = maketree(128, 6, :dwt) xw = getbasiscoef(Xw, tree) ``` """ -function getbasiscoef(Xw::AbstractArray{T,2}, - tree::BitVector) where T<:Number +function getbasiscoef(Xw::AbstractArray{T}, tree::BitVector) where T<:Number # Setup and Sanity Check - n, k = size(Xw) - nₜ = length(tree) - leaf = getleaf(tree, :binary) - @assert k-1 ≤ maxtransformlevels(n) - @assert isvalidtree(Xw[:,1], tree) - @assert n == nₜ+1 - @assert nₜ+n == length(leaf) - xw = Array{T,1}(undef, n) + N = ndims(Xw) + @assert 2 ≤ N ≤ 3 + sz = size(Xw)[1:end-1] + k = size(Xw)[end] + x = N==2 ? view(Xw,:,1) : view(Xw,:,:,1) + L = maxtransformlevels(x) + @assert isvalidtree(x, tree) + @assert k-1 ≤ L + leaf = N==2 ? 
getleaf(tree, :binary) : getleaf(tree, :quad)
+    leaf_len = N==2 ? gettreelength(1<<(L+1)) : gettreelength(1<<(L+1),1<<(L+1))
+    @assert leaf_len == length(leaf)
+    xw = Array{T}(undef, sz)
 
     # Extract basis coefficients
     for (i, isleaf) in enumerate(leaf)
         if isleaf
-            d = getdepth(i,:binary)     # Depth of node (0 for root node)
+            d = N==2 ? getdepth(i,:binary) : getdepth(i, :quad) # Depth of node (0 for root node)
             d < k || throw(ArgumentError("Not enough decomposition levels in Xw."))
-            nn = i-1< isvalidtree(Xw[:,1,1],treeᵢ), tree, dims=1) )
+    @assert k-1 ≤ maxtransformlevels(x)
+    @assert all( mapslices(treeᵢ -> isvalidtree(x,treeᵢ), tree, dims=1) )
     @assert m == mₜ
-    @assert n == nₜ+1
-    xw = Array{T,2}(undef, (n,m))
+    @assert nₜ == gettreelength(sz...)
+    xw = Array{T}(undef, (sz...,m))
 
     # Extract basis coefficients
-    for i in 1:mₜ
-        Xwᵢ = @view Xw[:,:,i]
-        treeᵢ = tree[:,i]
-        @inbounds xw[:,i] = getbasiscoef(Xwᵢ, treeᵢ)
+    for (i, (Xwᵢ, xwᵢ)) in enumerate(zip(eachslice(Xw, dims=N), eachslice(xw, dims=N-1)))
+        @inbounds treeᵢ = tree[:,i]
+        @inbounds xwᵢ[:] = getbasiscoef(Xwᵢ, treeᵢ)
     end
     return xw
 end
-
 # Length of node at level L
 """
     nodelength(N, L)
diff --git a/src/mod/ldb_energymap.jl b/src/mod/ldb_energymap.jl
index 62507c4..1382a49 100644
--- a/src/mod/ldb_energymap.jl
+++ b/src/mod/ldb_energymap.jl
@@ -1,4 +1,4 @@
-import AverageShiftedHistograms: ash, xy, pdf
+import AverageShiftedHistograms: ash, xy, pdf, Kernels
 
 ## ---------- ENERGY MAPS ----------
 """
@@ -66,11 +66,10 @@ end
 """
     energy_map(Xw, y, method)
 
-Returns the Time Frequency Energy map or the Probability Density Energy map
-depending on the input `method` (`TimeFrequency()` or `ProbabilityDensity()`).
+Returns the Time Frequency Energy map or the Probability Density Energy map depending on the
+input `method` (`TimeFrequency()` or `ProbabilityDensity()`).
 
-**See also:** [`EnergyMap`](@ref). [`TimeFrequency`](@ref),
-    [`ProbabilityDensity`](@ref)
+**See also:** [`EnergyMap`](@ref), [`TimeFrequency`](@ref), [`ProbabilityDensity`](@ref)
 """
 function energy_map(Xw::AbstractArray{S}, y::AbstractVector{T},
                     method::TimeFrequency) where {S<:AbstractFloat, T}
diff --git a/src/mod/ldb_measures.jl b/src/mod/ldb_measures.jl
index 9b4332e..729cccc 100644
--- a/src/mod/ldb_measures.jl
+++ b/src/mod/ldb_measures.jl
@@ -1,4 +1,6 @@
-## DISCRIMINANT MEASURES
+import Combinatorics: combinations
+
+## ---------- DISCRIMINANT MEASURES ----------
 
 """
 Discriminant measure for Local Discriminant Basis. Current available subtypes
 are:
@@ -44,7 +46,10 @@ this aims to make the measure more symmetric.
 
 Equation: Denote the Asymmetric Relative Entropy as ``D_A(p,q)``, then
 
-``D(p,q) = D_A(p,q) + D_A(q,p) = \sum p(x) \log \frac{p(x)}{q(x)} + q(x) \log \frac{q(x)}{p(x)}``
+``D(p,q) = D_A(p,q) + D_A(q,p) = \sum p(x) \log \frac{p(x)}{q(x)} + q(x) \log
+\frac{q(x)}{p(x)}``
+
+**See also:** [`AsymmetricRelativeEntropy`](@ref)
 """
 struct SymmetricRelativeEntropy <: ProbabilityDensityDM end
 
@@ -75,12 +80,12 @@ struct HellingerDistance <: ProbabilityDensityDM end
 
 Earth Mover Distance discriminant measure for the Signatures energy map.
 
-Equation:
-``E(P,Q) = \frac{\sum_{k=1}^{m+n+1} |\hat p_k - \hat q_k| (r_{k+1} - r_k)}{w_\Sigma}``
+Equation: ``E(P,Q) = \frac{\sum_{k=1}^{m+n+1} |\hat p_k - \hat q_k| (r_{k+1} -
+r_k)}{w_\Sigma}``
 
-where ``r_1, r_2, \ldots, r_{m+n}`` is the sorted list of ``p_1, \ldots, p_m,
-q_1, \ldots, q_n`` and ``\hat p_k = \sum_{p_i \leq r_k} w_{p_i}``,
-``\hat q_k = \sum_{q)i \leq r_k} w_{q_i}``. 
+where ``r_1, r_2, \ldots, r_{m+n}`` is the sorted list of ``p_1, \ldots, p_m, q_1, \ldots,
+q_n`` and ``\hat p_k = \sum_{p_i \leq r_k} w_{p_i}``, ``\hat q_k = \sum_{q_i \leq r_k}
+w_{q_i}``.
 """
 struct EarthMoverDistance <: SignaturesDM end
 
@@ -89,163 +94,183 @@ struct EarthMoverDistance <: SignaturesDM end
 
 Returns the discriminant measure of each node calculated from the energy maps.
 """
-function discriminant_measure(Γ::AbstractArray{T},
-                              dm::ProbabilityDensityDM) where T<:Number
-
-    # basic summary of data
-    @assert 3 <= ndims(Γ) <= 4
-    n = size(Γ,1)
-    L = size(Γ,2)
-    C = size(Γ)[end]
-    @assert C > 1 # ensure more than 1 class
-
-    D = zeros(Float64, (n,L))
-    @inbounds begin
-        for i in 1:(C-1)
-            for j in (i+1):C
-                if ndims(Γ) == 3
-                    D += discriminant_measure(Γ[:,:,i], Γ[:,:,j], dm)
-                else
-                    D += discriminant_measure(Γ[:,:,:,i], Γ[:,:,:,j], dm)
-                end
-            end
-        end
+function discriminant_measure(Γ::AbstractArray{T}, dm::ProbabilityDensityDM) where
+    T<:AbstractFloat
+    # Basic summary of data
+    N = ndims(Γ)
+    nc = size(Γ,N)
+    @assert 3 ≤ N ≤ 5
+    @assert nc > 1
+    #=======================================================================================
+    Classifying the type of signals and energy maps based on Γ
+
+    1. ndims(Γ)=3 : Time frequency energy map on 1D signals
+    2. ndims(Γ)=4 and size(Γ,3) small (<100): Time frequency energy map on 2D signals
+    3. ndims(Γ)=4 and size(Γ,3) large (≥100): Probability density energy map on 1D signals
+    4. ndims(Γ)=5 : Probability density energy map on 2D signals
+    =======================================================================================#
+    if (N==3) || (N==4 && size(Γ,3)≥100)
+        sz = [size(Γ,1)]
+        L = size(Γ,2)
+        pdf_len = N==3 ? 1 : size(Γ,3)
+    elseif (N==4 && size(Γ,3)<100) || (N==5)
+        sz = size(Γ)[1:2]
+        L = size(Γ,3)
+        pdf_len = N==5 ? 
size(Γ,4) : 1 + else + throw(ArgumentError("Γ has uninterpretable dimensions/unknown size.")) end + # --- Compute pairwise discriminant measure --- + D = zeros(T, (sz...,L)) + for (i,j) in combinations(1:nc,2) + if N==3 + Γᵢ = @view Γ[:,:,i] # Energy map for i-th class + Γⱼ = @view Γ[:,:,j] # Energy map for j-th class + elseif N==4 + Γᵢ = @view Γ[:,:,:,i] # Energy map for i-th class + Γⱼ = @view Γ[:,:,:,j] # Energy map for j-th class + else # N==5 + Γᵢ = @view Γ[:,:,:,:,i] # Energy map for i-th class + Γⱼ = @view Γ[:,:,:,:,j] # Energy map for j-th class + end + D += pairwise_discriminant_measure(Γᵢ, Γⱼ, dm) + end return D end # discriminant measure for EMD -function discriminant_measure(Γ::AbstractArray{NamedTuple{(:coef, :weight), Tuple{S1,S2}},1}, +function discriminant_measure(Γ::AbstractArray{NamedTuple{(:coef, :weight), Tuple{S₁,S₂}},1}, dm::SignaturesDM) where - {S1<:Array{<:Number}, - S2<:Union{AbstractFloat,Array{<:AbstractFloat}}} + {S₁<:Array{T} where T<:AbstractFloat, + S₂<:Union{T,Array{T}} where T<:AbstractFloat} # Basic summary of data - C = length(Γ) - @assert C > 1 - n = size(Γ[1][:coef], 1) - L = size(Γ[1][:coef], 2) - - D = zeros(Float64, (n,L)) - @inbounds begin - for i in 1:(C-1) - for j in (i+1):C - D += discriminant_measure(Γ[i], Γ[j], dm) - end - end + nc = length(Γ) + @assert nc > 1 + T = eltype(Γ[1][:coef]) + sz = size(Γ[1][:coef])[1:end-2] + L = size(Γ[1][:coef])[end-1] + + D = zeros(T, (sz...,L)) + for (Γᵢ,Γⱼ) in combinations(Γ,2) + D += pairwise_discriminant_measure(Γᵢ, Γⱼ, dm) end - return D end # discriminant measure between 2 energy maps -function discriminant_measure(Γ₁::AbstractArray{T}, Γ₂::AbstractArray{T}, - dm::ProbabilityDensityDM) where T<:Number - +function pairwise_discriminant_measure(Γ₁::AbstractArray{T}, Γ₂::AbstractArray{T}, + dm::ProbabilityDensityDM) where T<:AbstractFloat # parameter checking and basic summary - @assert 2 <= ndims(Γ₁) <= 3 + N = ndims(Γ₁) + @assert 2 ≤ ndims(Γ₁) ≤ 4 @assert size(Γ₁) == size(Γ₂) - n = size(Γ₁,1) - L = size(Γ₁,2) - - D = Array{T, 2}(undef, (n,L)) - @inbounds begin - for i in axes(D,1) - for j in axes(D,2) - if ndims(Γ₁) == 2 # time frequency energy map case - D[i,j] = discriminant_measure(Γ₁[i,j], Γ₂[i,j], dm) - else # probability density energy map case - for k in axes(Γ₁,3) - D[i,j] += discriminant_measure(Γ₁[i,j,k], Γ₂[i,j,k], dm) - end - end - end - end + #======================================================================================= + Classifying the type of signals and energy maps based on Γ + + 1. ndims(Γ)=2 : Time frequency energy map on 1D signals + 2. ndims(Γ)=3 and size(Γ,3) small (<100): Time frequency energy map on 2D signals + 3. ndims(Γ)=3 and size(Γ,3) large (≥100): Probability density energy map on 1D signals + 4. 
ndims(Γ)=4 : Probability density energy map on 2D signals + =======================================================================================# + if (N==2) || (N==3 && size(Γ₁,3)≥100) + sz = [size(Γ₁,1)] + L = size(Γ₁,2) + elseif (N==3 && size(Γ₁,3)<100) || (N==4) + sz = size(Γ₁)[1:2] + L = size(Γ₁,3) + else + throw(ArgumentError("Γ has uninterpretable dimensions/unknown size.")) end + # --- Pairwise discriminant measure for each element --- + D = zeros(T, (sz...,L)) + slice_size = prod([sz...,L]) # Number of elements in each slice of the pdf + map_size = prod(size(Γ₁)) # Number of elements in entire energy map + for i in 1:slice_size + for j in i:slice_size:map_size + D[i] += pairwise_discriminant_measure(Γ₁[j], Γ₂[j], dm) + end + end return D end # discriminant measure between 2 nergy maps for EMD -function discriminant_measure(Γ₁::NamedTuple{(:coef, :weight), Tuple{S1,S2}}, - Γ₂::NamedTuple{(:coef, :weight), Tuple{S1,S2}}, - dm::SignaturesDM) where - {S1<:Array{T} where T<:Number, - S2<:Union{AbstractFloat,Array{<:AbstractFloat}}} +function pairwise_discriminant_measure(Γ₁::NamedTuple{(:coef, :weight), Tuple{S₁,S₂}}, + Γ₂::NamedTuple{(:coef, :weight), Tuple{S₁,S₂}}, + dm::SignaturesDM) where + {S₁<:Array{T} where T<:AbstractFloat, + S₂<:Union{T,Array{T}} where T<:AbstractFloat} # parameter checking and basic summary - n = size(Γ₁[:coef],1) - L = size(Γ₁[:coef],2) - @assert n == size(Γ₁[:coef],1) == size(Γ₂[:coef],1) - @assert L == size(Γ₁[:coef],2) == size(Γ₂[:coef],2) - - D = Array{Float64,2}(undef, (n,L)) - for i in 1:n - for j in 1:L - # signatures - if typeof(Γ₁[:weight]) <: AbstractFloat # equal weight - P = (coef=Γ₁[:coef][i,j,:], weight=Γ₁[:weight]) - Q = (coef=Γ₂[:coef][i,j,:], weight=Γ₂[:weight]) - else # probability density weight - P = (coef=Γ₁[:coef][i,j,:], weight=Γ₁[:weight][i,j,:]) - Q = (coef=Γ₂[:coef][i,j,:], weight=Γ₂[:weight][i,j,:]) - end - D[i,j] = discriminant_measure(P, Q, dm) + N = ndims(Γ₁.coef) + T = eltype(Γ₁.coef) + sz = size(Γ₁.coef)[1:end-2] + L = size(Γ₁.coef)[end-1] + @assert isa(Γ₁.weight, AbstractFloat) || size(Γ₁.coef) == size(Γ₁.weight) + @assert isa(Γ₂.weight, AbstractFloat) || size(Γ₂.coef) == size(Γ₂.weight) + @assert typeof(Γ₁.weight) == typeof(Γ₂.weight) + @assert sz == size(Γ₂.coef)[1:end-2] + @assert L == size(Γ₂.coef)[end-1] + + D = Array{T,N-1}(undef, (sz...,L)) + slice_size = prod([sz...,L]) # Number of elements for each signal's coefficients + for i in 1:slice_size + # Signatures + if isa(Γ₁.weight, AbstractFloat) # Equal weight + P = (coef = Γ₁.coef[i:slice_size:end], weight = Γ₁.weight) + Q = (coef = Γ₂.coef[i:slice_size:end], weight = Γ₂.weight) + else # Probability density weight + P = (coef = Γ₁.coef[i:slice_size:end], weight = Γ₁.weight[i:slice_size:end]) + Q = (coef = Γ₂.coef[i:slice_size:end], weight = Γ₂.weight[i:slice_size:end]) end + D[i] = pairwise_discriminant_measure(P, Q, dm) end return D end # Asymmetric Relative Entropy -function discriminant_measure(p::T, q::T, dm::AsymmetricRelativeEntropy) where - T<:Number - - # parameter checking - @assert p >= 0 && q >= 0 - - if p == 0 || q == 0 - return 0 - else - return p * log(p/q) - end +function pairwise_discriminant_measure(p::T, q::T, dm::AsymmetricRelativeEntropy) where + T<:AbstractFloat + @assert p ≥ 0 && q ≥ 0 + return (p==0 || q==0) ? 
0 : p * log(p/q) end # Symmetric Relative Entropy -function discriminant_measure(p::T, q::T, dm::SymmetricRelativeEntropy) where - T<:Number - - return discriminant_measure(p, q, AsymmetricRelativeEntropy()) + - discriminant_measure(q, p, AsymmetricRelativeEntropy()) +function pairwise_discriminant_measure(p::T, q::T, dm::SymmetricRelativeEntropy) where + T<:AbstractFloat + return pairwise_discriminant_measure(p, q, AsymmetricRelativeEntropy()) + + pairwise_discriminant_measure(q, p, AsymmetricRelativeEntropy()) end # Hellinger Distance -function discriminant_measure(p::T, q::T, dm::HellingerDistance) where T<:Number +function pairwise_discriminant_measure(p::T, q::T, dm::HellingerDistance) where + T<:AbstractFloat return (sqrt(p) - sqrt(q))^2 end # Lᵖ Distance -function discriminant_measure(p::T, q::T, dm::LpDistance) where T<:Number +function pairwise_discriminant_measure(p::T, q::T, dm::LpDistance) where T<:AbstractFloat return (p - q)^dm.p end # Earth Mover Distance -function discriminant_measure(P::NamedTuple{(:coef, :weight), Tuple{S1,S2}}, - Q::NamedTuple{(:coef, :weight), Tuple{S1, S2}}, - dm::EarthMoverDistance) where - {S1<:Vector{T} where T<:Number, - S2<:Union{AbstractFloat,Vector{<:AbstractFloat}}} - +function pairwise_discriminant_measure(P::NamedTuple{(:coef, :weight), Tuple{S1,S2}}, + Q::NamedTuple{(:coef, :weight), Tuple{S1, S2}}, + dm::EarthMoverDistance) where + {S1<:Vector{T} where T<:AbstractFloat, + S2<:Union{T,Vector{T}} where T<:AbstractFloat} # assigning tuple signatures into coef and weight p, w_p = P q, w_q = Q # sort signature values p_order = sortperm(p) - p = p[p_order] - w_p = typeof(w_p)<:AbstractFloat ? repeat([w_p], length(p)) : w_p[p_order] q_order = sortperm(q) + p = p[p_order] q = q[q_order] - w_q = typeof(w_q)<:AbstractFloat ? repeat([w_q], length(q)) : w_q[q_order] + w_p = isa(w_p, AbstractFloat) ? repeat([w_p], length(p)) : w_p[p_order] + w_q = isa(w_q, AbstractFloat) ? repeat([w_q], length(q)) : w_q[q_order] # concatenate p and q, then sort them r = [p; q] @@ -256,12 +281,10 @@ function discriminant_measure(P::NamedTuple{(:coef, :weight), Tuple{S1,S2}}, emd = 0 for i in 1:(n-1) # get total weight of p and q less than or equal to r[i] - p_less = p .<= r[i] - ∑w_p = sum(w_p[p_less]) - q_less = q .<= r[i] - ∑w_q = sum(w_q[q_less]) + ∑w_p = sum(w_p[p .≤ r[i]]) + ∑w_q = sum(w_q[q .≤ r[i]]) # add into emd - emd += abs(∑w_p - ∑w_q) * (r[i+1] - r[i]) + @inbounds emd += abs(∑w_p - ∑w_q) * (r[i+1] - r[i]) end emd /= (sum(w_p) + sum(w_q)) return emd @@ -311,72 +334,86 @@ struct RobustFishersClassSeparability <: DiscriminantPower end Returns the discriminant power of each leaf from the local discriminant basis (LDB) tree. 
""" -function discriminant_power(D::AbstractArray{T,2}, tree::BitVector, - dp::BasisDiscriminantMeasure) where T<:Number - - @assert length(tree) == size(D,1) - 1 +function discriminant_power(D::AbstractArray{T}, tree::BitVector, + dp::BasisDiscriminantMeasure) where T<:AbstractFloat + @assert 2 ≤ ndims(D) ≤ 3 + N = ndims(D) + sz = size(D)[1:end-1] + tmp = Array{T,N-1}(undef, sz) + @assert isvalidtree(tmp, tree) power = getbasiscoef(D, tree) - order = sortperm(power, rev = true) + order = sortperm(vec(power), rev = true) return (power, order) end -function discriminant_power(coefs::AbstractArray{T,2}, - y::AbstractVector{S}, - dp::FishersClassSeparability) where {T<:Number, S} - - n = size(coefs,1) # signal length +function discriminant_power(coefs::AbstractArray{T}, y::AbstractVector{S}, + dp::FishersClassSeparability) where {T<:AbstractFloat, S} + N = ndims(coefs) + @assert 2 ≤ N ≤ 3 + sz = size(coefs)[1:end-1] # signal length - classes = unique(y) - C = length(coefs) # number of classes + C = unique(y) + Nc = length(C) # number of classes - Nᵢ = Array{T,1}(undef, C) - Eαᵢ = Array{T,2}(undef, (n,C)) # mean of each entry - Varαᵢ = Array{T,2}(undef, (n,C)) # variance of each entry - @inbounds begin - for (i, c) in enumerate(classes) - idx = findall(yᵢ -> yᵢ == c, y) - Nᵢ[i] = length(idx) - Eαᵢ[:,i] = mean(coefs[:, idx], dims = 2) - Varαᵢ[:,i] = var(coefs[:, idx], dims = 2) + Nᵢ = Array{T,1}(undef, Nc) + Eαᵢ = Array{T,N}(undef, (sz...,Nc)) # mean of each entry + Varαᵢ = Array{T,N}(undef, (sz...,Nc)) # variance of each entry + for (i, c) in enumerate(C) + idx = findall(yᵢ -> yᵢ==c, y) + Nᵢ[i] = length(idx) + if N==2 + coefsᵢ = coefs[:,idx] + Eαᵢ[:,i] = mean(coefsᵢ, dims = N) + Varαᵢ[:,i] = var(coefsᵢ, dims = N) + elseif N==3 + coefsᵢ = coefs[:,:,idx] + Eαᵢ[:,:,i] = mean(coefsᵢ, dims = N) + Varαᵢ[:,:,i] = var(coefsᵢ, dims = N) end end - Eα = mean(Eαᵢ, dims = 2) # overall mean of each entry + Eα = mean(Eαᵢ, dims = N) # overall mean of each entry pᵢ = Nᵢ / sum(Nᵢ) # proportions of each class + # TODO: This does not work when dealing with 2D signals (multiplication issue) power = ((Eαᵢ - (Eα .* Eαᵢ)).^2 * pᵢ) ./ (Varαᵢ * pᵢ) - order = sortperm(power, rev = true) + order = sortperm(vec(power), rev = true) return (power, order) end -function discriminant_power(coefs::AbstractArray{T,2}, - y::AbstractVector{S}, - dp::RobustFishersClassSeparability) where {T<:Number,S} - - n = size(coefs,1) # signal length +function discriminant_power(coefs::AbstractArray{T}, y::AbstractVector{S}, + dp::RobustFishersClassSeparability) where {T<:AbstractFloat,S} + N = ndims(coefs) + @assert 2 ≤ N ≤ 3 + sz = size(coefs)[1:end-1] # signal length - classes = unique(y) - C = length(classes) - - Nᵢ = Array{T,1}(undef, C) - Medαᵢ = Array{T,2}(undef, (n,C)) # mean of each entry - Madαᵢ = Array{T,2}(undef, (n,C)) # variance of each entry - @inbounds begin - for (i, c) in enumerate(classes) - idx = findall(yᵢ -> yᵢ == c, y) - Nᵢ[i] = length(idx) - Medαᵢ[:,i] = median(coefs[:, idx], dims = 2) - Madαᵢ[:,i] = mapslices(x -> mad(x, normalize = false), coefs[:, idx], - dims = 2) + C = unique(y) + Nc = length(C) # number of classes + + Nᵢ = Array{T,1}(undef, Nc) + Medαᵢ = Array{T,N}(undef, (sz...,Nc)) # mean of each entry + Madαᵢ = Array{T,N}(undef, (sz...,Nc)) # variance of each entry + for (i, c) in enumerate(C) + idx = findall(yᵢ -> yᵢ==c, y) + Nᵢ[i] = length(idx) + if N==2 + coefsᵢ = coefs[:,idx] + Medαᵢ[:,i] = median(coefsᵢ, dims = N) + Madαᵢ[:,i] = mapslices(x -> mad(x, normalize=false), coefsᵢ, dims = N) + elseif N==3 + 
coefsᵢ = coefs[:,:,idx]
+            Medαᵢ[:,:,i] = median(coefsᵢ, dims = N)
+            Madαᵢ[:,:,i] = mapslices(x -> mad(x, normalize=false), coefsᵢ, dims = N)
        end
    end
 
-    Medα = median(Medαᵢ, dims = 2)          # overall mean of each entry
+    Medα = median(Medαᵢ, dims = N)          # overall median of each entry
     pᵢ = Nᵢ / sum(Nᵢ)                       # proportions of each class
 
+    # TODO: This does not work when dealing with 2D signals (multiplication issue)
     power = ((Medαᵢ - (Medα .* Medαᵢ)).^2 * pᵢ) ./ (Madαᵢ * pᵢ)
-    order = sortperm(power, rev = true)
+    order = sortperm(vec(power), rev = true)
 
     return (power, order)
 end

From 465b8887ba0d8f077c2ee946c4495201f70fd6fb Mon Sep 17 00:00:00 2001
From: Zeng Fung Liew
Date: Wed, 1 Dec 2021 09:19:11 -0800
Subject: [PATCH 3/3] Fixed remaining bugs in LDB. Added tests for 2D LDB in CI.

---
 src/mod/LDB.jl          |  14 +--
 src/mod/ldb_measures.jl |  18 ++-
 test/ldb.jl             | 261 +++++++++++++++++++++++++++------------
 3 files changed, 197 insertions(+), 96 deletions(-)

diff --git a/src/mod/LDB.jl b/src/mod/LDB.jl
index 410b8fc..13061c4 100644
--- a/src/mod/LDB.jl
+++ b/src/mod/LDB.jl
@@ -26,6 +26,7 @@ export
     # local discriminant basis
     LocalDiscriminantBasis,
     fit!,
+    fitdec!,
     fit_transform,
     transform,
     inverse_transform,
@@ -95,7 +96,7 @@ following field values:
     dp::DiscriminantPower = BasisDiscriminantMeasure()
     top_k::Union{Integer, Nothing} = nothing
     n_features::Union{Integer, Nothing} = nothing
-    # to be computed in fit!
+    # to be computed in fit! or fitdec!
     sz::Union{Tuple{Vararg{T,N}} where {T<:Integer,N}, Nothing} = nothing
     Γ::Union{AbstractArray{<:AbstractFloat},
              AbstractArray{NamedTuple{(:coef, :weight), Tuple{S1, S2}}} where
@@ -183,13 +184,12 @@ function fit!(f::LocalDiscriminantBasis, X::AbstractArray{S}, y::AbstractVector{
     Xw = wpdall(X, f.wt, f.max_dec_level)
 
     # fit local discriminant basis
-    fit!(f, Xw, y)
+    fitdec!(f, Xw, y)
     return nothing
 end
 
-# TODO: parameter types are same as the one above
-function fit!(f::LocalDiscriminantBasis, Xw::AbstractArray{S}, y::AbstractVector{T}) where
-    {S<:AbstractFloat, T}
+function fitdec!(f::LocalDiscriminantBasis, Xw::AbstractArray{S}, y::AbstractVector{T}) where
+    {S<:AbstractFloat, T}
     # basic summary of data
     @assert 3 ≤ ndims(Xw) ≤ 4
     c = unique(y)                       # unique classes
@@ -316,7 +316,7 @@ function fit_transform(f::LocalDiscriminantBasis,
     Xw = wpdall(X, f.wt, f.max_dec_level)
 
     # fit LDB and return best features
-    fit!(f, Xw, y)
+    fitdec!(f, Xw, y)
     Xw = getbasiscoefall(Xw, f.tree)
     # Extract best features
     Xc = Array{S,2}(undef, f.n_features, N)
@@ -373,7 +373,7 @@ function change_nfeatures(f::LocalDiscriminantBasis, x::AbstractArray{T,2},
     # check measurements
     @assert !isnothing(f.n_features)
     @assert size(x,1) == f.n_features || throw(ArgumentError("f.n_features and number of rows of x do not match!"))
-    @assert 1 ≤ n_features ≤ f.n
+    @assert 1 ≤ n_features ≤ prod(f.sz)
 
     # change number of features
     if f.n_features ≥ n_features
diff --git a/src/mod/ldb_measures.jl b/src/mod/ldb_measures.jl
index 729cccc..9d5edd8 100644
--- a/src/mod/ldb_measures.jl
+++ b/src/mod/ldb_measures.jl
@@ -376,8 +376,13 @@ function discriminant_power(coefs::AbstractArray{T}, y::AbstractVector{S},
     Eα = mean(Eαᵢ, dims = N)                # overall mean of each entry
     pᵢ = Nᵢ / sum(Nᵢ)                       # proportions of each class
 
-    # TODO: This does not work when dealing with 2D signals (multiplication issue)
-    power = ((Eαᵢ - (Eα .* Eαᵢ)).^2 * pᵢ) ./ (Varαᵢ * pᵢ)
+    if N==2 # For 1D signals, can be done via matrix multiplication
+        power = ((Eαᵢ - (Eα .* Eαᵢ)).^2 * pᵢ) ./ (Varαᵢ * pᵢ)
+    else # For 2D signals, requires some manual work
+        pᵢ = 
reshape(pᵢ,1,1,:)
+        power = sum((Eαᵢ-(Eα.*Eαᵢ)).^2 .* pᵢ, dims=N) ./ sum(Varαᵢ.*pᵢ, dims=N)
+        power = reshape(power, sz...)
+    end
 
     order = sortperm(vec(power), rev = true)
 
     return (power, order)
@@ -411,8 +416,13 @@ function discriminant_power(coefs::AbstractArray{T}, y::AbstractVector{S},
     Medα = median(Medαᵢ, dims = N)          # overall median of each entry
     pᵢ = Nᵢ / sum(Nᵢ)                       # proportions of each class
 
-    # TODO: This does not work when dealing with 2D signals (multiplication issue)
-    power = ((Medαᵢ - (Medα.*Medαᵢ)).^2 *pᵢ) ./ (Madαᵢ * pᵢ)
+    if N==2 # For 1D signals, can be done via matrix multiplication
+        power = ((Medαᵢ - (Medα.*Medαᵢ)).^2 *pᵢ) ./ (Madαᵢ * pᵢ)
+    else # For 2D signals, requires some manual work
+        pᵢ = reshape(pᵢ,1,1,:)
+        power = sum((Medαᵢ-(Medα.*Medαᵢ)).^2 .* pᵢ, dims=N) ./ sum(Madαᵢ.*pᵢ, dims=N)
+        power = reshape(power, sz...)
+    end
 
     order = sortperm(vec(power), rev = true)
 
     return (power, order)
diff --git a/test/ldb.jl b/test/ldb.jl
index 27e1909..8e05650 100644
--- a/test/ldb.jl
+++ b/test/ldb.jl
@@ -1,86 +1,177 @@
+@testset "1D LDB" begin
+    X, y = generateclassdata(ClassData(:tri, 5, 5, 5))
+    wt = wavelet(WT.haar)
+
+    # AsymmetricRelativeEntropy + TimeFrequency + BasisDiscriminantMeasure
+    f = LocalDiscriminantBasis(wt=wt, max_dec_level=4, top_k=5, n_features=5)
+    @test typeof(f) == LocalDiscriminantBasis
+    @test_nowarn fit_transform(f, X, y)
+    @test_nowarn fit!(f, X, y)
+    @test_nowarn transform(f, X)
+    Xc = transform(f, X)
+    @test size(Xc) == (5,15)
+    @test_nowarn inverse_transform(f, Xc)
+    X̂ = inverse_transform(f, Xc)
+    @test size(X̂) == (32, 15)
+
+    # SymmetricRelativeEntropy + TimeFrequency + FishersClassSeparability
+    f = LocalDiscriminantBasis(wt=wt, max_dec_level=4, dm=SymmetricRelativeEntropy(),
+                               dp=FishersClassSeparability(), top_k=5, n_features=5)
+    @test typeof(f) == LocalDiscriminantBasis
+    @test_nowarn fit_transform(f, X, y)
+    @test_nowarn fit!(f, X, y)
+    @test_nowarn transform(f, X)
+    Xc = transform(f, X)
+    @test size(Xc) == (5,15)
+    @test_nowarn inverse_transform(f, Xc)
+    X̂ = inverse_transform(f, Xc)
+    @test size(X̂) == (32, 15)
+
+    # LpDistance + TimeFrequency + RobustFishersClassSeparability
+    f = LocalDiscriminantBasis(wt=wt, max_dec_level=4, dm=LpDistance(),
+                               dp=RobustFishersClassSeparability(), top_k=5, n_features=5)
+    @test typeof(f) == LocalDiscriminantBasis
+    @test_nowarn fit_transform(f, X, y)
+    @test_nowarn fit!(f, X, y)
+    @test_nowarn transform(f, X)
+    Xc = transform(f, X)
+    @test size(Xc) == (5,15)
+    @test_nowarn inverse_transform(f, Xc)
+    X̂ = inverse_transform(f, Xc)
+    @test size(X̂) == (32, 15)
+
+    # HellingerDistance + ProbabilityDensity + BasisDiscriminantMeasure
+    f = LocalDiscriminantBasis(wt=wt, max_dec_level=4, dm=HellingerDistance(),
+                               en=ProbabilityDensity(), top_k=5, n_features=5)
+    @test typeof(f) == LocalDiscriminantBasis
+    @test_nowarn fit_transform(f, X, y)
+    @test_nowarn fit!(f, X, y)
+    @test_nowarn transform(f, X)
+    Xc = transform(f, X)
+    @test size(Xc) == (5,15)
+    @test_nowarn inverse_transform(f, Xc)
+    X̂ = inverse_transform(f, Xc)
+    @test size(X̂) == (32, 15)
+
+    # EarthMoverDistance + Signatures(equal weight) + BasisDiscriminantMeasure
+    f = LocalDiscriminantBasis(wt=wt, max_dec_level=4, dm=EarthMoverDistance(),
+                               en=Signatures(:equal), top_k=5, n_features=5)
+    @test typeof(f) == LocalDiscriminantBasis
+    @test_nowarn fit_transform(f, X, y)
+    @test_nowarn fit!(f, X, y)
+    @test_nowarn transform(f, X)
+    Xc = transform(f, X)
+    @test size(Xc) == (5,15)
+    
@test_nowarn inverse_transform(f, Xc) + X̂ = inverse_transform(f, Xc) + @test size(X̂) == (32, 15) + + # EarthMoverDistance + Signatures(pdf weight) + BasisDiscriminantMeasure + f = LocalDiscriminantBasis(wt=wt, max_dec_level=4, dm=EarthMoverDistance(), + en=Signatures(:pdf), top_k=5, n_features=5) + @test typeof(f) == LocalDiscriminantBasis + @test_nowarn fit_transform(f, X, y) + @test_nowarn fit!(f, X, y) + @test_nowarn transform(f, X) + Xc = transform(f, X) + @test size(Xc) == (5,15) + @test_nowarn inverse_transform(f, Xc) + X̂ = inverse_transform(f, Xc) + @test size(X̂) == (32, 15) + + # change number of features + @test_nowarn change_nfeatures(f, Xc, 5) + x = change_nfeatures(f, Xc, 5) + @test size(x) == (5, 15) + @test_logs (:warn, "Proposed n_features larger than currently saved n_features. Results will be less accurate since inverse_transform and transform is involved.") change_nfeatures(f, Xc, 10) + @test_throws ArgumentError change_nfeatures(f, Xc, 10) +end -# AsymmetricRelativeEntropy + TimeFrequency + BasisDiscriminantMeasure -f = LocalDiscriminantBasis(wt=wt, max_dec_level=4, top_k=5, n_features=5) -@test typeof(f) == LocalDiscriminantBasis -@test_nowarn fit_transform(f, X, y) -@test_nowarn fit!(f, X, y) -@test_nowarn transform(f, X) -Xc = transform(f, X) -@test size(Xc) == (5,15) -@test_nowarn inverse_transform(f, Xc) -X̂ = inverse_transform(f, Xc) -@test size(X̂) == (32, 15) - -# SymmetricRelativeEntropy + TimeFrequency + FishersClassSeparability -f = LocalDiscriminantBasis(wt=wt, max_dec_level=4, dm=SymmetricRelativeEntropy(), - dp=FishersClassSeparability(), top_k=5, n_features=5) -@test typeof(f) == LocalDiscriminantBasis -@test_nowarn fit_transform(f, X, y) -@test_nowarn fit!(f, X, y) -@test_nowarn transform(f, X) -Xc = transform(f, X) -@test size(Xc) == (5,15) -@test_nowarn inverse_transform(f, Xc) -X̂ = inverse_transform(f, Xc) -@test size(X̂) == (32, 15) - -# LpDistance + TimeFrequency + RobustFishersClassSeparability -f = LocalDiscriminantBasis(wt=wt, max_dec_level=4, dm=LpDistance(), - dp=RobustFishersClassSeparability(), top_k=5, n_features=5) -@test typeof(f) == LocalDiscriminantBasis -@test_nowarn fit_transform(f, X, y) -@test_nowarn fit!(f, X, y) -@test_nowarn transform(f, X) -Xc = transform(f, X) -@test size(Xc) == (5,15) -@test_nowarn inverse_transform(f, Xc) -X̂ = inverse_transform(f, Xc) -@test size(X̂) == (32, 15) - -# HellingerDistance + ProbabilityDensity + BasisDiscriminantMeasure -f = LocalDiscriminantBasis(wt=wt, max_dec_level=4, dm=HellingerDistance(), - en=ProbabilityDensity(), top_k=5, n_features=5) -@test typeof(f) == LocalDiscriminantBasis -@test_nowarn fit_transform(f, X, y) -@test_nowarn fit!(f, X, y) -@test_nowarn transform(f, X) -Xc = transform(f, X) -@test size(Xc) == (5,15) -@test_nowarn inverse_transform(f, Xc) -X̂ = inverse_transform(f, Xc) -@test size(X̂) == (32, 15) - -# EarthMoverDistance + Signatures(equal weight) + BasisDiscriminantMeasure -f = LocalDiscriminantBasis(wt=wt, max_dec_level=4, dm=EarthMoverDistance(), - en=Signatures(:equal), top_k=5, n_features=5) -@test typeof(f) == LocalDiscriminantBasis -@test_nowarn fit_transform(f, X, y) -@test_nowarn fit!(f, X, y) -@test_nowarn transform(f, X) -Xc = transform(f, X) -@test size(Xc) == (5,15) -@test_nowarn inverse_transform(f, Xc) -X̂ = inverse_transform(f, Xc) -@test size(X̂) == (32, 15) - -# EarthMoverDistance + Signatures(pdf weight) + BasisDiscriminantMeasure -f = LocalDiscriminantBasis(wt=wt, max_dec_level=4, dm=EarthMoverDistance(), - en=Signatures(:pdf), top_k=5, 
n_features=5) -@test typeof(f) == LocalDiscriminantBasis -@test_nowarn fit_transform(f, X, y) -@test_nowarn fit!(f, X, y) -@test_nowarn transform(f, X) -Xc = transform(f, X) -@test size(Xc) == (5,15) -@test_nowarn inverse_transform(f, Xc) -X̂ = inverse_transform(f, Xc) -@test size(X̂) == (32, 15) - -# change number of features -@test_nowarn change_nfeatures(f, Xc, 5) -x = change_nfeatures(f, Xc, 5) -@test size(x) == (5, 15) -@test_logs (:warn, "Proposed n_features larger than currently saved n_features. Results will be less accurate since inverse_transform and transform is involved.") change_nfeatures(f, Xc, 10) -@test_throws ArgumentError change_nfeatures(f, Xc, 10) +@testset "2D LDB" begin + X = cat(rand(Normal(0,1), 8,8,5), rand(Normal(1,1), 8,8,5), rand(Normal(2,1), 8,8,5), dims=3) + y = [repeat([1], 5); repeat([2],5); repeat([3],5)] + + # AsymmetricRelativeEntropy + TimeFrequency + BasisDiscriminantMeasure + f = LocalDiscriminantBasis(max_dec_level=2, top_k=5, n_features=5) + @test typeof(f) == LocalDiscriminantBasis + @test_nowarn fit_transform(f, X, y) + @test_nowarn fit!(f, X, y) + @test_nowarn transform(f, X) + Xc = transform(f, X) + @test size(Xc) == (5,15) + @test_nowarn inverse_transform(f, Xc) + X̂ = inverse_transform(f, Xc) + @test size(X̂) == (8, 8, 15) + + # SymmetricRelativeEntropy + TimeFrequency + FishersClassSeparability + f = LocalDiscriminantBasis(max_dec_level=2, dm=SymmetricRelativeEntropy(), + dp=FishersClassSeparability(), top_k=5, n_features=5) + @test typeof(f) == LocalDiscriminantBasis + @test_nowarn fit_transform(f, X, y) + @test_nowarn fit!(f, X, y) + @test_nowarn transform(f, X) + Xc = transform(f, X) + @test size(Xc) == (5,15) + @test_nowarn inverse_transform(f, Xc) + X̂ = inverse_transform(f, Xc) + @test size(X̂) == (8, 8, 15) + + # LpDistance + TimeFrequency + RobustFishersClassSeparability + f = LocalDiscriminantBasis(max_dec_level=2, dm=LpDistance(), + dp=RobustFishersClassSeparability(), top_k=5, n_features=5) + @test typeof(f) == LocalDiscriminantBasis + @test_nowarn fit_transform(f, X, y) + @test_nowarn fit!(f, X, y) + @test_nowarn transform(f, X) + Xc = transform(f, X) + @test size(Xc) == (5,15) + @test_nowarn inverse_transform(f, Xc) + X̂ = inverse_transform(f, Xc) + @test size(X̂) == (8, 8, 15) + + # HellingerDistance + ProbabilityDensity + BasisDiscriminantMeasure + f = LocalDiscriminantBasis(max_dec_level=2, dm=HellingerDistance(), + en=ProbabilityDensity(), top_k=5, n_features=5) + @test typeof(f) == LocalDiscriminantBasis + @test_nowarn fit_transform(f, X, y) + @test_nowarn fit!(f, X, y) + @test_nowarn transform(f, X) + Xc = transform(f, X) + @test size(Xc) == (5,15) + @test_nowarn inverse_transform(f, Xc) + X̂ = inverse_transform(f, Xc) + @test size(X̂) == (8, 8, 15) + + # EarthMoverDistance + Signatures(equal weight) + BasisDiscriminantMeasure + f = LocalDiscriminantBasis(max_dec_level=2, dm=EarthMoverDistance(), + en=Signatures(:equal), top_k=5, n_features=5) + @test typeof(f) == LocalDiscriminantBasis + @test_nowarn fit_transform(f, X, y) + @test_nowarn fit!(f, X, y) + @test_nowarn transform(f, X) + Xc = transform(f, X) + @test size(Xc) == (5,15) + @test_nowarn inverse_transform(f, Xc) + X̂ = inverse_transform(f, Xc) + @test size(X̂) == (8, 8, 15) + + # EarthMoverDistance + Signatures(pdf weight) + BasisDiscriminantMeasure + f = LocalDiscriminantBasis(max_dec_level=2, dm=EarthMoverDistance(), + en=Signatures(:pdf), top_k=5, n_features=5) + @test typeof(f) == LocalDiscriminantBasis + @test_nowarn fit_transform(f, X, y) + @test_nowarn 
fit!(f, X, y) + @test_nowarn transform(f, X) + Xc = transform(f, X) + @test size(Xc) == (5,15) + @test_nowarn inverse_transform(f, Xc) + X̂ = inverse_transform(f, Xc) + @test size(X̂) == (8, 8, 15) + + # change number of features + @test_nowarn change_nfeatures(f, Xc, 5) + x = change_nfeatures(f, Xc, 5) + @test size(x) == (5, 15) + @test_logs (:warn, "Proposed n_features larger than currently saved n_features. Results will be less accurate since inverse_transform and transform is involved.") change_nfeatures(f, Xc, 10) + @test_throws ArgumentError change_nfeatures(f, Xc, 10) +end \ No newline at end of file
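
For anyone trying these patches locally, below is a minimal end-to-end sketch of the 2D workflow exercised by the `2D LDB` testset above. It is illustrative only: the `using` line is an assumption about the test environment (`Wavelets` supplies `wavelet`/`WT`, `Distributions` supplies `Normal`), while the LDB names themselves are the ones exported by this module.

```julia
# Sketch of the 2D LDB workflow, mirroring test/ldb.jl above.
# Assumed imports: Wavelets (wavelet, WT) and Distributions (Normal).
using Wavelets, Distributions

# Three classes of 8x8 signals, 5 samples each, stacked along the last dimension.
X = cat(rand(Normal(0,1), 8, 8, 5),
        rand(Normal(1,1), 8, 8, 5),
        rand(Normal(2,1), 8, 8, 5), dims=3)
y = [repeat([1],5); repeat([2],5); repeat([3],5)]

f = LocalDiscriminantBasis(wt=wavelet(WT.haar), max_dec_level=2,
                           dm=SymmetricRelativeEntropy(),
                           dp=FishersClassSeparability(),
                           top_k=5, n_features=5)

Xc = fit_transform(f, X, y)      # 5x15 matrix: the 5 most discriminant features per signal
X̂ = inverse_transform(f, Xc)     # 8x8x15 array: signals rebuilt from those features
x3 = change_nfeatures(f, Xc, 3)  # 3x15 matrix: keep only the 3 strongest features
```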
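For tracing the refactor itself, the same fit-and-extract pipeline is spelled out step by step below with the helpers these patches introduce (`wpdall`, `fitdec!`, `getbasiscoefall`). This is a sketch that assumes `f`, `X`, and `y` from the snippet above; `fit_transform` performs essentially these steps internally.

```julia
# Step-by-step equivalent of fit_transform(f, X, y) after this patch series.
Xw = wpdall(X, f.wt, f.max_dec_level)   # full wavelet packet decomposition, one slice per signal
fitdec!(f, Xw, y)                       # energy map -> discriminant measures -> best tree,
                                        # discriminant power, and the feature ordering f.order
Xb = getbasiscoefall(Xw, f.tree)        # best-basis coefficients for every signal

N = size(Xb)[end]
Xc = Array{eltype(Xb),2}(undef, f.n_features, N)
for (i, xw) in enumerate(eachslice(Xb, dims=ndims(Xb)))
    Xc[:,i] = xw[f.order[1:f.n_features]]   # keep only the top-ranked coefficients
end
```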