Skip to content

Commit

Permalink
Merge pull request JuliaLang#84 from JuliaStats/dh/deviation
Browse files Browse the repository at this point in the history
Move some functions for deviation computing from MLBase to here
  • Loading branch information
lindahua committed Jul 20, 2014
2 parents 42e2b12 + 478c5d9 commit fdd781e
Show file tree
Hide file tree
Showing 6 changed files with 167 additions and 0 deletions.
52 changes: 52 additions & 0 deletions docs/source/deviation.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
Computing Deviations
=====================

This package provides functions to compute various deviations between arrays in a variety of ways:

.. function:: sqL2dist(a, b)

Squared L2 distance between ``a`` and ``b``, as :math:`\sum_{i=1}^n |a_i - b_i|^2`.

.. function:: L2dist(a, b)

L2 distance between ``a`` and ``b``, *i.e* ``sqrt(sqL2dist(a, b))``.

.. function:: L1dist(a, b)

L1 distance between ``a`` and ``b``, as :math:`\sum_{i=1}^n |a_i - b_i|`.

.. function:: Linfdist(a, b)

Linf distance between ``a`` and ``b``, as :math:`\mathrm{\mathop{max}}_{i=1:n} |a_i - b_i|`.

.. function:: gkldiv(a, b)

Generalized Kullback-Leibler divergence between two arrays ``a`` and ``b``, defined as
:math:`\sum_{i=1}^n (a_i * \log(a_i/b_i) - a_i + b_i)`.

**Note:** When ``sum(a) == 1`` and ``sum(b) == 1``, it reduces to the KL-divergence in standard sense.

.. function:: meanad(a, b)

Mean absolute deviation between ``a`` and ``b``, *i.e.* ``mean(abs(a - b))``.

.. function:: maxad(a, b)

Maximum absolute deviation between ``a`` and ``b``, *i.e.* ``maximum(abs(a - b))``.

.. function:: msd(a, b)

Mean squared deviation between ``a`` and ``b``, *i.e.* ``mean(abs2(a - b))``.

.. function:: rmsd(a, b[; normalize={true|false}])

Root mean squared deviation, *i.e.* ``sqrt(msd(a, b))``.

The keyword argument ``normalize`` is default to ``false``. If it is set to ``true``, the result is normalized by ``(maximum(a) - minimum(a)``.

.. function:: psnr(a, b, maxv)

Peak signal-to-noise ratio, *i.e.* ``10 * log10(maxv^2 / msd(a, b))``.

**Note:** all these functions are implemented in a reasonably efficient way without creating any temporary arrays in the middle.

1 change: 1 addition & 0 deletions docs/source/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ Welcome to StatsBase.jl's Documentation!
weightvec.rst
means.rst
scalarstats.rst
deviation.rst
cov.rst
counts.rst
ranking.rst
Expand Down
13 changes: 13 additions & 0 deletions src/StatsBase.jl
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,18 @@ module StatsBase
summarystats, # summary statistics
describe, # print the summary statistics

# deviation
sqL2dist, # squared L2 distance between two arrays
L2dist, # L2 distance between two arrays
L1dist, # L1 distance between two arrays
Linfdist, # L-inf distance between two arrays
gkldiv, # (Generalized) Kullback-Leibler divergence between two vectors
meanad, # mean absolute deviation
maxad, # maximum absolute deviation
msd, # mean squared deviation
rmsd, # root mean squared deviation
psnr, # peak signal-to-noise ratio (in dB)

# cov
scattermat, # scatter matrix (i.e. unnormalized covariance)

Expand Down Expand Up @@ -143,6 +155,7 @@ module StatsBase
include("weights.jl")
include("moments.jl")
include("scalarstats.jl")
include("deviation.jl")
include("cov.jl")
include("counts.jl")
include("ranking.jl")
Expand Down
80 changes: 80 additions & 0 deletions src/deviation.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
# Computing deviation in a variety of ways

# squared L2 distance
function sqL2dist{T<:Number}(a::AbstractArray{T}, b::AbstractArray{T})
n = length(a)
length(b) == n || throw(DimensionMismatch("Input dimension mismatch"))
r = 0.0
for i = 1:n
@inbounds r += abs2(a[i] - b[i])
end
return r
end

# L2 distance
L2dist{T<:Number}(a::AbstractArray{T}, b::AbstractArray{T}) = sqrt(sqL2dist(a, b))

# L1 distance
function L1dist{T<:Number}(a::AbstractArray{T}, b::AbstractArray{T})
n = length(a)
length(b) == n || throw(DimensionMismatch("Input dimension mismatch"))
r = 0.0
for i = 1:n
@inbounds r += abs(a[i] - b[i])
end
return r
end

# Linf distance
function Linfdist{T<:Number}(a::AbstractArray{T}, b::AbstractArray{T})
n = length(a)
length(b) == n || throw(DimensionMismatch("Input dimension mismatch"))
r = 0.0
for i = 1:n
@inbounds v = abs(a[i] - b[i])
if r < v
r = v
end
end
return r
end

# Generalized KL-divergence
function gkldiv{T<:FloatingPoint}(a::AbstractArray{T}, b::AbstractArray{T})
n = length(a)
r = 0.0
for i = 1:n
@inbounds ai = a[i]
@inbounds bi = b[i]
if ai > 0
r += (ai * log(ai / bi) - ai + bi)
else
r += bi
end
end
return r::Float64
end

# MeanAD: mean absolute deviation
meanad{T<:Number}(a::AbstractArray{T}, b::AbstractArray{T}) = L1dist(a, b) / length(a)

# MaxAD: maximum absolute deviation
maxad{T<:Number}(a::AbstractArray{T}, b::AbstractArray{T}) = Linfdist(a, b)

# MSD: mean squared deviation
msd{T<:Number}(a::AbstractArray{T}, b::AbstractArray{T}) = sqL2dist(a, b) / length(a)

# RMSD: root mean squared deviation
function rmsd{T<:Number}(a::AbstractArray{T}, b::AbstractArray{T}; normalize::Bool=false)
v = sqrt(msd(a, b))
if normalize
amin, amax = extrema(a)
v /= (amax - amin)
end
return v
end

# PSNR: peak signal-to-noise ratio
function psnr{T<:Real}(a::AbstractArray{T}, b::AbstractArray{T}, maxv::Real)
20. * log10(maxv) - 10. * log10(msd(a, b))
end
20 changes: 20 additions & 0 deletions test/deviation.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
using StatsBase
using Base.Test

a = rand(5, 6)
b = rand(5, 6)

@test_approx_eq sqL2dist(a, b) sum(abs2(a - b))
@test_approx_eq L2dist(a, b) sqrt(sqL2dist(a, b))
@test_approx_eq L1dist(a, b) sum(abs(a - b))
@test_approx_eq Linfdist(a, b) maximum(abs(a - b))

@test_approx_eq gkldiv(a, b) sum(a .* log(a ./ b) - a + b)

@test_approx_eq meanad(a, b) mean(abs(a - b))
@test_approx_eq maxad(a, b) maximum(abs(a - b))
@test_approx_eq msd(a, b) mean(abs2(a - b))
@test_approx_eq rmsd(a, b) sqrt(msd(a, b))
@test_approx_eq rmsd(a, b; normalize=true) rmsd(a, b) / (maximum(a) - minimum(a))
@test_approx_eq psnr(a, b, 2) 10 * log10(4 / msd(a, b))

1 change: 1 addition & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ tests = ["mathfuns",
"weights",
"moments",
"scalarstats",
"deviation",
"cov",
"counts",
"ranking",
Expand Down

0 comments on commit fdd781e

Please sign in to comment.