Merge pull request #190 from MichielStock/informationtheory

Information theory
PoisotLab · Jul 21, 2021 · a1f768e · a1f768e · tpoisot · Jul 21, 2021
2 parents c085efa + 5e55666
commit a1f768e
Show file tree

Hide file tree

Showing 9 changed files with 252 additions and 103 deletions.
diff --git a/docs/make.jl b/docs/make.jl
@@ -39,7 +39,8 @@ makedocs(
         ],
         "Generating networks" => [
             "Null models" => "random/null.md",
-            "Structural models" => "random/structure.md"
+            "Structural models" => "random/structure.md",
+            "Optimal transportation" => "random/otsin.md"
         ]
     ]
 )

diff --git a/docs/src/properties/information.md b/docs/src/properties/information.md
@@ -2,7 +2,21 @@
 
 Indices based on information theory, such as entropy, mutual information etc, can easily be computed. To this end, the ecological network is transformed in a bivariate distribution. This is done by normalizing the adjacency or incidence matrix to obtain a doubly stochastic matrix. The information theoretic indices are computed either from this matrix or directly from the ecological network. Note that when using an array is input, the functions do not perform any checks whether the matrix is normalized and nonnegative. When the input is an ecological network, the functions automatically convert the network to a normalized probability matrix.
 
-One can compute individual indices or use the function `information_decomposition` which performs the entire decomposition at once.
+One can compute individual indices or use the function `information_decomposition` which performs the entire decomposition at once. This decomposition yields for a given network the deviation of the marginal distributions of the species with the uniform distribution (quantifying the evenness), the mutual information (quantifying the specialisation) and the variance of information (quantifying the freedom and stability of the interactions). These indices satisfy the following balance equation for the top ($T$) and bottom ($B$) throphic level:
+
+$$
+\log(nm) = D(B,T) + 2 I(B;T) + V(B;T)
+$$
+
+$$
+\log(n) = D(B) + I(B;T) + H(B|T)
+$$
+
+$$
+\log(m) = D(T) + I(B;T) + H(T|B)
+$$
+
+Here, $n$ and $m$ are number of bottom and top species, respectively.
 
 Indices can be calculated for the joint distribution, as well as for the marginal distributions of the two trophic levels (if applicable), by changing an optional argument `dim=1` of the function.
 
@@ -34,3 +48,8 @@ information_decomposition
 ```@docs
 convert2effective
 ```
+
+## References
+
+Stock, M.; Hoebeke, L.; De Baets, B. « Disentangling the Information in Species Interaction Networks ».
+Entropy 2021, 23, 703. https://doi.org/10.3390/e23060703
diff --git a/docs/src/random/otsin.md b/docs/src/random/otsin.md
@@ -0,0 +1,17 @@
+# Optimal transportation for species interaction networks
+
+By solving an *optimal transportation* problem, one can estimate the interaction intensities given (1) a matrix of interaction utility values (i.e., the preferences for certain interactions between the species) and (2) the abundances of the top and bottom species. It hence allows predicting *how* species will interact. The interactions estimated intensities are given by the ecological coupling matrix $Q$, which has the optimal trade-off between utility (enriching for prefered interactions) and entropy (interactions are distributed as randomly as possible). The function `optimaltransportation` has the following inputs:
+- a utility matrix `M`;
+- the (relative) abundances of the top and bottom species `a` and `b`;
+- the entropic regularization parameter `λ` (set default to 1).
+For details, we refer to the paper presenting this framework.
+
+```@docs
+optimaltransportation
+```
+
+## References
+
+Stock, M., Poisot, T., & De Baets, B. (2021). « Optimal transportation theory for
+species interaction networks. » Ecology and Evolution, 00(1), ece3.7254.
+https://doi.org/10.1002/ece3.7254
diff --git a/src/EcologicalNetworks.jl b/src/EcologicalNetworks.jl
@@ -172,4 +172,7 @@ export entropy, make_joint_distribution, mutual_information, conditional_entropy
    variation_information, diff_entropy_uniform, information_decomposition,
    convert2effective, potential_information
 
+include(joinpath(".", "information/otsin.jl"))
+export optimaltransportation
+
 end
diff --git a/src/information/entropy.jl b/src/information/entropy.jl
@@ -1,3 +1,4 @@
+using Base: Integer
 
 # safe log(x) function, log 0 = 0
 safelog(x) = x > zero(x) ? log2(x) : zero(x)
@@ -8,7 +9,7 @@ safediv(x, y) = y == zero(y) ? zero(y) : x / y
 """
     make_joint_distribution(N::NT) where {NT<:AbstractEcologicalNetwork}
 
-Returns a double stochastic matrix from the adjacency or incidence matrix.
+Returns a probability matrix computed from the adjacency or incidence matrix.
 Raises an error if the matrix contains negative values. Output in bits.
 """
 function make_joint_distribution(N::NT) where {NT<:AbstractEcologicalNetwork}
@@ -17,64 +18,61 @@ function make_joint_distribution(N::NT) where {NT<:AbstractEcologicalNetwork}
 end
 
 """
-    entropy(P::AbstractArray)
+    entropy(P::AbstractArray; [dims])
 
-Computes the joint entropy of a double stochastic matrix. Does not perform any
+Computes the joint entropy of a probability matrix. Does not perform any
 checks whether the matrix is normalized. Output in bits.
-"""
-function entropy(P::AbstractArray)
-    return - sum(P .* safelog.(P))
-end
-
-"""
-    entropy(P::AbstractArray, dims::I)
 
-Computes the marginal entropy of a double stochastic matrix. `dims` indicates
-whether to compute the entropy for the rows (`dims`=1) or columns (`dims`=2).
-Does not perform any checks whether the matrix is normalized. Output in bits.
+If the `dims` keyword argument is provided, the marginal entropy of the matrix
+is computed. `dims` indicates whether to compute the entropy for the rows 
+(`dims=1`) or columns (`dims=2`).
 """
-function entropy(P::AbstractArray, dims::I) where I <: Int
+function entropy(P::AbstractArray; dims::Union{Integer,Nothing}=nothing)
+    isnothing(dims) && return - sum(P .* safelog.(P))
     return entropy(sum(P', dims=dims))
 end
 
 """
     entropy(N::AbstractEcologicalNetwork)
 
-Computes the joint entropy of an ecological network. Output in bits.
+
 """
 function entropy(N::NT) where {NT<:AbstractEcologicalNetwork}
     P = make_joint_distribution(N)
     return entropy(P)
 end
 
 """
-    entropy(N::AbstractEcologicalNetwork, dims::I)
+    entropy(N::AbstractEcologicalNetwork; [dims])
 
-Computes the marginal entropy of an ecological network. `dims` indicates
-whether to compute the entropy for the rows (`dims`=1) or columns (`dims`=2).
+Computes the joint entropy of an ecological network. If `dims` is specified,
+The marginal entropy of the ecological network is computed. `dims` indicates
+whether to compute the entropy for the rows (`dims=1`) or columns (`dims=2`).
 Output in bits.
 """
-function entropy(N::NT, dims::I) where {NT<:AbstractEcologicalNetwork, I <: Int}
+function entropy(N::NT; dims::Union{Integer,Nothing}=nothing) where {NT<:AbstractEcologicalNetwork}
     P = make_joint_distribution(N)
-    return entropy(P, dims)
+    return entropy(P; dims)
 end
 
 """
     conditional_entropy(P::AbstractArray, given::I)
 
-Computes the conditional entropy of double stochastic matrix. If `given = 1`,
-it is the entropy of the columns, and visa versa when `given = 2`. Output in bits.
+Computes the conditional entropy of probability matrix. If `given = 1`,
+it is the entropy of the columns, and vise versa when `given = 2`. Output in bits.
+
+Does not check whether `P` is a valid probability matrix.
 """
-function conditional_entropy(P::AbstractArray, given::I) where I <: Int
+function conditional_entropy(P::AbstractArray, given::Integer)
     dims = (given % 2) + 1
-    return - sum(P .* safelog.(safediv.(P, sum(P, dims=dims))))
+    return - sum(P .* safelog.(safediv.(P, sum(P; dims=dims))))
 end
 
 """
     conditional_entropy(N::AbstractEcologicalNetwork, given::I)
 
 Computes the conditional entropy of an ecological network. If `given = 1`,
-it is the entropy of the columns, and visa versa when `given = 2`.
+it is the entropy of the columns, and vise versa when `given = 2`.
 """
 function conditional_entropy(N::NT, given::I) where {NT<:AbstractEcologicalNetwork, I <: Int}
     P = make_joint_distribution(N)
@@ -84,10 +82,10 @@ end
 """
     mutual_information(P::AbstractArray)
 
-Computes the mutual information of a double stochastic matrix. Output in bits.
+Computes the mutual information of a probability matrix. Output in bits.
 """
 function mutual_information(P::AbstractArray)
-    I = entropy(P, 1) - conditional_entropy(P, 2)
+    I = entropy(P, dims=1) - conditional_entropy(P, 2)
     return max(I, zero(I))  # might give small negative value
 end
 
@@ -123,26 +121,18 @@ function variation_information(N::NT) where {NT <: AbstractEcologicalNetwork}
 end
 
 """
-    diff_entropy_uniform(P::AbstractArray)
-
-Computes the difference in entropy of the marginals compared to the entropy of
-an uniform distribution. The parameter `dims` indicates which marginals are used,
-with both if no value is provided. Output in bits.
-"""
-function diff_entropy_uniform(P::AbstractArray)
-    D = log2(length(P)) - entropy(P, 1) - entropy(P, 2)
-    return max(zero(D), D)
-end
-
-"""
-    diff_entropy_uniform(P::AbstractArray, dims::I)
+    diff_entropy_uniform(P::AbstractArray; [dims])
 
 Computes the difference in entropy of the marginals compared to the entropy of
 an uniform distribution. The parameter `dims` indicates which marginals are used,
 with both if no value is provided. Output in bits.
 """
-function diff_entropy_uniform(P::AbstractArray, dims::I) where {I <: Int}
-    D = log2(size(P, dims)) - entropy(P, dims)
+function diff_entropy_uniform(P::AbstractArray; dims::Union{Integer,Nothing}=nothing)
+    if isnothing(dims)
+        D = log2(length(P)) - entropy(P, dims=1) - entropy(P, dims=2)
+    else
+        D = log2(size(P, dims)) - entropy(P; dims)
+    end
     return max(zero(D), D)
 end
 
@@ -153,19 +143,13 @@ Computes the difference in entropy of the marginals compared to the entropy of
 an uniform distribution. The parameter `dims` indicates which marginals are used,
 with both if no value is provided. Output in bits.
 """
-function diff_entropy_uniform(N::NT, dims=nothing) where {NT <: AbstractEcologicalNetwork}
-    if isnothing(dims)
-        P = make_joint_distribution(N)
-        return diff_entropy_uniform(P)
-    else  # compute marginals
-        typeof(dims) <: Int || throw(ArgumentError("dims should be an integer (1 or 2)"))
-        P = make_joint_distribution(N)
-        return diff_entropy_uniform(P, dims)
-    end
+function diff_entropy_uniform(N::NT; dims::Union{Integer,Nothing}=nothing) where {NT <: AbstractEcologicalNetwork}
+    P = make_joint_distribution(N)
+    return diff_entropy_uniform(P; dims=dims)
 end
 
 """
-    information_decomposition(N::AbstractEcologicalNetwork; norm::Bool=false, dims::I=nothing)
+    information_decomposition(N::AbstractEcologicalNetwork; norm::Bool=false, dims=nothing)
 
 Performs an information theory decomposition of a given ecological network, i.e.
 the information content in the normalized adjacency matrix is split in:
@@ -179,8 +163,11 @@ One can optinally give the dimision, indicating whether to compute the indices
 for the rows (`dims=1`), columns (`dims=2`) or the whole matrix (default).
 
 Result is returned in a Dict. Outputs in bits.
+
+Stock, M.; Hoebeke, L.; De Baets, B. « Disentangling the Information in Species Interaction Networks ».
+Entropy 2021, 23, 703. https://doi.org/10.3390/e23060703
 """
-function information_decomposition(N::NT; norm::Bool=false, dims::I=nothing) where {NT <: AbstractEcologicalNetwork, I <: Union{Int, Nothing}}
+function information_decomposition(N::NT; norm::Bool=false, dims::Union{Integer,Nothing}=nothing) where {NT <: AbstractEcologicalNetwork}
     decomposition = Dict{Symbol, Float64}()
     P = make_joint_distribution(N)
     if isnothing(dims)
@@ -191,7 +178,7 @@ function information_decomposition(N::NT; norm::Bool=false, dims::I=nothing) whe
         # variance of information
         decomposition[:V] = variation_information(P)
     else
-        decomposition[:D] = diff_entropy_uniform(P, dims)
+        decomposition[:D] = diff_entropy_uniform(P; dims=dims)
         decomposition[:I] = mutual_information(P)
         decomposition[:V] = conditional_entropy(P, (dims % 2) + 1)
     end
@@ -208,31 +195,22 @@ end
     convert2effective(indice::Real)
 
 Convert an information theory indices in an effective number (i.e. number of
-corresponding interactions).
+corresponding interactions). Assumes an input in bits (i.e. log with base 2 is used).
 """
 function convert2effective(indice::R) where R <: Real
     return 2.0^indice
 end
 
 """
-    potential_information(N::NT)
-
-Computes the maximal potential information in a network, corresponding to
-every species interacting with every other species. Compute result for the
-marginals using the optional parameter `dims`. Output in bits.
-"""
-function potential_information(N::NT) where NT <: AbstractEcologicalNetwork
-    m, n = size(N)
-    return log2(m) + log2(n)
-end
-
-"""
-    potential_information(N::NT, dims::I)
+    potential_information(N::NT; [dims])
 
 Computes the maximal potential information in a network, corresponding to
 every species interacting with every other species. Compute result for the
 marginals using the optional parameter `dims`. Output in bits.
 """
-function potential_information(N::NT, dims::I) where {NT <: AbstractEcologicalNetwork, I <: Int}
-    return log2(size(N)[dims])
+function potential_information(N::NT; dims::Union{Integer,Nothing}=nothing) where {NT <: AbstractEcologicalNetwork}
+    n, m = size(N)
+    isnothing(dims) && return log2(n) + log2(m)
+    dims == 1 && return log2(n) 
+    dims == 2 && return log2(m)
 end
diff --git a/src/information/otsin.jl b/src/information/otsin.jl
@@ -0,0 +1,94 @@
+using LinearAlgebra: lmul!, rmul!, Diagonal
+
+"""
+    optimaltransportation(M::AbstractArray;
+            [a, b, λ=1, ϵ=1e-10, maxiter=100])
+
+Performs optimal transportation on an ecological network. Here, `M` is treated
+as an utility matrix, quantifying the preference the species of the two throphic
+levels have for interacting with another. One can fix both, one or none of the
+species abundances by given `a` (row sums, corresponding to top species) and/or
+`b` (column sums, corresponding to bottom species). The strengh of entropic 
+regularisation is set by `λ`, where higher values indicate more utitlity and lower
+values more entropy. 
+
+ϵ and `maxiter` control the number of Sinkhorn iterations. You likely won't need
+to change these.
+
+This version works on arrays.
+
+Stock, M., Poisot, T., & De Baets, B. (2021). « Optimal transportation theory for
+species interaction networks. » Ecology and Evolution, 00(1), ece3.7254.
+https://doi.org/10.1002/ece3.7254
+"""
+function optimaltransportation(M::AbstractArray;
+        a=nothing, b=nothing, λ::Number=1, ϵ=1e-10, maxiter=100)
+    Q = ot(M, a, b; λ=λ, ϵ=ϵ, maxiter=maxiter)
+    return Q
+end
+
+"""
+    optimaltransportation(M::AbstractBipartiteNetwork;
+            [a, b, λ=1, ϵ=1e-10, maxiter=100])
+
+Performs optimal transportation on an ecological network. Here, `M` is treated
+as an utility matrix, quantifying the preference the species of the two throphic
+levels have for interacting with another. One can fix both, one or none of the
+species abundances by given `a` (row sums, corresponding to top species) and/or
+`b` (column sums, corresponding to bottom species). The strengh of entropic 
+regularisation is set by `λ`, where higher values indicate more utitlity and lower
+values more entropy. 
+
+ϵ and `maxiter` control the number of Sinkhorn iterations. You likely won't need
+to change these.
+
+Stock, M., Poisot, T., & De Baets, B. (2021). « Optimal transportation theory for
+species interaction networks. » Ecology and Evolution, 00(1), ece3.7254.
+https://doi.org/10.1002/ece3.7254
+"""
+function optimaltransportation(M::AbstractBipartiteNetwork;
+    a=nothing, b=nothing, λ::Number=1, ϵ=1e-10, maxiter=100)
+    Q = ot(M.edges, a, b; λ=λ, ϵ=ϵ, maxiter=maxiter)
+    return BipartiteQuantitativeNetwork(Q, species(M, dims=1), species(M, dims=2))
+end
+
+
+# optimal transportation fixing both marginals
+function ot(M::AbstractMatrix, a::AbstractVector, b::AbstractVector; λ, ϵ, maxiter)
+    @assert size(M) == (length(a), length(b))
+    @assert all(a .> 0) "all values of `a` have to be greater than 0"
+    @assert all(b .> 0) "all values of `b` have to be greater than 0"
+    @assert sum(a) ≈ sum(b) "`a` and `b` have to have the same sums"
+    Q = exp.(λ * M)
+    iter = 0
+    for iter in 1:maxiter
+        iter += 1
+        lmul!(Diagonal(a ./ sum(Q, dims=2)[:]), Q)  # normalize rows
+        rmul!(Q, Diagonal(b ./ sum(Q, dims=1)[:]))  # normalize columns
+        maximum(abs.(sum(Q, dims=2)[:] - a)) < ϵ && break
+    end
+    return Q
+end
+
+# optimal transportation fixing a
+function ot(M::AbstractMatrix, a::AbstractVector, b::Nothing; λ, ϵ, maxiter)
+    @assert size(M, 1) == length(a) && all(a .> 0)
+    Q = exp.(λ * M)
+    lmul!(Diagonal(a ./ sum(Q, dims=2)[:]), Q)
+    return Q
+end
+
+# optimal transportation fixing b
+function ot(M::AbstractMatrix, a::Nothing, b::AbstractVector; λ, ϵ, maxiter)
+    @assert size(M, 2) == length(b) && all(b .> 0)
+    Q = exp.(λ * M)
+    rmul!(Q, Diagonal(b ./ sum(Q, dims=1)[:]))
+    return Q
+end
+
+# optimal transportation free
+function ot(M::AbstractMatrix, a::Nothing, b::Nothing; λ, ϵ, maxiter)
+    Q = exp.(λ * M)
+    Q ./= sum(Q)
+    return Q
+end