From ca2872bbdc85edf3f0488e07f6b2b64306d26d54 Mon Sep 17 00:00:00 2001
From: benedict-96
Date: Fri, 24 May 2024 09:53:49 +0200
Subject: [PATCH 01/32] Updated docs for lower and upper triangular.

---
 src/arrays/lower_triangular.jl | 43 +++++++++++++++++++++++++++++++-
 src/arrays/triangular.jl       |  3 +++
 src/arrays/upper_triangular.jl | 45 ++++++++++++++++++++++++++++++++--
 3 files changed, 88 insertions(+), 3 deletions(-)

diff --git a/src/arrays/lower_triangular.jl b/src/arrays/lower_triangular.jl
index dfb4e37fb..08acf9ff4 100644
--- a/src/arrays/lower_triangular.jl
+++ b/src/arrays/lower_triangular.jl
@@ -1,15 +1,56 @@
 @doc raw"""
+    LowerTriangular(S::AbstractVector, n::Int)
+
+Build a lower-triangular matrix from a vector.
+
 A lower-triangular matrix is an ``n\times{}n`` matrix that has zeros on the diagonal and zeros above the diagonal.
 
-The data are stored in a vector ``S`` similarly to `SkewSymMatrix`.
+The data are stored in a vector ``S`` similarly to other matrices. See [`UpperTriangular`](@ref), [`SkewSymMatrix`](@ref) and [`SymmetricMatrix`](@ref).
 
 The struct has two fields: `S` and `n`. The first stores all the entries of the matrix in a sparse fashion (in a vector) and the second is the dimension ``n`` for ``A\in\mathbb{R}^{n\times{}n}``.
+
+# Examples
+```jldoctest
+using GeometricMachineLearning
+S = [1, 2, 3, 4, 5, 6]
+LowerTriangular(S, 4)
+
+# output
+
+4×4 LowerTriangular{Int64, Vector{Int64}}:
+ 0  0  0  0
+ 1  0  0  0
+ 2  3  0  0
+ 4  5  6  0
+```
 """
 mutable struct LowerTriangular{T, AT <: AbstractVector{T}} <: AbstractTriangular{T}
     S::AT
     n::Int
 end
 
+@doc raw"""
+    LowerTriangular(A::AbstractMatrix)
+
+Build a lower-triangular matrix from a matrix.
+
+This is done by taking the strictly lower-triangular part of that matrix (the entries below the diagonal).
+
+# Examples
+```jldoctest
+using GeometricMachineLearning
+M = [1 2 3 4; 5 6 7 8; 9 10 11 12; 13 14 15 16]
+LowerTriangular(M)
+
+# output
+
+4×4 LowerTriangular{Int64, Vector{Int64}}:
+  0   0   0  0
+  5   0   0  0
+  9  10   0  0
+ 13  14  15  0
+```
+"""
 function LowerTriangular(S::AbstractMatrix{T}) where {T}
     n = size(S, 1)
     @assert size(S, 2) == n
diff --git a/src/arrays/triangular.jl b/src/arrays/triangular.jl
index 2d57be63d..366977c94 100644
--- a/src/arrays/triangular.jl
+++ b/src/arrays/triangular.jl
@@ -1,3 +1,6 @@
+@doc raw"""
+See [`UpperTriangular`](@ref) and [`LowerTriangular`](@ref).
+"""
 abstract type AbstractTriangular{T} <: AbstractMatrix{T} end
 
 Base.parent(A::AbstractTriangular) = A.S
diff --git a/src/arrays/upper_triangular.jl b/src/arrays/upper_triangular.jl
index 6fc2a74de..5aba344ae 100644
--- a/src/arrays/upper_triangular.jl
+++ b/src/arrays/upper_triangular.jl
@@ -1,15 +1,56 @@
 @doc raw"""
-An upper-triangular matrix is an ``n\times{}n`` matrix that has ones on the diagonal and zeros on the upper triangular.
+    UpperTriangular(S::AbstractVector, n::Int)
 
-The data are stored in a vector ``S`` similarly to `SkewSymMatrix`.
+Build an upper-triangular matrix from a vector.
+
+An upper-triangular matrix is an ``n\times{}n`` matrix that has zeros on the diagonal and zeros below the diagonal.
+
+The data are stored in a vector ``S`` similarly to other matrices. See [`LowerTriangular`](@ref), [`SkewSymMatrix`](@ref) and [`SymmetricMatrix`](@ref).
 
 The struct has two fields: `S` and `n`. The first stores all the entries of the matrix in a sparse fashion (in a vector) and the second is the dimension ``n`` for ``A\in\mathbb{R}^{n\times{}n}``.
+
+# Examples
+```jldoctest
+using GeometricMachineLearning
+S = [1, 2, 3, 4, 5, 6]
+UpperTriangular(S, 4)
+
+# output
+
+4×4 UpperTriangular{Int64, Vector{Int64}}:
+ 0  1  2  4
+ 0  0  3  5
+ 0  0  0  6
+ 0  0  0  0
+```
 """
 mutable struct UpperTriangular{T, AT <: AbstractVector{T}} <: AbstractTriangular{T}
     S::AT
     n::Int
 end
 
+@doc raw"""
+    UpperTriangular(A::AbstractMatrix)
+
+Build an upper-triangular matrix from a matrix.
+
+This is done by taking the strictly upper-triangular part of that matrix (the entries above the diagonal).
+
+# Examples
+```jldoctest
+using GeometricMachineLearning
+M = [1 2 3 4; 5 6 7 8; 9 10 11 12; 13 14 15 16]
+UpperTriangular(M)
+
+# output
+
+4×4 UpperTriangular{Int64, Vector{Int64}}:
+ 0  2  3   4
+ 0  0  7   8
+ 0  0  0  12
+ 0  0  0   0
+```
+"""
 function UpperTriangular(S::AbstractMatrix{T}) where {T}
     n = size(S, 1)
     @assert size(S, 2) == n

From 7d6eacd65f00e14d74c79e16a1170e8d9eb49e98 Mon Sep 17 00:00:00 2001
From: benedict-96
Date: Fri, 24 May 2024 11:36:25 +0200
Subject: [PATCH 02/32] Now also parsing the caption.

---
 docs/make.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/make.jl b/docs/make.jl
index e00b4b18f..16098433c 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -48,7 +48,7 @@ end
 function latex_graphics(path::String; label = nothing, caption = nothing, width = .5)
     figure_width = "$(width)\\textwidth"
     latex_label = isnothing(label) ? "" : "\\label{" * label * "}"
-    latex_caption = isnothing(caption) ? "" : "\\caption{" * caption * "}"
+    latex_caption = isnothing(caption) ? "" : "\\caption{" * Markdown.parse(caption) * "}"
     latex_string = """\\begin{figure}
                     \\includegraphics[width = """ * figure_width * "]{" * path * ".png}" *
                     latex_caption *

From b44cd44fb3a999a3074f2c1a03f2bf77b3777673 Mon Sep 17 00:00:00 2001
From: benedict-96
Date: Fri, 24 May 2024 11:37:05 +0200
Subject: [PATCH 03/32] Updated docstrings for Symmetric and skew-symmetric matrix.

---
 src/arrays/skew_symmetric.jl | 80 +++++++++++++++++++++++++++++++-----
 src/arrays/symmetric.jl      | 78 ++++++++++++++++++++++++++++-------
 2 files changed, 132 insertions(+), 26 deletions(-)

diff --git a/src/arrays/skew_symmetric.jl b/src/arrays/skew_symmetric.jl
index 118daa6a1..47be4c63a 100644
--- a/src/arrays/skew_symmetric.jl
+++ b/src/arrays/skew_symmetric.jl
@@ -1,12 +1,33 @@
 @doc raw"""
+    SkewSymMatrix(S::AbstractVector, n::Integer)
+
+Instantiate a skew-symmetric matrix with information stored in vector `S`.
+
+A skew-symmetric matrix ``A`` is a matrix with ``A^T = -A``.
+
+Internally the `struct` saves a vector ``S`` of size ``n(n-1)\div2``. The conversion is done the following way:
+```math
+[A]_{ij} = \begin{cases} 0 & \text{if $i=j$} \\
+             S[( (i-2) (i-1) ) \div 2 + j] & \text{if $i>j$}\\
+            -S[( (j-2) (j-1) ) \div 2 + i] & \text{else}. \end{cases}
+```
+
+Also see [`SymmetricMatrix`](@ref), [`LowerTriangular`](@ref) and [`UpperTriangular`](@ref).
+ +# Examples +```jldoctest +using GeometricMachineLearning +S = [1, 2, 3, 4, 5, 6] +SkewSymMatrix(S, 4) + +# output + +4×4 SkewSymMatrix{Int64, Vector{Int64}}: + 0 -1 -2 -4 + 1 0 -3 -5 + 2 3 0 -6 + 4 5 6 0 +``` """ mutable struct SkewSymMatrix{T, AT <: AbstractVector{T}} <: AbstractMatrix{T} S::AT @@ -16,14 +37,46 @@ mutable struct SkewSymMatrix{T, AT <: AbstractVector{T}} <: AbstractMatrix{T} @assert length(S) == n*(n-1)÷2 new{T,typeof(S)}(S,n) end - function SkewSymMatrix(S::AbstractMatrix{T}) where {T} - n = size(S, 1) - @assert size(S, 2) == n - S_vec = map_to_Skew(S) - new{T,typeof(S_vec)}(S_vec, n) - end end +@doc raw""" + SkewSymMatrix(A::AbstractMatrix) + +Perform `0.5 * (A - A')` and store the matrix in an efficient way (as a vector with ``n(n-1)/2`` entries). + +If the constructor is called with a matrix as input it returns a skew-symmetric matrix via the projection: +```math +A \mapsto \frac{1}{2}(A - A^T). +``` + +# Examples +```jldoctest +using GeometricMachineLearning +M = [1 2 3 4; 5 6 7 8; 9 10 11 12; 13 14 15 16] +SkewSymMatrix(M) + +# output + +4×4 SkewSymMatrix{Float64, Vector{Float64}}: + 0.0 -1.5 -3.0 -4.5 + 1.5 0.0 -1.5 -3.0 + 3.0 1.5 0.0 -1.5 + 4.5 3.0 1.5 0.0 +``` + +# Extend help + +Note that the constructor is designed in such a way that it always returns matrices of type `SkewSymMatrix{<:AbstractFloat}` when called with a matrix, even if this matrix is of type `AbstractMatrix{<:Integer}`. + +If the user wishes to allocate a matrix `SkewSymMatrix{<:Integer}` the constructor `SkewSymMatrix(::AbstractVector, n::Integer)` has to be called. +""" +function SkewSymMatrix(S::AbstractMatrix{T}) where {T} + n = size(S, 1) + @assert size(S, 2) == n + S_vec = map_to_Skew(S) + SkewSymMatrix(S_vec, n) +end + function Base.getindex(A::SkewSymMatrix, i::Int, j::Int) if j == i return zero(eltype(A)) @@ -212,6 +265,11 @@ function map_to_Skew(A::AbstractMatrix{T}) where T S end +function map_to_Skew(A::AbstractMatrix{T}) where T <: Integer + Float = T == Int64 ? Float64 : Float32 + map_to_Skew(Float.(A)) +end + function Base.copyto!(A::SkewSymMatrix, B::SkewSymMatrix) A.S .= B.S nothing diff --git a/src/arrays/symmetric.jl b/src/arrays/symmetric.jl index 773b74757..627e5bbd3 100644 --- a/src/arrays/symmetric.jl +++ b/src/arrays/symmetric.jl @@ -1,23 +1,34 @@ @doc raw""" -A `SymmetricMatrix` ``A`` is a matrix ``A^T = A``. + SymmetricMatrix(S::AbstractVector, n::Integer) + +Instantiate a symmetric matrix with information stored in vector `S`. -This is a projection defined via the canonical metric $(A,B) \mapsto \mathrm{tr}(A^TB)$. +A `SymmetricMatrix` ``A`` is a matrix ``A^T = A``. -Internally the `struct` saves a vector $S$ of size $n(n+1)\div2$. The conversion is done the following way: +Internally the `struct` saves a vector ``S`` of size ``n(n+1)\div2``. The conversion is done the following way: ```math [A]_{ij} = \begin{cases} S[( (i-1) i ) \div 2 + j] & \text{if $i\geq{}j$}\\ S[( (j-1) j ) \div 2 + i] & \text{else}. \end{cases} ``` -So ``S`` stores a string of vectors taken from $A$: $S = [\tilde{a}_1, \tilde{a}_2, \ldots, \tilde{a}_n]$ with $\tilde{a}_i = [[A]_{i1},[A]_{i2},\ldots,[A]_{ii}]$. +So ``S`` stores a string of vectors taken from ``A``: ``S = [\tilde{a}_1, \tilde{a}_2, \ldots, \tilde{a}_n]`` with ``\tilde{a}_i = [[A]_{i1},[A]_{i2},\ldots,[A]_{ii}]``. -### Constructor -If the constructor is called with a matrix as input it returns a symmetric matrix via the projection: -```math -A \mapsto \frac{1}{2}(A + A^T). 
-``` +Also see [`SkewSymMatrix`](@ref), [`LowerTriangular`](@ref) and [`UpperTriangular`](@ref). + +# Examples +```jldoctest +using GeometricMachineLearning +S = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] +SymmetricMatrix(S, 4) -It can also be called with two arguments `S::AbstractVector` and `n::Integer` where `length(S) == n * (n + 1) ÷ 2` has to be true. +# output + +4×4 SymmetricMatrix{Int64, Vector{Int64}}: + 1 2 4 7 + 2 3 5 8 + 4 5 6 9 + 7 8 9 10 +``` """ mutable struct SymmetricMatrix{T, AT <: AbstractVector{T}} <: AbstractMatrix{T} S::AT @@ -27,12 +38,44 @@ mutable struct SymmetricMatrix{T, AT <: AbstractVector{T}} <: AbstractMatrix{T} @assert length(S) == n*(n+1)÷2 new{eltype(S),typeof(S)}(S, n) end - function SymmetricMatrix(A::AbstractMatrix{T}) where {T} - S = map_to_S(A) - new{T, typeof(S)}(S, size(A, 1)) - end end +@doc raw""" + SymmetricMatrix(A::AbstractMatrix) + +Perform `0.5 * (A + A')` and store the matrix in an efficient way (as a vector with ``n(n+1)/2`` entries). + +If the constructor is called with a matrix as input it returns a symmetric matrix via the projection: +```math +A \mapsto \frac{1}{2}(A + A^T). +``` + +# Examples +```jldoctest +using GeometricMachineLearning +M = [1 2 3 4; 5 6 7 8; 9 10 11 12; 13 14 15 16] +SymmetricMatrix(M) + +# output + +4×4 SymmetricMatrix{Float64, Vector{Float64}}: + 1.0 3.5 6.0 8.5 + 3.5 6.0 8.5 11.0 + 6.0 8.5 11.0 13.5 + 8.5 11.0 13.5 16.0 +``` + +# Extend help + +Note that the constructor is designed in such a way that it always returns matrices of type `SymmetricMatrix{<:AbstractFloat}` when called with a matrix, even if this matrix is of type `AbstractMatrix{<:Integer}`. + +If the user wishes to allocate a matrix `SymmetricMatrix{<:Integer}` the constructor `SymmetricMatrix(::AbstractVector, n::Integer)` has to be called. +""" +function SymmetricMatrix(A::AbstractMatrix{T}) where {T} + S = map_to_S(A) + SymmetricMatrix(S, size(A, 1)) +end + # I'm not 100% sure this is the best solution (needed for broadcasting operations ...) function Base.setindex!(A::SymmetricMatrix{T}, val::T, i::Int, j::Int) where T if i ≥ j @@ -47,7 +90,7 @@ end S[i * (i-1)÷2 + j] = A_sym[i, j] end -function map_to_S(A::AbstractMatrix{T}) where T +function map_to_S(A::AbstractMatrix{T}) where {T <: Number} n = size(A, 1) @assert size(A, 2) == n A_sym = T(.5)*(A + A') @@ -60,6 +103,11 @@ function map_to_S(A::AbstractMatrix{T}) where T S end +function map_to_S(A::AbstractMatrix{T}) where {T <: Integer} + Float = T == Int64 ? Float64 : Float32 + map_to_S(Float.(A)) +end + function LinearAlgebra.Adjoint(A::SymmetricMatrix) A end From f578e3f362334fc79568259e331831b547f28d97 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Fri, 24 May 2024 11:38:28 +0200 Subject: [PATCH 04/32] Updated docs. --- docs/src/arrays/skew_symmetric_matrix.md | 112 +++++++++++++++++++++-- 1 file changed, 106 insertions(+), 6 deletions(-) diff --git a/docs/src/arrays/skew_symmetric_matrix.md b/docs/src/arrays/skew_symmetric_matrix.md index 3cfe1ccc1..5043ff7b7 100644 --- a/docs/src/arrays/skew_symmetric_matrix.md +++ b/docs/src/arrays/skew_symmetric_matrix.md @@ -1,17 +1,91 @@ -# `SymmetricMatrix` and `SkewSymMatrix` +# Symmetric Skew-Symmetric and Triangular Matrices. -There are special implementations of symmetric and skew-symmetric matrices in `GeometricMachineLearning.jl`. They are implemented to work on GPU and for multiplication with tensors. 
The following image demonstrates how the data necessary for an instance of `SkewSymMatrix` are stored[^1]:
-[^1]: It works similarly for `SymmetricMatrix`.
+Among the special arrays implemented in `GeometricMachineLearning`, [`SymmetricMatrix`](@ref), [`SkewSymMatrix`](@ref), [`UpperTriangular`](@ref) and [`LowerTriangular`](@ref) are the most common ones, and similar types can also be found in other libraries; `LinearAlgebra.jl`, for example, has an implementation of a symmetric matrix called `Symmetric`. The versions of these matrices in `GeometricMachineLearning` are however more memory efficient as they only store as many parameters as are necessary, i.e. ``n(n+1)/2`` for the symmetric matrix and ``n(n-1)/2`` for the other three. In addition, operations such as matrix and tensor multiplication are implemented for these matrices to work in parallel on GPU.

We now show the various matrices. First [`UpperTriangular`](@ref):

```math
U = \begin{pmatrix}
    0 & a_{12} & \cdots & a_{1n} \\
    0 & \ddots & & a_{2n} \\
    \vdots & \ddots & \ddots & \vdots \\
    0 & \cdots & 0 & 0
\end{pmatrix}.
```

The matrix [`LowerTriangular`](@ref):

```math
L = \begin{pmatrix}
    0 & 0 & \cdots & 0 \\
    a_{21} & \ddots & & \vdots \\
    \vdots & \ddots & \ddots & \vdots \\
    a_{n1} & \cdots & a_{n(n-1)} & 0
\end{pmatrix}.
```

An instance of [`SkewSymMatrix`](@ref) can be written as ``A = L - L^T`` or ``A = U - U^T``:

```math
A = \begin{pmatrix}
    0 & - a_{21} & \cdots & - a_{n1} \\
    a_{21} & \ddots & & \vdots \\
    \vdots & \ddots & \ddots & \vdots \\
    a_{n1} & \cdots & a_{n(n-1)} & 0
\end{pmatrix}.
```

And lastly a [`SymmetricMatrix`](@ref):

```math
B = \begin{pmatrix}
    a_{11} & a_{21} & \cdots & a_{n1} \\
    a_{21} & \ddots & & \vdots \\
    \vdots & \ddots & \ddots & \vdots \\
    a_{n1} & \cdots & a_{n(n-1)} & a_{nn}
\end{pmatrix}.
```

Note that any matrix ``M\in\mathbb{R}^{n\times{}n}`` can be written as

```math
M = \frac{1}{2}(M - M^T) + \frac{1}{2}(M + M^T),
```
where the first part of this decomposition is skew-symmetric and the second part is symmetric. This is also how the constructors for [`SkewSymMatrix`](@ref) and [`SymmetricMatrix`](@ref) are designed:

```@example sym_skew_sym_example
using GeometricMachineLearning

M = rand(3, 3)
```

```@example sym_skew_sym_example
A = SkewSymMatrix(M)
```

```@example sym_skew_sym_example
B = SymmetricMatrix(M)
```

```@example sym_skew_sym_example
@assert M ≈ A + B # hide
M ≈ A + B
```

## How are Special Matrices Stored?

The following image demonstrates how special matrices are stored in `GeometricMachineLearning`:

```@example
-Main.include_graphics("../tikz/skew_sym_visualization") # hide
+Main.include_graphics("../tikz/skew_sym_visualization"; caption = "The elements of a skew-symmetric matrix (and other special matrices) are stored as a vector. The elements of the big vector are the entries in the strictly lower-triangular part of the matrix, stored row-wise.") # hide
```

So what is stored internally is a vector of size ``n(n-1)/2`` for the skew-symmetric matrix and the triangular matrices, and a vector of size ``n(n+1)/2`` for the symmetric matrix.
We can sample a random skew-symmetric matrix: ```@example skew_sym -using GeometricMachineLearning # hide +using GeometricMachineLearning +import Random +Random.seed!(123) A = rand(SkewSymMatrix, 5) ``` @@ -20,4 +94,30 @@ and then access the vector: ```@example skew_sym A.S +``` + +This is equivalent to sampling a vector and then assigning a matrix: + +```@example skew_sym +using GeometricMachineLearning +import Random +Random.seed!(123) + +S = rand(5 * (5 - 1) ÷ 2) +SkewSymMatrix(S, 5) +``` + +These special matrices are important for [SympNets](@ref "SympNet Architecture"), [volume-preserving transformers](@ref "Volume-Preserving Transformer") and [linear symplectic transformers](@ref "Linear Symplectic Transformer"). + +# Library Functions + +```@docs; canonical = false +UpperTriangular +UpperTriangular(::AbstractMatrix) +LowerTriangular +LowerTriangular(::AbstractMatrix) +SymmetricMatrix +SymmetricMatrix(::AbstractMatrix) +SkewSymMatrix +SkewSymMatrix(::AbstractMatrix) ``` \ No newline at end of file From 05364e38b22fa605901b93834223233f0a61c49d Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Fri, 24 May 2024 16:36:24 +0200 Subject: [PATCH 05/32] Fixed number of spaces. --- src/arrays/skew_symmetric.jl | 2 +- src/arrays/symmetric.jl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/arrays/skew_symmetric.jl b/src/arrays/skew_symmetric.jl index 47be4c63a..1e2b0ac61 100644 --- a/src/arrays/skew_symmetric.jl +++ b/src/arrays/skew_symmetric.jl @@ -40,7 +40,7 @@ mutable struct SkewSymMatrix{T, AT <: AbstractVector{T}} <: AbstractMatrix{T} end @doc raw""" - SkewSymMatrix(A::AbstractMatrix) + SkewSymMatrix(A::AbstractMatrix) Perform `0.5 * (A - A')` and store the matrix in an efficient way (as a vector with ``n(n-1)/2`` entries). diff --git a/src/arrays/symmetric.jl b/src/arrays/symmetric.jl index 627e5bbd3..361647ca1 100644 --- a/src/arrays/symmetric.jl +++ b/src/arrays/symmetric.jl @@ -41,7 +41,7 @@ mutable struct SymmetricMatrix{T, AT <: AbstractVector{T}} <: AbstractMatrix{T} end @doc raw""" - SymmetricMatrix(A::AbstractMatrix) + SymmetricMatrix(A::AbstractMatrix) Perform `0.5 * (A + A')` and store the matrix in an efficient way (as a vector with ``n(n+1)/2`` entries). From 0d2607ab2ea54b0061dd1d05f50542990db502ca Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Fri, 24 May 2024 17:07:44 +0200 Subject: [PATCH 06/32] Removed extra file stiefel_lie_alg_horizontal as all of this is now discussed under global tangent spaces. --- docs/src/arrays/stiefel_lie_alg_horizontal.md | 61 ------------------- 1 file changed, 61 deletions(-) delete mode 100644 docs/src/arrays/stiefel_lie_alg_horizontal.md diff --git a/docs/src/arrays/stiefel_lie_alg_horizontal.md b/docs/src/arrays/stiefel_lie_alg_horizontal.md deleted file mode 100644 index 1e22627d6..000000000 --- a/docs/src/arrays/stiefel_lie_alg_horizontal.md +++ /dev/null @@ -1,61 +0,0 @@ -# Horizontal component of the Lie algebra $\mathfrak{g}$ - -What we use to optimize Adam (and other algorithms) to manifolds is a **global tangent space representation** of the homogeneous spaces. - -For the [Stiefel manifold](@ref "The Stiefel Manifold"), this global tangent space representation takes a simple form: - -```math -\mathcal{B} = \begin{bmatrix} - A & -B^T \\ - B & \mathbb{O} -\end{bmatrix}, -``` - -where ``A\in\mathbb{R}^{n\times{}n}`` is skew-symmetric and ``B\in\mathbb{R}^{N\times{}n}`` is arbitary. In `GeometricMachineLearning` the struct `StiefelLieAlgHorMatrix` implements elements of this form. 
## Theoretical background

### Vertical and horizontal components

The Stiefel manifold ``St(n, N)`` is a [homogeneous space](@ref "Homogeneous Spaces") obtained from ``SO(N)`` by setting two matrices, whose first $n$ columns coincide, equivalent.
Another way of expressing this is:
```math
A_1 \sim A_2 \iff A_1E = A_2E
```
for
```math
E = \begin{bmatrix} \mathbb{I} \\ \mathbb{O}\end{bmatrix}.
```

Because ``St(n,N)`` is a homogeneous space, we can take any element ``Y\in{}St(n,N)`` and ``SO(N)`` acts transitively on it, i.e. can produce any other element in ``St(n,N)``. A similar statement is also true regarding the tangent spaces of ``St(n,N)``, namely:

```math
T_YSt(n,N) = \mathfrak{g}\cdot{}Y,
```

i.e. every tangent space can be expressed through an action of the associated Lie algebra.

The kernel of the mapping $\mathfrak{g}\to{}T_YSt(n,N), B\mapsto{}BY$ is referred to as $\mathfrak{g}^{\mathrm{ver},Y}$, the **vertical component** of the Lie algebra at $Y$. In the case ``Y=E`` it is easy to see that elements belonging to $\mathfrak{g}^{\mathrm{ver},E}$ are of the following form:
```math
\begin{bmatrix}
\hat{\mathbb{O}} & \tilde{\mathbb{O}}^T \\
\tilde{\mathbb{O}} & C
\end{bmatrix},
```
where $\hat{\mathbb{O}}\in\mathbb{R}^{n\times{}n}$ is a "small" zero matrix and $\tilde{\mathbb{O}}\in\mathbb{R}^{(N-n)\times{}n}$ is a bigger one. $C\in\mathbb{R}^{(N-n)\times{}(N-n)}$ is a skew-symmetric matrix.

The *orthogonal complement* of the vertical component is referred to as the **horizontal component** and denoted by ``\mathfrak{g}^{\mathrm{hor}, Y}``. It is isomorphic to ``T_YSt(n,N)`` and this isomorphism can be found explicitly. In the case of the Stiefel manifold:

```math
\Omega(Y, \cdot):T_YSt(n,N)\to\mathfrak{g}^{\mathrm{hor},Y},\, \Delta \mapsto (\mathbb{I} - \frac{1}{2}YY^T)\Delta{}Y^T - Y\Delta^T(\mathbb{I} - \frac{1}{2}YY^T)
```

The elements of ``\mathfrak{g}^{\mathrm{hor},E}=:\mathfrak{g}^\mathrm{hor}`` are those for the special case ``Y=E``; they are of the form described at the top of this page.

## Special functions

You can also draw random elements from $\mathfrak{g}^\mathrm{hor}$ through e.g.
```julia
rand(CUDADevice(), StiefelLieAlgHorMatrix{Float32}, 10, 5)
```
In this example: $N=10$ and $n=5$.
\ No newline at end of file

From 03963579f31c416980c05b5072397373f8b644c7 Mon Sep 17 00:00:00 2001
From: benedict-96
Date: Mon, 27 May 2024 11:14:56 +0200
Subject: [PATCH 07/32] Moved grassmann LieAlgHorMatrix docs into global_tangent_spaces.md.

---
 docs/src/arrays/global_tangent_spaces.md | 165 ++++++++++++++++++
 1 file changed, 165 insertions(+)

diff --git a/docs/src/arrays/global_tangent_spaces.md b/docs/src/arrays/global_tangent_spaces.md
new file mode 100644
index 000000000..0672437af
--- /dev/null
+++ b/docs/src/arrays/global_tangent_spaces.md
@@ -0,0 +1,165 @@
+# Global Tangent Spaces
+
+In `GeometricMachineLearning` standard neural network optimizers are generalized to [homogeneous spaces](@ref "Homogeneous Spaces") by leveraging the special structure of the tangent spaces of this class of manifolds. When we introduced homogeneous spaces we already saw that every tangent space ``T_Y\mathcal{M}`` to a homogeneous space is of the form:

```math
 T_Y\mathcal{M} = \mathfrak{g} \cdot Y := \{AY: A\in{}\mathfrak{g}\}.
+``` + +We then have a decomposition of ``\mathfrak{g}`` into a vertical part ``\mathfrak{g}^{\mathrm{ver}, Y}`` and a horizontal part ``\mathfrak{g}^{\mathrm{hor}, Y}`` and the horizontal part is isomorphic to ``T_Y\mathcal{M}``. + +We now identify a special element ``E \in \mathcal{M}`` and designate the horizontal component ``\mathfrak{g}^{\mathrm{hor}, E}`` as our *global tangent space*. We will refer to this global tangent space by ``\mathfrak{g}^\mathrm{hor}``. We can now find a transformation from any ``\mathfrak{g}^{\mathrm{hor}, Y}`` to ``\mathfrak{g}^\mathrm{hor}`` and vice-versa (these spaces are isomorphic). + +```@eval +Main.theorem(raw"Let ``A\in{}G`` an element such that ``AE = Y``. Then we have +" * Main.indentation * raw"```math +" * Main.indentation * raw"A^{-1}\cdot\mathfrak{g}^{\mathrm{hor},Y}\cdot{}A = \mathfrak{g}^\mathrm{hor}, +" * Main.indentation * raw"``` +" * Main.indentation * raw"i.e. for every element ``B\in\mathfrak{g}^\mathrm{hor}`` we can find a ``B^Y \in \mathfrak{g}^{\mathrm{hor},Y}`` s.t. ``B = A^{-1}B^YA`` (and vice-versa).") +``` + +```@eval +Main.proof(raw"We first show that for every ``B^Y\in\mathfrak{g}^{\mathrm{hor},Y}`` the element ``A^{-1}B^YA`` is in ``\mathfrak{g}^{\mathrm{hor}}``. First not that ``A^{-1}B^YA\in\mathfrak{g}`` by a fundamental theorem of Lie group theory (closedness of the Lie algebra under adjoint action). Now assume that ``A^{-1}B^YA`` is not fully contained in ``\mathfrak{g}^\mathrm{hor}``, i.e. it also has a vertical component. So we would lose information when performing ``A^{-1}B^YA \mapsto A^{-1}B^YAE = A^{-1}B^YY``, but this contradicts the fact that ``B^Y\in\mathfrak{g}^{\mathrm{hor},Y}.`` We now have to proof that for every ``B\in\mathfrak{g}^\mathrm{hor}`` we can find an element in ``\mathfrak{g}^{\mathrm{hor}, Y}`` such that this element is mapped to ``B``. By a argument similar to the one above we can show that ``ABA^{-1}\in\mathfrak{g}^\mathrm{hor, Y}`` and this element maps to ``B``. Proofing that the map is injective is now trivial.") +``` + +We should note that we have written all Lie group and Lie algebra actions as simple matrix multiplications, like ``AE = Y``. For some Lie groups and Lie algebras we should use different notations [holm2009geometric](@cite). But this is not relevant for what we use in `GeometricMachineLearning`. + +## Global Sections + +Note that the theorem above requires us to find an element ``A\in{}G`` such that ``AE = Y``. If we can find a mapping ``\lambda:\mathcal{M}\to{}G`` we call such a mapping a *global section*. Note that in general global sections are not unique because the rank of ``G`` is in general greater than that of ``\mathcal{M}``. We give an example of how to construct such a global section for the Stiefel and the Grassmann manifold below. + +## The Global Tangent Space for the Stiefel Manifold + +We now discuss the specific form of the global tangent space for the [Stiefel manifold](@ref "The Stiefel Manifold"). We choose the distinct element[^1] ``E`` to have an especially simple form: + +[^1]: We already introduced this special matrix together with the Stiefel manifold. + +```math +E = \begin{bmatrix} +\mathbb{I}_n \\ +\mathbb{O} +\end{bmatrix}\in{}St(n, N). 
+``` + +Based on this elements of the vector space ``\mathfrak{g}^{\mathrm{hor}, E} =: \mathfrak{g}^{\mathrm{hor}}`` are: + +```math +\begin{pmatrix} +A & B^T \\ B & \mathbb{O} +\end{pmatrix}, +``` + +where ``A`` is a skew-symmetric matrix of size ``n\times{}n`` and ``B`` is an arbitrary matrix of size ``(N - n)\times{}n``. + +Arrays of type ``\mathfrak{g}^{\mathrm{hor}, E}`` are implemented in `GeometricMachineLearning` under the name `StiefelLieAlgHorMatrix`. + +We can call this with e.g. a skew-symmetric matrix ``A`` and an arbitrary matrix ``B``: + +```@example call_stiefel_lie_alg_hor_matrix_1 +using GeometricMachineLearning # hide + +N, n = 10, 4 + +A = rand(SkewSymMatrix, n) +``` + +```@example call_stiefel_lie_alg_hor_matrix_1 +B = rand(N - n, n) +``` + +```@example call_stiefel_lie_alg_hor_matrix_1 +B1 = StiefelLieAlgHorMatrix(A, B, N, n) +``` + +We can also call it with a matrix of shape ``N\times{}N``: + +```@example call_stiefel_lie_alg_hor_matrix_1 +B2 = Matrix(B1) # note that this does not have any special structure + +StiefelLieAlgHorMatrix(B2, n) +``` + +Or we can call it a matrix of shape ``N\times{}n``: + +```@example call_stiefel_lie_alg_hor_matrix_1 +E = StiefelProjection(N, n) +``` + +```@example call_stiefel_lie_alg_hor_matrix_1 +B3 = B1 * E + +StiefelLieAlgHorMatrix(B3, n) +``` + +We now demonstrate how to map from an element of ``\mathfrak{g}^{\mathrm{hor}, Y}`` to an element of ``\mathfrak{g}^\mathrm{hor}``: + +```@example global_section +using GeometricMachineLearning # hide + +N, n = 10, 5 + +Y = rand(StiefelManifold, N, n) +Δ = rgrad(Y, rand(N, n)) +ΩΔ = GeometricMachineLearning.Ω(Y, Δ) +λY = GlobalSection(Y) + +λY_mat = Matrix(λY) + +round.(λY_mat' * ΩΔ * λY_mat; digits = 3) +``` + +For computational reasons the user is strongly discouraged to call `Matrix` on an instance of `GlobalSection`. The better option is: + +```@example global_section +using GeometricMachineLearning: _round # hide + +_round(global_rep(λY, Δ); digits = 3) +``` + +We now discuss the global tangent space for the Grassmann manifold. This is similar to the Stiefel case. + +## Global Tangent Space for the Grassmann Manifold + +In the case of the Grassmann manifold we construct the global tangent space with respect to the distinct element ``\mathcal{E}=\mathrm{span}(E)\in{}Gr(n,N)``, where ``E`` is again the same matrix. + +The tangent tangent space ``T_\mathcal{E}Gr(n,N)`` can be represented through matrices: + +```math +\begin{pmatrix} + 0 & \cdots & 0 \\ + \cdots & \cdots & \cdots \\ + 0 & \cdots & 0 \\ + b_{11} & \cdots & b_{1n} \\ + \cdots & \cdots & \cdots \\ + b_{(N-n)1} & \cdots & b_{(N-n)n} +\end{pmatrix}. +``` + +This representation is based on the identification ``T_\mathcal{E}Gr(n,N)\to{}T_E\mathcal{S}_E`` that was discussed in the section on the [Grassmann manifold](@ref "The Grassmann Manifold")[^2]. We use the following notation: + +[^2]: We derived the following expression for the [Riemannian gradient of the Grassmann manifold](@ref "The Riemannian Gradient of the Grassmann Manifold"): ``\mathrm{grad}_\mathcal{Y}^{Gr}L = \nabla_Y{}L - YY^T\nabla_YL``. The tangent space to the element ``\mathcal{E}`` can thus be written as ``\bar{B} - EE^T\bar{B}`` where ``B\in\mathbb{R}^{N\times{}n}`` and the matrices in this tangent space have the desired form. + +```math +\mathfrak{g}^\mathrm{hor} = \mathfrak{g}^{\mathrm{hor},\mathcal{E}} = \left\{\begin{pmatrix} 0 & -B^T \\ B & 0 \end{pmatrix}: \text{$B$ arbitrary}\right\}. 
+``` + +This is equivalent to the horizontal component of ``\mathfrak{g}`` for the Stiefel manifold for the case when ``A`` is zero. This is a reflection of the rotational invariance of the Grassmann manifold: the skew-symmetric matrices ``A`` are connected to the group of rotations ``O(n)`` which is factored out in the Grassmann manifold ``Gr(n,N)\simeq{}St(n,N)/O(n)``. + +## Library Functions + +```@docs; canonical=false +StiefelLieAlgHorMatrix +GrassmannLieAlgHorMatrix +global_rep +``` + + +## References + +```@bibliography +Pages = [] +Canonical = false + +brantner2023generalizing +bendokat2020grassmann +``` \ No newline at end of file diff --git a/docs/src/arrays/grassmann_lie_alg_hor_matrix.md b/docs/src/arrays/grassmann_lie_alg_hor_matrix.md deleted file mode 100644 index 3be1c92a1..000000000 --- a/docs/src/arrays/grassmann_lie_alg_hor_matrix.md +++ /dev/null @@ -1,34 +0,0 @@ -# The horizontal component of the Lie algebra ``\mathfrak{g}`` for the Grassmann manifold - -## Tangent space to the element ``\mathcal{E}`` - -Consider the tangent space to the distinct element ``\mathcal{E}=\mathrm{span}(E)\in{}Gr(n,N)``, where ``E`` is again: - -```math -E = \begin{bmatrix} -\mathbb{I}_n \\ -\mathbb{O} -\end{bmatrix}. -``` - - -The tangent tangent space ``T_\mathcal{E}Gr(n,N)`` can be represented through matrices: - -```math -\begin{pmatrix} - 0 & \cdots & 0 \\ - \cdots & \cdots & \cdots \\ - 0 & \cdots & 0 \\ - a_{11} & \cdots & a_{1n} \\ - \cdots & \cdots & \cdots \\ - a_{(N-n)1} & \cdots & a_{(N-n)n} -\end{pmatrix}, -``` - -where we have used the identification ``T_\mathcal{E}Gr(n,N)\to{}T_E\mathcal{S}_E`` that was discussed in the [section on the Grassmann manifold](@ref "The Grassmann Manifold"). The Grassmann manifold can also be seen as the Stiefel manifold modulo an equivalence class. This leads to the following (which is used for optimization): - -```math -\mathfrak{g}^\mathrm{hor} = \mathfrak{g}^{\mathrm{hor},\mathcal{E}} = \left\{\begin{pmatrix} 0 & -B^T \\ B & 0 \end{pmatrix}: \text{$B$ arbitrary}\right\}. -``` - -This is equivalent to the horizontal component of ``\mathfrak{g}`` for the [Stiefel manifold](stiefel_lie_alg_horizontal.md) for the case when ``A`` is zero. This is a reflection of the rotational invariance of the Grassmann manifold: the skew-symmetric matrices ``A`` are connected to the group of rotations ``O(n)`` which is factored out in the Grassmann manifold ``Gr(n,N)\simeq{}St(n,N)/O(n)``. \ No newline at end of file From 07bc51e424aaed476af80ea85b75cdb4c53d4ab1 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Mon, 27 May 2024 16:18:05 +0200 Subject: [PATCH 08/32] Put horizontal components into one md file. --- docs/make.jl | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/docs/make.jl b/docs/make.jl index 16098433c..f2f622e12 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -23,6 +23,8 @@ const html_format = Documenter.HTML(; ], # specifies that we do not display the package name again (it's already in the logo) sidebar_sitename = false, + # we should get rid of this line again eventually. 
We will be able to do this once we got rid of library.md + size_threshold = 262144, ) # if platform is set to "none" then no output pdf is generated @@ -144,10 +146,9 @@ makedocs(; "Riemannian Manifolds" => "manifolds/riemannian_manifolds.md", "Homogeneous Spaces" => "manifolds/homogeneous_spaces.md", ], - "Arrays" => [ + "Special Arrays" => [ "Symmetric and Skew-Symmetric Matrices" => "arrays/skew_symmetric_matrix.md", - "Stiefel Global Tangent Space" => "arrays/stiefel_lie_alg_horizontal.md", - "Grassmann Global Tangent Space"=> "arrays/grassmann_lie_alg_hor_matrix.md", + "Global Tangent Spaces" => "arrays/global_tangent_spaces.md", ], "Optimizer Framework" => [ "Optimizers" => "Optimizer.md", From a41e7d5765ea1d12d1897eb4dd467561484bbe4d Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Mon, 27 May 2024 16:18:28 +0200 Subject: [PATCH 09/32] Added Holm reference. --- docs/src/GeometricMachineLearning.bib | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/docs/src/GeometricMachineLearning.bib b/docs/src/GeometricMachineLearning.bib index 4ebde5c1d..b50dac1e3 100644 --- a/docs/src/GeometricMachineLearning.bib +++ b/docs/src/GeometricMachineLearning.bib @@ -350,6 +350,15 @@ @inproceedings{feng1987symplectic organization={Springer} } +@book{holm2009geometric, + title={Geometric mechanics and symmetry: from finite to infinite dimensions}, + author={Holm, Darryl D and Schmah, Tanya and Stoica, Cristina}, + volume={12}, + year={2009}, + publisher={Oxford University Press}, + address={Oxford, UK} +} + @article{ge1988approximation, title={On the approximation of linear Hamiltonian systems}, author={Ge, Zhong and Feng, Kang}, From bf4069fb966dc8773a9d0363c344a369cc88e44a Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Mon, 27 May 2024 16:18:49 +0200 Subject: [PATCH 10/32] Fixed references. --- docs/src/Optimizer.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/src/Optimizer.md b/docs/src/Optimizer.md index 5b1b660a9..94473b24d 100644 --- a/docs/src/Optimizer.md +++ b/docs/src/Optimizer.md @@ -1,6 +1,6 @@ # Optimizer -In order to generalize neural network optimizers to [homogeneous spaces](manifolds/homogeneous_spaces.md), a class of manifolds we often encounter in machine learning, we have to find a [global tangent space representation](arrays/stiefel_lie_alg_horizontal.md) which we call $\mathfrak{g}^\mathrm{hor}$ here. +In order to generalize neural network optimizers to [homogeneous spaces](@ref "Homogeneous Spaces"), a class of manifolds we often encounter in machine learning, we have to find a [global tangent space representation](@ref "Global Tangent Spaces") which we call $\mathfrak{g}^\mathrm{hor}$ here. Starting from an element of the tangent space $T_Y\mathcal{M}$[^1], we need to perform two mappings to arrive at $\mathfrak{g}^\mathrm{hor}$, which we refer to by $\Omega$ and a red horizontal arrow: From 233db5bde45236e54437aea6b29fca99830a8335 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Mon, 27 May 2024 16:19:57 +0200 Subject: [PATCH 11/32] Added docs on the Grassmann manifold. --- docs/src/arrays/global_tangent_spaces.md | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/docs/src/arrays/global_tangent_spaces.md b/docs/src/arrays/global_tangent_spaces.md index 0672437af..ad1438ddf 100644 --- a/docs/src/arrays/global_tangent_spaces.md +++ b/docs/src/arrays/global_tangent_spaces.md @@ -22,11 +22,21 @@ Main.theorem(raw"Let ``A\in{}G`` an element such that ``AE = Y``. 
Then we have

 We should note that we have written all Lie group and Lie algebra actions as simple matrix multiplications, like ``AE = Y``. For some Lie groups and Lie algebras we should use different notations [holm2009geometric](@cite). These Lie groups are however not relevant for what we use in `GeometricMachineLearning` and we will stick to regular matrix notation.

 ## Global Sections

 Note that the theorem above requires us to find an element ``A\in{}G`` such that ``AE = Y``. If we can find a mapping ``\lambda:\mathcal{M}\to{}G`` we call such a mapping a *global section*.

+```@eval
+Main.theorem(raw"We call a mapping ``\lambda:\mathcal{M} \to G`` from a homogeneous space to its associated Lie group a **global section** if it satisfies:
+" * Main.indentation * raw"```math
+" * Main.indentation * raw"\lambda(Y)E = Y,
+" * Main.indentation * raw"```
+" * Main.indentation * raw"where ``E`` is the distinct element of the homogeneous space.")
+```
+
 Note that in general global sections are not unique because the rank of ``G`` is in general greater than that of ``\mathcal{M}``. We give an example of how to construct such a global section for the Stiefel and the Grassmann manifold below.

 This is equivalent to the horizontal component of ``\mathfrak{g}`` for the Stiefel manifold for the case when ``A`` is zero.
This is a reflection of the rotational invariance of the Grassmann manifold: the skew-symmetric matrices ``A`` are connected to the group of rotations ``O(n)`` which is factored out in the Grassmann manifold ``Gr(n,N)\simeq{}St(n,N)/O(n)``. +This is equivalent to the horizontal component of ``\mathfrak{g}`` for the Stiefel manifold for the case when ``A`` is zero. This is a reflection of the rotational invariance of the Grassmann manifold: the skew-symmetric matrices ``A`` are connected to the group of rotations ``O(n)`` which is factored out in the Grassmann manifold ``Gr(n,N)\simeq{}St(n,N)/O(n)``. In `GeometricMachineLearning` we thus treat the Grassmann manifold as being embedded in the Stiefel manifold. In [bendokat2020grassmann](@cite) viewing the Grassmann manifold as a quotient space of the Stiefel manifold is important for "feasibility" in "practical computations". ## Library Functions ```@docs; canonical=false StiefelLieAlgHorMatrix GrassmannLieAlgHorMatrix +GlobalSection +GeometricMachineLearning.global_section global_rep ``` @@ -160,6 +172,8 @@ global_rep Pages = [] Canonical = false -brantner2023generalizing +absil2004riemannian +absil2008optimization bendokat2020grassmann +brantner2023generalizing ``` \ No newline at end of file From 105b6516d53093990160d934a6114e4f068128a0 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Mon, 27 May 2024 16:20:36 +0200 Subject: [PATCH 12/32] Added new test for the horizontal components. --- ...test_grassmann_lie_alg_hor_constructors.jl | 47 ++++++++++++++++++ .../test_stiefel_lie_alg_hor_constructors.jl | 48 +++++++++++++++++++ test/runtests.jl | 2 + 3 files changed, 97 insertions(+) create mode 100644 test/arrays/test_grassmann_lie_alg_hor_constructors.jl create mode 100644 test/arrays/test_stiefel_lie_alg_hor_constructors.jl diff --git a/test/arrays/test_grassmann_lie_alg_hor_constructors.jl b/test/arrays/test_grassmann_lie_alg_hor_constructors.jl new file mode 100644 index 000000000..12f1b9bc4 --- /dev/null +++ b/test/arrays/test_grassmann_lie_alg_hor_constructors.jl @@ -0,0 +1,47 @@ +using GeometricMachineLearning +using Test +import Random + +Random.seed!(123) + +function test_constructors(N::Integer, n::Integer; T::DataType=Float32) + B = rand(T, N - n, n) + B1 = GrassmannLieAlgHorMatrix(B, N, n) + + B2 = Matrix(B1) # note that this does not have any special structure + + B2 = GrassmannLieAlgHorMatrix(B2, n) + + E = StiefelProjection(B1) + + B3 = B1 * E + + B3 = GrassmannLieAlgHorMatrix(B3, n) + + @test B1 ≈ B2 ≈ B3 +end + +function test_lift(N::Integer, n::Integer; T::DataType=Float32) + Y = rand(GrassmannManifold{T}, N, n) + Δ = rgrad(Y, rand(T, N, n)) + ΩΔ = GeometricMachineLearning.Ω(Y, Δ) + λY = GlobalSection(Y) + + λY_mat = Matrix(λY) + + Δ_lift1 = λY_mat' * ΩΔ * λY_mat + + Δ_lift2 = global_rep(λY, Δ) + + @test Δ_lift1 ≈ Δ_lift2 + @test ΩΔ * Y.A ≈ Δ +end + +for T in (Float32, Float64) + for N in (10, 20) + for n in (3, 5) + test_constructors(N, n; T = T) + test_lift(N, n; T = T) + end + end +end \ No newline at end of file diff --git a/test/arrays/test_stiefel_lie_alg_hor_constructors.jl b/test/arrays/test_stiefel_lie_alg_hor_constructors.jl new file mode 100644 index 000000000..7fc9d4ed2 --- /dev/null +++ b/test/arrays/test_stiefel_lie_alg_hor_constructors.jl @@ -0,0 +1,48 @@ +using GeometricMachineLearning +using Test +import Random + +Random.seed!(1234) + +function test_constructors(N::Integer, n::Integer; T::DataType=Float32) + A = rand(SkewSymMatrix{T}, n) + B = rand(T, N - n, n) + B1 = StiefelLieAlgHorMatrix(A, B, N, n) + + 
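    # dense round-trip: converting B1 to a plain matrix and re-building from it should give back B1 exactly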
B2 = Matrix(B1) # note that this does not have any special structure + + B2 = StiefelLieAlgHorMatrix(B2, n) + + E = StiefelProjection(B1) + + B3 = B1 * E + + B3 = StiefelLieAlgHorMatrix(B3, n) + + @test B1 ≈ B2 ≈ B3 +end + +function test_lift(N::Integer, n::Integer; T::DataType=Float32) + Y = rand(StiefelManifold{T}, N, n) + Δ = rgrad(Y, rand(T, N, n)) + ΩΔ = GeometricMachineLearning.Ω(Y, Δ) + λY = GlobalSection(Y) + + λY_mat = Matrix(λY) + + Δ_lift1 = λY_mat' * ΩΔ * λY_mat + + Δ_lift2 = global_rep(λY, Δ) + + @test Δ_lift1 ≈ Δ_lift2 + @test ΩΔ * Y.A ≈ Δ +end + +for T in (Float32, Float64) + for N in (10, 20) + for n in (3, 5) + test_constructors(N, n; T=T) + test_lift(N, n; T=T) + end + end +end \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index 424285101..901cfb565 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -16,6 +16,8 @@ using SafeTestsets @safetestset "Matrix multiplication tests for custom arrays " begin include("arrays/matrix_multiplication_for_custom_arrays.jl") end @safetestset "Test constructors for custom arrays " begin include("arrays/constructor_tests_for_custom_arrays.jl") end @safetestset "Symplectic Potential (array tests) " begin include("arrays/symplectic_potential.jl") end +@safetestset "Test StiefelLieAlgHorMatrix constructors and lifts " begin include("arrays/test_stiefel_lie_alg_hor_constructors.jl") end +@safetestset "Test GrassmannLieAlgHorMatrix constructors and lifts " begin include("arrays/test_grassmann_lie_alg_hor_constructors.jl") end @safetestset "Test triangular matrices " begin include("arrays/triangular.jl") end @safetestset "Manifolds (Stiefel): " begin include("manifolds/stiefel_manifold.jl") end From 192925313dc8c0e51ba20a67150b42b9d9b2bea7 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Mon, 27 May 2024 16:21:03 +0200 Subject: [PATCH 13/32] Added a note on parallel computations. --- docs/src/arrays/skew_symmetric_matrix.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/docs/src/arrays/skew_symmetric_matrix.md b/docs/src/arrays/skew_symmetric_matrix.md index 5043ff7b7..daf992e49 100644 --- a/docs/src/arrays/skew_symmetric_matrix.md +++ b/docs/src/arrays/skew_symmetric_matrix.md @@ -109,7 +109,11 @@ SkewSymMatrix(S, 5) These special matrices are important for [SympNets](@ref "SympNet Architecture"), [volume-preserving transformers](@ref "Volume-Preserving Transformer") and [linear symplectic transformers](@ref "Linear Symplectic Transformer"). -# Library Functions +## Parallel Computation + +The functions [`GeometricMachineLearning.mat_tensor_mul`](@ref) and [`GeometricMachineLearning.tensor_mat_mul`](@ref) are also implemented for these matrices for efficient parallel computations. This is elaborated on when we introduce [pullbacks](@ref "Pullbacks and Automatic Differentiation"). + +## Library Functions ```@docs; canonical = false UpperTriangular From ee0945d267048118eb55af78ecd2419dd356fcd8 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Mon, 27 May 2024 16:21:44 +0200 Subject: [PATCH 14/32] Fixed notational issue. 
---
 docs/src/manifolds/homogeneous_spaces.md | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/docs/src/manifolds/homogeneous_spaces.md b/docs/src/manifolds/homogeneous_spaces.md
index 32c24174c..7f791defe 100644
--- a/docs/src/manifolds/homogeneous_spaces.md
+++ b/docs/src/manifolds/homogeneous_spaces.md
@@ -77,7 +77,7 @@ using GeometricMachineLearning

Y = rand(StiefelManifold{Float32}, 5, 3)
Δ = rgrad(Y, rand(Float32, 5, 3))

GeometricMachineLearning.Ω(Y, Δ) * Y.A ≈ Δ
```

The function `rgrad` is introduced below.
@@ -151,7 +151,7 @@ Obtaining the Riemannian Gradient for the Grassmann manifold is slightly more di

```@eval
Main.theorem(raw"The Riemannian gradient of a function ``L`` defined on the Grassmann manifold can be written as
" * Main.indentation * raw"```math
" * Main.indentation * raw"\mathrm{grad}_\mathcal{Y}^{Gr}L \simeq \nabla_Y{}L - YY^T\nabla_YL,
" * Main.indentation * raw"```
" * Main.indentation * raw"where ``\nabla_Y{}L`` again is the Euclidean gradient.")
```
@@ -194,8 +194,11 @@ Main.proof(raw"In a first step we identify charts on the Grassmann manifold to m

StiefelManifold
rand(manifold_type::Type{MT}, ::Integer, ::Integer) where MT <: Manifold
GeometricMachineLearning.rgrad(::StiefelManifold, ::AbstractMatrix)
GeometricMachineLearning.rgrad(::GrassmannManifold, ::AbstractMatrix)
GeometricMachineLearning.metric(::StiefelManifold, ::AbstractMatrix, ::AbstractMatrix)
GeometricMachineLearning.metric(::GrassmannManifold, ::AbstractMatrix, ::AbstractMatrix)
GeometricMachineLearning.Ω(::StiefelManifold{T}, ::AbstractMatrix{T}) where T
GeometricMachineLearning.Ω(::GrassmannManifold{T}, ::AbstractMatrix{T}) where T
```

## References

From 89e9ae60d66b2a0d7f4851a23e56718a5ae6d93d Mon Sep 17 00:00:00 2001
From: benedict-96
Date: Mon, 27 May 2024 16:22:35 +0200
Subject: [PATCH 15/32] Fixed issues with references.

---
 docs/src/optimizers/adam_optimizer.md                    | 8 +++-----
 docs/src/optimizers/manifold_related/cayley.md           | 2 +-
 docs/src/optimizers/manifold_related/global_sections.md  | 4 ++--
 docs/src/optimizers/manifold_related/horizontal_lift.md  | 2 +-
 4 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/docs/src/optimizers/adam_optimizer.md b/docs/src/optimizers/adam_optimizer.md
index e09c0b996..0fc3b650a 100644
--- a/docs/src/optimizers/adam_optimizer.md
+++ b/docs/src/optimizers/adam_optimizer.md
@@ -1,6 +1,6 @@
 # The Adam Optimizer

The Adam Optimizer is one of the most widely used (if not the most widely used) neural network optimizers. Like most modern neural network optimizers it contains a `cache` that is updated based on first-order gradient information and then, in a second step, the `cache` is used to compute a velocity estimate for updating the neural network weights.

Here we first describe the Adam algorithm for the case where all the weights are on a vector space and then show how to generalize this to the case where the weights are on a manifold.

@@ -12,7 +12,7 @@ If all the weights are on a vector space, then we directly compute updates for $
1. 
$B_1 \gets ((\rho_1 - \rho_1^t)/(1 - \rho_1^t))\cdot{}B_1 + (1 - \rho_1)/(1 - \rho_1^t)\cdot{}\nabla{}L,$
2. $B_2 \gets ((\rho_2 - \rho_1^t)/(1 - \rho_2^t))\cdot{}B_2 + (1 - \rho_2)/(1 - \rho_2^t)\cdot\nabla{}L\odot\nabla{}L,$

   where $\odot:\mathbb{R}^n\times\mathbb{R}^n\to\mathbb{R}^n$ is the **Hadamard product**: $[a\odot{}b]_i = a_ib_i$. $\rho_1$ and $\rho_2$ are hyperparameters. Their defaults, $\rho_1=0.9$ and $\rho_2=0.99$, are taken from [goodfellow2016deep; page 301](@cite). After having updated the `cache` (i.e. $B_1$ and $B_2$) we compute a **velocity** (step 3) with which the parameters $Y_t$ are then updated (step 4).

3. $W_t\gets -\eta{}B_1/\sqrt{B_2 + \delta},$
4. $Y_{t+1} \gets Y_t + W_t,$

```@example
Main.include_graphics("../tikz/adam_optimizer") # hide
```

## Weights on manifolds

The problem with generalizing Adam to manifolds is that the Hadamard product $\odot$ as well as the other element-wise operations ($/$, $\sqrt{}$ and $+$ in step 3 above) lack a clear geometric interpretation. In `GeometricMachineLearning` we get around this issue by utilizing a so-called [global tangent space representation](@ref "Global Tangent Spaces").

## References

-- Goodfellow I, Bengio Y, Courville A. Deep learning[M]. MIT press, 2016.
-
```@bibliography
Pages = []
Canonical = false

diff --git a/docs/src/optimizers/manifold_related/cayley.md b/docs/src/optimizers/manifold_related/cayley.md
index 504cffd6c..ff39b30ee 100644
--- a/docs/src/optimizers/manifold_related/cayley.md
+++ b/docs/src/optimizers/manifold_related/cayley.md
@@ -8,7 +8,7 @@ The Cayley retraction reads:
```
This is easily checked to be a retraction, i.e. $\mathrm{Cayley}(\mathbb{O}) = \mathbb{I}$ and $\frac{\partial}{\partial{}t}\mathrm{Cayley}(tC) = C$.

What we need in practice is not the computation of the Cayley transform of an arbitrary matrix, but the Cayley transform of an element of $\mathfrak{g}^\mathrm{hor}$, the [global tangent space representation](@ref "Global Tangent Spaces").
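For concreteness, here is a minimal dense sketch of this computation (it assumes `rand` is defined for `StiefelLieAlgHorMatrix` in the same way as for the other special matrices; the actual implementation never forms these dense matrices):

```julia
using GeometricMachineLearning
using LinearAlgebra: I

B = rand(StiefelLieAlgHorMatrix, 5, 2)  # element of the global tangent space 𝔤ʰᵒʳ
C = Matrix(B)                           # dense skew-symmetric representation
# apply the Cayley transform of C to the distinct element E:
Y = inv(I - C / 2) * (I + C / 2) * StiefelProjection(5, 2)
Y' * Y ≈ I(2)                           # true: Y lies on the Stiefel manifold St(2, 5)
```

Exploiting the sparse structure of $\mathfrak{g}^\mathrm{hor}$ makes this computation much cheaper, which is what we discuss next.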
The elements of $\mathfrak{g}^\mathrm{hor}$ can be written as:

diff --git a/docs/src/optimizers/manifold_related/global_sections.md b/docs/src/optimizers/manifold_related/global_sections.md
index ad2564bfd..95bba26dc 100644
--- a/docs/src/optimizers/manifold_related/global_sections.md
+++ b/docs/src/optimizers/manifold_related/global_sections.md
@@ -1,6 +1,6 @@
 # Global Sections

**Global sections** are needed for the generalization of [Adam](../adam_optimizer.md) and other optimizers to [homogeneous spaces](@ref "Homogeneous Spaces"). They are necessary to perform the two mappings represented by horizontal and vertical red lines in the section on the general [optimizer framework](../../Optimizer.md).

## Computing the global section
In differential geometry a **section** is always associated to some **bundle**, in our case this bundle is $\pi:G\to\mathcal{M},A\mapsto{}AE$. A section is a mapping $\mathcal{M}\to{}G$ for which $\pi$ is a left inverse, i.e. $\pi\circ\lambda = \mathrm{id}$.
@@ -37,7 +37,7 @@ In practice we use the following:
 \end{aligned}
```

meaning that for an element of the [horizontal component of the Lie algebra](@ref "The Global Tangent Space for the Stiefel Manifold") ``\mathfrak{g}^\mathrm{hor}`` we store ``A=Y^T\Delta`` and ``B=\bar{\lambda}^T\Delta``.

## Optimization

diff --git a/docs/src/optimizers/manifold_related/horizontal_lift.md b/docs/src/optimizers/manifold_related/horizontal_lift.md
index dcf1aeebb..0646c6f47 100644
--- a/docs/src/optimizers/manifold_related/horizontal_lift.md
+++ b/docs/src/optimizers/manifold_related/horizontal_lift.md
@@ -12,7 +12,7 @@ The orthogonal complement[^1] of $\mathfrak{g}^{\mathrm{ver}, Y}$ is the horizon
 \Omega(Y, V) = \left(\mathbb{I} - \frac{1}{2}YY^T\right)VY^T - YV^T(\mathbb{I} - \frac{1}{2}YY^T).
```

If the element $Y$ is the distinct element $E$, then the elements of $\mathfrak{g}^{\mathrm{hor},E}$ take a particularly simple form, see [Global Tangent Space](@ref "Global Tangent Spaces") for a description of this.

[^1]: The orthogonal complement is taken with respect to a metric defined on $\mathfrak{g}$. For the case of $G=SO(N)$ and $\mathfrak{g}=\mathfrak{so}(N) = \{A:A+A^T =0\}$ this metric can be chosen as $(A_1,A_2)\mapsto{}\frac{1}{2}A_1^TA_2$.

From 195157c7c5572f96d7b72d914d9d8ffcdc1453de Mon Sep 17 00:00:00 2001
From: benedict-96
Date: Mon, 27 May 2024 16:23:34 +0200
Subject: [PATCH 16/32] Updated docstrings to conform with recommendations.
--- .../grassmann_lie_algebra_horizontal.jl | 79 ++++++++++++++++--- src/arrays/skew_symmetric.jl | 8 ++ src/arrays/stiefel_lie_algebra_horizontal.jl | 77 ++++++++++++------ src/arrays/stiefel_projection.jl | 48 +++++++---- 4 files changed, 162 insertions(+), 50 deletions(-) diff --git a/src/arrays/grassmann_lie_algebra_horizontal.jl b/src/arrays/grassmann_lie_algebra_horizontal.jl index bf883f157..dd69af148 100644 --- a/src/arrays/grassmann_lie_algebra_horizontal.jl +++ b/src/arrays/grassmann_lie_algebra_horizontal.jl @@ -1,5 +1,27 @@ -""" -This implements the horizontal component of a Lie algebra that is isomorphic to the Grassmann manifold. +@doc raw""" + GrassmannLieAlgHorMatrix(B::AbstractMatrix{T}, N::Integer, n::Integer) where T + +Build an instance of `GrassmannLieAlgHorMatrix` based on an arbitrary matrix `B` of size ``(N-n)\times{}n``. + +`GrassmannLieAlgHorMatrix` is the *horizontal component of the Lie algebra of skew-symmetric matrices* (with respect to the canonical metric). +The projection here is: ``\pi:S \to SE/\sim`` where +```math +E = \begin{pmatrix} \mathbb{I}_{n} \\ \mathbb{O}_{(N-n)\times{}n} \end{pmatrix}, +``` + +and the equivalence relation is + +```math +V_1 \sim V_2 \iff \exists A\in\mathcal{S}_\mathrm{skew}(n) \text{such that } V_2 = V_1 + \begin{pmatrix} A \\ \mathbb{O} \end{pmatrix} +``` + +An element of GrassmannLieAlgMatrix takes the form: +```math +\begin{pmatrix} +\bar{\mathbb{O}} & B^T \\ B & \mathbb{O} +\end{pmatrix}, +``` +where ``\bar{\mathbb{O}}\in\mathbb{R}^{n\times{}n}`` and ``\mathbb{O}\in\mathbb{R}^{(N - n)\times{}n}. """ mutable struct GrassmannLieAlgHorMatrix{T, ST <: AbstractMatrix{T}} <: AbstractLieAlgHorMatrix{T} B::ST @@ -13,19 +35,46 @@ mutable struct GrassmannLieAlgHorMatrix{T, ST <: AbstractMatrix{T}} <: AbstractL new{T, typeof(B)}(B, N, n) end +end - function GrassmannLieAlgHorMatrix(A::AbstractMatrix{T}, n::Int) where {T} - N = size(A, 1) - @assert N ≥ n +@doc raw""" + GrassmannLieAlgHorMatrix(D::AbstractMatrix, n::Integer) - B = A[(n+1):N,1:n] - new{eltype(A), typeof(B)}(B, N, n) - end -end +Take a big matrix as input and build an instance of `GrassmannLieAlgHorMatrix` belonging to the GrassmannManifold ``Gr(n, N)`` where ``N`` is the number of rows of `D`. -Base.parent(A::GrassmannLieAlgHorMatrix) = (B) +If the constructor is called with a big ``N\times{}N`` matrix, then the projection is performed the following way: + +```math +\begin{pmatrix} +A & B_1 \\ +B_2 & D +\end{pmatrix} \mapsto +\begin{pmatrix} +\bar{\mathbb{O}} & -B_2^T \\ +B_2 & \mathbb{O} +\end{pmatrix}. +``` + +This can also be seen as the operation: +```math +D \mapsto \Omega(E, DE - EE^TDE), +``` + +where ``\Omega`` is the horizontal lift [`GeometricMachineLearning.Ω`](@ref). 
+""" +function GrassmannLieAlgHorMatrix(D::AbstractMatrix{T}, n::Int) where {T} + N = size(D, 1) + @assert N ≥ n + + B = D[(n+1):N,1:n] + GrassmannLieAlgHorMatrix(B, N, n) +end + +Base.parent(A::GrassmannLieAlgHorMatrix) = (A.B, ) Base.size(A::GrassmannLieAlgHorMatrix) = (A.N, A.N) +KernelAbstractions.get_backend(B::GrassmannLieAlgHorMatrix) = KernelAbstractions.get_backend(B.B) + function Base.getindex(A::GrassmannLieAlgHorMatrix{T}, i::Integer, j::Integer) where {T} if i ≤ A.n if j ≤ A.n @@ -133,4 +182,12 @@ function LinearAlgebra.mul!(C::GrassmannLieAlgHorMatrix, A::GrassmannLieAlgHorMa mul!(C.B, A.B, α) end LinearAlgebra.mul!(C::GrassmannLieAlgHorMatrix, α::Real, A::GrassmannLieAlgHorMatrix) = mul!(C, A, α) -LinearAlgebra.rmul!(C::GrassmannLieAlgHorMatrix, α::Real) = mul!(C, C, α) \ No newline at end of file +LinearAlgebra.rmul!(C::GrassmannLieAlgHorMatrix, α::Real) = mul!(C, C, α) + +function _round(B::GrassmannLieAlgHorMatrix; kwargs...) + GrassmannLieAlgHorMatrix( + _round(B.B; kwargs...), + B.N, + B.n + ) +end \ No newline at end of file diff --git a/src/arrays/skew_symmetric.jl b/src/arrays/skew_symmetric.jl index 1e2b0ac61..83409f2e8 100644 --- a/src/arrays/skew_symmetric.jl +++ b/src/arrays/skew_symmetric.jl @@ -275,6 +275,14 @@ function Base.copyto!(A::SkewSymMatrix, B::SkewSymMatrix) nothing end +function _round(A::SkewSymMatrix; kwargs...) + SkewSymMatrix(_round(A.S; kwargs...), A.n) +end + +function _round(A::AbstractArray; kwargs...) + round.(A; kwargs...) +end + # define routines for generalizing ChainRulesCore to SkewSymMatrix ChainRulesCore.ProjectTo(A::SkewSymMatrix) = ProjectTo{SkewSymMatrix}(; skew_sym = ProjectTo(A.S)) (project::ProjectTo{SkewSymMatrix})(dA::AbstractMatrix) = SkewSymMatrix(project.skew_sym(map_to_Skew(dA)), size(dA, 2)) diff --git a/src/arrays/stiefel_lie_algebra_horizontal.jl b/src/arrays/stiefel_lie_algebra_horizontal.jl index 226ab884b..2e30c497a 100644 --- a/src/arrays/stiefel_lie_algebra_horizontal.jl +++ b/src/arrays/stiefel_lie_algebra_horizontal.jl @@ -1,10 +1,14 @@ @doc raw""" + StiefelLieAlgHorMatrix(A::SkewSymMatrix{T}, B::AbstractMatrix{T}, N::Integer, n::Integer) where T + +Build an instance of `StiefelLieAlgHorMatrix` based on a skew-symmetric matrix `A` and an arbitrary matrix `B`. + `StiefelLieAlgHorMatrix` is the *horizontal component of the Lie algebra of skew-symmetric matrices* (with respect to the canonical metric). -The projection here is: \(\pi:S \to SE \) where +The projection here is: ``\pi:S \to SE`` where ```math E = \begin{pmatrix} \mathbb{I}_{n} \\ \mathbb{O}_{(N-n)\times{}n} \end{pmatrix}. ``` -The matrix \(E\) is implemented under `StiefelProjection` in `GeometricMachineLearning`. +The matrix \(E\) is implemented under [`StiefelProjection`](@ref) in `GeometricMachineLearning`. An element of StiefelLieAlgMatrix takes the form: ```math @@ -12,20 +16,9 @@ An element of StiefelLieAlgMatrix takes the form: A & B^T \\ B & \mathbb{O} \end{pmatrix}, ``` -where \(A\) is skew-symmetric (this is `SkewSymMatrix` in `GeometricMachineLearning`). +where ``A`` is skew-symmetric (this is [`SkewSymMatrix`](@ref) in `GeometricMachineLearning`). -If the constructor is called with a big \(N\times{}N\) matrix, then the projection is performed the following way: -```math -\begin{pmatrix} -A & B_1 \\ -B_2 & D -\end{pmatrix} \mapsto -\begin{pmatrix} -\mathrm{skew}(A) & -B_2^T \\ -B_2 & \mathbb{O} -\end{pmatrix}. -``` -The operation $\mathrm{skew}:\mathbb{R}^{n\times{}n}\to\mathcal{S}_\mathrm{skew}(n)$ is the skew-symmetrization operation. 
This is equivalent to calling the constructor of `SkewSymMatrix` with an \(n\times{}n\) matrix. +Also see [`GrassmannLieAlgHorMatrix`](@ref). """ mutable struct StiefelLieAlgHorMatrix{T, AT <: SkewSymMatrix{T}, ST <: AbstractMatrix{T}} <: AbstractLieAlgHorMatrix{T} A::AT @@ -40,17 +33,44 @@ mutable struct StiefelLieAlgHorMatrix{T, AT <: SkewSymMatrix{T}, ST <: AbstractM new{T, typeof(A), typeof(B)}(A, B, N, n) end - - function StiefelLieAlgHorMatrix(A::AbstractMatrix, n::Integer) - N = size(A, 1) - @assert N ≥ n - - A_small = SkewSymMatrix(A[1:n,1:n]) - B = A[(n+1):N,1:n] - new{eltype(A),typeof(A_small), typeof(B)}(A_small, B, N, n) - end end +@doc raw""" + StiefelLieAlgHorMatrix(D::AbstractMatrix, n::Integer) + +Take a big matrix as input and build an instance of `StiefelLieAlgHorMatrix` belonging to the StiefelManifold ``St(n, N)`` where ``N`` is the number of rows of `D`. + +If the constructor is called with a big ``N\times{}N`` matrix, then the projection is performed the following way: + +```math +\begin{pmatrix} +A & B_1 \\ +B_2 & D +\end{pmatrix} \mapsto +\begin{pmatrix} +\mathrm{skew}(A) & -B_2^T \\ +B_2 & \mathbb{O} +\end{pmatrix}. +``` + +The operation ``\mathrm{skew}:\mathbb{R}^{n\times{}n}\to\mathcal{S}_\mathrm{skew}(n)`` is the skew-symmetrization operation. This is equivalent to calling of [`SkewSymMatrix`](@ref) with an ``n\times{}n`` matrix. + +This can also be seen as the operation: +```math +D \mapsto \Omega(E, DE) = \mathrm{skew}\left(2 \left(\mathbb{I} - \frac{1}{2} E E^T \right) DE E^T\right). +``` + +Also see [`GeometricMachineLearning.Ω`](@ref). +""" +function StiefelLieAlgHorMatrix(D::AbstractMatrix, n::Integer) + N = size(D, 1) + @assert N ≥ n + + A_small = SkewSymMatrix(D[1:n,1:n]) + B = D[(n+1):N, 1:n] + StiefelLieAlgHorMatrix(A_small, B, N, n) +end + Base.parent(A::StiefelLieAlgHorMatrix) = (A, B) Base.size(A::StiefelLieAlgHorMatrix) = (A.N, A.N) @@ -230,4 +250,13 @@ end # fallback -> put this somewhere else! function assign!(A::AbstractArray, B::AbstractArray) A .= B +end + +function _round(B::StiefelLieAlgHorMatrix; kwargs...) + StiefelLieAlgHorMatrix( + _round(B.A; kwargs...), + _round(B.B; kwargs...), + B.N, + B.n + ) end \ No newline at end of file diff --git a/src/arrays/stiefel_projection.jl b/src/arrays/stiefel_projection.jl index 236948a09..09e8a8e39 100644 --- a/src/arrays/stiefel_projection.jl +++ b/src/arrays/stiefel_projection.jl @@ -1,13 +1,11 @@ @doc raw""" -An array that essentially does `vcat(I(n), zeros(N-n, n))` with GPU support. It has **three inner constructors**. The **first one** is called with the following arguments: -1. `backend`: backends as supported by `KernelAbstractions`. -2. `T::Type` -3. `N::Integer` -4. `n::Integer` + StiefelProjection(backend, T, N, n) -The **second constructor** is called by supplying a matrix as input. The constructor will then extract the backend, the type and the dimensions of that matrix. +Make a matrix of the form ``\begin{bmatrix} \mathbb{I} & \mathbb{O} \end{pmatrix}^T`` for a specific backend and data type. -The **third constructor** is called by supplying an instance of `StiefelLieAlgHorMatrix`. +An array that essentially does `vcat(I(n), zeros(N-n, n))` with GPU support. + +# Extend help Technically this should be a subtype of `StiefelManifold`. 
""" @@ -21,16 +19,32 @@ struct StiefelProjection{T, AT} <: AbstractMatrix{T} assign_ones_for_stiefel_projection!(A, ndrange=n) new{T, typeof(A)}(N,n, A) end +end + +@doc raw""" + StiefelProjection(A::AbstractMatrix) - function StiefelProjection(A::AbstractMatrix{T}) where T - StiefelProjection(KernelAbstractions.get_backend(A), T, size(A)...) - end +Extract necessary information from `A` and build an instance of `StiefelProjection`. - function StiefelProjection(B::StiefelLieAlgHorMatrix{T}) where T - StiefelProjection(KernelAbstractions.get_backend(B), T, B.N, B.n) - end +Necessary information here referes to the backend, the data type and the size of the matrix. +""" +function StiefelProjection(A::AbstractMatrix{T}) where T + StiefelProjection(KernelAbstractions.get_backend(A), T, size(A)...) end - + +@doc raw""" + StiefelProjection(B::StiefelLieAlgHorMatrix) + +Extract necessary information from `B` and build an instance of `StiefelProjection`. + +Necessary information here referes to the backend, the data type and the size of the matrix. + +The size is queried through `B.N` and `B.n`. +""" +function StiefelProjection(B::StiefelLieAlgHorMatrix{T}) where T + StiefelProjection(KernelAbstractions.get_backend(B), T, B.N, B.n) +end + @kernel function assign_ones_for_stiefel_projection_kernel!(A::AbstractArray{T}) where T i = @index(Global) A[i, i] = one(T) @@ -50,4 +64,8 @@ Base.:+(A::AbstractMatrix, E::StiefelProjection) = +(E, A) Base.vcat(A::AbstractVecOrMat{T}, E::StiefelProjection{T}) where {T<:Number} = vcat(A, E.A) Base.vcat(E::StiefelProjection{T}, A::AbstractVecOrMat{T}) where {T<:Number} = vcat(E.A, A) Base.hcat(A::AbstractVecOrMat{T}, E::StiefelProjection{T}) where {T<:Number} = hcat(A, E.A) -Base.hcat(E::StiefelProjection{T}, A::AbstractVecOrMat{T}) where {T<:Number} = hcat(E.A, A) \ No newline at end of file +Base.hcat(E::StiefelProjection{T}, A::AbstractVecOrMat{T}) where {T<:Number} = hcat(E.A, A) + +function KernelAbstractions.get_backend(E::StiefelProjection) + KernelAbstractions.get_backend(E.A) +end \ No newline at end of file From f134fbf3a9db7480c5c11edef4175e907971a164 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Mon, 27 May 2024 16:24:14 +0200 Subject: [PATCH 17/32] Updated docstrings to conform with recommendations. --- src/kernels/mat_tensor_mul.jl | 77 +++++++- src/kernels/tensor_mat_mul.jl | 50 +++++- src/manifolds/abstract_manifold.jl | 32 ++-- src/manifolds/grassmann_manifold.jl | 101 ++++++++++- src/manifolds/stiefel_manifold.jl | 101 ++++++++++- .../manifold_related/global_sections.jl | 170 +++++++++++++++--- 6 files changed, 481 insertions(+), 50 deletions(-) diff --git a/src/kernels/mat_tensor_mul.jl b/src/kernels/mat_tensor_mul.jl index c600987ea..86785813b 100644 --- a/src/kernels/mat_tensor_mul.jl +++ b/src/kernels/mat_tensor_mul.jl @@ -13,7 +13,15 @@ nothing end -# Creating a wrapper kernel for launching with error checks +@doc raw""" + mat_tensor_mul!(C, A, B) + +Multiply the matrix `A` onto the tensor `B` from the left and store the result in `C`. + +Also checks the bounds of the input arrays. + +The function [`mat_tensor_mul`](@ref) calls `mat_tensor_mul!` internally. +""" function mat_tensor_mul!(C, A, B) @assert size(A)[2] == size(B)[1] @@ -22,6 +30,37 @@ function mat_tensor_mul!(C, A, B) kernel!(C, A, B, ndrange=size(C)) end +@doc raw""" + mat_tensor_mul(A::AbstractMatrix{T}, B::AbstractArray{T, 3}) where T + +Multipliy the matrix `A` onto the tensor `B` from the left. + +Internally this calls the inplace version [`mat_tensor_mul!`](@ref). 
+ +# Examples + +```jldoctest +using GeometricMachineLearning: mat_tensor_mul + +B = [1 1 1; 1 1 1; 1 1 1;;; 2 2 2; 2 2 2; 2 2 2] +A = [3 0 0; 0 2 0; 0 0 1] + +mat_tensor_mul(A, B) + +# output + +3×3×2 Array{Int64, 3}: +[:, :, 1] = + 3 3 3 + 2 2 2 + 1 1 1 + +[:, :, 2] = + 6 6 6 + 4 4 4 + 2 2 2 +``` +""" function mat_tensor_mul(A::AbstractMatrix{T}, B::AbstractArray{T, 3}) where T sizeA = size(A) sizeB = size(B) @@ -61,6 +100,15 @@ function symmetric_mat_mul(S::AbstractVector{T}, B::AbstractArray{T, 3}, n::Int) C end +@doc raw""" + mat_tensor_mul!(C::AbstractArray{T, 3}, A::SymmetricMatrix{T}, B::AbstractArray{T, 3}) where T + +Multiply the symmetric matrix `A` onto the tensor `B` from the left and store the result in `C`. + +Also checks the bounds of the input arrays. + +This performs an efficient multiplication based on the special structure of the symmetric matrix `A`. +""" function mat_tensor_mul!(C::AbstractArray{T, 3}, A::SymmetricMatrix{T}, B::AbstractArray{T, 3}) where T @assert A.n == size(C, 1) == size(B, 1) @@ -96,6 +144,15 @@ function lo_mat_mul(S::AbstractVector{T}, B::AbstractArray{T, 3}, n::Int) where C end +@doc raw""" + mat_tensor_mul!(C::AbstractArray{T, 3}, A::LowerTriangular{T}, B::AbstractArray{T, 3}) where T + +Multiply the lower-triangular matrix `A` onto the tensor `B` from the left and store the result in `C`. + +Also checks the bounds of the input arrays. + +This performs an efficient multiplication based on the special structure of the lower-triangular matrix `A`. +""" function mat_tensor_mul!(C::AbstractArray{T, 3}, A::LowerTriangular{T}, B::AbstractArray{T, 3}) where T @assert A.n == size(C, 1) == size(B, 1) @@ -131,6 +188,15 @@ function up_mat_mul(S::AbstractVector{T}, B::AbstractArray{T, 3}, n::Int) where C end +@doc raw""" + mat_tensor_mul!(C::AbstractArray{T, 3}, A::UpperTriangular{T}, B::AbstractArray{T, 3}) where T + +Multiply the upper-triangular matrix `A` onto the tensor `B` from the left and store the result in `C`. + +Also checks the bounds of the input arrays. + +This performs an efficient multiplication based on the special structure of the upper-triangular matrix `A`. +""" function mat_tensor_mul!(C::AbstractArray{T, 3}, A::UpperTriangular{T}, B::AbstractArray{T, 3}) where T @assert A.n == size(C, 1) == size(B, 1) @@ -168,6 +234,15 @@ function skew_mat_mul(S::AbstractVector{T}, B::AbstractArray{T, 3}, n::Int) wher C end +@doc raw""" + mat_tensor_mul!(C::AbstractArray{T, 3}, A::SkewSymMatrix{T}, B::AbstractArray{T, 3}) where T + +Multiply skew-symmetric the matrix `A` onto the tensor `B` from the left and store the result in `C`. + +Also checks the bounds of the input arrays. + +This performs an efficient multiplication based on the special structure of the skew-symmetric matrix `A`. +""" function mat_tensor_mul!(C::AbstractArray{T, 3}, A::SkewSymMatrix{T}, B::AbstractArray{T, 3}) where T @assert A.n == size(C, 1) == size(B, 1) diff --git a/src/kernels/tensor_mat_mul.jl b/src/kernels/tensor_mat_mul.jl index d686ba5d5..7508f0d65 100644 --- a/src/kernels/tensor_mat_mul.jl +++ b/src/kernels/tensor_mat_mul.jl @@ -11,7 +11,15 @@ C[i,j,k] = tmp_sum end -# Creating a wrapper kernel for launching with error checks +@doc raw""" + tensor_mat_mul!(C, A, B) + +Multiply the matrix `B` onto the tensor `A` from the right and store the result in `C`. + +Also checks the bounds of the input arrays. + +The function [`tensor_mat_mul`](@ref) calls `tensor_mat_mul!` internally. 
+""" function tensor_mat_mul!(C, A, B) @assert size(A)[2] == size(B)[1] @@ -20,6 +28,37 @@ function tensor_mat_mul!(C, A, B) kernel!(C, A, B, ndrange=size(C)) end +@doc raw""" + tensor_mat_mul(A::AbstractArray{T, 3}, B::AbstractArray{T}) where T + +Multipliy the matrix `B` onto the tensor `A` from the right. + +Internally this calls the inplace version [`tensor_mat_mul!`](@ref). + +# Examples + +```jldoctest +using GeometricMachineLearning: tensor_mat_mul + +A = [1 1 1; 1 1 1; 1 1 1;;; 2 2 2; 2 2 2; 2 2 2] +B = [3 0 0; 0 2 0; 0 0 1] + +tensor_mat_mul(A, B) + +# output + +3×3×2 Array{Int64, 3}: +[:, :, 1] = + 3 2 1 + 3 2 1 + 3 2 1 + +[:, :, 2] = + 6 4 2 + 6 4 2 + 6 4 2 +``` +""" function tensor_mat_mul(A::AbstractArray{T, 3}, B::AbstractMatrix{T}) where T sizeA = size(A); sizeB = size(B) @assert sizeA[2] == sizeB[1] @@ -64,6 +103,15 @@ function symmetric_mat_right_mul(B::AbstractArray{T, 3}, S::AbstractVector{T}, n C end +@doc raw""" + mat_tensor_mul!(C::AbstractArray{T, 3}, B::AbstractArray{T, 3}, A::SymmetricMatrix{T}) where T + +Multiply the symmetric matrix `A` onto the tensor `B` from the right and store the result in `C`. + +Also checks the bounds of the input arrays. + +This performs an efficient multiplication based on the special structure of the symmetric matrix `A`. +""" function tensor_mat_mul!(C::AbstractArray{T, 3}, B::AbstractArray{T, 3}, A::SymmetricMatrix{T}) where T @assert A.n == size(C, 2) == size(B, 2) diff --git a/src/manifolds/abstract_manifold.jl b/src/manifolds/abstract_manifold.jl index c58a028bc..a6cbf2571 100644 --- a/src/manifolds/abstract_manifold.jl +++ b/src/manifolds/abstract_manifold.jl @@ -42,6 +42,10 @@ function Base.rand(rng::Random.AbstractRNG, manifold_type::Type{MT}, N::Integer, rand(CPU(), rng, manifold_type, N, n) end +function _round(Y::Manifold; kwargs...) + typeof(Y)(round.(Y.A; kwargs...)) +end + @doc raw""" rand(backend::KernelAbstractions.Backend, manifold_type::Type{MT}, N::Integer, n::Integer) where MT <: Manifold) @@ -50,20 +54,22 @@ Draw random elements for a specific device. # Examples ```jldoctest using GeometricMachineLearning +using GeometricMachineLearning: _round # hide import Random Random.seed!(123) N, n = 5, 3 -rand(CPU(), StiefelManifold{Float32}, N, n) +Y = rand(CPU(), StiefelManifold{Float32}, N, n) +_round(Y; digits = 5) # hide # output 5×3 StiefelManifold{Float32, Matrix{Float32}}: - -0.275746 0.329913 0.772753 - -0.624851 -0.332242 -0.0685991 - -0.693326 0.36724 -0.189882 - -0.0929493 -0.731446 0.460639 - 0.210203 0.333008 0.387173 + -0.27575 0.32991 0.77275 + -0.62485 -0.33224 -0.0686 + -0.69333 0.36724 -0.18988 + -0.09295 -0.73145 0.46064 + 0.2102 0.33301 0.38717 ``` Random elements of the manifold can also be allocated on GPU, via e.g. ... @@ -90,20 +96,22 @@ When we call ... ```jldoctest using GeometricMachineLearning +using GeometricMachineLearning: _round # hide import Random Random.seed!(123) N, n = 5, 3 -rand(StiefelManifold{Float32}, N, n) +Y = rand(StiefelManifold{Float32}, N, n) +_round(Y; digits = 5) # hide # output 5×3 StiefelManifold{Float32, Matrix{Float32}}: - -0.275746 0.329913 0.772753 - -0.624851 -0.332242 -0.0685991 - -0.693326 0.36724 -0.189882 - -0.0929493 -0.731446 0.460639 - 0.210203 0.333008 0.387173 + -0.27575 0.32991 0.77275 + -0.62485 -0.33224 -0.0686 + -0.69333 0.36724 -0.18988 + -0.09295 -0.73145 0.46064 + 0.2102 0.33301 0.38717 ``` ... the sampling is done by first allocating a random matrix of size ``N\times{}n`` via `Y = randn(Float32, N, n)`. 
We then perform a QR decomposition `Q, R = qr(Y)` with the `qr` function from the `LinearAlgebra` package (this is using Householder reflections internally). diff --git a/src/manifolds/grassmann_manifold.jl b/src/manifolds/grassmann_manifold.jl index d4dc89221..de178000e 100644 --- a/src/manifolds/grassmann_manifold.jl +++ b/src/manifolds/grassmann_manifold.jl @@ -1,23 +1,110 @@ """ -The `GrassmannManifold` is based on the `StiefelManifold` +The `GrassmannManifold` is based on the [`StiefelManifold`](@ref). """ mutable struct GrassmannManifold{T, AT <: AbstractMatrix{T}} <: Manifold{T} A::AT end +@doc raw""" + rgrad(Y::GrassmannManifold, e_grad::AbstractMatrix) + +Compute the Riemannian gradient at ``Y\in{}Gr(n, N)``. + +These gradient have the property that they are orthogonal to the space spanned by ``Y``. + +Note the property ``Y^T\mathrm{rgrad}(Y, \nabla{}L) = \mathbb{O}.`` + +Also see [`rgrad(::StiefelManifold, ::AbstractMatrix)`](@ref). + +# Examples + +```jldoctest +using GeometricMachineLearning + +Y = GrassmannManifold([1 0 ; 0 1 ; 0 0; 0 0]) +Δ = [1 2; 3 4; 5 6; 7 8] +rgrad(Y, Δ) + +# output + +4×2 Matrix{Int64}: + 0 0 + 0 0 + 5 6 + 7 8 +``` +""" function rgrad(Y::GrassmannManifold, e_grad::AbstractMatrix) e_grad - Y * (Y' * e_grad) end -# think about this! This metric may not be the right one! -function metric(Y::GrassmannManifold, Δ₁::AbstractMatrix, Δ₂::AbstractMatrix) - LinearAlgebra.tr(Δ₁'*(I - Y*inv(Y'*Y)*Y')*Δ₂) +@doc raw""" + metric(Y::GrassmannManifold, Δ₁::AbstractMatrix, Δ₂::AbstractMatrix) + +Compute the metric for vectors `Δ₁` and `Δ₂` at `Y`. + +The representation of the Grassmann manifold is realized as a quotient space of the Stiefel manifold. + +The metric for the Grassmann manifold is: + +```math +g^{Gr}_Y(\Delta_1, \Delta_2) = g^{St}_Y(\Delta_1, \Delta_2) = \mathrm{Tr}(\Delta_1^T (\mathbb{I} - Y Y^T) \Delta_2) = \mathrm{Tr}(\Delta_1^T \Delta_2). +``` +""" +function metric(::GrassmannManifold, Δ₁::AbstractMatrix, Δ₂::AbstractMatrix) + LinearAlgebra.tr(Δ₁' * Δ₂) end +@doc raw""" + global_section(Y::GrassmannManifold) + +Compute a matrix of size ``N\times(N-n)`` whose columns are orthogonal to the columns in `Y`. +The method `global_section` for the Grassmann manifold is equivalent to that for the [`StiefelManifold`](@ref) (we represent the Grassmann manifold as an embedding in the Stiefel manifold). + +See the documentation for [`global_section(Y::StiefelManifold{T}) where T`](@ref). +""" function global_section(Y::GrassmannManifold{T}) where T N, n = size(Y) - A = randn(T, N, N-n) - A - Y * (Y' * A) - qr!(hcat(Y, A)).Q + backend = KernelAbstractions.get_backend(Y) + A = KernelAbstractions.allocate(backend, T, N, N-n) + randn!(A) + A = A - Y.A * (Y.A' * A) + qr!(A).Q end + +@doc raw""" + Ω(Y::GrassmannManifold{T}, Δ::AbstractMatrix{T}) where T + +Perform the *canonical horizontal lift* for the Grassmann manifold: + +```math + \Delta \mapsto \Omega^{St}(Y, Δ), +``` + +where ``\Omega^{St}`` is the canonical horizontal lift for the Stiefel manifold. + +```jldoctest +using GeometricMachineLearning +E = GrassmannManifold(StiefelProjection(5, 2)) +Δ = [0. 0.; 0. 0.; 2. 3.; 4. 5.; 6. 7.] 
+GeometricMachineLearning.Ω(E, Δ) + +# output + +5×5 SkewSymMatrix{Float64, Vector{Float64}}: + 0.0 -0.0 -2.0 -4.0 -6.0 + 0.0 0.0 -3.0 -5.0 -7.0 + 2.0 3.0 0.0 -0.0 -0.0 + 4.0 5.0 0.0 0.0 -0.0 + 6.0 7.0 0.0 0.0 0.0 +``` +""" +function Ω(Y::GrassmannManifold{T}, Δ::AbstractMatrix{T}) where T + YY = Y * Y' + + ΩSt = 2 * (one(YY) - T(.5) * Y * Y') * Δ * Y' + # E = StiefelProjection(Y) + # SkewSymMatrix(ΩSt - E * E' * ΩSt * E * E') + SkewSymMatrix(ΩSt) +end \ No newline at end of file diff --git a/src/manifolds/stiefel_manifold.jl b/src/manifolds/stiefel_manifold.jl index 37b71e29d..0ede500b1 100644 --- a/src/manifolds/stiefel_manifold.jl +++ b/src/manifolds/stiefel_manifold.jl @@ -24,14 +24,36 @@ function Base.:*(Y::Adjoint{T, ST}, B::ST) where {T, AT<:AbstractMatrix{T}, ST<: end @doc raw""" -Computes the Riemannian gradient for the Stiefel manifold given an element ``Y\in{}St(N,n)`` and a matrix ``\nabla{}L\in\mathbb{R}^{N\times{}n}`` (the Euclidean gradient). It computes the Riemannian gradient with respect to the canonical metric (see the documentation for the function `metric` for an explanation of this). -The precise form of the mapping is: + rgrad(Y::StiefelManifold, e_grad::AbstractMatrix) + +Compute the Riemannian gradient for the Stiefel manifold at ``Y\in{}St(N,n)`` based on ``\nabla{}L\in\mathbb{R}^{N\times{}n}`` (the Euclidean gradient). + +The function computes the Riemannian gradient with respect to the canonical [`metric`](@ref). + + The precise form of the mapping is: ```math \mathtt{rgrad}(Y, \nabla{}L) \mapsto \nabla{}L - Y(\nabla{}L)^TY ``` -It is called with inputs: -- `Y::StiefelManifold` -- `e_grad::AbstractMatrix`: i.e. the Euclidean gradient (what was called ``\nabla{}L``) above. + +Note the property ``Y^T\mathrm{rgrad}(Y, \nabla{}L)\in\mathcal{S}_\mathrm{skew}(n).`` + +# Examples + +```jldoctest +using GeometricMachineLearning + +Y = StiefelManifold([1 0 ; 0 1 ; 0 0; 0 0]) +Δ = [1 2; 3 4; 5 6; 7 8] +rgrad(Y, Δ) + +# output + +4×2 Matrix{Int64}: + 0 -1 + 1 0 + 5 6 + 7 8 +``` """ function rgrad(Y::StiefelManifold, e_grad::AbstractMatrix) e_grad - Y.A * (e_grad' * Y.A) @@ -55,6 +77,47 @@ function check(Y::StiefelManifold) norm(Y.A'*Y.A - I) end +@doc raw""" + global_section(Y::StiefelManifold) + +Compute a matrix of size ``N\times(N-n)`` whose columns are orthogonal to the columns in `Y`. + +This matrix is also called ``Y_\perp`` [absil2004riemannian, absil2008optimization, bendokat2020grassmann](@cite). + +# Examples + +```jldoctest +using GeometricMachineLearning +using GeometricMachineLearning: global_section +import Random + +Random.seed!(123) + +Y = StiefelManifold([1. 0.; 0. 1.; 0. 0.; 0. 0.]) + +round.(Matrix(global_section(Y)); digits = 3) + +# output + +4×2 Matrix{Float64}: + 0.0 -0.0 + 0.0 0.0 + 0.936 -0.353 + 0.353 0.936 +``` + +Further note that we convert the `QRCompactWYQ` object to a `Matrix` before we display it. + +# Implementation + +The implementation is done with a QR decomposition (`LinearAlgebra.qr!`). 
Internally we do: + +```julia +randn!(A) +A = A - Y.A * (Y.A' * A) +qr!(A).Q +``` +""" function global_section(Y::StiefelManifold{T}) where T N, n = size(Y) backend = KernelAbstractions.get_backend(Y) @@ -65,10 +128,12 @@ function global_section(Y::StiefelManifold{T}) where T end @doc raw""" -Implements the *canonical horizontal lift* for the Stiefel manifold: + Ω(Y::StiefelManifold{T}, Δ::AbstractMatrix{T}) where T + +Perform *canonical horizontal lift* for the Stiefel manifold: ```math - (\mathbb{I} - \frac{1}{2}YY^T)\Delta{}Y^T - Y\Delta^T(\mathbb{I} - \frac{1}{2}YY^T). + \Delta \mapsto (\mathbb{I} - \frac{1}{2}YY^T)\Delta{}Y^T - Y\Delta^T(\mathbb{I} - \frac{1}{2}YY^T). ``` Internally this performs @@ -78,8 +143,28 @@ SkewSymMatrix(2 * (I(n) - .5 * Y * Y') * Δ * Y') ``` to save memory. + +# Examples + +```jldoctest +using GeometricMachineLearning +E = StiefelManifold(StiefelProjection(5, 2)) +Δ = [0. -1.; 1. 0.; 2. 3.; 4. 5.; 6. 7.] +GeometricMachineLearning.Ω(E, Δ) + +# output + +5×5 SkewSymMatrix{Float64, Vector{Float64}}: + 0.0 -1.0 -2.0 -4.0 -6.0 + 1.0 0.0 -3.0 -5.0 -7.0 + 2.0 3.0 0.0 -0.0 -0.0 + 4.0 5.0 0.0 0.0 -0.0 + 6.0 7.0 0.0 0.0 0.0 +``` + +Note that the output of `Ω` is a skew-symmetric matrix, i.e. an element of ``\mathfrak{g}``. """ function Ω(Y::StiefelManifold{T}, Δ::AbstractMatrix{T}) where T YY = Y * Y' - SkewSymMatrix(2 * (one(YY) - .5 * Y * Y') * Δ * Y') + SkewSymMatrix(2 * (one(YY) - T(.5) * Y * Y') * Δ * Y') end \ No newline at end of file diff --git a/src/optimizers/manifold_related/global_sections.jl b/src/optimizers/manifold_related/global_sections.jl index 73bc1262e..4065dccbb 100644 --- a/src/optimizers/manifold_related/global_sections.jl +++ b/src/optimizers/manifold_related/global_sections.jl @@ -1,18 +1,19 @@ @doc raw""" -This implements global sections for the Stiefel manifold and the Symplectic Stiefel manifold. + GlobalSection(Y::AbstractMatrix) -In practice this is implemented using Householder reflections, with the auxiliary column vectors given by: -|0| -|0| -|.| -|1| ith spot for i in (n+1) to N (or with random columns) -|0| -|.| -|0| +Construct a global section for `Y`. -Maybe consider dividing the output in the check functions by n! +A global section ``\lambda`` is a mapping from a homogeneous space ``\mathcal{M}`` to the corresponding Lie group ``G`` such that -Implement a general global section here!!!! Tₓ𝔐 → G×𝔤 !!!!!! (think about random initialization!) +```math +\lambda(Y)E = Y, +``` + +Also see [`apply_section`](@ref) and [`global_rep`](@ref). + +# Implementation + +For an implementation of `GlobalSection` for a custom array (especially manifolds), the function [`global_section`](@ref) has to be generalized. """ struct GlobalSection{T, AT} Y::AT @@ -29,34 +30,71 @@ function GlobalSection(ps::NamedTuple) apply_toNT(GlobalSection, ps) end -# this is an application G×𝔐 → 𝔐 +@doc raw""" + Matrix(λY::GlobalSection) + +Put `λY` into matrix form. + +This is not recommended if speed is important! + +Use [`apply_section`](@ref) and [`global_rep`](@ref) instead! +""" +function Base.Matrix(λY::GlobalSection) + N, n = size(λY.Y) + + hcat(Matrix(λY.Y), Matrix(λY.λ)[:, 1:(N - n)]) +end + +@doc raw""" + apply_section(λY::GlobalSection{T, AT}, Y₂::AT) where {T, AT <: StiefelManifold{T}} + +Apply `λY` to `Y₂`. + +Mathematically this is the group action of the element ``\lambda{}Y\in{}G`` on the element ``Y_2`` of the homogeneous space ``\mathcal{M}``. + +Internally it calls the inplace version [`apply_section!`](@ref). 
+""" function apply_section(λY::GlobalSection{T, AT}, Y₂::AT) where {T, AT<:StiefelManifold{T}} N, n = size(λY.Y) @assert (N, n) == size(Y₂) - backend = KernelAbstractions.get_backend(Y₂) - StiefelManifold( - λY.Y.A * Y₂.A[1:n, :] + λY.λ*vcat(Y₂.A[(n+1):N, :], KernelAbstractions.zeros(backend, T, n, n)) - ) + + Y = StiefelManifold(zero(Y₂.A)) + apply_section!(Y, λY, Y₂) + + Y end +@doc raw""" + apply_section!(Y::AT, λY::GlobalSection{T, AT}, Y₂::AT) where {T, AT<:StiefelManifold{T}} + +Apply `λY` to `Y₂` and store the result in `Y`. + +The inplace version of [`apply_section`](@ref). +""" function apply_section!(Y::AT, λY::GlobalSection{T, AT}, Y₂::AT) where {T, AT<:StiefelManifold{T}} N, n = size(λY.Y) @assert (N, n) == size(Y₂) == size(Y) backend = KernelAbstractions.get_backend(Y) - Y.A .= λY.Y * Y₂.A[1:n, :] + λY.λ*vcat(Y₂.A[(n+1):N, :], KernelAbstractions.zeros(backend, T, n, n)) + @views Y.A .= λY.Y * Y₂.A[1:n, :] + λY.λ*vcat(Y₂.A[(n+1):N, :], KernelAbstractions.zeros(backend, T, n, n)) end function apply_section(λY::GlobalSection{T, AT}, Y₂::AT) where {T, AT<:GrassmannManifold{T}} N, n = size(λY.Y) @assert (N, n) == size(Y₂) - GrassmannManifold(λY.λ*Y₂) + + Y = GrassmannManifold(zero(Y₂.A)) + apply_section!(Y, λY, Y₂) + + Y end function apply_section!(Y::AT, λY::GlobalSection{T, AT}, Y₂::AT) where {T, AT<:GrassmannManifold{T}} N, n = size(λY.Y) @assert (N, n) == size(Y₂) - Y.A = λY.λ*Y₂ + + backend = KernelAbstractions.get_backend(Y₂) + @views Y.A = λY.Y * Y₂.A[1:n, :] + λY.λ*vcat(Y₂.A[(n+1):N, :], KernelAbstractions.zeros(backend, T, n, n)) end function apply_section(λY::GlobalSection{T}, Y₂::AbstractVecOrMat{T}) where {T} @@ -84,21 +122,111 @@ function global_rep(::GlobalSection{T}, gx::AbstractVecOrMat{T}) where {T} gx end +@doc raw""" + global_rep(λY::GlobalSection{T, AT}, Δ::AbstractMatrix{T}) where {T, AT<:StiefelManifold{T}} + +Express `Δ` (an the tangent space of `Y`) as an instance of `StiefelLieAlgHorMatrix`. + +This maps an element from ``T_Y\mathcal{M}`` to an element of ``\mathfrak{g}^\mathrm{hor}``. + +These two spaces are isomorphic where the isomorphism where the isomorphism is established through ``\lambda(Y)\in{}G`` via: + +```math +T_Y\mathcal{M} \to \mathfrak{g}^{\mathrm{hor}}, \Delta \mapsto \lambda(Y)^{-1}\Omega(Y, \Delta)\lambda(Y). +``` + +Also see [`GeometricMachineLearning.Ω`](@ref). + +# Examples + +```jldoctest +using GeometricMachineLearning +using GeometricMachineLearning: _round +import Random + +Random.seed!(123) + +Y = rand(StiefelManifold, 6, 3) +Δ = rgrad(Y, randn(6, 3)) +λY = GlobalSection(Y) + +_round(global_rep(λY, Δ); digits = 3) + +# output + +6×6 StiefelLieAlgHorMatrix{Float64, SkewSymMatrix{Float64, Vector{Float64}}, Matrix{Float64}}: + 0.0 0.679 1.925 0.981 -2.058 0.4 + -0.679 0.0 0.298 -0.424 0.733 -0.919 + -1.925 -0.298 0.0 -1.815 1.409 1.085 + -0.981 0.424 1.815 0.0 0.0 0.0 + 2.058 -0.733 -1.409 0.0 0.0 0.0 + -0.4 0.919 -1.085 0.0 0.0 0.0 +``` + +# Implementation + +The function `global_rep` does in fact not perform the entire map ``\lambda(Y)^{-1}\Omega(Y, \Delta)\lambda(Y)`` but only + +```math +\Delta \mapsto \mathrm{skew}(Y^T\Delta), +``` + +to get the small skew-symmetric matrix and + +```math +\Delta \mapsto (\lambda(Y)_{[1:N, n:N]}^T \Delta)_{[1:(N-n), 1:n]}, +``` + +for the arbitrary matrix. 
+""" function global_rep(λY::GlobalSection{T, AT}, Δ::AbstractMatrix{T}) where {T, AT<:StiefelManifold{T}} N, n = size(λY.Y) StiefelLieAlgHorMatrix( SkewSymMatrix(λY.Y.A' * Δ), - (λY.λ' * Δ)[1:(N-n), 1:n], + typeof(Δ)(@views (λY.λ' * Δ)[1:(N-n), 1:n]), # (λY.λ' * Δ)[(n+1):N, 1:n], N, n ) end +@doc raw""" + global_rep(λY::GlobalSection{T, AT}, Δ::AbstractMatrix{T}) where {T, AT<:GrassmannManifold{T}} + +Express `Δ` (an the tangent space of `Y`) as an instance of `GrassmannLieAlgHorMatrix`. + +The method `global_rep` for [`GrassmannManifold`](@ref) is similar to that for [`StiefelManifold`](@ref). + +# Examples + +```jldoctest +using GeometricMachineLearning +using GeometricMachineLearning: _round +import Random + +Random.seed!(123) + +Y = rand(GrassmannManifold, 6, 3) +Δ = rgrad(Y, randn(6, 3)) +λY = GlobalSection(Y) + +_round(global_rep(λY, Δ); digits = 3) + +# output + +6×6 GrassmannLieAlgHorMatrix{Float64, Matrix{Float64}}: + 0.0 0.0 0.0 0.981 -2.058 0.4 + 0.0 0.0 0.0 -0.424 0.733 -0.919 + 0.0 0.0 0.0 -1.815 1.409 1.085 + -0.981 0.424 1.815 0.0 0.0 0.0 + 2.058 -0.733 -1.409 0.0 0.0 0.0 + -0.4 0.919 -1.085 0.0 0.0 0.0 +``` +""" function global_rep(λY::GlobalSection{T, AT}, Δ::AbstractMatrix{T}) where {T, AT<:GrassmannManifold{T}} N, n = size(λY.Y) GrassmannLieAlgHorMatrix( - (λY.λ' * Δ)[(n+1):N, 1:n], + typeof(Δ)(@views (λY.λ' * Δ)[1:(N-n), 1:n]), N, n ) From 834cf5c761f6ab02a19b50838a3e9f4e6b1b54f5 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Mon, 27 May 2024 16:24:53 +0200 Subject: [PATCH 18/32] Fixed problems with Grassmann manifold tests. --- test/manifolds/grassmann_manifold.jl | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/test/manifolds/grassmann_manifold.jl b/test/manifolds/grassmann_manifold.jl index 43755b54a..334f5ba3a 100644 --- a/test/manifolds/grassmann_manifold.jl +++ b/test/manifolds/grassmann_manifold.jl @@ -22,17 +22,15 @@ end function global_section_test(T, N::Integer, n::Integer) Y = rand(GrassmannManifold{T}, N, n) - Q = global_section(Y) + Q = Matrix(GlobalSection(Y)) πQ = Q[1:N, 1:n] - norm(Y - πQ*πQ'*Y)/N/n + norm(Y - πQ * πQ' * Y) / N / n end - function tangent_space_rep(T, N::Integer, n::Integer) Y = rand(GrassmannManifold{T}, N, n) Δ = rgrad(Y, randn(T, N, n)) - λY = GlobalSection(Y) - λY.λ'*Δ + Y.A' * Δ end function gloabl_tangent_space_representation(T, N::Integer, n::Integer) @@ -54,7 +52,7 @@ function run_tests(T, N, n, tol) @test norm(tangent_space_rep(T, N, n)[1:n,1:n])/N/n < tol @test typeof(gloabl_tangent_space_representation(T, N, n)) <: GrassmannLieAlgHorMatrix # because of the matrix inversion the tolerance here is set to a higher value - @test norm(coordinate_chart_rep(T, N, n)[1:n,1:n]-I(n))/N/n < tol*10 + @test norm(coordinate_chart_rep(T, N, n)[1:n,1:n]-I(n)) / N / n < tol*10 end tol = 1e-8 From a5e3f384fc0a75c0269353861b339cd973023174 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Mon, 27 May 2024 16:25:18 +0200 Subject: [PATCH 19/32] Fixed ambiguity problem. 
--- test/manifolds/stiefel_manifold.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/manifolds/stiefel_manifold.jl b/test/manifolds/stiefel_manifold.jl index 12ec8933e..2f4e92cba 100644 --- a/test/manifolds/stiefel_manifold.jl +++ b/test/manifolds/stiefel_manifold.jl @@ -36,7 +36,7 @@ end function Ω_test(N::Integer, n::Integer, T::Type=Float32) Y = rand(StiefelManifold{Float32}, 5, 3) Δ = rgrad(Y, rand(Float32, 5, 3)) - @test GeometricMachineLearning.Ω(Y, Δ) * Y ≈ Δ + @test GeometricMachineLearning.Ω(Y, Δ) * Y.A ≈ Δ end function retraction_test(N::Integer, n::Integer, T::Type=Float32) From 3f34907758d6c2d52582ed9f1753b019674660f2 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 28 May 2024 11:13:14 +0200 Subject: [PATCH 20/32] Added references. --- docs/src/arrays/global_tangent_spaces.md | 41 +++++++++++++++++++++--- 1 file changed, 37 insertions(+), 4 deletions(-) diff --git a/docs/src/arrays/global_tangent_spaces.md b/docs/src/arrays/global_tangent_spaces.md index ad1438ddf..834839571 100644 --- a/docs/src/arrays/global_tangent_spaces.md +++ b/docs/src/arrays/global_tangent_spaces.md @@ -36,11 +36,11 @@ Main.theorem(raw"We call a mapping from ``\lambda:\mathcal{M} \to G`` a homogene " * Main.indentation * raw"where ``E`` is the distinct element of the homogeneous space.") ``` -Note that in general global sections are not unique because the rank of ``G`` is in general greater than that of ``\mathcal{M}``. We give an example of how to construct such a global section for the Stiefel and the Grassmann manifold below. +Note that in general global sections are not unique because the rank of ``G`` is in general greater than that of ``\mathcal{M}``. We give an example of how to construct such a global section for the Stiefel and the Grassmann manifolds below. ## The Global Tangent Space for the Stiefel Manifold -We now discuss the specific form of the global tangent space for the [Stiefel manifold](@ref "The Stiefel Manifold"). We choose the distinct element[^1] ``E`` to have an especially simple form: +We now discuss the specific form of the global tangent space for the [Stiefel manifold](@ref "The Stiefel Manifold"). We choose the distinct element[^1] ``E`` to have an especially simple form (this matrix can be build by calling [`StiefelProjection`](@ref)): [^1]: We already introduced this special matrix together with the Stiefel manifold. @@ -61,7 +61,7 @@ A & B^T \\ B & \mathbb{O} where ``A`` is a skew-symmetric matrix of size ``n\times{}n`` and ``B`` is an arbitrary matrix of size ``(N - n)\times{}n``. -Arrays of type ``\mathfrak{g}^{\mathrm{hor}, E}`` are implemented in `GeometricMachineLearning` under the name `StiefelLieAlgHorMatrix`. +Arrays of type ``\mathfrak{g}^{\mathrm{hor}, E}`` are implemented in `GeometricMachineLearning` under the name [`StiefelLieAlgHorMatrix`](@ref). We can call this with e.g. a skew-symmetric matrix ``A`` and an arbitrary matrix ``B``: @@ -118,7 +118,7 @@ Y = rand(StiefelManifold, N, n) round.(λY_mat' * ΩΔ * λY_mat; digits = 3) ``` -For computational reasons the user is strongly discouraged to call `Matrix` on an instance of `GlobalSection`. The better option is: +Performing this computation directly is computationally very inefficient however and the user is strongly discouraged to call `Matrix` on an instance of [`GlobalSection`](@ref). 
The better option is calling [`global_rep`](@ref): ```@example global_section using GeometricMachineLearning: _round # hide @@ -126,6 +126,36 @@ using GeometricMachineLearning: _round # hide _round(global_rep(λY, Δ); digits = 3) ``` +Internally `GlobalSection` calls the function [`global_section`](@ref) which does the following for the Stiefel manifold: + +```julia +A = randn(N, N - n) # or the gpu equivalent +A = A - Y * (Y' * A) +Y⟂ = qr(A).Q[1:N, 1:(N - n)] +``` + +So we draw ``(N - n)`` new columns randomly, subtract the part that is spanned by the columns of ``Y`` and then perform a ``QR`` composition on the resulting matrix. The ``Q`` part of the decomposition is a matrix of ``(N - n)`` columns that is orthogonal to ``Y`` and is typically referred to as ``Y_\perp`` [absil2004riemannian, absil2008optimization, bendokat2020grassmann](@cite). We can easily check that this ``Y_\perp`` is indeed orthogonal to ``Y``. + +```@eval +Main.theorem(raw"The matrix ``Y_\perp`` constructed with the above algorithm satisfies +" * Main.indentation * raw"```math +" * Main.indentation * raw"Y^TY_\perp = \mathbb{O}, +" * Main.indentation * raw"``` +" * Main.indentation * raw"and +" * Main.indentation * raw"```math +" * Main.indentation * raw"(Y_\perp)^TY_\perp = \mathbb{I}, +" * Main.indentation * raw"``` +" * Main.indentation * raw"i.e. all the columns in the big matrix ``[Y, Y_\perp]\in\mathbb{R}^{N\times{}N}`` are mutually orthonormal and it therefore is an element of ``SO(N)``.") +``` + +```@eval +Main.proof(raw"The second property is trivially satisfied because the ``Q`` component of a ``QR`` decomposition is an orthogonal matrix. For the first property note that ``Y^TQR = \mathbb{O}`` is zero because we have subtracted the ``Y`` component from the matrix ``QR``. The matrix ``R\in\mathbb{R}^{N\times{}(N-n)}`` further has the property ``[R]_{ij} = 0`` for ``i > j`` and we have that +" * Main.indentation * raw"```math +" * Main.indentation * raw"(Y^TQ)R = [r_{11}(Y^TQ)_{1\bullet}, r_{12}(Y^TQ)_{1\bullet} + r_{22}(Y^TQ)_{2\bullet}, \ldots, \sum_{i=1}^{N-n}r_{i(N-n)}(Y^TQ)_{i\bullet}]. +" * Main.indentation * raw"``` +" * Main.indentation * raw"Now all the coefficients ``r_{ii}`` are non-zero because the matrix we performed the ``QR`` decomposition on has full rank and we can see that if ``(Y^TQ)R`` is zero ``Y^TQ`` also has to be zero.") +``` + We now discuss the global tangent space for the Grassmann manifold. This is similar to the Stiefel case. ## Global Tangent Space for the Grassmann Manifold @@ -158,8 +188,11 @@ This is equivalent to the horizontal component of ``\mathfrak{g}`` for the Stief ## Library Functions ```@docs; canonical=false +GeometricMachineLearning.AbstractLieAlgHorMatrix StiefelLieAlgHorMatrix +StiefelLieAlgHorMatrix(::AbstractMatrix, ::Int) GrassmannLieAlgHorMatrix +GrassmannLieAlgHorMatrix(::AbstractMatrix, ::Int) GlobalSection GeometricMachineLearning.global_section global_rep From a66cb0f9d073d9f8320680fe18fd61b04e198762 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 28 May 2024 11:15:09 +0200 Subject: [PATCH 21/32] Fixed reference (global_section is not exported). 
--- docs/src/arrays/global_tangent_spaces.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/src/arrays/global_tangent_spaces.md b/docs/src/arrays/global_tangent_spaces.md index 834839571..9ea12dcc8 100644 --- a/docs/src/arrays/global_tangent_spaces.md +++ b/docs/src/arrays/global_tangent_spaces.md @@ -126,7 +126,7 @@ using GeometricMachineLearning: _round # hide _round(global_rep(λY, Δ); digits = 3) ``` -Internally `GlobalSection` calls the function [`global_section`](@ref) which does the following for the Stiefel manifold: +Internally `GlobalSection` calls the function [`GeometricMachineLearning.global_section`](@ref) which does the following for the Stiefel manifold: ```julia A = randn(N, N - n) # or the gpu equivalent From 43303bb53259e3fe6d064ae287c65ac9159ff87e Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 28 May 2024 11:15:33 +0200 Subject: [PATCH 22/32] Also exporting GrassmannManifold now. --- docs/src/manifolds/homogeneous_spaces.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/src/manifolds/homogeneous_spaces.md b/docs/src/manifolds/homogeneous_spaces.md index 7f791defe..825c7008f 100644 --- a/docs/src/manifolds/homogeneous_spaces.md +++ b/docs/src/manifolds/homogeneous_spaces.md @@ -192,6 +192,7 @@ Main.proof(raw"In a first step we identify charts on the Grassmann manifold to m ```@docs; canonical=false StiefelManifold +GrassmannManifold rand(manifold_type::Type{MT}, ::Integer, ::Integer) where MT <: Manifold GeometricMachineLearning.rgrad(::StiefelManifold, ::AbstractMatrix) GeometricMachineLearning.rgrad(::GrassmannManifold, ::AbstractMatrix) From f6f6005841812528e7ef9d7d8ec29773276e0df5 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 28 May 2024 11:16:06 +0200 Subject: [PATCH 23/32] Added reference to StiefelLieAlgHorMatrix and GrassmannLieAlgHorMatrix. --- src/arrays/abstract_lie_algebra_horizontal.jl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/arrays/abstract_lie_algebra_horizontal.jl b/src/arrays/abstract_lie_algebra_horizontal.jl index a7d248a7b..86c731fb8 100644 --- a/src/arrays/abstract_lie_algebra_horizontal.jl +++ b/src/arrays/abstract_lie_algebra_horizontal.jl @@ -1,4 +1,6 @@ @doc raw""" `AbstractLieAlgHorMatrix` is a supertype for various horizontal components of Lie algebras. We usually call this ``\mathfrak{g}^\mathrm{hor}``. + +See [`StiefelLieAlgHorMatrix`](@ref) and [`GrassmannLieAlgHorMatrix`](@ref). """ abstract type AbstractLieAlgHorMatrix{T} <: AbstractMatrix{T} end From cc1681a9574a32cf330c8cad793d0e0b37e41a1a Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 28 May 2024 11:16:45 +0200 Subject: [PATCH 24/32] Fixed equation environment. --- src/arrays/grassmann_lie_algebra_horizontal.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/arrays/grassmann_lie_algebra_horizontal.jl b/src/arrays/grassmann_lie_algebra_horizontal.jl index dd69af148..f892fab89 100644 --- a/src/arrays/grassmann_lie_algebra_horizontal.jl +++ b/src/arrays/grassmann_lie_algebra_horizontal.jl @@ -21,7 +21,7 @@ An element of GrassmannLieAlgMatrix takes the form: \bar{\mathbb{O}} & B^T \\ B & \mathbb{O} \end{pmatrix}, ``` -where ``\bar{\mathbb{O}}\in\mathbb{R}^{n\times{}n}`` and ``\mathbb{O}\in\mathbb{R}^{(N - n)\times{}n}. 
+where ``\bar{\mathbb{O}}\in\mathbb{R}^{n\times{}n}`` and ``\mathbb{O}\in\mathbb{R}^{(N - n)\times{}n}.`` """ mutable struct GrassmannLieAlgHorMatrix{T, ST <: AbstractMatrix{T}} <: AbstractLieAlgHorMatrix{T} B::ST @@ -62,11 +62,11 @@ D \mapsto \Omega(E, DE - EE^TDE), where ``\Omega`` is the horizontal lift [`GeometricMachineLearning.Ω`](@ref). """ -function GrassmannLieAlgHorMatrix(D::AbstractMatrix{T}, n::Int) where {T} +function GrassmannLieAlgHorMatrix(D::AbstractMatrix, n::Int) N = size(D, 1) @assert N ≥ n - B = D[(n+1):N,1:n] + @views B = D[(n + 1):N,1:n] GrassmannLieAlgHorMatrix(B, N, n) end From 9ca80b52460194737798f616fc599ea24b0a1a4d Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 28 May 2024 11:17:21 +0200 Subject: [PATCH 25/32] Added @views. --- src/arrays/stiefel_lie_algebra_horizontal.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/arrays/stiefel_lie_algebra_horizontal.jl b/src/arrays/stiefel_lie_algebra_horizontal.jl index 2e30c497a..82af6cf5b 100644 --- a/src/arrays/stiefel_lie_algebra_horizontal.jl +++ b/src/arrays/stiefel_lie_algebra_horizontal.jl @@ -66,8 +66,8 @@ function StiefelLieAlgHorMatrix(D::AbstractMatrix, n::Integer) N = size(D, 1) @assert N ≥ n - A_small = SkewSymMatrix(D[1:n,1:n]) - B = D[(n+1):N, 1:n] + @views A_small = SkewSymMatrix(D[1:n,1:n]) + @views B = D[(n + 1):N, 1:n] StiefelLieAlgHorMatrix(A_small, B, N, n) end From 2be39c19af6598d4c52ef5ae43477e972d8e5873 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 28 May 2024 11:17:37 +0200 Subject: [PATCH 26/32] Added equation. --- src/manifolds/grassmann_manifold.jl | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/manifolds/grassmann_manifold.jl b/src/manifolds/grassmann_manifold.jl index de178000e..6398a95f5 100644 --- a/src/manifolds/grassmann_manifold.jl +++ b/src/manifolds/grassmann_manifold.jl @@ -12,6 +12,11 @@ Compute the Riemannian gradient at ``Y\in{}Gr(n, N)``. These gradient have the property that they are orthogonal to the space spanned by ``Y``. +The precise form of the mapping is: +```math +\mathtt{rgrad}(Y, \nabla{}L) \mapsto \nabla{}L - YY^T\nabla{}L +``` + Note the property ``Y^T\mathrm{rgrad}(Y, \nabla{}L) = \mathbb{O}.`` Also see [`rgrad(::StiefelManifold, ::AbstractMatrix)`](@ref). From 0edee75647bdb640eab3bf1c32bdb64cd7617eef Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 28 May 2024 11:18:09 +0200 Subject: [PATCH 27/32] Specified what the A matrix is. --- src/manifolds/stiefel_manifold.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/manifolds/stiefel_manifold.jl b/src/manifolds/stiefel_manifold.jl index 0ede500b1..f393b0641 100644 --- a/src/manifolds/stiefel_manifold.jl +++ b/src/manifolds/stiefel_manifold.jl @@ -30,7 +30,7 @@ Compute the Riemannian gradient for the Stiefel manifold at ``Y\in{}St(N,n)`` ba The function computes the Riemannian gradient with respect to the canonical [`metric`](@ref). - The precise form of the mapping is: +The precise form of the mapping is: ```math \mathtt{rgrad}(Y, \nabla{}L) \mapsto \nabla{}L - Y(\nabla{}L)^TY ``` @@ -113,7 +113,7 @@ Further note that we convert the `QRCompactWYQ` object to a `Matrix` before we d The implementation is done with a QR decomposition (`LinearAlgebra.qr!`). Internally we do: ```julia -randn!(A) +A = randn(N, N - n) # or the gpu equivalent A = A - Y.A * (Y.A' * A) qr!(A).Q ``` From 046af081304ff10a2cdcfce0c93270994c3c830a Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 28 May 2024 11:18:41 +0200 Subject: [PATCH 28/32] Fixed docstring. 
--- src/optimizers/manifold_related/retractions.jl | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/optimizers/manifold_related/retractions.jl b/src/optimizers/manifold_related/retractions.jl index 1a3acf03a..823d27ef0 100644 --- a/src/optimizers/manifold_related/retractions.jl +++ b/src/optimizers/manifold_related/retractions.jl @@ -36,7 +36,11 @@ end geodesic(B::NamedTuple) = apply_toNT(geodesic, B) @doc raw""" -The geodesic map for the manifolds. It takes as input an element ``x`` of ``\mathcal{M}`` and an element of ``T_x\mathcal{M}`` and returns ``\mathtt{geodesic}(x, v_x) = \exp(v_x).`` For example: + geodesic(Y::Manifold, Δ) + +Take as input an element of a manifold `Y` and a tangent vector in `Δ` in the corresponding tangent space and compute the geodesic (exponential map). + +In different notation: take as input an element ``x`` of ``\mathcal{M}`` and an element of ``T_x\mathcal{M}`` and return ``\mathtt{geodesic}(x, v_x) = \exp(v_x).`` For example: ```julia Y = rand(StiefelManifold{Float64}, N, n) @@ -44,7 +48,7 @@ Y = rand(StiefelManifold{Float64}, N, n) geodesic(Y, Δ) ``` -See the docstring for ``rgrad`` for details on this function. +See the docstring for [`rgrad`](@ref) for details on this function. """ function geodesic(Y::Manifold{T}, Δ::AbstractMatrix{T}) where T λY = GlobalSection(Y) From 5e6729ccee70e8d3b5b11a929f288ebc05796465 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 4 Jun 2024 10:17:06 +0200 Subject: [PATCH 29/32] Fix for latex caption. --- docs/make.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/make.jl b/docs/make.jl index f2f622e12..7dac3a5b7 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -50,7 +50,7 @@ end function latex_graphics(path::String; label = nothing, caption = nothing, width = .5) figure_width = "$(width)\\textwidth" latex_label = isnothing(label) ? "" : "\\label{" * label * "}" - latex_caption = isnothing(caption) ? "" : "\\caption{" * Markdown.parse(caption) * "}" + latex_caption = isnothing(caption) ? "" : "\\caption{" * string(Markdown.parse(caption))[1:end-2] * "}" latex_string = """\\begin{figure} \\includegraphics[width = """ * figure_width * "]{" * path * ".png}" * latex_caption * From a35fb2cd4f715e413a2d1e1ff5fb581da54f08f4 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 4 Jun 2024 10:55:40 +0200 Subject: [PATCH 30/32] Fixed problem with different matrices. --- src/arrays/stiefel_projection.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/arrays/stiefel_projection.jl b/src/arrays/stiefel_projection.jl index 09e8a8e39..d8df60908 100644 --- a/src/arrays/stiefel_projection.jl +++ b/src/arrays/stiefel_projection.jl @@ -1,7 +1,7 @@ @doc raw""" StiefelProjection(backend, T, N, n) -Make a matrix of the form ``\begin{bmatrix} \mathbb{I} & \mathbb{O} \end{pmatrix}^T`` for a specific backend and data type. +Make a matrix of the form ``\begin{bmatrix} \mathbb{I} & \mathbb{O} \end{bmatrix}^T`` for a specific backend and data type. An array that essentially does `vcat(I(n), zeros(N-n, n))` with GPU support. From 3fda4ca289983982119bd2d2a0d97139d9faa82a Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 4 Jun 2024 10:59:57 +0200 Subject: [PATCH 31/32] Added doctests. 
--- Project.toml | 5 ++--- test/runtests.jl | 4 +++- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/Project.toml b/Project.toml index 35b508d61..6807607e8 100644 --- a/Project.toml +++ b/Project.toml @@ -10,7 +10,6 @@ ChainRules = "082447d4-558c-5d27-93f4-14fc19e9eca2" ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" ChainRulesTestUtils = "cdddcdb0-9152-4a09-a978-84456f9df70a" Distances = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7" -Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" GeometricBase = "9a0b12b7-583b-4f04-aa1f-d8551b6addc9" GeometricEquations = "c85262ba-a08a-430a-b926-d29770767bf2" @@ -37,7 +36,6 @@ ChainRules = "1" ChainRulesCore = "1" ChainRulesTestUtils = "1" Distances = "0.10" -Documenter = "0.27, 1" ForwardDiff = "0.10" GeometricBase = "0.10" GeometricEquations = "0.16" @@ -56,6 +54,7 @@ julia = "1.8" [extras] Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595" ChainRulesTestUtils = "cdddcdb0-9152-4a09-a978-84456f9df70a" +Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000" GeometricEquations = "c85262ba-a08a-430a-b926-d29770767bf2" GeometricProblems = "18cb22b4-ad41-5c80-9c5f-710df63fbdc9" @@ -65,4 +64,4 @@ SafeTestsets = "1bc83da4-3b8d-516f-aca4-4fe02f6d838f" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] -test = ["ChainRulesTestUtils", "Random", "SafeTestsets", "Test", "GeometricProblems", "FiniteDifferences"] +test = ["Documenter", "ChainRulesTestUtils", "Random", "SafeTestsets", "Test", "GeometricProblems", "FiniteDifferences"] diff --git a/test/runtests.jl b/test/runtests.jl index 901cfb565..179177148 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,5 +1,7 @@ -using SafeTestsets +using SafeTestsets, Test, GeometricMachineLearning +using Documenter: doctest +@testset "Doc tests " begin doctest(GeometricMachineLearning; manual = false) end # reduced order modeling tests @safetestset "PSD tests " begin include("psd_architecture_tests.jl") end @safetestset "SymplecticAutoencoder tests " begin include("symplectic_autoencoder_tests.jl") end From c527cc2fabfce77024fb1f005109783b9e803710 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 4 Jun 2024 11:00:53 +0200 Subject: [PATCH 32/32] (Hopefully) fixed problem with caption. --- docs/src/architectures/linear_symplectic_transformer.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/src/architectures/linear_symplectic_transformer.md b/docs/src/architectures/linear_symplectic_transformer.md index bcc1247ad..93d1d6eac 100644 --- a/docs/src/architectures/linear_symplectic_transformer.md +++ b/docs/src/architectures/linear_symplectic_transformer.md @@ -3,7 +3,7 @@ The linear symplectic transformer consists of a combination of [linear symplectic attention](@ref "Linear Symplectic Attention") and [gradient](@ref "SympNet Gradient Layer") layers and is visualized below: ```@example -Main.include_graphics("../tikz/linear_symplectic_transformer"; caption = raw"Visualization of the linear symplectic transformer architecutre. \texttt{n\_sympnet} refers to the number of SympNet layers (\texttt{n\_sympnet=2} in this figure) and \texttt{L} refers to the number of transformer blocks (\texttt{L=1} in this figure).", width = .3) # hide +Main.include_graphics("../tikz/linear_symplectic_transformer"; caption = raw"Visualization of the linear symplectic transformer architecutre. 
``\mathtt{n\_sympnet}`` refers to the number of SympNet layers (``\mathtt{n\_sympnet}=2`` in this figure) and ``\mathtt{L}`` refers to the number of transformer blocks (``\mathtt{L=1}`` in this figure).", width = .3) # hide ``` ## Library Functions