Multiplication and addition tests for custom arrays #98

Merged · 65 commits · Dec 20, 2023

Commits
ba96b35
Remove old array tests.
benedict-96 Dec 13, 2023
0fb9156
Removed the block matrices. This is now done by the LinearSympNetLayers.
benedict-96 Dec 13, 2023
dd4e8a5
Hope this fixed the TODO list.
benedict-96 Dec 13, 2023
35c0d48
We are not using those arrays anymore.
benedict-96 Dec 13, 2023
1f1e30d
Factored out sampling of arrays.
benedict-96 Dec 13, 2023
51c7c76
Got rid of some symplectic arrays and included the supertype for…
benedict-96 Dec 13, 2023
3876be5
Updated docs.
benedict-96 Dec 13, 2023
48300b2
New supertype for the horizontal components.
benedict-96 Dec 13, 2023
0dfbc63
Added new routine for multiplying symmetric matrix from the right ont…
benedict-96 Dec 13, 2023
21bb931
Test addition of various custom arrays.
benedict-96 Dec 13, 2023
969a6cb
Removed tests for various symplectic arrays that are no longer part o…
benedict-96 Dec 13, 2023
e0b81cf
Added routine for Base.one. This is copied from SkewSymMatrix.
benedict-96 Dec 13, 2023
16b28ba
Slightly improved readability.
benedict-96 Dec 13, 2023
df07bfb
Added addition tests for the remaining custom arrays.
benedict-96 Dec 13, 2023
e81ff2e
Added tests for addition, scalar multiplication and matrix multiplica…
benedict-96 Dec 13, 2023
a94c57c
Changed order of how files are included. Symmetric now depends on Ske…
benedict-96 Dec 13, 2023
c6a350e
Fixed typo.
benedict-96 Dec 13, 2023
35b3db6
Renamed file to something more descriptive.
benedict-96 Dec 19, 2023
66f3279
Renamed file to something meaningful and made the test more readable.
benedict-96 Dec 19, 2023
e8ba5c1
Merge pull request #97 from JuliaGNI/put_sampling_of_arrays_into_extr…
michakraus Dec 19, 2023
257aeee
Added descriptions and a wrapper for using the custom loss function w…
benedict-96 Dec 19, 2023
9ca60ed
Added descriptions and a wrapper for using the custom loss function w…
benedict-96 Dec 19, 2023
fced8d8
Added documentation.
benedict-96 Dec 19, 2023
d95268d
Added constructor for optimizer if input arguments are flipped.
benedict-96 Dec 19, 2023
2abed87
Added a comment saying that the constructor can be called with DataLo…
benedict-96 Dec 19, 2023
0f62b8f
Added default for number of epochs.
benedict-96 Dec 19, 2023
3ab42f2
Combined matrix and tensor routines into one. Added another loss for …
benedict-96 Dec 19, 2023
5d22855
Commented out a section that is probably not needed.
benedict-96 Dec 19, 2023
86485ad
Test data loader for qp data.
benedict-96 Dec 19, 2023
cac1f18
Renamed and added tests.
benedict-96 Dec 19, 2023
4528053
Adjusted symplectic matrix.
benedict-96 Dec 19, 2023
6ed5c62
Renamed file to something more descriptive.
benedict-96 Dec 19, 2023
f4ddd3f
Renamed file to something meaningful and made the test more readable.
benedict-96 Dec 19, 2023
92ac2fe
Added descriptions and a wrapper for using the custom loss function w…
benedict-96 Dec 19, 2023
cb19f5b
Added descriptions and a wrapper for using the custom loss function w…
benedict-96 Dec 19, 2023
cb7725f
Added documentation.
benedict-96 Dec 19, 2023
90bbaa7
Added constructor for optimizer if input arguments are flipped.
benedict-96 Dec 19, 2023
137d640
Added a comment saying that the constructor can be called with DataLo…
benedict-96 Dec 19, 2023
b57d148
Added default for number of epochs.
benedict-96 Dec 19, 2023
3100b0b
Combined matrix and tensor routines into one. Added another loss for …
benedict-96 Dec 19, 2023
718174f
Commented out a section that is probably not needed.
benedict-96 Dec 19, 2023
370ee05
Test data loader for qp data.
benedict-96 Dec 19, 2023
14d6ae2
Renamed and added tests.
benedict-96 Dec 19, 2023
9edec7b
Adjusted symplectic matrix.
benedict-96 Dec 19, 2023
2acdd24
dim was missing for specifying default.
benedict-96 Dec 19, 2023
f8b8fff
dl has no field data.
benedict-96 Dec 19, 2023
8c4c73b
Fixed typos.
benedict-96 Dec 19, 2023
fc92faa
Merge branch 'increase_data_loader_test_coverage' of https://github.c…
benedict-96 Dec 19, 2023
d072594
Removed method that appeared twice for some reason.
benedict-96 Dec 19, 2023
d071839
Forgot to commit before.
benedict-96 Dec 19, 2023
2bb3f29
CompatHelper: bump compat for GPUArrays to 10, (keep existing compat)
Dec 20, 2023
8696833
Merge pull request #102 from JuliaGNI/compathelper/new_version/2023-1…
michakraus Dec 20, 2023
9b56781
Merge pull request #101 from JuliaGNI/increase_data_loader_test_coverage
michakraus Dec 20, 2023
7aff016
Added new routine for multiplying symmetric matrix from the right ont…
benedict-96 Dec 13, 2023
3321d4b
Resolved merge conflict.
benedict-96 Dec 20, 2023
65ee5c0
Resolved conflict (removed triangular matrices).
benedict-96 Dec 20, 2023
0f958d6
Slightly improved readability.
benedict-96 Dec 13, 2023
250c701
Resolved conflict.
benedict-96 Dec 20, 2023
b320aaf
Resolved conflict (accepted both changes).
benedict-96 Dec 20, 2023
5ae3067
Added tests for addition, scalar multiplication and matrix multiplica…
benedict-96 Dec 13, 2023
d4bbb6c
Resolved merge conflict.
benedict-96 Dec 20, 2023
f25e3fa
Fixed (apparent) conflict.
benedict-96 Dec 20, 2023
e0cc2d2
Fixed typo.
benedict-96 Dec 13, 2023
b7bf98b
Resolved merge conflicts.
benedict-96 Dec 20, 2023
1e659d7
Fixed comments from previous merge conflict.
benedict-96 Dec 20, 2023
2 changes: 1 addition & 1 deletion Project.toml
@@ -44,7 +44,7 @@ ChainRulesTestUtils = "1"
Distances = "0.10"
Documenter = "0.27, 1"
ForwardDiff = "0.10"
GPUArrays = "8, 9"
GPUArrays = "8, 9, 10"
GeometricBase = "0.9"
GeometricEquations = "0.14"
GeometricIntegrators = "0.13"
7 changes: 2 additions & 5 deletions src/GeometricMachineLearning.jl
@@ -82,13 +82,10 @@ module GeometricMachineLearning
export convert_to_dev, Device, CPUDevice

# INCLUDE ARRAYS
include("arrays/block_identity_lower.jl")
include("arrays/block_identity_upper.jl")
include("arrays/skew_symmetric.jl")
include("arrays/symmetric.jl")
include("arrays/symplectic.jl")
include("arrays/symplectic_lie_algebra.jl")
include("arrays/symplectic_lie_algebra_horizontal.jl")
include("arrays/skew_symmetric.jl")
include("arrays/abstract_lie_algebra_horizontal.jl")
include("arrays/stiefel_lie_algebra_horizontal.jl")
include("arrays/grassmann_lie_algebra_horizontal.jl")

6 changes: 3 additions & 3 deletions src/architectures/sympnet.jl
@@ -7,7 +7,7 @@ TODO:
abstract type SympNet{AT} <: Architecture end

@doc raw"""
- `LASympNet` is called with **a single input argument**, the **system dimension**. Optional input arguments are:
+ `LASympNet` is called with **a single input argument**, the **system dimension**, or with an instance of `DataLoader`. Optional input arguments are:
- `depth::Int`: The number of linear layers that are applied. The default is 5.
- `nhidden::Int`: The number of hidden layers (i.e. layers that are **not** input or output layers). The default is 2.
- `activation`: The activation function that is applied. By default this is `tanh`.
@@ -32,7 +32,7 @@ end
@inline AbstractNeuralNetworks.dim(arch::SympNet) = arch.dim

@doc raw"""
- `GSympNet` is called with **a single input argument**, the **system dimension**. Optional input arguments are:
+ `GSympNet` is called with **a single input argument**, the **system dimension**, or with an instance of `DataLoader`. Optional input arguments are:
- `upscaling_dimension::Int`: The *upscaling dimension* of the gradient layer. See the documentation for `GradientLayerQ` and `GradientLayerP` for further explanation. The default is `2*dim`.
- `nhidden::Int`: The number of hidden layers (i.e. layers that are **not** input or output layers). The default is 2.
- `activation`: The activation function that is applied. By default this is `tanh`.
@@ -49,7 +49,7 @@ struct GSympNet{AT, InitUpper} <: SympNet{AT} where {InitUpper}
end


- function GSympNet(dl::DataLoader; upscaling_dimension=2*dim, nhidden=2, activation=tanh, init_upper=true)
+ function GSympNet(dl::DataLoader; upscaling_dimension=2*dl.input_dim, nhidden=2, activation=tanh, init_upper=true)
new{typeof(activation), init_upper}(dl.input_dim, upscaling_dimension, nhidden, activation)
end
end
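The corrected `DataLoader` constructor can be exercised along these lines (a minimal sketch: the random training data and the `DataLoader(::AbstractMatrix)` call are illustrative assumptions, not taken from this diff). Before the fix, the keyword default `2*dim` referenced `dim`, which is not defined in the constructor's scope (cf. commit 2acdd24):

```julia
using GeometricMachineLearning

data = randn(4, 100)   # assumed: 100 snapshots of a 4-dimensional (q, p) system
dl = DataLoader(data)  # dl.input_dim is read off the data
arch = GSympNet(dl)    # upscaling_dimension now defaults to 2 * dl.input_dim
```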
4 changes: 4 additions & 0 deletions src/arrays/abstract_lie_algebra_horizontal.jl
@@ -0,0 +1,4 @@
@doc raw"""
`AbstractLieAlgHorMatrix` is a supertype for various horizontal components of Lie algebras. We usually call this \(\mathfrak{g}^\mathrm{hor}\).
"""
abstract type AbstractLieAlgHorMatrix{T} <: AbstractMatrix{T} end
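Because the new supertype sits under `AbstractMatrix`, the concrete horizontal components defined later in this PR inherit the generic array interface. A quick check (a sketch, assuming the type names from the files below are exported):

```julia
using GeometricMachineLearning

StiefelLieAlgHorMatrix <: AbstractLieAlgHorMatrix            # true
GrassmannLieAlgHorMatrix <: AbstractLieAlgHorMatrix          # true
AbstractLieAlgHorMatrix{Float64} <: AbstractMatrix{Float64}  # true
```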
37 changes: 0 additions & 37 deletions src/arrays/block_identity_lower.jl

This file was deleted.

37 changes: 0 additions & 37 deletions src/arrays/block_identity_upper.jl

This file was deleted.

15 changes: 1 addition & 14 deletions src/arrays/grassmann_lie_algebra_horizontal.jl
@@ -1,19 +1,6 @@
"""
- This implements the horizontal component of the Lie algebra (in this case just the skew-symmetric matrices).
- The projection is:
- S -> SE where
- |I|
- |0| = E.
-
- An element of GrassmannLieAlgMatrix takes the form:
- | -0 -B'|
- | B 0 | where B is arbitrary.
-
- This also implements the projection:
- | 0 -B'| | 0 -B'|
- | B 0 | -> | B 0 |.
+ This implements the horizontal component of a Lie algebra that is isomorphic to the Grassmann manifold.
"""

mutable struct GrassmannLieAlgHorMatrix{T, ST <: AbstractMatrix{T}} <: AbstractLieAlgHorMatrix{T}
B::ST
N::Int
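A field-wise construction sketch for this element type (the constructor signature is an assumption based on the struct fields shown above; the rest of the struct is hidden in this view):

```julia
using GeometricMachineLearning

N, n = 5, 2
B = rand(N - n, n)                        # the arbitrary (N-n)×n block
elem = GrassmannLieAlgHorMatrix(B, N, n)  # assumed: stores B and the dimensions
size(elem)                                # (N, N), since it is an AbstractMatrix
```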
2 changes: 1 addition & 1 deletion src/arrays/skew_symmetric.jl
@@ -187,7 +187,7 @@ end

# the first matrix is multiplied onto A2 in order for it to not be SkewSymMatrix!
function Base.:*(A1::SkewSymMatrix{T}, A2::SkewSymMatrix{T}) where T
- A1*(one(A2)*A2)
+ A1 * (one(A2) * A2)
end

@doc raw"""
Expand Down
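The reformatted product above relies on a small trick: the product of two skew-symmetric matrices is in general not skew-symmetric, so `one(A2) * A2` first materializes `A2` as an ordinary dense matrix. A minimal sketch (assuming `SkewSymMatrix(::AbstractMatrix)` skew-symmetrizes its input, as the Stiefel docstring below states):

```julia
using GeometricMachineLearning

S1 = SkewSymMatrix(rand(4, 4))
S2 = SkewSymMatrix(rand(4, 4))

P = S1 * S2   # an ordinary dense matrix, not a SkewSymMatrix
P == -P'      # false in general: (S1 * S2)' == S2 * S1, not -(S1 * S2)
```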
41 changes: 26 additions & 15 deletions src/arrays/stiefel_lie_algebra_horizontal.jl
@@ -1,21 +1,32 @@
"""
This implements the horizontal component of the Lie algebra (in this case just the skew-symmetric matrices).
The projection is:
S -> SE where
|I|
|0| = E.
@doc raw"""
`StiefelLieAlgHorMatrix` is the *horizontal component of the Lie algebra of skew-symmetric matrices* (with respect to the canonical metric).
The projection here is: \(\pi:S \to SE \) where
```math
E = \begin{pmatrix} \mathbb{I}_{n} \\ \mathbb{O}_{(N-n)\times{}n} \end{pmatrix}.
```
The matrix \(E\) is implemented under `StiefelProjection` in `GeometricMachineLearning`.

An element of StiefelLieAlgMatrix takes the form:
| A -B'|
| B 0 | where A is skew-symmetric.

This also implements the projection:
| A -B'| | A -B'|
| B D | -> | B 0 |.
```math
\begin{pmatrix}
A & B^T \\ B & \mathbb{O}
\end{pmatrix},
```
where \(A\) is skew-symmetric (this is `SkewSymMatrix` in `GeometricMachineLearning`).

If the constructor is called with a big \(N\times{}N\) matrix, then the projection is performed the following way:
```math
\begin{pmatrix}
A & B_1 \\
B_2 & D
\end{pmatrix} \mapsto
\begin{pmatrix}
\mathrm{skew}(A) & -B_2^T \\
B_2 & \mathbb{O}
\end{pmatrix}.
```
+ The operation \(\mathrm{skew}:\mathbb{R}^{n\times{}n}\to\mathcal{S}_\mathrm{skew}(n)\) is the skew-symmetrization operation. This is equivalent to calling the constructor of `SkewSymMatrix` with an \(n\times{}n\) matrix.
"""

- abstract type AbstractLieAlgHorMatrix{T} <: AbstractMatrix{T} end

mutable struct StiefelLieAlgHorMatrix{T, AT <: SkewSymMatrix{T}, ST <: AbstractMatrix{T}} <: AbstractLieAlgHorMatrix{T}
A::AT
B::ST
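The big-matrix projection described in the docstring can be exercised as follows (a sketch: the two-argument constructor `StiefelLieAlgHorMatrix(S, n)` is an assumption based on the docstring's wording, not shown in this diff):

```julia
using GeometricMachineLearning

N, n = 6, 2
S = rand(N, N)                     # a big N×N matrix
B̄ = StiefelLieAlgHorMatrix(S, n)  # assumed: keeps skew(A) and B₂, zeros out D
B̄.A                               # the n×n SkewSymMatrix block
```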
26 changes: 20 additions & 6 deletions src/arrays/symmetric.jl
@@ -20,12 +20,12 @@
So $S$ stores a string of vectors taken from $A$: $S = [\tilde{a}_1, \tilde{a}_2, \ldots, \tilde{a}_n]$ with $\tilde{a}_i = [[A]_{i1},[A]_{i2},\ldots,[A]_{ii}]$.

TODO:
- -[x] Overload Adjoint operation for SymmetricMatrix!! (Aᵀ = A)
- -[ ] implement matrix and vector products (to also work on GPU)
- -[x] implement zero initialization (for optimizer)
- -[ ] perform some tests (also with Zygote)
- -[x] update the constructor (to work better for GPU)
- -[ ] implement multiplication with a tensor
+ - [x] Overload Adjoint operation for SymmetricMatrix!! (Aᵀ = A)
+ - [ ] implement matrix and vector products (to also work on GPU)
+ - [x] implement zero initialization (for optimizer)
+ - [ ] perform some tests (also with Zygote)
+ - [x] update the constructor (to work better for GPU)
+ - [ ] implement multiplication with a tensor
"""
mutable struct SymmetricMatrix{T, AT <: AbstractVector{T}} <: AbstractMatrix{T}
S::AT
@@ -209,13 +209,27 @@
C
end

+ Base.:*(B::AbstractMatrix{T}, A::SymmetricMatrix{T}) where T = (A * B')'
+
+ function Base.:*(A::SymmetricMatrix{T}, B::SymmetricMatrix{T}) where T
+     A * (B * one(B))

[Codecov: added lines src/arrays/symmetric.jl#L214–L215 were not covered by tests]
+ end

function Base.:*(A::SymmetricMatrix{T}, b::AbstractVector{T}) where T
backend = KernelAbstractions.get_backend(A.S)
c = KernelAbstractions.allocate(backend, T, A.n)
LinearAlgebra.mul!(c, A, b)
c
end

+ function Base.one(A::SymmetricMatrix{T}) where T
+     backend = KernelAbstractions.get_backend(A.S)
+     unit_matrix = KernelAbstractions.zeros(backend, T, A.n, A.n)
+     write_ones! = write_ones_kernel!(backend)
+     write_ones!(unit_matrix, ndrange=A.n)
+     unit_matrix

[Codecov: added lines src/arrays/symmetric.jl#L225–L230 were not covered by tests]
+ end

# define routines for generalizing ChainRulesCore to SymmetricMatrix
ChainRulesCore.ProjectTo(A::SymmetricMatrix) = ProjectTo{SymmetricMatrix}(; symmetric=ProjectTo(A.S))
(project::ProjectTo{SymmetricMatrix})(dA::AbstractMatrix) = SymmetricMatrix(project.symmetric(map_to_S(dA)), size(dA, 2))
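Taken together, the additions in this file make the following operations available (a usage sketch; that `SymmetricMatrix(::AbstractMatrix)` symmetrizes its input is an assumption based on the docstring at the top of the file):

```julia
using GeometricMachineLearning

A = SymmetricMatrix(rand(4, 4))
B = rand(4, 4)
b = rand(4)

B * A   # new: computed as (A * B')', exploiting A' == A
A * A   # new: A * (A * one(A)), so the result is an ordinary matrix
A * b   # pre-existing matrix-vector product via KernelAbstractions
one(A)  # new: a dense identity allocated on the same backend as A.S
```

The design choice mirrors `SkewSymMatrix`: products that would leave the structured class return plain matrices rather than incorrectly structured ones.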
80 changes: 24 additions & 56 deletions src/arrays/symplectic.jl
@@ -1,75 +1,43 @@

@doc raw"""

- `SymplecticMatrix(n)`
`SymplecticPotential(n)`

Returns a symplectic matrix of size 2n x 2n

```math
\begin{pmatrix}
- 0 & & & 1 & & & \\
- & \ddots & & & \ddots & & \\
- & & 0 & & & 1 \\
- -1 & & & 0 & & & \\
- & \ddots & & & \ddots & & \\
- & & -1 & & 0 & \\
+ \mathbb{O} & \mathbb{I} \\
+ -\mathbb{I} & \mathbb{O} \\
\end{pmatrix}
```

- `SymplecticProjection(N,n)`
- Returns the symplectic projection matrix E of the Stiefel manifold, i.e. π: Sp(2N) → Sp(2n,2N), A ↦ AE

"""
- #=
- function SymplecticMatrix(n::Int, T::DataType=Float64)
-     BandedMatrix((n => ones(T,n), -n => -ones(T,n)), (2n,2n))
- end
-
- SymplecticMatrix(T::DataType, n::Int) = SymplecticMatrix(n, T)
-
- @doc raw"""
- ```math
- \begin{pmatrix}
- I & 0 \\
- 0 & 0 \\
- 0 & I \\
- 0 & 0 \\
- \end{pmatrix}
- ```
- """
- =#

- function SymplecticPotential(n::Int, T::DataType=Float64)
-     J = zeros(T, 2*n, 2*n)
-     J[1:n, (n+1):2*n] = one(ones(T, n, n))
-     J[(n+1):2*n, 1:n] = -one(ones(T, n, n))
+ function SymplecticPotential(backend, n2::Int, T::DataType=Float64)
+     @assert iseven(n2)
+     n = n2÷2
+     J = KernelAbstractions.zeros(backend, T, 2*n, 2*n)
+     assign_ones_for_symplectic_potential! = assign_ones_for_symplectic_potential_kernel!(backend)
+     assign_ones_for_symplectic_potential!(J, n, ndrange=n)

[Codecov: added lines src/arrays/symplectic.jl#L15–L20 were not covered by tests]
J
end

+ SymplecticPotential(n::Int, T::DataType=Float64) = SymplecticPotential(CPU(), n, T)
+ SymplecticPotential(backend, T::DataType, n::Int) = SymplecticPotential(backend, n, T)

[Codecov: added lines src/arrays/symplectic.jl#L24–L25 were not covered by tests]

SymplecticPotential(T::DataType, n::Int) = SymplecticPotential(n, T)

- struct SymplecticProjection{T} <: AbstractMatrix{T}
-     N::Int
-     n::Int
-     SymplecticProjection(N, n, T = Float64) = new{T}(N,n)
+ @kernel function assign_ones_for_symplectic_potential_kernel!(J::AbstractMatrix{T}, n::Int) where T
+     i = @index(Global)
+     J[map_index_for_symplectic_potential(i, n)...] = i ≤ n ? one(T) : -one(T)

[Codecov: added lines src/arrays/symplectic.jl#L29–L31 were not covered by tests]
end

- function Base.getindex(E::SymplecticProjection,i,j)
-     if i ≤ E.n
-         if j == i
-             return 1.
-         end
-         return 0.
-     end
-     if j > E.n
-         if (j-E.n) == (i-E.N)
-             return 1.
-         end
-         return 0.
+ """
+ This assigns the right index for the symplectic potential. To be used with `assign_ones_for_symplectic_potential_kernel!`.
+ """
+ function map_index_for_symplectic_potential(i::Int, n::Int)
+     if i ≤ n
+         return (i, i+n)

[Codecov: added lines src/arrays/symplectic.jl#L37–L39 were not covered by tests]
+     else
+         return (i, i-n)

[Codecov: added line src/arrays/symplectic.jl#L41 was not covered by tests]
+     end
-     return 0.
end


- Base.parent(E::SymplecticProjection) = (E.N,E.n)
- Base.size(E::SymplecticProjection) = (2*E.N,2*E.n)
- end
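A quick CPU sanity check of the rewritten constructor (a sketch; note that the positional argument is the full dimension `2n`, as enforced by the `@assert iseven(n2)` guard):

```julia
using GeometricMachineLearning, LinearAlgebra

J = SymplecticPotential(4)                    # 4×4 canonical symplectic matrix, n = 2
J == [0 0 1 0; 0 0 0 1; -1 0 0 0; 0 -1 0 0]   # the structure written by the kernel
J^2 == -I(4)                                  # J² = -𝕀 for the canonical J
```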