From ba96b359cf1730f4efc502625664907398229d11 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 13 Dec 2023 09:19:22 +0100 Subject: [PATCH 01/96] Remove old array tests. --- test/arrays/array_tests_old.jl | 27 --------------------------- 1 file changed, 27 deletions(-) delete mode 100644 test/arrays/array_tests_old.jl diff --git a/test/arrays/array_tests_old.jl b/test/arrays/array_tests_old.jl deleted file mode 100644 index 98e08a726..000000000 --- a/test/arrays/array_tests_old.jl +++ /dev/null @@ -1,27 +0,0 @@ -using LinearAlgebra -using Test - -using GeometricMachineLearning: BlockIdentityLowerMatrix, SymmetricBlockIdentityLowerMatrix -using GeometricMachineLearning: BlockIdentityUpperMatrix, SymmetricBlockIdentityUpperMatrix -using GeometricMachineLearning: ZeroVector - - -W = rand(2,2) -S = W .+ W' -x = ones(4) -y = zero(x) - -@test mul!(y, BlockIdentityLowerMatrix(W), x) == vcat(ones(2), ones(2) .+ W * ones(2)) -@test mul!(y, BlockIdentityUpperMatrix(W), x) == vcat(ones(2) .+ W * ones(2), ones(2)) - -@test mul!(y, SymmetricBlockIdentityLowerMatrix(W), x) == vcat(ones(2), ones(2) .+ S * ones(2)) -@test mul!(y, SymmetricBlockIdentityUpperMatrix(W), x) == vcat(ones(2) .+ S * ones(2), ones(2)) - - -z = ZeroVector(Float64, 4) - -@test z[1] == zero(Float64) -@test z[4] == zero(Float64) - -@test_throws AssertionError z[0] -@test_throws AssertionError z[5] From 0fb91568e2d76661db1b099654001e68ae48734d Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 13 Dec 2023 09:32:38 +0100 Subject: [PATCH 02/96] Removed the block matrices. This is now done by the LinearSympNetLayers. --- src/arrays/block_identity_lower.jl | 37 ------------------------------ src/arrays/block_identity_upper.jl | 37 ------------------------------ 2 files changed, 74 deletions(-) delete mode 100644 src/arrays/block_identity_lower.jl delete mode 100644 src/arrays/block_identity_upper.jl diff --git a/src/arrays/block_identity_lower.jl b/src/arrays/block_identity_lower.jl deleted file mode 100644 index d51fab751..000000000 --- a/src/arrays/block_identity_lower.jl +++ /dev/null @@ -1,37 +0,0 @@ -""" -A `BlockIdentityLowerMatrix` is a matrix with blocks -| 1 0 | -| S 1 | -Currently, it only implements a custom `mul!` method, exploiting this structure. -""" -struct BlockIdentityLowerMatrix{T, AT <: AbstractMatrix{T}} <: AbstractMatrix{T} - S::AT - - function BlockIdentityLowerMatrix(S::AbstractMatrix) - @assert length(axes(S,1)) == length(axes(S,2)) - new{eltype(S), typeof(S)}(S) - end -end - -SymmetricBlockIdentityLowerMatrix(W::AbstractMatrix) = BlockIdentityLowerMatrix(W .+ W') - -Base.parent(A::BlockIdentityLowerMatrix) = A.S -Base.size(A::BlockIdentityLowerMatrix) = 2 .* size(A.S) - -function LinearAlgebra.mul!(out::AbstractVector, A::BlockIdentityLowerMatrix, z::AbstractVector) - @assert length(out) == length(z) == 2*length(axes(A.S,2)) - - N = length(axes(A.S,1)) - - q_in = @view z[1:N] - p_in = @view z[N+1:2N] - q_out = @view out[1:N] - p_out = @view out[N+1:2N] - - mul!(p_out, A.S, q_in) - - q_out .= q_in - p_out .+= p_in - - return out -end diff --git a/src/arrays/block_identity_upper.jl b/src/arrays/block_identity_upper.jl deleted file mode 100644 index c4ff51e03..000000000 --- a/src/arrays/block_identity_upper.jl +++ /dev/null @@ -1,37 +0,0 @@ -""" -A `BlockIdentityUpperMatrix` is a matrix with blocks -| 1 S | -| 0 1 | -Currently, it only implements a custom `mul!` method, exploiting this structure. 
-""" -struct BlockIdentityUpperMatrix{T, AT <: AbstractMatrix{T}} <: AbstractMatrix{T} - S::AT - - function BlockIdentityUpperMatrix(S::AbstractMatrix) - @assert length(axes(S,1)) == length(axes(S,2)) - new{eltype(S), typeof(S)}(S) - end -end - -SymmetricBlockIdentityUpperMatrix(W::AbstractMatrix) = BlockIdentityUpperMatrix(W .+ W') - -Base.parent(A::BlockIdentityUpperMatrix) = A.S -Base.size(A::BlockIdentityUpperMatrix) = 2 .* size(A.S) - -function LinearAlgebra.mul!(out::AbstractVector, A::BlockIdentityUpperMatrix, z::AbstractVector) - @assert length(out) == length(z) == 2*length(axes(A.S,2)) - - N = length(axes(A.S,1)) - - q_in = @view z[1:N] - p_in = @view z[N+1:2N] - q_out = @view out[1:N] - p_out = @view out[N+1:2N] - - mul!(q_out, A.S, p_in) - - q_out .+= q_in - p_out .= p_in - - return out -end From dd4e8a55add8e9917b23fd6d341e1611e4e59bd5 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 13 Dec 2023 09:35:38 +0100 Subject: [PATCH 03/96] Hope this fixed the TODO list. --- src/arrays/symmetric.jl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/arrays/symmetric.jl b/src/arrays/symmetric.jl index f7db2c6c6..928b90ab5 100644 --- a/src/arrays/symmetric.jl +++ b/src/arrays/symmetric.jl @@ -20,12 +20,12 @@ Internally the `struct` saves a vector $S$ of size $n(n+1)\div2$. The conversion So $S$ stores a string of vectors taken from $A$: $S = [\tilde{a}_1, \tilde{a}_2, \ldots, \tilde{a}_n]$ with $\tilde{a}_i = [[A]_{i1},[A]_{i2},\ldots,[A]_{ii}]$. TODO: --[x] Overload Adjoint operation for SymmetricMatrix!! (Aᵀ = A) --[ ] implement matrix and vector products (to also work on GPU) --[x] implement zero initialization (for optimizer) --[ ] perform some tests (also with Zygote) --[x] update the constructor (to work better for GPU) --[ ] implement multiplication with a tensor +- [x] Overload Adjoint operation for SymmetricMatrix!! (Aᵀ = A) +- [ ] implement matrix and vector products (to also work on GPU) +- [x] implement zero initialization (for optimizer) +- [ ] perform some tests (also with Zygote) +- [x] update the constructor (to work better for GPU) +- [ ] implement multiplication with a tensor """ mutable struct SymmetricMatrix{T, AT <: AbstractVector{T}} <: AbstractMatrix{T} S::AT From 35c0d4862ba8ab2ba9a0a117d6298d5ad668488a Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 13 Dec 2023 10:06:34 +0100 Subject: [PATCH 04/96] We are not using those arrays anymore. --- src/arrays/symplectic_lie_algebra.jl | 89 ------ .../symplectic_lie_algebra_horizontal.jl | 298 ------------------ 2 files changed, 387 deletions(-) delete mode 100644 src/arrays/symplectic_lie_algebra.jl delete mode 100644 src/arrays/symplectic_lie_algebra_horizontal.jl diff --git a/src/arrays/symplectic_lie_algebra.jl b/src/arrays/symplectic_lie_algebra.jl deleted file mode 100644 index 2d77394b1..000000000 --- a/src/arrays/symplectic_lie_algebra.jl +++ /dev/null @@ -1,89 +0,0 @@ -""" -A `SymplecticLieAlgMatrix` is a matrix -| A B | -| C -A' |, where B and C are symmetric. - -The first index is the row index, the second one the column index. - -TODO: Check how LinearAlgebra implements matrix multiplication! -""" - -#-> AbstractVecOrMat!! 
-mutable struct SymplecticLieAlgMatrix{T, AT <: AbstractMatrix{T}, BT <: SymmetricMatrix{T}} <: AbstractMatrix{T} - A::AT - B::BT - C::BT - n::Int - - function SymplecticLieAlgMatrix(A::AbstractMatrix{T}, B::SymmetricMatrix{T}, C::SymmetricMatrix{T}, n) where {T} - @assert eltype(B) == eltype(C) == eltype(A) - @assert B.n == C.n == n - @assert size(A) == (n,n) - new{T,typeof(A),typeof(B)}(A,B,C,n) - end - function SymplecticLieAlgMatrix(S::AbstractMatrix{T}) where {T} - n2 = size(S, 1) - @assert iseven(n2) - @assert size(S, 2) == n2 - n = n2÷2 - - A = T(0.5)*(S[1:n,1:n] - S[(n+1):n2, (n+1):n2]') - B = SymmetricMatrix(S[1:n,(n+1):n2]) - C = SymmetricMatrix(S[(n+1):n2,1:n]) - - new{eltype(S), typeof(A), typeof(B)}(A, B, C, n) - end - -end - - -#implementing getindex automatically defines all matrix multiplications! (but probably not in the most efficient way) -function Base.getindex(A::SymplecticLieAlgMatrix,i,j) - n = A.n - if i ≤ n && j ≤ n - return A.A[i,j] - end - if i ≤ n - return A.B[i, j-n] - end - if j ≤ n - return A.C[i-n, j] - end - return -A.A[j-n,i-n] -end - - -Base.parent(A::SymplecticLieAlgMatrix) = (A=A.A,B=A.B,C=A.C) -Base.size(A::SymplecticLieAlgMatrix) = (2*A.n,2*A.n) - -function Base.:+(S₁::SymplecticLieAlgMatrix, S₂::SymplecticLieAlgMatrix) - @assert S₁.n == S₂.n - SymplecticLieAlgMatrix( - S₁.A + S₂.A, - S₁.B + S₂.B, - S₁.C + S₂.C, - S₁.n - ) -end - -function Base.:-(S₁::SymplecticLieAlgMatrix, S₂::SymplecticLieAlgMatrix) - @assert S₁.n == S₂.n - SymplecticLieAlgMatrix( - S₁.A - S₂.A, - S₁.B - S₂.B, - S₁.C - S₂.C, - S₁.n - ) -end - -function Base.:-(S::SymplecticLieAlgMatrix) - SymplecticLieAlgMatrix( - -S.A, - -S.B, - -S.C, - S.n - ) -end - - -#TODO: implement functions as in StiefelLieAlgHorMatrix \ No newline at end of file diff --git a/src/arrays/symplectic_lie_algebra_horizontal.jl b/src/arrays/symplectic_lie_algebra_horizontal.jl deleted file mode 100644 index 60a8e1fb9..000000000 --- a/src/arrays/symplectic_lie_algebra_horizontal.jl +++ /dev/null @@ -1,298 +0,0 @@ -""" -The `horizontal part` of `SymplecticLieAlgMatrix` is a matrix -| A B | -| C -A' |, where B and C are symmetric - -with the following additional requirements: -| A₁ A₂ᵀ| -| A₃ 0 | = A, - -| B₁ B₂ᵀ| -| B₂ 0 | = B, - -| C₁ C₂ᵀ| -| C₂ 0 | = C. - -The horizontal component is here taken with respect to the G-invariant metric (X₁,X₂) ↦ tr(X₁⁺X₂). (This metric is bi-invariant but degenerate.) - -The first index is the row index, the second one the column index. - -TODO: Make this a subtype of SymplecticLieAlgMatrix!!!!! -""" - -#-> AbstractVecOrMat!! 
-mutable struct SymplecticLieAlgHorMatrix{T, AT <: AbstractMatrix{T}, ST <: SymmetricMatrix{T}} <: AbstractMatrix{T} - A₁::AT - A₂::AT - A₃::AT - B₁::ST - B₂::AT - C₁::ST - C₂::AT - N::Int - n::Int - - function SymplecticLieAlgHorMatrix(A₁::AbstractMatrix, A₂::AbstractMatrix, A₃::AbstractMatrix, - B₁::SymmetricMatrix, B₂::AbstractMatrix, C₁::SymmetricMatrix, - C₂::AbstractMatrix, N::Int, n::Int) - @assert eltype(A₁) == eltype(A₂) == eltype(A₃) == eltype(B₁) == eltype(B₂) == eltype(C₁) == eltype(C₂) - @assert size(A₁)[1] == size(A₁)[2] == size(A₂)[2] == size(A₃)[2] == B₁.n == size(B₂)[2] == C₁.n == size(C₂)[2] == n - @assert size(A₂)[1] == size(A₃)[1] == size(B₂)[1] == size(C₂)[1] == (N-n) - new{eltype(A₁), typeof(A₁), typeof(B₁)}(A₁, A₂, A₃, B₁, B₂, C₁, C₂, N, n) - end - function SymplecticLieAlgHorMatrix(S::SymplecticLieAlgMatrix, n::Int) - N = S.n - new{eltype(S.A), typeof(S.A), typeof(S.B)}( - S.A[1:n, 1:n], - S.A[1:n, (n+1):N]', - S.A[(n+1):N, 1:n], - #this could be made more efficient by accessing the vector that parametrizes SymmetricMatrix!! - SymmetricMatrix(S.B[1:n, 1:n]), - S.B[(n+1):N, 1:n], - SymmetricMatrix(S.C[1:n, 1:n]), - S.C[(n+1):N, 1:n], - N, - n - ) - end -end - - -function A_index(A₁::AbstractMatrix, A₂::AbstractMatrix, A₃::AbstractMatrix, n, i, j) - if i ≤ n - if j ≤ n - return A₁[i,j] - end - return A₂[j-n, i] - end - if j ≤ n - return A₃[i - n, j] - end - return 0. -end - -function BC_index(B₁::SymmetricMatrix, B₂::AbstractMatrix, n, i, j) - if i ≤ n - if j ≤ n - return B₁[i,j] - end - return B₂[j-n, i] - end - if j ≤ n - return B₂[i - n, j] - end - return 0. -end - -function Base.getindex(S::SymplecticLieAlgHorMatrix, i, j) - N = S.N - n = S.n - - if i ≤ N && j ≤ N - return A_index(S.A₁, S.A₂, S.A₃, n, i, j) - end - if i ≤ N - return BC_index(S.B₁, S.B₂, n, i, j-N) - end - if j ≤ N - return BC_index(S.C₁, S.C₂, n, i-N, j) - end - return -A_index(S.A₁, S.A₂, S.A₃, n, j-N, i-N) -end - - -Base.parent(S::SymplecticLieAlgHorMatrix) = (A₁=S.A₁, A₂=S.A₂, A₃=S.A₃, B₁=S.B₁, B₂=S.B₂, C₁=S.C₁, C₂=S.C₂) -Base.size(S::SymplecticLieAlgHorMatrix) = (2*S.N,2*S.N) - -function Base.:+(S₁::SymplecticLieAlgHorMatrix, S₂::SymplecticLieAlgHorMatrix) - @assert S₁.n == S₂.n - @assert S₁.N == S₂.N - SymplecticLieAlgHorMatrix( - S₁.A₁ + S₂.A₁, - S₁.A₂ + S₂.A₂, - S₁.A₃ + S₂.A₃, - S₁.B₁ + S₂.B₁, - S₁.B₂ + S₂.B₂, - S₁.C₁ + S₂.C₁, - S₁.C₂ + S₂.C₂, - S₁.N, - S₁.n - ) -end - -function Base.:-(S₁::SymplecticLieAlgHorMatrix, S₂::SymplecticLieAlgHorMatrix) - @assert S₁.n == S₂.n - @assert S₁.N == S₂.N - SymplecticLieAlgHorMatrix( - S₁.A₁ - S₂.A₁, - S₁.A₂ - S₂.A₂, - S₁.A₃ - S₂.A₃, - S₁.B₁ - S₂.B₁, - S₁.B₂ - S₂.B₂, - S₁.C₁ - S₂.C₁, - S₁.C₂ - S₂.C₂, - S₁.N, - S₁.n - ) -end - -function add!(C::SymplecticLieAlgHorMatrix, A::SymplecticLieAlgHorMatrix, B::SymplecticLieAlgHorMatrix) - @assert A.N == B.N == C.N - @assert A.n == B.n == C.n - add!(C.A₁, A.A₁, B.A₁), - add!(C.A₂, A.A₂, B.A₂), - add!(C.A₃, A.A₃, B.A₃), - add!(C.B₁, A.B₁, B.B₁), - add!(C.B₂, A.B₂, B.B₂), - add!(C.C₁, A.C₁, B.C₁), - add!(C.C₂, A.C₂, B.C₂) -end - -function Base.:-(S::SymplecticLieAlgHorMatrix) - SymplecticLieAlgHorMatrix( - -S.A₁, - -S.A₂, - -S.A₃, - -S.B₁, - -S.B₂, - -S.C₁, - -S.C₂, - S.N, - S.n - ) -end - -function Base.:*(S::SymplecticLieAlgHorMatrix, α::Real) - SymplecticLieAlgHorMatrix( - α*S.A₁, - α*S.A₂, - α*S.A₃, - α*S.B₁, - α*S.B₂, - α*S.C₁, - α*S.C₂, - S.N, - S.n - ) -end - -Base.:*(α::Real, A::SymplecticLieAlgHorMatrix) = A*α - -function Base.zeros(::Type{SymplecticLieAlgHorMatrix{T}}, N::Integer, n::Integer) where T - 
SymplecticLieAlgHorMatrix( - zeros(T, n, n), - zeros(T, N-n, n), - zeros(T, N-n, n), - zeros(SymmetricMatrix{T}, n), - zeros(T, N-n, n), - zeros(SymmetricMatrix{T},n), - zeros(T, N-n, n), - N, - n - ) -end - -function Base.zeros(::Type{SymplecticLieAlgHorMatrix}, N::Integer, n::Integer) - SymplecticLieAlgHorMatrix( - zeros(n, n), - zeros(N-n, n), - zeros(N-n, n), - zeros(SymmetricMatrix, n), - zeros(N-n, n), - zeros(SymmetricMatrix, n), - zeros(N-n, n), - N, - n - ) -end - - -function Base.rand(::Type{SymplecticLieAlgHorMatrix{T}}, N::Integer, n::Integer) where T - SymplecticLieAlgHorMatrix( - rand(T, n, n), - rand(T, N-n, n), - rand(T, N-n, n), - rand(SymmetricMatrix{T}, n), - rand(T, N-n, n), - rand(SymmetricMatrix{T},n), - rand(T, N-n, n), - N, - n - ) -end - -function Base.rand(::Type{SymplecticLieAlgHorMatrix}, N::Integer, n::Integer) - SymplecticLieAlgHorMatrix( - rand(n, n), - rand(N-n, n), - rand(N-n, n), - rand(SymmetricMatrix, n), - rand(N-n, n), - rand(SymmetricMatrix, n), - rand(N-n, n), - N, - n - ) -end - -function scalar_add(S::SymplecticLieAlgHorMatrix, δ::Real) - SymplecticLieAlgHorMatrix( - S.A₁ .+ δ, - S.A₂ .+ δ, - S.A₃ .+ δ, - scalar_add(S.B₁, δ), - S.B₂ .+ δ, - scalar_add(S.C₁, δ), - S.C₂ .+ δ, - S.N, - S.n - ) -end - - -#function Base.:./(A::SymplecticLieAlgMatrix,B::SymplecticLieAlgMatrix) -function /ᵉˡᵉ(S₁::SymplecticLieAlgHorMatrix,S₂::SymplecticLieAlgHorMatrix) - @assert S₁.n == S₂.n - @assert S₁.N == S₂.N - SymplecticLieAlgHorMatrix( - S₁.A₁./S₂.A₁, - S₁.A₂./S₂.A₂, - S₁.A₃/S₂.A₃, - /ᵉˡᵉ(S₁.B₁, S₂.B₁), - S₁.B₂./S₂.B₂, - /ᵉˡᵉ(S₁.C₁, S₂.C₁), - S₁.C₂/S₂.C₂, - S₁.N, - S₁.n - ) -end - -function ⊙²(S::SymplecticLieAlgHorMatrix) - SymplecticLieHorAlgMatrix( - S.A₁.^2, - S.A₂.^2, - S.A₃.^2, - ⊙²(S.B₁), - S.B₂.^2, - ⊙²(S.C₁), - S.C₂.^2, - S.N, - S.n - ) -end - -#= -function RACᵉˡᵉ(S::SymplecticLieAlgHorMatrix) - SymplecticLieAlgMatrix( - sqrt.(S.A₁), - sqrt.(S.A₂), - sqrt.(S.A₃), - RACᵉˡᵉ(S.B₁), - sqrt.(S.B₂), - RACᵉˡᵉ(S.B₁), - sqrt.(S.C₂), - S.N, - S.n - ) -end -=# \ No newline at end of file From 1f1e30d8ee16b91cc9d073ef907956a91c72042c Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 13 Dec 2023 13:10:14 +0100 Subject: [PATCH 05/96] Factored out sampling of arrays. --- .../addition_tests_for_custom_arrays.jl | 12 +++++++++ .../random_generation_of_custom_arrays.jl | 27 +++++++++++++++++++ test/runtests.jl | 2 +- 3 files changed, 40 insertions(+), 1 deletion(-) create mode 100644 test/arrays/addition_tests_for_custom_arrays.jl create mode 100644 test/arrays/random_generation_of_custom_arrays.jl diff --git a/test/arrays/addition_tests_for_custom_arrays.jl b/test/arrays/addition_tests_for_custom_arrays.jl new file mode 100644 index 000000000..9413c5884 --- /dev/null +++ b/test/arrays/addition_tests_for_custom_arrays.jl @@ -0,0 +1,12 @@ +#= +This tests addition for all custom arrays. Note that these tests will also have to be performed on GPU! +=# + +using LinearAlgebra +using Random +using Test +using GeometricMachineLearning + +function test_addition_for_symmetric_matrix(n::Int, T::Type) + A = rand(SymmetricMatrix{T}, n) +end \ No newline at end of file diff --git a/test/arrays/random_generation_of_custom_arrays.jl b/test/arrays/random_generation_of_custom_arrays.jl new file mode 100644 index 000000000..d764ded9f --- /dev/null +++ b/test/arrays/random_generation_of_custom_arrays.jl @@ -0,0 +1,27 @@ +#= +This tests random generation of custom arrays. This will have to be expanded to GPU tests. 
+=# +using LinearAlgebra +using Random +using Test +using GeometricMachineLearning + +function test_random_array_generation(n::Int, N::Int, T::Type) + A_sym = rand(SymmetricMatrix{T}, n) + @test typeof(A_sym) <: SymmetricMatrix{T} + @test eltype(A_sym) == T + + A_skew = rand(SkewSymMatrix{T}, n) + @test typeof(A_skew) <: SkewSymMatrix{T} + @test eltype(A_skew) == T + + A_stiefel_hor = rand(StiefelLieAlgHorMatrix{T}, N, n) + @test typeof(A_stiefel_hor) <: StiefelLieAlgHorMatrix{T} + @test eltype(A_stiefel_hor) == T + + A_grassmann_hor = rand(GrassmannLieAlgHorMatrix{T}, N, n) + @test typeof(A_grassmann_hor) <: GrassmannLieAlgHorMatrix{T} + @test eltype(A_grassmann_hor) == T +end + +test_random_array_generation(5, 10, Float32) \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index af35e6063..e94ad1242 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -3,7 +3,7 @@ using SafeTestsets @safetestset "Check parameterlength " begin include("parameterlength/check_parameterlengths.jl") end @safetestset "Arrays #1 " begin include("arrays/array_tests.jl") end -@safetestset "Arrays #2 " begin include("arrays/array_tests_old.jl") end +@safetestset "Sampling of arrays " begin include("arrays/random_generation_of_custom_arrays.jl") end @safetestset "Manifolds (Grassmann): " begin include("manifolds/grassmann_manifold.jl") end @safetestset "Gradient Layer " begin include("layers/gradient_layer_tests.jl") end @safetestset "Test symplecticity of upscaling layer " begin include("layers/sympnet_layers_test.jl") end From 51c7c76db88b1c270a6fa19f52574dc672733ade Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 13 Dec 2023 13:11:03 +0100 Subject: [PATCH 06/96] Got rid of some some symplectic arrays and included the supertype for horizontal components as an extra file. --- src/GeometricMachineLearning.jl | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/GeometricMachineLearning.jl b/src/GeometricMachineLearning.jl index b357d839a..b7b9e9f92 100644 --- a/src/GeometricMachineLearning.jl +++ b/src/GeometricMachineLearning.jl @@ -82,13 +82,10 @@ module GeometricMachineLearning export convert_to_dev, Device, CPUDevice # INCLUDE ARRAYS - include("arrays/block_identity_lower.jl") - include("arrays/block_identity_upper.jl") include("arrays/symmetric.jl") include("arrays/symplectic.jl") - include("arrays/symplectic_lie_algebra.jl") - include("arrays/symplectic_lie_algebra_horizontal.jl") include("arrays/skew_symmetric.jl") + include("arrays/abstract_lie_algebra_horizontal.jl") include("arrays/stiefel_lie_algebra_horizontal.jl") include("arrays/grassmann_lie_algebra_horizontal.jl") From 3876be561a273d76ca4c11b78a6425908ed92c13 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 13 Dec 2023 13:12:32 +0100 Subject: [PATCH 07/96] Updated docs. --- .../grassmann_lie_algebra_horizontal.jl | 15 +------ src/arrays/stiefel_lie_algebra_horizontal.jl | 41 ++++++++++++------- 2 files changed, 27 insertions(+), 29 deletions(-) diff --git a/src/arrays/grassmann_lie_algebra_horizontal.jl b/src/arrays/grassmann_lie_algebra_horizontal.jl index aa164729b..bf883f157 100644 --- a/src/arrays/grassmann_lie_algebra_horizontal.jl +++ b/src/arrays/grassmann_lie_algebra_horizontal.jl @@ -1,19 +1,6 @@ """ -This implements the horizontal component of the Lie algebra (in this case just the skew-symmetric matrices). -The projection is: -S -> SE where -|I| -|0| = E. - -An element of GrassmannLieAlgMatrix takes the form: -| -0 -B'| -| B 0 | where B is arbitrary. 
- -This also implements the projection: -| 0 -B'| | 0 -B'| -| B 0 | -> | B 0 |. +This implements the horizontal component of a Lie algebra that is isomorphic to the Grassmann manifold. """ - mutable struct GrassmannLieAlgHorMatrix{T, ST <: AbstractMatrix{T}} <: AbstractLieAlgHorMatrix{T} B::ST N::Int diff --git a/src/arrays/stiefel_lie_algebra_horizontal.jl b/src/arrays/stiefel_lie_algebra_horizontal.jl index 71558e485..7af26c360 100644 --- a/src/arrays/stiefel_lie_algebra_horizontal.jl +++ b/src/arrays/stiefel_lie_algebra_horizontal.jl @@ -1,21 +1,32 @@ -""" -This implements the horizontal component of the Lie algebra (in this case just the skew-symmetric matrices). -The projection is: -S -> SE where -|I| -|0| = E. +@doc raw""" +`StiefelLieAlgHorMatrix` is the *horizontal component of the Lie algebra of skew-symmetric matrices* (with respect to the canonical metric). +The projection here is: \(\pi:S \to SE \) where +```math +E = \begin{pmatrix} \mathbb{I}_{n} \\ \mathbb{O}_{(N-n)\times{}n} \end{pmatrix}. +``` +The matrix \(E\) is implemented under `StiefelProjection` in `GeometricMachineLearning`. An element of StiefelLieAlgMatrix takes the form: -| A -B'| -| B 0 | where A is skew-symmetric. - -This also implements the projection: -| A -B'| | A -B'| -| B D | -> | B 0 |. +```math +\begin{pmatrix} +A & B^T \\ B & \mathbb{O} +\end{pmatrix}, +``` +where \(A\) is skew-symmetric (this is `SkewSymMatrix` in `GeometricMachineLearning`). + +If the constructor is called with a big \(N\times{}N\) matrix, then the projection is performed the following way: +```math +\begin{pmatrix} +A & B_1 \\ +B_2 & D +\end{pmatrix} \mapsto +\begin{pmatrix} +\mathrm{skew}(A) & -B_2^T \\ +B_2 & \mathbb{O} +\end{pmatrix}. +``` +The operation $\mathrm{skew}:\mathbb{R}^{n\times{}n}\to\mathcal{S}_\mahtrm{skew}(n)$ is the skew-symmetrization operation. This is equivalent to calling the constructor of `SkewSymMatrix` with an \(n\times{}n\) matrix. """ - -abstract type AbstractLieAlgHorMatrix{T} <: AbstractMatrix{T} end - mutable struct StiefelLieAlgHorMatrix{T, AT <: SkewSymMatrix{T}, ST <: AbstractMatrix{T}} <: AbstractLieAlgHorMatrix{T} A::AT B::ST From 48300b2358e38c2a9d373e57c0ac7cff9bd89810 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 13 Dec 2023 13:12:53 +0100 Subject: [PATCH 08/96] New supertype for the horizontal components. --- src/arrays/abstract_lie_algebra_horizontal.jl | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 src/arrays/abstract_lie_algebra_horizontal.jl diff --git a/src/arrays/abstract_lie_algebra_horizontal.jl b/src/arrays/abstract_lie_algebra_horizontal.jl new file mode 100644 index 000000000..58a7630df --- /dev/null +++ b/src/arrays/abstract_lie_algebra_horizontal.jl @@ -0,0 +1,4 @@ +@doc raw""" +`AbstractLieAlgHorMatrix` is a supertype for various horizontal components of Lie algebras. We usually call this \(\mathfrak{g}^\mathrm{hor}\). +""" +abstract type AbstractLieAlgHorMatrix{T} <: AbstractMatrix{T} end From 0dfbc63723c808df7665c6fd081501cb9db1e173 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 13 Dec 2023 13:25:49 +0100 Subject: [PATCH 09/96] Added new routine for multiplying symmetric matrix from the right onto a matrix. 
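
The new method reuses the existing `SymmetricMatrix`-times-dense kernel by going through the transpose: since `A` is symmetric, `B * A == (A * B')'`. A quick sanity check of that identity (not part of the patch itself; the dense reference via `Matrix(A)` is the only assumption):

```julia
using GeometricMachineLearning  # exports SymmetricMatrix

A = SymmetricMatrix(rand(Float32, 5, 5))
B = rand(Float32, 4, 5)

# right multiplication dispatches to (A * B')', so it should agree with the dense product
@assert B * A ≈ B * Matrix(A)
```

Routing the right multiplication through the existing left-multiplication method means the same code path (including its GPU backend) is reused instead of adding a second kernel.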
--- src/arrays/symmetric.jl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/arrays/symmetric.jl b/src/arrays/symmetric.jl index f7db2c6c6..52cdd4f7f 100644 --- a/src/arrays/symmetric.jl +++ b/src/arrays/symmetric.jl @@ -209,6 +209,8 @@ function Base.:*(A::SymmetricMatrix{T}, B::AbstractMatrix{T}) where T C end +Base.:*(B::AbstractMatrix{T}, A::SymmetricMatrix{T}) where T = (A * B')' + function Base.:*(A::SymmetricMatrix{T}, b::AbstractVector{T}) where T backend = KernelAbstractions.get_backend(A.S) c = KernelAbstractions.allocate(backend, T, A.n) From 21bb9312b674b34a73eb4824e0f69a3e4b0d0d1d Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 13 Dec 2023 13:50:49 +0100 Subject: [PATCH 10/96] Test addition of various custom arrays. --- .../addition_tests_for_custom_arrays.jl | 35 +++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 test/arrays/addition_tests_for_custom_arrays.jl diff --git a/test/arrays/addition_tests_for_custom_arrays.jl b/test/arrays/addition_tests_for_custom_arrays.jl new file mode 100644 index 000000000..1e3a2e701 --- /dev/null +++ b/test/arrays/addition_tests_for_custom_arrays.jl @@ -0,0 +1,35 @@ +using GeometricMachineLearning, Test + +@doc raw""" +This function tests addition for various custom arrays, i.e. if \(A + B\) is performed in the correct way. +""" + +function addition_tests_for_custom_arrays(n::Int, N::Int, T::Type) + A = rand(T, n, n) + B = rand(T, n, n) + + # SymmetricMatrix + AB_sym = SymmetricMatrix(A + B) + AB_sym2 = SymmetricMatrix(A) + SymmetricMatrix(B) + @test AB_sym ≈ AB_sym2 + @test typeof(AB_sym) <: SymmetricMatrix{T} + @test typeof(AB_sym2) <: SymmetricMatrix{T} + + # SkewSymMatrix + AB_skew = SkewSymMatrix(A + B) + AB_skew2 = SkewSymMatrix(A) + SkewSymMatrix(B) + @test A_skew ≈ A_skew2 + @test typeof(AB_skew) <: SkewSymMatrix{T} + @test typeof(AB_skew2) <: SkewSymMatrix{T} + + C = rand(T, N, N) + D = rand(T, N, N) + + # StiefelLieAlgHorMatrix + CD_slahm = StiefelLieAlgHorMatrix(A + B, n) + CD_slahm2 = StiefelLieAlgHorMatrix(A, n) + StiefelLieAlgHorMatrix(B, n) + @test CD_slahm ≈ CD_slahm2 + @test typeof(CD_slahm) <: StiefelLieAlgHorMatrix{T} + @test typeof(CD_slahm2) <: StiefelLieAlgHorMatrix{T} + +end \ No newline at end of file From 969a6cb05e43c64c9f68b51e69dbc13c7ea9e787 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 13 Dec 2023 13:51:40 +0100 Subject: [PATCH 11/96] Removed tests for various symplectic arrays that are no longer part of GML. --- test/arrays/array_tests.jl | 59 -------------------------------------- 1 file changed, 59 deletions(-) diff --git a/test/arrays/array_tests.jl b/test/arrays/array_tests.jl index fd9269046..c0da5eba9 100644 --- a/test/arrays/array_tests.jl +++ b/test/arrays/array_tests.jl @@ -1,14 +1,7 @@ using LinearAlgebra using Random using Test - -using BandedMatrices using GeometricMachineLearning -""" -𝔤ʰ is the horizontal part of 𝔤, i.e. A ∈ 𝔤ʰ ⟺ AEE⁺ = A. 
- -TODO: Add routine & test for symplectic conjugate -""" #check if symmetric matrix works for 1×1 matrices W = rand(1,1) @@ -61,22 +54,6 @@ function skew_mat_mul_test2(n, T=Float64) @test isapprox(AS1, AS2) end -# check if matrix is ∈ 𝔤 (check if the vector space projection works), addition & subtraction -function sympl_lie_alg_add_sub_test(n) - J = SymplecticPotential(n) - symplectisize(W) = .5*(W - J'*W'*J) - W₁ = rand(2*n,2*n) - S₁ = SymplecticLieAlgMatrix(W₁) - W₂ = rand(2*n,2*n) - S₂ = SymplecticLieAlgMatrix(W₂) - S₃ = S₁ + S₂ - S₄ = S₁ - S₂ - @test typeof(S₃) <: SymplecticLieAlgMatrix - @test typeof(S₄) <: SymplecticLieAlgMatrix - @test all(abs.(symplectisize(W₁ + W₂) .- S₃) .< 1e-10) - @test all(abs.(symplectisize(W₁ - W₂) .- S₄) .< 1e-10) -end - # test Stiefel manifold projection test function stiefel_proj_test(N,n) In = I(n) @@ -84,15 +61,6 @@ function stiefel_proj_test(N,n) @test all(abs.((E'*E) .- In) .< 1e-10) end -# test symplectic projection (this is just the E matrix) -function sympl_proj_test(N, n) - JN = SymplecticPotential(N) - Jn = SymplecticPotential(n) - E = SymplecticProjection(N, n, Float64) - @test all(abs.((E'*JN*E) .- Jn) .< 1e-10) -end - - function stiefel_lie_alg_add_sub_test(N, n) E = StiefelProjection(N, n) projection(W::SkewSymMatrix) = W - (I - E*E')*W*(I - E*E') @@ -108,31 +76,8 @@ function stiefel_lie_alg_add_sub_test(N, n) @test all(abs.(projection(W₁ - W₂) .- S₄) .< 1e-10) end -# check if matrix is ∈ 𝔤 (check if the vector space projection works), addition & subtraction -function sympl_lie_alg_add_sub_test(N, n) - J = SymplecticPotential(n) - E = SymplecticProjection(N, n) - projection(W::SymplecticLieAlgMatrix) = W - (I - E*E')*W*(I - E*E') - W₁ = SymplecticLieAlgMatrix(rand(2*N,2*N)) - S₁ = SymplecticLieAlgHorMatrix(W₁,n) - W₂ = SymplecticLieAlgMatrix(rand(2*N,2*N)) - S₂ = SymplecticLieAlgHorMatrix(W₂,n) - S₃ = S₁ + S₂ - S₄ = S₁ - S₂ - @test typeof(S₃) <: SymplecticLieAlgHorMatrix - @test typeof(S₄) <: SymplecticLieAlgHorMatrix - @test all(abs.(projection(W₁ + W₂) .- S₃) .< 1e-10) - @test all(abs.(projection(W₁ - W₂) .- S₄) .< 1e-10) -end - -function stiefel_lie_alg_vectorization_test(N, n; T=Float32) - A = rand(StiefelLieAlgHorMatrix{T}, N, n) - @test isapprox(StiefelLieAlgHorMatrix(vec(A), N, n), A) -end - # TODO: tests for ADAM functions - # test everything for different n & N values Random.seed!(42) @@ -149,10 +94,6 @@ for (N, n) ∈ zip(N_vec, n_vec) skew_mat_add_sub_test(N) skew_mat_mul_test(N) skew_mat_mul_test2(N) - sympl_lie_alg_add_sub_test(N) stiefel_proj_test(N,n) - sympl_proj_test(N,n) stiefel_lie_alg_add_sub_test(N,n) - sympl_lie_alg_add_sub_test(N,n) - stiefel_lie_alg_vectorization_test(N, n) end From e0b81cf69917972e8db76c7963508514603cb6d6 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 13 Dec 2023 14:20:24 +0100 Subject: [PATCH 12/96] Added routine for Base.one. This is copied from SkewSymMatrix. 
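
`one(A)` materializes a dense identity of size `A.n` on the backend of `A.S` (mirroring the `SkewSymMatrix` implementation), which lets the product of two `SymmetricMatrix` objects fall back on the existing symmetric-times-dense multiplication as `A * (B * one(B))`. A minimal sketch of the resulting behaviour (not part of the patch):

```julia
using GeometricMachineLearning
using LinearAlgebra: I

A = SymmetricMatrix(rand(Float32, 4, 4))
B = SymmetricMatrix(rand(Float32, 4, 4))

@assert one(B) ≈ Matrix{Float32}(I, 4, 4)   # dense identity, allocated on B's backend
@assert A * B ≈ Matrix(A) * Matrix(B)       # product of two symmetric matrices is a plain matrix in general
```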
--- src/GeometricMachineLearning.jl | 2 +- src/arrays/symmetric.jl | 12 ++++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/src/GeometricMachineLearning.jl b/src/GeometricMachineLearning.jl index b357d839a..fbb84488a 100644 --- a/src/GeometricMachineLearning.jl +++ b/src/GeometricMachineLearning.jl @@ -84,13 +84,13 @@ module GeometricMachineLearning # INCLUDE ARRAYS include("arrays/block_identity_lower.jl") include("arrays/block_identity_upper.jl") - include("arrays/symmetric.jl") include("arrays/symplectic.jl") include("arrays/symplectic_lie_algebra.jl") include("arrays/symplectic_lie_algebra_horizontal.jl") include("arrays/skew_symmetric.jl") include("arrays/stiefel_lie_algebra_horizontal.jl") include("arrays/grassmann_lie_algebra_horizontal.jl") + include("arrays/symmetric.jl") export SymmetricMatrix, SymplecticPotential, SkewSymMatrix export StiefelLieAlgHorMatrix diff --git a/src/arrays/symmetric.jl b/src/arrays/symmetric.jl index 52cdd4f7f..6a6f3ac91 100644 --- a/src/arrays/symmetric.jl +++ b/src/arrays/symmetric.jl @@ -211,6 +211,10 @@ end Base.:*(B::AbstractMatrix{T}, A::SymmetricMatrix{T}) where T = (A * B')' +function Base.:*(A::SymmetricMatrix{T}, B::SymmetricMatrix{T}) where T + A * (B * one(B)) +end + function Base.:*(A::SymmetricMatrix{T}, b::AbstractVector{T}) where T backend = KernelAbstractions.get_backend(A.S) c = KernelAbstractions.allocate(backend, T, A.n) @@ -218,6 +222,14 @@ function Base.:*(A::SymmetricMatrix{T}, b::AbstractVector{T}) where T c end +function Base.one(A::SymmetricMatrix{T}) where T + backend = KernelAbstractions.get_backend(A.S) + unit_matrix = KernelAbstractions.zeros(backend, T, A.n, A.n) + write_ones! = write_ones_kernel!(backend) + write_ones!(unit_matrix, ndrange=A.n) + unit_matrix +end + # define routines for generalizing ChainRulesCore to SymmetricMatrix ChainRulesCore.ProjectTo(A::SymmetricMatrix) = ProjectTo{SymmetricMatrix}(; symmetric=ProjectTo(A.S)) (project::ProjectTo{SymmetricMatrix})(dA::AbstractMatrix) = SymmetricMatrix(project.symmetric(map_to_S(dA)), size(dA, 2)) From 16b28baf332e114df8b4c4884fdd99a1806815a6 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 13 Dec 2023 14:20:49 +0100 Subject: [PATCH 13/96] Slightly improved readability. --- src/arrays/skew_symmetric.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/arrays/skew_symmetric.jl b/src/arrays/skew_symmetric.jl index e1ab85fe2..3c14b9e45 100644 --- a/src/arrays/skew_symmetric.jl +++ b/src/arrays/skew_symmetric.jl @@ -187,7 +187,7 @@ end # the first matrix is multiplied onto A2 in order for it to not be SkewSymMatrix! function Base.:*(A1::SkewSymMatrix{T}, A2::SkewSymMatrix{T}) where T - A1*(one(A2)*A2) + A1 * (one(A2) * A2) end @doc raw""" From df07bfb1aade5e452594bb0d9ee7963c0b86fb8b Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 13 Dec 2023 14:21:43 +0100 Subject: [PATCH 14/96] Added addition tests for the remaining custom arrays. --- test/arrays/addition_tests_for_custom_arrays.jl | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/test/arrays/addition_tests_for_custom_arrays.jl b/test/arrays/addition_tests_for_custom_arrays.jl index 1e3a2e701..669ccc0ef 100644 --- a/test/arrays/addition_tests_for_custom_arrays.jl +++ b/test/arrays/addition_tests_for_custom_arrays.jl @@ -3,7 +3,6 @@ using GeometricMachineLearning, Test @doc raw""" This function tests addition for various custom arrays, i.e. if \(A + B\) is performed in the correct way. 
""" - function addition_tests_for_custom_arrays(n::Int, N::Int, T::Type) A = rand(T, n, n) B = rand(T, n, n) @@ -26,10 +25,15 @@ function addition_tests_for_custom_arrays(n::Int, N::Int, T::Type) D = rand(T, N, N) # StiefelLieAlgHorMatrix - CD_slahm = StiefelLieAlgHorMatrix(A + B, n) - CD_slahm2 = StiefelLieAlgHorMatrix(A, n) + StiefelLieAlgHorMatrix(B, n) + CD_slahm = StiefelLieAlgHorMatrix(C + D, n) + CD_slahm2 = StiefelLieAlgHorMatrix(C, n) + StiefelLieAlgHorMatrix(D, n) @test CD_slahm ≈ CD_slahm2 @test typeof(CD_slahm) <: StiefelLieAlgHorMatrix{T} @test typeof(CD_slahm2) <: StiefelLieAlgHorMatrix{T} + CD_glahm = GrassmannLieAlgHorMatrix(C + D, n) + CD_glahm2 = GrassmannLieAlgHorMatrix(C, n) + GrassmannLieAlgHorMatrix(D, n) + @test CD_glahm ≈ CD_glahm2 + @test tyepof(CD_glahm) <: GrassmannLieAlgHorMatrix{T} + @test typeof(CD_glahm2) <: GrassmannLieAlgHorMatrix{T} end \ No newline at end of file From e81ff2e9a16d4cdbc049febd42931321770bde55 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 13 Dec 2023 14:52:14 +0100 Subject: [PATCH 15/96] Added tests for addition, scalar multiplication and matrix multiplication for custom arrays. --- .../addition_tests_for_custom_arrays.jl | 4 +- ...matrix_multiplication_for_custom_arrays.jl | 21 ++++++++++ ...scalar_multiplication_for_custom_arrays.jl | 40 +++++++++++++++++++ test/runtests.jl | 4 +- 4 files changed, 67 insertions(+), 2 deletions(-) create mode 100644 test/arrays/matrix_multiplication_for_custom_arrays.jl create mode 100644 test/arrays/scalar_multiplication_for_custom_arrays.jl diff --git a/test/arrays/addition_tests_for_custom_arrays.jl b/test/arrays/addition_tests_for_custom_arrays.jl index 669ccc0ef..5fd2bbcfc 100644 --- a/test/arrays/addition_tests_for_custom_arrays.jl +++ b/test/arrays/addition_tests_for_custom_arrays.jl @@ -36,4 +36,6 @@ function addition_tests_for_custom_arrays(n::Int, N::Int, T::Type) @test CD_glahm ≈ CD_glahm2 @test tyepof(CD_glahm) <: GrassmannLieAlgHorMatrix{T} @test typeof(CD_glahm2) <: GrassmannLieAlgHorMatrix{T} -end \ No newline at end of file +end + +addition_tests_for_custom_arrays(5, 10, Float32) \ No newline at end of file diff --git a/test/arrays/matrix_multiplication_for_custom_arrays.jl b/test/arrays/matrix_multiplication_for_custom_arrays.jl new file mode 100644 index 000000000..db051ec3e --- /dev/null +++ b/test/arrays/matrix_multiplication_for_custom_arrays.jl @@ -0,0 +1,21 @@ +using GeometricMachineLearning, Test + +@doc raw""" +This function tests matrix multiplication for various custom arrays, i.e. if \((A,\alpha) \mapsto \alpha{}A\) is performed in the correct way. +""" +function matrix_multiplication_tests_for_custom_arrays(n::Int, N::Int, T::Type) + A = rand(T, n, n) + B = rand(T, n, N) + + # SymmetricMatrix + A_sym = SymmetricMatrix(A) + @test A_sym * B ≈ Matrix{T}(A_sym) * B + @test B' * A_sym ≈ B' * Matrix{T}(A_sym) + + # SkewSymMatrix + A_skew = SkewSymMatrix(A) + @test A_skew * B ≈ Matrix{T}(A_skew) * B + @test B' * A_skew ≈ B' * Matrix{T}(A_skew) +end + +matrix_multiplication_tests_for_custom_arrays(5, 10, Float32) \ No newline at end of file diff --git a/test/arrays/scalar_multiplication_for_custom_arrays.jl b/test/arrays/scalar_multiplication_for_custom_arrays.jl new file mode 100644 index 000000000..c6214a3e1 --- /dev/null +++ b/test/arrays/scalar_multiplication_for_custom_arrays.jl @@ -0,0 +1,40 @@ +using GeometricMachineLearning, Test + +@doc raw""" +This function tests scalar multiplication for various custom arrays, i.e. 
if \((A,\alpha) \mapsto \alpha{}A\) is performed in the correct way. +""" +function scalar_multiplication_for_custom_arrays(n::Int, N::Int, T::Type) + A = rand(T, n, n) + α = rand(T) + + # SymmetricMatrix + Aα_sym = SymmetricMatrix(α * A) + Aα_sym2 = α * SymmetricMatrix(A) + @test Aα_sym ≈ Aα_sym2 + @test typeof(Aα_sym) <: SymmetricMatrix{T} + @test typeof(Aα_sym2) <: SymmetricMatrix{T} + + # SkewSymMatrix + Aα_skew = SkewSymMatrix(α * A) + Aα_skew2 = α * SkewSymMatrix(A) + @test Aα_skew ≈ Aα_skew2 + @test typeof(Aα_skew) <: SkewSymMatrix{T} + @test typeof(Aα_skew2) <: SkewSymMatrix{T} + + C = rand(T, N, N) + + # StiefelLieAlgHorMatrix + Cα_slahm = StiefelLieAlgHorMatrix(α * C, n) + Cα_slahm2 = α * StiefelLieAlgHorMatrix(C, n) + @test Cα_slahm ≈ Cα_slahm2 + @test typeof(Cα_slahm) <: StiefelLieAlgHorMatrix{T} + @test typeof(Cα_slahm2) <: StiefelLieAlgHorMatrix{T} + + Cα_glahm = GrassmannLieAlgHorMatrix(α * C, n) + Cα_glahm2 = α * GrassmannLieAlgHorMatrix(C, n) + @test Cα_glahm ≈ Cα_glahm2 + @test tyepof(Cα_glahm) <: GrassmannLieAlgHorMatrix{T} + @test typeof(Cα_glahm2) <: GrassmannLieAlgHorMatrix{T} +end + +scalar_multiplication_for_custom_arrays(5, 10, Float32) \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index af35e6063..dda63eb01 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -3,7 +3,9 @@ using SafeTestsets @safetestset "Check parameterlength " begin include("parameterlength/check_parameterlengths.jl") end @safetestset "Arrays #1 " begin include("arrays/array_tests.jl") end -@safetestset "Arrays #2 " begin include("arrays/array_tests_old.jl") end +@safetestset "Addition tests for custom arrays " begin include("arrays/addition_tests_for_custom_arrays.jl") end +@safetestset "Scalar multiplication tests for custom arrays " begin include("arrays/scalar_multiplication_for_custom_arrays.jl") end +@safetestset "Matrix multiplication tests for custom arrays " begin include("arrays/matrix_multiplication_for_custom_arrays.jl") end @safetestset "Manifolds (Grassmann): " begin include("manifolds/grassmann_manifold.jl") end @safetestset "Gradient Layer " begin include("layers/gradient_layer_tests.jl") end @safetestset "Test symplecticity of upscaling layer " begin include("layers/sympnet_layers_test.jl") end From a94c57c6a8350df4105faf1f838ea1252e4fe722 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 13 Dec 2023 15:05:23 +0100 Subject: [PATCH 16/96] Changed order of how files are included. Symmetric now depends on SkewSymmetric. 
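
With include-based compilation the dependency has to be pulled in first, so the two files now appear in this order at the top of the include list (the concrete helper named below is an assumption, based on the `Base.one` routine copied over from `SkewSymMatrix` in an earlier commit):

```julia
# inside the GeometricMachineLearning module definition
include("arrays/skew_symmetric.jl")  # assumed to define the shared kernel used by Base.one
include("arrays/symmetric.jl")       # Base.one(::SymmetricMatrix) relies on it
```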
--- src/GeometricMachineLearning.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/GeometricMachineLearning.jl b/src/GeometricMachineLearning.jl index fbb84488a..d8dccaae2 100644 --- a/src/GeometricMachineLearning.jl +++ b/src/GeometricMachineLearning.jl @@ -82,15 +82,15 @@ module GeometricMachineLearning export convert_to_dev, Device, CPUDevice # INCLUDE ARRAYS + include("arrays/skew_symmetric.jl") + include("arrays/symmetric.jl") include("arrays/block_identity_lower.jl") include("arrays/block_identity_upper.jl") include("arrays/symplectic.jl") include("arrays/symplectic_lie_algebra.jl") include("arrays/symplectic_lie_algebra_horizontal.jl") - include("arrays/skew_symmetric.jl") include("arrays/stiefel_lie_algebra_horizontal.jl") include("arrays/grassmann_lie_algebra_horizontal.jl") - include("arrays/symmetric.jl") export SymmetricMatrix, SymplecticPotential, SkewSymMatrix export StiefelLieAlgHorMatrix From c6a350ed471420a4b4583c45978658d85b1f253e Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 13 Dec 2023 15:05:50 +0100 Subject: [PATCH 17/96] Fixed typo. --- .../addition_tests_for_custom_arrays.jl | 4 +- test/arrays/array_tests.jl | 43 ------------------- ...scalar_multiplication_for_custom_arrays.jl | 2 +- 3 files changed, 3 insertions(+), 46 deletions(-) diff --git a/test/arrays/addition_tests_for_custom_arrays.jl b/test/arrays/addition_tests_for_custom_arrays.jl index 5fd2bbcfc..44fc878c1 100644 --- a/test/arrays/addition_tests_for_custom_arrays.jl +++ b/test/arrays/addition_tests_for_custom_arrays.jl @@ -17,7 +17,7 @@ function addition_tests_for_custom_arrays(n::Int, N::Int, T::Type) # SkewSymMatrix AB_skew = SkewSymMatrix(A + B) AB_skew2 = SkewSymMatrix(A) + SkewSymMatrix(B) - @test A_skew ≈ A_skew2 + @test AB_skew ≈ AB_skew2 @test typeof(AB_skew) <: SkewSymMatrix{T} @test typeof(AB_skew2) <: SkewSymMatrix{T} @@ -34,7 +34,7 @@ function addition_tests_for_custom_arrays(n::Int, N::Int, T::Type) CD_glahm = GrassmannLieAlgHorMatrix(C + D, n) CD_glahm2 = GrassmannLieAlgHorMatrix(C, n) + GrassmannLieAlgHorMatrix(D, n) @test CD_glahm ≈ CD_glahm2 - @test tyepof(CD_glahm) <: GrassmannLieAlgHorMatrix{T} + @test typeof(CD_glahm) <: GrassmannLieAlgHorMatrix{T} @test typeof(CD_glahm2) <: GrassmannLieAlgHorMatrix{T} end diff --git a/test/arrays/array_tests.jl b/test/arrays/array_tests.jl index fd9269046..0e52c5cd6 100644 --- a/test/arrays/array_tests.jl +++ b/test/arrays/array_tests.jl @@ -61,21 +61,6 @@ function skew_mat_mul_test2(n, T=Float64) @test isapprox(AS1, AS2) end -# check if matrix is ∈ 𝔤 (check if the vector space projection works), addition & subtraction -function sympl_lie_alg_add_sub_test(n) - J = SymplecticPotential(n) - symplectisize(W) = .5*(W - J'*W'*J) - W₁ = rand(2*n,2*n) - S₁ = SymplecticLieAlgMatrix(W₁) - W₂ = rand(2*n,2*n) - S₂ = SymplecticLieAlgMatrix(W₂) - S₃ = S₁ + S₂ - S₄ = S₁ - S₂ - @test typeof(S₃) <: SymplecticLieAlgMatrix - @test typeof(S₄) <: SymplecticLieAlgMatrix - @test all(abs.(symplectisize(W₁ + W₂) .- S₃) .< 1e-10) - @test all(abs.(symplectisize(W₁ - W₂) .- S₄) .< 1e-10) -end # test Stiefel manifold projection test function stiefel_proj_test(N,n) @@ -84,14 +69,6 @@ function stiefel_proj_test(N,n) @test all(abs.((E'*E) .- In) .< 1e-10) end -# test symplectic projection (this is just the E matrix) -function sympl_proj_test(N, n) - JN = SymplecticPotential(N) - Jn = SymplecticPotential(n) - E = SymplecticProjection(N, n, Float64) - @test all(abs.((E'*JN*E) .- Jn) .< 1e-10) -end - function stiefel_lie_alg_add_sub_test(N, n) E = 
StiefelProjection(N, n) @@ -108,23 +85,6 @@ function stiefel_lie_alg_add_sub_test(N, n) @test all(abs.(projection(W₁ - W₂) .- S₄) .< 1e-10) end -# check if matrix is ∈ 𝔤 (check if the vector space projection works), addition & subtraction -function sympl_lie_alg_add_sub_test(N, n) - J = SymplecticPotential(n) - E = SymplecticProjection(N, n) - projection(W::SymplecticLieAlgMatrix) = W - (I - E*E')*W*(I - E*E') - W₁ = SymplecticLieAlgMatrix(rand(2*N,2*N)) - S₁ = SymplecticLieAlgHorMatrix(W₁,n) - W₂ = SymplecticLieAlgMatrix(rand(2*N,2*N)) - S₂ = SymplecticLieAlgHorMatrix(W₂,n) - S₃ = S₁ + S₂ - S₄ = S₁ - S₂ - @test typeof(S₃) <: SymplecticLieAlgHorMatrix - @test typeof(S₄) <: SymplecticLieAlgHorMatrix - @test all(abs.(projection(W₁ + W₂) .- S₃) .< 1e-10) - @test all(abs.(projection(W₁ - W₂) .- S₄) .< 1e-10) -end - function stiefel_lie_alg_vectorization_test(N, n; T=Float32) A = rand(StiefelLieAlgHorMatrix{T}, N, n) @test isapprox(StiefelLieAlgHorMatrix(vec(A), N, n), A) @@ -149,10 +109,7 @@ for (N, n) ∈ zip(N_vec, n_vec) skew_mat_add_sub_test(N) skew_mat_mul_test(N) skew_mat_mul_test2(N) - sympl_lie_alg_add_sub_test(N) stiefel_proj_test(N,n) - sympl_proj_test(N,n) stiefel_lie_alg_add_sub_test(N,n) - sympl_lie_alg_add_sub_test(N,n) stiefel_lie_alg_vectorization_test(N, n) end diff --git a/test/arrays/scalar_multiplication_for_custom_arrays.jl b/test/arrays/scalar_multiplication_for_custom_arrays.jl index c6214a3e1..3a5e56205 100644 --- a/test/arrays/scalar_multiplication_for_custom_arrays.jl +++ b/test/arrays/scalar_multiplication_for_custom_arrays.jl @@ -33,7 +33,7 @@ function scalar_multiplication_for_custom_arrays(n::Int, N::Int, T::Type) Cα_glahm = GrassmannLieAlgHorMatrix(α * C, n) Cα_glahm2 = α * GrassmannLieAlgHorMatrix(C, n) @test Cα_glahm ≈ Cα_glahm2 - @test tyepof(Cα_glahm) <: GrassmannLieAlgHorMatrix{T} + @test typeof(Cα_glahm) <: GrassmannLieAlgHorMatrix{T} @test typeof(Cα_glahm2) <: GrassmannLieAlgHorMatrix{T} end From 3e01f05cddd0da718739d42349e8789b79ac36ec Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 13 Dec 2023 16:36:39 +0100 Subject: [PATCH 18/96] Fixed docs and put type dependency for . --- src/data_loader/tensor_assign.jl | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/src/data_loader/tensor_assign.jl b/src/data_loader/tensor_assign.jl index 3faad0a99..e2282db83 100644 --- a/src/data_loader/tensor_assign.jl +++ b/src/data_loader/tensor_assign.jl @@ -35,18 +35,22 @@ end end @doc raw""" -The function `assign_output_estimate` is closely related to the transformer. It takes the last prediction_window columns of the output and uses is for the final prediction. +The function `assign_output_estimate` is closely related to the transformer. It takes the last `prediction_window` columns of the output and uses them for the final prediction. i.e. 
```math -\mathbb{R}^{N\times\mathtt{pw}}\to\mathbb{R}^{N\times\mathtt{pw}}, \begin{bmatrix} z^{(1)}_1 & \cdots z^{(T)}_1 \\ - \cdots & \cdots \\ - z^{(T - \mathtt{pw})}_n & \cdots & z^{(T})_n\end{bmatrix} \mapsto - \begin{bmatrix} z^{(1)}_1 & \cdots z^{(T)}_1 \\ - \cdots & \cdots \\ - z^{(T - \mathtt{pw})}_n & \cdots & z^{(T})_n\end{bmatrix} +\mathbb{R}^{N\times\mathtt{pw}}\to\mathbb{R}^{N\times\mathtt{pw}}, +\begin{bmatrix} + z^{(1)}_1 & \cdots & z^{(T)}_1 \\ + \cdots & \cdots & \cdots \\ + z^{(1)}_n & \cdots & z^{(T})_n + \end{bmatrix} \mapsto + \begin{bmatrix} + z^{(T - \mathtt{pw})}_1 & \cdots & z^{(T)}_1 \\ + \cdots & \cdots & \cdots \\ + z^{(T - \mathtt{pw})}_n & \cdots & z^{(T})_n\end{bmatrix} ``` """ -function assign_output_estimate(full_output::AbstractArray{T, 3}, prediction_window) where T +function assign_output_estimate(full_output::AbstractArray{T, 3}, prediction_window::Int) where T sys_dim, seq_length, batch_size = size(full_output) backend = KernelAbstractions.get_backend(full_output) output_estimate = KernelAbstractions.allocate(backend, T, sys_dim, prediction_window, batch_size) From 3a1fa6a1f757338d1e6ec8f0d0844ea680bc3055 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 13 Dec 2023 17:04:22 +0100 Subject: [PATCH 19/96] Fixed docs for rgrad. --- src/manifolds/stiefel_manifold.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/manifolds/stiefel_manifold.jl b/src/manifolds/stiefel_manifold.jl index ec1c3fb26..a98e22079 100644 --- a/src/manifolds/stiefel_manifold.jl +++ b/src/manifolds/stiefel_manifold.jl @@ -64,7 +64,7 @@ function Base.:*(Y::Adjoint{T, StiefelManifold{T, AT}}, B::AbstractMatrix) where end @doc raw""" -Computes the Riemannian gradient for the Stiefel manifold given an element ``Y\in{}St(N,n)`` and a matrix ``\nabla{}L\in\mahbb{R}^{N\times{}n}`` (the Euclidean gradient). It computes the Riemannian gradient with respect to the canonical metric (see the documentation for the function `metric` for an explanation of this). +Computes the Riemannian gradient for the Stiefel manifold given an element ``Y\in{}St(N,n)`` and a matrix ``\nabla{}L\in\mathbb{R}^{N\times{}n}`` (the Euclidean gradient). It computes the Riemannian gradient with respect to the canonical metric (see the documentation for the function `metric` for an explanation of this). The precise form of the mapping is: ```math \mathtt{rgrad}(Y, \nabla{}L) \mapsto \nabla{}L - Y(\nabla{}L)^TY From 3b8b5a486dd463db0ca2f12dc8f7be1979882b1f Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 13 Dec 2023 17:12:59 +0100 Subject: [PATCH 20/96] Added additional constructor for the case when the type is the first input argument. --- src/arrays/stiefel_projection.jl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/arrays/stiefel_projection.jl b/src/arrays/stiefel_projection.jl index 82c04c2ac..236948a09 100644 --- a/src/arrays/stiefel_projection.jl +++ b/src/arrays/stiefel_projection.jl @@ -41,6 +41,8 @@ Outer constructor for `StiefelProjection`. 
This works with two integers as input """ StiefelProjection(N::Integer, n::Integer, T::Type=Float64) = StiefelProjection(CPU(), T, N, n) +StiefelProjection(T::Type, N::Integer, n::Integer) = StiefelProjection(N, n, T) + Base.size(E::StiefelProjection) = (E.N, E.n) Base.getindex(E::StiefelProjection, i, j) = getindex(E.A, i, j) Base.:+(E::StiefelProjection, A::AbstractMatrix) = E.A + A From b05c66c53f2c5c80d3a572bc2ad7d06467f93e5f Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 13 Dec 2023 17:13:26 +0100 Subject: [PATCH 21/96] Removed the symplectic arrays. --- test/arrays/array_tests.jl | 45 -------------------------------------- test/runtests.jl | 2 +- 2 files changed, 1 insertion(+), 46 deletions(-) diff --git a/test/arrays/array_tests.jl b/test/arrays/array_tests.jl index fd9269046..cbadf1aba 100644 --- a/test/arrays/array_tests.jl +++ b/test/arrays/array_tests.jl @@ -61,22 +61,6 @@ function skew_mat_mul_test2(n, T=Float64) @test isapprox(AS1, AS2) end -# check if matrix is ∈ 𝔤 (check if the vector space projection works), addition & subtraction -function sympl_lie_alg_add_sub_test(n) - J = SymplecticPotential(n) - symplectisize(W) = .5*(W - J'*W'*J) - W₁ = rand(2*n,2*n) - S₁ = SymplecticLieAlgMatrix(W₁) - W₂ = rand(2*n,2*n) - S₂ = SymplecticLieAlgMatrix(W₂) - S₃ = S₁ + S₂ - S₄ = S₁ - S₂ - @test typeof(S₃) <: SymplecticLieAlgMatrix - @test typeof(S₄) <: SymplecticLieAlgMatrix - @test all(abs.(symplectisize(W₁ + W₂) .- S₃) .< 1e-10) - @test all(abs.(symplectisize(W₁ - W₂) .- S₄) .< 1e-10) -end - # test Stiefel manifold projection test function stiefel_proj_test(N,n) In = I(n) @@ -84,15 +68,6 @@ function stiefel_proj_test(N,n) @test all(abs.((E'*E) .- In) .< 1e-10) end -# test symplectic projection (this is just the E matrix) -function sympl_proj_test(N, n) - JN = SymplecticPotential(N) - Jn = SymplecticPotential(n) - E = SymplecticProjection(N, n, Float64) - @test all(abs.((E'*JN*E) .- Jn) .< 1e-10) -end - - function stiefel_lie_alg_add_sub_test(N, n) E = StiefelProjection(N, n) projection(W::SkewSymMatrix) = W - (I - E*E')*W*(I - E*E') @@ -108,23 +83,6 @@ function stiefel_lie_alg_add_sub_test(N, n) @test all(abs.(projection(W₁ - W₂) .- S₄) .< 1e-10) end -# check if matrix is ∈ 𝔤 (check if the vector space projection works), addition & subtraction -function sympl_lie_alg_add_sub_test(N, n) - J = SymplecticPotential(n) - E = SymplecticProjection(N, n) - projection(W::SymplecticLieAlgMatrix) = W - (I - E*E')*W*(I - E*E') - W₁ = SymplecticLieAlgMatrix(rand(2*N,2*N)) - S₁ = SymplecticLieAlgHorMatrix(W₁,n) - W₂ = SymplecticLieAlgMatrix(rand(2*N,2*N)) - S₂ = SymplecticLieAlgHorMatrix(W₂,n) - S₃ = S₁ + S₂ - S₄ = S₁ - S₂ - @test typeof(S₃) <: SymplecticLieAlgHorMatrix - @test typeof(S₄) <: SymplecticLieAlgHorMatrix - @test all(abs.(projection(W₁ + W₂) .- S₃) .< 1e-10) - @test all(abs.(projection(W₁ - W₂) .- S₄) .< 1e-10) -end - function stiefel_lie_alg_vectorization_test(N, n; T=Float32) A = rand(StiefelLieAlgHorMatrix{T}, N, n) @test isapprox(StiefelLieAlgHorMatrix(vec(A), N, n), A) @@ -149,10 +107,7 @@ for (N, n) ∈ zip(N_vec, n_vec) skew_mat_add_sub_test(N) skew_mat_mul_test(N) skew_mat_mul_test2(N) - sympl_lie_alg_add_sub_test(N) stiefel_proj_test(N,n) - sympl_proj_test(N,n) stiefel_lie_alg_add_sub_test(N,n) - sympl_lie_alg_add_sub_test(N,n) stiefel_lie_alg_vectorization_test(N, n) end diff --git a/test/runtests.jl b/test/runtests.jl index af35e6063..848328f7d 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -3,7 +3,7 @@ using SafeTestsets @safetestset "Check parameterlength " 
begin include("parameterlength/check_parameterlengths.jl") end @safetestset "Arrays #1 " begin include("arrays/array_tests.jl") end -@safetestset "Arrays #2 " begin include("arrays/array_tests_old.jl") end +@safetestset "Test constructors for custom arrays " begin include("arrays/constructor_tests_for_custom_arrays.jl") end @safetestset "Manifolds (Grassmann): " begin include("manifolds/grassmann_manifold.jl") end @safetestset "Gradient Layer " begin include("layers/gradient_layer_tests.jl") end @safetestset "Test symplecticity of upscaling layer " begin include("layers/sympnet_layers_test.jl") end From 71f791bd1191e6324408c43fc991ae9e9800a555 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 13 Dec 2023 17:14:04 +0100 Subject: [PATCH 22/96] Added tests for the various constructors for custom arrays. --- .../constructor_tests_for_custom_arrays.jl | 37 +++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 test/arrays/constructor_tests_for_custom_arrays.jl diff --git a/test/arrays/constructor_tests_for_custom_arrays.jl b/test/arrays/constructor_tests_for_custom_arrays.jl new file mode 100644 index 000000000..452f850f8 --- /dev/null +++ b/test/arrays/constructor_tests_for_custom_arrays.jl @@ -0,0 +1,37 @@ +using GeometricMachineLearning, Test +using LinearAlgebra: I + +@doc raw""" +This tests various constructor for custom arrays, e.g. if calling `SymmetricMatrix` on a matrix ``A`` does +```math +A \mapsto \frac{1}{2}(A + A^T). +``` +""" +function test_constructors_for_custom_arrays(n::Int, N::Int, T::Type) + A = rand(T, n, n) + B = rand(T, N, N) + + # SymmetricMatrix + @test Matrix{T}(SymmetricMatrix(A)) ≈ T(.5) * (A + A') + + # SkewSymMatrix + @test Matrix{T}(SkewSymMatrix(A)) ≈ T(.5) * (A - A') + + # StiefelLieAlgHorMatrix + B_shor = StiefelLieAlgHorMatrix(SkewSymMatrix(B), n) + B_shor2 = Matrix{T}(SkewSymMatrix(B)) + B_shor2[(n+1):N, (n+1):N] .= zero(T) + @test Matrix{T}(B_shor) ≈ B_shor2 + + # GrassmannLieAlgHorMatrix + B_ghor = GrassmannLieAlgHorMatrix(SkewSymMatrix(B), n) + B_ghor2 = copy(B_shor2) + B_ghor2[1:n, 1:n] .= zero(T) + @test Matrix{T}(B_ghor) ≈ B_ghor2 + + # StiefelProjection + E = StiefelProjection(T, N, n) + @test Matrix{T}(E) ≈ vcat(I(n), zeros(T, (N-n), n)) +end + +test_constructors_for_custom_arrays(5, 10, Float32) \ No newline at end of file From 5d58358761c34ca57c28d40890153dd1c327de6a Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 13 Dec 2023 17:22:18 +0100 Subject: [PATCH 23/96] Fixed docs for optimize_for_one_epoch --- src/data_loader/batch.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/data_loader/batch.jl b/src/data_loader/batch.jl index bbb9e4e39..7a0235fd4 100644 --- a/src/data_loader/batch.jl +++ b/src/data_loader/batch.jl @@ -58,7 +58,7 @@ With the optional argument: The output of `optimize_for_one_epoch!` is the average loss over all batches of the epoch: ```math -output = \frac{1}{mathtt{steps\_per\_epoch}}\sum_{t=1}^mathtt{steps\_per\_epoch}loss(\theta^{(t-1)}). +output = \frac{1}{\mathtt{steps\_per\_epoch}}\sum_{t=1}^\mathtt{steps\_per\_epoch}loss(\theta^{(t-1)}). ``` This is done because any **reverse differentiation** routine always has two outputs: a pullback and the value of the function it is differentiating. In the case of zygote: `loss_value, pullback = Zygote.pullback(ps -> loss(ps), ps)` (if the loss only depends on the parameters). 
""" From b43fb3989f45a407ac1bb864928adcb7cb689187 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Thu, 14 Dec 2023 14:07:41 +0100 Subject: [PATCH 24/96] Added descriptions for some functions in the docs. --- src/layers/multi_head_attention.jl | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/layers/multi_head_attention.jl b/src/layers/multi_head_attention.jl index 05c3c53cc..e07820cb7 100644 --- a/src/layers/multi_head_attention.jl +++ b/src/layers/multi_head_attention.jl @@ -125,14 +125,23 @@ function (d::MultiHeadAttention{M, M, Stiefel, Retraction, false})(x::AbstractAr end import ChainRules +""" +This has to be extended to tensors; you should probably do a PR in ChainRules for this. +""" function ChainRules._adjoint_mat_pullback(y::AbstractArray{T, 3}, proj) where T (NoTangent(), proj(tensor_transpose(y))) end +""" +Extend `mat_tensor_mul` to a multiplication by the adjoint of an element of `StiefelManifold`. +""" function mat_tensor_mul(Y::AT, x::AbstractArray{T, 3}) where {T, BT <: AbstractArray{T}, ST <: StiefelManifold{T, BT}, AT <: Adjoint{T, ST}} mat_tensor_mul(Y.parent.A', x) end +""" +Extend `mat_tensor_mul` to a multiplication by an element of `StiefelManifold`. +""" function mat_tensor_mul(Y::StiefelManifold, x::AbstractArray{T, 3}) where T mat_tensor_mul(Y.A, x) end \ No newline at end of file From d032f3e137e51e0a5c635396a1aa872515f6e60e Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Thu, 14 Dec 2023 15:08:41 +0100 Subject: [PATCH 25/96] Added eltype to AbstractCache. --- src/optimizers/bfgs_cache.jl | 4 ++-- src/optimizers/optimizer_caches.jl | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/optimizers/bfgs_cache.jl b/src/optimizers/bfgs_cache.jl index 26f1c0b96..96561f8be 100644 --- a/src/optimizers/bfgs_cache.jl +++ b/src/optimizers/bfgs_cache.jl @@ -5,7 +5,7 @@ It stores an array for the previous time step `B` and the inverse of the Hessian It is important to note that setting up this cache already requires a derivative! This is not the case for the other optimizers. """ -struct BFGSCache{T, AT<:AbstractArray{T}} <: AbstractCache +struct BFGSCache{T, AT<:AbstractArray{T}} <: AbstractCache{T} B::AT S::AT H::AbstractMatrix{T} @@ -19,7 +19,7 @@ In order to initialize `BGGSCache` we first need gradient information. This is w NOTE: we may not need this. """ -struct BFGSDummyCache{T, AT<:AbstractArray{T}} <: AbstractCache +struct BFGSDummyCache{T, AT<:AbstractArray{T}} <: AbstractCache{T} function BFGSDummyCache(B::AbstractArray) new{eltype(B), typeof(zero(B))}() end diff --git a/src/optimizers/optimizer_caches.jl b/src/optimizers/optimizer_caches.jl index b8a21cb0a..189ef7ae6 100644 --- a/src/optimizers/optimizer_caches.jl +++ b/src/optimizers/optimizer_caches.jl @@ -7,12 +7,12 @@ AbstractCache has subtypes: All of them can be initialized with providing an array (also supporting manifold types). 
""" -abstract type AbstractCache end +abstract type AbstractCache{T} end ############################################################################# # All the definitions of the caches -struct AdamCache{T, AT <: AbstractArray{T}} <: AbstractCache +struct AdamCache{T, AT <: AbstractArray{T}} <: AbstractCache{T} B₁::AT B₂::AT function AdamCache(Y::AbstractArray) @@ -20,15 +20,15 @@ struct AdamCache{T, AT <: AbstractArray{T}} <: AbstractCache end end -struct MomentumCache{T, AT <: AbstractArray{T}} <:AbstractCache +struct MomentumCache{T, AT <: AbstractArray{T}} <:AbstractCache{T} B::AT function MomentumCache(Y::AbstractArray) new{eltype(Y), typeof(zero(Y))}(zero(Y)) end end -struct GradientCache <: AbstractCache end -GradientCache(::AbstractArray) = GradientCache() +struct GradientCache{T} <: AbstractCache{T} end +GradientCache(::AbstractArray{T}) where T = GradientCache{T}() ############################################################################# # All the setup_cache functions From 81ac240b1d09c47b68ef9771b33ea7c30ca9f130 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Thu, 14 Dec 2023 15:13:27 +0100 Subject: [PATCH 26/96] Removed comments that seem to have become unnecessary. --- src/optimizers/optimizer_caches.jl | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/optimizers/optimizer_caches.jl b/src/optimizers/optimizer_caches.jl index 189ef7ae6..398eaa562 100644 --- a/src/optimizers/optimizer_caches.jl +++ b/src/optimizers/optimizer_caches.jl @@ -33,11 +33,6 @@ GradientCache(::AbstractArray{T}) where T = GradientCache{T}() ############################################################################# # All the setup_cache functions -# I don't really understand what we need these for ??? -# setup_adam_cache(B::AbstractArray) = reshape([setup_adam_cache(b) for b in B], size(B)) -# setup_momentum_cache(B::AbstractArray) = reshape([setup_momentum_cache(b) for b in B], size(B)) -# setup_gradient_cache(B::AbstractArray) = reshape([setup_gradient_cache(b) for b in B], size(B)) - setup_adam_cache(ps::NamedTuple) = apply_toNT(setup_adam_cache, ps) setup_momentum_cache(ps::NamedTuple) = apply_toNT(setup_momentum_cache, ps) setup_gradient_cache(ps::NamedTuple) = apply_toNT(setup_gradient_cache, ps) From 9002452df67b1c7d66a9465a9e5db7e1333125ab Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Thu, 14 Dec 2023 15:13:49 +0100 Subject: [PATCH 27/96] Removed symplectic part (again). 
--- test/arrays/array_tests.jl | 45 -------------------------------------- 1 file changed, 45 deletions(-) diff --git a/test/arrays/array_tests.jl b/test/arrays/array_tests.jl index fd9269046..cbadf1aba 100644 --- a/test/arrays/array_tests.jl +++ b/test/arrays/array_tests.jl @@ -61,22 +61,6 @@ function skew_mat_mul_test2(n, T=Float64) @test isapprox(AS1, AS2) end -# check if matrix is ∈ 𝔤 (check if the vector space projection works), addition & subtraction -function sympl_lie_alg_add_sub_test(n) - J = SymplecticPotential(n) - symplectisize(W) = .5*(W - J'*W'*J) - W₁ = rand(2*n,2*n) - S₁ = SymplecticLieAlgMatrix(W₁) - W₂ = rand(2*n,2*n) - S₂ = SymplecticLieAlgMatrix(W₂) - S₃ = S₁ + S₂ - S₄ = S₁ - S₂ - @test typeof(S₃) <: SymplecticLieAlgMatrix - @test typeof(S₄) <: SymplecticLieAlgMatrix - @test all(abs.(symplectisize(W₁ + W₂) .- S₃) .< 1e-10) - @test all(abs.(symplectisize(W₁ - W₂) .- S₄) .< 1e-10) -end - # test Stiefel manifold projection test function stiefel_proj_test(N,n) In = I(n) @@ -84,15 +68,6 @@ function stiefel_proj_test(N,n) @test all(abs.((E'*E) .- In) .< 1e-10) end -# test symplectic projection (this is just the E matrix) -function sympl_proj_test(N, n) - JN = SymplecticPotential(N) - Jn = SymplecticPotential(n) - E = SymplecticProjection(N, n, Float64) - @test all(abs.((E'*JN*E) .- Jn) .< 1e-10) -end - - function stiefel_lie_alg_add_sub_test(N, n) E = StiefelProjection(N, n) projection(W::SkewSymMatrix) = W - (I - E*E')*W*(I - E*E') @@ -108,23 +83,6 @@ function stiefel_lie_alg_add_sub_test(N, n) @test all(abs.(projection(W₁ - W₂) .- S₄) .< 1e-10) end -# check if matrix is ∈ 𝔤 (check if the vector space projection works), addition & subtraction -function sympl_lie_alg_add_sub_test(N, n) - J = SymplecticPotential(n) - E = SymplecticProjection(N, n) - projection(W::SymplecticLieAlgMatrix) = W - (I - E*E')*W*(I - E*E') - W₁ = SymplecticLieAlgMatrix(rand(2*N,2*N)) - S₁ = SymplecticLieAlgHorMatrix(W₁,n) - W₂ = SymplecticLieAlgMatrix(rand(2*N,2*N)) - S₂ = SymplecticLieAlgHorMatrix(W₂,n) - S₃ = S₁ + S₂ - S₄ = S₁ - S₂ - @test typeof(S₃) <: SymplecticLieAlgHorMatrix - @test typeof(S₄) <: SymplecticLieAlgHorMatrix - @test all(abs.(projection(W₁ + W₂) .- S₃) .< 1e-10) - @test all(abs.(projection(W₁ - W₂) .- S₄) .< 1e-10) -end - function stiefel_lie_alg_vectorization_test(N, n; T=Float32) A = rand(StiefelLieAlgHorMatrix{T}, N, n) @test isapprox(StiefelLieAlgHorMatrix(vec(A), N, n), A) @@ -149,10 +107,7 @@ for (N, n) ∈ zip(N_vec, n_vec) skew_mat_add_sub_test(N) skew_mat_mul_test(N) skew_mat_mul_test2(N) - sympl_lie_alg_add_sub_test(N) stiefel_proj_test(N,n) - sympl_proj_test(N,n) stiefel_lie_alg_add_sub_test(N,n) - sympl_lie_alg_add_sub_test(N,n) stiefel_lie_alg_vectorization_test(N, n) end From 4c82474c49e6bc5a1c97bac4e5128af4968f17ba Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Thu, 14 Dec 2023 15:15:19 +0100 Subject: [PATCH 28/96] Refactored all the transformer tests and gave each of them a name. 
--- test/runtests.jl | 17 ++++--- ...ulti_head_attention_stiefel_optim_cache.jl | 51 ++++++++++++------- ...multi_head_attention_stiefel_retraction.jl | 51 +++++++++---------- .../multi_head_attention_stiefel_setup.jl | 33 ++++++++---- .../transformer_application.jl | 9 ++-- .../transformer_gradient.jl | 3 ++ .../transformer_optimizer.jl | 9 +++- test/transformer_related/transformer_setup.jl | 3 ++ 8 files changed, 109 insertions(+), 67 deletions(-) diff --git a/test/runtests.jl b/test/runtests.jl index af35e6063..5c9a72ecd 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -3,7 +3,6 @@ using SafeTestsets @safetestset "Check parameterlength " begin include("parameterlength/check_parameterlengths.jl") end @safetestset "Arrays #1 " begin include("arrays/array_tests.jl") end -@safetestset "Arrays #2 " begin include("arrays/array_tests_old.jl") end @safetestset "Manifolds (Grassmann): " begin include("manifolds/grassmann_manifold.jl") end @safetestset "Gradient Layer " begin include("layers/gradient_layer_tests.jl") end @safetestset "Test symplecticity of upscaling layer " begin include("layers/sympnet_layers_test.jl") end @@ -11,13 +10,15 @@ using SafeTestsets @safetestset "Manifold Neural Network Layers " begin include("layers/manifold_layers.jl") end @safetestset "Custom AD rules for kernels " begin include("custom_ad_rules/kernel_pullbacks.jl") end @safetestset "ResNet " begin include("layers/resnet_tests.jl") end -@safetestset "Transformer Networks #1 " begin include("transformer_related/multi_head_attention_stiefel_optim_cache.jl") end -@safetestset "Transformer Networks #2 " begin include("transformer_related/multi_head_attention_stiefel_retraction.jl") end -@safetestset "Transformer Networks #3 " begin include("transformer_related/multi_head_attention_stiefel_setup.jl") end -@safetestset "Transformer Networks #4 " begin include("transformer_related/transformer_setup.jl") end -@safetestset "Transformer Networks #5 " begin include("transformer_related/transformer_application.jl") end -@safetestset "Transformer Networks #6 " begin include("transformer_related/transformer_gradient.jl") end -@safetestset "Transformer Networks #7 " begin include("transformer_related/transformer_optimizer.jl") end + +# transformer-related tests +@safetestset "Test setup of MultiHeadAttention layer Stiefel weights " begin include("transformer_related/multi_head_attention_stiefel_setup.jl") end +@safetestset "Test geodesic and Cayley retr for the MultiHeadAttention layer w/ St weights " begin include("transformer_related/multi_head_attention_stiefel_retraction.jl") end +@safetestset "Test the correct setup of the various optimizer caches for MultiHeadAttention " begin include("transformer_related/multi_head_attention_stiefel_optim_cache.jl") end +@safetestset "Check if the transformer can be applied to a tensor. " begin include("transformer_related/transformer_application.jl") end +@safetestset "Check if the gradient/pullback of MultiHeadAttention changes type in St case " begin include("transformer_related/transformer_gradient.jl") end +@safetestset "Check if the optimization_step! 
changes the parameters of the transformer " begin include("transformer_related/transformer_optimizer.jl") end + @safetestset "Attention layer #1 " begin include("attention_layer/attention_setup.jl") end @safetestset "(MultiHead)Attention " begin include("attention_layer/apply_multi_head_attention.jl") end @safetestset "Classification layer " begin include("layers/classification.jl") end diff --git a/test/transformer_related/multi_head_attention_stiefel_optim_cache.jl b/test/transformer_related/multi_head_attention_stiefel_optim_cache.jl index ca4209c39..c7614fc52 100644 --- a/test/transformer_related/multi_head_attention_stiefel_optim_cache.jl +++ b/test/transformer_related/multi_head_attention_stiefel_optim_cache.jl @@ -2,19 +2,10 @@ using GeometricMachineLearning, Test import Lux, Random, LinearAlgebra -dim = 64 -n_heads = 8 -Dₕ = dim÷8 -tol = eps(Float32) - -model = Chain(MultiHeadAttention(dim, n_heads), MultiHeadAttention(dim, n_heads, Stiefel=true)) -ps = initialparameters(CPU(), Float32, model) - -o₁ = Optimizer(AdamOptimizer(), ps) -o₂ = Optimizer(MomentumOptimizer(), ps) -o₃ = Optimizer(GradientOptimizer(), ps) - -function check_adam_cache(C::AbstractCache) +@doc raw""" +This checks if the Adam cache was set up in the right way +""" +function check_adam_cache(C::AbstractCache{T}, tol= T(10) * eps(T)) where T @test typeof(C) <: AdamCache @test propertynames(C) == (:B₁, :B₂) @test typeof(C.B₁) <: StiefelLieAlgHorMatrix @@ -24,7 +15,10 @@ function check_adam_cache(C::AbstractCache) end check_adam_cache(B::NamedTuple) = apply_toNT(check_adam_cache, B) -function check_momentum_cache(C::AbstractCache) +@doc raw""" +This checks if the momentum cache was set up in the right way +""" +function check_momentum_cache(C::AbstractCache{T}, tol= T(10) * eps(T)) where T @test typeof(C) <: MomentumCache @test propertynames(C) == (:B,) @test typeof(C.B) <: StiefelLieAlgHorMatrix @@ -32,12 +26,33 @@ function check_momentum_cache(C::AbstractCache) end check_momentum_cache(B::NamedTuple) = apply_toNT(check_momentum_cache, B) -function check_gradient_cache(C::AbstractCache) +@doc raw""" +This checks if the gradient cache was set up in the right way +""" +function check_gradient_cache(C::AbstractCache{T}) where T @test typeof(C) <: GradientCache @test propertynames(C) == () end check_gradient_cache(B::NamedTuple) = apply_toNT(check_gradient_cache, B) -check_adam_cache(o₁.cache[2]) -check_momentum_cache(o₂.cache[2]) -check_gradient_cache(o₃.cache[2]) +@doc raw""" +This checks if all the caches are set up in the right way for the `MultiHeadAttention` layer with Stiefel weights. + +TODO: +- [ ] `BFGSOptimizer` !! 
+""" +function test_cache_setups_for_optimizer_for_multihead_attention_layer(T::Type, dim::Int, n_heads::Int) + @assert dim % n_heads == 0 + model = MultiHeadAttention(dim, n_heads, Stiefel=true) + ps = initialparameters(CPU(), T, model) + + o₁ = Optimizer(AdamOptimizer(), ps) + o₂ = Optimizer(MomentumOptimizer(), ps) + o₃ = Optimizer(GradientOptimizer(), ps) + + check_adam_cache(o₁.cache) + check_momentum_cache(o₂.cache) + check_gradient_cache(o₃.cache) +end + +test_cache_setups_for_optimizer_for_multihead_attention_layer(Float32, 64, 8) \ No newline at end of file diff --git a/test/transformer_related/multi_head_attention_stiefel_retraction.jl b/test/transformer_related/multi_head_attention_stiefel_retraction.jl index 842069da5..54f6214e8 100644 --- a/test/transformer_related/multi_head_attention_stiefel_retraction.jl +++ b/test/transformer_related/multi_head_attention_stiefel_retraction.jl @@ -1,7 +1,3 @@ -""" -This is a test for that checks if the retractions (geodesic and Cayley for now) map from StiefelLieAlgHorMatrix to StiefelManifold when used with MultiHeadAttention. -""" - import Random, Test, Lux, LinearAlgebra, KernelAbstractions using GeometricMachineLearning, Test @@ -9,37 +5,40 @@ using GeometricMachineLearning: geodesic using GeometricMachineLearning: cayley using GeometricMachineLearning: init_optimizer_cache -dim = 64 -n_heads = 8 -Dₕ = dim÷8 -tol = eps(Float32) -T = Float32 -backend = KernelAbstractions.CPU() - -model = MultiHeadAttention(dim, n_heads, Stiefel=true) - -ps = initialparameters(backend, T, model) - -cache = init_optimizer_cache(MomentumOptimizer(), ps) - -E = StiefelProjection(dim, Dₕ, T) -function check_retraction_geodesic(A::AbstractMatrix) +@doc raw""" +This function computes the geodesic retraction of an element of `StiefelLieAlgHorMatrix` and then checks if the resulting element is `StiefelProjection`. +""" +function check_retraction_geodesic(A::AbstractMatrix{T}, tol=eps(T)) where T A_retracted = geodesic(A) @test typeof(A_retracted) <: StiefelManifold - @test LinearAlgebra.norm(A_retracted - E) < tol + @test LinearAlgebra.norm(A_retracted - StiefelProjection(A_retracted)) < tol end check_retraction_geodesic(cache::NamedTuple) = apply_toNT(check_retraction_geodesic, cache) check_retraction_geodesic(B::MomentumCache) = check_retraction_geodesic(B.B) -check_retraction_geodesic(cache) - -E = StiefelProjection(dim, Dₕ) -function check_retraction_cayley(A::AbstractMatrix) +@doc raw""" +This function computes the cayley retraction of an element of `StiefelLieAlgHorMatrix` and then checks if the resulting element is `StiefelProjection`. +""" +function check_retraction_cayley(A::AbstractMatrix{T}, tol=eps(T)) where T A_retracted = cayley(A) @test typeof(A_retracted) <: StiefelManifold - @test LinearAlgebra.norm(A_retracted - E) < tol + @test LinearAlgebra.norm(A_retracted - StiefelProjection(A_retracted)) < tol end check_retraction_cayley(cache::NamedTuple) = apply_toNT(check_retraction_cayley, cache) check_retraction_cayley(B::MomentumCache) = check_retraction_cayley(B.B) -check_retraction_cayley(cache) +@doc raw""" +This is a test for that checks if the retractions (geodesic and Cayley for now) map from `StiefelLieAlgHorMatrix` to `StiefelManifold` when used with `MultiHeadAttention`. 
+""" +function test_multi_head_attention_retraction(T::Type, dim, n_heads, tol=eps(T), backend=KernelAbstractions.CPU()) + model = MultiHeadAttention(dim, n_heads, Stiefel=true) + + ps = initialparameters(backend, T, model) + cache = init_optimizer_cache(MomentumOptimizer(), ps) + + check_retraction_geodesic(cache) + + check_retraction_cayley(cache) +end + +test_multi_head_attention_retraction(Float32, 64, 8) \ No newline at end of file diff --git a/test/transformer_related/multi_head_attention_stiefel_setup.jl b/test/transformer_related/multi_head_attention_stiefel_setup.jl index 3b8cc6cbf..742839331 100644 --- a/test/transformer_related/multi_head_attention_stiefel_setup.jl +++ b/test/transformer_related/multi_head_attention_stiefel_setup.jl @@ -3,25 +3,36 @@ import Random, Test, Lux, LinearAlgebra, KernelAbstractions using GeometricMachineLearning, Test using GeometricMachineLearning: init_optimizer_cache -T = Float32 -model = MultiHeadAttention(64, 8, Stiefel=true) -ps = initialparameters(KernelAbstractions.CPU(), T, model) -tol = 10*eps(T) - -function check_setup(A::AbstractMatrix) +@doc raw""" +This checks for an arbitrary matrix ``A\in\mathbb{R}^{N\times{}n}`` if ``A\in{}St(n,N)``. +""" +function check_setup(A::AbstractMatrix{T}, tol=T(10)*eps(T)) where T @test typeof(A) <: StiefelManifold @test check(A) < tol end check_setup(ps::NamedTuple) = apply_toNT(check_setup, ps) -check_setup(ps) -######## check if the gradients are set up the correct way -function check_grad_setup(B::AbstractMatrix) +@doc raw""" +This checks for an arbitrary matrix ``B\in\mathbb{R}^{N\times{}N}`` if ``B\in\mathfrak{g}^\mathrm{hor}``. +""" +function check_grad_setup(B::AbstractMatrix{T}, tol=T(10)*eps(T)) where T @test typeof(B) <: StiefelLieAlgHorMatrix @test LinearAlgebra.norm(B) < tol end check_grad_setup(gx::NamedTuple) = apply_toNT(check_grad_setup, gx) check_grad_setup(B::MomentumCache) = check_grad_setup(B.B) -gx = init_optimizer_cache(MomentumOptimizer(), ps) -check_grad_setup(gx) +@doc raw""" +Check if `initialparameters` and `init_optimizer_cache` do the right thing for `MultiHeadAttentionLayer`. 
+""" +function check_multi_head_attention_stiefel_setup(T::Type, N::Int, n::Int) + model = MultiHeadAttention(N, n, Stiefel=true) + ps = initialparameters(KernelAbstractions.CPU(), T, model) + + check_setup(ps) + + gx = init_optimizer_cache(MomentumOptimizer(), ps) + check_grad_setup(gx) +end + +check_multi_head_attention_stiefel_setup(Float32, 64, 8) \ No newline at end of file diff --git a/test/transformer_related/transformer_application.jl b/test/transformer_related/transformer_application.jl index 7fc159a8b..6729c889e 100644 --- a/test/transformer_related/transformer_application.jl +++ b/test/transformer_related/transformer_application.jl @@ -1,5 +1,8 @@ using Test, KernelAbstractions, GeometricMachineLearning +@doc raw""" +This tests if the size of the input array is kept constant when fed into the transformer (for a matrix and a tensor) +""" function transformer_application_test(T, dim, n_heads, L, seq_length=8, batch_size=10) model₁ = Chain(Transformer(dim, n_heads, L, Stiefel=false), ResNet(dim)) model₂ = Chain(Transformer(dim, n_heads, L, Stiefel=true), ResNet(dim)) @@ -8,11 +11,11 @@ function transformer_application_test(T, dim, n_heads, L, seq_length=8, batch_si ps₂ = initialparameters(KernelAbstractions.CPU(), T, model₂) input₁ = rand(T, dim, seq_length, batch_size) - input₂ = rand(T, dim, seq_length, batch_size) - + input₂ = rand(T, dim, seq_length) + @test size(model₁(input₁, ps₁)) == size(input₁) - @test size(model₁(input₂, ps₁)) == size(input₂) @test size(model₂(input₁, ps₂)) == size(input₁) + @test size(model₁(input₂, ps₁)) == size(input₂) @test size(model₂(input₂, ps₂)) == size(input₂) end diff --git a/test/transformer_related/transformer_gradient.jl b/test/transformer_related/transformer_gradient.jl index e049933a6..2a53673fc 100644 --- a/test/transformer_related/transformer_gradient.jl +++ b/test/transformer_related/transformer_gradient.jl @@ -1,5 +1,8 @@ using Test, KernelAbstractions, GeometricMachineLearning, Zygote, LinearAlgebra +@doc raw""" +This checks if the gradients of the transformer change the type in case of the Stiefel manifold, and checks if they stay the same in the case of regular weights. +""" function transformer_gradient_test(T, dim, n_heads, L, seq_length=8, batch_size=10) model₁ = Chain(Transformer(dim, n_heads, L, Stiefel=false), ResNet(dim)) model₂ = Chain(Transformer(dim, n_heads, L, Stiefel=true), ResNet(dim)) diff --git a/test/transformer_related/transformer_optimizer.jl b/test/transformer_related/transformer_optimizer.jl index aa3968c62..e6421bdb7 100644 --- a/test/transformer_related/transformer_optimizer.jl +++ b/test/transformer_related/transformer_optimizer.jl @@ -1,5 +1,8 @@ using Test, KernelAbstractions, GeometricMachineLearning, Zygote, LinearAlgebra +@doc raw""" +This function tests if the `GradientOptimzier`, `MomentumOptimizer`, `AdamOptimizer` and `BFGSOptimizer` act on the neural network weights via `optimization_step!`. 
+""" function transformer_gradient_test(T, dim, n_heads, L, seq_length=8, batch_size=10) model = Chain(Transformer(dim, n_heads, L, Stiefel=true), ResNet(dim)) model = Transformer(dim, n_heads, L, Stiefel=true) @@ -14,15 +17,19 @@ function transformer_gradient_test(T, dim, n_heads, L, seq_length=8, batch_size= o₁ = Optimizer(GradientOptimizer(), ps) o₂ = Optimizer(MomentumOptimizer(), ps) o₃ = Optimizer(AdamOptimizer(), ps) + o₄ = Optimizer(BFGSOptimizer(), ps) ps₁ = deepcopy(ps) ps₂ = deepcopy(ps) ps₃ = deepcopy(ps) + ps₄ = deepcopy(ps) optimization_step!(o₁, model, ps₁, dx) optimization_step!(o₂, model, ps₂, dx) optimization_step!(o₃, model, ps₃, dx) - @test typeof(ps₁) == typeof(ps₂) == typeof(ps₃) == typeof(ps) + optimization_step!(o₄, model, ps₄, dx) + @test typeof(ps₁) == typeof(ps₂) == typeof(ps₃) == typeof(ps₄) == typeof(ps) + @test ps₁[1].PQ.head_1 ≉ ps₂[1].PQ.head_1 ≉ ps₃[1].PQ.head_1 ≉ ps₄[1].PQ.head_1 ≉ ps[1].PQ.head_1 end transformer_gradient_test(Float32, 10, 5, 4) \ No newline at end of file diff --git a/test/transformer_related/transformer_setup.jl b/test/transformer_related/transformer_setup.jl index 42ac78995..1311ce26c 100644 --- a/test/transformer_related/transformer_setup.jl +++ b/test/transformer_related/transformer_setup.jl @@ -1,5 +1,8 @@ using Test, KernelAbstractions, GeometricMachineLearning +@doc raw""" +This function tests the setup of the transformer with Stiefel weights. +""" function transformer_setup_test(dim, n_heads, L, T) model = Transformer(dim, n_heads, L, Stiefel=true) ps = initialparameters(KernelAbstractions.CPU(), T, model) From 35b3db6aacb115ff14d7fbf9034eddcc1c204613 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 19 Dec 2023 13:07:38 +0100 Subject: [PATCH 29/96] Renamed file to something more descriptive. --- .../{data_loader.jl => data_loader_optimization_step.jl} | 3 +++ 1 file changed, 3 insertions(+) rename test/data_loader/{data_loader.jl => data_loader_optimization_step.jl} (84%) diff --git a/test/data_loader/data_loader.jl b/test/data_loader/data_loader_optimization_step.jl similarity index 84% rename from test/data_loader/data_loader.jl rename to test/data_loader/data_loader_optimization_step.jl index 880212f5b..0ac67a648 100644 --- a/test/data_loader/data_loader.jl +++ b/test/data_loader/data_loader_optimization_step.jl @@ -1,5 +1,8 @@ using GeometricMachineLearning, Test, Zygote +@doc raw""" +This tests the gradient optimizer called together with the `DataLoader` (applied to a tensor). +""" function test_data_loader(sys_dim, n_time_steps, n_params, T=Float32) data = randn(T, sys_dim, n_time_steps, n_params) dl = DataLoader(data) From 66f3279c3125b35e7995727215e44eb6a676c26d Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 19 Dec 2023 13:16:21 +0100 Subject: [PATCH 30/96] Renamed file to something meaningful and made the test more readable. --- test/data_loader/batch.jl | 33 ----------------- .../optimizer_functor_with_adam.jl | 37 +++++++++++++++++++ 2 files changed, 37 insertions(+), 33 deletions(-) delete mode 100644 test/data_loader/batch.jl create mode 100644 test/data_loader/optimizer_functor_with_adam.jl diff --git a/test/data_loader/batch.jl b/test/data_loader/batch.jl deleted file mode 100644 index 4b7c17309..000000000 --- a/test/data_loader/batch.jl +++ /dev/null @@ -1,33 +0,0 @@ -using AbstractNeuralNetworks: AbstractExplicitLayer, Chain -using GeometricMachineLearning, Test - -""" -This creates a dummy MNIST data set. - -TODO: include tests to check if all elements are batched! 
-""" -function create_dummy_mnist(;T=Float32, dim₁=6, dim₂=6, n_images=10) - rand(T, dim₁, dim₂, n_images), Int.(floor.(10*rand(T, n_images))) -end - -dl = DataLoader(create_dummy_mnist()...; patch_length=3) -# batch size is equal to two -batch = Batch(2) - -# this function should be made part of AbstractNeuralNetworks !!! -function Chain(c::Chain, d::AbstractExplicitLayer) - Chain(c.layers..., d) -end - -# input dim is 3^2 = 9 -model = Chain(Transformer(dl.input_dim, 3, 2; Stiefel=true), Classification(dl.input_dim, 10, σ)) -ps = initialparameters(CPU(), Float32, model) - -loss₁ = GeometricMachineLearning.loss(model, ps, dl) - -opt = Optimizer(AdamOptimizer(), ps) -loss_average = optimize_for_one_epoch!(opt, model, ps, dl, batch) - -loss₃ = GeometricMachineLearning.loss(model, ps, dl) - -@test loss₁ > loss_average > loss₃ \ No newline at end of file diff --git a/test/data_loader/optimizer_functor_with_adam.jl b/test/data_loader/optimizer_functor_with_adam.jl new file mode 100644 index 000000000..2caf538b2 --- /dev/null +++ b/test/data_loader/optimizer_functor_with_adam.jl @@ -0,0 +1,37 @@ +using AbstractNeuralNetworks: AbstractExplicitLayer, Chain +using GeometricMachineLearning, Test + +# this function should be made part of AbstractNeuralNetworks !!! +function Chain(c::Chain, d::AbstractExplicitLayer) + Chain(c.layers..., d) +end + +""" +This creates a dummy MNIST data set; i.e. its output are two tensors that look similarly to the ones in the MNIST data set. +""" +function create_dummy_mnist(;T=Float32, dim₁=6, dim₂=6, n_images=10) + rand(T, dim₁, dim₂, n_images), Int.(floor.(10*rand(T, n_images))) +end + +function test_optimizer_functor_with_adam(;T=Float32, dim₁=6, dim₂=6, n_images=10, patch_length=patch_length) + dl = DataLoader(create_dummy_mnist(T=T, dim₁=dim₁, dim₂=dim₂, n_images=n_images)...; patch_length=3) + + # batch size is equal to two + batch = Batch(2) + + # input dim is dim₁ / patch_length * dim₂ / pach_length; the transformer is called with dim₁ / patch_length and two layers + model = Chain(Transformer(dl.input_dim, dim₁ / patch_length, 2; Stiefel=true), Classification(dl.input_dim, 10, σ)) + ps = initialparameters(CPU(), Float32, model) + + loss₁ = GeometricMachineLearning.loss(model, ps, dl) + + opt = Optimizer(AdamOptimizer(), ps) + loss_average = optimize_for_one_epoch!(opt, model, ps, dl, batch) + + loss₃ = GeometricMachineLearning.loss(model, ps, dl) + + #check if the loss decreases during optimization + @test loss₁ > loss_average > loss₃ +end + +test_optimizer_functor_with_adam() \ No newline at end of file From 257aeeea0f46e1bcace46852c4bfd98dcd550a26 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 19 Dec 2023 17:24:55 +0100 Subject: [PATCH 31/96] Added descriptions and a wrapper for using the custom loss function with NeuralNetwork. --- test/data_loader/mnist_utils.jl | 61 ++++++++++++++++++++------------- 1 file changed, 38 insertions(+), 23 deletions(-) diff --git a/test/data_loader/mnist_utils.jl b/test/data_loader/mnist_utils.jl index fe685c647..f6c637a58 100644 --- a/test/data_loader/mnist_utils.jl +++ b/test/data_loader/mnist_utils.jl @@ -7,9 +7,8 @@ using GeometricMachineLearning: index_conversion import Zygote """ -This function tests if all the patch nubmers are assigned correctly, i.e. tests patch_index. +This function tests is used to test if all the patch nubmers are assigned correctly with `index_conversion`, i.e. tests `patch_index` by inverting it. 
""" -### Test if mapping is invertible function reverse_index(i::Integer, j::Integer, patch_length=7) opt_i = i%patch_length==0 ? 1 : 0 within_patch_index = i%patch_length + opt_i*patch_length, (i÷patch_length - opt_i + 1) @@ -20,41 +19,57 @@ function reverse_index(i::Integer, j::Integer, patch_length=7) (patch_index[1]-1)*patch_length + within_patch_index[1], (patch_index[2]-1)*patch_length + within_patch_index[2] end -# test if this is the inverse of the other batch index conversion! -patch_lengths = (2, 4, 7, 14) -for patch_length in patch_lengths - number_of_patches = (28÷patch_length)^2 - for i in 1:28 - for j in 1:28 - @test reverse_index(index_conversion(i, j, patch_length, number_of_patches)..., patch_length) == (i, j) +""" +This function uses `reverse_index` to test `index_conversion`, i.e. checks if the functions are invertible. +""" +function test_index_conversion(patch_lengths=(2, 4, 7, 14)) + for patch_length in patch_lengths + number_of_patches = (28÷patch_length)^2 + for i in 1:28 + for j in 1:28 + @test reverse_index(index_conversion(i, j, patch_length, number_of_patches)..., patch_length) == (i, j) + end end end end +""" +This function tests if `onehotbatch` does what it should; i.e. convert a vector of integers to a one-hot-tensor. +""" function test_onehotbatch(V::AbstractVector{T}) where {T<:Integer} V_encoded = onehotbatch(V) - for i in length(V) + for (i, v) in zip(length(V), V) @test sum(V_encoded[:,1,i]) == 1 + @test V_encoded[v, 1, i] == 1 end end test_onehotbatch([1, 2, 5, 0]) -####### MNIST-like data set -train_x = rand(Float32, 28,28,100) -train_y = Int.(ceil.(10*rand(Float32, 100))) .- 1 +@doc raw""" +Generates an MNIST-like dummy data set. +""" +function generate_dummy_mnist(dim₁=28, dim₂=18, number_images=100, T=Float32) + train_x = rand(T, dim₁, dim₂, number_images) + train_y = Int.(ceil.(10 * rand(T, number_images))) .- 1 + train_x, train_y +end +function test_optimizer_for_classification_layer(; dim₁=28, dim₂=28, number_images=100, patch_length=7, T=Float32) + dl = DataLoader(generate_dummy_mnist(dim₁, dim₂, number_images, T); patch_length=patch_length) -dl = DataLoader(train_x, train_y) + activation_function(x) = tanh.(x) + model = Classification((dim₁ ÷ patch_length) * (dim₂ ÷ patch_length), 10, activation_function) + ps = initialparameters(CPU(), T, model) + loss₁ = GeometricMachineLearning.loss(model, ps, dl) -activation_function(x) = tanh.(x) -model = Classification(49, 10, activation_function) -ps = initialparameters(CPU(), Float32, model) -loss₁ = GeometricMachineLearning.loss(model, ps, dl) + opt = Optimizer(GradientOptimizer(), ps) + dx = Zygote.gradient(ps -> GeometricMachineLearning.loss(model, ps, dl), ps)[1] + optimization_step!(opt, model, ps, dx) + loss₂ = GeometricMachineLearning.loss(model, ps, dl) -opt = Optimizer(GradientOptimizer(), ps) -dx = Zygote.gradient(ps -> GeometricMachineLearning.loss(model, ps, dl), ps)[1] -optimization_step!(opt, model, ps, dx) -loss₂ = GeometricMachineLearning.loss(model, ps, dl) + @test loss₂ < loss₁ +end -@test loss₂ < loss₁ +test_index_conversion() +test_optimizer_for_classification_layer() \ No newline at end of file From 9ca60ed7280a213ebff112f2be18b229d0eccc20 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 19 Dec 2023 17:25:26 +0100 Subject: [PATCH 32/96] Added descriptions and a wrapper for using the custom loss function with NeuralNetwork. 
--- src/data_loader/data_loader.jl | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/src/data_loader/data_loader.jl b/src/data_loader/data_loader.jl index ae6a45089..24fb90a21 100644 --- a/src/data_loader/data_loader.jl +++ b/src/data_loader/data_loader.jl @@ -93,25 +93,35 @@ It takes as input: """ function loss(model::Union{Chain, AbstractExplicitLayer}, ps::Union{Tuple, NamedTuple}, input::AT, output::BT) where {T, T1, AT<:AbstractArray{T, 3}, BT<:AbstractArray{T1, 3}} output_estimate = model(input, ps) - norm(output - output_estimate)/norm(output) # /T(sqrt(size(output, 2)*size(output, 3))) + norm(output - output_estimate) / norm(output) # /T(sqrt(size(output, 2)*size(output, 3))) end +@doc raw""" +The *autoencoder loss*. +""" function loss(model::Chain, ps::Tuple, input::BT) where {T, BT<:AbstractArray{T, 3}} output_estimate = model(input, ps) - norm(output_estimate - input)/norm(input) # /T(sqrt(size(input, 2)*size(input, 3))) + norm(output_estimate - input) / norm(input) # /T(sqrt(size(input, 2)*size(input, 3))) end function loss(model::Chain, ps::Tuple, input::BT) where {T, BT<:AbstractArray{T, 2}} output_estimate = model(input, ps) - norm(output_estimate - input)/norm(input) # /T(sqrt(size(input, 2))) + norm(output_estimate - input) / norm(input) # /T(sqrt(size(input, 2))) end nt_diff(A, B) = (q = A.q - B.q, p = A.p - B.p) nt_norm(A) = norm(A.q) + norm(A.p) +@doc raw""" +Loss function that takes a `NamedTuple` as input. This should be used with a SympNet (or other neural network-based integrator). It computes: + +```math +\mathtt{loss}(\mathcal{NN}, \mathtt{ps}, \begin{pmatrix} q \\ p \end{pmatrix}, \begin{pmatrix} q' \\ p' \end{pmatrix}) \mapsto \left|| \mathcal{NN}(\begin{pmatrix} q \\ p \end{pmatrix}) - \begin{pmatrix} q' \\ p' \end{pmatrix} \right|| / \left|| \begin{pmatrix} q \\ p \end{pmatrix} \right|| +``` +""" function loss(model::Chain, ps::Tuple, input::NamedTuple, output::NamedTuple) output_estimate = model(input, ps) - nt_norm(nt_diff(output_estimate, output))/nt_norm(input) + nt_norm(nt_diff(output_estimate, output)) / nt_norm(input) end @doc raw""" @@ -136,6 +146,13 @@ function loss(model::Chain, ps::Tuple, dl::DataLoader{T, BT}) where {T, BT<:Name loss(model, ps, dl.data) end +@doc raw""" +Wrapper if we deal with a neural network. +""" +function loss(nn::NeuralNetwork, dl::DataLoader) + loss(nn.model, nn.params, dl) +end + @doc raw""" Computes the accuracy (as opposed to the loss) of a neural network classifier. From fced8d80298e34b304284cb00af470487d980f5c Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 19 Dec 2023 17:26:04 +0100 Subject: [PATCH 33/96] Added documentation. --- src/data_loader/batch.jl | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/src/data_loader/batch.jl b/src/data_loader/batch.jl index bbb9e4e39..9a76d0b26 100644 --- a/src/data_loader/batch.jl +++ b/src/data_loader/batch.jl @@ -15,6 +15,9 @@ end hasseqlength(::Batch{<:Integer}) = true hasseqlength(::Batch{<:Nothing}) = false +@doc raw""" +The functor for batch is called with an instance on `DataLoader`. It then returns a tuple of batch indices: ``(\mathcal{I}_1, \ldots, \mathcal{I}_{\lceil\mathtt{dl.n\_params/batch\_size}\rceil})``, where the index runs from 1 to the number of batches, which is the number of parameters divided by the batch size (rounded up). 
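+
+For illustration, a short sketch of how this is called (the sizes are made up):
+
+```julia
+dl = DataLoader(rand(Float32, 4, 100, 20))  # system dim 4, 100 time steps, 20 parameters
+batch = Batch(7)
+batch(dl)  # ⌈20 / 7⌉ = 3 index batches that together cover all 20 parameters
+```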
+""" function (batch::Batch{<:Nothing})(dl::DataLoader{T, AT}) where {T, AT<:AbstractArray{T, 3}} indices = shuffle(1:dl.n_params) n_batches = Int(ceil(dl.n_params/batch.batch_size)) @@ -28,6 +31,9 @@ function (batch::Batch{<:Nothing})(dl::DataLoader{T, AT}) where {T, AT<:Abstract batches end +@doc raw""" +The functor for batch is called with an instance on `DataLoader`. It then returns a tuple of batch indices: ``(\mathcal{I}_1, \ldots, \mathcal{I}_{\lceil\mathtt{(dl.input\_time\_steps-1)/batch\_size}\rceil})``, where the index runs from 1 to the number of batches, which is the number of input time steps (minus one) divided by the batch size (and rounded up). +""" function (batch::Batch{<:Nothing})(dl::DataLoader{T, AT}) where {T, BT<:AbstractMatrix{T}, AT<:Union{BT, NamedTuple{(:q, :p), Tuple{BT, BT}}}} indices = shuffle(1:dl.input_time_steps) n_batches = Int(ceil((dl.input_time_steps-1)/batch.batch_size)) @@ -88,7 +94,7 @@ function optimize_for_one_epoch!(opt::Optimizer, nn::NeuralNetwork, dl::DataLoad end """ -TODO: Add ProgressMeter!!! +This routine is called if a `DataLoader` storing *symplectic data* (i.e. a `NamedTuple`) is supplied. """ function optimize_for_one_epoch!(opt::Optimizer, model, ps::Union{Tuple, NamedTuple}, dl::DataLoader{T, AT}, batch::Batch, loss) where {T, AT<:NamedTuple} count = 0 @@ -107,7 +113,16 @@ function optimize_for_one_epoch!(opt::Optimizer, model, ps::Union{Tuple, NamedTu total_error/count end - +@doc raw""" +A functor for `Optimizer`. It is called with: + - `nn::NeuralNetwork` + - `dl::DataLoader` + - `batch::Batch` + - `n_epochs::Int` + - `loss` + +The last argument is a function through which `Zygote` differentiates. This argument is optional; if it is not supplied `GeometricMachineLearning` defaults to an appropriate loss for the `DataLoader`. +""" function (o::Optimizer)(nn::NeuralNetwork, dl::DataLoader, batch::Batch, n_epochs::Int, loss) progress_object = ProgressMeter.Progress(n_epochs; enabled=true) loss_array = zeros(n_epochs) From d95268daec76b606953b0a2fd1dddbcb00bf9f22 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 19 Dec 2023 17:53:14 +0100 Subject: [PATCH 34/96] Added constructor for optimizer if input arguments are flipped. --- src/optimizers/optimizer.jl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/optimizers/optimizer.jl b/src/optimizers/optimizer.jl index 7cd78d037..e6e25cdaa 100644 --- a/src/optimizers/optimizer.jl +++ b/src/optimizers/optimizer.jl @@ -22,6 +22,8 @@ function Optimizer(m::OptimizerMethod, nn::NeuralNetwork) Optimizer(m, nn.params) end +Optimizer(nn::NeuralNetwork, m::OptimizerMethod) = Optimizer(m, nn) + ####################################################################################### # optimization step function From 2abed8793ec2e604911eadcae999c31bf73c593a Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 19 Dec 2023 18:09:44 +0100 Subject: [PATCH 35/96] Added a comment saying that the constructor can be called with DataLoader as input. --- src/architectures/sympnet.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/architectures/sympnet.jl b/src/architectures/sympnet.jl index 8e017f125..b07d5d84c 100644 --- a/src/architectures/sympnet.jl +++ b/src/architectures/sympnet.jl @@ -7,7 +7,7 @@ TODO: abstract type SympNet{AT} <: Architecture end @doc raw""" -`LASympNet` is called with **a single input argument**, the **system dimension**. 
Optional input arguments are: +`LASympNet` is called with **a single input argument**, the **system dimension**, or with an instance of `DataLoader`. Optional input arguments are: - `depth::Int`: The number of linear layers that are applied. The default is 5. - `nhidden::Int`: The number of hidden layers (i.e. layers that are **not** input or output layers). The default is 2. - `activation`: The activation function that is applied. By default this is `tanh`. @@ -32,7 +32,7 @@ end @inline AbstractNeuralNetworks.dim(arch::SympNet) = arch.dim @doc raw""" -`GSympNet` is called with **a single input argument**, the **system dimension**. Optional input arguments are: +`GSympNet` is called with **a single input argument**, the **system dimension**, or with an instance of `DataLoader`. Optional input arguments are: - `upscaling_dimension::Int`: The *upscaling dimension* of the gradient layer. See the documentation for `GradientLayerQ` and `GradientLayerP` for further explanation. The default is `2*dim`. - `nhidden::Int`: The number of hidden layers (i.e. layers that are **not** input or output layers). The default is 2. - `activation`: The activation function that is applied. By default this is `tanh`. From 0f62b8f2e96949b442b1ad2b009818ffe4b9d520 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 19 Dec 2023 18:10:14 +0100 Subject: [PATCH 36/96] Added default for number of epochs. --- src/data_loader/batch.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/data_loader/batch.jl b/src/data_loader/batch.jl index 9a76d0b26..10b434ffd 100644 --- a/src/data_loader/batch.jl +++ b/src/data_loader/batch.jl @@ -133,6 +133,6 @@ function (o::Optimizer)(nn::NeuralNetwork, dl::DataLoader, batch::Batch, n_epoch loss_array end -function (o::Optimizer)(nn::NeuralNetwork, dl::DataLoader, batch::Batch, n_epochs::Int) +function (o::Optimizer)(nn::NeuralNetwork, dl::DataLoader, batch::Batch, n_epochs::Int=1) o(nn, dl, batch, n_epochs, loss) end \ No newline at end of file From 3ab42f2c9874e83edaa54ce3c809e7d2332be8f4 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 19 Dec 2023 18:11:05 +0100 Subject: [PATCH 37/96] Combined matrix and tensor routines into one. Added another loss for autoencoder qp data. --- src/data_loader/data_loader.jl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/data_loader/data_loader.jl b/src/data_loader/data_loader.jl index 24fb90a21..2beb691b9 100644 --- a/src/data_loader/data_loader.jl +++ b/src/data_loader/data_loader.jl @@ -99,19 +99,19 @@ end @doc raw""" The *autoencoder loss*. """ -function loss(model::Chain, ps::Tuple, input::BT) where {T, BT<:AbstractArray{T, 3}} +function loss(model::Chain, ps::Tuple, input::BT) where {T, BT<:AbstractArray{T}} output_estimate = model(input, ps) norm(output_estimate - input) / norm(input) # /T(sqrt(size(input, 2)*size(input, 3))) end -function loss(model::Chain, ps::Tuple, input::BT) where {T, BT<:AbstractArray{T, 2}} - output_estimate = model(input, ps) - norm(output_estimate - input) / norm(input) # /T(sqrt(size(input, 2))) -end - nt_diff(A, B) = (q = A.q - B.q, p = A.p - B.p) nt_norm(A) = norm(A.q) + norm(A.p) +function loss(model::Chain, ps::Tuple, input::NT) where {T, AT<:AbstractArray{T}, NT<:NamedTuple{(:q, :p,), Tuple{AT, AT}}} + output_estimate = model(input, ps) + nt_norm(output_estimate - input) / nt_norm(input) +end + @doc raw""" Loss function that takes a `NamedTuple` as input. This should be used with a SympNet (or other neural network-based integrator). 
It computes: From 5d22855e9a98e50625fcc9f66e426fe9fd0ed8d3 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 19 Dec 2023 18:12:14 +0100 Subject: [PATCH 38/96] Commented out a section that is probably not needed. --- src/data_loader/tensor_assign.jl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/data_loader/tensor_assign.jl b/src/data_loader/tensor_assign.jl index 3faad0a99..4aac9bfc0 100644 --- a/src/data_loader/tensor_assign.jl +++ b/src/data_loader/tensor_assign.jl @@ -55,6 +55,7 @@ function assign_output_estimate(full_output::AbstractArray{T, 3}, prediction_win output_estimate end +#= """ This function draws random time steps and parameters and based on these assign the batch and the output. @@ -101,6 +102,7 @@ function draw_batch!(batch::AT, output::BT, data::AT, target::BT) where {T, T2, assign_batch!(batch, data, params, time_steps, ndrange=size(batch)) assign_batch!(output, target, params, time_steps, ndrange=size(output)) end +=# """ Used for differentiating assign_output_estimate (this appears in the loss). From 86485adb054c3c90df2c5ed725e974d44843e4f2 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 19 Dec 2023 18:12:41 +0100 Subject: [PATCH 39/96] Test data loader for qp data. --- test/data_loader/batch_data_loader_qp_test.jl | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 test/data_loader/batch_data_loader_qp_test.jl diff --git a/test/data_loader/batch_data_loader_qp_test.jl b/test/data_loader/batch_data_loader_qp_test.jl new file mode 100644 index 000000000..9e1273e49 --- /dev/null +++ b/test/data_loader/batch_data_loader_qp_test.jl @@ -0,0 +1,33 @@ +using GeometricMachineLearning +using Test + +function dummy_qp_data_matrix(dim=2, number_data_points=200, T=Float32) + (q = rand(T, dim, number_data_points), p = (rand(T, dim, number_data_points))) +end + +function dummy_qp_data_tensor(dim=2, number_of_time_steps=100, number_of_parameters=20, T=Float32) + (q = rand(T, dim, number_of_time_steps, number_of_parameters), p = (rand(T, dim, number_of_time_steps, number_of_parameters))) +end + +function test_data_loader(dim=2, number_of_time_steps=100, number_of_parameters=20, batch_size, T=Float32) + dl1 = DataLoader(dummy_qp_data_matrix(dim, number_of_time_steps, T)) + dl2 = DataLoader(dummy_qp_data_tensor(dim, number_of_time_steps, number_of_parameters)) + + arch1 = GSympNet(dl1) + arch2 = GSympNet(dl2) + + nn1 = NeuralNetwork(arch1, CPU(), T) + nn2 = NeuralNetwork(arch2, CPU(), T) + + loss1 = loss(nn1, dl1) + loss2 = loss(nn2, dl2) + + batch = Batch(batch_size) + o₁ = Optimizer(GradientOptimizer(), nn1) + o₂ = Optimizer(GradientOptimizer(), nn2) + + o₁(nn1, dl1, batch) + o₂(nn2, dl2, batch) +end + +test_data_loader() \ No newline at end of file From cac1f185da9ce28e6e3e63f0e9d9d68fae33439c Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 19 Dec 2023 18:13:08 +0100 Subject: [PATCH 40/96] Renamed and added tests. 
--- test/runtests.jl | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/test/runtests.jl b/test/runtests.jl index af35e6063..ae9307766 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -34,6 +34,8 @@ using SafeTestsets @safetestset "Training " begin include("train!/test_training.jl") end @safetestset "NeuralNetSolution " begin include("train!/test_neuralnet_solution.jl") end @safetestset "Problem & Integrators " begin include("integrator/test_integrator.jl") end -@safetestset "Data Loader #1 " begin include("data_loader/data_loader.jl") end -@safetestset "Data Loader #2 " begin include("data_loader/mnist_utils.jl") end -@safetestset "Data Loader #3 (Batch struct) " begin include("data_loader/batch.jl") end \ No newline at end of file + +@safetestset "Test data loader for q and p data " begin include("data_loader/batch_data_loader_qp_test.jl") end +@safetestset "Test mnist_utils. " begin include("data_loader/mnist_utils.jl") end +@safetestset "Test the data loader in combination with optimization_step! " begin include("data_loader/data_loader_optimization_step.jl") end +@safetestset "Optimizer functor with data loader for Adam " begin include("data_loader/optimizer_functor_with_adam.jl") \ No newline at end of file From 4528053bc135b3f495ac732804cd913e4187cc96 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 19 Dec 2023 18:14:24 +0100 Subject: [PATCH 41/96] Adjusted symplectic matrix. --- src/arrays/symplectic.jl | 80 ++++++++++++---------------------------- 1 file changed, 24 insertions(+), 56 deletions(-) diff --git a/src/arrays/symplectic.jl b/src/arrays/symplectic.jl index 1dcc59658..3a4f138d6 100644 --- a/src/arrays/symplectic.jl +++ b/src/arrays/symplectic.jl @@ -1,75 +1,43 @@ @doc raw""" - `SymplecticMatrix(n)` +`SymplecticPotential(n)` Returns a symplectic matrix of size 2n x 2n ```math \begin{pmatrix} -0 & & & 1 & & & \\ -& \ddots & & & \ddots & & \\ -& & 0 & & & 1 \\ --1 & & & 0 & & & \\ -& \ddots & & & \ddots & & \\ -& & -1 & & 0 & \\ +\mathbb{O} & \mathbb{I} \\ +\mathbb{O} & -\mathbb{I} \\ \end{pmatrix} ``` - - `SymplecticProjection(N,n)` -Returns the symplectic projection matrix E of the Stiefel manifold, i.e. π: Sp(2N) → Sp(2n,2N), A ↦ AE - """ -#= -function SymplecticMatrix(n::Int, T::DataType=Float64) - BandedMatrix((n => ones(T,n), -n => -ones(T,n)), (2n,2n)) -end - -SymplecticMatrix(T::DataType, n::Int) = SymplecticMatrix(n, T) - -@doc raw""" -```math -\begin{pmatrix} -I & 0 \\ -0 & 0 \\ -0 & I \\ -0 & 0 \\ -\end{pmatrix} -``` -""" -=# - -function SymplecticPotential(n::Int, T::DataType=Float64) - J = zeros(T, 2*n, 2*n) - J[1:n, (n+1):2*n] = one(ones(T, n, n)) - J[(n+1):2*n, 1:n] = -one(ones(T, n, n)) +function SymplecticPotential(backend, n2::Int, T::DataType=Float64) + @assert iseven(n2) + n = n2÷2 + J = KernelAbstractions.zeros(backend, T, 2*n, 2*n) + assign_ones_for_symplectic_potential! = assign_ones_for_symplectic_potential_kernel!(backend) + assign_ones_for_symplectic_potential!(J, n, ndrange=n) J end +SymplecticPotential(n::Int, T::DataType=Float64) = SymplecticPotential(CPU(), n, T) +SymplecticPotential(bakend, T::DataType, n::Int) = SymplecticPotential(backend, n, T) + SymplecticPotential(T::DataType, n::Int) = SymplecticPotential(n, T) -struct SymplecticProjection{T} <: AbstractMatrix{T} - N::Int - n::Int - SymplecticProjection(N, n, T = Float64) = new{T}(N,n) +@kernel function assign_ones_for_symplectic_potential_kernel!(J::AbstractMatrix{T}, n::Int) where T + i = @index(Global) + J[map_index_for_symplectic_potential(i, n)...] 
= i ≤ n ? one(T) : -one(T) end -function Base.getindex(E::SymplecticProjection,i,j) - if i ≤ E.n - if j == i - return 1. - end - return 0. - end - if j > E.n - if (j-E.n) == (i-E.N) - return 1. - end - return 0. +""" +This assigns the right index for the symplectic potential. To be used with `assign_ones_for_symplectic_potential_kernel!`. +""" +function map_index_for_symplectic_potential(i::Int, n::Int) + if i ≤ n + return (i, i+n) + else + return (i, i-n) end - return 0. -end - - -Base.parent(E::SymplecticProjection) = (E.N,E.n) -Base.size(E::SymplecticProjection) = (2*E.N,2*E.n) +end \ No newline at end of file From 6ed5c6259c3d91c3e833d31dc54f16766fff618c Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 19 Dec 2023 13:07:38 +0100 Subject: [PATCH 42/96] Renamed file to something more descriptive. --- .../{data_loader.jl => data_loader_optimization_step.jl} | 3 +++ 1 file changed, 3 insertions(+) rename test/data_loader/{data_loader.jl => data_loader_optimization_step.jl} (84%) diff --git a/test/data_loader/data_loader.jl b/test/data_loader/data_loader_optimization_step.jl similarity index 84% rename from test/data_loader/data_loader.jl rename to test/data_loader/data_loader_optimization_step.jl index 880212f5b..0ac67a648 100644 --- a/test/data_loader/data_loader.jl +++ b/test/data_loader/data_loader_optimization_step.jl @@ -1,5 +1,8 @@ using GeometricMachineLearning, Test, Zygote +@doc raw""" +This tests the gradient optimizer called together with the `DataLoader` (applied to a tensor). +""" function test_data_loader(sys_dim, n_time_steps, n_params, T=Float32) data = randn(T, sys_dim, n_time_steps, n_params) dl = DataLoader(data) From f4ddd3f6f3b8e1f98918146f6211ef0cc9d1afa8 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 19 Dec 2023 13:16:21 +0100 Subject: [PATCH 43/96] Renamed file to something meaningful and made the test more readable. --- test/data_loader/batch.jl | 33 ----------------- .../optimizer_functor_with_adam.jl | 37 +++++++++++++++++++ 2 files changed, 37 insertions(+), 33 deletions(-) delete mode 100644 test/data_loader/batch.jl create mode 100644 test/data_loader/optimizer_functor_with_adam.jl diff --git a/test/data_loader/batch.jl b/test/data_loader/batch.jl deleted file mode 100644 index 4b7c17309..000000000 --- a/test/data_loader/batch.jl +++ /dev/null @@ -1,33 +0,0 @@ -using AbstractNeuralNetworks: AbstractExplicitLayer, Chain -using GeometricMachineLearning, Test - -""" -This creates a dummy MNIST data set. - -TODO: include tests to check if all elements are batched! -""" -function create_dummy_mnist(;T=Float32, dim₁=6, dim₂=6, n_images=10) - rand(T, dim₁, dim₂, n_images), Int.(floor.(10*rand(T, n_images))) -end - -dl = DataLoader(create_dummy_mnist()...; patch_length=3) -# batch size is equal to two -batch = Batch(2) - -# this function should be made part of AbstractNeuralNetworks !!! 
-function Chain(c::Chain, d::AbstractExplicitLayer) - Chain(c.layers..., d) -end - -# input dim is 3^2 = 9 -model = Chain(Transformer(dl.input_dim, 3, 2; Stiefel=true), Classification(dl.input_dim, 10, σ)) -ps = initialparameters(CPU(), Float32, model) - -loss₁ = GeometricMachineLearning.loss(model, ps, dl) - -opt = Optimizer(AdamOptimizer(), ps) -loss_average = optimize_for_one_epoch!(opt, model, ps, dl, batch) - -loss₃ = GeometricMachineLearning.loss(model, ps, dl) - -@test loss₁ > loss_average > loss₃ \ No newline at end of file diff --git a/test/data_loader/optimizer_functor_with_adam.jl b/test/data_loader/optimizer_functor_with_adam.jl new file mode 100644 index 000000000..2caf538b2 --- /dev/null +++ b/test/data_loader/optimizer_functor_with_adam.jl @@ -0,0 +1,37 @@ +using AbstractNeuralNetworks: AbstractExplicitLayer, Chain +using GeometricMachineLearning, Test + +# this function should be made part of AbstractNeuralNetworks !!! +function Chain(c::Chain, d::AbstractExplicitLayer) + Chain(c.layers..., d) +end + +""" +This creates a dummy MNIST data set; i.e. its output are two tensors that look similarly to the ones in the MNIST data set. +""" +function create_dummy_mnist(;T=Float32, dim₁=6, dim₂=6, n_images=10) + rand(T, dim₁, dim₂, n_images), Int.(floor.(10*rand(T, n_images))) +end + +function test_optimizer_functor_with_adam(;T=Float32, dim₁=6, dim₂=6, n_images=10, patch_length=patch_length) + dl = DataLoader(create_dummy_mnist(T=T, dim₁=dim₁, dim₂=dim₂, n_images=n_images)...; patch_length=3) + + # batch size is equal to two + batch = Batch(2) + + # input dim is dim₁ / patch_length * dim₂ / pach_length; the transformer is called with dim₁ / patch_length and two layers + model = Chain(Transformer(dl.input_dim, dim₁ / patch_length, 2; Stiefel=true), Classification(dl.input_dim, 10, σ)) + ps = initialparameters(CPU(), Float32, model) + + loss₁ = GeometricMachineLearning.loss(model, ps, dl) + + opt = Optimizer(AdamOptimizer(), ps) + loss_average = optimize_for_one_epoch!(opt, model, ps, dl, batch) + + loss₃ = GeometricMachineLearning.loss(model, ps, dl) + + #check if the loss decreases during optimization + @test loss₁ > loss_average > loss₃ +end + +test_optimizer_functor_with_adam() \ No newline at end of file From 92ac2fe8415f49ed72c2e2f86d1423af321f234a Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 19 Dec 2023 17:24:55 +0100 Subject: [PATCH 44/96] Added descriptions and a wrapper for using the custom loss function with NeuralNetwork. --- test/data_loader/mnist_utils.jl | 61 ++++++++++++++++++++------------- 1 file changed, 38 insertions(+), 23 deletions(-) diff --git a/test/data_loader/mnist_utils.jl b/test/data_loader/mnist_utils.jl index fe685c647..f6c637a58 100644 --- a/test/data_loader/mnist_utils.jl +++ b/test/data_loader/mnist_utils.jl @@ -7,9 +7,8 @@ using GeometricMachineLearning: index_conversion import Zygote """ -This function tests if all the patch nubmers are assigned correctly, i.e. tests patch_index. +This function tests is used to test if all the patch nubmers are assigned correctly with `index_conversion`, i.e. tests `patch_index` by inverting it. """ -### Test if mapping is invertible function reverse_index(i::Integer, j::Integer, patch_length=7) opt_i = i%patch_length==0 ? 
1 : 0 within_patch_index = i%patch_length + opt_i*patch_length, (i÷patch_length - opt_i + 1) @@ -20,41 +19,57 @@ function reverse_index(i::Integer, j::Integer, patch_length=7) (patch_index[1]-1)*patch_length + within_patch_index[1], (patch_index[2]-1)*patch_length + within_patch_index[2] end -# test if this is the inverse of the other batch index conversion! -patch_lengths = (2, 4, 7, 14) -for patch_length in patch_lengths - number_of_patches = (28÷patch_length)^2 - for i in 1:28 - for j in 1:28 - @test reverse_index(index_conversion(i, j, patch_length, number_of_patches)..., patch_length) == (i, j) +""" +This function uses `reverse_index` to test `index_conversion`, i.e. checks if the functions are invertible. +""" +function test_index_conversion(patch_lengths=(2, 4, 7, 14)) + for patch_length in patch_lengths + number_of_patches = (28÷patch_length)^2 + for i in 1:28 + for j in 1:28 + @test reverse_index(index_conversion(i, j, patch_length, number_of_patches)..., patch_length) == (i, j) + end end end end +""" +This function tests if `onehotbatch` does what it should; i.e. convert a vector of integers to a one-hot-tensor. +""" function test_onehotbatch(V::AbstractVector{T}) where {T<:Integer} V_encoded = onehotbatch(V) - for i in length(V) + for (i, v) in zip(length(V), V) @test sum(V_encoded[:,1,i]) == 1 + @test V_encoded[v, 1, i] == 1 end end test_onehotbatch([1, 2, 5, 0]) -####### MNIST-like data set -train_x = rand(Float32, 28,28,100) -train_y = Int.(ceil.(10*rand(Float32, 100))) .- 1 +@doc raw""" +Generates an MNIST-like dummy data set. +""" +function generate_dummy_mnist(dim₁=28, dim₂=18, number_images=100, T=Float32) + train_x = rand(T, dim₁, dim₂, number_images) + train_y = Int.(ceil.(10 * rand(T, number_images))) .- 1 + train_x, train_y +end +function test_optimizer_for_classification_layer(; dim₁=28, dim₂=28, number_images=100, patch_length=7, T=Float32) + dl = DataLoader(generate_dummy_mnist(dim₁, dim₂, number_images, T); patch_length=patch_length) -dl = DataLoader(train_x, train_y) + activation_function(x) = tanh.(x) + model = Classification((dim₁ ÷ patch_length) * (dim₂ ÷ patch_length), 10, activation_function) + ps = initialparameters(CPU(), T, model) + loss₁ = GeometricMachineLearning.loss(model, ps, dl) -activation_function(x) = tanh.(x) -model = Classification(49, 10, activation_function) -ps = initialparameters(CPU(), Float32, model) -loss₁ = GeometricMachineLearning.loss(model, ps, dl) + opt = Optimizer(GradientOptimizer(), ps) + dx = Zygote.gradient(ps -> GeometricMachineLearning.loss(model, ps, dl), ps)[1] + optimization_step!(opt, model, ps, dx) + loss₂ = GeometricMachineLearning.loss(model, ps, dl) -opt = Optimizer(GradientOptimizer(), ps) -dx = Zygote.gradient(ps -> GeometricMachineLearning.loss(model, ps, dl), ps)[1] -optimization_step!(opt, model, ps, dx) -loss₂ = GeometricMachineLearning.loss(model, ps, dl) + @test loss₂ < loss₁ +end -@test loss₂ < loss₁ +test_index_conversion() +test_optimizer_for_classification_layer() \ No newline at end of file From cb19f5bb8f50952d720e058b8c9d2c652b0c82bd Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 19 Dec 2023 17:25:26 +0100 Subject: [PATCH 45/96] Added descriptions and a wrapper for using the custom loss function with NeuralNetwork. 
--- src/data_loader/data_loader.jl | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/src/data_loader/data_loader.jl b/src/data_loader/data_loader.jl index ae6a45089..24fb90a21 100644 --- a/src/data_loader/data_loader.jl +++ b/src/data_loader/data_loader.jl @@ -93,25 +93,35 @@ It takes as input: """ function loss(model::Union{Chain, AbstractExplicitLayer}, ps::Union{Tuple, NamedTuple}, input::AT, output::BT) where {T, T1, AT<:AbstractArray{T, 3}, BT<:AbstractArray{T1, 3}} output_estimate = model(input, ps) - norm(output - output_estimate)/norm(output) # /T(sqrt(size(output, 2)*size(output, 3))) + norm(output - output_estimate) / norm(output) # /T(sqrt(size(output, 2)*size(output, 3))) end +@doc raw""" +The *autoencoder loss*. +""" function loss(model::Chain, ps::Tuple, input::BT) where {T, BT<:AbstractArray{T, 3}} output_estimate = model(input, ps) - norm(output_estimate - input)/norm(input) # /T(sqrt(size(input, 2)*size(input, 3))) + norm(output_estimate - input) / norm(input) # /T(sqrt(size(input, 2)*size(input, 3))) end function loss(model::Chain, ps::Tuple, input::BT) where {T, BT<:AbstractArray{T, 2}} output_estimate = model(input, ps) - norm(output_estimate - input)/norm(input) # /T(sqrt(size(input, 2))) + norm(output_estimate - input) / norm(input) # /T(sqrt(size(input, 2))) end nt_diff(A, B) = (q = A.q - B.q, p = A.p - B.p) nt_norm(A) = norm(A.q) + norm(A.p) +@doc raw""" +Loss function that takes a `NamedTuple` as input. This should be used with a SympNet (or other neural network-based integrator). It computes: + +```math +\mathtt{loss}(\mathcal{NN}, \mathtt{ps}, \begin{pmatrix} q \\ p \end{pmatrix}, \begin{pmatrix} q' \\ p' \end{pmatrix}) \mapsto \left|| \mathcal{NN}(\begin{pmatrix} q \\ p \end{pmatrix}) - \begin{pmatrix} q' \\ p' \end{pmatrix} \right|| / \left|| \begin{pmatrix} q \\ p \end{pmatrix} \right|| +``` +""" function loss(model::Chain, ps::Tuple, input::NamedTuple, output::NamedTuple) output_estimate = model(input, ps) - nt_norm(nt_diff(output_estimate, output))/nt_norm(input) + nt_norm(nt_diff(output_estimate, output)) / nt_norm(input) end @doc raw""" @@ -136,6 +146,13 @@ function loss(model::Chain, ps::Tuple, dl::DataLoader{T, BT}) where {T, BT<:Name loss(model, ps, dl.data) end +@doc raw""" +Wrapper if we deal with a neural network. +""" +function loss(nn::NeuralNetwork, dl::DataLoader) + loss(nn.model, nn.params, dl) +end + @doc raw""" Computes the accuracy (as opposed to the loss) of a neural network classifier. From cb7725f380a7ec9158183799fad262e4a33bc415 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 19 Dec 2023 17:26:04 +0100 Subject: [PATCH 46/96] Added documentation. --- src/data_loader/batch.jl | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/src/data_loader/batch.jl b/src/data_loader/batch.jl index bbb9e4e39..9a76d0b26 100644 --- a/src/data_loader/batch.jl +++ b/src/data_loader/batch.jl @@ -15,6 +15,9 @@ end hasseqlength(::Batch{<:Integer}) = true hasseqlength(::Batch{<:Nothing}) = false +@doc raw""" +The functor for batch is called with an instance on `DataLoader`. It then returns a tuple of batch indices: ``(\mathcal{I}_1, \ldots, \mathcal{I}_{\lceil\mathtt{dl.n\_params/batch\_size}\rceil})``, where the index runs from 1 to the number of batches, which is the number of parameters divided by the batch size (rounded up). 
+""" function (batch::Batch{<:Nothing})(dl::DataLoader{T, AT}) where {T, AT<:AbstractArray{T, 3}} indices = shuffle(1:dl.n_params) n_batches = Int(ceil(dl.n_params/batch.batch_size)) @@ -28,6 +31,9 @@ function (batch::Batch{<:Nothing})(dl::DataLoader{T, AT}) where {T, AT<:Abstract batches end +@doc raw""" +The functor for batch is called with an instance on `DataLoader`. It then returns a tuple of batch indices: ``(\mathcal{I}_1, \ldots, \mathcal{I}_{\lceil\mathtt{(dl.input\_time\_steps-1)/batch\_size}\rceil})``, where the index runs from 1 to the number of batches, which is the number of input time steps (minus one) divided by the batch size (and rounded up). +""" function (batch::Batch{<:Nothing})(dl::DataLoader{T, AT}) where {T, BT<:AbstractMatrix{T}, AT<:Union{BT, NamedTuple{(:q, :p), Tuple{BT, BT}}}} indices = shuffle(1:dl.input_time_steps) n_batches = Int(ceil((dl.input_time_steps-1)/batch.batch_size)) @@ -88,7 +94,7 @@ function optimize_for_one_epoch!(opt::Optimizer, nn::NeuralNetwork, dl::DataLoad end """ -TODO: Add ProgressMeter!!! +This routine is called if a `DataLoader` storing *symplectic data* (i.e. a `NamedTuple`) is supplied. """ function optimize_for_one_epoch!(opt::Optimizer, model, ps::Union{Tuple, NamedTuple}, dl::DataLoader{T, AT}, batch::Batch, loss) where {T, AT<:NamedTuple} count = 0 @@ -107,7 +113,16 @@ function optimize_for_one_epoch!(opt::Optimizer, model, ps::Union{Tuple, NamedTu total_error/count end - +@doc raw""" +A functor for `Optimizer`. It is called with: + - `nn::NeuralNetwork` + - `dl::DataLoader` + - `batch::Batch` + - `n_epochs::Int` + - `loss` + +The last argument is a function through which `Zygote` differentiates. This argument is optional; if it is not supplied `GeometricMachineLearning` defaults to an appropriate loss for the `DataLoader`. +""" function (o::Optimizer)(nn::NeuralNetwork, dl::DataLoader, batch::Batch, n_epochs::Int, loss) progress_object = ProgressMeter.Progress(n_epochs; enabled=true) loss_array = zeros(n_epochs) From 90bbaa7856743c165c54e9d6871b5987903a6ed7 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 19 Dec 2023 17:53:14 +0100 Subject: [PATCH 47/96] Added constructor for optimizer if input arguments are flipped. --- src/optimizers/optimizer.jl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/optimizers/optimizer.jl b/src/optimizers/optimizer.jl index 7cd78d037..e6e25cdaa 100644 --- a/src/optimizers/optimizer.jl +++ b/src/optimizers/optimizer.jl @@ -22,6 +22,8 @@ function Optimizer(m::OptimizerMethod, nn::NeuralNetwork) Optimizer(m, nn.params) end +Optimizer(nn::NeuralNetwork, m::OptimizerMethod) = Optimizer(m, nn) + ####################################################################################### # optimization step function From 137d64018c0926e1a965d363c56db9dfa10d024f Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 19 Dec 2023 18:09:44 +0100 Subject: [PATCH 48/96] Added a comment saying that the constructor can be called with DataLoader as input. --- src/architectures/sympnet.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/architectures/sympnet.jl b/src/architectures/sympnet.jl index 8e017f125..b07d5d84c 100644 --- a/src/architectures/sympnet.jl +++ b/src/architectures/sympnet.jl @@ -7,7 +7,7 @@ TODO: abstract type SympNet{AT} <: Architecture end @doc raw""" -`LASympNet` is called with **a single input argument**, the **system dimension**. 
Optional input arguments are: +`LASympNet` is called with **a single input argument**, the **system dimension**, or with an instance of `DataLoader`. Optional input arguments are: - `depth::Int`: The number of linear layers that are applied. The default is 5. - `nhidden::Int`: The number of hidden layers (i.e. layers that are **not** input or output layers). The default is 2. - `activation`: The activation function that is applied. By default this is `tanh`. @@ -32,7 +32,7 @@ end @inline AbstractNeuralNetworks.dim(arch::SympNet) = arch.dim @doc raw""" -`GSympNet` is called with **a single input argument**, the **system dimension**. Optional input arguments are: +`GSympNet` is called with **a single input argument**, the **system dimension**, or with an instance of `DataLoader`. Optional input arguments are: - `upscaling_dimension::Int`: The *upscaling dimension* of the gradient layer. See the documentation for `GradientLayerQ` and `GradientLayerP` for further explanation. The default is `2*dim`. - `nhidden::Int`: The number of hidden layers (i.e. layers that are **not** input or output layers). The default is 2. - `activation`: The activation function that is applied. By default this is `tanh`. From b57d1488a29a9e29a16a5a57f0cfb5828e2ac588 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 19 Dec 2023 18:10:14 +0100 Subject: [PATCH 49/96] Added default for number of epochs. --- src/data_loader/batch.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/data_loader/batch.jl b/src/data_loader/batch.jl index 9a76d0b26..10b434ffd 100644 --- a/src/data_loader/batch.jl +++ b/src/data_loader/batch.jl @@ -133,6 +133,6 @@ function (o::Optimizer)(nn::NeuralNetwork, dl::DataLoader, batch::Batch, n_epoch loss_array end -function (o::Optimizer)(nn::NeuralNetwork, dl::DataLoader, batch::Batch, n_epochs::Int) +function (o::Optimizer)(nn::NeuralNetwork, dl::DataLoader, batch::Batch, n_epochs::Int=1) o(nn, dl, batch, n_epochs, loss) end \ No newline at end of file From 3100b0b77401b42e0edc1857527cbf051dea65ac Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 19 Dec 2023 18:11:05 +0100 Subject: [PATCH 50/96] Combined matrix and tensor routines into one. Added another loss for autoencoder qp data. --- src/data_loader/data_loader.jl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/data_loader/data_loader.jl b/src/data_loader/data_loader.jl index 24fb90a21..2beb691b9 100644 --- a/src/data_loader/data_loader.jl +++ b/src/data_loader/data_loader.jl @@ -99,19 +99,19 @@ end @doc raw""" The *autoencoder loss*. """ -function loss(model::Chain, ps::Tuple, input::BT) where {T, BT<:AbstractArray{T, 3}} +function loss(model::Chain, ps::Tuple, input::BT) where {T, BT<:AbstractArray{T}} output_estimate = model(input, ps) norm(output_estimate - input) / norm(input) # /T(sqrt(size(input, 2)*size(input, 3))) end -function loss(model::Chain, ps::Tuple, input::BT) where {T, BT<:AbstractArray{T, 2}} - output_estimate = model(input, ps) - norm(output_estimate - input) / norm(input) # /T(sqrt(size(input, 2))) -end - nt_diff(A, B) = (q = A.q - B.q, p = A.p - B.p) nt_norm(A) = norm(A.q) + norm(A.p) +function loss(model::Chain, ps::Tuple, input::NT) where {T, AT<:AbstractArray{T}, NT<:NamedTuple{(:q, :p,), Tuple{AT, AT}}} + output_estimate = model(input, ps) + nt_norm(output_estimate - input) / nt_norm(input) +end + @doc raw""" Loss function that takes a `NamedTuple` as input. This should be used with a SympNet (or other neural network-based integrator). 
It computes: From 718174fb847fb93bc54ad4ca96568d400da229e2 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 19 Dec 2023 18:12:14 +0100 Subject: [PATCH 51/96] Commented out a section that is probably not needed. --- src/data_loader/tensor_assign.jl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/data_loader/tensor_assign.jl b/src/data_loader/tensor_assign.jl index 3faad0a99..4aac9bfc0 100644 --- a/src/data_loader/tensor_assign.jl +++ b/src/data_loader/tensor_assign.jl @@ -55,6 +55,7 @@ function assign_output_estimate(full_output::AbstractArray{T, 3}, prediction_win output_estimate end +#= """ This function draws random time steps and parameters and based on these assign the batch and the output. @@ -101,6 +102,7 @@ function draw_batch!(batch::AT, output::BT, data::AT, target::BT) where {T, T2, assign_batch!(batch, data, params, time_steps, ndrange=size(batch)) assign_batch!(output, target, params, time_steps, ndrange=size(output)) end +=# """ Used for differentiating assign_output_estimate (this appears in the loss). From 370ee05c4ab154beb44694d79a96694987400db6 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 19 Dec 2023 18:12:41 +0100 Subject: [PATCH 52/96] Test data loader for qp data. --- test/data_loader/batch_data_loader_qp_test.jl | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 test/data_loader/batch_data_loader_qp_test.jl diff --git a/test/data_loader/batch_data_loader_qp_test.jl b/test/data_loader/batch_data_loader_qp_test.jl new file mode 100644 index 000000000..9e1273e49 --- /dev/null +++ b/test/data_loader/batch_data_loader_qp_test.jl @@ -0,0 +1,33 @@ +using GeometricMachineLearning +using Test + +function dummy_qp_data_matrix(dim=2, number_data_points=200, T=Float32) + (q = rand(T, dim, number_data_points), p = (rand(T, dim, number_data_points))) +end + +function dummy_qp_data_tensor(dim=2, number_of_time_steps=100, number_of_parameters=20, T=Float32) + (q = rand(T, dim, number_of_time_steps, number_of_parameters), p = (rand(T, dim, number_of_time_steps, number_of_parameters))) +end + +function test_data_loader(dim=2, number_of_time_steps=100, number_of_parameters=20, batch_size, T=Float32) + dl1 = DataLoader(dummy_qp_data_matrix(dim, number_of_time_steps, T)) + dl2 = DataLoader(dummy_qp_data_tensor(dim, number_of_time_steps, number_of_parameters)) + + arch1 = GSympNet(dl1) + arch2 = GSympNet(dl2) + + nn1 = NeuralNetwork(arch1, CPU(), T) + nn2 = NeuralNetwork(arch2, CPU(), T) + + loss1 = loss(nn1, dl1) + loss2 = loss(nn2, dl2) + + batch = Batch(batch_size) + o₁ = Optimizer(GradientOptimizer(), nn1) + o₂ = Optimizer(GradientOptimizer(), nn2) + + o₁(nn1, dl1, batch) + o₂(nn2, dl2, batch) +end + +test_data_loader() \ No newline at end of file From 14d6ae2130d45b80ee21a631ef9eddf912d139c2 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 19 Dec 2023 18:13:08 +0100 Subject: [PATCH 53/96] Renamed and added tests. 
--- test/runtests.jl | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/test/runtests.jl b/test/runtests.jl index e94ad1242..b0069ae2d 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -34,6 +34,8 @@ using SafeTestsets @safetestset "Training " begin include("train!/test_training.jl") end @safetestset "NeuralNetSolution " begin include("train!/test_neuralnet_solution.jl") end @safetestset "Problem & Integrators " begin include("integrator/test_integrator.jl") end -@safetestset "Data Loader #1 " begin include("data_loader/data_loader.jl") end -@safetestset "Data Loader #2 " begin include("data_loader/mnist_utils.jl") end -@safetestset "Data Loader #3 (Batch struct) " begin include("data_loader/batch.jl") end \ No newline at end of file + +@safetestset "Test data loader for q and p data " begin include("data_loader/batch_data_loader_qp_test.jl") end +@safetestset "Test mnist_utils. " begin include("data_loader/mnist_utils.jl") end +@safetestset "Test the data loader in combination with optimization_step! " begin include("data_loader/data_loader_optimization_step.jl") end +@safetestset "Optimizer functor with data loader for Adam " begin include("data_loader/optimizer_functor_with_adam.jl") \ No newline at end of file From 9edec7be2edb767966ab05477fe7866209ab2a3f Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 19 Dec 2023 18:14:24 +0100 Subject: [PATCH 54/96] Adjusted symplectic matrix. --- src/arrays/symplectic.jl | 80 ++++++++++++---------------------------- 1 file changed, 24 insertions(+), 56 deletions(-) diff --git a/src/arrays/symplectic.jl b/src/arrays/symplectic.jl index 1dcc59658..3a4f138d6 100644 --- a/src/arrays/symplectic.jl +++ b/src/arrays/symplectic.jl @@ -1,75 +1,43 @@ @doc raw""" - `SymplecticMatrix(n)` +`SymplecticPotential(n)` Returns a symplectic matrix of size 2n x 2n ```math \begin{pmatrix} -0 & & & 1 & & & \\ -& \ddots & & & \ddots & & \\ -& & 0 & & & 1 \\ --1 & & & 0 & & & \\ -& \ddots & & & \ddots & & \\ -& & -1 & & 0 & \\ +\mathbb{O} & \mathbb{I} \\ +\mathbb{O} & -\mathbb{I} \\ \end{pmatrix} ``` - - `SymplecticProjection(N,n)` -Returns the symplectic projection matrix E of the Stiefel manifold, i.e. π: Sp(2N) → Sp(2n,2N), A ↦ AE - """ -#= -function SymplecticMatrix(n::Int, T::DataType=Float64) - BandedMatrix((n => ones(T,n), -n => -ones(T,n)), (2n,2n)) -end - -SymplecticMatrix(T::DataType, n::Int) = SymplecticMatrix(n, T) - -@doc raw""" -```math -\begin{pmatrix} -I & 0 \\ -0 & 0 \\ -0 & I \\ -0 & 0 \\ -\end{pmatrix} -``` -""" -=# - -function SymplecticPotential(n::Int, T::DataType=Float64) - J = zeros(T, 2*n, 2*n) - J[1:n, (n+1):2*n] = one(ones(T, n, n)) - J[(n+1):2*n, 1:n] = -one(ones(T, n, n)) +function SymplecticPotential(backend, n2::Int, T::DataType=Float64) + @assert iseven(n2) + n = n2÷2 + J = KernelAbstractions.zeros(backend, T, 2*n, 2*n) + assign_ones_for_symplectic_potential! = assign_ones_for_symplectic_potential_kernel!(backend) + assign_ones_for_symplectic_potential!(J, n, ndrange=n) J end +SymplecticPotential(n::Int, T::DataType=Float64) = SymplecticPotential(CPU(), n, T) +SymplecticPotential(bakend, T::DataType, n::Int) = SymplecticPotential(backend, n, T) + SymplecticPotential(T::DataType, n::Int) = SymplecticPotential(n, T) -struct SymplecticProjection{T} <: AbstractMatrix{T} - N::Int - n::Int - SymplecticProjection(N, n, T = Float64) = new{T}(N,n) +@kernel function assign_ones_for_symplectic_potential_kernel!(J::AbstractMatrix{T}, n::Int) where T + i = @index(Global) + J[map_index_for_symplectic_potential(i, n)...] 
= i ≤ n ? one(T) : -one(T) end -function Base.getindex(E::SymplecticProjection,i,j) - if i ≤ E.n - if j == i - return 1. - end - return 0. - end - if j > E.n - if (j-E.n) == (i-E.N) - return 1. - end - return 0. +""" +This assigns the right index for the symplectic potential. To be used with `assign_ones_for_symplectic_potential_kernel!`. +""" +function map_index_for_symplectic_potential(i::Int, n::Int) + if i ≤ n + return (i, i+n) + else + return (i, i-n) end - return 0. -end - - -Base.parent(E::SymplecticProjection) = (E.N,E.n) -Base.size(E::SymplecticProjection) = (2*E.N,2*E.n) +end \ No newline at end of file From 2acdd2487d64ec2d04d6e2caabd252f3424bbaa9 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 19 Dec 2023 18:45:28 +0100 Subject: [PATCH 55/96] dim was missing for specifying default. --- src/architectures/sympnet.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/architectures/sympnet.jl b/src/architectures/sympnet.jl index b07d5d84c..dba449c4e 100644 --- a/src/architectures/sympnet.jl +++ b/src/architectures/sympnet.jl @@ -49,7 +49,7 @@ struct GSympNet{AT, InitUpper} <: SympNet{AT} where {InitUpper} end - function GSympNet(dl::DataLoader; upscaling_dimension=2*dim, nhidden=2, activation=tanh, init_upper=true) + function GSympNet(dl::DataLoader; upscaling_dimension=2*dl.input_dim, nhidden=2, activation=tanh, init_upper=true) new{typeof(activation), init_upper}(dl.input_dim, upscaling_dimension, nhidden, activation) end end From f8b8fff1211ce88fd88c4fa7a39dd6e0a919d97d Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 19 Dec 2023 18:46:01 +0100 Subject: [PATCH 56/96] dl has no field data. --- src/data_loader/data_loader.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/data_loader/data_loader.jl b/src/data_loader/data_loader.jl index 2beb691b9..5117bbc23 100644 --- a/src/data_loader/data_loader.jl +++ b/src/data_loader/data_loader.jl @@ -109,7 +109,7 @@ nt_norm(A) = norm(A.q) + norm(A.p) function loss(model::Chain, ps::Tuple, input::NT) where {T, AT<:AbstractArray{T}, NT<:NamedTuple{(:q, :p,), Tuple{AT, AT}}} output_estimate = model(input, ps) - nt_norm(output_estimate - input) / nt_norm(input) + nt_norm(nt_diff(output_estimate, input)) / nt_norm(input) end @doc raw""" @@ -143,7 +143,7 @@ function loss(model::Chain, ps::Tuple, dl::DataLoader{T, BT, Nothing}) where {T, end function loss(model::Chain, ps::Tuple, dl::DataLoader{T, BT}) where {T, BT<:NamedTuple} - loss(model, ps, dl.data) + loss(model, ps, dl.input) end @doc raw""" From 8c4c73b1e9faa8e826fcd8463558664c058edd4e Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 19 Dec 2023 18:46:27 +0100 Subject: [PATCH 57/96] Fixed typos. 
--- test/data_loader/batch_data_loader_qp_test.jl | 10 +++++----- test/data_loader/mnist_utils.jl | 6 +++--- test/data_loader/optimizer_functor_with_adam.jl | 6 +++--- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/test/data_loader/batch_data_loader_qp_test.jl b/test/data_loader/batch_data_loader_qp_test.jl index 9e1273e49..6258c3eef 100644 --- a/test/data_loader/batch_data_loader_qp_test.jl +++ b/test/data_loader/batch_data_loader_qp_test.jl @@ -9,7 +9,7 @@ function dummy_qp_data_tensor(dim=2, number_of_time_steps=100, number_of_paramet (q = rand(T, dim, number_of_time_steps, number_of_parameters), p = (rand(T, dim, number_of_time_steps, number_of_parameters))) end -function test_data_loader(dim=2, number_of_time_steps=100, number_of_parameters=20, batch_size, T=Float32) +function test_data_loader(dim=2, number_of_time_steps=100, number_of_parameters=20, batch_size=10, T=Float32) dl1 = DataLoader(dummy_qp_data_matrix(dim, number_of_time_steps, T)) dl2 = DataLoader(dummy_qp_data_tensor(dim, number_of_time_steps, number_of_parameters)) @@ -19,15 +19,15 @@ function test_data_loader(dim=2, number_of_time_steps=100, number_of_parameters= nn1 = NeuralNetwork(arch1, CPU(), T) nn2 = NeuralNetwork(arch2, CPU(), T) - loss1 = loss(nn1, dl1) - loss2 = loss(nn2, dl2) + loss1 = GeometricMachineLearning.loss(nn1, dl1) + loss2 = GeometricMachineLearning.loss(nn2, dl2) batch = Batch(batch_size) o₁ = Optimizer(GradientOptimizer(), nn1) - o₂ = Optimizer(GradientOptimizer(), nn2) + # o₂ = Optimizer(GradientOptimizer(), nn2) o₁(nn1, dl1, batch) - o₂(nn2, dl2, batch) + # o₂(nn2, dl2, batch) end test_data_loader() \ No newline at end of file diff --git a/test/data_loader/mnist_utils.jl b/test/data_loader/mnist_utils.jl index f6c637a58..502739756 100644 --- a/test/data_loader/mnist_utils.jl +++ b/test/data_loader/mnist_utils.jl @@ -49,17 +49,17 @@ test_onehotbatch([1, 2, 5, 0]) @doc raw""" Generates an MNIST-like dummy data set. 
""" -function generate_dummy_mnist(dim₁=28, dim₂=18, number_images=100, T=Float32) +function generate_dummy_mnist(dim₁=28, dim₂=28, number_images=100, T=Float32) train_x = rand(T, dim₁, dim₂, number_images) train_y = Int.(ceil.(10 * rand(T, number_images))) .- 1 train_x, train_y end function test_optimizer_for_classification_layer(; dim₁=28, dim₂=28, number_images=100, patch_length=7, T=Float32) - dl = DataLoader(generate_dummy_mnist(dim₁, dim₂, number_images, T); patch_length=patch_length) + dl = DataLoader(generate_dummy_mnist(dim₁, dim₂, number_images, T)...; patch_length=patch_length) activation_function(x) = tanh.(x) - model = Classification((dim₁ ÷ patch_length) * (dim₂ ÷ patch_length), 10, activation_function) + model = Classification(patch_length * patch_length, 10, activation_function) ps = initialparameters(CPU(), T, model) loss₁ = GeometricMachineLearning.loss(model, ps, dl) diff --git a/test/data_loader/optimizer_functor_with_adam.jl b/test/data_loader/optimizer_functor_with_adam.jl index 2caf538b2..c2e3be6a8 100644 --- a/test/data_loader/optimizer_functor_with_adam.jl +++ b/test/data_loader/optimizer_functor_with_adam.jl @@ -13,14 +13,14 @@ function create_dummy_mnist(;T=Float32, dim₁=6, dim₂=6, n_images=10) rand(T, dim₁, dim₂, n_images), Int.(floor.(10*rand(T, n_images))) end -function test_optimizer_functor_with_adam(;T=Float32, dim₁=6, dim₂=6, n_images=10, patch_length=patch_length) - dl = DataLoader(create_dummy_mnist(T=T, dim₁=dim₁, dim₂=dim₂, n_images=n_images)...; patch_length=3) +function test_optimizer_functor_with_adam(;T=Float32, dim₁=6, dim₂=6, n_images=10, patch_length=3) + dl = DataLoader(create_dummy_mnist(T=T, dim₁=dim₁, dim₂=dim₂, n_images=n_images)...; patch_length=patch_length) # batch size is equal to two batch = Batch(2) # input dim is dim₁ / patch_length * dim₂ / pach_length; the transformer is called with dim₁ / patch_length and two layers - model = Chain(Transformer(dl.input_dim, dim₁ / patch_length, 2; Stiefel=true), Classification(dl.input_dim, 10, σ)) + model = Chain(Transformer(dl.input_dim, patch_length, 2; Stiefel=true), Classification(dl.input_dim, 10, σ)) ps = initialparameters(CPU(), Float32, model) loss₁ = GeometricMachineLearning.loss(model, ps, dl) From d0725941779e31494db573bc947c66cdf95d43be Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 19 Dec 2023 18:57:19 +0100 Subject: [PATCH 58/96] Removed method that appeared twice for some reason. --- src/data_loader/data_loader.jl | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/data_loader/data_loader.jl b/src/data_loader/data_loader.jl index 4c5eeff2b..5117bbc23 100644 --- a/src/data_loader/data_loader.jl +++ b/src/data_loader/data_loader.jl @@ -153,13 +153,6 @@ function loss(nn::NeuralNetwork, dl::DataLoader) loss(nn.model, nn.params, dl) end -@doc raw""" -Wrapper if we deal with a neural network. -""" -function loss(nn::NeuralNetwork, dl::DataLoader) - loss(nn.model, nn.params, dl) -end - @doc raw""" Computes the accuracy (as opposed to the loss) of a neural network classifier. From d071839f6891fdbf7d5d33cdae2c289719298ea9 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 19 Dec 2023 19:02:03 +0100 Subject: [PATCH 59/96] Forgot to commit before. 
--- test/data_loader/batch_data_loader_qp_test.jl | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/test/data_loader/batch_data_loader_qp_test.jl b/test/data_loader/batch_data_loader_qp_test.jl index 6194bbc39..e189aca2f 100644 --- a/test/data_loader/batch_data_loader_qp_test.jl +++ b/test/data_loader/batch_data_loader_qp_test.jl @@ -9,11 +9,8 @@ function dummy_qp_data_tensor(dim=2, number_of_time_steps=100, number_of_paramet (q = rand(T, dim, number_of_time_steps, number_of_parameters), p = (rand(T, dim, number_of_time_steps, number_of_parameters))) end -<<<<<<< HEAD function test_data_loader(dim=2, number_of_time_steps=100, number_of_parameters=20, batch_size=10, T=Float32) -======= -function test_data_loader(dim=2, number_of_time_steps=100, number_of_parameters=20, batch_size, T=Float32) ->>>>>>> 4528053bc135b3f495ac732804cd913e4187cc96 + dl1 = DataLoader(dummy_qp_data_matrix(dim, number_of_time_steps, T)) dl2 = DataLoader(dummy_qp_data_tensor(dim, number_of_time_steps, number_of_parameters)) @@ -23,7 +20,6 @@ function test_data_loader(dim=2, number_of_time_steps=100, number_of_parameters= nn1 = NeuralNetwork(arch1, CPU(), T) nn2 = NeuralNetwork(arch2, CPU(), T) -<<<<<<< HEAD loss1 = GeometricMachineLearning.loss(nn1, dl1) loss2 = GeometricMachineLearning.loss(nn2, dl2) @@ -33,17 +29,6 @@ function test_data_loader(dim=2, number_of_time_steps=100, number_of_parameters= o₁(nn1, dl1, batch) # o₂(nn2, dl2, batch) -======= - loss1 = loss(nn1, dl1) - loss2 = loss(nn2, dl2) - - batch = Batch(batch_size) - o₁ = Optimizer(GradientOptimizer(), nn1) - o₂ = Optimizer(GradientOptimizer(), nn2) - - o₁(nn1, dl1, batch) - o₂(nn2, dl2, batch) ->>>>>>> 4528053bc135b3f495ac732804cd913e4187cc96 end test_data_loader() \ No newline at end of file From 2bb3f2994d8f228937d1929f7855568e4e04d737 Mon Sep 17 00:00:00 2001 From: CompatHelper Julia Date: Wed, 20 Dec 2023 00:35:47 +0000 Subject: [PATCH 60/96] CompatHelper: bump compat for GPUArrays to 10, (keep existing compat) --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 16c3b1e15..1936d4dd8 100644 --- a/Project.toml +++ b/Project.toml @@ -44,7 +44,7 @@ ChainRulesTestUtils = "1" Distances = "0.10" Documenter = "0.27, 1" ForwardDiff = "0.10" -GPUArrays = "8, 9" +GPUArrays = "8, 9, 10" GeometricBase = "0.9" GeometricEquations = "0.14" GeometricIntegrators = "0.13" From d160bcfa6860d55cc5d259a290973f567665999b Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 13 Dec 2023 17:22:18 +0100 Subject: [PATCH 61/96] Fixed docs for optimize_for_one_epoch --- src/data_loader/batch.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/data_loader/batch.jl b/src/data_loader/batch.jl index 10b434ffd..449fa05a8 100644 --- a/src/data_loader/batch.jl +++ b/src/data_loader/batch.jl @@ -64,7 +64,7 @@ With the optional argument: The output of `optimize_for_one_epoch!` is the average loss over all batches of the epoch: ```math -output = \frac{1}{mathtt{steps\_per\_epoch}}\sum_{t=1}^mathtt{steps\_per\_epoch}loss(\theta^{(t-1)}). +output = \frac{1}{\mathtt{steps\_per\_epoch}}\sum_{t=1}^\mathtt{steps\_per\_epoch}loss(\theta^{(t-1)}). ``` This is done because any **reverse differentiation** routine always has two outputs: a pullback and the value of the function it is differentiating. In the case of zygote: `loss_value, pullback = Zygote.pullback(ps -> loss(ps), ps)` (if the loss only depends on the parameters). 
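For a single batch this is used roughly as follows (an illustrative sketch, not the exact implementation; `input_batch` and `output_batch` stand for the data drawn for one batch):

```julia
loss_value, pullback = Zygote.pullback(ps -> loss(model, ps, input_batch, output_batch), ps)
dp = pullback(one(loss_value))[1]      # gradient with respect to the parameters
optimization_step!(opt, model, ps, dp)
```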
""" From b35e5e2c0941008a2cd539bd4defbdea3db56012 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Thu, 14 Dec 2023 14:07:41 +0100 Subject: [PATCH 62/96] Added descriptions for some functions in the docs. --- src/layers/multi_head_attention.jl | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/layers/multi_head_attention.jl b/src/layers/multi_head_attention.jl index 05c3c53cc..e07820cb7 100644 --- a/src/layers/multi_head_attention.jl +++ b/src/layers/multi_head_attention.jl @@ -125,14 +125,23 @@ function (d::MultiHeadAttention{M, M, Stiefel, Retraction, false})(x::AbstractAr end import ChainRules +""" +This has to be extended to tensors; you should probably do a PR in ChainRules for this. +""" function ChainRules._adjoint_mat_pullback(y::AbstractArray{T, 3}, proj) where T (NoTangent(), proj(tensor_transpose(y))) end +""" +Extend `mat_tensor_mul` to a multiplication by the adjoint of an element of `StiefelManifold`. +""" function mat_tensor_mul(Y::AT, x::AbstractArray{T, 3}) where {T, BT <: AbstractArray{T}, ST <: StiefelManifold{T, BT}, AT <: Adjoint{T, ST}} mat_tensor_mul(Y.parent.A', x) end +""" +Extend `mat_tensor_mul` to a multiplication by an element of `StiefelManifold`. +""" function mat_tensor_mul(Y::StiefelManifold, x::AbstractArray{T, 3}) where T mat_tensor_mul(Y.A, x) end \ No newline at end of file From 8fe48325becd86aea9bf3a9c4f8c6a2968df5679 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Thu, 14 Dec 2023 15:08:41 +0100 Subject: [PATCH 63/96] Added eltype to AbstractCache. --- src/optimizers/bfgs_cache.jl | 4 ++-- src/optimizers/optimizer_caches.jl | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/optimizers/bfgs_cache.jl b/src/optimizers/bfgs_cache.jl index 26f1c0b96..96561f8be 100644 --- a/src/optimizers/bfgs_cache.jl +++ b/src/optimizers/bfgs_cache.jl @@ -5,7 +5,7 @@ It stores an array for the previous time step `B` and the inverse of the Hessian It is important to note that setting up this cache already requires a derivative! This is not the case for the other optimizers. """ -struct BFGSCache{T, AT<:AbstractArray{T}} <: AbstractCache +struct BFGSCache{T, AT<:AbstractArray{T}} <: AbstractCache{T} B::AT S::AT H::AbstractMatrix{T} @@ -19,7 +19,7 @@ In order to initialize `BGGSCache` we first need gradient information. This is w NOTE: we may not need this. """ -struct BFGSDummyCache{T, AT<:AbstractArray{T}} <: AbstractCache +struct BFGSDummyCache{T, AT<:AbstractArray{T}} <: AbstractCache{T} function BFGSDummyCache(B::AbstractArray) new{eltype(B), typeof(zero(B))}() end diff --git a/src/optimizers/optimizer_caches.jl b/src/optimizers/optimizer_caches.jl index b8a21cb0a..189ef7ae6 100644 --- a/src/optimizers/optimizer_caches.jl +++ b/src/optimizers/optimizer_caches.jl @@ -7,12 +7,12 @@ AbstractCache has subtypes: All of them can be initialized with providing an array (also supporting manifold types). 
""" -abstract type AbstractCache end +abstract type AbstractCache{T} end ############################################################################# # All the definitions of the caches -struct AdamCache{T, AT <: AbstractArray{T}} <: AbstractCache +struct AdamCache{T, AT <: AbstractArray{T}} <: AbstractCache{T} B₁::AT B₂::AT function AdamCache(Y::AbstractArray) @@ -20,15 +20,15 @@ struct AdamCache{T, AT <: AbstractArray{T}} <: AbstractCache end end -struct MomentumCache{T, AT <: AbstractArray{T}} <:AbstractCache +struct MomentumCache{T, AT <: AbstractArray{T}} <:AbstractCache{T} B::AT function MomentumCache(Y::AbstractArray) new{eltype(Y), typeof(zero(Y))}(zero(Y)) end end -struct GradientCache <: AbstractCache end -GradientCache(::AbstractArray) = GradientCache() +struct GradientCache{T} <: AbstractCache{T} end +GradientCache(::AbstractArray{T}) where T = GradientCache{T}() ############################################################################# # All the setup_cache functions From ebc5f71f02cbd75b1fbcfdf778a4d19eff475e42 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Thu, 14 Dec 2023 15:13:27 +0100 Subject: [PATCH 64/96] Removed comments that seem to have become unnecessary. --- src/optimizers/optimizer_caches.jl | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/optimizers/optimizer_caches.jl b/src/optimizers/optimizer_caches.jl index 189ef7ae6..398eaa562 100644 --- a/src/optimizers/optimizer_caches.jl +++ b/src/optimizers/optimizer_caches.jl @@ -33,11 +33,6 @@ GradientCache(::AbstractArray{T}) where T = GradientCache{T}() ############################################################################# # All the setup_cache functions -# I don't really understand what we need these for ??? -# setup_adam_cache(B::AbstractArray) = reshape([setup_adam_cache(b) for b in B], size(B)) -# setup_momentum_cache(B::AbstractArray) = reshape([setup_momentum_cache(b) for b in B], size(B)) -# setup_gradient_cache(B::AbstractArray) = reshape([setup_gradient_cache(b) for b in B], size(B)) - setup_adam_cache(ps::NamedTuple) = apply_toNT(setup_adam_cache, ps) setup_momentum_cache(ps::NamedTuple) = apply_toNT(setup_momentum_cache, ps) setup_gradient_cache(ps::NamedTuple) = apply_toNT(setup_gradient_cache, ps) From 3c967e8e9d02acb85a126234560fadd1ddd11118 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 20 Dec 2023 08:41:58 +0100 Subject: [PATCH 65/96] Resolved merge conflict with array_tests.jl (i.e. added vectorization test). --- test/arrays/array_tests.jl | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/test/arrays/array_tests.jl b/test/arrays/array_tests.jl index c0da5eba9..1c372344e 100644 --- a/test/arrays/array_tests.jl +++ b/test/arrays/array_tests.jl @@ -76,6 +76,12 @@ function stiefel_lie_alg_add_sub_test(N, n) @test all(abs.(projection(W₁ - W₂) .- S₄) .< 1e-10) end + +function stiefel_lie_alg_vectorization_test(N, n; T=Float32) + A = rand(StiefelLieAlgHorMatrix{T}, N, n) + @test isapprox(StiefelLieAlgHorMatrix(vec(A), N, n), A) +end + # TODO: tests for ADAM functions # test everything for different n & N values @@ -96,4 +102,5 @@ for (N, n) ∈ zip(N_vec, n_vec) skew_mat_mul_test2(N) stiefel_proj_test(N,n) stiefel_lie_alg_add_sub_test(N,n) + stiefel_lie_alg_vectorization_test(N, n) end From da77a967fa26d914cbb62031ceb7f81cc3548d08 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 20 Dec 2023 08:45:03 +0100 Subject: [PATCH 66/96] Added sampling test for custom arrays (resolved conflict). 
--- test/runtests.jl | 16 +++--- ...ulti_head_attention_stiefel_optim_cache.jl | 51 ++++++++++++------- ...multi_head_attention_stiefel_retraction.jl | 51 +++++++++---------- .../multi_head_attention_stiefel_setup.jl | 33 ++++++++---- .../transformer_application.jl | 9 ++-- .../transformer_gradient.jl | 3 ++ .../transformer_optimizer.jl | 9 +++- test/transformer_related/transformer_setup.jl | 3 ++ 8 files changed, 109 insertions(+), 66 deletions(-) diff --git a/test/runtests.jl b/test/runtests.jl index 033b04f16..1b6cb1b4d 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -11,13 +11,15 @@ using SafeTestsets @safetestset "Manifold Neural Network Layers " begin include("layers/manifold_layers.jl") end @safetestset "Custom AD rules for kernels " begin include("custom_ad_rules/kernel_pullbacks.jl") end @safetestset "ResNet " begin include("layers/resnet_tests.jl") end -@safetestset "Transformer Networks #1 " begin include("transformer_related/multi_head_attention_stiefel_optim_cache.jl") end -@safetestset "Transformer Networks #2 " begin include("transformer_related/multi_head_attention_stiefel_retraction.jl") end -@safetestset "Transformer Networks #3 " begin include("transformer_related/multi_head_attention_stiefel_setup.jl") end -@safetestset "Transformer Networks #4 " begin include("transformer_related/transformer_setup.jl") end -@safetestset "Transformer Networks #5 " begin include("transformer_related/transformer_application.jl") end -@safetestset "Transformer Networks #6 " begin include("transformer_related/transformer_gradient.jl") end -@safetestset "Transformer Networks #7 " begin include("transformer_related/transformer_optimizer.jl") end + +# transformer-related tests +@safetestset "Test setup of MultiHeadAttention layer Stiefel weights " begin include("transformer_related/multi_head_attention_stiefel_setup.jl") end +@safetestset "Test geodesic and Cayley retr for the MultiHeadAttention layer w/ St weights " begin include("transformer_related/multi_head_attention_stiefel_retraction.jl") end +@safetestset "Test the correct setup of the various optimizer caches for MultiHeadAttention " begin include("transformer_related/multi_head_attention_stiefel_optim_cache.jl") end +@safetestset "Check if the transformer can be applied to a tensor. " begin include("transformer_related/transformer_application.jl") end +@safetestset "Check if the gradient/pullback of MultiHeadAttention changes type in St case " begin include("transformer_related/transformer_gradient.jl") end +@safetestset "Check if the optimization_step! 
changes the parameters of the transformer " begin include("transformer_related/transformer_optimizer.jl") end + @safetestset "Attention layer #1 " begin include("attention_layer/attention_setup.jl") end @safetestset "(MultiHead)Attention " begin include("attention_layer/apply_multi_head_attention.jl") end @safetestset "Classification layer " begin include("layers/classification.jl") end diff --git a/test/transformer_related/multi_head_attention_stiefel_optim_cache.jl b/test/transformer_related/multi_head_attention_stiefel_optim_cache.jl index ca4209c39..c7614fc52 100644 --- a/test/transformer_related/multi_head_attention_stiefel_optim_cache.jl +++ b/test/transformer_related/multi_head_attention_stiefel_optim_cache.jl @@ -2,19 +2,10 @@ using GeometricMachineLearning, Test import Lux, Random, LinearAlgebra -dim = 64 -n_heads = 8 -Dₕ = dim÷8 -tol = eps(Float32) - -model = Chain(MultiHeadAttention(dim, n_heads), MultiHeadAttention(dim, n_heads, Stiefel=true)) -ps = initialparameters(CPU(), Float32, model) - -o₁ = Optimizer(AdamOptimizer(), ps) -o₂ = Optimizer(MomentumOptimizer(), ps) -o₃ = Optimizer(GradientOptimizer(), ps) - -function check_adam_cache(C::AbstractCache) +@doc raw""" +This checks if the Adam cache was set up in the right way +""" +function check_adam_cache(C::AbstractCache{T}, tol= T(10) * eps(T)) where T @test typeof(C) <: AdamCache @test propertynames(C) == (:B₁, :B₂) @test typeof(C.B₁) <: StiefelLieAlgHorMatrix @@ -24,7 +15,10 @@ function check_adam_cache(C::AbstractCache) end check_adam_cache(B::NamedTuple) = apply_toNT(check_adam_cache, B) -function check_momentum_cache(C::AbstractCache) +@doc raw""" +This checks if the momentum cache was set up in the right way +""" +function check_momentum_cache(C::AbstractCache{T}, tol= T(10) * eps(T)) where T @test typeof(C) <: MomentumCache @test propertynames(C) == (:B,) @test typeof(C.B) <: StiefelLieAlgHorMatrix @@ -32,12 +26,33 @@ function check_momentum_cache(C::AbstractCache) end check_momentum_cache(B::NamedTuple) = apply_toNT(check_momentum_cache, B) -function check_gradient_cache(C::AbstractCache) +@doc raw""" +This checks if the gradient cache was set up in the right way +""" +function check_gradient_cache(C::AbstractCache{T}) where T @test typeof(C) <: GradientCache @test propertynames(C) == () end check_gradient_cache(B::NamedTuple) = apply_toNT(check_gradient_cache, B) -check_adam_cache(o₁.cache[2]) -check_momentum_cache(o₂.cache[2]) -check_gradient_cache(o₃.cache[2]) +@doc raw""" +This checks if all the caches are set up in the right way for the `MultiHeadAttention` layer with Stiefel weights. + +TODO: +- [ ] `BFGSOptimizer` !! 
+""" +function test_cache_setups_for_optimizer_for_multihead_attention_layer(T::Type, dim::Int, n_heads::Int) + @assert dim % n_heads == 0 + model = MultiHeadAttention(dim, n_heads, Stiefel=true) + ps = initialparameters(CPU(), T, model) + + o₁ = Optimizer(AdamOptimizer(), ps) + o₂ = Optimizer(MomentumOptimizer(), ps) + o₃ = Optimizer(GradientOptimizer(), ps) + + check_adam_cache(o₁.cache) + check_momentum_cache(o₂.cache) + check_gradient_cache(o₃.cache) +end + +test_cache_setups_for_optimizer_for_multihead_attention_layer(Float32, 64, 8) \ No newline at end of file diff --git a/test/transformer_related/multi_head_attention_stiefel_retraction.jl b/test/transformer_related/multi_head_attention_stiefel_retraction.jl index 842069da5..54f6214e8 100644 --- a/test/transformer_related/multi_head_attention_stiefel_retraction.jl +++ b/test/transformer_related/multi_head_attention_stiefel_retraction.jl @@ -1,7 +1,3 @@ -""" -This is a test for that checks if the retractions (geodesic and Cayley for now) map from StiefelLieAlgHorMatrix to StiefelManifold when used with MultiHeadAttention. -""" - import Random, Test, Lux, LinearAlgebra, KernelAbstractions using GeometricMachineLearning, Test @@ -9,37 +5,40 @@ using GeometricMachineLearning: geodesic using GeometricMachineLearning: cayley using GeometricMachineLearning: init_optimizer_cache -dim = 64 -n_heads = 8 -Dₕ = dim÷8 -tol = eps(Float32) -T = Float32 -backend = KernelAbstractions.CPU() - -model = MultiHeadAttention(dim, n_heads, Stiefel=true) - -ps = initialparameters(backend, T, model) - -cache = init_optimizer_cache(MomentumOptimizer(), ps) - -E = StiefelProjection(dim, Dₕ, T) -function check_retraction_geodesic(A::AbstractMatrix) +@doc raw""" +This function computes the geodesic retraction of an element of `StiefelLieAlgHorMatrix` and then checks if the resulting element is `StiefelProjection`. +""" +function check_retraction_geodesic(A::AbstractMatrix{T}, tol=eps(T)) where T A_retracted = geodesic(A) @test typeof(A_retracted) <: StiefelManifold - @test LinearAlgebra.norm(A_retracted - E) < tol + @test LinearAlgebra.norm(A_retracted - StiefelProjection(A_retracted)) < tol end check_retraction_geodesic(cache::NamedTuple) = apply_toNT(check_retraction_geodesic, cache) check_retraction_geodesic(B::MomentumCache) = check_retraction_geodesic(B.B) -check_retraction_geodesic(cache) - -E = StiefelProjection(dim, Dₕ) -function check_retraction_cayley(A::AbstractMatrix) +@doc raw""" +This function computes the cayley retraction of an element of `StiefelLieAlgHorMatrix` and then checks if the resulting element is `StiefelProjection`. +""" +function check_retraction_cayley(A::AbstractMatrix{T}, tol=eps(T)) where T A_retracted = cayley(A) @test typeof(A_retracted) <: StiefelManifold - @test LinearAlgebra.norm(A_retracted - E) < tol + @test LinearAlgebra.norm(A_retracted - StiefelProjection(A_retracted)) < tol end check_retraction_cayley(cache::NamedTuple) = apply_toNT(check_retraction_cayley, cache) check_retraction_cayley(B::MomentumCache) = check_retraction_cayley(B.B) -check_retraction_cayley(cache) +@doc raw""" +This is a test for that checks if the retractions (geodesic and Cayley for now) map from `StiefelLieAlgHorMatrix` to `StiefelManifold` when used with `MultiHeadAttention`. 
+""" +function test_multi_head_attention_retraction(T::Type, dim, n_heads, tol=eps(T), backend=KernelAbstractions.CPU()) + model = MultiHeadAttention(dim, n_heads, Stiefel=true) + + ps = initialparameters(backend, T, model) + cache = init_optimizer_cache(MomentumOptimizer(), ps) + + check_retraction_geodesic(cache) + + check_retraction_cayley(cache) +end + +test_multi_head_attention_retraction(Float32, 64, 8) \ No newline at end of file diff --git a/test/transformer_related/multi_head_attention_stiefel_setup.jl b/test/transformer_related/multi_head_attention_stiefel_setup.jl index 3b8cc6cbf..742839331 100644 --- a/test/transformer_related/multi_head_attention_stiefel_setup.jl +++ b/test/transformer_related/multi_head_attention_stiefel_setup.jl @@ -3,25 +3,36 @@ import Random, Test, Lux, LinearAlgebra, KernelAbstractions using GeometricMachineLearning, Test using GeometricMachineLearning: init_optimizer_cache -T = Float32 -model = MultiHeadAttention(64, 8, Stiefel=true) -ps = initialparameters(KernelAbstractions.CPU(), T, model) -tol = 10*eps(T) - -function check_setup(A::AbstractMatrix) +@doc raw""" +This checks for an arbitrary matrix ``A\in\mathbb{R}^{N\times{}n}`` if ``A\in{}St(n,N)``. +""" +function check_setup(A::AbstractMatrix{T}, tol=T(10)*eps(T)) where T @test typeof(A) <: StiefelManifold @test check(A) < tol end check_setup(ps::NamedTuple) = apply_toNT(check_setup, ps) -check_setup(ps) -######## check if the gradients are set up the correct way -function check_grad_setup(B::AbstractMatrix) +@doc raw""" +This checks for an arbitrary matrix ``B\in\mathbb{R}^{N\times{}N}`` if ``B\in\mathfrak{g}^\mathrm{hor}``. +""" +function check_grad_setup(B::AbstractMatrix{T}, tol=T(10)*eps(T)) where T @test typeof(B) <: StiefelLieAlgHorMatrix @test LinearAlgebra.norm(B) < tol end check_grad_setup(gx::NamedTuple) = apply_toNT(check_grad_setup, gx) check_grad_setup(B::MomentumCache) = check_grad_setup(B.B) -gx = init_optimizer_cache(MomentumOptimizer(), ps) -check_grad_setup(gx) +@doc raw""" +Check if `initialparameters` and `init_optimizer_cache` do the right thing for `MultiHeadAttentionLayer`. 
+""" +function check_multi_head_attention_stiefel_setup(T::Type, N::Int, n::Int) + model = MultiHeadAttention(N, n, Stiefel=true) + ps = initialparameters(KernelAbstractions.CPU(), T, model) + + check_setup(ps) + + gx = init_optimizer_cache(MomentumOptimizer(), ps) + check_grad_setup(gx) +end + +check_multi_head_attention_stiefel_setup(Float32, 64, 8) \ No newline at end of file diff --git a/test/transformer_related/transformer_application.jl b/test/transformer_related/transformer_application.jl index 7fc159a8b..6729c889e 100644 --- a/test/transformer_related/transformer_application.jl +++ b/test/transformer_related/transformer_application.jl @@ -1,5 +1,8 @@ using Test, KernelAbstractions, GeometricMachineLearning +@doc raw""" +This tests if the size of the input array is kept constant when fed into the transformer (for a matrix and a tensor) +""" function transformer_application_test(T, dim, n_heads, L, seq_length=8, batch_size=10) model₁ = Chain(Transformer(dim, n_heads, L, Stiefel=false), ResNet(dim)) model₂ = Chain(Transformer(dim, n_heads, L, Stiefel=true), ResNet(dim)) @@ -8,11 +11,11 @@ function transformer_application_test(T, dim, n_heads, L, seq_length=8, batch_si ps₂ = initialparameters(KernelAbstractions.CPU(), T, model₂) input₁ = rand(T, dim, seq_length, batch_size) - input₂ = rand(T, dim, seq_length, batch_size) - + input₂ = rand(T, dim, seq_length) + @test size(model₁(input₁, ps₁)) == size(input₁) - @test size(model₁(input₂, ps₁)) == size(input₂) @test size(model₂(input₁, ps₂)) == size(input₁) + @test size(model₁(input₂, ps₁)) == size(input₂) @test size(model₂(input₂, ps₂)) == size(input₂) end diff --git a/test/transformer_related/transformer_gradient.jl b/test/transformer_related/transformer_gradient.jl index e049933a6..2a53673fc 100644 --- a/test/transformer_related/transformer_gradient.jl +++ b/test/transformer_related/transformer_gradient.jl @@ -1,5 +1,8 @@ using Test, KernelAbstractions, GeometricMachineLearning, Zygote, LinearAlgebra +@doc raw""" +This checks if the gradients of the transformer change the type in case of the Stiefel manifold, and checks if they stay the same in the case of regular weights. +""" function transformer_gradient_test(T, dim, n_heads, L, seq_length=8, batch_size=10) model₁ = Chain(Transformer(dim, n_heads, L, Stiefel=false), ResNet(dim)) model₂ = Chain(Transformer(dim, n_heads, L, Stiefel=true), ResNet(dim)) diff --git a/test/transformer_related/transformer_optimizer.jl b/test/transformer_related/transformer_optimizer.jl index aa3968c62..e6421bdb7 100644 --- a/test/transformer_related/transformer_optimizer.jl +++ b/test/transformer_related/transformer_optimizer.jl @@ -1,5 +1,8 @@ using Test, KernelAbstractions, GeometricMachineLearning, Zygote, LinearAlgebra +@doc raw""" +This function tests if the `GradientOptimzier`, `MomentumOptimizer`, `AdamOptimizer` and `BFGSOptimizer` act on the neural network weights via `optimization_step!`. 
+""" function transformer_gradient_test(T, dim, n_heads, L, seq_length=8, batch_size=10) model = Chain(Transformer(dim, n_heads, L, Stiefel=true), ResNet(dim)) model = Transformer(dim, n_heads, L, Stiefel=true) @@ -14,15 +17,19 @@ function transformer_gradient_test(T, dim, n_heads, L, seq_length=8, batch_size= o₁ = Optimizer(GradientOptimizer(), ps) o₂ = Optimizer(MomentumOptimizer(), ps) o₃ = Optimizer(AdamOptimizer(), ps) + o₄ = Optimizer(BFGSOptimizer(), ps) ps₁ = deepcopy(ps) ps₂ = deepcopy(ps) ps₃ = deepcopy(ps) + ps₄ = deepcopy(ps) optimization_step!(o₁, model, ps₁, dx) optimization_step!(o₂, model, ps₂, dx) optimization_step!(o₃, model, ps₃, dx) - @test typeof(ps₁) == typeof(ps₂) == typeof(ps₃) == typeof(ps) + optimization_step!(o₄, model, ps₄, dx) + @test typeof(ps₁) == typeof(ps₂) == typeof(ps₃) == typeof(ps₄) == typeof(ps) + @test ps₁[1].PQ.head_1 ≉ ps₂[1].PQ.head_1 ≉ ps₃[1].PQ.head_1 ≉ ps₄[1].PQ.head_1 ≉ ps[1].PQ.head_1 end transformer_gradient_test(Float32, 10, 5, 4) \ No newline at end of file diff --git a/test/transformer_related/transformer_setup.jl b/test/transformer_related/transformer_setup.jl index 42ac78995..1311ce26c 100644 --- a/test/transformer_related/transformer_setup.jl +++ b/test/transformer_related/transformer_setup.jl @@ -1,5 +1,8 @@ using Test, KernelAbstractions, GeometricMachineLearning +@doc raw""" +This function tests the setup of the transformer with Stiefel weights. +""" function transformer_setup_test(dim, n_heads, L, T) model = Transformer(dim, n_heads, L, Stiefel=true) ps = initialparameters(KernelAbstractions.CPU(), T, model) From cfc1b5fd52fd243f425fad8c68be57dea4cc3c94 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 13 Dec 2023 16:36:39 +0100 Subject: [PATCH 67/96] Fixed docs and put type dependency for . --- src/data_loader/tensor_assign.jl | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/src/data_loader/tensor_assign.jl b/src/data_loader/tensor_assign.jl index 4aac9bfc0..299e52b84 100644 --- a/src/data_loader/tensor_assign.jl +++ b/src/data_loader/tensor_assign.jl @@ -35,18 +35,22 @@ end end @doc raw""" -The function `assign_output_estimate` is closely related to the transformer. It takes the last prediction_window columns of the output and uses is for the final prediction. +The function `assign_output_estimate` is closely related to the transformer. It takes the last `prediction_window` columns of the output and uses them for the final prediction. i.e. 
```math -\mathbb{R}^{N\times\mathtt{pw}}\to\mathbb{R}^{N\times\mathtt{pw}}, \begin{bmatrix} z^{(1)}_1 & \cdots z^{(T)}_1 \\ - \cdots & \cdots \\ - z^{(T - \mathtt{pw})}_n & \cdots & z^{(T})_n\end{bmatrix} \mapsto - \begin{bmatrix} z^{(1)}_1 & \cdots z^{(T)}_1 \\ - \cdots & \cdots \\ - z^{(T - \mathtt{pw})}_n & \cdots & z^{(T})_n\end{bmatrix} +\mathbb{R}^{N\times\mathtt{pw}}\to\mathbb{R}^{N\times\mathtt{pw}}, +\begin{bmatrix} + z^{(1)}_1 & \cdots & z^{(T)}_1 \\ + \cdots & \cdots & \cdots \\ + z^{(1)}_n & \cdots & z^{(T})_n + \end{bmatrix} \mapsto + \begin{bmatrix} + z^{(T - \mathtt{pw})}_1 & \cdots & z^{(T)}_1 \\ + \cdots & \cdots & \cdots \\ + z^{(T - \mathtt{pw})}_n & \cdots & z^{(T})_n\end{bmatrix} ``` """ -function assign_output_estimate(full_output::AbstractArray{T, 3}, prediction_window) where T +function assign_output_estimate(full_output::AbstractArray{T, 3}, prediction_window::Int) where T sys_dim, seq_length, batch_size = size(full_output) backend = KernelAbstractions.get_backend(full_output) output_estimate = KernelAbstractions.allocate(backend, T, sys_dim, prediction_window, batch_size) From 9773e453a956b776a376920d16ac68d00b60f0c6 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 13 Dec 2023 17:04:22 +0100 Subject: [PATCH 68/96] Fixed docs for rgrad. --- src/manifolds/stiefel_manifold.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/manifolds/stiefel_manifold.jl b/src/manifolds/stiefel_manifold.jl index ec1c3fb26..a98e22079 100644 --- a/src/manifolds/stiefel_manifold.jl +++ b/src/manifolds/stiefel_manifold.jl @@ -64,7 +64,7 @@ function Base.:*(Y::Adjoint{T, StiefelManifold{T, AT}}, B::AbstractMatrix) where end @doc raw""" -Computes the Riemannian gradient for the Stiefel manifold given an element ``Y\in{}St(N,n)`` and a matrix ``\nabla{}L\in\mahbb{R}^{N\times{}n}`` (the Euclidean gradient). It computes the Riemannian gradient with respect to the canonical metric (see the documentation for the function `metric` for an explanation of this). +Computes the Riemannian gradient for the Stiefel manifold given an element ``Y\in{}St(N,n)`` and a matrix ``\nabla{}L\in\mathbb{R}^{N\times{}n}`` (the Euclidean gradient). It computes the Riemannian gradient with respect to the canonical metric (see the documentation for the function `metric` for an explanation of this). The precise form of the mapping is: ```math \mathtt{rgrad}(Y, \nabla{}L) \mapsto \nabla{}L - Y(\nabla{}L)^TY From dbbe3fee8f57df651224554b069f9bfd67cf844f Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 13 Dec 2023 17:12:59 +0100 Subject: [PATCH 69/96] Added additional constructor for the case when the type is the first input argument. --- src/arrays/stiefel_projection.jl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/arrays/stiefel_projection.jl b/src/arrays/stiefel_projection.jl index 82c04c2ac..236948a09 100644 --- a/src/arrays/stiefel_projection.jl +++ b/src/arrays/stiefel_projection.jl @@ -41,6 +41,8 @@ Outer constructor for `StiefelProjection`. 
This works with two integers as input """ StiefelProjection(N::Integer, n::Integer, T::Type=Float64) = StiefelProjection(CPU(), T, N, n) +StiefelProjection(T::Type, N::Integer, n::Integer) = StiefelProjection(N, n, T) + Base.size(E::StiefelProjection) = (E.N, E.n) Base.getindex(E::StiefelProjection, i, j) = getindex(E.A, i, j) Base.:+(E::StiefelProjection, A::AbstractMatrix) = E.A + A From 4e6f2640a3ad0f92b524fee1e434a30e0ae42953 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 20 Dec 2023 08:52:39 +0100 Subject: [PATCH 70/96] Resolved merge conflict. --- test/arrays/array_tests.jl | 12 ++++++++++++ test/runtests.jl | 1 + 2 files changed, 13 insertions(+) diff --git a/test/arrays/array_tests.jl b/test/arrays/array_tests.jl index c0da5eba9..abd21fc5f 100644 --- a/test/arrays/array_tests.jl +++ b/test/arrays/array_tests.jl @@ -76,6 +76,14 @@ function stiefel_lie_alg_add_sub_test(N, n) @test all(abs.(projection(W₁ - W₂) .- S₄) .< 1e-10) end +<<<<<<< HEAD +======= +function stiefel_lie_alg_vectorization_test(N, n; T=Float32) + A = rand(StiefelLieAlgHorMatrix{T}, N, n) + @test isapprox(StiefelLieAlgHorMatrix(vec(A), N, n), A) +end + +>>>>>>> b05c66c (Removed the symplectic arrays.) # TODO: tests for ADAM functions # test everything for different n & N values @@ -96,4 +104,8 @@ for (N, n) ∈ zip(N_vec, n_vec) skew_mat_mul_test2(N) stiefel_proj_test(N,n) stiefel_lie_alg_add_sub_test(N,n) +<<<<<<< HEAD +======= + stiefel_lie_alg_vectorization_test(N, n) +>>>>>>> b05c66c (Removed the symplectic arrays.) end diff --git a/test/runtests.jl b/test/runtests.jl index 033b04f16..92afd85ef 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -4,6 +4,7 @@ using SafeTestsets @safetestset "Check parameterlength " begin include("parameterlength/check_parameterlengths.jl") end @safetestset "Arrays #1 " begin include("arrays/array_tests.jl") end @safetestset "Sampling of arrays " begin include("arrays/random_generation_of_custom_arrays.jl") end +@safetestset "Test constructors for custom arrays " begin include("arrays/constructor_tests_for_custom_arrays.jl") end @safetestset "Manifolds (Grassmann): " begin include("manifolds/grassmann_manifold.jl") end @safetestset "Gradient Layer " begin include("layers/gradient_layer_tests.jl") end @safetestset "Test symplecticity of upscaling layer " begin include("layers/sympnet_layers_test.jl") end From 1f04af95a82d187b4ca90da28bccb67c917daf83 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 13 Dec 2023 17:14:04 +0100 Subject: [PATCH 71/96] Added tests for the various constructors for custom arrays. --- .../constructor_tests_for_custom_arrays.jl | 37 +++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 test/arrays/constructor_tests_for_custom_arrays.jl diff --git a/test/arrays/constructor_tests_for_custom_arrays.jl b/test/arrays/constructor_tests_for_custom_arrays.jl new file mode 100644 index 000000000..452f850f8 --- /dev/null +++ b/test/arrays/constructor_tests_for_custom_arrays.jl @@ -0,0 +1,37 @@ +using GeometricMachineLearning, Test +using LinearAlgebra: I + +@doc raw""" +This tests various constructor for custom arrays, e.g. if calling `SymmetricMatrix` on a matrix ``A`` does +```math +A \mapsto \frac{1}{2}(A + A^T). 
+``` +""" +function test_constructors_for_custom_arrays(n::Int, N::Int, T::Type) + A = rand(T, n, n) + B = rand(T, N, N) + + # SymmetricMatrix + @test Matrix{T}(SymmetricMatrix(A)) ≈ T(.5) * (A + A') + + # SkewSymMatrix + @test Matrix{T}(SkewSymMatrix(A)) ≈ T(.5) * (A - A') + + # StiefelLieAlgHorMatrix + B_shor = StiefelLieAlgHorMatrix(SkewSymMatrix(B), n) + B_shor2 = Matrix{T}(SkewSymMatrix(B)) + B_shor2[(n+1):N, (n+1):N] .= zero(T) + @test Matrix{T}(B_shor) ≈ B_shor2 + + # GrassmannLieAlgHorMatrix + B_ghor = GrassmannLieAlgHorMatrix(SkewSymMatrix(B), n) + B_ghor2 = copy(B_shor2) + B_ghor2[1:n, 1:n] .= zero(T) + @test Matrix{T}(B_ghor) ≈ B_ghor2 + + # StiefelProjection + E = StiefelProjection(T, N, n) + @test Matrix{T}(E) ≈ vcat(I(n), zeros(T, (N-n), n)) +end + +test_constructors_for_custom_arrays(5, 10, Float32) \ No newline at end of file From 7aff01654a3138289225742dcc252a8d01cbbef7 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 13 Dec 2023 13:25:49 +0100 Subject: [PATCH 72/96] Added new routine for multiplying symmetric matrix from the right onto a matrix. --- src/arrays/symmetric.jl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/arrays/symmetric.jl b/src/arrays/symmetric.jl index 928b90ab5..3b81466a1 100644 --- a/src/arrays/symmetric.jl +++ b/src/arrays/symmetric.jl @@ -209,6 +209,8 @@ function Base.:*(A::SymmetricMatrix{T}, B::AbstractMatrix{T}) where T C end +Base.:*(B::AbstractMatrix{T}, A::SymmetricMatrix{T}) where T = (A * B')' + function Base.:*(A::SymmetricMatrix{T}, b::AbstractVector{T}) where T backend = KernelAbstractions.get_backend(A.S) c = KernelAbstractions.allocate(backend, T, A.n) From 3321d4b90f40817f1c8107f5f4ee3ae107f853cb Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 20 Dec 2023 09:01:48 +0100 Subject: [PATCH 73/96] Resolved merge conflict. --- .../addition_tests_for_custom_arrays.jl | 40 ++++++++++++++----- 1 file changed, 31 insertions(+), 9 deletions(-) diff --git a/test/arrays/addition_tests_for_custom_arrays.jl b/test/arrays/addition_tests_for_custom_arrays.jl index 9413c5884..eb0ec6cd9 100644 --- a/test/arrays/addition_tests_for_custom_arrays.jl +++ b/test/arrays/addition_tests_for_custom_arrays.jl @@ -1,12 +1,34 @@ -#= -This tests addition for all custom arrays. Note that these tests will also have to be performed on GPU! -=# +using GeometricMachineLearning, Test -using LinearAlgebra -using Random -using Test -using GeometricMachineLearning +@doc raw""" +This function tests addition for various custom arrays, i.e. if \(A + B\) is performed in the correct way. 
+""" -function test_addition_for_symmetric_matrix(n::Int, T::Type) - A = rand(SymmetricMatrix{T}, n) +function addition_tests_for_custom_arrays(n::Int, N::Int, T::Type) + A = rand(T, n, n) + B = rand(T, n, n) + + # SymmetricMatrix + AB_sym = SymmetricMatrix(A + B) + AB_sym2 = SymmetricMatrix(A) + SymmetricMatrix(B) + @test AB_sym ≈ AB_sym2 + @test typeof(AB_sym) <: SymmetricMatrix{T} + @test typeof(AB_sym2) <: SymmetricMatrix{T} + + # SkewSymMatrix + AB_skew = SkewSymMatrix(A + B) + AB_skew2 = SkewSymMatrix(A) + SkewSymMatrix(B) + @test A_skew ≈ A_skew2 + @test typeof(AB_skew) <: SkewSymMatrix{T} + @test typeof(AB_skew2) <: SkewSymMatrix{T} + + C = rand(T, N, N) + D = rand(T, N, N) + + # StiefelLieAlgHorMatrix + CD_slahm = StiefelLieAlgHorMatrix(A + B, n) + CD_slahm2 = StiefelLieAlgHorMatrix(A, n) + StiefelLieAlgHorMatrix(B, n) + @test CD_slahm ≈ CD_slahm2 + @test typeof(CD_slahm) <: StiefelLieAlgHorMatrix{T} + @test typeof(CD_slahm2) <: StiefelLieAlgHorMatrix{T} end \ No newline at end of file From 65ee5c0528a6e2dc3f9cb820e77ed3a28a7d8ac1 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 20 Dec 2023 09:03:16 +0100 Subject: [PATCH 74/96] Resolved conflict (removed triangular matrices). --- src/GeometricMachineLearning.jl | 1 + src/arrays/symmetric.jl | 12 ++++++++++++ 2 files changed, 13 insertions(+) diff --git a/src/GeometricMachineLearning.jl b/src/GeometricMachineLearning.jl index b7b9e9f92..2b6813f35 100644 --- a/src/GeometricMachineLearning.jl +++ b/src/GeometricMachineLearning.jl @@ -88,6 +88,7 @@ module GeometricMachineLearning include("arrays/abstract_lie_algebra_horizontal.jl") include("arrays/stiefel_lie_algebra_horizontal.jl") include("arrays/grassmann_lie_algebra_horizontal.jl") + include("arrays/symmetric.jl") export SymmetricMatrix, SymplecticPotential, SkewSymMatrix export StiefelLieAlgHorMatrix diff --git a/src/arrays/symmetric.jl b/src/arrays/symmetric.jl index 3b81466a1..761e8dd72 100644 --- a/src/arrays/symmetric.jl +++ b/src/arrays/symmetric.jl @@ -211,6 +211,10 @@ end Base.:*(B::AbstractMatrix{T}, A::SymmetricMatrix{T}) where T = (A * B')' +function Base.:*(A::SymmetricMatrix{T}, B::SymmetricMatrix{T}) where T + A * (B * one(B)) +end + function Base.:*(A::SymmetricMatrix{T}, b::AbstractVector{T}) where T backend = KernelAbstractions.get_backend(A.S) c = KernelAbstractions.allocate(backend, T, A.n) @@ -218,6 +222,14 @@ function Base.:*(A::SymmetricMatrix{T}, b::AbstractVector{T}) where T c end +function Base.one(A::SymmetricMatrix{T}) where T + backend = KernelAbstractions.get_backend(A.S) + unit_matrix = KernelAbstractions.zeros(backend, T, A.n, A.n) + write_ones! = write_ones_kernel!(backend) + write_ones!(unit_matrix, ndrange=A.n) + unit_matrix +end + # define routines for generalizing ChainRulesCore to SymmetricMatrix ChainRulesCore.ProjectTo(A::SymmetricMatrix) = ProjectTo{SymmetricMatrix}(; symmetric=ProjectTo(A.S)) (project::ProjectTo{SymmetricMatrix})(dA::AbstractMatrix) = SymmetricMatrix(project.symmetric(map_to_S(dA)), size(dA, 2)) From 0f958d6d97455e5664fd67fc660dc69cf17369eb Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 13 Dec 2023 14:20:49 +0100 Subject: [PATCH 75/96] Slightly improved readability. 
--- src/arrays/skew_symmetric.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/arrays/skew_symmetric.jl b/src/arrays/skew_symmetric.jl index e1ab85fe2..3c14b9e45 100644 --- a/src/arrays/skew_symmetric.jl +++ b/src/arrays/skew_symmetric.jl @@ -187,7 +187,7 @@ end # the first matrix is multiplied onto A2 in order for it to not be SkewSymMatrix! function Base.:*(A1::SkewSymMatrix{T}, A2::SkewSymMatrix{T}) where T - A1*(one(A2)*A2) + A1 * (one(A2) * A2) end @doc raw""" From 250c7013b76cd987fe7c489a1a92ffb2f50abade Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 20 Dec 2023 09:04:15 +0100 Subject: [PATCH 76/96] Resolved conflict. --- test/arrays/addition_tests_for_custom_arrays.jl | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/test/arrays/addition_tests_for_custom_arrays.jl b/test/arrays/addition_tests_for_custom_arrays.jl index eb0ec6cd9..669ccc0ef 100644 --- a/test/arrays/addition_tests_for_custom_arrays.jl +++ b/test/arrays/addition_tests_for_custom_arrays.jl @@ -3,7 +3,6 @@ using GeometricMachineLearning, Test @doc raw""" This function tests addition for various custom arrays, i.e. if \(A + B\) is performed in the correct way. """ - function addition_tests_for_custom_arrays(n::Int, N::Int, T::Type) A = rand(T, n, n) B = rand(T, n, n) @@ -26,9 +25,15 @@ function addition_tests_for_custom_arrays(n::Int, N::Int, T::Type) D = rand(T, N, N) # StiefelLieAlgHorMatrix - CD_slahm = StiefelLieAlgHorMatrix(A + B, n) - CD_slahm2 = StiefelLieAlgHorMatrix(A, n) + StiefelLieAlgHorMatrix(B, n) + CD_slahm = StiefelLieAlgHorMatrix(C + D, n) + CD_slahm2 = StiefelLieAlgHorMatrix(C, n) + StiefelLieAlgHorMatrix(D, n) @test CD_slahm ≈ CD_slahm2 @test typeof(CD_slahm) <: StiefelLieAlgHorMatrix{T} @test typeof(CD_slahm2) <: StiefelLieAlgHorMatrix{T} + + CD_glahm = GrassmannLieAlgHorMatrix(C + D, n) + CD_glahm2 = GrassmannLieAlgHorMatrix(C, n) + GrassmannLieAlgHorMatrix(D, n) + @test CD_glahm ≈ CD_glahm2 + @test tyepof(CD_glahm) <: GrassmannLieAlgHorMatrix{T} + @test typeof(CD_glahm2) <: GrassmannLieAlgHorMatrix{T} end \ No newline at end of file From b320aaf664062cfc1c11195ecda2cb9728f2f9cc Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 20 Dec 2023 09:05:19 +0100 Subject: [PATCH 77/96] Resolved conflict (accepted both changes). 
---
 test/runtests.jl | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/test/runtests.jl b/test/runtests.jl
index 033b04f16..e03f65f0f 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -4,6 +4,9 @@ using SafeTestsets
 @safetestset "Check parameterlength " begin include("parameterlength/check_parameterlengths.jl") end
 @safetestset "Arrays #1 " begin include("arrays/array_tests.jl") end
 @safetestset "Sampling of arrays " begin include("arrays/random_generation_of_custom_arrays.jl") end
+@safetestset "Addition tests for custom arrays " begin include("arrays/addition_tests_for_custom_arrays.jl") end
+@safetestset "Scalar multiplication tests for custom arrays " begin include("arrays/scalar_multiplication_for_custom_arrays.jl") end
+@safetestset "Matrix multiplication tests for custom arrays " begin include("arrays/matrix_multiplication_for_custom_arrays.jl") end
 @safetestset "Manifolds (Grassmann): " begin include("manifolds/grassmann_manifold.jl") end
 @safetestset "Gradient Layer " begin include("layers/gradient_layer_tests.jl") end
 @safetestset "Test symplecticity of upscaling layer " begin include("layers/sympnet_layers_test.jl") end

From 5ae3067f9e9c579f091746f0a09541f7100e2338 Mon Sep 17 00:00:00 2001
From: benedict-96
Date: Wed, 13 Dec 2023 14:52:14 +0100
Subject: [PATCH 78/96] Added tests for addition, scalar multiplication and
 matrix multiplication for custom arrays.

---
 .../addition_tests_for_custom_arrays.jl       |  4 +-
 ...matrix_multiplication_for_custom_arrays.jl | 21 ++++++++++
 ...scalar_multiplication_for_custom_arrays.jl | 40 +++++++++++++++++++
 3 files changed, 64 insertions(+), 1 deletion(-)
 create mode 100644 test/arrays/matrix_multiplication_for_custom_arrays.jl
 create mode 100644 test/arrays/scalar_multiplication_for_custom_arrays.jl

diff --git a/test/arrays/addition_tests_for_custom_arrays.jl b/test/arrays/addition_tests_for_custom_arrays.jl
index 669ccc0ef..5fd2bbcfc 100644
--- a/test/arrays/addition_tests_for_custom_arrays.jl
+++ b/test/arrays/addition_tests_for_custom_arrays.jl
@@ -36,4 +36,6 @@ function addition_tests_for_custom_arrays(n::Int, N::Int, T::Type)
     @test CD_glahm ≈ CD_glahm2
     @test tyepof(CD_glahm) <: GrassmannLieAlgHorMatrix{T}
     @test typeof(CD_glahm2) <: GrassmannLieAlgHorMatrix{T}
-end
\ No newline at end of file
+end
+
+addition_tests_for_custom_arrays(5, 10, Float32)
\ No newline at end of file

diff --git a/test/arrays/matrix_multiplication_for_custom_arrays.jl b/test/arrays/matrix_multiplication_for_custom_arrays.jl
new file mode 100644
index 000000000..db051ec3e
--- /dev/null
+++ b/test/arrays/matrix_multiplication_for_custom_arrays.jl
@@ -0,0 +1,21 @@
+using GeometricMachineLearning, Test
+
+@doc raw"""
+This function tests matrix multiplication for various custom arrays, i.e. if \((A,B) \mapsto AB\) is performed in the correct way.
+""" +function matrix_multiplication_tests_for_custom_arrays(n::Int, N::Int, T::Type) + A = rand(T, n, n) + B = rand(T, n, N) + + # SymmetricMatrix + A_sym = SymmetricMatrix(A) + @test A_sym * B ≈ Matrix{T}(A_sym) * B + @test B' * A_sym ≈ B' * Matrix{T}(A_sym) + + # SkewSymMatrix + A_skew = SkewSymMatrix(A) + @test A_skew * B ≈ Matrix{T}(A_skew) * B + @test B' * A_skew ≈ B' * Matrix{T}(A_skew) +end + +matrix_multiplication_tests_for_custom_arrays(5, 10, Float32) \ No newline at end of file diff --git a/test/arrays/scalar_multiplication_for_custom_arrays.jl b/test/arrays/scalar_multiplication_for_custom_arrays.jl new file mode 100644 index 000000000..c6214a3e1 --- /dev/null +++ b/test/arrays/scalar_multiplication_for_custom_arrays.jl @@ -0,0 +1,40 @@ +using GeometricMachineLearning, Test + +@doc raw""" +This function tests scalar multiplication for various custom arrays, i.e. if \((A,\alpha) \mapsto \alpha{}A\) is performed in the correct way. +""" +function scalar_multiplication_for_custom_arrays(n::Int, N::Int, T::Type) + A = rand(T, n, n) + α = rand(T) + + # SymmetricMatrix + Aα_sym = SymmetricMatrix(α * A) + Aα_sym2 = α * SymmetricMatrix(A) + @test Aα_sym ≈ Aα_sym2 + @test typeof(Aα_sym) <: SymmetricMatrix{T} + @test typeof(Aα_sym2) <: SymmetricMatrix{T} + + # SkewSymMatrix + Aα_skew = SkewSymMatrix(α * A) + Aα_skew2 = α * SkewSymMatrix(A) + @test Aα_skew ≈ Aα_skew2 + @test typeof(Aα_skew) <: SkewSymMatrix{T} + @test typeof(Aα_skew2) <: SkewSymMatrix{T} + + C = rand(T, N, N) + + # StiefelLieAlgHorMatrix + Cα_slahm = StiefelLieAlgHorMatrix(α * C, n) + Cα_slahm2 = α * StiefelLieAlgHorMatrix(C, n) + @test Cα_slahm ≈ Cα_slahm2 + @test typeof(Cα_slahm) <: StiefelLieAlgHorMatrix{T} + @test typeof(Cα_slahm2) <: StiefelLieAlgHorMatrix{T} + + Cα_glahm = GrassmannLieAlgHorMatrix(α * C, n) + Cα_glahm2 = α * GrassmannLieAlgHorMatrix(C, n) + @test Cα_glahm ≈ Cα_glahm2 + @test tyepof(Cα_glahm) <: GrassmannLieAlgHorMatrix{T} + @test typeof(Cα_glahm2) <: GrassmannLieAlgHorMatrix{T} +end + +scalar_multiplication_for_custom_arrays(5, 10, Float32) \ No newline at end of file From d4bbb6c80de27188f62c102ed66373d16a28f6d1 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 20 Dec 2023 09:06:50 +0100 Subject: [PATCH 79/96] Resolved merge conflict. --- src/GeometricMachineLearning.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/src/GeometricMachineLearning.jl b/src/GeometricMachineLearning.jl index 2b6813f35..b7b9e9f92 100644 --- a/src/GeometricMachineLearning.jl +++ b/src/GeometricMachineLearning.jl @@ -88,7 +88,6 @@ module GeometricMachineLearning include("arrays/abstract_lie_algebra_horizontal.jl") include("arrays/stiefel_lie_algebra_horizontal.jl") include("arrays/grassmann_lie_algebra_horizontal.jl") - include("arrays/symmetric.jl") export SymmetricMatrix, SymplecticPotential, SkewSymMatrix export StiefelLieAlgHorMatrix From f25e3faf6a18cb9dbfbbb1401db6d09996e40aa8 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 20 Dec 2023 09:07:54 +0100 Subject: [PATCH 80/96] Fixed (apparent) conflict. --- test/arrays/array_tests.jl | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/test/arrays/array_tests.jl b/test/arrays/array_tests.jl index c0da5eba9..292eae52f 100644 --- a/test/arrays/array_tests.jl +++ b/test/arrays/array_tests.jl @@ -54,6 +54,10 @@ function skew_mat_mul_test2(n, T=Float64) @test isapprox(AS1, AS2) end +<<<<<<< HEAD +======= + +>>>>>>> c6a350e (Fixed typo.) 
# test Stiefel manifold projection test function stiefel_proj_test(N,n) In = I(n) @@ -61,6 +65,10 @@ function stiefel_proj_test(N,n) @test all(abs.((E'*E) .- In) .< 1e-10) end +<<<<<<< HEAD +======= + +>>>>>>> c6a350e (Fixed typo.) function stiefel_lie_alg_add_sub_test(N, n) E = StiefelProjection(N, n) projection(W::SkewSymMatrix) = W - (I - E*E')*W*(I - E*E') @@ -76,6 +84,14 @@ function stiefel_lie_alg_add_sub_test(N, n) @test all(abs.(projection(W₁ - W₂) .- S₄) .< 1e-10) end +<<<<<<< HEAD +======= +function stiefel_lie_alg_vectorization_test(N, n; T=Float32) + A = rand(StiefelLieAlgHorMatrix{T}, N, n) + @test isapprox(StiefelLieAlgHorMatrix(vec(A), N, n), A) +end + +>>>>>>> c6a350e (Fixed typo.) # TODO: tests for ADAM functions # test everything for different n & N values @@ -96,4 +112,8 @@ for (N, n) ∈ zip(N_vec, n_vec) skew_mat_mul_test2(N) stiefel_proj_test(N,n) stiefel_lie_alg_add_sub_test(N,n) +<<<<<<< HEAD +======= + stiefel_lie_alg_vectorization_test(N, n) +>>>>>>> c6a350e (Fixed typo.) end From e0cc2d2aa03e8ff7e50a1c670b8ab26a681539a2 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 13 Dec 2023 15:05:50 +0100 Subject: [PATCH 81/96] Fixed typo. --- test/arrays/addition_tests_for_custom_arrays.jl | 4 ++-- test/arrays/scalar_multiplication_for_custom_arrays.jl | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/test/arrays/addition_tests_for_custom_arrays.jl b/test/arrays/addition_tests_for_custom_arrays.jl index 5fd2bbcfc..44fc878c1 100644 --- a/test/arrays/addition_tests_for_custom_arrays.jl +++ b/test/arrays/addition_tests_for_custom_arrays.jl @@ -17,7 +17,7 @@ function addition_tests_for_custom_arrays(n::Int, N::Int, T::Type) # SkewSymMatrix AB_skew = SkewSymMatrix(A + B) AB_skew2 = SkewSymMatrix(A) + SkewSymMatrix(B) - @test A_skew ≈ A_skew2 + @test AB_skew ≈ AB_skew2 @test typeof(AB_skew) <: SkewSymMatrix{T} @test typeof(AB_skew2) <: SkewSymMatrix{T} @@ -34,7 +34,7 @@ function addition_tests_for_custom_arrays(n::Int, N::Int, T::Type) CD_glahm = GrassmannLieAlgHorMatrix(C + D, n) CD_glahm2 = GrassmannLieAlgHorMatrix(C, n) + GrassmannLieAlgHorMatrix(D, n) @test CD_glahm ≈ CD_glahm2 - @test tyepof(CD_glahm) <: GrassmannLieAlgHorMatrix{T} + @test typeof(CD_glahm) <: GrassmannLieAlgHorMatrix{T} @test typeof(CD_glahm2) <: GrassmannLieAlgHorMatrix{T} end diff --git a/test/arrays/scalar_multiplication_for_custom_arrays.jl b/test/arrays/scalar_multiplication_for_custom_arrays.jl index c6214a3e1..3a5e56205 100644 --- a/test/arrays/scalar_multiplication_for_custom_arrays.jl +++ b/test/arrays/scalar_multiplication_for_custom_arrays.jl @@ -33,7 +33,7 @@ function scalar_multiplication_for_custom_arrays(n::Int, N::Int, T::Type) Cα_glahm = GrassmannLieAlgHorMatrix(α * C, n) Cα_glahm2 = α * GrassmannLieAlgHorMatrix(C, n) @test Cα_glahm ≈ Cα_glahm2 - @test tyepof(Cα_glahm) <: GrassmannLieAlgHorMatrix{T} + @test typeof(Cα_glahm) <: GrassmannLieAlgHorMatrix{T} @test typeof(Cα_glahm2) <: GrassmannLieAlgHorMatrix{T} end From 1e659d7c5f6e423824a4cb216ce507f39aefb1e6 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 20 Dec 2023 09:13:53 +0100 Subject: [PATCH 82/96] Fixed comments from previous merge conflict. 
--- test/arrays/array_tests.jl | 27 --------------------------- 1 file changed, 27 deletions(-) diff --git a/test/arrays/array_tests.jl b/test/arrays/array_tests.jl index dadc11665..8621ddda7 100644 --- a/test/arrays/array_tests.jl +++ b/test/arrays/array_tests.jl @@ -53,14 +53,6 @@ function skew_mat_mul_test2(n, T=Float64) AS2 = A*Matrix{T}(S) @test isapprox(AS1, AS2) end - -<<<<<<< HEAD -<<<<<<< HEAD -======= -======= ->>>>>>> c6a350ed471420a4b4583c45978658d85b1f253e - ->>>>>>> c6a350e (Fixed typo.) # test Stiefel manifold projection test function stiefel_proj_test(N,n) In = I(n) @@ -68,13 +60,6 @@ function stiefel_proj_test(N,n) @test all(abs.((E'*E) .- In) .< 1e-10) end -<<<<<<< HEAD -<<<<<<< HEAD -======= -======= ->>>>>>> c6a350ed471420a4b4583c45978658d85b1f253e - ->>>>>>> c6a350e (Fixed typo.) function stiefel_lie_alg_add_sub_test(N, n) E = StiefelProjection(N, n) projection(W::SkewSymMatrix) = W - (I - E*E')*W*(I - E*E') @@ -90,17 +75,11 @@ function stiefel_lie_alg_add_sub_test(N, n) @test all(abs.(projection(W₁ - W₂) .- S₄) .< 1e-10) end -<<<<<<< HEAD -<<<<<<< HEAD -======= -======= ->>>>>>> c6a350ed471420a4b4583c45978658d85b1f253e function stiefel_lie_alg_vectorization_test(N, n; T=Float32) A = rand(StiefelLieAlgHorMatrix{T}, N, n) @test isapprox(StiefelLieAlgHorMatrix(vec(A), N, n), A) end ->>>>>>> c6a350e (Fixed typo.) # TODO: tests for ADAM functions # test everything for different n & N values @@ -121,11 +100,5 @@ for (N, n) ∈ zip(N_vec, n_vec) skew_mat_mul_test2(N) stiefel_proj_test(N,n) stiefel_lie_alg_add_sub_test(N,n) -<<<<<<< HEAD -<<<<<<< HEAD -======= -======= ->>>>>>> c6a350ed471420a4b4583c45978658d85b1f253e stiefel_lie_alg_vectorization_test(N, n) ->>>>>>> c6a350e (Fixed typo.) end From 57108602a71ef405c802309fbba126aa9ed45556 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 13 Dec 2023 16:36:39 +0100 Subject: [PATCH 83/96] Fixed docs and put type dependency for . --- src/data_loader/tensor_assign.jl | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/src/data_loader/tensor_assign.jl b/src/data_loader/tensor_assign.jl index 4aac9bfc0..299e52b84 100644 --- a/src/data_loader/tensor_assign.jl +++ b/src/data_loader/tensor_assign.jl @@ -35,18 +35,22 @@ end end @doc raw""" -The function `assign_output_estimate` is closely related to the transformer. It takes the last prediction_window columns of the output and uses is for the final prediction. +The function `assign_output_estimate` is closely related to the transformer. It takes the last `prediction_window` columns of the output and uses them for the final prediction. i.e. 
```math
-\mathbb{R}^{N\times\mathtt{pw}}\to\mathbb{R}^{N\times\mathtt{pw}}, \begin{bmatrix} z^{(1)}_1 & \cdots z^{(T)}_1 \\
- \cdots & \cdots \\
- z^{(T - \mathtt{pw})}_n & \cdots & z^{(T})_n\end{bmatrix} \mapsto
- \begin{bmatrix} z^{(1)}_1 & \cdots z^{(T)}_1 \\
- \cdots & \cdots \\
- z^{(T - \mathtt{pw})}_n & \cdots & z^{(T})_n\end{bmatrix}
+\mathbb{R}^{N\times{}T}\to\mathbb{R}^{N\times\mathtt{pw}},
+\begin{bmatrix}
+ z^{(1)}_1 & \cdots & z^{(T)}_1 \\
+ \cdots & \cdots & \cdots \\
+ z^{(1)}_n & \cdots & z^{(T)}_n
+ \end{bmatrix} \mapsto
+ \begin{bmatrix}
+ z^{(T - \mathtt{pw})}_1 & \cdots & z^{(T)}_1 \\
+ \cdots & \cdots & \cdots \\
+ z^{(T - \mathtt{pw})}_n & \cdots & z^{(T)}_n\end{bmatrix}
 ```
 """
-function assign_output_estimate(full_output::AbstractArray{T, 3}, prediction_window) where T
+function assign_output_estimate(full_output::AbstractArray{T, 3}, prediction_window::Int) where T
     sys_dim, seq_length, batch_size = size(full_output)
     backend = KernelAbstractions.get_backend(full_output)
     output_estimate = KernelAbstractions.allocate(backend, T, sys_dim, prediction_window, batch_size)

From ce78b93d03c8365f165598090da6588b474b618e Mon Sep 17 00:00:00 2001
From: benedict-96
Date: Wed, 13 Dec 2023 17:04:22 +0100
Subject: [PATCH 84/96] Fixed docs for rgrad.

---
 src/manifolds/stiefel_manifold.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/manifolds/stiefel_manifold.jl b/src/manifolds/stiefel_manifold.jl
index ec1c3fb26..a98e22079 100644
--- a/src/manifolds/stiefel_manifold.jl
+++ b/src/manifolds/stiefel_manifold.jl
@@ -64,7 +64,7 @@ function Base.:*(Y::Adjoint{T, StiefelManifold{T, AT}}, B::AbstractMatrix) where
 end

 @doc raw"""
-Computes the Riemannian gradient for the Stiefel manifold given an element ``Y\in{}St(N,n)`` and a matrix ``\nabla{}L\in\mahbb{R}^{N\times{}n}`` (the Euclidean gradient). It computes the Riemannian gradient with respect to the canonical metric (see the documentation for the function `metric` for an explanation of this).
+Computes the Riemannian gradient for the Stiefel manifold given an element ``Y\in{}St(N,n)`` and a matrix ``\nabla{}L\in\mathbb{R}^{N\times{}n}`` (the Euclidean gradient). It computes the Riemannian gradient with respect to the canonical metric (see the documentation for the function `metric` for an explanation of this).
 The precise form of the mapping is:
 ```math
 \mathtt{rgrad}(Y, \nabla{}L) \mapsto \nabla{}L - Y(\nabla{}L)^TY

From f6dda17c48cbe298949abfaae231af52fa54daa0 Mon Sep 17 00:00:00 2001
From: benedict-96
Date: Wed, 13 Dec 2023 17:12:59 +0100
Subject: [PATCH 85/96] Added additional constructor for the case when the
 type is the first input argument.

---
 src/arrays/stiefel_projection.jl | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/arrays/stiefel_projection.jl b/src/arrays/stiefel_projection.jl
index 82c04c2ac..236948a09 100644
--- a/src/arrays/stiefel_projection.jl
+++ b/src/arrays/stiefel_projection.jl
@@ -41,6 +41,8 @@ Outer constructor for `StiefelProjection`.
This works with two integers as input """ StiefelProjection(N::Integer, n::Integer, T::Type=Float64) = StiefelProjection(CPU(), T, N, n) +StiefelProjection(T::Type, N::Integer, n::Integer) = StiefelProjection(N, n, T) + Base.size(E::StiefelProjection) = (E.N, E.n) Base.getindex(E::StiefelProjection, i, j) = getindex(E.A, i, j) Base.:+(E::StiefelProjection, A::AbstractMatrix) = E.A + A From 99e83e6c0b206be4ab64e16051fa1872f41eb735 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 20 Dec 2023 09:17:25 +0100 Subject: [PATCH 86/96] Resolved conflict (forgot to resolve another one before). --- test/arrays/array_tests.jl | 1 + test/runtests.jl | 1 + 2 files changed, 2 insertions(+) diff --git a/test/arrays/array_tests.jl b/test/arrays/array_tests.jl index c2badb77b..1c372344e 100644 --- a/test/arrays/array_tests.jl +++ b/test/arrays/array_tests.jl @@ -76,6 +76,7 @@ function stiefel_lie_alg_add_sub_test(N, n) @test all(abs.(projection(W₁ - W₂) .- S₄) .< 1e-10) end + function stiefel_lie_alg_vectorization_test(N, n; T=Float32) A = rand(StiefelLieAlgHorMatrix{T}, N, n) @test isapprox(StiefelLieAlgHorMatrix(vec(A), N, n), A) diff --git a/test/runtests.jl b/test/runtests.jl index 1b6cb1b4d..1db601285 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -4,6 +4,7 @@ using SafeTestsets @safetestset "Check parameterlength " begin include("parameterlength/check_parameterlengths.jl") end @safetestset "Arrays #1 " begin include("arrays/array_tests.jl") end @safetestset "Sampling of arrays " begin include("arrays/random_generation_of_custom_arrays.jl") end +@safetestset "Test constructors for custom arrays " begin include("arrays/constructor_tests_for_custom_arrays.jl") end @safetestset "Manifolds (Grassmann): " begin include("manifolds/grassmann_manifold.jl") end @safetestset "Gradient Layer " begin include("layers/gradient_layer_tests.jl") end @safetestset "Test symplecticity of upscaling layer " begin include("layers/sympnet_layers_test.jl") end From 02b5b31170b651eeb63700d6ebec91284ce8e781 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 13 Dec 2023 17:14:04 +0100 Subject: [PATCH 87/96] Added tests for the various constructors for custom arrays. --- .../constructor_tests_for_custom_arrays.jl | 37 +++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 test/arrays/constructor_tests_for_custom_arrays.jl diff --git a/test/arrays/constructor_tests_for_custom_arrays.jl b/test/arrays/constructor_tests_for_custom_arrays.jl new file mode 100644 index 000000000..452f850f8 --- /dev/null +++ b/test/arrays/constructor_tests_for_custom_arrays.jl @@ -0,0 +1,37 @@ +using GeometricMachineLearning, Test +using LinearAlgebra: I + +@doc raw""" +This tests various constructor for custom arrays, e.g. if calling `SymmetricMatrix` on a matrix ``A`` does +```math +A \mapsto \frac{1}{2}(A + A^T). 
+``` +""" +function test_constructors_for_custom_arrays(n::Int, N::Int, T::Type) + A = rand(T, n, n) + B = rand(T, N, N) + + # SymmetricMatrix + @test Matrix{T}(SymmetricMatrix(A)) ≈ T(.5) * (A + A') + + # SkewSymMatrix + @test Matrix{T}(SkewSymMatrix(A)) ≈ T(.5) * (A - A') + + # StiefelLieAlgHorMatrix + B_shor = StiefelLieAlgHorMatrix(SkewSymMatrix(B), n) + B_shor2 = Matrix{T}(SkewSymMatrix(B)) + B_shor2[(n+1):N, (n+1):N] .= zero(T) + @test Matrix{T}(B_shor) ≈ B_shor2 + + # GrassmannLieAlgHorMatrix + B_ghor = GrassmannLieAlgHorMatrix(SkewSymMatrix(B), n) + B_ghor2 = copy(B_shor2) + B_ghor2[1:n, 1:n] .= zero(T) + @test Matrix{T}(B_ghor) ≈ B_ghor2 + + # StiefelProjection + E = StiefelProjection(T, N, n) + @test Matrix{T}(E) ≈ vcat(I(n), zeros(T, (N-n), n)) +end + +test_constructors_for_custom_arrays(5, 10, Float32) \ No newline at end of file From 36bb2a9a39b0ec8cf560084403e734ff340a610e Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 20 Dec 2023 09:18:53 +0100 Subject: [PATCH 88/96] Removed the symplectic arrays. --- test/arrays/array_tests.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/test/arrays/array_tests.jl b/test/arrays/array_tests.jl index 1c372344e..c2badb77b 100644 --- a/test/arrays/array_tests.jl +++ b/test/arrays/array_tests.jl @@ -76,7 +76,6 @@ function stiefel_lie_alg_add_sub_test(N, n) @test all(abs.(projection(W₁ - W₂) .- S₄) .< 1e-10) end - function stiefel_lie_alg_vectorization_test(N, n; T=Float32) A = rand(StiefelLieAlgHorMatrix{T}, N, n) @test isapprox(StiefelLieAlgHorMatrix(vec(A), N, n), A) From e6b2d34990d15a214bee225c065fb23ed3c75a62 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 13 Dec 2023 16:36:39 +0100 Subject: [PATCH 89/96] Fixed docs and put type dependency for . --- src/data_loader/tensor_assign.jl | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/src/data_loader/tensor_assign.jl b/src/data_loader/tensor_assign.jl index 4aac9bfc0..299e52b84 100644 --- a/src/data_loader/tensor_assign.jl +++ b/src/data_loader/tensor_assign.jl @@ -35,18 +35,22 @@ end end @doc raw""" -The function `assign_output_estimate` is closely related to the transformer. It takes the last prediction_window columns of the output and uses is for the final prediction. +The function `assign_output_estimate` is closely related to the transformer. It takes the last `prediction_window` columns of the output and uses them for the final prediction. i.e. 
```math
-\mathbb{R}^{N\times\mathtt{pw}}\to\mathbb{R}^{N\times\mathtt{pw}}, \begin{bmatrix} z^{(1)}_1 & \cdots z^{(T)}_1 \\
- \cdots & \cdots \\
- z^{(T - \mathtt{pw})}_n & \cdots & z^{(T})_n\end{bmatrix} \mapsto
- \begin{bmatrix} z^{(1)}_1 & \cdots z^{(T)}_1 \\
- \cdots & \cdots \\
- z^{(T - \mathtt{pw})}_n & \cdots & z^{(T})_n\end{bmatrix}
+\mathbb{R}^{N\times{}T}\to\mathbb{R}^{N\times\mathtt{pw}},
+\begin{bmatrix}
+ z^{(1)}_1 & \cdots & z^{(T)}_1 \\
+ \cdots & \cdots & \cdots \\
+ z^{(1)}_n & \cdots & z^{(T)}_n
+ \end{bmatrix} \mapsto
+ \begin{bmatrix}
+ z^{(T - \mathtt{pw})}_1 & \cdots & z^{(T)}_1 \\
+ \cdots & \cdots & \cdots \\
+ z^{(T - \mathtt{pw})}_n & \cdots & z^{(T)}_n\end{bmatrix}
 ```
 """
-function assign_output_estimate(full_output::AbstractArray{T, 3}, prediction_window) where T
+function assign_output_estimate(full_output::AbstractArray{T, 3}, prediction_window::Int) where T
     sys_dim, seq_length, batch_size = size(full_output)
     backend = KernelAbstractions.get_backend(full_output)
     output_estimate = KernelAbstractions.allocate(backend, T, sys_dim, prediction_window, batch_size)

From f28680f132622ac2b28282d145bf2f2e042603c2 Mon Sep 17 00:00:00 2001
From: benedict-96
Date: Wed, 13 Dec 2023 17:04:22 +0100
Subject: [PATCH 90/96] Fixed docs for rgrad.

---
 src/manifolds/stiefel_manifold.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/manifolds/stiefel_manifold.jl b/src/manifolds/stiefel_manifold.jl
index ec1c3fb26..a98e22079 100644
--- a/src/manifolds/stiefel_manifold.jl
+++ b/src/manifolds/stiefel_manifold.jl
@@ -64,7 +64,7 @@ function Base.:*(Y::Adjoint{T, StiefelManifold{T, AT}}, B::AbstractMatrix) where
 end

 @doc raw"""
-Computes the Riemannian gradient for the Stiefel manifold given an element ``Y\in{}St(N,n)`` and a matrix ``\nabla{}L\in\mahbb{R}^{N\times{}n}`` (the Euclidean gradient). It computes the Riemannian gradient with respect to the canonical metric (see the documentation for the function `metric` for an explanation of this).
+Computes the Riemannian gradient for the Stiefel manifold given an element ``Y\in{}St(N,n)`` and a matrix ``\nabla{}L\in\mathbb{R}^{N\times{}n}`` (the Euclidean gradient). It computes the Riemannian gradient with respect to the canonical metric (see the documentation for the function `metric` for an explanation of this).
 The precise form of the mapping is:
 ```math
 \mathtt{rgrad}(Y, \nabla{}L) \mapsto \nabla{}L - Y(\nabla{}L)^TY

From 3456fc5db1a56089eb600aa32febb6cab89a6354 Mon Sep 17 00:00:00 2001
From: benedict-96
Date: Wed, 13 Dec 2023 17:12:59 +0100
Subject: [PATCH 91/96] Added additional constructor for the case when the
 type is the first input argument.

---
 src/arrays/stiefel_projection.jl | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/arrays/stiefel_projection.jl b/src/arrays/stiefel_projection.jl
index 82c04c2ac..236948a09 100644
--- a/src/arrays/stiefel_projection.jl
+++ b/src/arrays/stiefel_projection.jl
@@ -41,6 +41,8 @@ Outer constructor for `StiefelProjection`.
This works with two integers as input """ StiefelProjection(N::Integer, n::Integer, T::Type=Float64) = StiefelProjection(CPU(), T, N, n) +StiefelProjection(T::Type, N::Integer, n::Integer) = StiefelProjection(N, n, T) + Base.size(E::StiefelProjection) = (E.N, E.n) Base.getindex(E::StiefelProjection, i, j) = getindex(E.A, i, j) Base.:+(E::StiefelProjection, A::AbstractMatrix) = E.A + A From f202264cfe85ffaeb7521578fb9f2ef867b3a6ab Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 20 Dec 2023 14:27:47 +0100 Subject: [PATCH 92/96] Resolved conflict (forgot to resolve another one before). --- test/arrays/array_tests.jl | 1 + test/runtests.jl | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/test/arrays/array_tests.jl b/test/arrays/array_tests.jl index 8621ddda7..9c00adc75 100644 --- a/test/arrays/array_tests.jl +++ b/test/arrays/array_tests.jl @@ -75,6 +75,7 @@ function stiefel_lie_alg_add_sub_test(N, n) @test all(abs.(projection(W₁ - W₂) .- S₄) .< 1e-10) end + function stiefel_lie_alg_vectorization_test(N, n; T=Float32) A = rand(StiefelLieAlgHorMatrix{T}, N, n) @test isapprox(StiefelLieAlgHorMatrix(vec(A), N, n), A) diff --git a/test/runtests.jl b/test/runtests.jl index 672ae9123..e67116b3b 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -4,9 +4,13 @@ using SafeTestsets @safetestset "Check parameterlength " begin include("parameterlength/check_parameterlengths.jl") end @safetestset "Arrays #1 " begin include("arrays/array_tests.jl") end @safetestset "Sampling of arrays " begin include("arrays/random_generation_of_custom_arrays.jl") end +<<<<<<< HEAD @safetestset "Addition tests for custom arrays " begin include("arrays/addition_tests_for_custom_arrays.jl") end @safetestset "Scalar multiplication tests for custom arrays " begin include("arrays/scalar_multiplication_for_custom_arrays.jl") end @safetestset "Matrix multiplication tests for custom arrays " begin include("arrays/matrix_multiplication_for_custom_arrays.jl") end +======= +@safetestset "Test constructors for custom arrays " begin include("arrays/constructor_tests_for_custom_arrays.jl") end +>>>>>>> 99e83e6 (Resolved conflict (forgot to resolve another one before).) @safetestset "Manifolds (Grassmann): " begin include("manifolds/grassmann_manifold.jl") end @safetestset "Gradient Layer " begin include("layers/gradient_layer_tests.jl") end @safetestset "Test symplecticity of upscaling layer " begin include("layers/sympnet_layers_test.jl") end From c4cdd5126a9261d86b2beba4afb3dc3a7a7f0c58 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 13 Dec 2023 17:14:04 +0100 Subject: [PATCH 93/96] Added tests for the various constructors for custom arrays. --- .../constructor_tests_for_custom_arrays.jl | 37 +++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 test/arrays/constructor_tests_for_custom_arrays.jl diff --git a/test/arrays/constructor_tests_for_custom_arrays.jl b/test/arrays/constructor_tests_for_custom_arrays.jl new file mode 100644 index 000000000..452f850f8 --- /dev/null +++ b/test/arrays/constructor_tests_for_custom_arrays.jl @@ -0,0 +1,37 @@ +using GeometricMachineLearning, Test +using LinearAlgebra: I + +@doc raw""" +This tests various constructor for custom arrays, e.g. if calling `SymmetricMatrix` on a matrix ``A`` does +```math +A \mapsto \frac{1}{2}(A + A^T). 
+``` +""" +function test_constructors_for_custom_arrays(n::Int, N::Int, T::Type) + A = rand(T, n, n) + B = rand(T, N, N) + + # SymmetricMatrix + @test Matrix{T}(SymmetricMatrix(A)) ≈ T(.5) * (A + A') + + # SkewSymMatrix + @test Matrix{T}(SkewSymMatrix(A)) ≈ T(.5) * (A - A') + + # StiefelLieAlgHorMatrix + B_shor = StiefelLieAlgHorMatrix(SkewSymMatrix(B), n) + B_shor2 = Matrix{T}(SkewSymMatrix(B)) + B_shor2[(n+1):N, (n+1):N] .= zero(T) + @test Matrix{T}(B_shor) ≈ B_shor2 + + # GrassmannLieAlgHorMatrix + B_ghor = GrassmannLieAlgHorMatrix(SkewSymMatrix(B), n) + B_ghor2 = copy(B_shor2) + B_ghor2[1:n, 1:n] .= zero(T) + @test Matrix{T}(B_ghor) ≈ B_ghor2 + + # StiefelProjection + E = StiefelProjection(T, N, n) + @test Matrix{T}(E) ≈ vcat(I(n), zeros(T, (N-n), n)) +end + +test_constructors_for_custom_arrays(5, 10, Float32) \ No newline at end of file From f388a4c12308c0ff4ccc875263eb9c47c9712754 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 20 Dec 2023 09:18:53 +0100 Subject: [PATCH 94/96] Removed the symplectic arrays. --- test/arrays/array_tests.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/test/arrays/array_tests.jl b/test/arrays/array_tests.jl index 9c00adc75..8621ddda7 100644 --- a/test/arrays/array_tests.jl +++ b/test/arrays/array_tests.jl @@ -75,7 +75,6 @@ function stiefel_lie_alg_add_sub_test(N, n) @test all(abs.(projection(W₁ - W₂) .- S₄) .< 1e-10) end - function stiefel_lie_alg_vectorization_test(N, n; T=Float32) A = rand(StiefelLieAlgHorMatrix{T}, N, n) @test isapprox(StiefelLieAlgHorMatrix(vec(A), N, n), A) From 882fa043e651f681d12766119df14b976b2d0148 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 20 Dec 2023 14:29:15 +0100 Subject: [PATCH 95/96] Resolved merge conflict. --- test/arrays/array_tests.jl | 1 + test/runtests.jl | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/test/arrays/array_tests.jl b/test/arrays/array_tests.jl index 8621ddda7..9c00adc75 100644 --- a/test/arrays/array_tests.jl +++ b/test/arrays/array_tests.jl @@ -75,6 +75,7 @@ function stiefel_lie_alg_add_sub_test(N, n) @test all(abs.(projection(W₁ - W₂) .- S₄) .< 1e-10) end + function stiefel_lie_alg_vectorization_test(N, n; T=Float32) A = rand(StiefelLieAlgHorMatrix{T}, N, n) @test isapprox(StiefelLieAlgHorMatrix(vec(A), N, n), A) diff --git a/test/runtests.jl b/test/runtests.jl index e67116b3b..d1d0ad577 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -5,12 +5,16 @@ using SafeTestsets @safetestset "Arrays #1 " begin include("arrays/array_tests.jl") end @safetestset "Sampling of arrays " begin include("arrays/random_generation_of_custom_arrays.jl") end <<<<<<< HEAD +<<<<<<< HEAD @safetestset "Addition tests for custom arrays " begin include("arrays/addition_tests_for_custom_arrays.jl") end @safetestset "Scalar multiplication tests for custom arrays " begin include("arrays/scalar_multiplication_for_custom_arrays.jl") end @safetestset "Matrix multiplication tests for custom arrays " begin include("arrays/matrix_multiplication_for_custom_arrays.jl") end ======= @safetestset "Test constructors for custom arrays " begin include("arrays/constructor_tests_for_custom_arrays.jl") end >>>>>>> 99e83e6 (Resolved conflict (forgot to resolve another one before).) +======= +@safetestset "Test constructors for custom arrays " begin include("arrays/constructor_tests_for_custom_arrays.jl") end +>>>>>>> 4e6f264 (Resolved merge conflict.) 
@safetestset "Manifolds (Grassmann): " begin include("manifolds/grassmann_manifold.jl") end @safetestset "Gradient Layer " begin include("layers/gradient_layer_tests.jl") end @safetestset "Test symplecticity of upscaling layer " begin include("layers/sympnet_layers_test.jl") end From 2aa69196d17ad70686b47b0ab90926b0c43713c6 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 20 Dec 2023 14:30:09 +0100 Subject: [PATCH 96/96] Removed the symplectic arrays. --- test/arrays/array_tests.jl | 1 + test/runtests.jl | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/test/arrays/array_tests.jl b/test/arrays/array_tests.jl index 9c00adc75..1c372344e 100644 --- a/test/arrays/array_tests.jl +++ b/test/arrays/array_tests.jl @@ -53,6 +53,7 @@ function skew_mat_mul_test2(n, T=Float64) AS2 = A*Matrix{T}(S) @test isapprox(AS1, AS2) end + # test Stiefel manifold projection test function stiefel_proj_test(N,n) In = I(n) diff --git a/test/runtests.jl b/test/runtests.jl index d1d0ad577..e2f94167a 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -3,6 +3,7 @@ using SafeTestsets @safetestset "Check parameterlength " begin include("parameterlength/check_parameterlengths.jl") end @safetestset "Arrays #1 " begin include("arrays/array_tests.jl") end +<<<<<<< HEAD @safetestset "Sampling of arrays " begin include("arrays/random_generation_of_custom_arrays.jl") end <<<<<<< HEAD <<<<<<< HEAD @@ -15,6 +16,9 @@ using SafeTestsets ======= @safetestset "Test constructors for custom arrays " begin include("arrays/constructor_tests_for_custom_arrays.jl") end >>>>>>> 4e6f264 (Resolved merge conflict.) +======= +@safetestset "Test constructors for custom arrays " begin include("arrays/constructor_tests_for_custom_arrays.jl") end +>>>>>>> b05c66c (Removed the symplectic arrays.) @safetestset "Manifolds (Grassmann): " begin include("manifolds/grassmann_manifold.jl") end @safetestset "Gradient Layer " begin include("layers/gradient_layer_tests.jl") end @safetestset "Test symplecticity of upscaling layer " begin include("layers/sympnet_layers_test.jl") end
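For reference, the array test files touched in this series are self-contained
scripts: each one loads the package, defines its test function and calls it at
the bottom (e.g. `addition_tests_for_custom_arrays(5, 10, Float32)`). A minimal
sketch of how to run just these files from the repository root, outside of
`runtests.jl` (assuming an instantiated checkout of GeometricMachineLearning.jl;
the environment setup itself is not part of these patches):

    using Pkg
    Pkg.activate(".")      # the GeometricMachineLearning.jl project
    Pkg.instantiate()

    include("test/arrays/addition_tests_for_custom_arrays.jl")
    include("test/arrays/scalar_multiplication_for_custom_arrays.jl")
    include("test/arrays/matrix_multiplication_for_custom_arrays.jl")
    include("test/arrays/constructor_tests_for_custom_arrays.jl")

Each `include` executes the `@test` calls in the corresponding file directly,
without going through the `@safetestset` wrappers registered in `runtests.jl`.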