From ba96b359cf1730f4efc502625664907398229d11 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 13 Dec 2023 09:19:22 +0100 Subject: [PATCH 01/60] Remove old array tests. --- test/arrays/array_tests_old.jl | 27 --------------------------- 1 file changed, 27 deletions(-) delete mode 100644 test/arrays/array_tests_old.jl diff --git a/test/arrays/array_tests_old.jl b/test/arrays/array_tests_old.jl deleted file mode 100644 index 98e08a726..000000000 --- a/test/arrays/array_tests_old.jl +++ /dev/null @@ -1,27 +0,0 @@ -using LinearAlgebra -using Test - -using GeometricMachineLearning: BlockIdentityLowerMatrix, SymmetricBlockIdentityLowerMatrix -using GeometricMachineLearning: BlockIdentityUpperMatrix, SymmetricBlockIdentityUpperMatrix -using GeometricMachineLearning: ZeroVector - - -W = rand(2,2) -S = W .+ W' -x = ones(4) -y = zero(x) - -@test mul!(y, BlockIdentityLowerMatrix(W), x) == vcat(ones(2), ones(2) .+ W * ones(2)) -@test mul!(y, BlockIdentityUpperMatrix(W), x) == vcat(ones(2) .+ W * ones(2), ones(2)) - -@test mul!(y, SymmetricBlockIdentityLowerMatrix(W), x) == vcat(ones(2), ones(2) .+ S * ones(2)) -@test mul!(y, SymmetricBlockIdentityUpperMatrix(W), x) == vcat(ones(2) .+ S * ones(2), ones(2)) - - -z = ZeroVector(Float64, 4) - -@test z[1] == zero(Float64) -@test z[4] == zero(Float64) - -@test_throws AssertionError z[0] -@test_throws AssertionError z[5] From 0fb91568e2d76661db1b099654001e68ae48734d Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 13 Dec 2023 09:32:38 +0100 Subject: [PATCH 02/60] Removed the block matrices. This is now done by the LinearSympNetLayers. --- src/arrays/block_identity_lower.jl | 37 ------------------------------ src/arrays/block_identity_upper.jl | 37 ------------------------------ 2 files changed, 74 deletions(-) delete mode 100644 src/arrays/block_identity_lower.jl delete mode 100644 src/arrays/block_identity_upper.jl diff --git a/src/arrays/block_identity_lower.jl b/src/arrays/block_identity_lower.jl deleted file mode 100644 index d51fab751..000000000 --- a/src/arrays/block_identity_lower.jl +++ /dev/null @@ -1,37 +0,0 @@ -""" -A `BlockIdentityLowerMatrix` is a matrix with blocks -| 1 0 | -| S 1 | -Currently, it only implements a custom `mul!` method, exploiting this structure. -""" -struct BlockIdentityLowerMatrix{T, AT <: AbstractMatrix{T}} <: AbstractMatrix{T} - S::AT - - function BlockIdentityLowerMatrix(S::AbstractMatrix) - @assert length(axes(S,1)) == length(axes(S,2)) - new{eltype(S), typeof(S)}(S) - end -end - -SymmetricBlockIdentityLowerMatrix(W::AbstractMatrix) = BlockIdentityLowerMatrix(W .+ W') - -Base.parent(A::BlockIdentityLowerMatrix) = A.S -Base.size(A::BlockIdentityLowerMatrix) = 2 .* size(A.S) - -function LinearAlgebra.mul!(out::AbstractVector, A::BlockIdentityLowerMatrix, z::AbstractVector) - @assert length(out) == length(z) == 2*length(axes(A.S,2)) - - N = length(axes(A.S,1)) - - q_in = @view z[1:N] - p_in = @view z[N+1:2N] - q_out = @view out[1:N] - p_out = @view out[N+1:2N] - - mul!(p_out, A.S, q_in) - - q_out .= q_in - p_out .+= p_in - - return out -end diff --git a/src/arrays/block_identity_upper.jl b/src/arrays/block_identity_upper.jl deleted file mode 100644 index c4ff51e03..000000000 --- a/src/arrays/block_identity_upper.jl +++ /dev/null @@ -1,37 +0,0 @@ -""" -A `BlockIdentityUpperMatrix` is a matrix with blocks -| 1 S | -| 0 1 | -Currently, it only implements a custom `mul!` method, exploiting this structure. 
-""" -struct BlockIdentityUpperMatrix{T, AT <: AbstractMatrix{T}} <: AbstractMatrix{T} - S::AT - - function BlockIdentityUpperMatrix(S::AbstractMatrix) - @assert length(axes(S,1)) == length(axes(S,2)) - new{eltype(S), typeof(S)}(S) - end -end - -SymmetricBlockIdentityUpperMatrix(W::AbstractMatrix) = BlockIdentityUpperMatrix(W .+ W') - -Base.parent(A::BlockIdentityUpperMatrix) = A.S -Base.size(A::BlockIdentityUpperMatrix) = 2 .* size(A.S) - -function LinearAlgebra.mul!(out::AbstractVector, A::BlockIdentityUpperMatrix, z::AbstractVector) - @assert length(out) == length(z) == 2*length(axes(A.S,2)) - - N = length(axes(A.S,1)) - - q_in = @view z[1:N] - p_in = @view z[N+1:2N] - q_out = @view out[1:N] - p_out = @view out[N+1:2N] - - mul!(q_out, A.S, p_in) - - q_out .+= q_in - p_out .= p_in - - return out -end From dd4e8a55add8e9917b23fd6d341e1611e4e59bd5 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 13 Dec 2023 09:35:38 +0100 Subject: [PATCH 03/60] Hope this fixed the TODO list. --- src/arrays/symmetric.jl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/arrays/symmetric.jl b/src/arrays/symmetric.jl index f7db2c6c6..928b90ab5 100644 --- a/src/arrays/symmetric.jl +++ b/src/arrays/symmetric.jl @@ -20,12 +20,12 @@ Internally the `struct` saves a vector $S$ of size $n(n+1)\div2$. The conversion So $S$ stores a string of vectors taken from $A$: $S = [\tilde{a}_1, \tilde{a}_2, \ldots, \tilde{a}_n]$ with $\tilde{a}_i = [[A]_{i1},[A]_{i2},\ldots,[A]_{ii}]$. TODO: --[x] Overload Adjoint operation for SymmetricMatrix!! (Aᵀ = A) --[ ] implement matrix and vector products (to also work on GPU) --[x] implement zero initialization (for optimizer) --[ ] perform some tests (also with Zygote) --[x] update the constructor (to work better for GPU) --[ ] implement multiplication with a tensor +- [x] Overload Adjoint operation for SymmetricMatrix!! (Aᵀ = A) +- [ ] implement matrix and vector products (to also work on GPU) +- [x] implement zero initialization (for optimizer) +- [ ] perform some tests (also with Zygote) +- [x] update the constructor (to work better for GPU) +- [ ] implement multiplication with a tensor """ mutable struct SymmetricMatrix{T, AT <: AbstractVector{T}} <: AbstractMatrix{T} S::AT From 35c0d4862ba8ab2ba9a0a117d6298d5ad668488a Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 13 Dec 2023 10:06:34 +0100 Subject: [PATCH 04/60] We are not using those arrays anymore. --- src/arrays/symplectic_lie_algebra.jl | 89 ------ .../symplectic_lie_algebra_horizontal.jl | 298 ------------------ 2 files changed, 387 deletions(-) delete mode 100644 src/arrays/symplectic_lie_algebra.jl delete mode 100644 src/arrays/symplectic_lie_algebra_horizontal.jl diff --git a/src/arrays/symplectic_lie_algebra.jl b/src/arrays/symplectic_lie_algebra.jl deleted file mode 100644 index 2d77394b1..000000000 --- a/src/arrays/symplectic_lie_algebra.jl +++ /dev/null @@ -1,89 +0,0 @@ -""" -A `SymplecticLieAlgMatrix` is a matrix -| A B | -| C -A' |, where B and C are symmetric. - -The first index is the row index, the second one the column index. - -TODO: Check how LinearAlgebra implements matrix multiplication! -""" - -#-> AbstractVecOrMat!! 
-mutable struct SymplecticLieAlgMatrix{T, AT <: AbstractMatrix{T}, BT <: SymmetricMatrix{T}} <: AbstractMatrix{T} - A::AT - B::BT - C::BT - n::Int - - function SymplecticLieAlgMatrix(A::AbstractMatrix{T}, B::SymmetricMatrix{T}, C::SymmetricMatrix{T}, n) where {T} - @assert eltype(B) == eltype(C) == eltype(A) - @assert B.n == C.n == n - @assert size(A) == (n,n) - new{T,typeof(A),typeof(B)}(A,B,C,n) - end - function SymplecticLieAlgMatrix(S::AbstractMatrix{T}) where {T} - n2 = size(S, 1) - @assert iseven(n2) - @assert size(S, 2) == n2 - n = n2÷2 - - A = T(0.5)*(S[1:n,1:n] - S[(n+1):n2, (n+1):n2]') - B = SymmetricMatrix(S[1:n,(n+1):n2]) - C = SymmetricMatrix(S[(n+1):n2,1:n]) - - new{eltype(S), typeof(A), typeof(B)}(A, B, C, n) - end - -end - - -#implementing getindex automatically defines all matrix multiplications! (but probably not in the most efficient way) -function Base.getindex(A::SymplecticLieAlgMatrix,i,j) - n = A.n - if i ≤ n && j ≤ n - return A.A[i,j] - end - if i ≤ n - return A.B[i, j-n] - end - if j ≤ n - return A.C[i-n, j] - end - return -A.A[j-n,i-n] -end - - -Base.parent(A::SymplecticLieAlgMatrix) = (A=A.A,B=A.B,C=A.C) -Base.size(A::SymplecticLieAlgMatrix) = (2*A.n,2*A.n) - -function Base.:+(S₁::SymplecticLieAlgMatrix, S₂::SymplecticLieAlgMatrix) - @assert S₁.n == S₂.n - SymplecticLieAlgMatrix( - S₁.A + S₂.A, - S₁.B + S₂.B, - S₁.C + S₂.C, - S₁.n - ) -end - -function Base.:-(S₁::SymplecticLieAlgMatrix, S₂::SymplecticLieAlgMatrix) - @assert S₁.n == S₂.n - SymplecticLieAlgMatrix( - S₁.A - S₂.A, - S₁.B - S₂.B, - S₁.C - S₂.C, - S₁.n - ) -end - -function Base.:-(S::SymplecticLieAlgMatrix) - SymplecticLieAlgMatrix( - -S.A, - -S.B, - -S.C, - S.n - ) -end - - -#TODO: implement functions as in StiefelLieAlgHorMatrix \ No newline at end of file diff --git a/src/arrays/symplectic_lie_algebra_horizontal.jl b/src/arrays/symplectic_lie_algebra_horizontal.jl deleted file mode 100644 index 60a8e1fb9..000000000 --- a/src/arrays/symplectic_lie_algebra_horizontal.jl +++ /dev/null @@ -1,298 +0,0 @@ -""" -The `horizontal part` of `SymplecticLieAlgMatrix` is a matrix -| A B | -| C -A' |, where B and C are symmetric - -with the following additional requirements: -| A₁ A₂ᵀ| -| A₃ 0 | = A, - -| B₁ B₂ᵀ| -| B₂ 0 | = B, - -| C₁ C₂ᵀ| -| C₂ 0 | = C. - -The horizontal component is here taken with respect to the G-invariant metric (X₁,X₂) ↦ tr(X₁⁺X₂). (This metric is bi-invariant but degenerate.) - -The first index is the row index, the second one the column index. - -TODO: Make this a subtype of SymplecticLieAlgMatrix!!!!! -""" - -#-> AbstractVecOrMat!! 
-mutable struct SymplecticLieAlgHorMatrix{T, AT <: AbstractMatrix{T}, ST <: SymmetricMatrix{T}} <: AbstractMatrix{T} - A₁::AT - A₂::AT - A₃::AT - B₁::ST - B₂::AT - C₁::ST - C₂::AT - N::Int - n::Int - - function SymplecticLieAlgHorMatrix(A₁::AbstractMatrix, A₂::AbstractMatrix, A₃::AbstractMatrix, - B₁::SymmetricMatrix, B₂::AbstractMatrix, C₁::SymmetricMatrix, - C₂::AbstractMatrix, N::Int, n::Int) - @assert eltype(A₁) == eltype(A₂) == eltype(A₃) == eltype(B₁) == eltype(B₂) == eltype(C₁) == eltype(C₂) - @assert size(A₁)[1] == size(A₁)[2] == size(A₂)[2] == size(A₃)[2] == B₁.n == size(B₂)[2] == C₁.n == size(C₂)[2] == n - @assert size(A₂)[1] == size(A₃)[1] == size(B₂)[1] == size(C₂)[1] == (N-n) - new{eltype(A₁), typeof(A₁), typeof(B₁)}(A₁, A₂, A₃, B₁, B₂, C₁, C₂, N, n) - end - function SymplecticLieAlgHorMatrix(S::SymplecticLieAlgMatrix, n::Int) - N = S.n - new{eltype(S.A), typeof(S.A), typeof(S.B)}( - S.A[1:n, 1:n], - S.A[1:n, (n+1):N]', - S.A[(n+1):N, 1:n], - #this could be made more efficient by accessing the vector that parametrizes SymmetricMatrix!! - SymmetricMatrix(S.B[1:n, 1:n]), - S.B[(n+1):N, 1:n], - SymmetricMatrix(S.C[1:n, 1:n]), - S.C[(n+1):N, 1:n], - N, - n - ) - end -end - - -function A_index(A₁::AbstractMatrix, A₂::AbstractMatrix, A₃::AbstractMatrix, n, i, j) - if i ≤ n - if j ≤ n - return A₁[i,j] - end - return A₂[j-n, i] - end - if j ≤ n - return A₃[i - n, j] - end - return 0. -end - -function BC_index(B₁::SymmetricMatrix, B₂::AbstractMatrix, n, i, j) - if i ≤ n - if j ≤ n - return B₁[i,j] - end - return B₂[j-n, i] - end - if j ≤ n - return B₂[i - n, j] - end - return 0. -end - -function Base.getindex(S::SymplecticLieAlgHorMatrix, i, j) - N = S.N - n = S.n - - if i ≤ N && j ≤ N - return A_index(S.A₁, S.A₂, S.A₃, n, i, j) - end - if i ≤ N - return BC_index(S.B₁, S.B₂, n, i, j-N) - end - if j ≤ N - return BC_index(S.C₁, S.C₂, n, i-N, j) - end - return -A_index(S.A₁, S.A₂, S.A₃, n, j-N, i-N) -end - - -Base.parent(S::SymplecticLieAlgHorMatrix) = (A₁=S.A₁, A₂=S.A₂, A₃=S.A₃, B₁=S.B₁, B₂=S.B₂, C₁=S.C₁, C₂=S.C₂) -Base.size(S::SymplecticLieAlgHorMatrix) = (2*S.N,2*S.N) - -function Base.:+(S₁::SymplecticLieAlgHorMatrix, S₂::SymplecticLieAlgHorMatrix) - @assert S₁.n == S₂.n - @assert S₁.N == S₂.N - SymplecticLieAlgHorMatrix( - S₁.A₁ + S₂.A₁, - S₁.A₂ + S₂.A₂, - S₁.A₃ + S₂.A₃, - S₁.B₁ + S₂.B₁, - S₁.B₂ + S₂.B₂, - S₁.C₁ + S₂.C₁, - S₁.C₂ + S₂.C₂, - S₁.N, - S₁.n - ) -end - -function Base.:-(S₁::SymplecticLieAlgHorMatrix, S₂::SymplecticLieAlgHorMatrix) - @assert S₁.n == S₂.n - @assert S₁.N == S₂.N - SymplecticLieAlgHorMatrix( - S₁.A₁ - S₂.A₁, - S₁.A₂ - S₂.A₂, - S₁.A₃ - S₂.A₃, - S₁.B₁ - S₂.B₁, - S₁.B₂ - S₂.B₂, - S₁.C₁ - S₂.C₁, - S₁.C₂ - S₂.C₂, - S₁.N, - S₁.n - ) -end - -function add!(C::SymplecticLieAlgHorMatrix, A::SymplecticLieAlgHorMatrix, B::SymplecticLieAlgHorMatrix) - @assert A.N == B.N == C.N - @assert A.n == B.n == C.n - add!(C.A₁, A.A₁, B.A₁), - add!(C.A₂, A.A₂, B.A₂), - add!(C.A₃, A.A₃, B.A₃), - add!(C.B₁, A.B₁, B.B₁), - add!(C.B₂, A.B₂, B.B₂), - add!(C.C₁, A.C₁, B.C₁), - add!(C.C₂, A.C₂, B.C₂) -end - -function Base.:-(S::SymplecticLieAlgHorMatrix) - SymplecticLieAlgHorMatrix( - -S.A₁, - -S.A₂, - -S.A₃, - -S.B₁, - -S.B₂, - -S.C₁, - -S.C₂, - S.N, - S.n - ) -end - -function Base.:*(S::SymplecticLieAlgHorMatrix, α::Real) - SymplecticLieAlgHorMatrix( - α*S.A₁, - α*S.A₂, - α*S.A₃, - α*S.B₁, - α*S.B₂, - α*S.C₁, - α*S.C₂, - S.N, - S.n - ) -end - -Base.:*(α::Real, A::SymplecticLieAlgHorMatrix) = A*α - -function Base.zeros(::Type{SymplecticLieAlgHorMatrix{T}}, N::Integer, n::Integer) where T - 
SymplecticLieAlgHorMatrix( - zeros(T, n, n), - zeros(T, N-n, n), - zeros(T, N-n, n), - zeros(SymmetricMatrix{T}, n), - zeros(T, N-n, n), - zeros(SymmetricMatrix{T},n), - zeros(T, N-n, n), - N, - n - ) -end - -function Base.zeros(::Type{SymplecticLieAlgHorMatrix}, N::Integer, n::Integer) - SymplecticLieAlgHorMatrix( - zeros(n, n), - zeros(N-n, n), - zeros(N-n, n), - zeros(SymmetricMatrix, n), - zeros(N-n, n), - zeros(SymmetricMatrix, n), - zeros(N-n, n), - N, - n - ) -end - - -function Base.rand(::Type{SymplecticLieAlgHorMatrix{T}}, N::Integer, n::Integer) where T - SymplecticLieAlgHorMatrix( - rand(T, n, n), - rand(T, N-n, n), - rand(T, N-n, n), - rand(SymmetricMatrix{T}, n), - rand(T, N-n, n), - rand(SymmetricMatrix{T},n), - rand(T, N-n, n), - N, - n - ) -end - -function Base.rand(::Type{SymplecticLieAlgHorMatrix}, N::Integer, n::Integer) - SymplecticLieAlgHorMatrix( - rand(n, n), - rand(N-n, n), - rand(N-n, n), - rand(SymmetricMatrix, n), - rand(N-n, n), - rand(SymmetricMatrix, n), - rand(N-n, n), - N, - n - ) -end - -function scalar_add(S::SymplecticLieAlgHorMatrix, δ::Real) - SymplecticLieAlgHorMatrix( - S.A₁ .+ δ, - S.A₂ .+ δ, - S.A₃ .+ δ, - scalar_add(S.B₁, δ), - S.B₂ .+ δ, - scalar_add(S.C₁, δ), - S.C₂ .+ δ, - S.N, - S.n - ) -end - - -#function Base.:./(A::SymplecticLieAlgMatrix,B::SymplecticLieAlgMatrix) -function /ᵉˡᵉ(S₁::SymplecticLieAlgHorMatrix,S₂::SymplecticLieAlgHorMatrix) - @assert S₁.n == S₂.n - @assert S₁.N == S₂.N - SymplecticLieAlgHorMatrix( - S₁.A₁./S₂.A₁, - S₁.A₂./S₂.A₂, - S₁.A₃/S₂.A₃, - /ᵉˡᵉ(S₁.B₁, S₂.B₁), - S₁.B₂./S₂.B₂, - /ᵉˡᵉ(S₁.C₁, S₂.C₁), - S₁.C₂/S₂.C₂, - S₁.N, - S₁.n - ) -end - -function ⊙²(S::SymplecticLieAlgHorMatrix) - SymplecticLieHorAlgMatrix( - S.A₁.^2, - S.A₂.^2, - S.A₃.^2, - ⊙²(S.B₁), - S.B₂.^2, - ⊙²(S.C₁), - S.C₂.^2, - S.N, - S.n - ) -end - -#= -function RACᵉˡᵉ(S::SymplecticLieAlgHorMatrix) - SymplecticLieAlgMatrix( - sqrt.(S.A₁), - sqrt.(S.A₂), - sqrt.(S.A₃), - RACᵉˡᵉ(S.B₁), - sqrt.(S.B₂), - RACᵉˡᵉ(S.B₁), - sqrt.(S.C₂), - S.N, - S.n - ) -end -=# \ No newline at end of file From 1f1e30d8ee16b91cc9d073ef907956a91c72042c Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 13 Dec 2023 13:10:14 +0100 Subject: [PATCH 05/60] Factored out sampling of arrays. --- .../addition_tests_for_custom_arrays.jl | 12 +++++++++ .../random_generation_of_custom_arrays.jl | 27 +++++++++++++++++++ test/runtests.jl | 2 +- 3 files changed, 40 insertions(+), 1 deletion(-) create mode 100644 test/arrays/addition_tests_for_custom_arrays.jl create mode 100644 test/arrays/random_generation_of_custom_arrays.jl diff --git a/test/arrays/addition_tests_for_custom_arrays.jl b/test/arrays/addition_tests_for_custom_arrays.jl new file mode 100644 index 000000000..9413c5884 --- /dev/null +++ b/test/arrays/addition_tests_for_custom_arrays.jl @@ -0,0 +1,12 @@ +#= +This tests addition for all custom arrays. Note that these tests will also have to be performed on GPU! +=# + +using LinearAlgebra +using Random +using Test +using GeometricMachineLearning + +function test_addition_for_symmetric_matrix(n::Int, T::Type) + A = rand(SymmetricMatrix{T}, n) +end \ No newline at end of file diff --git a/test/arrays/random_generation_of_custom_arrays.jl b/test/arrays/random_generation_of_custom_arrays.jl new file mode 100644 index 000000000..d764ded9f --- /dev/null +++ b/test/arrays/random_generation_of_custom_arrays.jl @@ -0,0 +1,27 @@ +#= +This tests random generation of custom arrays. This will have to be expanded to GPU tests. 
+=# +using LinearAlgebra +using Random +using Test +using GeometricMachineLearning + +function test_random_array_generation(n::Int, N::Int, T::Type) + A_sym = rand(SymmetricMatrix{T}, n) + @test typeof(A_sym) <: SymmetricMatrix{T} + @test eltype(A_sym) == T + + A_skew = rand(SkewSymMatrix{T}, n) + @test typeof(A_skew) <: SkewSymMatrix{T} + @test eltype(A_skew) == T + + A_stiefel_hor = rand(StiefelLieAlgHorMatrix{T}, N, n) + @test typeof(A_stiefel_hor) <: StiefelLieAlgHorMatrix{T} + @test eltype(A_stiefel_hor) == T + + A_grassmann_hor = rand(GrassmannLieAlgHorMatrix{T}, N, n) + @test typeof(A_grassmann_hor) <: GrassmannLieAlgHorMatrix{T} + @test eltype(A_grassmann_hor) == T +end + +test_random_array_generation(5, 10, Float32) \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index af35e6063..e94ad1242 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -3,7 +3,7 @@ using SafeTestsets @safetestset "Check parameterlength " begin include("parameterlength/check_parameterlengths.jl") end @safetestset "Arrays #1 " begin include("arrays/array_tests.jl") end -@safetestset "Arrays #2 " begin include("arrays/array_tests_old.jl") end +@safetestset "Sampling of arrays " begin include("arrays/random_generation_of_custom_arrays.jl") end @safetestset "Manifolds (Grassmann): " begin include("manifolds/grassmann_manifold.jl") end @safetestset "Gradient Layer " begin include("layers/gradient_layer_tests.jl") end @safetestset "Test symplecticity of upscaling layer " begin include("layers/sympnet_layers_test.jl") end From 51c7c76db88b1c270a6fa19f52574dc672733ade Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 13 Dec 2023 13:11:03 +0100 Subject: [PATCH 06/60] Got rid of some some symplectic arrays and included the supertype for horizontal components as an extra file. --- src/GeometricMachineLearning.jl | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/GeometricMachineLearning.jl b/src/GeometricMachineLearning.jl index b357d839a..b7b9e9f92 100644 --- a/src/GeometricMachineLearning.jl +++ b/src/GeometricMachineLearning.jl @@ -82,13 +82,10 @@ module GeometricMachineLearning export convert_to_dev, Device, CPUDevice # INCLUDE ARRAYS - include("arrays/block_identity_lower.jl") - include("arrays/block_identity_upper.jl") include("arrays/symmetric.jl") include("arrays/symplectic.jl") - include("arrays/symplectic_lie_algebra.jl") - include("arrays/symplectic_lie_algebra_horizontal.jl") include("arrays/skew_symmetric.jl") + include("arrays/abstract_lie_algebra_horizontal.jl") include("arrays/stiefel_lie_algebra_horizontal.jl") include("arrays/grassmann_lie_algebra_horizontal.jl") From 3876be561a273d76ca4c11b78a6425908ed92c13 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 13 Dec 2023 13:12:32 +0100 Subject: [PATCH 07/60] Updated docs. --- .../grassmann_lie_algebra_horizontal.jl | 15 +------ src/arrays/stiefel_lie_algebra_horizontal.jl | 41 ++++++++++++------- 2 files changed, 27 insertions(+), 29 deletions(-) diff --git a/src/arrays/grassmann_lie_algebra_horizontal.jl b/src/arrays/grassmann_lie_algebra_horizontal.jl index aa164729b..bf883f157 100644 --- a/src/arrays/grassmann_lie_algebra_horizontal.jl +++ b/src/arrays/grassmann_lie_algebra_horizontal.jl @@ -1,19 +1,6 @@ """ -This implements the horizontal component of the Lie algebra (in this case just the skew-symmetric matrices). -The projection is: -S -> SE where -|I| -|0| = E. - -An element of GrassmannLieAlgMatrix takes the form: -| -0 -B'| -| B 0 | where B is arbitrary. 
- -This also implements the projection: -| 0 -B'| | 0 -B'| -| B 0 | -> | B 0 |. +This implements the horizontal component of a Lie algebra that is isomorphic to the Grassmann manifold. """ - mutable struct GrassmannLieAlgHorMatrix{T, ST <: AbstractMatrix{T}} <: AbstractLieAlgHorMatrix{T} B::ST N::Int diff --git a/src/arrays/stiefel_lie_algebra_horizontal.jl b/src/arrays/stiefel_lie_algebra_horizontal.jl index 71558e485..7af26c360 100644 --- a/src/arrays/stiefel_lie_algebra_horizontal.jl +++ b/src/arrays/stiefel_lie_algebra_horizontal.jl @@ -1,21 +1,32 @@ -""" -This implements the horizontal component of the Lie algebra (in this case just the skew-symmetric matrices). -The projection is: -S -> SE where -|I| -|0| = E. +@doc raw""" +`StiefelLieAlgHorMatrix` is the *horizontal component of the Lie algebra of skew-symmetric matrices* (with respect to the canonical metric). +The projection here is: \(\pi:S \to SE \) where +```math +E = \begin{pmatrix} \mathbb{I}_{n} \\ \mathbb{O}_{(N-n)\times{}n} \end{pmatrix}. +``` +The matrix \(E\) is implemented under `StiefelProjection` in `GeometricMachineLearning`. An element of StiefelLieAlgMatrix takes the form: -| A -B'| -| B 0 | where A is skew-symmetric. - -This also implements the projection: -| A -B'| | A -B'| -| B D | -> | B 0 |. +```math +\begin{pmatrix} +A & B^T \\ B & \mathbb{O} +\end{pmatrix}, +``` +where \(A\) is skew-symmetric (this is `SkewSymMatrix` in `GeometricMachineLearning`). + +If the constructor is called with a big \(N\times{}N\) matrix, then the projection is performed the following way: +```math +\begin{pmatrix} +A & B_1 \\ +B_2 & D +\end{pmatrix} \mapsto +\begin{pmatrix} +\mathrm{skew}(A) & -B_2^T \\ +B_2 & \mathbb{O} +\end{pmatrix}. +``` +The operation $\mathrm{skew}:\mathbb{R}^{n\times{}n}\to\mathcal{S}_\mahtrm{skew}(n)$ is the skew-symmetrization operation. This is equivalent to calling the constructor of `SkewSymMatrix` with an \(n\times{}n\) matrix. """ - -abstract type AbstractLieAlgHorMatrix{T} <: AbstractMatrix{T} end - mutable struct StiefelLieAlgHorMatrix{T, AT <: SkewSymMatrix{T}, ST <: AbstractMatrix{T}} <: AbstractLieAlgHorMatrix{T} A::AT B::ST From 48300b2358e38c2a9d373e57c0ac7cff9bd89810 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 13 Dec 2023 13:12:53 +0100 Subject: [PATCH 08/60] New supertype for the horizontal components. --- src/arrays/abstract_lie_algebra_horizontal.jl | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 src/arrays/abstract_lie_algebra_horizontal.jl diff --git a/src/arrays/abstract_lie_algebra_horizontal.jl b/src/arrays/abstract_lie_algebra_horizontal.jl new file mode 100644 index 000000000..58a7630df --- /dev/null +++ b/src/arrays/abstract_lie_algebra_horizontal.jl @@ -0,0 +1,4 @@ +@doc raw""" +`AbstractLieAlgHorMatrix` is a supertype for various horizontal components of Lie algebras. We usually call this \(\mathfrak{g}^\mathrm{hor}\). +""" +abstract type AbstractLieAlgHorMatrix{T} <: AbstractMatrix{T} end From 0dfbc63723c808df7665c6fd081501cb9db1e173 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 13 Dec 2023 13:25:49 +0100 Subject: [PATCH 09/60] Added new routine for multiplying symmetric matrix from the right onto a matrix. 
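
The new method reuses the existing left multiplication: because the matrix is symmetric, `B * A == (A * B')'`. A minimal usage sketch (not part of the patch; the `SymmetricMatrix` constructor and the dense `Matrix` conversion are used the same way as in the package tests, and the comparison only states the expected behaviour):

```julia
using GeometricMachineLearning: SymmetricMatrix

A = SymmetricMatrix(rand(Float32, 5, 5))   # stores the independent entries in a vector
B = rand(Float32, 3, 5)

# right multiplication now falls back onto the existing left multiplication
B * A ≈ B * Matrix{Float32}(A)             # expected to hold up to rounding errors
```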
--- src/arrays/symmetric.jl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/arrays/symmetric.jl b/src/arrays/symmetric.jl index f7db2c6c6..52cdd4f7f 100644 --- a/src/arrays/symmetric.jl +++ b/src/arrays/symmetric.jl @@ -209,6 +209,8 @@ function Base.:*(A::SymmetricMatrix{T}, B::AbstractMatrix{T}) where T C end +Base.:*(B::AbstractMatrix{T}, A::SymmetricMatrix{T}) where T = (A * B')' + function Base.:*(A::SymmetricMatrix{T}, b::AbstractVector{T}) where T backend = KernelAbstractions.get_backend(A.S) c = KernelAbstractions.allocate(backend, T, A.n) From 21bb9312b674b34a73eb4824e0f69a3e4b0d0d1d Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 13 Dec 2023 13:50:49 +0100 Subject: [PATCH 10/60] Test addition of various custom arrays. --- .../addition_tests_for_custom_arrays.jl | 35 +++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 test/arrays/addition_tests_for_custom_arrays.jl diff --git a/test/arrays/addition_tests_for_custom_arrays.jl b/test/arrays/addition_tests_for_custom_arrays.jl new file mode 100644 index 000000000..1e3a2e701 --- /dev/null +++ b/test/arrays/addition_tests_for_custom_arrays.jl @@ -0,0 +1,35 @@ +using GeometricMachineLearning, Test + +@doc raw""" +This function tests addition for various custom arrays, i.e. if \(A + B\) is performed in the correct way. +""" + +function addition_tests_for_custom_arrays(n::Int, N::Int, T::Type) + A = rand(T, n, n) + B = rand(T, n, n) + + # SymmetricMatrix + AB_sym = SymmetricMatrix(A + B) + AB_sym2 = SymmetricMatrix(A) + SymmetricMatrix(B) + @test AB_sym ≈ AB_sym2 + @test typeof(AB_sym) <: SymmetricMatrix{T} + @test typeof(AB_sym2) <: SymmetricMatrix{T} + + # SkewSymMatrix + AB_skew = SkewSymMatrix(A + B) + AB_skew2 = SkewSymMatrix(A) + SkewSymMatrix(B) + @test A_skew ≈ A_skew2 + @test typeof(AB_skew) <: SkewSymMatrix{T} + @test typeof(AB_skew2) <: SkewSymMatrix{T} + + C = rand(T, N, N) + D = rand(T, N, N) + + # StiefelLieAlgHorMatrix + CD_slahm = StiefelLieAlgHorMatrix(A + B, n) + CD_slahm2 = StiefelLieAlgHorMatrix(A, n) + StiefelLieAlgHorMatrix(B, n) + @test CD_slahm ≈ CD_slahm2 + @test typeof(CD_slahm) <: StiefelLieAlgHorMatrix{T} + @test typeof(CD_slahm2) <: StiefelLieAlgHorMatrix{T} + +end \ No newline at end of file From 969a6cb05e43c64c9f68b51e69dbc13c7ea9e787 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 13 Dec 2023 13:51:40 +0100 Subject: [PATCH 11/60] Removed tests for various symplectic arrays that are no longer part of GML. --- test/arrays/array_tests.jl | 59 -------------------------------------- 1 file changed, 59 deletions(-) diff --git a/test/arrays/array_tests.jl b/test/arrays/array_tests.jl index fd9269046..c0da5eba9 100644 --- a/test/arrays/array_tests.jl +++ b/test/arrays/array_tests.jl @@ -1,14 +1,7 @@ using LinearAlgebra using Random using Test - -using BandedMatrices using GeometricMachineLearning -""" -𝔤ʰ is the horizontal part of 𝔤, i.e. A ∈ 𝔤ʰ ⟺ AEE⁺ = A. 
- -TODO: Add routine & test for symplectic conjugate -""" #check if symmetric matrix works for 1×1 matrices W = rand(1,1) @@ -61,22 +54,6 @@ function skew_mat_mul_test2(n, T=Float64) @test isapprox(AS1, AS2) end -# check if matrix is ∈ 𝔤 (check if the vector space projection works), addition & subtraction -function sympl_lie_alg_add_sub_test(n) - J = SymplecticPotential(n) - symplectisize(W) = .5*(W - J'*W'*J) - W₁ = rand(2*n,2*n) - S₁ = SymplecticLieAlgMatrix(W₁) - W₂ = rand(2*n,2*n) - S₂ = SymplecticLieAlgMatrix(W₂) - S₃ = S₁ + S₂ - S₄ = S₁ - S₂ - @test typeof(S₃) <: SymplecticLieAlgMatrix - @test typeof(S₄) <: SymplecticLieAlgMatrix - @test all(abs.(symplectisize(W₁ + W₂) .- S₃) .< 1e-10) - @test all(abs.(symplectisize(W₁ - W₂) .- S₄) .< 1e-10) -end - # test Stiefel manifold projection test function stiefel_proj_test(N,n) In = I(n) @@ -84,15 +61,6 @@ function stiefel_proj_test(N,n) @test all(abs.((E'*E) .- In) .< 1e-10) end -# test symplectic projection (this is just the E matrix) -function sympl_proj_test(N, n) - JN = SymplecticPotential(N) - Jn = SymplecticPotential(n) - E = SymplecticProjection(N, n, Float64) - @test all(abs.((E'*JN*E) .- Jn) .< 1e-10) -end - - function stiefel_lie_alg_add_sub_test(N, n) E = StiefelProjection(N, n) projection(W::SkewSymMatrix) = W - (I - E*E')*W*(I - E*E') @@ -108,31 +76,8 @@ function stiefel_lie_alg_add_sub_test(N, n) @test all(abs.(projection(W₁ - W₂) .- S₄) .< 1e-10) end -# check if matrix is ∈ 𝔤 (check if the vector space projection works), addition & subtraction -function sympl_lie_alg_add_sub_test(N, n) - J = SymplecticPotential(n) - E = SymplecticProjection(N, n) - projection(W::SymplecticLieAlgMatrix) = W - (I - E*E')*W*(I - E*E') - W₁ = SymplecticLieAlgMatrix(rand(2*N,2*N)) - S₁ = SymplecticLieAlgHorMatrix(W₁,n) - W₂ = SymplecticLieAlgMatrix(rand(2*N,2*N)) - S₂ = SymplecticLieAlgHorMatrix(W₂,n) - S₃ = S₁ + S₂ - S₄ = S₁ - S₂ - @test typeof(S₃) <: SymplecticLieAlgHorMatrix - @test typeof(S₄) <: SymplecticLieAlgHorMatrix - @test all(abs.(projection(W₁ + W₂) .- S₃) .< 1e-10) - @test all(abs.(projection(W₁ - W₂) .- S₄) .< 1e-10) -end - -function stiefel_lie_alg_vectorization_test(N, n; T=Float32) - A = rand(StiefelLieAlgHorMatrix{T}, N, n) - @test isapprox(StiefelLieAlgHorMatrix(vec(A), N, n), A) -end - # TODO: tests for ADAM functions - # test everything for different n & N values Random.seed!(42) @@ -149,10 +94,6 @@ for (N, n) ∈ zip(N_vec, n_vec) skew_mat_add_sub_test(N) skew_mat_mul_test(N) skew_mat_mul_test2(N) - sympl_lie_alg_add_sub_test(N) stiefel_proj_test(N,n) - sympl_proj_test(N,n) stiefel_lie_alg_add_sub_test(N,n) - sympl_lie_alg_add_sub_test(N,n) - stiefel_lie_alg_vectorization_test(N, n) end From e0b81cf69917972e8db76c7963508514603cb6d6 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 13 Dec 2023 14:20:24 +0100 Subject: [PATCH 12/60] Added routine for Base.one. This is copied from SkewSymMatrix. 
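
A sketch of what this enables (the check below only states the expected behaviour and is not a test contained in this patch): with `one` available, the product of two `SymmetricMatrix` objects can be routed through the existing dense multiplication by first materializing one factor, which is what `A * (B * one(B))` does.

```julia
using GeometricMachineLearning: SymmetricMatrix

A = SymmetricMatrix(rand(Float32, 4, 4))
B = SymmetricMatrix(rand(Float32, 4, 4))

# one(B) builds an identity matrix on the backend of B, so B * one(B) is a
# dense matrix and A * B can reuse the SymmetricMatrix-times-matrix routine.
A * B ≈ Matrix{Float32}(A) * Matrix{Float32}(B)
```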
--- src/GeometricMachineLearning.jl | 2 +- src/arrays/symmetric.jl | 12 ++++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/src/GeometricMachineLearning.jl b/src/GeometricMachineLearning.jl index b357d839a..fbb84488a 100644 --- a/src/GeometricMachineLearning.jl +++ b/src/GeometricMachineLearning.jl @@ -84,13 +84,13 @@ module GeometricMachineLearning # INCLUDE ARRAYS include("arrays/block_identity_lower.jl") include("arrays/block_identity_upper.jl") - include("arrays/symmetric.jl") include("arrays/symplectic.jl") include("arrays/symplectic_lie_algebra.jl") include("arrays/symplectic_lie_algebra_horizontal.jl") include("arrays/skew_symmetric.jl") include("arrays/stiefel_lie_algebra_horizontal.jl") include("arrays/grassmann_lie_algebra_horizontal.jl") + include("arrays/symmetric.jl") export SymmetricMatrix, SymplecticPotential, SkewSymMatrix export StiefelLieAlgHorMatrix diff --git a/src/arrays/symmetric.jl b/src/arrays/symmetric.jl index 52cdd4f7f..6a6f3ac91 100644 --- a/src/arrays/symmetric.jl +++ b/src/arrays/symmetric.jl @@ -211,6 +211,10 @@ end Base.:*(B::AbstractMatrix{T}, A::SymmetricMatrix{T}) where T = (A * B')' +function Base.:*(A::SymmetricMatrix{T}, B::SymmetricMatrix{T}) where T + A * (B * one(B)) +end + function Base.:*(A::SymmetricMatrix{T}, b::AbstractVector{T}) where T backend = KernelAbstractions.get_backend(A.S) c = KernelAbstractions.allocate(backend, T, A.n) @@ -218,6 +222,14 @@ function Base.:*(A::SymmetricMatrix{T}, b::AbstractVector{T}) where T c end +function Base.one(A::SymmetricMatrix{T}) where T + backend = KernelAbstractions.get_backend(A.S) + unit_matrix = KernelAbstractions.zeros(backend, T, A.n, A.n) + write_ones! = write_ones_kernel!(backend) + write_ones!(unit_matrix, ndrange=A.n) + unit_matrix +end + # define routines for generalizing ChainRulesCore to SymmetricMatrix ChainRulesCore.ProjectTo(A::SymmetricMatrix) = ProjectTo{SymmetricMatrix}(; symmetric=ProjectTo(A.S)) (project::ProjectTo{SymmetricMatrix})(dA::AbstractMatrix) = SymmetricMatrix(project.symmetric(map_to_S(dA)), size(dA, 2)) From 16b28baf332e114df8b4c4884fdd99a1806815a6 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 13 Dec 2023 14:20:49 +0100 Subject: [PATCH 13/60] Slightly improved readability. --- src/arrays/skew_symmetric.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/arrays/skew_symmetric.jl b/src/arrays/skew_symmetric.jl index e1ab85fe2..3c14b9e45 100644 --- a/src/arrays/skew_symmetric.jl +++ b/src/arrays/skew_symmetric.jl @@ -187,7 +187,7 @@ end # the first matrix is multiplied onto A2 in order for it to not be SkewSymMatrix! function Base.:*(A1::SkewSymMatrix{T}, A2::SkewSymMatrix{T}) where T - A1*(one(A2)*A2) + A1 * (one(A2) * A2) end @doc raw""" From df07bfb1aade5e452594bb0d9ee7963c0b86fb8b Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 13 Dec 2023 14:21:43 +0100 Subject: [PATCH 14/60] Added addition tests for the remaining custom arrays. --- test/arrays/addition_tests_for_custom_arrays.jl | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/test/arrays/addition_tests_for_custom_arrays.jl b/test/arrays/addition_tests_for_custom_arrays.jl index 1e3a2e701..669ccc0ef 100644 --- a/test/arrays/addition_tests_for_custom_arrays.jl +++ b/test/arrays/addition_tests_for_custom_arrays.jl @@ -3,7 +3,6 @@ using GeometricMachineLearning, Test @doc raw""" This function tests addition for various custom arrays, i.e. if \(A + B\) is performed in the correct way. 
""" - function addition_tests_for_custom_arrays(n::Int, N::Int, T::Type) A = rand(T, n, n) B = rand(T, n, n) @@ -26,10 +25,15 @@ function addition_tests_for_custom_arrays(n::Int, N::Int, T::Type) D = rand(T, N, N) # StiefelLieAlgHorMatrix - CD_slahm = StiefelLieAlgHorMatrix(A + B, n) - CD_slahm2 = StiefelLieAlgHorMatrix(A, n) + StiefelLieAlgHorMatrix(B, n) + CD_slahm = StiefelLieAlgHorMatrix(C + D, n) + CD_slahm2 = StiefelLieAlgHorMatrix(C, n) + StiefelLieAlgHorMatrix(D, n) @test CD_slahm ≈ CD_slahm2 @test typeof(CD_slahm) <: StiefelLieAlgHorMatrix{T} @test typeof(CD_slahm2) <: StiefelLieAlgHorMatrix{T} + CD_glahm = GrassmannLieAlgHorMatrix(C + D, n) + CD_glahm2 = GrassmannLieAlgHorMatrix(C, n) + GrassmannLieAlgHorMatrix(D, n) + @test CD_glahm ≈ CD_glahm2 + @test tyepof(CD_glahm) <: GrassmannLieAlgHorMatrix{T} + @test typeof(CD_glahm2) <: GrassmannLieAlgHorMatrix{T} end \ No newline at end of file From e81ff2e9a16d4cdbc049febd42931321770bde55 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 13 Dec 2023 14:52:14 +0100 Subject: [PATCH 15/60] Added tests for addition, scalar multiplication and matrix multiplication for custom arrays. --- .../addition_tests_for_custom_arrays.jl | 4 +- ...matrix_multiplication_for_custom_arrays.jl | 21 ++++++++++ ...scalar_multiplication_for_custom_arrays.jl | 40 +++++++++++++++++++ test/runtests.jl | 4 +- 4 files changed, 67 insertions(+), 2 deletions(-) create mode 100644 test/arrays/matrix_multiplication_for_custom_arrays.jl create mode 100644 test/arrays/scalar_multiplication_for_custom_arrays.jl diff --git a/test/arrays/addition_tests_for_custom_arrays.jl b/test/arrays/addition_tests_for_custom_arrays.jl index 669ccc0ef..5fd2bbcfc 100644 --- a/test/arrays/addition_tests_for_custom_arrays.jl +++ b/test/arrays/addition_tests_for_custom_arrays.jl @@ -36,4 +36,6 @@ function addition_tests_for_custom_arrays(n::Int, N::Int, T::Type) @test CD_glahm ≈ CD_glahm2 @test tyepof(CD_glahm) <: GrassmannLieAlgHorMatrix{T} @test typeof(CD_glahm2) <: GrassmannLieAlgHorMatrix{T} -end \ No newline at end of file +end + +addition_tests_for_custom_arrays(5, 10, Float32) \ No newline at end of file diff --git a/test/arrays/matrix_multiplication_for_custom_arrays.jl b/test/arrays/matrix_multiplication_for_custom_arrays.jl new file mode 100644 index 000000000..db051ec3e --- /dev/null +++ b/test/arrays/matrix_multiplication_for_custom_arrays.jl @@ -0,0 +1,21 @@ +using GeometricMachineLearning, Test + +@doc raw""" +This function tests matrix multiplication for various custom arrays, i.e. if \((A,\alpha) \mapsto \alpha{}A\) is performed in the correct way. +""" +function matrix_multiplication_tests_for_custom_arrays(n::Int, N::Int, T::Type) + A = rand(T, n, n) + B = rand(T, n, N) + + # SymmetricMatrix + A_sym = SymmetricMatrix(A) + @test A_sym * B ≈ Matrix{T}(A_sym) * B + @test B' * A_sym ≈ B' * Matrix{T}(A_sym) + + # SkewSymMatrix + A_skew = SkewSymMatrix(A) + @test A_skew * B ≈ Matrix{T}(A_skew) * B + @test B' * A_skew ≈ B' * Matrix{T}(A_skew) +end + +matrix_multiplication_tests_for_custom_arrays(5, 10, Float32) \ No newline at end of file diff --git a/test/arrays/scalar_multiplication_for_custom_arrays.jl b/test/arrays/scalar_multiplication_for_custom_arrays.jl new file mode 100644 index 000000000..c6214a3e1 --- /dev/null +++ b/test/arrays/scalar_multiplication_for_custom_arrays.jl @@ -0,0 +1,40 @@ +using GeometricMachineLearning, Test + +@doc raw""" +This function tests scalar multiplication for various custom arrays, i.e. 
if \((A,\alpha) \mapsto \alpha{}A\) is performed in the correct way. +""" +function scalar_multiplication_for_custom_arrays(n::Int, N::Int, T::Type) + A = rand(T, n, n) + α = rand(T) + + # SymmetricMatrix + Aα_sym = SymmetricMatrix(α * A) + Aα_sym2 = α * SymmetricMatrix(A) + @test Aα_sym ≈ Aα_sym2 + @test typeof(Aα_sym) <: SymmetricMatrix{T} + @test typeof(Aα_sym2) <: SymmetricMatrix{T} + + # SkewSymMatrix + Aα_skew = SkewSymMatrix(α * A) + Aα_skew2 = α * SkewSymMatrix(A) + @test Aα_skew ≈ Aα_skew2 + @test typeof(Aα_skew) <: SkewSymMatrix{T} + @test typeof(Aα_skew2) <: SkewSymMatrix{T} + + C = rand(T, N, N) + + # StiefelLieAlgHorMatrix + Cα_slahm = StiefelLieAlgHorMatrix(α * C, n) + Cα_slahm2 = α * StiefelLieAlgHorMatrix(C, n) + @test Cα_slahm ≈ Cα_slahm2 + @test typeof(Cα_slahm) <: StiefelLieAlgHorMatrix{T} + @test typeof(Cα_slahm2) <: StiefelLieAlgHorMatrix{T} + + Cα_glahm = GrassmannLieAlgHorMatrix(α * C, n) + Cα_glahm2 = α * GrassmannLieAlgHorMatrix(C, n) + @test Cα_glahm ≈ Cα_glahm2 + @test tyepof(Cα_glahm) <: GrassmannLieAlgHorMatrix{T} + @test typeof(Cα_glahm2) <: GrassmannLieAlgHorMatrix{T} +end + +scalar_multiplication_for_custom_arrays(5, 10, Float32) \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index af35e6063..dda63eb01 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -3,7 +3,9 @@ using SafeTestsets @safetestset "Check parameterlength " begin include("parameterlength/check_parameterlengths.jl") end @safetestset "Arrays #1 " begin include("arrays/array_tests.jl") end -@safetestset "Arrays #2 " begin include("arrays/array_tests_old.jl") end +@safetestset "Addition tests for custom arrays " begin include("arrays/addition_tests_for_custom_arrays.jl") end +@safetestset "Scalar multiplication tests for custom arrays " begin include("arrays/scalar_multiplication_for_custom_arrays.jl") end +@safetestset "Matrix multiplication tests for custom arrays " begin include("arrays/matrix_multiplication_for_custom_arrays.jl") end @safetestset "Manifolds (Grassmann): " begin include("manifolds/grassmann_manifold.jl") end @safetestset "Gradient Layer " begin include("layers/gradient_layer_tests.jl") end @safetestset "Test symplecticity of upscaling layer " begin include("layers/sympnet_layers_test.jl") end From a94c57c6a8350df4105faf1f838ea1252e4fe722 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 13 Dec 2023 15:05:23 +0100 Subject: [PATCH 16/60] Changed order of how files are included. Symmetric now depends on SkewSymmetric. 
--- src/GeometricMachineLearning.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/GeometricMachineLearning.jl b/src/GeometricMachineLearning.jl index fbb84488a..d8dccaae2 100644 --- a/src/GeometricMachineLearning.jl +++ b/src/GeometricMachineLearning.jl @@ -82,15 +82,15 @@ module GeometricMachineLearning export convert_to_dev, Device, CPUDevice # INCLUDE ARRAYS + include("arrays/skew_symmetric.jl") + include("arrays/symmetric.jl") include("arrays/block_identity_lower.jl") include("arrays/block_identity_upper.jl") include("arrays/symplectic.jl") include("arrays/symplectic_lie_algebra.jl") include("arrays/symplectic_lie_algebra_horizontal.jl") - include("arrays/skew_symmetric.jl") include("arrays/stiefel_lie_algebra_horizontal.jl") include("arrays/grassmann_lie_algebra_horizontal.jl") - include("arrays/symmetric.jl") export SymmetricMatrix, SymplecticPotential, SkewSymMatrix export StiefelLieAlgHorMatrix From c6a350ed471420a4b4583c45978658d85b1f253e Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 13 Dec 2023 15:05:50 +0100 Subject: [PATCH 17/60] Fixed typo. --- .../addition_tests_for_custom_arrays.jl | 4 +- test/arrays/array_tests.jl | 43 ------------------- ...scalar_multiplication_for_custom_arrays.jl | 2 +- 3 files changed, 3 insertions(+), 46 deletions(-) diff --git a/test/arrays/addition_tests_for_custom_arrays.jl b/test/arrays/addition_tests_for_custom_arrays.jl index 5fd2bbcfc..44fc878c1 100644 --- a/test/arrays/addition_tests_for_custom_arrays.jl +++ b/test/arrays/addition_tests_for_custom_arrays.jl @@ -17,7 +17,7 @@ function addition_tests_for_custom_arrays(n::Int, N::Int, T::Type) # SkewSymMatrix AB_skew = SkewSymMatrix(A + B) AB_skew2 = SkewSymMatrix(A) + SkewSymMatrix(B) - @test A_skew ≈ A_skew2 + @test AB_skew ≈ AB_skew2 @test typeof(AB_skew) <: SkewSymMatrix{T} @test typeof(AB_skew2) <: SkewSymMatrix{T} @@ -34,7 +34,7 @@ function addition_tests_for_custom_arrays(n::Int, N::Int, T::Type) CD_glahm = GrassmannLieAlgHorMatrix(C + D, n) CD_glahm2 = GrassmannLieAlgHorMatrix(C, n) + GrassmannLieAlgHorMatrix(D, n) @test CD_glahm ≈ CD_glahm2 - @test tyepof(CD_glahm) <: GrassmannLieAlgHorMatrix{T} + @test typeof(CD_glahm) <: GrassmannLieAlgHorMatrix{T} @test typeof(CD_glahm2) <: GrassmannLieAlgHorMatrix{T} end diff --git a/test/arrays/array_tests.jl b/test/arrays/array_tests.jl index fd9269046..0e52c5cd6 100644 --- a/test/arrays/array_tests.jl +++ b/test/arrays/array_tests.jl @@ -61,21 +61,6 @@ function skew_mat_mul_test2(n, T=Float64) @test isapprox(AS1, AS2) end -# check if matrix is ∈ 𝔤 (check if the vector space projection works), addition & subtraction -function sympl_lie_alg_add_sub_test(n) - J = SymplecticPotential(n) - symplectisize(W) = .5*(W - J'*W'*J) - W₁ = rand(2*n,2*n) - S₁ = SymplecticLieAlgMatrix(W₁) - W₂ = rand(2*n,2*n) - S₂ = SymplecticLieAlgMatrix(W₂) - S₃ = S₁ + S₂ - S₄ = S₁ - S₂ - @test typeof(S₃) <: SymplecticLieAlgMatrix - @test typeof(S₄) <: SymplecticLieAlgMatrix - @test all(abs.(symplectisize(W₁ + W₂) .- S₃) .< 1e-10) - @test all(abs.(symplectisize(W₁ - W₂) .- S₄) .< 1e-10) -end # test Stiefel manifold projection test function stiefel_proj_test(N,n) @@ -84,14 +69,6 @@ function stiefel_proj_test(N,n) @test all(abs.((E'*E) .- In) .< 1e-10) end -# test symplectic projection (this is just the E matrix) -function sympl_proj_test(N, n) - JN = SymplecticPotential(N) - Jn = SymplecticPotential(n) - E = SymplecticProjection(N, n, Float64) - @test all(abs.((E'*JN*E) .- Jn) .< 1e-10) -end - function stiefel_lie_alg_add_sub_test(N, n) E = 
StiefelProjection(N, n) @@ -108,23 +85,6 @@ function stiefel_lie_alg_add_sub_test(N, n) @test all(abs.(projection(W₁ - W₂) .- S₄) .< 1e-10) end -# check if matrix is ∈ 𝔤 (check if the vector space projection works), addition & subtraction -function sympl_lie_alg_add_sub_test(N, n) - J = SymplecticPotential(n) - E = SymplecticProjection(N, n) - projection(W::SymplecticLieAlgMatrix) = W - (I - E*E')*W*(I - E*E') - W₁ = SymplecticLieAlgMatrix(rand(2*N,2*N)) - S₁ = SymplecticLieAlgHorMatrix(W₁,n) - W₂ = SymplecticLieAlgMatrix(rand(2*N,2*N)) - S₂ = SymplecticLieAlgHorMatrix(W₂,n) - S₃ = S₁ + S₂ - S₄ = S₁ - S₂ - @test typeof(S₃) <: SymplecticLieAlgHorMatrix - @test typeof(S₄) <: SymplecticLieAlgHorMatrix - @test all(abs.(projection(W₁ + W₂) .- S₃) .< 1e-10) - @test all(abs.(projection(W₁ - W₂) .- S₄) .< 1e-10) -end - function stiefel_lie_alg_vectorization_test(N, n; T=Float32) A = rand(StiefelLieAlgHorMatrix{T}, N, n) @test isapprox(StiefelLieAlgHorMatrix(vec(A), N, n), A) @@ -149,10 +109,7 @@ for (N, n) ∈ zip(N_vec, n_vec) skew_mat_add_sub_test(N) skew_mat_mul_test(N) skew_mat_mul_test2(N) - sympl_lie_alg_add_sub_test(N) stiefel_proj_test(N,n) - sympl_proj_test(N,n) stiefel_lie_alg_add_sub_test(N,n) - sympl_lie_alg_add_sub_test(N,n) stiefel_lie_alg_vectorization_test(N, n) end diff --git a/test/arrays/scalar_multiplication_for_custom_arrays.jl b/test/arrays/scalar_multiplication_for_custom_arrays.jl index c6214a3e1..3a5e56205 100644 --- a/test/arrays/scalar_multiplication_for_custom_arrays.jl +++ b/test/arrays/scalar_multiplication_for_custom_arrays.jl @@ -33,7 +33,7 @@ function scalar_multiplication_for_custom_arrays(n::Int, N::Int, T::Type) Cα_glahm = GrassmannLieAlgHorMatrix(α * C, n) Cα_glahm2 = α * GrassmannLieAlgHorMatrix(C, n) @test Cα_glahm ≈ Cα_glahm2 - @test tyepof(Cα_glahm) <: GrassmannLieAlgHorMatrix{T} + @test typeof(Cα_glahm) <: GrassmannLieAlgHorMatrix{T} @test typeof(Cα_glahm2) <: GrassmannLieAlgHorMatrix{T} end From 35b3db6aacb115ff14d7fbf9034eddcc1c204613 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 19 Dec 2023 13:07:38 +0100 Subject: [PATCH 18/60] Renamed file to something more descriptive. --- .../{data_loader.jl => data_loader_optimization_step.jl} | 3 +++ 1 file changed, 3 insertions(+) rename test/data_loader/{data_loader.jl => data_loader_optimization_step.jl} (84%) diff --git a/test/data_loader/data_loader.jl b/test/data_loader/data_loader_optimization_step.jl similarity index 84% rename from test/data_loader/data_loader.jl rename to test/data_loader/data_loader_optimization_step.jl index 880212f5b..0ac67a648 100644 --- a/test/data_loader/data_loader.jl +++ b/test/data_loader/data_loader_optimization_step.jl @@ -1,5 +1,8 @@ using GeometricMachineLearning, Test, Zygote +@doc raw""" +This tests the gradient optimizer called together with the `DataLoader` (applied to a tensor). +""" function test_data_loader(sys_dim, n_time_steps, n_params, T=Float32) data = randn(T, sys_dim, n_time_steps, n_params) dl = DataLoader(data) From 66f3279c3125b35e7995727215e44eb6a676c26d Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 19 Dec 2023 13:16:21 +0100 Subject: [PATCH 19/60] Renamed file to something meaningful and made the test more readable. 
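
One piece of context for the helper at the top of the new test file (an inference from how it is used, not something the diff states): `Transformer(...)` itself returns a `Chain`, so appending the final `Classification` layer needs a `Chain(c::Chain, d::AbstractExplicitLayer)` method that splats the existing layers. The pattern, with illustrative dimensions:

```julia
# sketch; the dimensions (9 patch entries, 10 classes) mirror the old test
transformer = Transformer(9, 3, 2; Stiefel=true)       # already a Chain
model = Chain(transformer, Classification(9, 10, σ))   # append the classification layer
```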
--- test/data_loader/batch.jl | 33 ----------------- .../optimizer_functor_with_adam.jl | 37 +++++++++++++++++++ 2 files changed, 37 insertions(+), 33 deletions(-) delete mode 100644 test/data_loader/batch.jl create mode 100644 test/data_loader/optimizer_functor_with_adam.jl diff --git a/test/data_loader/batch.jl b/test/data_loader/batch.jl deleted file mode 100644 index 4b7c17309..000000000 --- a/test/data_loader/batch.jl +++ /dev/null @@ -1,33 +0,0 @@ -using AbstractNeuralNetworks: AbstractExplicitLayer, Chain -using GeometricMachineLearning, Test - -""" -This creates a dummy MNIST data set. - -TODO: include tests to check if all elements are batched! -""" -function create_dummy_mnist(;T=Float32, dim₁=6, dim₂=6, n_images=10) - rand(T, dim₁, dim₂, n_images), Int.(floor.(10*rand(T, n_images))) -end - -dl = DataLoader(create_dummy_mnist()...; patch_length=3) -# batch size is equal to two -batch = Batch(2) - -# this function should be made part of AbstractNeuralNetworks !!! -function Chain(c::Chain, d::AbstractExplicitLayer) - Chain(c.layers..., d) -end - -# input dim is 3^2 = 9 -model = Chain(Transformer(dl.input_dim, 3, 2; Stiefel=true), Classification(dl.input_dim, 10, σ)) -ps = initialparameters(CPU(), Float32, model) - -loss₁ = GeometricMachineLearning.loss(model, ps, dl) - -opt = Optimizer(AdamOptimizer(), ps) -loss_average = optimize_for_one_epoch!(opt, model, ps, dl, batch) - -loss₃ = GeometricMachineLearning.loss(model, ps, dl) - -@test loss₁ > loss_average > loss₃ \ No newline at end of file diff --git a/test/data_loader/optimizer_functor_with_adam.jl b/test/data_loader/optimizer_functor_with_adam.jl new file mode 100644 index 000000000..2caf538b2 --- /dev/null +++ b/test/data_loader/optimizer_functor_with_adam.jl @@ -0,0 +1,37 @@ +using AbstractNeuralNetworks: AbstractExplicitLayer, Chain +using GeometricMachineLearning, Test + +# this function should be made part of AbstractNeuralNetworks !!! +function Chain(c::Chain, d::AbstractExplicitLayer) + Chain(c.layers..., d) +end + +""" +This creates a dummy MNIST data set; i.e. its output are two tensors that look similarly to the ones in the MNIST data set. +""" +function create_dummy_mnist(;T=Float32, dim₁=6, dim₂=6, n_images=10) + rand(T, dim₁, dim₂, n_images), Int.(floor.(10*rand(T, n_images))) +end + +function test_optimizer_functor_with_adam(;T=Float32, dim₁=6, dim₂=6, n_images=10, patch_length=patch_length) + dl = DataLoader(create_dummy_mnist(T=T, dim₁=dim₁, dim₂=dim₂, n_images=n_images)...; patch_length=3) + + # batch size is equal to two + batch = Batch(2) + + # input dim is dim₁ / patch_length * dim₂ / pach_length; the transformer is called with dim₁ / patch_length and two layers + model = Chain(Transformer(dl.input_dim, dim₁ / patch_length, 2; Stiefel=true), Classification(dl.input_dim, 10, σ)) + ps = initialparameters(CPU(), Float32, model) + + loss₁ = GeometricMachineLearning.loss(model, ps, dl) + + opt = Optimizer(AdamOptimizer(), ps) + loss_average = optimize_for_one_epoch!(opt, model, ps, dl, batch) + + loss₃ = GeometricMachineLearning.loss(model, ps, dl) + + #check if the loss decreases during optimization + @test loss₁ > loss_average > loss₃ +end + +test_optimizer_functor_with_adam() \ No newline at end of file From 257aeeea0f46e1bcace46852c4bfd98dcd550a26 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 19 Dec 2023 17:24:55 +0100 Subject: [PATCH 20/60] Added descriptions and a wrapper for using the custom loss function with NeuralNetwork. 
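
For orientation (this interpretation is assumed from the round-trip test, not stated in the diff): `index_conversion` maps an image coordinate `(i, j)` to the pair (index inside the flattened patch, patch number), and `reverse_index` inverts exactly that mapping, which is what `test_index_conversion` checks for every pixel.

```julia
using GeometricMachineLearning: index_conversion

patch_length = 7
number_of_patches = (28 ÷ patch_length)^2    # 16 patches for a 28×28 image
ij = index_conversion(3, 10, patch_length, number_of_patches)
# test_index_conversion then checks reverse_index(ij..., patch_length) == (3, 10)
```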
---
 test/data_loader/mnist_utils.jl | 61 ++++++++++++++++++++-------------
 1 file changed, 38 insertions(+), 23 deletions(-)

diff --git a/test/data_loader/mnist_utils.jl b/test/data_loader/mnist_utils.jl
index fe685c647..f6c637a58 100644
--- a/test/data_loader/mnist_utils.jl
+++ b/test/data_loader/mnist_utils.jl
@@ -7,9 +7,8 @@ using GeometricMachineLearning: index_conversion
 import Zygote
 
 """
-This function tests if all the patch nubmers are assigned correctly, i.e. tests patch_index.
+This function is used to test if all the patch numbers are assigned correctly with `index_conversion`, i.e. it tests `patch_index` by inverting it.
 """
-### Test if mapping is invertible
 function reverse_index(i::Integer, j::Integer, patch_length=7)
     opt_i = i%patch_length==0 ? 1 : 0
     within_patch_index = i%patch_length + opt_i*patch_length, (i÷patch_length - opt_i + 1)
@@ -20,41 +19,57 @@ function reverse_index(i::Integer, j::Integer, patch_length=7)
     (patch_index[1]-1)*patch_length + within_patch_index[1], (patch_index[2]-1)*patch_length + within_patch_index[2]
 end
 
-# test if this is the inverse of the other batch index conversion!
-patch_lengths = (2, 4, 7, 14)
-for patch_length in patch_lengths
-    number_of_patches = (28÷patch_length)^2
-    for i in 1:28
-        for j in 1:28
-            @test reverse_index(index_conversion(i, j, patch_length, number_of_patches)..., patch_length) == (i, j)
+"""
+This function uses `reverse_index` to test `index_conversion`, i.e. it checks that the two functions are inverses of each other.
+"""
+function test_index_conversion(patch_lengths=(2, 4, 7, 14))
+    for patch_length in patch_lengths
+        number_of_patches = (28÷patch_length)^2
+        for i in 1:28
+            for j in 1:28
+                @test reverse_index(index_conversion(i, j, patch_length, number_of_patches)..., patch_length) == (i, j)
+            end
         end
     end
 end
 
+"""
+This function tests if `onehotbatch` does what it should, i.e. converts a vector of integers to a one-hot tensor.
+"""
 function test_onehotbatch(V::AbstractVector{T}) where {T<:Integer}
     V_encoded = onehotbatch(V)
-    for i in length(V)
+    for (i, v) in zip(length(V), V)
         @test sum(V_encoded[:,1,i]) == 1
+        @test V_encoded[v, 1, i] == 1
     end
 end
 
 test_onehotbatch([1, 2, 5, 0])
 
-####### MNIST-like data set
-train_x = rand(Float32, 28,28,100)
-train_y = Int.(ceil.(10*rand(Float32, 100))) .- 1
+@doc raw"""
+Generates an MNIST-like dummy data set. 
+""" +function generate_dummy_mnist(dim₁=28, dim₂=18, number_images=100, T=Float32) + train_x = rand(T, dim₁, dim₂, number_images) + train_y = Int.(ceil.(10 * rand(T, number_images))) .- 1 + train_x, train_y +end +function test_optimizer_for_classification_layer(; dim₁=28, dim₂=28, number_images=100, patch_length=7, T=Float32) + dl = DataLoader(generate_dummy_mnist(dim₁, dim₂, number_images, T); patch_length=patch_length) -dl = DataLoader(train_x, train_y) + activation_function(x) = tanh.(x) + model = Classification((dim₁ ÷ patch_length) * (dim₂ ÷ patch_length), 10, activation_function) + ps = initialparameters(CPU(), T, model) + loss₁ = GeometricMachineLearning.loss(model, ps, dl) -activation_function(x) = tanh.(x) -model = Classification(49, 10, activation_function) -ps = initialparameters(CPU(), Float32, model) -loss₁ = GeometricMachineLearning.loss(model, ps, dl) + opt = Optimizer(GradientOptimizer(), ps) + dx = Zygote.gradient(ps -> GeometricMachineLearning.loss(model, ps, dl), ps)[1] + optimization_step!(opt, model, ps, dx) + loss₂ = GeometricMachineLearning.loss(model, ps, dl) -opt = Optimizer(GradientOptimizer(), ps) -dx = Zygote.gradient(ps -> GeometricMachineLearning.loss(model, ps, dl), ps)[1] -optimization_step!(opt, model, ps, dx) -loss₂ = GeometricMachineLearning.loss(model, ps, dl) + @test loss₂ < loss₁ +end -@test loss₂ < loss₁ +test_index_conversion() +test_optimizer_for_classification_layer() \ No newline at end of file From 9ca60ed7280a213ebff112f2be18b229d0eccc20 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 19 Dec 2023 17:25:26 +0100 Subject: [PATCH 21/60] Added descriptions and a wrapper for using the custom loss function with NeuralNetwork. --- src/data_loader/data_loader.jl | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/src/data_loader/data_loader.jl b/src/data_loader/data_loader.jl index ae6a45089..24fb90a21 100644 --- a/src/data_loader/data_loader.jl +++ b/src/data_loader/data_loader.jl @@ -93,25 +93,35 @@ It takes as input: """ function loss(model::Union{Chain, AbstractExplicitLayer}, ps::Union{Tuple, NamedTuple}, input::AT, output::BT) where {T, T1, AT<:AbstractArray{T, 3}, BT<:AbstractArray{T1, 3}} output_estimate = model(input, ps) - norm(output - output_estimate)/norm(output) # /T(sqrt(size(output, 2)*size(output, 3))) + norm(output - output_estimate) / norm(output) # /T(sqrt(size(output, 2)*size(output, 3))) end +@doc raw""" +The *autoencoder loss*. +""" function loss(model::Chain, ps::Tuple, input::BT) where {T, BT<:AbstractArray{T, 3}} output_estimate = model(input, ps) - norm(output_estimate - input)/norm(input) # /T(sqrt(size(input, 2)*size(input, 3))) + norm(output_estimate - input) / norm(input) # /T(sqrt(size(input, 2)*size(input, 3))) end function loss(model::Chain, ps::Tuple, input::BT) where {T, BT<:AbstractArray{T, 2}} output_estimate = model(input, ps) - norm(output_estimate - input)/norm(input) # /T(sqrt(size(input, 2))) + norm(output_estimate - input) / norm(input) # /T(sqrt(size(input, 2))) end nt_diff(A, B) = (q = A.q - B.q, p = A.p - B.p) nt_norm(A) = norm(A.q) + norm(A.p) +@doc raw""" +Loss function that takes a `NamedTuple` as input. This should be used with a SympNet (or other neural network-based integrator). 
It computes: + +```math +\mathtt{loss}(\mathcal{NN}, \mathtt{ps}, \begin{pmatrix} q \\ p \end{pmatrix}, \begin{pmatrix} q' \\ p' \end{pmatrix}) \mapsto \left|| \mathcal{NN}(\begin{pmatrix} q \\ p \end{pmatrix}) - \begin{pmatrix} q' \\ p' \end{pmatrix} \right|| / \left|| \begin{pmatrix} q \\ p \end{pmatrix} \right|| +``` +""" function loss(model::Chain, ps::Tuple, input::NamedTuple, output::NamedTuple) output_estimate = model(input, ps) - nt_norm(nt_diff(output_estimate, output))/nt_norm(input) + nt_norm(nt_diff(output_estimate, output)) / nt_norm(input) end @doc raw""" @@ -136,6 +146,13 @@ function loss(model::Chain, ps::Tuple, dl::DataLoader{T, BT}) where {T, BT<:Name loss(model, ps, dl.data) end +@doc raw""" +Wrapper if we deal with a neural network. +""" +function loss(nn::NeuralNetwork, dl::DataLoader) + loss(nn.model, nn.params, dl) +end + @doc raw""" Computes the accuracy (as opposed to the loss) of a neural network classifier. From fced8d80298e34b304284cb00af470487d980f5c Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 19 Dec 2023 17:26:04 +0100 Subject: [PATCH 22/60] Added documentation. --- src/data_loader/batch.jl | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/src/data_loader/batch.jl b/src/data_loader/batch.jl index bbb9e4e39..9a76d0b26 100644 --- a/src/data_loader/batch.jl +++ b/src/data_loader/batch.jl @@ -15,6 +15,9 @@ end hasseqlength(::Batch{<:Integer}) = true hasseqlength(::Batch{<:Nothing}) = false +@doc raw""" +The functor for batch is called with an instance on `DataLoader`. It then returns a tuple of batch indices: ``(\mathcal{I}_1, \ldots, \mathcal{I}_{\lceil\mathtt{dl.n\_params/batch\_size}\rceil})``, where the index runs from 1 to the number of batches, which is the number of parameters divided by the batch size (rounded up). +""" function (batch::Batch{<:Nothing})(dl::DataLoader{T, AT}) where {T, AT<:AbstractArray{T, 3}} indices = shuffle(1:dl.n_params) n_batches = Int(ceil(dl.n_params/batch.batch_size)) @@ -28,6 +31,9 @@ function (batch::Batch{<:Nothing})(dl::DataLoader{T, AT}) where {T, AT<:Abstract batches end +@doc raw""" +The functor for batch is called with an instance on `DataLoader`. It then returns a tuple of batch indices: ``(\mathcal{I}_1, \ldots, \mathcal{I}_{\lceil\mathtt{(dl.input\_time\_steps-1)/batch\_size}\rceil})``, where the index runs from 1 to the number of batches, which is the number of input time steps (minus one) divided by the batch size (and rounded up). +""" function (batch::Batch{<:Nothing})(dl::DataLoader{T, AT}) where {T, BT<:AbstractMatrix{T}, AT<:Union{BT, NamedTuple{(:q, :p), Tuple{BT, BT}}}} indices = shuffle(1:dl.input_time_steps) n_batches = Int(ceil((dl.input_time_steps-1)/batch.batch_size)) @@ -88,7 +94,7 @@ function optimize_for_one_epoch!(opt::Optimizer, nn::NeuralNetwork, dl::DataLoad end """ -TODO: Add ProgressMeter!!! +This routine is called if a `DataLoader` storing *symplectic data* (i.e. a `NamedTuple`) is supplied. """ function optimize_for_one_epoch!(opt::Optimizer, model, ps::Union{Tuple, NamedTuple}, dl::DataLoader{T, AT}, batch::Batch, loss) where {T, AT<:NamedTuple} count = 0 @@ -107,7 +113,16 @@ function optimize_for_one_epoch!(opt::Optimizer, model, ps::Union{Tuple, NamedTu total_error/count end - +@doc raw""" +A functor for `Optimizer`. It is called with: + - `nn::NeuralNetwork` + - `dl::DataLoader` + - `batch::Batch` + - `n_epochs::Int` + - `loss` + +The last argument is a function through which `Zygote` differentiates. 
This argument is optional; if it is not supplied `GeometricMachineLearning` defaults to an appropriate loss for the `DataLoader`. +""" function (o::Optimizer)(nn::NeuralNetwork, dl::DataLoader, batch::Batch, n_epochs::Int, loss) progress_object = ProgressMeter.Progress(n_epochs; enabled=true) loss_array = zeros(n_epochs) From d95268daec76b606953b0a2fd1dddbcb00bf9f22 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 19 Dec 2023 17:53:14 +0100 Subject: [PATCH 23/60] Added constructor for optimizer if input arguments are flipped. --- src/optimizers/optimizer.jl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/optimizers/optimizer.jl b/src/optimizers/optimizer.jl index 7cd78d037..e6e25cdaa 100644 --- a/src/optimizers/optimizer.jl +++ b/src/optimizers/optimizer.jl @@ -22,6 +22,8 @@ function Optimizer(m::OptimizerMethod, nn::NeuralNetwork) Optimizer(m, nn.params) end +Optimizer(nn::NeuralNetwork, m::OptimizerMethod) = Optimizer(m, nn) + ####################################################################################### # optimization step function From 2abed8793ec2e604911eadcae999c31bf73c593a Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 19 Dec 2023 18:09:44 +0100 Subject: [PATCH 24/60] Added a comment saying that the constructor can be called with DataLoader as input. --- src/architectures/sympnet.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/architectures/sympnet.jl b/src/architectures/sympnet.jl index 8e017f125..b07d5d84c 100644 --- a/src/architectures/sympnet.jl +++ b/src/architectures/sympnet.jl @@ -7,7 +7,7 @@ TODO: abstract type SympNet{AT} <: Architecture end @doc raw""" -`LASympNet` is called with **a single input argument**, the **system dimension**. Optional input arguments are: +`LASympNet` is called with **a single input argument**, the **system dimension**, or with an instance of `DataLoader`. Optional input arguments are: - `depth::Int`: The number of linear layers that are applied. The default is 5. - `nhidden::Int`: The number of hidden layers (i.e. layers that are **not** input or output layers). The default is 2. - `activation`: The activation function that is applied. By default this is `tanh`. @@ -32,7 +32,7 @@ end @inline AbstractNeuralNetworks.dim(arch::SympNet) = arch.dim @doc raw""" -`GSympNet` is called with **a single input argument**, the **system dimension**. Optional input arguments are: +`GSympNet` is called with **a single input argument**, the **system dimension**, or with an instance of `DataLoader`. Optional input arguments are: - `upscaling_dimension::Int`: The *upscaling dimension* of the gradient layer. See the documentation for `GradientLayerQ` and `GradientLayerP` for further explanation. The default is `2*dim`. - `nhidden::Int`: The number of hidden layers (i.e. layers that are **not** input or output layers). The default is 2. - `activation`: The activation function that is applied. By default this is `tanh`. From 0f62b8f2e96949b442b1ad2b009818ffe4b9d520 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 19 Dec 2023 18:10:14 +0100 Subject: [PATCH 25/60] Added default for number of epochs. 
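The following is an editorial sketch of how the pieces documented in the patches above fit together. It is assembled only from signatures and docstrings that appear in these diffs (`DataLoader`, `GSympNet`, `NeuralNetwork`, `Batch`, the `Optimizer` constructors and functor); the toy data, the choice of `AdamOptimizer`, and the epoch count are invented for illustration and are not taken from the package's own examples.

```julia
using GeometricMachineLearning

# invented (q, p) snapshot data: 2 degrees of freedom, 500 snapshots
data = (q = rand(Float32, 2, 500), p = rand(Float32, 2, 500))

dl   = DataLoader(data)                    # wraps the snapshots
arch = GSympNet(dl)                        # system dimension is read off the DataLoader
nn   = NeuralNetwork(arch, CPU(), Float32)

opt   = Optimizer(AdamOptimizer(), nn)     # Optimizer(nn, AdamOptimizer()) also works after the constructor patch above
batch = Batch(32)

# the loss argument is optional, and with this patch n_epochs gets a default as well
loss_history = opt(nn, dl, batch, 50)      # vector with one averaged loss value per epoch
```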
--- src/data_loader/batch.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/data_loader/batch.jl b/src/data_loader/batch.jl index 9a76d0b26..10b434ffd 100644 --- a/src/data_loader/batch.jl +++ b/src/data_loader/batch.jl @@ -133,6 +133,6 @@ function (o::Optimizer)(nn::NeuralNetwork, dl::DataLoader, batch::Batch, n_epoch loss_array end -function (o::Optimizer)(nn::NeuralNetwork, dl::DataLoader, batch::Batch, n_epochs::Int) +function (o::Optimizer)(nn::NeuralNetwork, dl::DataLoader, batch::Batch, n_epochs::Int=1) o(nn, dl, batch, n_epochs, loss) end \ No newline at end of file From 3ab42f2c9874e83edaa54ce3c809e7d2332be8f4 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 19 Dec 2023 18:11:05 +0100 Subject: [PATCH 26/60] Combined matrix and tensor routines into one. Added another loss for autoencoder qp data. --- src/data_loader/data_loader.jl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/data_loader/data_loader.jl b/src/data_loader/data_loader.jl index 24fb90a21..2beb691b9 100644 --- a/src/data_loader/data_loader.jl +++ b/src/data_loader/data_loader.jl @@ -99,19 +99,19 @@ end @doc raw""" The *autoencoder loss*. """ -function loss(model::Chain, ps::Tuple, input::BT) where {T, BT<:AbstractArray{T, 3}} +function loss(model::Chain, ps::Tuple, input::BT) where {T, BT<:AbstractArray{T}} output_estimate = model(input, ps) norm(output_estimate - input) / norm(input) # /T(sqrt(size(input, 2)*size(input, 3))) end -function loss(model::Chain, ps::Tuple, input::BT) where {T, BT<:AbstractArray{T, 2}} - output_estimate = model(input, ps) - norm(output_estimate - input) / norm(input) # /T(sqrt(size(input, 2))) -end - nt_diff(A, B) = (q = A.q - B.q, p = A.p - B.p) nt_norm(A) = norm(A.q) + norm(A.p) +function loss(model::Chain, ps::Tuple, input::NT) where {T, AT<:AbstractArray{T}, NT<:NamedTuple{(:q, :p,), Tuple{AT, AT}}} + output_estimate = model(input, ps) + nt_norm(output_estimate - input) / nt_norm(input) +end + @doc raw""" Loss function that takes a `NamedTuple` as input. This should be used with a SympNet (or other neural network-based integrator). It computes: From 5d22855e9a98e50625fcc9f66e426fe9fd0ed8d3 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 19 Dec 2023 18:12:14 +0100 Subject: [PATCH 27/60] Commented out a section that is probably not needed. --- src/data_loader/tensor_assign.jl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/data_loader/tensor_assign.jl b/src/data_loader/tensor_assign.jl index 3faad0a99..4aac9bfc0 100644 --- a/src/data_loader/tensor_assign.jl +++ b/src/data_loader/tensor_assign.jl @@ -55,6 +55,7 @@ function assign_output_estimate(full_output::AbstractArray{T, 3}, prediction_win output_estimate end +#= """ This function draws random time steps and parameters and based on these assign the batch and the output. @@ -101,6 +102,7 @@ function draw_batch!(batch::AT, output::BT, data::AT, target::BT) where {T, T2, assign_batch!(batch, data, params, time_steps, ndrange=size(batch)) assign_batch!(output, target, params, time_steps, ndrange=size(output)) end +=# """ Used for differentiating assign_output_estimate (this appears in the loss). From 86485adb054c3c90df2c5ed725e974d44843e4f2 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 19 Dec 2023 18:12:41 +0100 Subject: [PATCH 28/60] Test data loader for qp data. 
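A short aside on what the merged loss methods a few patches above actually compute: for array data the return value is the relative Frobenius error between the network output and the target (or the input itself, in the autoencoder case). The sketch below reproduces that number with nothing but `LinearAlgebra`; the concrete matrices are made up.

```julia
using LinearAlgebra

input           = [1.0 2.0; 3.0 4.0]     # pretend data
output_estimate = [1.1 1.9; 3.2 3.9]     # pretend network output

# same quantity as norm(output_estimate - input) / norm(input) in the autoencoder loss
relative_error = norm(output_estimate - input) / norm(input)   # ≈ 0.048
```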
--- test/data_loader/batch_data_loader_qp_test.jl | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 test/data_loader/batch_data_loader_qp_test.jl diff --git a/test/data_loader/batch_data_loader_qp_test.jl b/test/data_loader/batch_data_loader_qp_test.jl new file mode 100644 index 000000000..9e1273e49 --- /dev/null +++ b/test/data_loader/batch_data_loader_qp_test.jl @@ -0,0 +1,33 @@ +using GeometricMachineLearning +using Test + +function dummy_qp_data_matrix(dim=2, number_data_points=200, T=Float32) + (q = rand(T, dim, number_data_points), p = (rand(T, dim, number_data_points))) +end + +function dummy_qp_data_tensor(dim=2, number_of_time_steps=100, number_of_parameters=20, T=Float32) + (q = rand(T, dim, number_of_time_steps, number_of_parameters), p = (rand(T, dim, number_of_time_steps, number_of_parameters))) +end + +function test_data_loader(dim=2, number_of_time_steps=100, number_of_parameters=20, batch_size, T=Float32) + dl1 = DataLoader(dummy_qp_data_matrix(dim, number_of_time_steps, T)) + dl2 = DataLoader(dummy_qp_data_tensor(dim, number_of_time_steps, number_of_parameters)) + + arch1 = GSympNet(dl1) + arch2 = GSympNet(dl2) + + nn1 = NeuralNetwork(arch1, CPU(), T) + nn2 = NeuralNetwork(arch2, CPU(), T) + + loss1 = loss(nn1, dl1) + loss2 = loss(nn2, dl2) + + batch = Batch(batch_size) + o₁ = Optimizer(GradientOptimizer(), nn1) + o₂ = Optimizer(GradientOptimizer(), nn2) + + o₁(nn1, dl1, batch) + o₂(nn2, dl2, batch) +end + +test_data_loader() \ No newline at end of file From cac1f185da9ce28e6e3e63f0e9d9d68fae33439c Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 19 Dec 2023 18:13:08 +0100 Subject: [PATCH 29/60] Renamed and added tests. --- test/runtests.jl | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/test/runtests.jl b/test/runtests.jl index af35e6063..ae9307766 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -34,6 +34,8 @@ using SafeTestsets @safetestset "Training " begin include("train!/test_training.jl") end @safetestset "NeuralNetSolution " begin include("train!/test_neuralnet_solution.jl") end @safetestset "Problem & Integrators " begin include("integrator/test_integrator.jl") end -@safetestset "Data Loader #1 " begin include("data_loader/data_loader.jl") end -@safetestset "Data Loader #2 " begin include("data_loader/mnist_utils.jl") end -@safetestset "Data Loader #3 (Batch struct) " begin include("data_loader/batch.jl") end \ No newline at end of file + +@safetestset "Test data loader for q and p data " begin include("data_loader/batch_data_loader_qp_test.jl") end +@safetestset "Test mnist_utils. " begin include("data_loader/mnist_utils.jl") end +@safetestset "Test the data loader in combination with optimization_step! " begin include("data_loader/data_loader_optimization_step.jl") end +@safetestset "Optimizer functor with data loader for Adam " begin include("data_loader/optimizer_functor_with_adam.jl") \ No newline at end of file From 4528053bc135b3f495ac732804cd913e4187cc96 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 19 Dec 2023 18:14:24 +0100 Subject: [PATCH 30/60] Adjusted symplectic matrix. 
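For orientation before the diff: the index map introduced below writes a `+1` at position `(i, i+n)` for `i ≤ n` and a `-1` at `(i, i-n)` for `i > n`, i.e. it encodes the canonical symplectic matrix with the identity block in the upper right and the negated identity block in the lower left; filling both blocks requires the kernel's global index to run over all `2n` rows. The dense construction below is an editorial reference sketch for checking the pattern by eye and is not code from the package.

```julia
using LinearAlgebra

# dense reference version of the symplectic matrix J = [0 I; -I 0] (illustration only)
function dense_symplectic_potential(n::Int, T::Type=Float64)
    J = zeros(T, 2n, 2n)
    for i in 1:2n
        if i ≤ n
            J[i, i + n] =  one(T)    # upper-right identity block
        else
            J[i, i - n] = -one(T)    # lower-left block carries the minus sign
        end
    end
    J
end

J = dense_symplectic_potential(2)
# 4×4 result:  0  0  1  0
#              0  0  0  1
#             -1  0  0  0
#              0 -1  0  0
@assert J' == -J && J * J == -I(4)   # skew-symmetric with J² = -I
```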
--- src/arrays/symplectic.jl | 80 ++++++++++++---------------------------- 1 file changed, 24 insertions(+), 56 deletions(-) diff --git a/src/arrays/symplectic.jl b/src/arrays/symplectic.jl index 1dcc59658..3a4f138d6 100644 --- a/src/arrays/symplectic.jl +++ b/src/arrays/symplectic.jl @@ -1,75 +1,43 @@ @doc raw""" - `SymplecticMatrix(n)` +`SymplecticPotential(n)` Returns a symplectic matrix of size 2n x 2n ```math \begin{pmatrix} -0 & & & 1 & & & \\ -& \ddots & & & \ddots & & \\ -& & 0 & & & 1 \\ --1 & & & 0 & & & \\ -& \ddots & & & \ddots & & \\ -& & -1 & & 0 & \\ +\mathbb{O} & \mathbb{I} \\ +\mathbb{O} & -\mathbb{I} \\ \end{pmatrix} ``` - - `SymplecticProjection(N,n)` -Returns the symplectic projection matrix E of the Stiefel manifold, i.e. π: Sp(2N) → Sp(2n,2N), A ↦ AE - """ -#= -function SymplecticMatrix(n::Int, T::DataType=Float64) - BandedMatrix((n => ones(T,n), -n => -ones(T,n)), (2n,2n)) -end - -SymplecticMatrix(T::DataType, n::Int) = SymplecticMatrix(n, T) - -@doc raw""" -```math -\begin{pmatrix} -I & 0 \\ -0 & 0 \\ -0 & I \\ -0 & 0 \\ -\end{pmatrix} -``` -""" -=# - -function SymplecticPotential(n::Int, T::DataType=Float64) - J = zeros(T, 2*n, 2*n) - J[1:n, (n+1):2*n] = one(ones(T, n, n)) - J[(n+1):2*n, 1:n] = -one(ones(T, n, n)) +function SymplecticPotential(backend, n2::Int, T::DataType=Float64) + @assert iseven(n2) + n = n2÷2 + J = KernelAbstractions.zeros(backend, T, 2*n, 2*n) + assign_ones_for_symplectic_potential! = assign_ones_for_symplectic_potential_kernel!(backend) + assign_ones_for_symplectic_potential!(J, n, ndrange=n) J end +SymplecticPotential(n::Int, T::DataType=Float64) = SymplecticPotential(CPU(), n, T) +SymplecticPotential(bakend, T::DataType, n::Int) = SymplecticPotential(backend, n, T) + SymplecticPotential(T::DataType, n::Int) = SymplecticPotential(n, T) -struct SymplecticProjection{T} <: AbstractMatrix{T} - N::Int - n::Int - SymplecticProjection(N, n, T = Float64) = new{T}(N,n) +@kernel function assign_ones_for_symplectic_potential_kernel!(J::AbstractMatrix{T}, n::Int) where T + i = @index(Global) + J[map_index_for_symplectic_potential(i, n)...] = i ≤ n ? one(T) : -one(T) end -function Base.getindex(E::SymplecticProjection,i,j) - if i ≤ E.n - if j == i - return 1. - end - return 0. - end - if j > E.n - if (j-E.n) == (i-E.N) - return 1. - end - return 0. +""" +This assigns the right index for the symplectic potential. To be used with `assign_ones_for_symplectic_potential_kernel!`. +""" +function map_index_for_symplectic_potential(i::Int, n::Int) + if i ≤ n + return (i, i+n) + else + return (i, i-n) end - return 0. -end - - -Base.parent(E::SymplecticProjection) = (E.N,E.n) -Base.size(E::SymplecticProjection) = (2*E.N,2*E.n) +end \ No newline at end of file From 6ed5c6259c3d91c3e833d31dc54f16766fff618c Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 19 Dec 2023 13:07:38 +0100 Subject: [PATCH 31/60] Renamed file to something more descriptive. 
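One more remark on the previous patch before moving on: the construction there follows the usual KernelAbstractions pattern of instantiating a `@kernel` function with a backend and launching it with an explicit `ndrange`; the kernel body is executed once for every global index in `1:ndrange`. The toy kernel below is an editorial sketch of that pattern only (it fills a vector with its own indices) and is not code from the package.

```julia
using KernelAbstractions

# a throwaway kernel illustrating @kernel / @index(Global) / ndrange
@kernel function fill_with_index_kernel!(v::AbstractVector{T}) where T
    i = @index(Global)
    v[i] = T(i)
end

backend = CPU()
v = KernelAbstractions.zeros(backend, Float64, 8)

fill_with_index! = fill_with_index_kernel!(backend)   # instantiate the kernel for the backend
fill_with_index!(v, ndrange = length(v))              # indices 1:8 are each visited exactly once
KernelAbstractions.synchronize(backend)

v   # [1.0, 2.0, …, 8.0]
```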
--- .../{data_loader.jl => data_loader_optimization_step.jl} | 3 +++ 1 file changed, 3 insertions(+) rename test/data_loader/{data_loader.jl => data_loader_optimization_step.jl} (84%) diff --git a/test/data_loader/data_loader.jl b/test/data_loader/data_loader_optimization_step.jl similarity index 84% rename from test/data_loader/data_loader.jl rename to test/data_loader/data_loader_optimization_step.jl index 880212f5b..0ac67a648 100644 --- a/test/data_loader/data_loader.jl +++ b/test/data_loader/data_loader_optimization_step.jl @@ -1,5 +1,8 @@ using GeometricMachineLearning, Test, Zygote +@doc raw""" +This tests the gradient optimizer called together with the `DataLoader` (applied to a tensor). +""" function test_data_loader(sys_dim, n_time_steps, n_params, T=Float32) data = randn(T, sys_dim, n_time_steps, n_params) dl = DataLoader(data) From f4ddd3f6f3b8e1f98918146f6211ef0cc9d1afa8 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 19 Dec 2023 13:16:21 +0100 Subject: [PATCH 32/60] Renamed file to something meaningful and made the test more readable. --- test/data_loader/batch.jl | 33 ----------------- .../optimizer_functor_with_adam.jl | 37 +++++++++++++++++++ 2 files changed, 37 insertions(+), 33 deletions(-) delete mode 100644 test/data_loader/batch.jl create mode 100644 test/data_loader/optimizer_functor_with_adam.jl diff --git a/test/data_loader/batch.jl b/test/data_loader/batch.jl deleted file mode 100644 index 4b7c17309..000000000 --- a/test/data_loader/batch.jl +++ /dev/null @@ -1,33 +0,0 @@ -using AbstractNeuralNetworks: AbstractExplicitLayer, Chain -using GeometricMachineLearning, Test - -""" -This creates a dummy MNIST data set. - -TODO: include tests to check if all elements are batched! -""" -function create_dummy_mnist(;T=Float32, dim₁=6, dim₂=6, n_images=10) - rand(T, dim₁, dim₂, n_images), Int.(floor.(10*rand(T, n_images))) -end - -dl = DataLoader(create_dummy_mnist()...; patch_length=3) -# batch size is equal to two -batch = Batch(2) - -# this function should be made part of AbstractNeuralNetworks !!! -function Chain(c::Chain, d::AbstractExplicitLayer) - Chain(c.layers..., d) -end - -# input dim is 3^2 = 9 -model = Chain(Transformer(dl.input_dim, 3, 2; Stiefel=true), Classification(dl.input_dim, 10, σ)) -ps = initialparameters(CPU(), Float32, model) - -loss₁ = GeometricMachineLearning.loss(model, ps, dl) - -opt = Optimizer(AdamOptimizer(), ps) -loss_average = optimize_for_one_epoch!(opt, model, ps, dl, batch) - -loss₃ = GeometricMachineLearning.loss(model, ps, dl) - -@test loss₁ > loss_average > loss₃ \ No newline at end of file diff --git a/test/data_loader/optimizer_functor_with_adam.jl b/test/data_loader/optimizer_functor_with_adam.jl new file mode 100644 index 000000000..2caf538b2 --- /dev/null +++ b/test/data_loader/optimizer_functor_with_adam.jl @@ -0,0 +1,37 @@ +using AbstractNeuralNetworks: AbstractExplicitLayer, Chain +using GeometricMachineLearning, Test + +# this function should be made part of AbstractNeuralNetworks !!! +function Chain(c::Chain, d::AbstractExplicitLayer) + Chain(c.layers..., d) +end + +""" +This creates a dummy MNIST data set; i.e. its output are two tensors that look similarly to the ones in the MNIST data set. 
+""" +function create_dummy_mnist(;T=Float32, dim₁=6, dim₂=6, n_images=10) + rand(T, dim₁, dim₂, n_images), Int.(floor.(10*rand(T, n_images))) +end + +function test_optimizer_functor_with_adam(;T=Float32, dim₁=6, dim₂=6, n_images=10, patch_length=patch_length) + dl = DataLoader(create_dummy_mnist(T=T, dim₁=dim₁, dim₂=dim₂, n_images=n_images)...; patch_length=3) + + # batch size is equal to two + batch = Batch(2) + + # input dim is dim₁ / patch_length * dim₂ / pach_length; the transformer is called with dim₁ / patch_length and two layers + model = Chain(Transformer(dl.input_dim, dim₁ / patch_length, 2; Stiefel=true), Classification(dl.input_dim, 10, σ)) + ps = initialparameters(CPU(), Float32, model) + + loss₁ = GeometricMachineLearning.loss(model, ps, dl) + + opt = Optimizer(AdamOptimizer(), ps) + loss_average = optimize_for_one_epoch!(opt, model, ps, dl, batch) + + loss₃ = GeometricMachineLearning.loss(model, ps, dl) + + #check if the loss decreases during optimization + @test loss₁ > loss_average > loss₃ +end + +test_optimizer_functor_with_adam() \ No newline at end of file From 92ac2fe8415f49ed72c2e2f86d1423af321f234a Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 19 Dec 2023 17:24:55 +0100 Subject: [PATCH 33/60] Added descriptions and a wrapper for using the custom loss function with NeuralNetwork. --- test/data_loader/mnist_utils.jl | 61 ++++++++++++++++++++------------- 1 file changed, 38 insertions(+), 23 deletions(-) diff --git a/test/data_loader/mnist_utils.jl b/test/data_loader/mnist_utils.jl index fe685c647..f6c637a58 100644 --- a/test/data_loader/mnist_utils.jl +++ b/test/data_loader/mnist_utils.jl @@ -7,9 +7,8 @@ using GeometricMachineLearning: index_conversion import Zygote """ -This function tests if all the patch nubmers are assigned correctly, i.e. tests patch_index. +This function tests is used to test if all the patch nubmers are assigned correctly with `index_conversion`, i.e. tests `patch_index` by inverting it. """ -### Test if mapping is invertible function reverse_index(i::Integer, j::Integer, patch_length=7) opt_i = i%patch_length==0 ? 1 : 0 within_patch_index = i%patch_length + opt_i*patch_length, (i÷patch_length - opt_i + 1) @@ -20,41 +19,57 @@ function reverse_index(i::Integer, j::Integer, patch_length=7) (patch_index[1]-1)*patch_length + within_patch_index[1], (patch_index[2]-1)*patch_length + within_patch_index[2] end -# test if this is the inverse of the other batch index conversion! -patch_lengths = (2, 4, 7, 14) -for patch_length in patch_lengths - number_of_patches = (28÷patch_length)^2 - for i in 1:28 - for j in 1:28 - @test reverse_index(index_conversion(i, j, patch_length, number_of_patches)..., patch_length) == (i, j) +""" +This function uses `reverse_index` to test `index_conversion`, i.e. checks if the functions are invertible. +""" +function test_index_conversion(patch_lengths=(2, 4, 7, 14)) + for patch_length in patch_lengths + number_of_patches = (28÷patch_length)^2 + for i in 1:28 + for j in 1:28 + @test reverse_index(index_conversion(i, j, patch_length, number_of_patches)..., patch_length) == (i, j) + end end end end +""" +This function tests if `onehotbatch` does what it should; i.e. convert a vector of integers to a one-hot-tensor. 
+""" function test_onehotbatch(V::AbstractVector{T}) where {T<:Integer} V_encoded = onehotbatch(V) - for i in length(V) + for (i, v) in zip(length(V), V) @test sum(V_encoded[:,1,i]) == 1 + @test V_encoded[v, 1, i] == 1 end end test_onehotbatch([1, 2, 5, 0]) -####### MNIST-like data set -train_x = rand(Float32, 28,28,100) -train_y = Int.(ceil.(10*rand(Float32, 100))) .- 1 +@doc raw""" +Generates an MNIST-like dummy data set. +""" +function generate_dummy_mnist(dim₁=28, dim₂=18, number_images=100, T=Float32) + train_x = rand(T, dim₁, dim₂, number_images) + train_y = Int.(ceil.(10 * rand(T, number_images))) .- 1 + train_x, train_y +end +function test_optimizer_for_classification_layer(; dim₁=28, dim₂=28, number_images=100, patch_length=7, T=Float32) + dl = DataLoader(generate_dummy_mnist(dim₁, dim₂, number_images, T); patch_length=patch_length) -dl = DataLoader(train_x, train_y) + activation_function(x) = tanh.(x) + model = Classification((dim₁ ÷ patch_length) * (dim₂ ÷ patch_length), 10, activation_function) + ps = initialparameters(CPU(), T, model) + loss₁ = GeometricMachineLearning.loss(model, ps, dl) -activation_function(x) = tanh.(x) -model = Classification(49, 10, activation_function) -ps = initialparameters(CPU(), Float32, model) -loss₁ = GeometricMachineLearning.loss(model, ps, dl) + opt = Optimizer(GradientOptimizer(), ps) + dx = Zygote.gradient(ps -> GeometricMachineLearning.loss(model, ps, dl), ps)[1] + optimization_step!(opt, model, ps, dx) + loss₂ = GeometricMachineLearning.loss(model, ps, dl) -opt = Optimizer(GradientOptimizer(), ps) -dx = Zygote.gradient(ps -> GeometricMachineLearning.loss(model, ps, dl), ps)[1] -optimization_step!(opt, model, ps, dx) -loss₂ = GeometricMachineLearning.loss(model, ps, dl) + @test loss₂ < loss₁ +end -@test loss₂ < loss₁ +test_index_conversion() +test_optimizer_for_classification_layer() \ No newline at end of file From cb19f5bb8f50952d720e058b8c9d2c652b0c82bd Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 19 Dec 2023 17:25:26 +0100 Subject: [PATCH 34/60] Added descriptions and a wrapper for using the custom loss function with NeuralNetwork. --- src/data_loader/data_loader.jl | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/src/data_loader/data_loader.jl b/src/data_loader/data_loader.jl index ae6a45089..24fb90a21 100644 --- a/src/data_loader/data_loader.jl +++ b/src/data_loader/data_loader.jl @@ -93,25 +93,35 @@ It takes as input: """ function loss(model::Union{Chain, AbstractExplicitLayer}, ps::Union{Tuple, NamedTuple}, input::AT, output::BT) where {T, T1, AT<:AbstractArray{T, 3}, BT<:AbstractArray{T1, 3}} output_estimate = model(input, ps) - norm(output - output_estimate)/norm(output) # /T(sqrt(size(output, 2)*size(output, 3))) + norm(output - output_estimate) / norm(output) # /T(sqrt(size(output, 2)*size(output, 3))) end +@doc raw""" +The *autoencoder loss*. 
+""" function loss(model::Chain, ps::Tuple, input::BT) where {T, BT<:AbstractArray{T, 3}} output_estimate = model(input, ps) - norm(output_estimate - input)/norm(input) # /T(sqrt(size(input, 2)*size(input, 3))) + norm(output_estimate - input) / norm(input) # /T(sqrt(size(input, 2)*size(input, 3))) end function loss(model::Chain, ps::Tuple, input::BT) where {T, BT<:AbstractArray{T, 2}} output_estimate = model(input, ps) - norm(output_estimate - input)/norm(input) # /T(sqrt(size(input, 2))) + norm(output_estimate - input) / norm(input) # /T(sqrt(size(input, 2))) end nt_diff(A, B) = (q = A.q - B.q, p = A.p - B.p) nt_norm(A) = norm(A.q) + norm(A.p) +@doc raw""" +Loss function that takes a `NamedTuple` as input. This should be used with a SympNet (or other neural network-based integrator). It computes: + +```math +\mathtt{loss}(\mathcal{NN}, \mathtt{ps}, \begin{pmatrix} q \\ p \end{pmatrix}, \begin{pmatrix} q' \\ p' \end{pmatrix}) \mapsto \left|| \mathcal{NN}(\begin{pmatrix} q \\ p \end{pmatrix}) - \begin{pmatrix} q' \\ p' \end{pmatrix} \right|| / \left|| \begin{pmatrix} q \\ p \end{pmatrix} \right|| +``` +""" function loss(model::Chain, ps::Tuple, input::NamedTuple, output::NamedTuple) output_estimate = model(input, ps) - nt_norm(nt_diff(output_estimate, output))/nt_norm(input) + nt_norm(nt_diff(output_estimate, output)) / nt_norm(input) end @doc raw""" @@ -136,6 +146,13 @@ function loss(model::Chain, ps::Tuple, dl::DataLoader{T, BT}) where {T, BT<:Name loss(model, ps, dl.data) end +@doc raw""" +Wrapper if we deal with a neural network. +""" +function loss(nn::NeuralNetwork, dl::DataLoader) + loss(nn.model, nn.params, dl) +end + @doc raw""" Computes the accuracy (as opposed to the loss) of a neural network classifier. From cb7725f380a7ec9158183799fad262e4a33bc415 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 19 Dec 2023 17:26:04 +0100 Subject: [PATCH 35/60] Added documentation. --- src/data_loader/batch.jl | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/src/data_loader/batch.jl b/src/data_loader/batch.jl index bbb9e4e39..9a76d0b26 100644 --- a/src/data_loader/batch.jl +++ b/src/data_loader/batch.jl @@ -15,6 +15,9 @@ end hasseqlength(::Batch{<:Integer}) = true hasseqlength(::Batch{<:Nothing}) = false +@doc raw""" +The functor for batch is called with an instance on `DataLoader`. It then returns a tuple of batch indices: ``(\mathcal{I}_1, \ldots, \mathcal{I}_{\lceil\mathtt{dl.n\_params/batch\_size}\rceil})``, where the index runs from 1 to the number of batches, which is the number of parameters divided by the batch size (rounded up). +""" function (batch::Batch{<:Nothing})(dl::DataLoader{T, AT}) where {T, AT<:AbstractArray{T, 3}} indices = shuffle(1:dl.n_params) n_batches = Int(ceil(dl.n_params/batch.batch_size)) @@ -28,6 +31,9 @@ function (batch::Batch{<:Nothing})(dl::DataLoader{T, AT}) where {T, AT<:Abstract batches end +@doc raw""" +The functor for batch is called with an instance on `DataLoader`. It then returns a tuple of batch indices: ``(\mathcal{I}_1, \ldots, \mathcal{I}_{\lceil\mathtt{(dl.input\_time\_steps-1)/batch\_size}\rceil})``, where the index runs from 1 to the number of batches, which is the number of input time steps (minus one) divided by the batch size (and rounded up). 
+""" function (batch::Batch{<:Nothing})(dl::DataLoader{T, AT}) where {T, BT<:AbstractMatrix{T}, AT<:Union{BT, NamedTuple{(:q, :p), Tuple{BT, BT}}}} indices = shuffle(1:dl.input_time_steps) n_batches = Int(ceil((dl.input_time_steps-1)/batch.batch_size)) @@ -88,7 +94,7 @@ function optimize_for_one_epoch!(opt::Optimizer, nn::NeuralNetwork, dl::DataLoad end """ -TODO: Add ProgressMeter!!! +This routine is called if a `DataLoader` storing *symplectic data* (i.e. a `NamedTuple`) is supplied. """ function optimize_for_one_epoch!(opt::Optimizer, model, ps::Union{Tuple, NamedTuple}, dl::DataLoader{T, AT}, batch::Batch, loss) where {T, AT<:NamedTuple} count = 0 @@ -107,7 +113,16 @@ function optimize_for_one_epoch!(opt::Optimizer, model, ps::Union{Tuple, NamedTu total_error/count end - +@doc raw""" +A functor for `Optimizer`. It is called with: + - `nn::NeuralNetwork` + - `dl::DataLoader` + - `batch::Batch` + - `n_epochs::Int` + - `loss` + +The last argument is a function through which `Zygote` differentiates. This argument is optional; if it is not supplied `GeometricMachineLearning` defaults to an appropriate loss for the `DataLoader`. +""" function (o::Optimizer)(nn::NeuralNetwork, dl::DataLoader, batch::Batch, n_epochs::Int, loss) progress_object = ProgressMeter.Progress(n_epochs; enabled=true) loss_array = zeros(n_epochs) From 90bbaa7856743c165c54e9d6871b5987903a6ed7 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 19 Dec 2023 17:53:14 +0100 Subject: [PATCH 36/60] Added constructor for optimizer if input arguments are flipped. --- src/optimizers/optimizer.jl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/optimizers/optimizer.jl b/src/optimizers/optimizer.jl index 7cd78d037..e6e25cdaa 100644 --- a/src/optimizers/optimizer.jl +++ b/src/optimizers/optimizer.jl @@ -22,6 +22,8 @@ function Optimizer(m::OptimizerMethod, nn::NeuralNetwork) Optimizer(m, nn.params) end +Optimizer(nn::NeuralNetwork, m::OptimizerMethod) = Optimizer(m, nn) + ####################################################################################### # optimization step function From 137d64018c0926e1a965d363c56db9dfa10d024f Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 19 Dec 2023 18:09:44 +0100 Subject: [PATCH 37/60] Added a comment saying that the constructor can be called with DataLoader as input. --- src/architectures/sympnet.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/architectures/sympnet.jl b/src/architectures/sympnet.jl index 8e017f125..b07d5d84c 100644 --- a/src/architectures/sympnet.jl +++ b/src/architectures/sympnet.jl @@ -7,7 +7,7 @@ TODO: abstract type SympNet{AT} <: Architecture end @doc raw""" -`LASympNet` is called with **a single input argument**, the **system dimension**. Optional input arguments are: +`LASympNet` is called with **a single input argument**, the **system dimension**, or with an instance of `DataLoader`. Optional input arguments are: - `depth::Int`: The number of linear layers that are applied. The default is 5. - `nhidden::Int`: The number of hidden layers (i.e. layers that are **not** input or output layers). The default is 2. - `activation`: The activation function that is applied. By default this is `tanh`. @@ -32,7 +32,7 @@ end @inline AbstractNeuralNetworks.dim(arch::SympNet) = arch.dim @doc raw""" -`GSympNet` is called with **a single input argument**, the **system dimension**. Optional input arguments are: +`GSympNet` is called with **a single input argument**, the **system dimension**, or with an instance of `DataLoader`. 
Optional input arguments are: - `upscaling_dimension::Int`: The *upscaling dimension* of the gradient layer. See the documentation for `GradientLayerQ` and `GradientLayerP` for further explanation. The default is `2*dim`. - `nhidden::Int`: The number of hidden layers (i.e. layers that are **not** input or output layers). The default is 2. - `activation`: The activation function that is applied. By default this is `tanh`. From b57d1488a29a9e29a16a5a57f0cfb5828e2ac588 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 19 Dec 2023 18:10:14 +0100 Subject: [PATCH 38/60] Added default for number of epochs. --- src/data_loader/batch.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/data_loader/batch.jl b/src/data_loader/batch.jl index 9a76d0b26..10b434ffd 100644 --- a/src/data_loader/batch.jl +++ b/src/data_loader/batch.jl @@ -133,6 +133,6 @@ function (o::Optimizer)(nn::NeuralNetwork, dl::DataLoader, batch::Batch, n_epoch loss_array end -function (o::Optimizer)(nn::NeuralNetwork, dl::DataLoader, batch::Batch, n_epochs::Int) +function (o::Optimizer)(nn::NeuralNetwork, dl::DataLoader, batch::Batch, n_epochs::Int=1) o(nn, dl, batch, n_epochs, loss) end \ No newline at end of file From 3100b0b77401b42e0edc1857527cbf051dea65ac Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 19 Dec 2023 18:11:05 +0100 Subject: [PATCH 39/60] Combined matrix and tensor routines into one. Added another loss for autoencoder qp data. --- src/data_loader/data_loader.jl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/data_loader/data_loader.jl b/src/data_loader/data_loader.jl index 24fb90a21..2beb691b9 100644 --- a/src/data_loader/data_loader.jl +++ b/src/data_loader/data_loader.jl @@ -99,19 +99,19 @@ end @doc raw""" The *autoencoder loss*. """ -function loss(model::Chain, ps::Tuple, input::BT) where {T, BT<:AbstractArray{T, 3}} +function loss(model::Chain, ps::Tuple, input::BT) where {T, BT<:AbstractArray{T}} output_estimate = model(input, ps) norm(output_estimate - input) / norm(input) # /T(sqrt(size(input, 2)*size(input, 3))) end -function loss(model::Chain, ps::Tuple, input::BT) where {T, BT<:AbstractArray{T, 2}} - output_estimate = model(input, ps) - norm(output_estimate - input) / norm(input) # /T(sqrt(size(input, 2))) -end - nt_diff(A, B) = (q = A.q - B.q, p = A.p - B.p) nt_norm(A) = norm(A.q) + norm(A.p) +function loss(model::Chain, ps::Tuple, input::NT) where {T, AT<:AbstractArray{T}, NT<:NamedTuple{(:q, :p,), Tuple{AT, AT}}} + output_estimate = model(input, ps) + nt_norm(output_estimate - input) / nt_norm(input) +end + @doc raw""" Loss function that takes a `NamedTuple` as input. This should be used with a SympNet (or other neural network-based integrator). It computes: From 718174fb847fb93bc54ad4ca96568d400da229e2 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 19 Dec 2023 18:12:14 +0100 Subject: [PATCH 40/60] Commented out a section that is probably not needed. --- src/data_loader/tensor_assign.jl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/data_loader/tensor_assign.jl b/src/data_loader/tensor_assign.jl index 3faad0a99..4aac9bfc0 100644 --- a/src/data_loader/tensor_assign.jl +++ b/src/data_loader/tensor_assign.jl @@ -55,6 +55,7 @@ function assign_output_estimate(full_output::AbstractArray{T, 3}, prediction_win output_estimate end +#= """ This function draws random time steps and parameters and based on these assign the batch and the output. 
@@ -101,6 +102,7 @@ function draw_batch!(batch::AT, output::BT, data::AT, target::BT) where {T, T2, assign_batch!(batch, data, params, time_steps, ndrange=size(batch)) assign_batch!(output, target, params, time_steps, ndrange=size(output)) end +=# """ Used for differentiating assign_output_estimate (this appears in the loss). From 370ee05c4ab154beb44694d79a96694987400db6 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 19 Dec 2023 18:12:41 +0100 Subject: [PATCH 41/60] Test data loader for qp data. --- test/data_loader/batch_data_loader_qp_test.jl | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 test/data_loader/batch_data_loader_qp_test.jl diff --git a/test/data_loader/batch_data_loader_qp_test.jl b/test/data_loader/batch_data_loader_qp_test.jl new file mode 100644 index 000000000..9e1273e49 --- /dev/null +++ b/test/data_loader/batch_data_loader_qp_test.jl @@ -0,0 +1,33 @@ +using GeometricMachineLearning +using Test + +function dummy_qp_data_matrix(dim=2, number_data_points=200, T=Float32) + (q = rand(T, dim, number_data_points), p = (rand(T, dim, number_data_points))) +end + +function dummy_qp_data_tensor(dim=2, number_of_time_steps=100, number_of_parameters=20, T=Float32) + (q = rand(T, dim, number_of_time_steps, number_of_parameters), p = (rand(T, dim, number_of_time_steps, number_of_parameters))) +end + +function test_data_loader(dim=2, number_of_time_steps=100, number_of_parameters=20, batch_size, T=Float32) + dl1 = DataLoader(dummy_qp_data_matrix(dim, number_of_time_steps, T)) + dl2 = DataLoader(dummy_qp_data_tensor(dim, number_of_time_steps, number_of_parameters)) + + arch1 = GSympNet(dl1) + arch2 = GSympNet(dl2) + + nn1 = NeuralNetwork(arch1, CPU(), T) + nn2 = NeuralNetwork(arch2, CPU(), T) + + loss1 = loss(nn1, dl1) + loss2 = loss(nn2, dl2) + + batch = Batch(batch_size) + o₁ = Optimizer(GradientOptimizer(), nn1) + o₂ = Optimizer(GradientOptimizer(), nn2) + + o₁(nn1, dl1, batch) + o₂(nn2, dl2, batch) +end + +test_data_loader() \ No newline at end of file From 14d6ae2130d45b80ee21a631ef9eddf912d139c2 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 19 Dec 2023 18:13:08 +0100 Subject: [PATCH 42/60] Renamed and added tests. --- test/runtests.jl | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/test/runtests.jl b/test/runtests.jl index e94ad1242..b0069ae2d 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -34,6 +34,8 @@ using SafeTestsets @safetestset "Training " begin include("train!/test_training.jl") end @safetestset "NeuralNetSolution " begin include("train!/test_neuralnet_solution.jl") end @safetestset "Problem & Integrators " begin include("integrator/test_integrator.jl") end -@safetestset "Data Loader #1 " begin include("data_loader/data_loader.jl") end -@safetestset "Data Loader #2 " begin include("data_loader/mnist_utils.jl") end -@safetestset "Data Loader #3 (Batch struct) " begin include("data_loader/batch.jl") end \ No newline at end of file + +@safetestset "Test data loader for q and p data " begin include("data_loader/batch_data_loader_qp_test.jl") end +@safetestset "Test mnist_utils. " begin include("data_loader/mnist_utils.jl") end +@safetestset "Test the data loader in combination with optimization_step! 
" begin include("data_loader/data_loader_optimization_step.jl") end +@safetestset "Optimizer functor with data loader for Adam " begin include("data_loader/optimizer_functor_with_adam.jl") \ No newline at end of file From 9edec7be2edb767966ab05477fe7866209ab2a3f Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 19 Dec 2023 18:14:24 +0100 Subject: [PATCH 43/60] Adjusted symplectic matrix. --- src/arrays/symplectic.jl | 80 ++++++++++++---------------------------- 1 file changed, 24 insertions(+), 56 deletions(-) diff --git a/src/arrays/symplectic.jl b/src/arrays/symplectic.jl index 1dcc59658..3a4f138d6 100644 --- a/src/arrays/symplectic.jl +++ b/src/arrays/symplectic.jl @@ -1,75 +1,43 @@ @doc raw""" - `SymplecticMatrix(n)` +`SymplecticPotential(n)` Returns a symplectic matrix of size 2n x 2n ```math \begin{pmatrix} -0 & & & 1 & & & \\ -& \ddots & & & \ddots & & \\ -& & 0 & & & 1 \\ --1 & & & 0 & & & \\ -& \ddots & & & \ddots & & \\ -& & -1 & & 0 & \\ +\mathbb{O} & \mathbb{I} \\ +\mathbb{O} & -\mathbb{I} \\ \end{pmatrix} ``` - - `SymplecticProjection(N,n)` -Returns the symplectic projection matrix E of the Stiefel manifold, i.e. π: Sp(2N) → Sp(2n,2N), A ↦ AE - """ -#= -function SymplecticMatrix(n::Int, T::DataType=Float64) - BandedMatrix((n => ones(T,n), -n => -ones(T,n)), (2n,2n)) -end - -SymplecticMatrix(T::DataType, n::Int) = SymplecticMatrix(n, T) - -@doc raw""" -```math -\begin{pmatrix} -I & 0 \\ -0 & 0 \\ -0 & I \\ -0 & 0 \\ -\end{pmatrix} -``` -""" -=# - -function SymplecticPotential(n::Int, T::DataType=Float64) - J = zeros(T, 2*n, 2*n) - J[1:n, (n+1):2*n] = one(ones(T, n, n)) - J[(n+1):2*n, 1:n] = -one(ones(T, n, n)) +function SymplecticPotential(backend, n2::Int, T::DataType=Float64) + @assert iseven(n2) + n = n2÷2 + J = KernelAbstractions.zeros(backend, T, 2*n, 2*n) + assign_ones_for_symplectic_potential! = assign_ones_for_symplectic_potential_kernel!(backend) + assign_ones_for_symplectic_potential!(J, n, ndrange=n) J end +SymplecticPotential(n::Int, T::DataType=Float64) = SymplecticPotential(CPU(), n, T) +SymplecticPotential(bakend, T::DataType, n::Int) = SymplecticPotential(backend, n, T) + SymplecticPotential(T::DataType, n::Int) = SymplecticPotential(n, T) -struct SymplecticProjection{T} <: AbstractMatrix{T} - N::Int - n::Int - SymplecticProjection(N, n, T = Float64) = new{T}(N,n) +@kernel function assign_ones_for_symplectic_potential_kernel!(J::AbstractMatrix{T}, n::Int) where T + i = @index(Global) + J[map_index_for_symplectic_potential(i, n)...] = i ≤ n ? one(T) : -one(T) end -function Base.getindex(E::SymplecticProjection,i,j) - if i ≤ E.n - if j == i - return 1. - end - return 0. - end - if j > E.n - if (j-E.n) == (i-E.N) - return 1. - end - return 0. +""" +This assigns the right index for the symplectic potential. To be used with `assign_ones_for_symplectic_potential_kernel!`. +""" +function map_index_for_symplectic_potential(i::Int, n::Int) + if i ≤ n + return (i, i+n) + else + return (i, i-n) end - return 0. -end - - -Base.parent(E::SymplecticProjection) = (E.N,E.n) -Base.size(E::SymplecticProjection) = (2*E.N,2*E.n) +end \ No newline at end of file From 2acdd2487d64ec2d04d6e2caabd252f3424bbaa9 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 19 Dec 2023 18:45:28 +0100 Subject: [PATCH 44/60] dim was missing for specifying default. 
--- src/architectures/sympnet.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/architectures/sympnet.jl b/src/architectures/sympnet.jl index b07d5d84c..dba449c4e 100644 --- a/src/architectures/sympnet.jl +++ b/src/architectures/sympnet.jl @@ -49,7 +49,7 @@ struct GSympNet{AT, InitUpper} <: SympNet{AT} where {InitUpper} end - function GSympNet(dl::DataLoader; upscaling_dimension=2*dim, nhidden=2, activation=tanh, init_upper=true) + function GSympNet(dl::DataLoader; upscaling_dimension=2*dl.input_dim, nhidden=2, activation=tanh, init_upper=true) new{typeof(activation), init_upper}(dl.input_dim, upscaling_dimension, nhidden, activation) end end From f8b8fff1211ce88fd88c4fa7a39dd6e0a919d97d Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 19 Dec 2023 18:46:01 +0100 Subject: [PATCH 45/60] dl has no field data. --- src/data_loader/data_loader.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/data_loader/data_loader.jl b/src/data_loader/data_loader.jl index 2beb691b9..5117bbc23 100644 --- a/src/data_loader/data_loader.jl +++ b/src/data_loader/data_loader.jl @@ -109,7 +109,7 @@ nt_norm(A) = norm(A.q) + norm(A.p) function loss(model::Chain, ps::Tuple, input::NT) where {T, AT<:AbstractArray{T}, NT<:NamedTuple{(:q, :p,), Tuple{AT, AT}}} output_estimate = model(input, ps) - nt_norm(output_estimate - input) / nt_norm(input) + nt_norm(nt_diff(output_estimate, input)) / nt_norm(input) end @doc raw""" @@ -143,7 +143,7 @@ function loss(model::Chain, ps::Tuple, dl::DataLoader{T, BT, Nothing}) where {T, end function loss(model::Chain, ps::Tuple, dl::DataLoader{T, BT}) where {T, BT<:NamedTuple} - loss(model, ps, dl.data) + loss(model, ps, dl.input) end @doc raw""" From 8c4c73b1e9faa8e826fcd8463558664c058edd4e Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 19 Dec 2023 18:46:27 +0100 Subject: [PATCH 46/60] Fixed typos. 
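A note on the first hunk of the preceding patch: `output_estimate - input` cannot work for the `(q, p)` case because base Julia defines no `-` method for `NamedTuple`s, so the subtraction has to be done field by field, which is exactly what `nt_diff` does. Minimal demonstration (values invented):

```julia
a = (q = [1.0, 2.0], p = [3.0, 4.0])
b = (q = [0.0, 2.0], p = [3.0, 3.0])

# a - b    # would throw: MethodError: no method matching -(::NamedTuple, ::NamedTuple)

nt_diff(A, B) = (q = A.q - B.q, p = A.p - B.p)   # field-wise difference, as in the patch series
nt_diff(a, b)                                    # (q = [1.0, 0.0], p = [0.0, 1.0])
```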
--- test/data_loader/batch_data_loader_qp_test.jl | 10 +++++----- test/data_loader/mnist_utils.jl | 6 +++--- test/data_loader/optimizer_functor_with_adam.jl | 6 +++--- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/test/data_loader/batch_data_loader_qp_test.jl b/test/data_loader/batch_data_loader_qp_test.jl index 9e1273e49..6258c3eef 100644 --- a/test/data_loader/batch_data_loader_qp_test.jl +++ b/test/data_loader/batch_data_loader_qp_test.jl @@ -9,7 +9,7 @@ function dummy_qp_data_tensor(dim=2, number_of_time_steps=100, number_of_paramet (q = rand(T, dim, number_of_time_steps, number_of_parameters), p = (rand(T, dim, number_of_time_steps, number_of_parameters))) end -function test_data_loader(dim=2, number_of_time_steps=100, number_of_parameters=20, batch_size, T=Float32) +function test_data_loader(dim=2, number_of_time_steps=100, number_of_parameters=20, batch_size=10, T=Float32) dl1 = DataLoader(dummy_qp_data_matrix(dim, number_of_time_steps, T)) dl2 = DataLoader(dummy_qp_data_tensor(dim, number_of_time_steps, number_of_parameters)) @@ -19,15 +19,15 @@ function test_data_loader(dim=2, number_of_time_steps=100, number_of_parameters= nn1 = NeuralNetwork(arch1, CPU(), T) nn2 = NeuralNetwork(arch2, CPU(), T) - loss1 = loss(nn1, dl1) - loss2 = loss(nn2, dl2) + loss1 = GeometricMachineLearning.loss(nn1, dl1) + loss2 = GeometricMachineLearning.loss(nn2, dl2) batch = Batch(batch_size) o₁ = Optimizer(GradientOptimizer(), nn1) - o₂ = Optimizer(GradientOptimizer(), nn2) + # o₂ = Optimizer(GradientOptimizer(), nn2) o₁(nn1, dl1, batch) - o₂(nn2, dl2, batch) + # o₂(nn2, dl2, batch) end test_data_loader() \ No newline at end of file diff --git a/test/data_loader/mnist_utils.jl b/test/data_loader/mnist_utils.jl index f6c637a58..502739756 100644 --- a/test/data_loader/mnist_utils.jl +++ b/test/data_loader/mnist_utils.jl @@ -49,17 +49,17 @@ test_onehotbatch([1, 2, 5, 0]) @doc raw""" Generates an MNIST-like dummy data set. 
""" -function generate_dummy_mnist(dim₁=28, dim₂=18, number_images=100, T=Float32) +function generate_dummy_mnist(dim₁=28, dim₂=28, number_images=100, T=Float32) train_x = rand(T, dim₁, dim₂, number_images) train_y = Int.(ceil.(10 * rand(T, number_images))) .- 1 train_x, train_y end function test_optimizer_for_classification_layer(; dim₁=28, dim₂=28, number_images=100, patch_length=7, T=Float32) - dl = DataLoader(generate_dummy_mnist(dim₁, dim₂, number_images, T); patch_length=patch_length) + dl = DataLoader(generate_dummy_mnist(dim₁, dim₂, number_images, T)...; patch_length=patch_length) activation_function(x) = tanh.(x) - model = Classification((dim₁ ÷ patch_length) * (dim₂ ÷ patch_length), 10, activation_function) + model = Classification(patch_length * patch_length, 10, activation_function) ps = initialparameters(CPU(), T, model) loss₁ = GeometricMachineLearning.loss(model, ps, dl) diff --git a/test/data_loader/optimizer_functor_with_adam.jl b/test/data_loader/optimizer_functor_with_adam.jl index 2caf538b2..c2e3be6a8 100644 --- a/test/data_loader/optimizer_functor_with_adam.jl +++ b/test/data_loader/optimizer_functor_with_adam.jl @@ -13,14 +13,14 @@ function create_dummy_mnist(;T=Float32, dim₁=6, dim₂=6, n_images=10) rand(T, dim₁, dim₂, n_images), Int.(floor.(10*rand(T, n_images))) end -function test_optimizer_functor_with_adam(;T=Float32, dim₁=6, dim₂=6, n_images=10, patch_length=patch_length) - dl = DataLoader(create_dummy_mnist(T=T, dim₁=dim₁, dim₂=dim₂, n_images=n_images)...; patch_length=3) +function test_optimizer_functor_with_adam(;T=Float32, dim₁=6, dim₂=6, n_images=10, patch_length=3) + dl = DataLoader(create_dummy_mnist(T=T, dim₁=dim₁, dim₂=dim₂, n_images=n_images)...; patch_length=patch_length) # batch size is equal to two batch = Batch(2) # input dim is dim₁ / patch_length * dim₂ / pach_length; the transformer is called with dim₁ / patch_length and two layers - model = Chain(Transformer(dl.input_dim, dim₁ / patch_length, 2; Stiefel=true), Classification(dl.input_dim, 10, σ)) + model = Chain(Transformer(dl.input_dim, patch_length, 2; Stiefel=true), Classification(dl.input_dim, 10, σ)) ps = initialparameters(CPU(), Float32, model) loss₁ = GeometricMachineLearning.loss(model, ps, dl) From d0725941779e31494db573bc947c66cdf95d43be Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 19 Dec 2023 18:57:19 +0100 Subject: [PATCH 47/60] Removed method that appeared twice for some reason. --- src/data_loader/data_loader.jl | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/data_loader/data_loader.jl b/src/data_loader/data_loader.jl index 4c5eeff2b..5117bbc23 100644 --- a/src/data_loader/data_loader.jl +++ b/src/data_loader/data_loader.jl @@ -153,13 +153,6 @@ function loss(nn::NeuralNetwork, dl::DataLoader) loss(nn.model, nn.params, dl) end -@doc raw""" -Wrapper if we deal with a neural network. -""" -function loss(nn::NeuralNetwork, dl::DataLoader) - loss(nn.model, nn.params, dl) -end - @doc raw""" Computes the accuracy (as opposed to the loss) of a neural network classifier. From d071839f6891fdbf7d5d33cdae2c289719298ea9 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 19 Dec 2023 19:02:03 +0100 Subject: [PATCH 48/60] Forgot to commit before. 
--- test/data_loader/batch_data_loader_qp_test.jl | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/test/data_loader/batch_data_loader_qp_test.jl b/test/data_loader/batch_data_loader_qp_test.jl index 6194bbc39..e189aca2f 100644 --- a/test/data_loader/batch_data_loader_qp_test.jl +++ b/test/data_loader/batch_data_loader_qp_test.jl @@ -9,11 +9,8 @@ function dummy_qp_data_tensor(dim=2, number_of_time_steps=100, number_of_paramet (q = rand(T, dim, number_of_time_steps, number_of_parameters), p = (rand(T, dim, number_of_time_steps, number_of_parameters))) end -<<<<<<< HEAD function test_data_loader(dim=2, number_of_time_steps=100, number_of_parameters=20, batch_size=10, T=Float32) -======= -function test_data_loader(dim=2, number_of_time_steps=100, number_of_parameters=20, batch_size, T=Float32) ->>>>>>> 4528053bc135b3f495ac732804cd913e4187cc96 + dl1 = DataLoader(dummy_qp_data_matrix(dim, number_of_time_steps, T)) dl2 = DataLoader(dummy_qp_data_tensor(dim, number_of_time_steps, number_of_parameters)) @@ -23,7 +20,6 @@ function test_data_loader(dim=2, number_of_time_steps=100, number_of_parameters= nn1 = NeuralNetwork(arch1, CPU(), T) nn2 = NeuralNetwork(arch2, CPU(), T) -<<<<<<< HEAD loss1 = GeometricMachineLearning.loss(nn1, dl1) loss2 = GeometricMachineLearning.loss(nn2, dl2) @@ -33,17 +29,6 @@ function test_data_loader(dim=2, number_of_time_steps=100, number_of_parameters= o₁(nn1, dl1, batch) # o₂(nn2, dl2, batch) -======= - loss1 = loss(nn1, dl1) - loss2 = loss(nn2, dl2) - - batch = Batch(batch_size) - o₁ = Optimizer(GradientOptimizer(), nn1) - o₂ = Optimizer(GradientOptimizer(), nn2) - - o₁(nn1, dl1, batch) - o₂(nn2, dl2, batch) ->>>>>>> 4528053bc135b3f495ac732804cd913e4187cc96 end test_data_loader() \ No newline at end of file From 2bb3f2994d8f228937d1929f7855568e4e04d737 Mon Sep 17 00:00:00 2001 From: CompatHelper Julia Date: Wed, 20 Dec 2023 00:35:47 +0000 Subject: [PATCH 49/60] CompatHelper: bump compat for GPUArrays to 10, (keep existing compat) --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 16c3b1e15..1936d4dd8 100644 --- a/Project.toml +++ b/Project.toml @@ -44,7 +44,7 @@ ChainRulesTestUtils = "1" Distances = "0.10" Documenter = "0.27, 1" ForwardDiff = "0.10" -GPUArrays = "8, 9" +GPUArrays = "8, 9, 10" GeometricBase = "0.9" GeometricEquations = "0.14" GeometricIntegrators = "0.13" From 7aff01654a3138289225742dcc252a8d01cbbef7 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 13 Dec 2023 13:25:49 +0100 Subject: [PATCH 50/60] Added new routine for multiplying symmetric matrix from the right onto a matrix. --- src/arrays/symmetric.jl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/arrays/symmetric.jl b/src/arrays/symmetric.jl index 928b90ab5..3b81466a1 100644 --- a/src/arrays/symmetric.jl +++ b/src/arrays/symmetric.jl @@ -209,6 +209,8 @@ function Base.:*(A::SymmetricMatrix{T}, B::AbstractMatrix{T}) where T C end +Base.:*(B::AbstractMatrix{T}, A::SymmetricMatrix{T}) where T = (A * B')' + function Base.:*(A::SymmetricMatrix{T}, b::AbstractVector{T}) where T backend = KernelAbstractions.get_backend(A.S) c = KernelAbstractions.allocate(backend, T, A.n) From 3321d4b90f40817f1c8107f5f4ee3ae107f853cb Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 20 Dec 2023 09:01:48 +0100 Subject: [PATCH 51/60] Resolved merge conflict. 
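The right-multiplication one-liner added for `SymmetricMatrix` a couple of patches above leans on the identity B·A = B·Aᵀ = (A·Bᵀ)ᵀ, valid because A is symmetric, so the existing left-multiplication code can be reused instead of writing a second kernel. A quick numerical check of that identity with plain dense arrays (sizes invented):

```julia
using LinearAlgebra

W = rand(4, 4)
A = W + W'        # symmetric 4×4 matrix
B = rand(3, 4)

@assert B * A ≈ (A * B')'   # the identity behind *(B::AbstractMatrix, A::SymmetricMatrix)
```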
--- .../addition_tests_for_custom_arrays.jl | 40 ++++++++++++++----- 1 file changed, 31 insertions(+), 9 deletions(-) diff --git a/test/arrays/addition_tests_for_custom_arrays.jl b/test/arrays/addition_tests_for_custom_arrays.jl index 9413c5884..eb0ec6cd9 100644 --- a/test/arrays/addition_tests_for_custom_arrays.jl +++ b/test/arrays/addition_tests_for_custom_arrays.jl @@ -1,12 +1,34 @@ -#= -This tests addition for all custom arrays. Note that these tests will also have to be performed on GPU! -=# +using GeometricMachineLearning, Test -using LinearAlgebra -using Random -using Test -using GeometricMachineLearning +@doc raw""" +This function tests addition for various custom arrays, i.e. if \(A + B\) is performed in the correct way. +""" -function test_addition_for_symmetric_matrix(n::Int, T::Type) - A = rand(SymmetricMatrix{T}, n) +function addition_tests_for_custom_arrays(n::Int, N::Int, T::Type) + A = rand(T, n, n) + B = rand(T, n, n) + + # SymmetricMatrix + AB_sym = SymmetricMatrix(A + B) + AB_sym2 = SymmetricMatrix(A) + SymmetricMatrix(B) + @test AB_sym ≈ AB_sym2 + @test typeof(AB_sym) <: SymmetricMatrix{T} + @test typeof(AB_sym2) <: SymmetricMatrix{T} + + # SkewSymMatrix + AB_skew = SkewSymMatrix(A + B) + AB_skew2 = SkewSymMatrix(A) + SkewSymMatrix(B) + @test A_skew ≈ A_skew2 + @test typeof(AB_skew) <: SkewSymMatrix{T} + @test typeof(AB_skew2) <: SkewSymMatrix{T} + + C = rand(T, N, N) + D = rand(T, N, N) + + # StiefelLieAlgHorMatrix + CD_slahm = StiefelLieAlgHorMatrix(A + B, n) + CD_slahm2 = StiefelLieAlgHorMatrix(A, n) + StiefelLieAlgHorMatrix(B, n) + @test CD_slahm ≈ CD_slahm2 + @test typeof(CD_slahm) <: StiefelLieAlgHorMatrix{T} + @test typeof(CD_slahm2) <: StiefelLieAlgHorMatrix{T} end \ No newline at end of file From 65ee5c0528a6e2dc3f9cb820e77ed3a28a7d8ac1 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 20 Dec 2023 09:03:16 +0100 Subject: [PATCH 52/60] Resolved conflict (removed triangular matrices). --- src/GeometricMachineLearning.jl | 1 + src/arrays/symmetric.jl | 12 ++++++++++++ 2 files changed, 13 insertions(+) diff --git a/src/GeometricMachineLearning.jl b/src/GeometricMachineLearning.jl index b7b9e9f92..2b6813f35 100644 --- a/src/GeometricMachineLearning.jl +++ b/src/GeometricMachineLearning.jl @@ -88,6 +88,7 @@ module GeometricMachineLearning include("arrays/abstract_lie_algebra_horizontal.jl") include("arrays/stiefel_lie_algebra_horizontal.jl") include("arrays/grassmann_lie_algebra_horizontal.jl") + include("arrays/symmetric.jl") export SymmetricMatrix, SymplecticPotential, SkewSymMatrix export StiefelLieAlgHorMatrix diff --git a/src/arrays/symmetric.jl b/src/arrays/symmetric.jl index 3b81466a1..761e8dd72 100644 --- a/src/arrays/symmetric.jl +++ b/src/arrays/symmetric.jl @@ -211,6 +211,10 @@ end Base.:*(B::AbstractMatrix{T}, A::SymmetricMatrix{T}) where T = (A * B')' +function Base.:*(A::SymmetricMatrix{T}, B::SymmetricMatrix{T}) where T + A * (B * one(B)) +end + function Base.:*(A::SymmetricMatrix{T}, b::AbstractVector{T}) where T backend = KernelAbstractions.get_backend(A.S) c = KernelAbstractions.allocate(backend, T, A.n) @@ -218,6 +222,14 @@ function Base.:*(A::SymmetricMatrix{T}, b::AbstractVector{T}) where T c end +function Base.one(A::SymmetricMatrix{T}) where T + backend = KernelAbstractions.get_backend(A.S) + unit_matrix = KernelAbstractions.zeros(backend, T, A.n, A.n) + write_ones! 
= write_ones_kernel!(backend) + write_ones!(unit_matrix, ndrange=A.n) + unit_matrix +end + # define routines for generalizing ChainRulesCore to SymmetricMatrix ChainRulesCore.ProjectTo(A::SymmetricMatrix) = ProjectTo{SymmetricMatrix}(; symmetric=ProjectTo(A.S)) (project::ProjectTo{SymmetricMatrix})(dA::AbstractMatrix) = SymmetricMatrix(project.symmetric(map_to_S(dA)), size(dA, 2)) From 0f958d6d97455e5664fd67fc660dc69cf17369eb Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 13 Dec 2023 14:20:49 +0100 Subject: [PATCH 53/60] Slightly improved readability. --- src/arrays/skew_symmetric.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/arrays/skew_symmetric.jl b/src/arrays/skew_symmetric.jl index e1ab85fe2..3c14b9e45 100644 --- a/src/arrays/skew_symmetric.jl +++ b/src/arrays/skew_symmetric.jl @@ -187,7 +187,7 @@ end # the first matrix is multiplied onto A2 in order for it to not be SkewSymMatrix! function Base.:*(A1::SkewSymMatrix{T}, A2::SkewSymMatrix{T}) where T - A1*(one(A2)*A2) + A1 * (one(A2) * A2) end @doc raw""" From 250c7013b76cd987fe7c489a1a92ffb2f50abade Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 20 Dec 2023 09:04:15 +0100 Subject: [PATCH 54/60] Resolved conflict. --- test/arrays/addition_tests_for_custom_arrays.jl | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/test/arrays/addition_tests_for_custom_arrays.jl b/test/arrays/addition_tests_for_custom_arrays.jl index eb0ec6cd9..669ccc0ef 100644 --- a/test/arrays/addition_tests_for_custom_arrays.jl +++ b/test/arrays/addition_tests_for_custom_arrays.jl @@ -3,7 +3,6 @@ using GeometricMachineLearning, Test @doc raw""" This function tests addition for various custom arrays, i.e. if \(A + B\) is performed in the correct way. """ - function addition_tests_for_custom_arrays(n::Int, N::Int, T::Type) A = rand(T, n, n) B = rand(T, n, n) @@ -26,9 +25,15 @@ function addition_tests_for_custom_arrays(n::Int, N::Int, T::Type) D = rand(T, N, N) # StiefelLieAlgHorMatrix - CD_slahm = StiefelLieAlgHorMatrix(A + B, n) - CD_slahm2 = StiefelLieAlgHorMatrix(A, n) + StiefelLieAlgHorMatrix(B, n) + CD_slahm = StiefelLieAlgHorMatrix(C + D, n) + CD_slahm2 = StiefelLieAlgHorMatrix(C, n) + StiefelLieAlgHorMatrix(D, n) @test CD_slahm ≈ CD_slahm2 @test typeof(CD_slahm) <: StiefelLieAlgHorMatrix{T} @test typeof(CD_slahm2) <: StiefelLieAlgHorMatrix{T} + + CD_glahm = GrassmannLieAlgHorMatrix(C + D, n) + CD_glahm2 = GrassmannLieAlgHorMatrix(C, n) + GrassmannLieAlgHorMatrix(D, n) + @test CD_glahm ≈ CD_glahm2 + @test tyepof(CD_glahm) <: GrassmannLieAlgHorMatrix{T} + @test typeof(CD_glahm2) <: GrassmannLieAlgHorMatrix{T} end \ No newline at end of file From b320aaf664062cfc1c11195ecda2cb9728f2f9cc Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 20 Dec 2023 09:05:19 +0100 Subject: [PATCH 55/60] Resolved conflict (accepted both changes). 
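Regarding the `SkewSymMatrix` product touched up above: the parenthesisation matters for the result type rather than the value. The product of two skew-symmetric matrices is in general not skew-symmetric (one has (AB)ᵀ = BᵀAᵀ = BA, which need not equal −AB), so, per the comment in that patch, `one(A2) * A2` first turns the second factor into an ordinary matrix and the product is then computed and returned as a plain array. A dense check of the underlying fact (matrices invented):

```julia
using LinearAlgebra

A = [ 0.0  1.0;
     -1.0  0.0]
B = [ 0.0  2.0;
     -2.0  0.0]

C = A * B                    # [-2 0; 0 -2]
@assert (A * B)' == B * A    # (AB)ᵀ = BᵀAᵀ = (−B)(−A) = BA
@assert C' != -C             # so the product is not skew-symmetric here
```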
--- test/runtests.jl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test/runtests.jl b/test/runtests.jl index 033b04f16..e03f65f0f 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -4,6 +4,9 @@ using SafeTestsets @safetestset "Check parameterlength " begin include("parameterlength/check_parameterlengths.jl") end @safetestset "Arrays #1 " begin include("arrays/array_tests.jl") end @safetestset "Sampling of arrays " begin include("arrays/random_generation_of_custom_arrays.jl") end +@safetestset "Addition tests for custom arrays " begin include("arrays/addition_tests_for_custom_arrays.jl") end +@safetestset "Scalar multiplication tests for custom arrays " begin include("arrays/scalar_multiplication_for_custom_arrays.jl") end +@safetestset "Matrix multiplication tests for custom arrays " begin include("arrays/matrix_multiplication_for_custom_arrays.jl") end @safetestset "Manifolds (Grassmann): " begin include("manifolds/grassmann_manifold.jl") end @safetestset "Gradient Layer " begin include("layers/gradient_layer_tests.jl") end @safetestset "Test symplecticity of upscaling layer " begin include("layers/sympnet_layers_test.jl") end From 5ae3067f9e9c579f091746f0a09541f7100e2338 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 13 Dec 2023 14:52:14 +0100 Subject: [PATCH 56/60] Added tests for addition, scalar multiplication and matrix multiplication for custom arrays. --- .../addition_tests_for_custom_arrays.jl | 4 +- ...matrix_multiplication_for_custom_arrays.jl | 21 ++++++++++ ...scalar_multiplication_for_custom_arrays.jl | 40 +++++++++++++++++++ 3 files changed, 64 insertions(+), 1 deletion(-) create mode 100644 test/arrays/matrix_multiplication_for_custom_arrays.jl create mode 100644 test/arrays/scalar_multiplication_for_custom_arrays.jl diff --git a/test/arrays/addition_tests_for_custom_arrays.jl b/test/arrays/addition_tests_for_custom_arrays.jl index 669ccc0ef..5fd2bbcfc 100644 --- a/test/arrays/addition_tests_for_custom_arrays.jl +++ b/test/arrays/addition_tests_for_custom_arrays.jl @@ -36,4 +36,6 @@ function addition_tests_for_custom_arrays(n::Int, N::Int, T::Type) @test CD_glahm ≈ CD_glahm2 @test tyepof(CD_glahm) <: GrassmannLieAlgHorMatrix{T} @test typeof(CD_glahm2) <: GrassmannLieAlgHorMatrix{T} -end \ No newline at end of file +end + +addition_tests_for_custom_arrays(5, 10, Float32) \ No newline at end of file diff --git a/test/arrays/matrix_multiplication_for_custom_arrays.jl b/test/arrays/matrix_multiplication_for_custom_arrays.jl new file mode 100644 index 000000000..db051ec3e --- /dev/null +++ b/test/arrays/matrix_multiplication_for_custom_arrays.jl @@ -0,0 +1,21 @@ +using GeometricMachineLearning, Test + +@doc raw""" +This function tests matrix multiplication for various custom arrays, i.e. if \((A,\alpha) \mapsto \alpha{}A\) is performed in the correct way. 
+""" +function matrix_multiplication_tests_for_custom_arrays(n::Int, N::Int, T::Type) + A = rand(T, n, n) + B = rand(T, n, N) + + # SymmetricMatrix + A_sym = SymmetricMatrix(A) + @test A_sym * B ≈ Matrix{T}(A_sym) * B + @test B' * A_sym ≈ B' * Matrix{T}(A_sym) + + # SkewSymMatrix + A_skew = SkewSymMatrix(A) + @test A_skew * B ≈ Matrix{T}(A_skew) * B + @test B' * A_skew ≈ B' * Matrix{T}(A_skew) +end + +matrix_multiplication_tests_for_custom_arrays(5, 10, Float32) \ No newline at end of file diff --git a/test/arrays/scalar_multiplication_for_custom_arrays.jl b/test/arrays/scalar_multiplication_for_custom_arrays.jl new file mode 100644 index 000000000..c6214a3e1 --- /dev/null +++ b/test/arrays/scalar_multiplication_for_custom_arrays.jl @@ -0,0 +1,40 @@ +using GeometricMachineLearning, Test + +@doc raw""" +This function tests scalar multiplication for various custom arrays, i.e. if \((A,\alpha) \mapsto \alpha{}A\) is performed in the correct way. +""" +function scalar_multiplication_for_custom_arrays(n::Int, N::Int, T::Type) + A = rand(T, n, n) + α = rand(T) + + # SymmetricMatrix + Aα_sym = SymmetricMatrix(α * A) + Aα_sym2 = α * SymmetricMatrix(A) + @test Aα_sym ≈ Aα_sym2 + @test typeof(Aα_sym) <: SymmetricMatrix{T} + @test typeof(Aα_sym2) <: SymmetricMatrix{T} + + # SkewSymMatrix + Aα_skew = SkewSymMatrix(α * A) + Aα_skew2 = α * SkewSymMatrix(A) + @test Aα_skew ≈ Aα_skew2 + @test typeof(Aα_skew) <: SkewSymMatrix{T} + @test typeof(Aα_skew2) <: SkewSymMatrix{T} + + C = rand(T, N, N) + + # StiefelLieAlgHorMatrix + Cα_slahm = StiefelLieAlgHorMatrix(α * C, n) + Cα_slahm2 = α * StiefelLieAlgHorMatrix(C, n) + @test Cα_slahm ≈ Cα_slahm2 + @test typeof(Cα_slahm) <: StiefelLieAlgHorMatrix{T} + @test typeof(Cα_slahm2) <: StiefelLieAlgHorMatrix{T} + + Cα_glahm = GrassmannLieAlgHorMatrix(α * C, n) + Cα_glahm2 = α * GrassmannLieAlgHorMatrix(C, n) + @test Cα_glahm ≈ Cα_glahm2 + @test tyepof(Cα_glahm) <: GrassmannLieAlgHorMatrix{T} + @test typeof(Cα_glahm2) <: GrassmannLieAlgHorMatrix{T} +end + +scalar_multiplication_for_custom_arrays(5, 10, Float32) \ No newline at end of file From d4bbb6c80de27188f62c102ed66373d16a28f6d1 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 20 Dec 2023 09:06:50 +0100 Subject: [PATCH 57/60] Resolved merge conflict. --- src/GeometricMachineLearning.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/src/GeometricMachineLearning.jl b/src/GeometricMachineLearning.jl index 2b6813f35..b7b9e9f92 100644 --- a/src/GeometricMachineLearning.jl +++ b/src/GeometricMachineLearning.jl @@ -88,7 +88,6 @@ module GeometricMachineLearning include("arrays/abstract_lie_algebra_horizontal.jl") include("arrays/stiefel_lie_algebra_horizontal.jl") include("arrays/grassmann_lie_algebra_horizontal.jl") - include("arrays/symmetric.jl") export SymmetricMatrix, SymplecticPotential, SkewSymMatrix export StiefelLieAlgHorMatrix From f25e3faf6a18cb9dbfbbb1401db6d09996e40aa8 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 20 Dec 2023 09:07:54 +0100 Subject: [PATCH 58/60] Fixed (apparent) conflict. --- test/arrays/array_tests.jl | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/test/arrays/array_tests.jl b/test/arrays/array_tests.jl index c0da5eba9..292eae52f 100644 --- a/test/arrays/array_tests.jl +++ b/test/arrays/array_tests.jl @@ -54,6 +54,10 @@ function skew_mat_mul_test2(n, T=Float64) @test isapprox(AS1, AS2) end +<<<<<<< HEAD +======= + +>>>>>>> c6a350e (Fixed typo.) 
# test Stiefel manifold projection test function stiefel_proj_test(N,n) In = I(n) @@ -61,6 +65,10 @@ function stiefel_proj_test(N,n) @test all(abs.((E'*E) .- In) .< 1e-10) end +<<<<<<< HEAD +======= + +>>>>>>> c6a350e (Fixed typo.) function stiefel_lie_alg_add_sub_test(N, n) E = StiefelProjection(N, n) projection(W::SkewSymMatrix) = W - (I - E*E')*W*(I - E*E') @@ -76,6 +84,14 @@ function stiefel_lie_alg_add_sub_test(N, n) @test all(abs.(projection(W₁ - W₂) .- S₄) .< 1e-10) end +<<<<<<< HEAD +======= +function stiefel_lie_alg_vectorization_test(N, n; T=Float32) + A = rand(StiefelLieAlgHorMatrix{T}, N, n) + @test isapprox(StiefelLieAlgHorMatrix(vec(A), N, n), A) +end + +>>>>>>> c6a350e (Fixed typo.) # TODO: tests for ADAM functions # test everything for different n & N values @@ -96,4 +112,8 @@ for (N, n) ∈ zip(N_vec, n_vec) skew_mat_mul_test2(N) stiefel_proj_test(N,n) stiefel_lie_alg_add_sub_test(N,n) +<<<<<<< HEAD +======= + stiefel_lie_alg_vectorization_test(N, n) +>>>>>>> c6a350e (Fixed typo.) end From e0cc2d2aa03e8ff7e50a1c670b8ab26a681539a2 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 13 Dec 2023 15:05:50 +0100 Subject: [PATCH 59/60] Fixed typo. --- test/arrays/addition_tests_for_custom_arrays.jl | 4 ++-- test/arrays/scalar_multiplication_for_custom_arrays.jl | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/test/arrays/addition_tests_for_custom_arrays.jl b/test/arrays/addition_tests_for_custom_arrays.jl index 5fd2bbcfc..44fc878c1 100644 --- a/test/arrays/addition_tests_for_custom_arrays.jl +++ b/test/arrays/addition_tests_for_custom_arrays.jl @@ -17,7 +17,7 @@ function addition_tests_for_custom_arrays(n::Int, N::Int, T::Type) # SkewSymMatrix AB_skew = SkewSymMatrix(A + B) AB_skew2 = SkewSymMatrix(A) + SkewSymMatrix(B) - @test A_skew ≈ A_skew2 + @test AB_skew ≈ AB_skew2 @test typeof(AB_skew) <: SkewSymMatrix{T} @test typeof(AB_skew2) <: SkewSymMatrix{T} @@ -34,7 +34,7 @@ function addition_tests_for_custom_arrays(n::Int, N::Int, T::Type) CD_glahm = GrassmannLieAlgHorMatrix(C + D, n) CD_glahm2 = GrassmannLieAlgHorMatrix(C, n) + GrassmannLieAlgHorMatrix(D, n) @test CD_glahm ≈ CD_glahm2 - @test tyepof(CD_glahm) <: GrassmannLieAlgHorMatrix{T} + @test typeof(CD_glahm) <: GrassmannLieAlgHorMatrix{T} @test typeof(CD_glahm2) <: GrassmannLieAlgHorMatrix{T} end diff --git a/test/arrays/scalar_multiplication_for_custom_arrays.jl b/test/arrays/scalar_multiplication_for_custom_arrays.jl index c6214a3e1..3a5e56205 100644 --- a/test/arrays/scalar_multiplication_for_custom_arrays.jl +++ b/test/arrays/scalar_multiplication_for_custom_arrays.jl @@ -33,7 +33,7 @@ function scalar_multiplication_for_custom_arrays(n::Int, N::Int, T::Type) Cα_glahm = GrassmannLieAlgHorMatrix(α * C, n) Cα_glahm2 = α * GrassmannLieAlgHorMatrix(C, n) @test Cα_glahm ≈ Cα_glahm2 - @test tyepof(Cα_glahm) <: GrassmannLieAlgHorMatrix{T} + @test typeof(Cα_glahm) <: GrassmannLieAlgHorMatrix{T} @test typeof(Cα_glahm2) <: GrassmannLieAlgHorMatrix{T} end From 1e659d7c5f6e423824a4cb216ce507f39aefb1e6 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 20 Dec 2023 09:13:53 +0100 Subject: [PATCH 60/60] Fixed comments from previous merge conflict. 
--- test/arrays/array_tests.jl | 27 --------------------------- 1 file changed, 27 deletions(-) diff --git a/test/arrays/array_tests.jl b/test/arrays/array_tests.jl index dadc11665..8621ddda7 100644 --- a/test/arrays/array_tests.jl +++ b/test/arrays/array_tests.jl @@ -53,14 +53,6 @@ function skew_mat_mul_test2(n, T=Float64) AS2 = A*Matrix{T}(S) @test isapprox(AS1, AS2) end - -<<<<<<< HEAD -<<<<<<< HEAD -======= -======= ->>>>>>> c6a350ed471420a4b4583c45978658d85b1f253e - ->>>>>>> c6a350e (Fixed typo.) # test Stiefel manifold projection test function stiefel_proj_test(N,n) In = I(n) @@ -68,13 +60,6 @@ function stiefel_proj_test(N,n) @test all(abs.((E'*E) .- In) .< 1e-10) end -<<<<<<< HEAD -<<<<<<< HEAD -======= -======= ->>>>>>> c6a350ed471420a4b4583c45978658d85b1f253e - ->>>>>>> c6a350e (Fixed typo.) function stiefel_lie_alg_add_sub_test(N, n) E = StiefelProjection(N, n) projection(W::SkewSymMatrix) = W - (I - E*E')*W*(I - E*E') @@ -90,17 +75,11 @@ function stiefel_lie_alg_add_sub_test(N, n) @test all(abs.(projection(W₁ - W₂) .- S₄) .< 1e-10) end -<<<<<<< HEAD -<<<<<<< HEAD -======= -======= ->>>>>>> c6a350ed471420a4b4583c45978658d85b1f253e function stiefel_lie_alg_vectorization_test(N, n; T=Float32) A = rand(StiefelLieAlgHorMatrix{T}, N, n) @test isapprox(StiefelLieAlgHorMatrix(vec(A), N, n), A) end ->>>>>>> c6a350e (Fixed typo.) # TODO: tests for ADAM functions # test everything for different n & N values @@ -121,11 +100,5 @@ for (N, n) ∈ zip(N_vec, n_vec) skew_mat_mul_test2(N) stiefel_proj_test(N,n) stiefel_lie_alg_add_sub_test(N,n) -<<<<<<< HEAD -<<<<<<< HEAD -======= -======= ->>>>>>> c6a350ed471420a4b4583c45978658d85b1f253e stiefel_lie_alg_vectorization_test(N, n) ->>>>>>> c6a350e (Fixed typo.) end
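The algebraic identities exercised by the new test files can also be checked interactively. Below is a minimal sketch (not part of any patch above); it assumes GeometricMachineLearning is available in the active Julia environment, uses arbitrarily chosen sizes and element type, and simply mirrors assertions that already appear in the diffs of this series:

using GeometricMachineLearning

A, B = rand(Float32, 5, 5), rand(Float32, 5, 5)   # arbitrary square matrices, as in the tests
α = rand(Float32)

# Building a SkewSymMatrix from a sum agrees with summing the two wrappers.
SkewSymMatrix(A + B) ≈ SkewSymMatrix(A) + SkewSymMatrix(B)   # expected: true

# Scalar multiplication commutes with the SymmetricMatrix constructor.
SymmetricMatrix(α * A) ≈ α * SymmetricMatrix(A)              # expected: true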