From ba96b359cf1730f4efc502625664907398229d11 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 13 Dec 2023 09:19:22 +0100 Subject: [PATCH 01/96] Remove old array tests. --- test/arrays/array_tests_old.jl | 27 --------------------------- 1 file changed, 27 deletions(-) delete mode 100644 test/arrays/array_tests_old.jl diff --git a/test/arrays/array_tests_old.jl b/test/arrays/array_tests_old.jl deleted file mode 100644 index 98e08a726..000000000 --- a/test/arrays/array_tests_old.jl +++ /dev/null @@ -1,27 +0,0 @@ -using LinearAlgebra -using Test - -using GeometricMachineLearning: BlockIdentityLowerMatrix, SymmetricBlockIdentityLowerMatrix -using GeometricMachineLearning: BlockIdentityUpperMatrix, SymmetricBlockIdentityUpperMatrix -using GeometricMachineLearning: ZeroVector - - -W = rand(2,2) -S = W .+ W' -x = ones(4) -y = zero(x) - -@test mul!(y, BlockIdentityLowerMatrix(W), x) == vcat(ones(2), ones(2) .+ W * ones(2)) -@test mul!(y, BlockIdentityUpperMatrix(W), x) == vcat(ones(2) .+ W * ones(2), ones(2)) - -@test mul!(y, SymmetricBlockIdentityLowerMatrix(W), x) == vcat(ones(2), ones(2) .+ S * ones(2)) -@test mul!(y, SymmetricBlockIdentityUpperMatrix(W), x) == vcat(ones(2) .+ S * ones(2), ones(2)) - - -z = ZeroVector(Float64, 4) - -@test z[1] == zero(Float64) -@test z[4] == zero(Float64) - -@test_throws AssertionError z[0] -@test_throws AssertionError z[5] From 0fb91568e2d76661db1b099654001e68ae48734d Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 13 Dec 2023 09:32:38 +0100 Subject: [PATCH 02/96] Removed the block matrices. This is now done by the LinearSympNetLayers. --- src/arrays/block_identity_lower.jl | 37 ------------------------------ src/arrays/block_identity_upper.jl | 37 ------------------------------ 2 files changed, 74 deletions(-) delete mode 100644 src/arrays/block_identity_lower.jl delete mode 100644 src/arrays/block_identity_upper.jl diff --git a/src/arrays/block_identity_lower.jl b/src/arrays/block_identity_lower.jl deleted file mode 100644 index d51fab751..000000000 --- a/src/arrays/block_identity_lower.jl +++ /dev/null @@ -1,37 +0,0 @@ -""" -A `BlockIdentityLowerMatrix` is a matrix with blocks -| 1 0 | -| S 1 | -Currently, it only implements a custom `mul!` method, exploiting this structure. -""" -struct BlockIdentityLowerMatrix{T, AT <: AbstractMatrix{T}} <: AbstractMatrix{T} - S::AT - - function BlockIdentityLowerMatrix(S::AbstractMatrix) - @assert length(axes(S,1)) == length(axes(S,2)) - new{eltype(S), typeof(S)}(S) - end -end - -SymmetricBlockIdentityLowerMatrix(W::AbstractMatrix) = BlockIdentityLowerMatrix(W .+ W') - -Base.parent(A::BlockIdentityLowerMatrix) = A.S -Base.size(A::BlockIdentityLowerMatrix) = 2 .* size(A.S) - -function LinearAlgebra.mul!(out::AbstractVector, A::BlockIdentityLowerMatrix, z::AbstractVector) - @assert length(out) == length(z) == 2*length(axes(A.S,2)) - - N = length(axes(A.S,1)) - - q_in = @view z[1:N] - p_in = @view z[N+1:2N] - q_out = @view out[1:N] - p_out = @view out[N+1:2N] - - mul!(p_out, A.S, q_in) - - q_out .= q_in - p_out .+= p_in - - return out -end diff --git a/src/arrays/block_identity_upper.jl b/src/arrays/block_identity_upper.jl deleted file mode 100644 index c4ff51e03..000000000 --- a/src/arrays/block_identity_upper.jl +++ /dev/null @@ -1,37 +0,0 @@ -""" -A `BlockIdentityUpperMatrix` is a matrix with blocks -| 1 S | -| 0 1 | -Currently, it only implements a custom `mul!` method, exploiting this structure. 
-""" -struct BlockIdentityUpperMatrix{T, AT <: AbstractMatrix{T}} <: AbstractMatrix{T} - S::AT - - function BlockIdentityUpperMatrix(S::AbstractMatrix) - @assert length(axes(S,1)) == length(axes(S,2)) - new{eltype(S), typeof(S)}(S) - end -end - -SymmetricBlockIdentityUpperMatrix(W::AbstractMatrix) = BlockIdentityUpperMatrix(W .+ W') - -Base.parent(A::BlockIdentityUpperMatrix) = A.S -Base.size(A::BlockIdentityUpperMatrix) = 2 .* size(A.S) - -function LinearAlgebra.mul!(out::AbstractVector, A::BlockIdentityUpperMatrix, z::AbstractVector) - @assert length(out) == length(z) == 2*length(axes(A.S,2)) - - N = length(axes(A.S,1)) - - q_in = @view z[1:N] - p_in = @view z[N+1:2N] - q_out = @view out[1:N] - p_out = @view out[N+1:2N] - - mul!(q_out, A.S, p_in) - - q_out .+= q_in - p_out .= p_in - - return out -end From dd4e8a55add8e9917b23fd6d341e1611e4e59bd5 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 13 Dec 2023 09:35:38 +0100 Subject: [PATCH 03/96] Hope this fixed the TODO list. --- src/arrays/symmetric.jl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/arrays/symmetric.jl b/src/arrays/symmetric.jl index f7db2c6c6..928b90ab5 100644 --- a/src/arrays/symmetric.jl +++ b/src/arrays/symmetric.jl @@ -20,12 +20,12 @@ Internally the `struct` saves a vector $S$ of size $n(n+1)\div2$. The conversion So $S$ stores a string of vectors taken from $A$: $S = [\tilde{a}_1, \tilde{a}_2, \ldots, \tilde{a}_n]$ with $\tilde{a}_i = [[A]_{i1},[A]_{i2},\ldots,[A]_{ii}]$. TODO: --[x] Overload Adjoint operation for SymmetricMatrix!! (Aᵀ = A) --[ ] implement matrix and vector products (to also work on GPU) --[x] implement zero initialization (for optimizer) --[ ] perform some tests (also with Zygote) --[x] update the constructor (to work better for GPU) --[ ] implement multiplication with a tensor +- [x] Overload Adjoint operation for SymmetricMatrix!! (Aᵀ = A) +- [ ] implement matrix and vector products (to also work on GPU) +- [x] implement zero initialization (for optimizer) +- [ ] perform some tests (also with Zygote) +- [x] update the constructor (to work better for GPU) +- [ ] implement multiplication with a tensor """ mutable struct SymmetricMatrix{T, AT <: AbstractVector{T}} <: AbstractMatrix{T} S::AT From 35c0d4862ba8ab2ba9a0a117d6298d5ad668488a Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 13 Dec 2023 10:06:34 +0100 Subject: [PATCH 04/96] We are not using those arrays anymore. --- src/arrays/symplectic_lie_algebra.jl | 89 ------ .../symplectic_lie_algebra_horizontal.jl | 298 ------------------ 2 files changed, 387 deletions(-) delete mode 100644 src/arrays/symplectic_lie_algebra.jl delete mode 100644 src/arrays/symplectic_lie_algebra_horizontal.jl diff --git a/src/arrays/symplectic_lie_algebra.jl b/src/arrays/symplectic_lie_algebra.jl deleted file mode 100644 index 2d77394b1..000000000 --- a/src/arrays/symplectic_lie_algebra.jl +++ /dev/null @@ -1,89 +0,0 @@ -""" -A `SymplecticLieAlgMatrix` is a matrix -| A B | -| C -A' |, where B and C are symmetric. - -The first index is the row index, the second one the column index. - -TODO: Check how LinearAlgebra implements matrix multiplication! -""" - -#-> AbstractVecOrMat!! 
-mutable struct SymplecticLieAlgMatrix{T, AT <: AbstractMatrix{T}, BT <: SymmetricMatrix{T}} <: AbstractMatrix{T} - A::AT - B::BT - C::BT - n::Int - - function SymplecticLieAlgMatrix(A::AbstractMatrix{T}, B::SymmetricMatrix{T}, C::SymmetricMatrix{T}, n) where {T} - @assert eltype(B) == eltype(C) == eltype(A) - @assert B.n == C.n == n - @assert size(A) == (n,n) - new{T,typeof(A),typeof(B)}(A,B,C,n) - end - function SymplecticLieAlgMatrix(S::AbstractMatrix{T}) where {T} - n2 = size(S, 1) - @assert iseven(n2) - @assert size(S, 2) == n2 - n = n2÷2 - - A = T(0.5)*(S[1:n,1:n] - S[(n+1):n2, (n+1):n2]') - B = SymmetricMatrix(S[1:n,(n+1):n2]) - C = SymmetricMatrix(S[(n+1):n2,1:n]) - - new{eltype(S), typeof(A), typeof(B)}(A, B, C, n) - end - -end - - -#implementing getindex automatically defines all matrix multiplications! (but probably not in the most efficient way) -function Base.getindex(A::SymplecticLieAlgMatrix,i,j) - n = A.n - if i ≤ n && j ≤ n - return A.A[i,j] - end - if i ≤ n - return A.B[i, j-n] - end - if j ≤ n - return A.C[i-n, j] - end - return -A.A[j-n,i-n] -end - - -Base.parent(A::SymplecticLieAlgMatrix) = (A=A.A,B=A.B,C=A.C) -Base.size(A::SymplecticLieAlgMatrix) = (2*A.n,2*A.n) - -function Base.:+(S₁::SymplecticLieAlgMatrix, S₂::SymplecticLieAlgMatrix) - @assert S₁.n == S₂.n - SymplecticLieAlgMatrix( - S₁.A + S₂.A, - S₁.B + S₂.B, - S₁.C + S₂.C, - S₁.n - ) -end - -function Base.:-(S₁::SymplecticLieAlgMatrix, S₂::SymplecticLieAlgMatrix) - @assert S₁.n == S₂.n - SymplecticLieAlgMatrix( - S₁.A - S₂.A, - S₁.B - S₂.B, - S₁.C - S₂.C, - S₁.n - ) -end - -function Base.:-(S::SymplecticLieAlgMatrix) - SymplecticLieAlgMatrix( - -S.A, - -S.B, - -S.C, - S.n - ) -end - - -#TODO: implement functions as in StiefelLieAlgHorMatrix \ No newline at end of file diff --git a/src/arrays/symplectic_lie_algebra_horizontal.jl b/src/arrays/symplectic_lie_algebra_horizontal.jl deleted file mode 100644 index 60a8e1fb9..000000000 --- a/src/arrays/symplectic_lie_algebra_horizontal.jl +++ /dev/null @@ -1,298 +0,0 @@ -""" -The `horizontal part` of `SymplecticLieAlgMatrix` is a matrix -| A B | -| C -A' |, where B and C are symmetric - -with the following additional requirements: -| A₁ A₂ᵀ| -| A₃ 0 | = A, - -| B₁ B₂ᵀ| -| B₂ 0 | = B, - -| C₁ C₂ᵀ| -| C₂ 0 | = C. - -The horizontal component is here taken with respect to the G-invariant metric (X₁,X₂) ↦ tr(X₁⁺X₂). (This metric is bi-invariant but degenerate.) - -The first index is the row index, the second one the column index. - -TODO: Make this a subtype of SymplecticLieAlgMatrix!!!!! -""" - -#-> AbstractVecOrMat!! 
-mutable struct SymplecticLieAlgHorMatrix{T, AT <: AbstractMatrix{T}, ST <: SymmetricMatrix{T}} <: AbstractMatrix{T} - A₁::AT - A₂::AT - A₃::AT - B₁::ST - B₂::AT - C₁::ST - C₂::AT - N::Int - n::Int - - function SymplecticLieAlgHorMatrix(A₁::AbstractMatrix, A₂::AbstractMatrix, A₃::AbstractMatrix, - B₁::SymmetricMatrix, B₂::AbstractMatrix, C₁::SymmetricMatrix, - C₂::AbstractMatrix, N::Int, n::Int) - @assert eltype(A₁) == eltype(A₂) == eltype(A₃) == eltype(B₁) == eltype(B₂) == eltype(C₁) == eltype(C₂) - @assert size(A₁)[1] == size(A₁)[2] == size(A₂)[2] == size(A₃)[2] == B₁.n == size(B₂)[2] == C₁.n == size(C₂)[2] == n - @assert size(A₂)[1] == size(A₃)[1] == size(B₂)[1] == size(C₂)[1] == (N-n) - new{eltype(A₁), typeof(A₁), typeof(B₁)}(A₁, A₂, A₃, B₁, B₂, C₁, C₂, N, n) - end - function SymplecticLieAlgHorMatrix(S::SymplecticLieAlgMatrix, n::Int) - N = S.n - new{eltype(S.A), typeof(S.A), typeof(S.B)}( - S.A[1:n, 1:n], - S.A[1:n, (n+1):N]', - S.A[(n+1):N, 1:n], - #this could be made more efficient by accessing the vector that parametrizes SymmetricMatrix!! - SymmetricMatrix(S.B[1:n, 1:n]), - S.B[(n+1):N, 1:n], - SymmetricMatrix(S.C[1:n, 1:n]), - S.C[(n+1):N, 1:n], - N, - n - ) - end -end - - -function A_index(A₁::AbstractMatrix, A₂::AbstractMatrix, A₃::AbstractMatrix, n, i, j) - if i ≤ n - if j ≤ n - return A₁[i,j] - end - return A₂[j-n, i] - end - if j ≤ n - return A₃[i - n, j] - end - return 0. -end - -function BC_index(B₁::SymmetricMatrix, B₂::AbstractMatrix, n, i, j) - if i ≤ n - if j ≤ n - return B₁[i,j] - end - return B₂[j-n, i] - end - if j ≤ n - return B₂[i - n, j] - end - return 0. -end - -function Base.getindex(S::SymplecticLieAlgHorMatrix, i, j) - N = S.N - n = S.n - - if i ≤ N && j ≤ N - return A_index(S.A₁, S.A₂, S.A₃, n, i, j) - end - if i ≤ N - return BC_index(S.B₁, S.B₂, n, i, j-N) - end - if j ≤ N - return BC_index(S.C₁, S.C₂, n, i-N, j) - end - return -A_index(S.A₁, S.A₂, S.A₃, n, j-N, i-N) -end - - -Base.parent(S::SymplecticLieAlgHorMatrix) = (A₁=S.A₁, A₂=S.A₂, A₃=S.A₃, B₁=S.B₁, B₂=S.B₂, C₁=S.C₁, C₂=S.C₂) -Base.size(S::SymplecticLieAlgHorMatrix) = (2*S.N,2*S.N) - -function Base.:+(S₁::SymplecticLieAlgHorMatrix, S₂::SymplecticLieAlgHorMatrix) - @assert S₁.n == S₂.n - @assert S₁.N == S₂.N - SymplecticLieAlgHorMatrix( - S₁.A₁ + S₂.A₁, - S₁.A₂ + S₂.A₂, - S₁.A₃ + S₂.A₃, - S₁.B₁ + S₂.B₁, - S₁.B₂ + S₂.B₂, - S₁.C₁ + S₂.C₁, - S₁.C₂ + S₂.C₂, - S₁.N, - S₁.n - ) -end - -function Base.:-(S₁::SymplecticLieAlgHorMatrix, S₂::SymplecticLieAlgHorMatrix) - @assert S₁.n == S₂.n - @assert S₁.N == S₂.N - SymplecticLieAlgHorMatrix( - S₁.A₁ - S₂.A₁, - S₁.A₂ - S₂.A₂, - S₁.A₃ - S₂.A₃, - S₁.B₁ - S₂.B₁, - S₁.B₂ - S₂.B₂, - S₁.C₁ - S₂.C₁, - S₁.C₂ - S₂.C₂, - S₁.N, - S₁.n - ) -end - -function add!(C::SymplecticLieAlgHorMatrix, A::SymplecticLieAlgHorMatrix, B::SymplecticLieAlgHorMatrix) - @assert A.N == B.N == C.N - @assert A.n == B.n == C.n - add!(C.A₁, A.A₁, B.A₁), - add!(C.A₂, A.A₂, B.A₂), - add!(C.A₃, A.A₃, B.A₃), - add!(C.B₁, A.B₁, B.B₁), - add!(C.B₂, A.B₂, B.B₂), - add!(C.C₁, A.C₁, B.C₁), - add!(C.C₂, A.C₂, B.C₂) -end - -function Base.:-(S::SymplecticLieAlgHorMatrix) - SymplecticLieAlgHorMatrix( - -S.A₁, - -S.A₂, - -S.A₃, - -S.B₁, - -S.B₂, - -S.C₁, - -S.C₂, - S.N, - S.n - ) -end - -function Base.:*(S::SymplecticLieAlgHorMatrix, α::Real) - SymplecticLieAlgHorMatrix( - α*S.A₁, - α*S.A₂, - α*S.A₃, - α*S.B₁, - α*S.B₂, - α*S.C₁, - α*S.C₂, - S.N, - S.n - ) -end - -Base.:*(α::Real, A::SymplecticLieAlgHorMatrix) = A*α - -function Base.zeros(::Type{SymplecticLieAlgHorMatrix{T}}, N::Integer, n::Integer) where T - 
SymplecticLieAlgHorMatrix( - zeros(T, n, n), - zeros(T, N-n, n), - zeros(T, N-n, n), - zeros(SymmetricMatrix{T}, n), - zeros(T, N-n, n), - zeros(SymmetricMatrix{T},n), - zeros(T, N-n, n), - N, - n - ) -end - -function Base.zeros(::Type{SymplecticLieAlgHorMatrix}, N::Integer, n::Integer) - SymplecticLieAlgHorMatrix( - zeros(n, n), - zeros(N-n, n), - zeros(N-n, n), - zeros(SymmetricMatrix, n), - zeros(N-n, n), - zeros(SymmetricMatrix, n), - zeros(N-n, n), - N, - n - ) -end - - -function Base.rand(::Type{SymplecticLieAlgHorMatrix{T}}, N::Integer, n::Integer) where T - SymplecticLieAlgHorMatrix( - rand(T, n, n), - rand(T, N-n, n), - rand(T, N-n, n), - rand(SymmetricMatrix{T}, n), - rand(T, N-n, n), - rand(SymmetricMatrix{T},n), - rand(T, N-n, n), - N, - n - ) -end - -function Base.rand(::Type{SymplecticLieAlgHorMatrix}, N::Integer, n::Integer) - SymplecticLieAlgHorMatrix( - rand(n, n), - rand(N-n, n), - rand(N-n, n), - rand(SymmetricMatrix, n), - rand(N-n, n), - rand(SymmetricMatrix, n), - rand(N-n, n), - N, - n - ) -end - -function scalar_add(S::SymplecticLieAlgHorMatrix, δ::Real) - SymplecticLieAlgHorMatrix( - S.A₁ .+ δ, - S.A₂ .+ δ, - S.A₃ .+ δ, - scalar_add(S.B₁, δ), - S.B₂ .+ δ, - scalar_add(S.C₁, δ), - S.C₂ .+ δ, - S.N, - S.n - ) -end - - -#function Base.:./(A::SymplecticLieAlgMatrix,B::SymplecticLieAlgMatrix) -function /ᵉˡᵉ(S₁::SymplecticLieAlgHorMatrix,S₂::SymplecticLieAlgHorMatrix) - @assert S₁.n == S₂.n - @assert S₁.N == S₂.N - SymplecticLieAlgHorMatrix( - S₁.A₁./S₂.A₁, - S₁.A₂./S₂.A₂, - S₁.A₃/S₂.A₃, - /ᵉˡᵉ(S₁.B₁, S₂.B₁), - S₁.B₂./S₂.B₂, - /ᵉˡᵉ(S₁.C₁, S₂.C₁), - S₁.C₂/S₂.C₂, - S₁.N, - S₁.n - ) -end - -function ⊙²(S::SymplecticLieAlgHorMatrix) - SymplecticLieHorAlgMatrix( - S.A₁.^2, - S.A₂.^2, - S.A₃.^2, - ⊙²(S.B₁), - S.B₂.^2, - ⊙²(S.C₁), - S.C₂.^2, - S.N, - S.n - ) -end - -#= -function RACᵉˡᵉ(S::SymplecticLieAlgHorMatrix) - SymplecticLieAlgMatrix( - sqrt.(S.A₁), - sqrt.(S.A₂), - sqrt.(S.A₃), - RACᵉˡᵉ(S.B₁), - sqrt.(S.B₂), - RACᵉˡᵉ(S.B₁), - sqrt.(S.C₂), - S.N, - S.n - ) -end -=# \ No newline at end of file From 1f1e30d8ee16b91cc9d073ef907956a91c72042c Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 13 Dec 2023 13:10:14 +0100 Subject: [PATCH 05/96] Factored out sampling of arrays. --- .../addition_tests_for_custom_arrays.jl | 12 +++++++++ .../random_generation_of_custom_arrays.jl | 27 +++++++++++++++++++ test/runtests.jl | 2 +- 3 files changed, 40 insertions(+), 1 deletion(-) create mode 100644 test/arrays/addition_tests_for_custom_arrays.jl create mode 100644 test/arrays/random_generation_of_custom_arrays.jl diff --git a/test/arrays/addition_tests_for_custom_arrays.jl b/test/arrays/addition_tests_for_custom_arrays.jl new file mode 100644 index 000000000..9413c5884 --- /dev/null +++ b/test/arrays/addition_tests_for_custom_arrays.jl @@ -0,0 +1,12 @@ +#= +This tests addition for all custom arrays. Note that these tests will also have to be performed on GPU! +=# + +using LinearAlgebra +using Random +using Test +using GeometricMachineLearning + +function test_addition_for_symmetric_matrix(n::Int, T::Type) + A = rand(SymmetricMatrix{T}, n) +end \ No newline at end of file diff --git a/test/arrays/random_generation_of_custom_arrays.jl b/test/arrays/random_generation_of_custom_arrays.jl new file mode 100644 index 000000000..d764ded9f --- /dev/null +++ b/test/arrays/random_generation_of_custom_arrays.jl @@ -0,0 +1,27 @@ +#= +This tests random generation of custom arrays. This will have to be expanded to GPU tests. 
+=# +using LinearAlgebra +using Random +using Test +using GeometricMachineLearning + +function test_random_array_generation(n::Int, N::Int, T::Type) + A_sym = rand(SymmetricMatrix{T}, n) + @test typeof(A_sym) <: SymmetricMatrix{T} + @test eltype(A_sym) == T + + A_skew = rand(SkewSymMatrix{T}, n) + @test typeof(A_skew) <: SkewSymMatrix{T} + @test eltype(A_skew) == T + + A_stiefel_hor = rand(StiefelLieAlgHorMatrix{T}, N, n) + @test typeof(A_stiefel_hor) <: StiefelLieAlgHorMatrix{T} + @test eltype(A_stiefel_hor) == T + + A_grassmann_hor = rand(GrassmannLieAlgHorMatrix{T}, N, n) + @test typeof(A_grassmann_hor) <: GrassmannLieAlgHorMatrix{T} + @test eltype(A_grassmann_hor) == T +end + +test_random_array_generation(5, 10, Float32) \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index af35e6063..e94ad1242 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -3,7 +3,7 @@ using SafeTestsets @safetestset "Check parameterlength " begin include("parameterlength/check_parameterlengths.jl") end @safetestset "Arrays #1 " begin include("arrays/array_tests.jl") end -@safetestset "Arrays #2 " begin include("arrays/array_tests_old.jl") end +@safetestset "Sampling of arrays " begin include("arrays/random_generation_of_custom_arrays.jl") end @safetestset "Manifolds (Grassmann): " begin include("manifolds/grassmann_manifold.jl") end @safetestset "Gradient Layer " begin include("layers/gradient_layer_tests.jl") end @safetestset "Test symplecticity of upscaling layer " begin include("layers/sympnet_layers_test.jl") end From 51c7c76db88b1c270a6fa19f52574dc672733ade Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 13 Dec 2023 13:11:03 +0100 Subject: [PATCH 06/96] Got rid of some some symplectic arrays and included the supertype for horizontal components as an extra file. --- src/GeometricMachineLearning.jl | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/GeometricMachineLearning.jl b/src/GeometricMachineLearning.jl index b357d839a..b7b9e9f92 100644 --- a/src/GeometricMachineLearning.jl +++ b/src/GeometricMachineLearning.jl @@ -82,13 +82,10 @@ module GeometricMachineLearning export convert_to_dev, Device, CPUDevice # INCLUDE ARRAYS - include("arrays/block_identity_lower.jl") - include("arrays/block_identity_upper.jl") include("arrays/symmetric.jl") include("arrays/symplectic.jl") - include("arrays/symplectic_lie_algebra.jl") - include("arrays/symplectic_lie_algebra_horizontal.jl") include("arrays/skew_symmetric.jl") + include("arrays/abstract_lie_algebra_horizontal.jl") include("arrays/stiefel_lie_algebra_horizontal.jl") include("arrays/grassmann_lie_algebra_horizontal.jl") From 3876be561a273d76ca4c11b78a6425908ed92c13 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 13 Dec 2023 13:12:32 +0100 Subject: [PATCH 07/96] Updated docs. --- .../grassmann_lie_algebra_horizontal.jl | 15 +------ src/arrays/stiefel_lie_algebra_horizontal.jl | 41 ++++++++++++------- 2 files changed, 27 insertions(+), 29 deletions(-) diff --git a/src/arrays/grassmann_lie_algebra_horizontal.jl b/src/arrays/grassmann_lie_algebra_horizontal.jl index aa164729b..bf883f157 100644 --- a/src/arrays/grassmann_lie_algebra_horizontal.jl +++ b/src/arrays/grassmann_lie_algebra_horizontal.jl @@ -1,19 +1,6 @@ """ -This implements the horizontal component of the Lie algebra (in this case just the skew-symmetric matrices). -The projection is: -S -> SE where -|I| -|0| = E. - -An element of GrassmannLieAlgMatrix takes the form: -| -0 -B'| -| B 0 | where B is arbitrary. 
- -This also implements the projection: -| 0 -B'| | 0 -B'| -| B 0 | -> | B 0 |. +This implements the horizontal component of a Lie algebra that is isomorphic to the Grassmann manifold. """ - mutable struct GrassmannLieAlgHorMatrix{T, ST <: AbstractMatrix{T}} <: AbstractLieAlgHorMatrix{T} B::ST N::Int diff --git a/src/arrays/stiefel_lie_algebra_horizontal.jl b/src/arrays/stiefel_lie_algebra_horizontal.jl index 71558e485..7af26c360 100644 --- a/src/arrays/stiefel_lie_algebra_horizontal.jl +++ b/src/arrays/stiefel_lie_algebra_horizontal.jl @@ -1,21 +1,32 @@ -""" -This implements the horizontal component of the Lie algebra (in this case just the skew-symmetric matrices). -The projection is: -S -> SE where -|I| -|0| = E. +@doc raw""" +`StiefelLieAlgHorMatrix` is the *horizontal component of the Lie algebra of skew-symmetric matrices* (with respect to the canonical metric). +The projection here is: \(\pi:S \to SE \) where +```math +E = \begin{pmatrix} \mathbb{I}_{n} \\ \mathbb{O}_{(N-n)\times{}n} \end{pmatrix}. +``` +The matrix \(E\) is implemented under `StiefelProjection` in `GeometricMachineLearning`. An element of StiefelLieAlgMatrix takes the form: -| A -B'| -| B 0 | where A is skew-symmetric. - -This also implements the projection: -| A -B'| | A -B'| -| B D | -> | B 0 |. +```math +\begin{pmatrix} +A & B^T \\ B & \mathbb{O} +\end{pmatrix}, +``` +where \(A\) is skew-symmetric (this is `SkewSymMatrix` in `GeometricMachineLearning`). + +If the constructor is called with a big \(N\times{}N\) matrix, then the projection is performed the following way: +```math +\begin{pmatrix} +A & B_1 \\ +B_2 & D +\end{pmatrix} \mapsto +\begin{pmatrix} +\mathrm{skew}(A) & -B_2^T \\ +B_2 & \mathbb{O} +\end{pmatrix}. +``` +The operation $\mathrm{skew}:\mathbb{R}^{n\times{}n}\to\mathcal{S}_\mahtrm{skew}(n)$ is the skew-symmetrization operation. This is equivalent to calling the constructor of `SkewSymMatrix` with an \(n\times{}n\) matrix. """ - -abstract type AbstractLieAlgHorMatrix{T} <: AbstractMatrix{T} end - mutable struct StiefelLieAlgHorMatrix{T, AT <: SkewSymMatrix{T}, ST <: AbstractMatrix{T}} <: AbstractLieAlgHorMatrix{T} A::AT B::ST From 48300b2358e38c2a9d373e57c0ac7cff9bd89810 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 13 Dec 2023 13:12:53 +0100 Subject: [PATCH 08/96] New supertype for the horizontal components. --- src/arrays/abstract_lie_algebra_horizontal.jl | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 src/arrays/abstract_lie_algebra_horizontal.jl diff --git a/src/arrays/abstract_lie_algebra_horizontal.jl b/src/arrays/abstract_lie_algebra_horizontal.jl new file mode 100644 index 000000000..58a7630df --- /dev/null +++ b/src/arrays/abstract_lie_algebra_horizontal.jl @@ -0,0 +1,4 @@ +@doc raw""" +`AbstractLieAlgHorMatrix` is a supertype for various horizontal components of Lie algebras. We usually call this \(\mathfrak{g}^\mathrm{hor}\). +""" +abstract type AbstractLieAlgHorMatrix{T} <: AbstractMatrix{T} end From 0dfbc63723c808df7665c6fd081501cb9db1e173 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 13 Dec 2023 13:25:49 +0100 Subject: [PATCH 09/96] Added new routine for multiplying symmetric matrix from the right onto a matrix. 
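
The new method reuses the existing `SymmetricMatrix`-times-dense kernel by going through the transpose: since `A` is symmetric, `B * A == (A * B')'`. A quick sanity check of that identity (not part of the patch itself; the dense reference via `Matrix(A)` is the only assumption):

```julia
using GeometricMachineLearning  # exports SymmetricMatrix

A = SymmetricMatrix(rand(Float32, 5, 5))
B = rand(Float32, 4, 5)

# right multiplication dispatches to (A * B')', so it should agree with the dense product
@assert B * A ≈ B * Matrix(A)
```

Routing the right multiplication through the existing left-multiplication method means the same code path (including its GPU backend) is reused instead of adding a second kernel.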
--- src/arrays/symmetric.jl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/arrays/symmetric.jl b/src/arrays/symmetric.jl index f7db2c6c6..52cdd4f7f 100644 --- a/src/arrays/symmetric.jl +++ b/src/arrays/symmetric.jl @@ -209,6 +209,8 @@ function Base.:*(A::SymmetricMatrix{T}, B::AbstractMatrix{T}) where T C end +Base.:*(B::AbstractMatrix{T}, A::SymmetricMatrix{T}) where T = (A * B')' + function Base.:*(A::SymmetricMatrix{T}, b::AbstractVector{T}) where T backend = KernelAbstractions.get_backend(A.S) c = KernelAbstractions.allocate(backend, T, A.n) From 21bb9312b674b34a73eb4824e0f69a3e4b0d0d1d Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 13 Dec 2023 13:50:49 +0100 Subject: [PATCH 10/96] Test addition of various custom arrays. --- .../addition_tests_for_custom_arrays.jl | 35 +++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 test/arrays/addition_tests_for_custom_arrays.jl diff --git a/test/arrays/addition_tests_for_custom_arrays.jl b/test/arrays/addition_tests_for_custom_arrays.jl new file mode 100644 index 000000000..1e3a2e701 --- /dev/null +++ b/test/arrays/addition_tests_for_custom_arrays.jl @@ -0,0 +1,35 @@ +using GeometricMachineLearning, Test + +@doc raw""" +This function tests addition for various custom arrays, i.e. if \(A + B\) is performed in the correct way. +""" + +function addition_tests_for_custom_arrays(n::Int, N::Int, T::Type) + A = rand(T, n, n) + B = rand(T, n, n) + + # SymmetricMatrix + AB_sym = SymmetricMatrix(A + B) + AB_sym2 = SymmetricMatrix(A) + SymmetricMatrix(B) + @test AB_sym ≈ AB_sym2 + @test typeof(AB_sym) <: SymmetricMatrix{T} + @test typeof(AB_sym2) <: SymmetricMatrix{T} + + # SkewSymMatrix + AB_skew = SkewSymMatrix(A + B) + AB_skew2 = SkewSymMatrix(A) + SkewSymMatrix(B) + @test A_skew ≈ A_skew2 + @test typeof(AB_skew) <: SkewSymMatrix{T} + @test typeof(AB_skew2) <: SkewSymMatrix{T} + + C = rand(T, N, N) + D = rand(T, N, N) + + # StiefelLieAlgHorMatrix + CD_slahm = StiefelLieAlgHorMatrix(A + B, n) + CD_slahm2 = StiefelLieAlgHorMatrix(A, n) + StiefelLieAlgHorMatrix(B, n) + @test CD_slahm ≈ CD_slahm2 + @test typeof(CD_slahm) <: StiefelLieAlgHorMatrix{T} + @test typeof(CD_slahm2) <: StiefelLieAlgHorMatrix{T} + +end \ No newline at end of file From 969a6cb05e43c64c9f68b51e69dbc13c7ea9e787 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 13 Dec 2023 13:51:40 +0100 Subject: [PATCH 11/96] Removed tests for various symplectic arrays that are no longer part of GML. --- test/arrays/array_tests.jl | 59 -------------------------------------- 1 file changed, 59 deletions(-) diff --git a/test/arrays/array_tests.jl b/test/arrays/array_tests.jl index fd9269046..c0da5eba9 100644 --- a/test/arrays/array_tests.jl +++ b/test/arrays/array_tests.jl @@ -1,14 +1,7 @@ using LinearAlgebra using Random using Test - -using BandedMatrices using GeometricMachineLearning -""" -𝔤ʰ is the horizontal part of 𝔤, i.e. A ∈ 𝔤ʰ ⟺ AEE⁺ = A. 
- -TODO: Add routine & test for symplectic conjugate -""" #check if symmetric matrix works for 1×1 matrices W = rand(1,1) @@ -61,22 +54,6 @@ function skew_mat_mul_test2(n, T=Float64) @test isapprox(AS1, AS2) end -# check if matrix is ∈ 𝔤 (check if the vector space projection works), addition & subtraction -function sympl_lie_alg_add_sub_test(n) - J = SymplecticPotential(n) - symplectisize(W) = .5*(W - J'*W'*J) - W₁ = rand(2*n,2*n) - S₁ = SymplecticLieAlgMatrix(W₁) - W₂ = rand(2*n,2*n) - S₂ = SymplecticLieAlgMatrix(W₂) - S₃ = S₁ + S₂ - S₄ = S₁ - S₂ - @test typeof(S₃) <: SymplecticLieAlgMatrix - @test typeof(S₄) <: SymplecticLieAlgMatrix - @test all(abs.(symplectisize(W₁ + W₂) .- S₃) .< 1e-10) - @test all(abs.(symplectisize(W₁ - W₂) .- S₄) .< 1e-10) -end - # test Stiefel manifold projection test function stiefel_proj_test(N,n) In = I(n) @@ -84,15 +61,6 @@ function stiefel_proj_test(N,n) @test all(abs.((E'*E) .- In) .< 1e-10) end -# test symplectic projection (this is just the E matrix) -function sympl_proj_test(N, n) - JN = SymplecticPotential(N) - Jn = SymplecticPotential(n) - E = SymplecticProjection(N, n, Float64) - @test all(abs.((E'*JN*E) .- Jn) .< 1e-10) -end - - function stiefel_lie_alg_add_sub_test(N, n) E = StiefelProjection(N, n) projection(W::SkewSymMatrix) = W - (I - E*E')*W*(I - E*E') @@ -108,31 +76,8 @@ function stiefel_lie_alg_add_sub_test(N, n) @test all(abs.(projection(W₁ - W₂) .- S₄) .< 1e-10) end -# check if matrix is ∈ 𝔤 (check if the vector space projection works), addition & subtraction -function sympl_lie_alg_add_sub_test(N, n) - J = SymplecticPotential(n) - E = SymplecticProjection(N, n) - projection(W::SymplecticLieAlgMatrix) = W - (I - E*E')*W*(I - E*E') - W₁ = SymplecticLieAlgMatrix(rand(2*N,2*N)) - S₁ = SymplecticLieAlgHorMatrix(W₁,n) - W₂ = SymplecticLieAlgMatrix(rand(2*N,2*N)) - S₂ = SymplecticLieAlgHorMatrix(W₂,n) - S₃ = S₁ + S₂ - S₄ = S₁ - S₂ - @test typeof(S₃) <: SymplecticLieAlgHorMatrix - @test typeof(S₄) <: SymplecticLieAlgHorMatrix - @test all(abs.(projection(W₁ + W₂) .- S₃) .< 1e-10) - @test all(abs.(projection(W₁ - W₂) .- S₄) .< 1e-10) -end - -function stiefel_lie_alg_vectorization_test(N, n; T=Float32) - A = rand(StiefelLieAlgHorMatrix{T}, N, n) - @test isapprox(StiefelLieAlgHorMatrix(vec(A), N, n), A) -end - # TODO: tests for ADAM functions - # test everything for different n & N values Random.seed!(42) @@ -149,10 +94,6 @@ for (N, n) ∈ zip(N_vec, n_vec) skew_mat_add_sub_test(N) skew_mat_mul_test(N) skew_mat_mul_test2(N) - sympl_lie_alg_add_sub_test(N) stiefel_proj_test(N,n) - sympl_proj_test(N,n) stiefel_lie_alg_add_sub_test(N,n) - sympl_lie_alg_add_sub_test(N,n) - stiefel_lie_alg_vectorization_test(N, n) end From e0b81cf69917972e8db76c7963508514603cb6d6 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 13 Dec 2023 14:20:24 +0100 Subject: [PATCH 12/96] Added routine for Base.one. This is copied from SkewSymMatrix. 
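
`one(A)` materializes a dense identity of size `A.n` on the backend of `A.S` (mirroring the `SkewSymMatrix` implementation), which lets the product of two `SymmetricMatrix` objects fall back on the existing symmetric-times-dense multiplication as `A * (B * one(B))`. A minimal sketch of the resulting behaviour (not part of the patch):

```julia
using GeometricMachineLearning
using LinearAlgebra: I

A = SymmetricMatrix(rand(Float32, 4, 4))
B = SymmetricMatrix(rand(Float32, 4, 4))

@assert one(B) ≈ Matrix{Float32}(I, 4, 4)   # dense identity, allocated on B's backend
@assert A * B ≈ Matrix(A) * Matrix(B)       # product of two symmetric matrices is a plain matrix in general
```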
--- src/GeometricMachineLearning.jl | 2 +- src/arrays/symmetric.jl | 12 ++++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/src/GeometricMachineLearning.jl b/src/GeometricMachineLearning.jl index b357d839a..fbb84488a 100644 --- a/src/GeometricMachineLearning.jl +++ b/src/GeometricMachineLearning.jl @@ -84,13 +84,13 @@ module GeometricMachineLearning # INCLUDE ARRAYS include("arrays/block_identity_lower.jl") include("arrays/block_identity_upper.jl") - include("arrays/symmetric.jl") include("arrays/symplectic.jl") include("arrays/symplectic_lie_algebra.jl") include("arrays/symplectic_lie_algebra_horizontal.jl") include("arrays/skew_symmetric.jl") include("arrays/stiefel_lie_algebra_horizontal.jl") include("arrays/grassmann_lie_algebra_horizontal.jl") + include("arrays/symmetric.jl") export SymmetricMatrix, SymplecticPotential, SkewSymMatrix export StiefelLieAlgHorMatrix diff --git a/src/arrays/symmetric.jl b/src/arrays/symmetric.jl index 52cdd4f7f..6a6f3ac91 100644 --- a/src/arrays/symmetric.jl +++ b/src/arrays/symmetric.jl @@ -211,6 +211,10 @@ end Base.:*(B::AbstractMatrix{T}, A::SymmetricMatrix{T}) where T = (A * B')' +function Base.:*(A::SymmetricMatrix{T}, B::SymmetricMatrix{T}) where T + A * (B * one(B)) +end + function Base.:*(A::SymmetricMatrix{T}, b::AbstractVector{T}) where T backend = KernelAbstractions.get_backend(A.S) c = KernelAbstractions.allocate(backend, T, A.n) @@ -218,6 +222,14 @@ function Base.:*(A::SymmetricMatrix{T}, b::AbstractVector{T}) where T c end +function Base.one(A::SymmetricMatrix{T}) where T + backend = KernelAbstractions.get_backend(A.S) + unit_matrix = KernelAbstractions.zeros(backend, T, A.n, A.n) + write_ones! = write_ones_kernel!(backend) + write_ones!(unit_matrix, ndrange=A.n) + unit_matrix +end + # define routines for generalizing ChainRulesCore to SymmetricMatrix ChainRulesCore.ProjectTo(A::SymmetricMatrix) = ProjectTo{SymmetricMatrix}(; symmetric=ProjectTo(A.S)) (project::ProjectTo{SymmetricMatrix})(dA::AbstractMatrix) = SymmetricMatrix(project.symmetric(map_to_S(dA)), size(dA, 2)) From 16b28baf332e114df8b4c4884fdd99a1806815a6 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 13 Dec 2023 14:20:49 +0100 Subject: [PATCH 13/96] Slightly improved readability. --- src/arrays/skew_symmetric.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/arrays/skew_symmetric.jl b/src/arrays/skew_symmetric.jl index e1ab85fe2..3c14b9e45 100644 --- a/src/arrays/skew_symmetric.jl +++ b/src/arrays/skew_symmetric.jl @@ -187,7 +187,7 @@ end # the first matrix is multiplied onto A2 in order for it to not be SkewSymMatrix! function Base.:*(A1::SkewSymMatrix{T}, A2::SkewSymMatrix{T}) where T - A1*(one(A2)*A2) + A1 * (one(A2) * A2) end @doc raw""" From df07bfb1aade5e452594bb0d9ee7963c0b86fb8b Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 13 Dec 2023 14:21:43 +0100 Subject: [PATCH 14/96] Added addition tests for the remaining custom arrays. --- test/arrays/addition_tests_for_custom_arrays.jl | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/test/arrays/addition_tests_for_custom_arrays.jl b/test/arrays/addition_tests_for_custom_arrays.jl index 1e3a2e701..669ccc0ef 100644 --- a/test/arrays/addition_tests_for_custom_arrays.jl +++ b/test/arrays/addition_tests_for_custom_arrays.jl @@ -3,7 +3,6 @@ using GeometricMachineLearning, Test @doc raw""" This function tests addition for various custom arrays, i.e. if \(A + B\) is performed in the correct way. 
""" - function addition_tests_for_custom_arrays(n::Int, N::Int, T::Type) A = rand(T, n, n) B = rand(T, n, n) @@ -26,10 +25,15 @@ function addition_tests_for_custom_arrays(n::Int, N::Int, T::Type) D = rand(T, N, N) # StiefelLieAlgHorMatrix - CD_slahm = StiefelLieAlgHorMatrix(A + B, n) - CD_slahm2 = StiefelLieAlgHorMatrix(A, n) + StiefelLieAlgHorMatrix(B, n) + CD_slahm = StiefelLieAlgHorMatrix(C + D, n) + CD_slahm2 = StiefelLieAlgHorMatrix(C, n) + StiefelLieAlgHorMatrix(D, n) @test CD_slahm ≈ CD_slahm2 @test typeof(CD_slahm) <: StiefelLieAlgHorMatrix{T} @test typeof(CD_slahm2) <: StiefelLieAlgHorMatrix{T} + CD_glahm = GrassmannLieAlgHorMatrix(C + D, n) + CD_glahm2 = GrassmannLieAlgHorMatrix(C, n) + GrassmannLieAlgHorMatrix(D, n) + @test CD_glahm ≈ CD_glahm2 + @test tyepof(CD_glahm) <: GrassmannLieAlgHorMatrix{T} + @test typeof(CD_glahm2) <: GrassmannLieAlgHorMatrix{T} end \ No newline at end of file From e81ff2e9a16d4cdbc049febd42931321770bde55 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 13 Dec 2023 14:52:14 +0100 Subject: [PATCH 15/96] Added tests for addition, scalar multiplication and matrix multiplication for custom arrays. --- .../addition_tests_for_custom_arrays.jl | 4 +- ...matrix_multiplication_for_custom_arrays.jl | 21 ++++++++++ ...scalar_multiplication_for_custom_arrays.jl | 40 +++++++++++++++++++ test/runtests.jl | 4 +- 4 files changed, 67 insertions(+), 2 deletions(-) create mode 100644 test/arrays/matrix_multiplication_for_custom_arrays.jl create mode 100644 test/arrays/scalar_multiplication_for_custom_arrays.jl diff --git a/test/arrays/addition_tests_for_custom_arrays.jl b/test/arrays/addition_tests_for_custom_arrays.jl index 669ccc0ef..5fd2bbcfc 100644 --- a/test/arrays/addition_tests_for_custom_arrays.jl +++ b/test/arrays/addition_tests_for_custom_arrays.jl @@ -36,4 +36,6 @@ function addition_tests_for_custom_arrays(n::Int, N::Int, T::Type) @test CD_glahm ≈ CD_glahm2 @test tyepof(CD_glahm) <: GrassmannLieAlgHorMatrix{T} @test typeof(CD_glahm2) <: GrassmannLieAlgHorMatrix{T} -end \ No newline at end of file +end + +addition_tests_for_custom_arrays(5, 10, Float32) \ No newline at end of file diff --git a/test/arrays/matrix_multiplication_for_custom_arrays.jl b/test/arrays/matrix_multiplication_for_custom_arrays.jl new file mode 100644 index 000000000..db051ec3e --- /dev/null +++ b/test/arrays/matrix_multiplication_for_custom_arrays.jl @@ -0,0 +1,21 @@ +using GeometricMachineLearning, Test + +@doc raw""" +This function tests matrix multiplication for various custom arrays, i.e. if \((A,\alpha) \mapsto \alpha{}A\) is performed in the correct way. +""" +function matrix_multiplication_tests_for_custom_arrays(n::Int, N::Int, T::Type) + A = rand(T, n, n) + B = rand(T, n, N) + + # SymmetricMatrix + A_sym = SymmetricMatrix(A) + @test A_sym * B ≈ Matrix{T}(A_sym) * B + @test B' * A_sym ≈ B' * Matrix{T}(A_sym) + + # SkewSymMatrix + A_skew = SkewSymMatrix(A) + @test A_skew * B ≈ Matrix{T}(A_skew) * B + @test B' * A_skew ≈ B' * Matrix{T}(A_skew) +end + +matrix_multiplication_tests_for_custom_arrays(5, 10, Float32) \ No newline at end of file diff --git a/test/arrays/scalar_multiplication_for_custom_arrays.jl b/test/arrays/scalar_multiplication_for_custom_arrays.jl new file mode 100644 index 000000000..c6214a3e1 --- /dev/null +++ b/test/arrays/scalar_multiplication_for_custom_arrays.jl @@ -0,0 +1,40 @@ +using GeometricMachineLearning, Test + +@doc raw""" +This function tests scalar multiplication for various custom arrays, i.e. 
if \((A,\alpha) \mapsto \alpha{}A\) is performed in the correct way. +""" +function scalar_multiplication_for_custom_arrays(n::Int, N::Int, T::Type) + A = rand(T, n, n) + α = rand(T) + + # SymmetricMatrix + Aα_sym = SymmetricMatrix(α * A) + Aα_sym2 = α * SymmetricMatrix(A) + @test Aα_sym ≈ Aα_sym2 + @test typeof(Aα_sym) <: SymmetricMatrix{T} + @test typeof(Aα_sym2) <: SymmetricMatrix{T} + + # SkewSymMatrix + Aα_skew = SkewSymMatrix(α * A) + Aα_skew2 = α * SkewSymMatrix(A) + @test Aα_skew ≈ Aα_skew2 + @test typeof(Aα_skew) <: SkewSymMatrix{T} + @test typeof(Aα_skew2) <: SkewSymMatrix{T} + + C = rand(T, N, N) + + # StiefelLieAlgHorMatrix + Cα_slahm = StiefelLieAlgHorMatrix(α * C, n) + Cα_slahm2 = α * StiefelLieAlgHorMatrix(C, n) + @test Cα_slahm ≈ Cα_slahm2 + @test typeof(Cα_slahm) <: StiefelLieAlgHorMatrix{T} + @test typeof(Cα_slahm2) <: StiefelLieAlgHorMatrix{T} + + Cα_glahm = GrassmannLieAlgHorMatrix(α * C, n) + Cα_glahm2 = α * GrassmannLieAlgHorMatrix(C, n) + @test Cα_glahm ≈ Cα_glahm2 + @test tyepof(Cα_glahm) <: GrassmannLieAlgHorMatrix{T} + @test typeof(Cα_glahm2) <: GrassmannLieAlgHorMatrix{T} +end + +scalar_multiplication_for_custom_arrays(5, 10, Float32) \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index af35e6063..dda63eb01 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -3,7 +3,9 @@ using SafeTestsets @safetestset "Check parameterlength " begin include("parameterlength/check_parameterlengths.jl") end @safetestset "Arrays #1 " begin include("arrays/array_tests.jl") end -@safetestset "Arrays #2 " begin include("arrays/array_tests_old.jl") end +@safetestset "Addition tests for custom arrays " begin include("arrays/addition_tests_for_custom_arrays.jl") end +@safetestset "Scalar multiplication tests for custom arrays " begin include("arrays/scalar_multiplication_for_custom_arrays.jl") end +@safetestset "Matrix multiplication tests for custom arrays " begin include("arrays/matrix_multiplication_for_custom_arrays.jl") end @safetestset "Manifolds (Grassmann): " begin include("manifolds/grassmann_manifold.jl") end @safetestset "Gradient Layer " begin include("layers/gradient_layer_tests.jl") end @safetestset "Test symplecticity of upscaling layer " begin include("layers/sympnet_layers_test.jl") end From a94c57c6a8350df4105faf1f838ea1252e4fe722 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 13 Dec 2023 15:05:23 +0100 Subject: [PATCH 16/96] Changed order of how files are included. Symmetric now depends on SkewSymmetric. 
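
With include-based compilation the dependency has to be pulled in first, so the two files now appear in this order at the top of the include list (the concrete helper named below is an assumption, based on the `Base.one` routine copied over from `SkewSymMatrix` in an earlier commit):

```julia
# inside the GeometricMachineLearning module definition
include("arrays/skew_symmetric.jl")  # assumed to define the shared kernel used by Base.one
include("arrays/symmetric.jl")       # Base.one(::SymmetricMatrix) relies on it
```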
--- src/GeometricMachineLearning.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/GeometricMachineLearning.jl b/src/GeometricMachineLearning.jl index fbb84488a..d8dccaae2 100644 --- a/src/GeometricMachineLearning.jl +++ b/src/GeometricMachineLearning.jl @@ -82,15 +82,15 @@ module GeometricMachineLearning export convert_to_dev, Device, CPUDevice # INCLUDE ARRAYS + include("arrays/skew_symmetric.jl") + include("arrays/symmetric.jl") include("arrays/block_identity_lower.jl") include("arrays/block_identity_upper.jl") include("arrays/symplectic.jl") include("arrays/symplectic_lie_algebra.jl") include("arrays/symplectic_lie_algebra_horizontal.jl") - include("arrays/skew_symmetric.jl") include("arrays/stiefel_lie_algebra_horizontal.jl") include("arrays/grassmann_lie_algebra_horizontal.jl") - include("arrays/symmetric.jl") export SymmetricMatrix, SymplecticPotential, SkewSymMatrix export StiefelLieAlgHorMatrix From c6a350ed471420a4b4583c45978658d85b1f253e Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 13 Dec 2023 15:05:50 +0100 Subject: [PATCH 17/96] Fixed typo. --- .../addition_tests_for_custom_arrays.jl | 4 +- test/arrays/array_tests.jl | 43 ------------------- ...scalar_multiplication_for_custom_arrays.jl | 2 +- 3 files changed, 3 insertions(+), 46 deletions(-) diff --git a/test/arrays/addition_tests_for_custom_arrays.jl b/test/arrays/addition_tests_for_custom_arrays.jl index 5fd2bbcfc..44fc878c1 100644 --- a/test/arrays/addition_tests_for_custom_arrays.jl +++ b/test/arrays/addition_tests_for_custom_arrays.jl @@ -17,7 +17,7 @@ function addition_tests_for_custom_arrays(n::Int, N::Int, T::Type) # SkewSymMatrix AB_skew = SkewSymMatrix(A + B) AB_skew2 = SkewSymMatrix(A) + SkewSymMatrix(B) - @test A_skew ≈ A_skew2 + @test AB_skew ≈ AB_skew2 @test typeof(AB_skew) <: SkewSymMatrix{T} @test typeof(AB_skew2) <: SkewSymMatrix{T} @@ -34,7 +34,7 @@ function addition_tests_for_custom_arrays(n::Int, N::Int, T::Type) CD_glahm = GrassmannLieAlgHorMatrix(C + D, n) CD_glahm2 = GrassmannLieAlgHorMatrix(C, n) + GrassmannLieAlgHorMatrix(D, n) @test CD_glahm ≈ CD_glahm2 - @test tyepof(CD_glahm) <: GrassmannLieAlgHorMatrix{T} + @test typeof(CD_glahm) <: GrassmannLieAlgHorMatrix{T} @test typeof(CD_glahm2) <: GrassmannLieAlgHorMatrix{T} end diff --git a/test/arrays/array_tests.jl b/test/arrays/array_tests.jl index fd9269046..0e52c5cd6 100644 --- a/test/arrays/array_tests.jl +++ b/test/arrays/array_tests.jl @@ -61,21 +61,6 @@ function skew_mat_mul_test2(n, T=Float64) @test isapprox(AS1, AS2) end -# check if matrix is ∈ 𝔤 (check if the vector space projection works), addition & subtraction -function sympl_lie_alg_add_sub_test(n) - J = SymplecticPotential(n) - symplectisize(W) = .5*(W - J'*W'*J) - W₁ = rand(2*n,2*n) - S₁ = SymplecticLieAlgMatrix(W₁) - W₂ = rand(2*n,2*n) - S₂ = SymplecticLieAlgMatrix(W₂) - S₃ = S₁ + S₂ - S₄ = S₁ - S₂ - @test typeof(S₃) <: SymplecticLieAlgMatrix - @test typeof(S₄) <: SymplecticLieAlgMatrix - @test all(abs.(symplectisize(W₁ + W₂) .- S₃) .< 1e-10) - @test all(abs.(symplectisize(W₁ - W₂) .- S₄) .< 1e-10) -end # test Stiefel manifold projection test function stiefel_proj_test(N,n) @@ -84,14 +69,6 @@ function stiefel_proj_test(N,n) @test all(abs.((E'*E) .- In) .< 1e-10) end -# test symplectic projection (this is just the E matrix) -function sympl_proj_test(N, n) - JN = SymplecticPotential(N) - Jn = SymplecticPotential(n) - E = SymplecticProjection(N, n, Float64) - @test all(abs.((E'*JN*E) .- Jn) .< 1e-10) -end - function stiefel_lie_alg_add_sub_test(N, n) E = 
StiefelProjection(N, n) @@ -108,23 +85,6 @@ function stiefel_lie_alg_add_sub_test(N, n) @test all(abs.(projection(W₁ - W₂) .- S₄) .< 1e-10) end -# check if matrix is ∈ 𝔤 (check if the vector space projection works), addition & subtraction -function sympl_lie_alg_add_sub_test(N, n) - J = SymplecticPotential(n) - E = SymplecticProjection(N, n) - projection(W::SymplecticLieAlgMatrix) = W - (I - E*E')*W*(I - E*E') - W₁ = SymplecticLieAlgMatrix(rand(2*N,2*N)) - S₁ = SymplecticLieAlgHorMatrix(W₁,n) - W₂ = SymplecticLieAlgMatrix(rand(2*N,2*N)) - S₂ = SymplecticLieAlgHorMatrix(W₂,n) - S₃ = S₁ + S₂ - S₄ = S₁ - S₂ - @test typeof(S₃) <: SymplecticLieAlgHorMatrix - @test typeof(S₄) <: SymplecticLieAlgHorMatrix - @test all(abs.(projection(W₁ + W₂) .- S₃) .< 1e-10) - @test all(abs.(projection(W₁ - W₂) .- S₄) .< 1e-10) -end - function stiefel_lie_alg_vectorization_test(N, n; T=Float32) A = rand(StiefelLieAlgHorMatrix{T}, N, n) @test isapprox(StiefelLieAlgHorMatrix(vec(A), N, n), A) @@ -149,10 +109,7 @@ for (N, n) ∈ zip(N_vec, n_vec) skew_mat_add_sub_test(N) skew_mat_mul_test(N) skew_mat_mul_test2(N) - sympl_lie_alg_add_sub_test(N) stiefel_proj_test(N,n) - sympl_proj_test(N,n) stiefel_lie_alg_add_sub_test(N,n) - sympl_lie_alg_add_sub_test(N,n) stiefel_lie_alg_vectorization_test(N, n) end diff --git a/test/arrays/scalar_multiplication_for_custom_arrays.jl b/test/arrays/scalar_multiplication_for_custom_arrays.jl index c6214a3e1..3a5e56205 100644 --- a/test/arrays/scalar_multiplication_for_custom_arrays.jl +++ b/test/arrays/scalar_multiplication_for_custom_arrays.jl @@ -33,7 +33,7 @@ function scalar_multiplication_for_custom_arrays(n::Int, N::Int, T::Type) Cα_glahm = GrassmannLieAlgHorMatrix(α * C, n) Cα_glahm2 = α * GrassmannLieAlgHorMatrix(C, n) @test Cα_glahm ≈ Cα_glahm2 - @test tyepof(Cα_glahm) <: GrassmannLieAlgHorMatrix{T} + @test typeof(Cα_glahm) <: GrassmannLieAlgHorMatrix{T} @test typeof(Cα_glahm2) <: GrassmannLieAlgHorMatrix{T} end From 3e01f05cddd0da718739d42349e8789b79ac36ec Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 13 Dec 2023 16:36:39 +0100 Subject: [PATCH 18/96] Fixed docs and put type dependency for . --- src/data_loader/tensor_assign.jl | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/src/data_loader/tensor_assign.jl b/src/data_loader/tensor_assign.jl index 3faad0a99..e2282db83 100644 --- a/src/data_loader/tensor_assign.jl +++ b/src/data_loader/tensor_assign.jl @@ -35,18 +35,22 @@ end end @doc raw""" -The function `assign_output_estimate` is closely related to the transformer. It takes the last prediction_window columns of the output and uses is for the final prediction. +The function `assign_output_estimate` is closely related to the transformer. It takes the last `prediction_window` columns of the output and uses them for the final prediction. i.e. 
```math -\mathbb{R}^{N\times\mathtt{pw}}\to\mathbb{R}^{N\times\mathtt{pw}}, \begin{bmatrix} z^{(1)}_1 & \cdots z^{(T)}_1 \\ - \cdots & \cdots \\ - z^{(T - \mathtt{pw})}_n & \cdots & z^{(T})_n\end{bmatrix} \mapsto - \begin{bmatrix} z^{(1)}_1 & \cdots z^{(T)}_1 \\ - \cdots & \cdots \\ - z^{(T - \mathtt{pw})}_n & \cdots & z^{(T})_n\end{bmatrix} +\mathbb{R}^{N\times\mathtt{pw}}\to\mathbb{R}^{N\times\mathtt{pw}}, +\begin{bmatrix} + z^{(1)}_1 & \cdots & z^{(T)}_1 \\ + \cdots & \cdots & \cdots \\ + z^{(1)}_n & \cdots & z^{(T})_n + \end{bmatrix} \mapsto + \begin{bmatrix} + z^{(T - \mathtt{pw})}_1 & \cdots & z^{(T)}_1 \\ + \cdots & \cdots & \cdots \\ + z^{(T - \mathtt{pw})}_n & \cdots & z^{(T})_n\end{bmatrix} ``` """ -function assign_output_estimate(full_output::AbstractArray{T, 3}, prediction_window) where T +function assign_output_estimate(full_output::AbstractArray{T, 3}, prediction_window::Int) where T sys_dim, seq_length, batch_size = size(full_output) backend = KernelAbstractions.get_backend(full_output) output_estimate = KernelAbstractions.allocate(backend, T, sys_dim, prediction_window, batch_size) From 3a1fa6a1f757338d1e6ec8f0d0844ea680bc3055 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 13 Dec 2023 17:04:22 +0100 Subject: [PATCH 19/96] Fixed docs for rgrad. --- src/manifolds/stiefel_manifold.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/manifolds/stiefel_manifold.jl b/src/manifolds/stiefel_manifold.jl index ec1c3fb26..a98e22079 100644 --- a/src/manifolds/stiefel_manifold.jl +++ b/src/manifolds/stiefel_manifold.jl @@ -64,7 +64,7 @@ function Base.:*(Y::Adjoint{T, StiefelManifold{T, AT}}, B::AbstractMatrix) where end @doc raw""" -Computes the Riemannian gradient for the Stiefel manifold given an element ``Y\in{}St(N,n)`` and a matrix ``\nabla{}L\in\mahbb{R}^{N\times{}n}`` (the Euclidean gradient). It computes the Riemannian gradient with respect to the canonical metric (see the documentation for the function `metric` for an explanation of this). +Computes the Riemannian gradient for the Stiefel manifold given an element ``Y\in{}St(N,n)`` and a matrix ``\nabla{}L\in\mathbb{R}^{N\times{}n}`` (the Euclidean gradient). It computes the Riemannian gradient with respect to the canonical metric (see the documentation for the function `metric` for an explanation of this). The precise form of the mapping is: ```math \mathtt{rgrad}(Y, \nabla{}L) \mapsto \nabla{}L - Y(\nabla{}L)^TY From 3b8b5a486dd463db0ca2f12dc8f7be1979882b1f Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 13 Dec 2023 17:12:59 +0100 Subject: [PATCH 20/96] Added additional constructor for the case when the type is the first input argument. --- src/arrays/stiefel_projection.jl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/arrays/stiefel_projection.jl b/src/arrays/stiefel_projection.jl index 82c04c2ac..236948a09 100644 --- a/src/arrays/stiefel_projection.jl +++ b/src/arrays/stiefel_projection.jl @@ -41,6 +41,8 @@ Outer constructor for `StiefelProjection`. 
This works with two integers as input """ StiefelProjection(N::Integer, n::Integer, T::Type=Float64) = StiefelProjection(CPU(), T, N, n) +StiefelProjection(T::Type, N::Integer, n::Integer) = StiefelProjection(N, n, T) + Base.size(E::StiefelProjection) = (E.N, E.n) Base.getindex(E::StiefelProjection, i, j) = getindex(E.A, i, j) Base.:+(E::StiefelProjection, A::AbstractMatrix) = E.A + A From b05c66c53f2c5c80d3a572bc2ad7d06467f93e5f Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 13 Dec 2023 17:13:26 +0100 Subject: [PATCH 21/96] Removed the symplectic arrays. --- test/arrays/array_tests.jl | 45 -------------------------------------- test/runtests.jl | 2 +- 2 files changed, 1 insertion(+), 46 deletions(-) diff --git a/test/arrays/array_tests.jl b/test/arrays/array_tests.jl index fd9269046..cbadf1aba 100644 --- a/test/arrays/array_tests.jl +++ b/test/arrays/array_tests.jl @@ -61,22 +61,6 @@ function skew_mat_mul_test2(n, T=Float64) @test isapprox(AS1, AS2) end -# check if matrix is ∈ 𝔤 (check if the vector space projection works), addition & subtraction -function sympl_lie_alg_add_sub_test(n) - J = SymplecticPotential(n) - symplectisize(W) = .5*(W - J'*W'*J) - W₁ = rand(2*n,2*n) - S₁ = SymplecticLieAlgMatrix(W₁) - W₂ = rand(2*n,2*n) - S₂ = SymplecticLieAlgMatrix(W₂) - S₃ = S₁ + S₂ - S₄ = S₁ - S₂ - @test typeof(S₃) <: SymplecticLieAlgMatrix - @test typeof(S₄) <: SymplecticLieAlgMatrix - @test all(abs.(symplectisize(W₁ + W₂) .- S₃) .< 1e-10) - @test all(abs.(symplectisize(W₁ - W₂) .- S₄) .< 1e-10) -end - # test Stiefel manifold projection test function stiefel_proj_test(N,n) In = I(n) @@ -84,15 +68,6 @@ function stiefel_proj_test(N,n) @test all(abs.((E'*E) .- In) .< 1e-10) end -# test symplectic projection (this is just the E matrix) -function sympl_proj_test(N, n) - JN = SymplecticPotential(N) - Jn = SymplecticPotential(n) - E = SymplecticProjection(N, n, Float64) - @test all(abs.((E'*JN*E) .- Jn) .< 1e-10) -end - - function stiefel_lie_alg_add_sub_test(N, n) E = StiefelProjection(N, n) projection(W::SkewSymMatrix) = W - (I - E*E')*W*(I - E*E') @@ -108,23 +83,6 @@ function stiefel_lie_alg_add_sub_test(N, n) @test all(abs.(projection(W₁ - W₂) .- S₄) .< 1e-10) end -# check if matrix is ∈ 𝔤 (check if the vector space projection works), addition & subtraction -function sympl_lie_alg_add_sub_test(N, n) - J = SymplecticPotential(n) - E = SymplecticProjection(N, n) - projection(W::SymplecticLieAlgMatrix) = W - (I - E*E')*W*(I - E*E') - W₁ = SymplecticLieAlgMatrix(rand(2*N,2*N)) - S₁ = SymplecticLieAlgHorMatrix(W₁,n) - W₂ = SymplecticLieAlgMatrix(rand(2*N,2*N)) - S₂ = SymplecticLieAlgHorMatrix(W₂,n) - S₃ = S₁ + S₂ - S₄ = S₁ - S₂ - @test typeof(S₃) <: SymplecticLieAlgHorMatrix - @test typeof(S₄) <: SymplecticLieAlgHorMatrix - @test all(abs.(projection(W₁ + W₂) .- S₃) .< 1e-10) - @test all(abs.(projection(W₁ - W₂) .- S₄) .< 1e-10) -end - function stiefel_lie_alg_vectorization_test(N, n; T=Float32) A = rand(StiefelLieAlgHorMatrix{T}, N, n) @test isapprox(StiefelLieAlgHorMatrix(vec(A), N, n), A) @@ -149,10 +107,7 @@ for (N, n) ∈ zip(N_vec, n_vec) skew_mat_add_sub_test(N) skew_mat_mul_test(N) skew_mat_mul_test2(N) - sympl_lie_alg_add_sub_test(N) stiefel_proj_test(N,n) - sympl_proj_test(N,n) stiefel_lie_alg_add_sub_test(N,n) - sympl_lie_alg_add_sub_test(N,n) stiefel_lie_alg_vectorization_test(N, n) end diff --git a/test/runtests.jl b/test/runtests.jl index af35e6063..848328f7d 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -3,7 +3,7 @@ using SafeTestsets @safetestset "Check parameterlength " 
begin include("parameterlength/check_parameterlengths.jl") end @safetestset "Arrays #1 " begin include("arrays/array_tests.jl") end -@safetestset "Arrays #2 " begin include("arrays/array_tests_old.jl") end +@safetestset "Test constructors for custom arrays " begin include("arrays/constructor_tests_for_custom_arrays.jl") end @safetestset "Manifolds (Grassmann): " begin include("manifolds/grassmann_manifold.jl") end @safetestset "Gradient Layer " begin include("layers/gradient_layer_tests.jl") end @safetestset "Test symplecticity of upscaling layer " begin include("layers/sympnet_layers_test.jl") end From 71f791bd1191e6324408c43fc991ae9e9800a555 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 13 Dec 2023 17:14:04 +0100 Subject: [PATCH 22/96] Added tests for the various constructors for custom arrays. --- .../constructor_tests_for_custom_arrays.jl | 37 +++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 test/arrays/constructor_tests_for_custom_arrays.jl diff --git a/test/arrays/constructor_tests_for_custom_arrays.jl b/test/arrays/constructor_tests_for_custom_arrays.jl new file mode 100644 index 000000000..452f850f8 --- /dev/null +++ b/test/arrays/constructor_tests_for_custom_arrays.jl @@ -0,0 +1,37 @@ +using GeometricMachineLearning, Test +using LinearAlgebra: I + +@doc raw""" +This tests various constructor for custom arrays, e.g. if calling `SymmetricMatrix` on a matrix ``A`` does +```math +A \mapsto \frac{1}{2}(A + A^T). +``` +""" +function test_constructors_for_custom_arrays(n::Int, N::Int, T::Type) + A = rand(T, n, n) + B = rand(T, N, N) + + # SymmetricMatrix + @test Matrix{T}(SymmetricMatrix(A)) ≈ T(.5) * (A + A') + + # SkewSymMatrix + @test Matrix{T}(SkewSymMatrix(A)) ≈ T(.5) * (A - A') + + # StiefelLieAlgHorMatrix + B_shor = StiefelLieAlgHorMatrix(SkewSymMatrix(B), n) + B_shor2 = Matrix{T}(SkewSymMatrix(B)) + B_shor2[(n+1):N, (n+1):N] .= zero(T) + @test Matrix{T}(B_shor) ≈ B_shor2 + + # GrassmannLieAlgHorMatrix + B_ghor = GrassmannLieAlgHorMatrix(SkewSymMatrix(B), n) + B_ghor2 = copy(B_shor2) + B_ghor2[1:n, 1:n] .= zero(T) + @test Matrix{T}(B_ghor) ≈ B_ghor2 + + # StiefelProjection + E = StiefelProjection(T, N, n) + @test Matrix{T}(E) ≈ vcat(I(n), zeros(T, (N-n), n)) +end + +test_constructors_for_custom_arrays(5, 10, Float32) \ No newline at end of file From 5d58358761c34ca57c28d40890153dd1c327de6a Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 13 Dec 2023 17:22:18 +0100 Subject: [PATCH 23/96] Fixed docs for optimize_for_one_epoch --- src/data_loader/batch.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/data_loader/batch.jl b/src/data_loader/batch.jl index bbb9e4e39..7a0235fd4 100644 --- a/src/data_loader/batch.jl +++ b/src/data_loader/batch.jl @@ -58,7 +58,7 @@ With the optional argument: The output of `optimize_for_one_epoch!` is the average loss over all batches of the epoch: ```math -output = \frac{1}{mathtt{steps\_per\_epoch}}\sum_{t=1}^mathtt{steps\_per\_epoch}loss(\theta^{(t-1)}). +output = \frac{1}{\mathtt{steps\_per\_epoch}}\sum_{t=1}^\mathtt{steps\_per\_epoch}loss(\theta^{(t-1)}). ``` This is done because any **reverse differentiation** routine always has two outputs: a pullback and the value of the function it is differentiating. In the case of zygote: `loss_value, pullback = Zygote.pullback(ps -> loss(ps), ps)` (if the loss only depends on the parameters). 
""" From b43fb3989f45a407ac1bb864928adcb7cb689187 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Thu, 14 Dec 2023 14:07:41 +0100 Subject: [PATCH 24/96] Added descriptions for some functions in the docs. --- src/layers/multi_head_attention.jl | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/layers/multi_head_attention.jl b/src/layers/multi_head_attention.jl index 05c3c53cc..e07820cb7 100644 --- a/src/layers/multi_head_attention.jl +++ b/src/layers/multi_head_attention.jl @@ -125,14 +125,23 @@ function (d::MultiHeadAttention{M, M, Stiefel, Retraction, false})(x::AbstractAr end import ChainRules +""" +This has to be extended to tensors; you should probably do a PR in ChainRules for this. +""" function ChainRules._adjoint_mat_pullback(y::AbstractArray{T, 3}, proj) where T (NoTangent(), proj(tensor_transpose(y))) end +""" +Extend `mat_tensor_mul` to a multiplication by the adjoint of an element of `StiefelManifold`. +""" function mat_tensor_mul(Y::AT, x::AbstractArray{T, 3}) where {T, BT <: AbstractArray{T}, ST <: StiefelManifold{T, BT}, AT <: Adjoint{T, ST}} mat_tensor_mul(Y.parent.A', x) end +""" +Extend `mat_tensor_mul` to a multiplication by an element of `StiefelManifold`. +""" function mat_tensor_mul(Y::StiefelManifold, x::AbstractArray{T, 3}) where T mat_tensor_mul(Y.A, x) end \ No newline at end of file From d032f3e137e51e0a5c635396a1aa872515f6e60e Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Thu, 14 Dec 2023 15:08:41 +0100 Subject: [PATCH 25/96] Added eltype to AbstractCache. --- src/optimizers/bfgs_cache.jl | 4 ++-- src/optimizers/optimizer_caches.jl | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/optimizers/bfgs_cache.jl b/src/optimizers/bfgs_cache.jl index 26f1c0b96..96561f8be 100644 --- a/src/optimizers/bfgs_cache.jl +++ b/src/optimizers/bfgs_cache.jl @@ -5,7 +5,7 @@ It stores an array for the previous time step `B` and the inverse of the Hessian It is important to note that setting up this cache already requires a derivative! This is not the case for the other optimizers. """ -struct BFGSCache{T, AT<:AbstractArray{T}} <: AbstractCache +struct BFGSCache{T, AT<:AbstractArray{T}} <: AbstractCache{T} B::AT S::AT H::AbstractMatrix{T} @@ -19,7 +19,7 @@ In order to initialize `BGGSCache` we first need gradient information. This is w NOTE: we may not need this. """ -struct BFGSDummyCache{T, AT<:AbstractArray{T}} <: AbstractCache +struct BFGSDummyCache{T, AT<:AbstractArray{T}} <: AbstractCache{T} function BFGSDummyCache(B::AbstractArray) new{eltype(B), typeof(zero(B))}() end diff --git a/src/optimizers/optimizer_caches.jl b/src/optimizers/optimizer_caches.jl index b8a21cb0a..189ef7ae6 100644 --- a/src/optimizers/optimizer_caches.jl +++ b/src/optimizers/optimizer_caches.jl @@ -7,12 +7,12 @@ AbstractCache has subtypes: All of them can be initialized with providing an array (also supporting manifold types). 
""" -abstract type AbstractCache end +abstract type AbstractCache{T} end ############################################################################# # All the definitions of the caches -struct AdamCache{T, AT <: AbstractArray{T}} <: AbstractCache +struct AdamCache{T, AT <: AbstractArray{T}} <: AbstractCache{T} B₁::AT B₂::AT function AdamCache(Y::AbstractArray) @@ -20,15 +20,15 @@ struct AdamCache{T, AT <: AbstractArray{T}} <: AbstractCache end end -struct MomentumCache{T, AT <: AbstractArray{T}} <:AbstractCache +struct MomentumCache{T, AT <: AbstractArray{T}} <:AbstractCache{T} B::AT function MomentumCache(Y::AbstractArray) new{eltype(Y), typeof(zero(Y))}(zero(Y)) end end -struct GradientCache <: AbstractCache end -GradientCache(::AbstractArray) = GradientCache() +struct GradientCache{T} <: AbstractCache{T} end +GradientCache(::AbstractArray{T}) where T = GradientCache{T}() ############################################################################# # All the setup_cache functions From 81ac240b1d09c47b68ef9771b33ea7c30ca9f130 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Thu, 14 Dec 2023 15:13:27 +0100 Subject: [PATCH 26/96] Removed comments that seem to have become unnecessary. --- src/optimizers/optimizer_caches.jl | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/optimizers/optimizer_caches.jl b/src/optimizers/optimizer_caches.jl index 189ef7ae6..398eaa562 100644 --- a/src/optimizers/optimizer_caches.jl +++ b/src/optimizers/optimizer_caches.jl @@ -33,11 +33,6 @@ GradientCache(::AbstractArray{T}) where T = GradientCache{T}() ############################################################################# # All the setup_cache functions -# I don't really understand what we need these for ??? -# setup_adam_cache(B::AbstractArray) = reshape([setup_adam_cache(b) for b in B], size(B)) -# setup_momentum_cache(B::AbstractArray) = reshape([setup_momentum_cache(b) for b in B], size(B)) -# setup_gradient_cache(B::AbstractArray) = reshape([setup_gradient_cache(b) for b in B], size(B)) - setup_adam_cache(ps::NamedTuple) = apply_toNT(setup_adam_cache, ps) setup_momentum_cache(ps::NamedTuple) = apply_toNT(setup_momentum_cache, ps) setup_gradient_cache(ps::NamedTuple) = apply_toNT(setup_gradient_cache, ps) From 9002452df67b1c7d66a9465a9e5db7e1333125ab Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Thu, 14 Dec 2023 15:13:49 +0100 Subject: [PATCH 27/96] Removed symplectic part (again). 
--- test/arrays/array_tests.jl | 45 -------------------------------------- 1 file changed, 45 deletions(-) diff --git a/test/arrays/array_tests.jl b/test/arrays/array_tests.jl index fd9269046..cbadf1aba 100644 --- a/test/arrays/array_tests.jl +++ b/test/arrays/array_tests.jl @@ -61,22 +61,6 @@ function skew_mat_mul_test2(n, T=Float64) @test isapprox(AS1, AS2) end -# check if matrix is ∈ 𝔤 (check if the vector space projection works), addition & subtraction -function sympl_lie_alg_add_sub_test(n) - J = SymplecticPotential(n) - symplectisize(W) = .5*(W - J'*W'*J) - W₁ = rand(2*n,2*n) - S₁ = SymplecticLieAlgMatrix(W₁) - W₂ = rand(2*n,2*n) - S₂ = SymplecticLieAlgMatrix(W₂) - S₃ = S₁ + S₂ - S₄ = S₁ - S₂ - @test typeof(S₃) <: SymplecticLieAlgMatrix - @test typeof(S₄) <: SymplecticLieAlgMatrix - @test all(abs.(symplectisize(W₁ + W₂) .- S₃) .< 1e-10) - @test all(abs.(symplectisize(W₁ - W₂) .- S₄) .< 1e-10) -end - # test Stiefel manifold projection test function stiefel_proj_test(N,n) In = I(n) @@ -84,15 +68,6 @@ function stiefel_proj_test(N,n) @test all(abs.((E'*E) .- In) .< 1e-10) end -# test symplectic projection (this is just the E matrix) -function sympl_proj_test(N, n) - JN = SymplecticPotential(N) - Jn = SymplecticPotential(n) - E = SymplecticProjection(N, n, Float64) - @test all(abs.((E'*JN*E) .- Jn) .< 1e-10) -end - - function stiefel_lie_alg_add_sub_test(N, n) E = StiefelProjection(N, n) projection(W::SkewSymMatrix) = W - (I - E*E')*W*(I - E*E') @@ -108,23 +83,6 @@ function stiefel_lie_alg_add_sub_test(N, n) @test all(abs.(projection(W₁ - W₂) .- S₄) .< 1e-10) end -# check if matrix is ∈ 𝔤 (check if the vector space projection works), addition & subtraction -function sympl_lie_alg_add_sub_test(N, n) - J = SymplecticPotential(n) - E = SymplecticProjection(N, n) - projection(W::SymplecticLieAlgMatrix) = W - (I - E*E')*W*(I - E*E') - W₁ = SymplecticLieAlgMatrix(rand(2*N,2*N)) - S₁ = SymplecticLieAlgHorMatrix(W₁,n) - W₂ = SymplecticLieAlgMatrix(rand(2*N,2*N)) - S₂ = SymplecticLieAlgHorMatrix(W₂,n) - S₃ = S₁ + S₂ - S₄ = S₁ - S₂ - @test typeof(S₃) <: SymplecticLieAlgHorMatrix - @test typeof(S₄) <: SymplecticLieAlgHorMatrix - @test all(abs.(projection(W₁ + W₂) .- S₃) .< 1e-10) - @test all(abs.(projection(W₁ - W₂) .- S₄) .< 1e-10) -end - function stiefel_lie_alg_vectorization_test(N, n; T=Float32) A = rand(StiefelLieAlgHorMatrix{T}, N, n) @test isapprox(StiefelLieAlgHorMatrix(vec(A), N, n), A) @@ -149,10 +107,7 @@ for (N, n) ∈ zip(N_vec, n_vec) skew_mat_add_sub_test(N) skew_mat_mul_test(N) skew_mat_mul_test2(N) - sympl_lie_alg_add_sub_test(N) stiefel_proj_test(N,n) - sympl_proj_test(N,n) stiefel_lie_alg_add_sub_test(N,n) - sympl_lie_alg_add_sub_test(N,n) stiefel_lie_alg_vectorization_test(N, n) end From 4c82474c49e6bc5a1c97bac4e5128af4968f17ba Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Thu, 14 Dec 2023 15:15:19 +0100 Subject: [PATCH 28/96] Refactored all the transformer tests and gave each of them a name. 
--- test/runtests.jl | 17 ++++--- ...ulti_head_attention_stiefel_optim_cache.jl | 51 ++++++++++++------- ...multi_head_attention_stiefel_retraction.jl | 51 +++++++++---------- .../multi_head_attention_stiefel_setup.jl | 33 ++++++++---- .../transformer_application.jl | 9 ++-- .../transformer_gradient.jl | 3 ++ .../transformer_optimizer.jl | 9 +++- test/transformer_related/transformer_setup.jl | 3 ++ 8 files changed, 109 insertions(+), 67 deletions(-) diff --git a/test/runtests.jl b/test/runtests.jl index af35e6063..5c9a72ecd 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -3,7 +3,6 @@ using SafeTestsets @safetestset "Check parameterlength " begin include("parameterlength/check_parameterlengths.jl") end @safetestset "Arrays #1 " begin include("arrays/array_tests.jl") end -@safetestset "Arrays #2 " begin include("arrays/array_tests_old.jl") end @safetestset "Manifolds (Grassmann): " begin include("manifolds/grassmann_manifold.jl") end @safetestset "Gradient Layer " begin include("layers/gradient_layer_tests.jl") end @safetestset "Test symplecticity of upscaling layer " begin include("layers/sympnet_layers_test.jl") end @@ -11,13 +10,15 @@ using SafeTestsets @safetestset "Manifold Neural Network Layers " begin include("layers/manifold_layers.jl") end @safetestset "Custom AD rules for kernels " begin include("custom_ad_rules/kernel_pullbacks.jl") end @safetestset "ResNet " begin include("layers/resnet_tests.jl") end -@safetestset "Transformer Networks #1 " begin include("transformer_related/multi_head_attention_stiefel_optim_cache.jl") end -@safetestset "Transformer Networks #2 " begin include("transformer_related/multi_head_attention_stiefel_retraction.jl") end -@safetestset "Transformer Networks #3 " begin include("transformer_related/multi_head_attention_stiefel_setup.jl") end -@safetestset "Transformer Networks #4 " begin include("transformer_related/transformer_setup.jl") end -@safetestset "Transformer Networks #5 " begin include("transformer_related/transformer_application.jl") end -@safetestset "Transformer Networks #6 " begin include("transformer_related/transformer_gradient.jl") end -@safetestset "Transformer Networks #7 " begin include("transformer_related/transformer_optimizer.jl") end + +# transformer-related tests +@safetestset "Test setup of MultiHeadAttention layer Stiefel weights " begin include("transformer_related/multi_head_attention_stiefel_setup.jl") end +@safetestset "Test geodesic and Cayley retr for the MultiHeadAttention layer w/ St weights " begin include("transformer_related/multi_head_attention_stiefel_retraction.jl") end +@safetestset "Test the correct setup of the various optimizer caches for MultiHeadAttention " begin include("transformer_related/multi_head_attention_stiefel_optim_cache.jl") end +@safetestset "Check if the transformer can be applied to a tensor. " begin include("transformer_related/transformer_application.jl") end +@safetestset "Check if the gradient/pullback of MultiHeadAttention changes type in St case " begin include("transformer_related/transformer_gradient.jl") end +@safetestset "Check if the optimization_step! 
changes the parameters of the transformer " begin include("transformer_related/transformer_optimizer.jl") end + @safetestset "Attention layer #1 " begin include("attention_layer/attention_setup.jl") end @safetestset "(MultiHead)Attention " begin include("attention_layer/apply_multi_head_attention.jl") end @safetestset "Classification layer " begin include("layers/classification.jl") end diff --git a/test/transformer_related/multi_head_attention_stiefel_optim_cache.jl b/test/transformer_related/multi_head_attention_stiefel_optim_cache.jl index ca4209c39..c7614fc52 100644 --- a/test/transformer_related/multi_head_attention_stiefel_optim_cache.jl +++ b/test/transformer_related/multi_head_attention_stiefel_optim_cache.jl @@ -2,19 +2,10 @@ using GeometricMachineLearning, Test import Lux, Random, LinearAlgebra -dim = 64 -n_heads = 8 -Dₕ = dim÷8 -tol = eps(Float32) - -model = Chain(MultiHeadAttention(dim, n_heads), MultiHeadAttention(dim, n_heads, Stiefel=true)) -ps = initialparameters(CPU(), Float32, model) - -o₁ = Optimizer(AdamOptimizer(), ps) -o₂ = Optimizer(MomentumOptimizer(), ps) -o₃ = Optimizer(GradientOptimizer(), ps) - -function check_adam_cache(C::AbstractCache) +@doc raw""" +This checks if the Adam cache was set up in the right way +""" +function check_adam_cache(C::AbstractCache{T}, tol= T(10) * eps(T)) where T @test typeof(C) <: AdamCache @test propertynames(C) == (:B₁, :B₂) @test typeof(C.B₁) <: StiefelLieAlgHorMatrix @@ -24,7 +15,10 @@ function check_adam_cache(C::AbstractCache) end check_adam_cache(B::NamedTuple) = apply_toNT(check_adam_cache, B) -function check_momentum_cache(C::AbstractCache) +@doc raw""" +This checks if the momentum cache was set up in the right way +""" +function check_momentum_cache(C::AbstractCache{T}, tol= T(10) * eps(T)) where T @test typeof(C) <: MomentumCache @test propertynames(C) == (:B,) @test typeof(C.B) <: StiefelLieAlgHorMatrix @@ -32,12 +26,33 @@ function check_momentum_cache(C::AbstractCache) end check_momentum_cache(B::NamedTuple) = apply_toNT(check_momentum_cache, B) -function check_gradient_cache(C::AbstractCache) +@doc raw""" +This checks if the gradient cache was set up in the right way +""" +function check_gradient_cache(C::AbstractCache{T}) where T @test typeof(C) <: GradientCache @test propertynames(C) == () end check_gradient_cache(B::NamedTuple) = apply_toNT(check_gradient_cache, B) -check_adam_cache(o₁.cache[2]) -check_momentum_cache(o₂.cache[2]) -check_gradient_cache(o₃.cache[2]) +@doc raw""" +This checks if all the caches are set up in the right way for the `MultiHeadAttention` layer with Stiefel weights. + +TODO: +- [ ] `BFGSOptimizer` !! 
+""" +function test_cache_setups_for_optimizer_for_multihead_attention_layer(T::Type, dim::Int, n_heads::Int) + @assert dim % n_heads == 0 + model = MultiHeadAttention(dim, n_heads, Stiefel=true) + ps = initialparameters(CPU(), T, model) + + o₁ = Optimizer(AdamOptimizer(), ps) + o₂ = Optimizer(MomentumOptimizer(), ps) + o₃ = Optimizer(GradientOptimizer(), ps) + + check_adam_cache(o₁.cache) + check_momentum_cache(o₂.cache) + check_gradient_cache(o₃.cache) +end + +test_cache_setups_for_optimizer_for_multihead_attention_layer(Float32, 64, 8) \ No newline at end of file diff --git a/test/transformer_related/multi_head_attention_stiefel_retraction.jl b/test/transformer_related/multi_head_attention_stiefel_retraction.jl index 842069da5..54f6214e8 100644 --- a/test/transformer_related/multi_head_attention_stiefel_retraction.jl +++ b/test/transformer_related/multi_head_attention_stiefel_retraction.jl @@ -1,7 +1,3 @@ -""" -This is a test for that checks if the retractions (geodesic and Cayley for now) map from StiefelLieAlgHorMatrix to StiefelManifold when used with MultiHeadAttention. -""" - import Random, Test, Lux, LinearAlgebra, KernelAbstractions using GeometricMachineLearning, Test @@ -9,37 +5,40 @@ using GeometricMachineLearning: geodesic using GeometricMachineLearning: cayley using GeometricMachineLearning: init_optimizer_cache -dim = 64 -n_heads = 8 -Dₕ = dim÷8 -tol = eps(Float32) -T = Float32 -backend = KernelAbstractions.CPU() - -model = MultiHeadAttention(dim, n_heads, Stiefel=true) - -ps = initialparameters(backend, T, model) - -cache = init_optimizer_cache(MomentumOptimizer(), ps) - -E = StiefelProjection(dim, Dₕ, T) -function check_retraction_geodesic(A::AbstractMatrix) +@doc raw""" +This function computes the geodesic retraction of an element of `StiefelLieAlgHorMatrix` and then checks if the resulting element is `StiefelProjection`. +""" +function check_retraction_geodesic(A::AbstractMatrix{T}, tol=eps(T)) where T A_retracted = geodesic(A) @test typeof(A_retracted) <: StiefelManifold - @test LinearAlgebra.norm(A_retracted - E) < tol + @test LinearAlgebra.norm(A_retracted - StiefelProjection(A_retracted)) < tol end check_retraction_geodesic(cache::NamedTuple) = apply_toNT(check_retraction_geodesic, cache) check_retraction_geodesic(B::MomentumCache) = check_retraction_geodesic(B.B) -check_retraction_geodesic(cache) - -E = StiefelProjection(dim, Dₕ) -function check_retraction_cayley(A::AbstractMatrix) +@doc raw""" +This function computes the cayley retraction of an element of `StiefelLieAlgHorMatrix` and then checks if the resulting element is `StiefelProjection`. +""" +function check_retraction_cayley(A::AbstractMatrix{T}, tol=eps(T)) where T A_retracted = cayley(A) @test typeof(A_retracted) <: StiefelManifold - @test LinearAlgebra.norm(A_retracted - E) < tol + @test LinearAlgebra.norm(A_retracted - StiefelProjection(A_retracted)) < tol end check_retraction_cayley(cache::NamedTuple) = apply_toNT(check_retraction_cayley, cache) check_retraction_cayley(B::MomentumCache) = check_retraction_cayley(B.B) -check_retraction_cayley(cache) +@doc raw""" +This is a test for that checks if the retractions (geodesic and Cayley for now) map from `StiefelLieAlgHorMatrix` to `StiefelManifold` when used with `MultiHeadAttention`. 
+""" +function test_multi_head_attention_retraction(T::Type, dim, n_heads, tol=eps(T), backend=KernelAbstractions.CPU()) + model = MultiHeadAttention(dim, n_heads, Stiefel=true) + + ps = initialparameters(backend, T, model) + cache = init_optimizer_cache(MomentumOptimizer(), ps) + + check_retraction_geodesic(cache) + + check_retraction_cayley(cache) +end + +test_multi_head_attention_retraction(Float32, 64, 8) \ No newline at end of file diff --git a/test/transformer_related/multi_head_attention_stiefel_setup.jl b/test/transformer_related/multi_head_attention_stiefel_setup.jl index 3b8cc6cbf..742839331 100644 --- a/test/transformer_related/multi_head_attention_stiefel_setup.jl +++ b/test/transformer_related/multi_head_attention_stiefel_setup.jl @@ -3,25 +3,36 @@ import Random, Test, Lux, LinearAlgebra, KernelAbstractions using GeometricMachineLearning, Test using GeometricMachineLearning: init_optimizer_cache -T = Float32 -model = MultiHeadAttention(64, 8, Stiefel=true) -ps = initialparameters(KernelAbstractions.CPU(), T, model) -tol = 10*eps(T) - -function check_setup(A::AbstractMatrix) +@doc raw""" +This checks for an arbitrary matrix ``A\in\mathbb{R}^{N\times{}n}`` if ``A\in{}St(n,N)``. +""" +function check_setup(A::AbstractMatrix{T}, tol=T(10)*eps(T)) where T @test typeof(A) <: StiefelManifold @test check(A) < tol end check_setup(ps::NamedTuple) = apply_toNT(check_setup, ps) -check_setup(ps) -######## check if the gradients are set up the correct way -function check_grad_setup(B::AbstractMatrix) +@doc raw""" +This checks for an arbitrary matrix ``B\in\mathbb{R}^{N\times{}N}`` if ``B\in\mathfrak{g}^\mathrm{hor}``. +""" +function check_grad_setup(B::AbstractMatrix{T}, tol=T(10)*eps(T)) where T @test typeof(B) <: StiefelLieAlgHorMatrix @test LinearAlgebra.norm(B) < tol end check_grad_setup(gx::NamedTuple) = apply_toNT(check_grad_setup, gx) check_grad_setup(B::MomentumCache) = check_grad_setup(B.B) -gx = init_optimizer_cache(MomentumOptimizer(), ps) -check_grad_setup(gx) +@doc raw""" +Check if `initialparameters` and `init_optimizer_cache` do the right thing for `MultiHeadAttentionLayer`. 
+""" +function check_multi_head_attention_stiefel_setup(T::Type, N::Int, n::Int) + model = MultiHeadAttention(N, n, Stiefel=true) + ps = initialparameters(KernelAbstractions.CPU(), T, model) + + check_setup(ps) + + gx = init_optimizer_cache(MomentumOptimizer(), ps) + check_grad_setup(gx) +end + +check_multi_head_attention_stiefel_setup(Float32, 64, 8) \ No newline at end of file diff --git a/test/transformer_related/transformer_application.jl b/test/transformer_related/transformer_application.jl index 7fc159a8b..6729c889e 100644 --- a/test/transformer_related/transformer_application.jl +++ b/test/transformer_related/transformer_application.jl @@ -1,5 +1,8 @@ using Test, KernelAbstractions, GeometricMachineLearning +@doc raw""" +This tests if the size of the input array is kept constant when fed into the transformer (for a matrix and a tensor) +""" function transformer_application_test(T, dim, n_heads, L, seq_length=8, batch_size=10) model₁ = Chain(Transformer(dim, n_heads, L, Stiefel=false), ResNet(dim)) model₂ = Chain(Transformer(dim, n_heads, L, Stiefel=true), ResNet(dim)) @@ -8,11 +11,11 @@ function transformer_application_test(T, dim, n_heads, L, seq_length=8, batch_si ps₂ = initialparameters(KernelAbstractions.CPU(), T, model₂) input₁ = rand(T, dim, seq_length, batch_size) - input₂ = rand(T, dim, seq_length, batch_size) - + input₂ = rand(T, dim, seq_length) + @test size(model₁(input₁, ps₁)) == size(input₁) - @test size(model₁(input₂, ps₁)) == size(input₂) @test size(model₂(input₁, ps₂)) == size(input₁) + @test size(model₁(input₂, ps₁)) == size(input₂) @test size(model₂(input₂, ps₂)) == size(input₂) end diff --git a/test/transformer_related/transformer_gradient.jl b/test/transformer_related/transformer_gradient.jl index e049933a6..2a53673fc 100644 --- a/test/transformer_related/transformer_gradient.jl +++ b/test/transformer_related/transformer_gradient.jl @@ -1,5 +1,8 @@ using Test, KernelAbstractions, GeometricMachineLearning, Zygote, LinearAlgebra +@doc raw""" +This checks if the gradients of the transformer change the type in case of the Stiefel manifold, and checks if they stay the same in the case of regular weights. +""" function transformer_gradient_test(T, dim, n_heads, L, seq_length=8, batch_size=10) model₁ = Chain(Transformer(dim, n_heads, L, Stiefel=false), ResNet(dim)) model₂ = Chain(Transformer(dim, n_heads, L, Stiefel=true), ResNet(dim)) diff --git a/test/transformer_related/transformer_optimizer.jl b/test/transformer_related/transformer_optimizer.jl index aa3968c62..e6421bdb7 100644 --- a/test/transformer_related/transformer_optimizer.jl +++ b/test/transformer_related/transformer_optimizer.jl @@ -1,5 +1,8 @@ using Test, KernelAbstractions, GeometricMachineLearning, Zygote, LinearAlgebra +@doc raw""" +This function tests if the `GradientOptimzier`, `MomentumOptimizer`, `AdamOptimizer` and `BFGSOptimizer` act on the neural network weights via `optimization_step!`. 
+""" function transformer_gradient_test(T, dim, n_heads, L, seq_length=8, batch_size=10) model = Chain(Transformer(dim, n_heads, L, Stiefel=true), ResNet(dim)) model = Transformer(dim, n_heads, L, Stiefel=true) @@ -14,15 +17,19 @@ function transformer_gradient_test(T, dim, n_heads, L, seq_length=8, batch_size= o₁ = Optimizer(GradientOptimizer(), ps) o₂ = Optimizer(MomentumOptimizer(), ps) o₃ = Optimizer(AdamOptimizer(), ps) + o₄ = Optimizer(BFGSOptimizer(), ps) ps₁ = deepcopy(ps) ps₂ = deepcopy(ps) ps₃ = deepcopy(ps) + ps₄ = deepcopy(ps) optimization_step!(o₁, model, ps₁, dx) optimization_step!(o₂, model, ps₂, dx) optimization_step!(o₃, model, ps₃, dx) - @test typeof(ps₁) == typeof(ps₂) == typeof(ps₃) == typeof(ps) + optimization_step!(o₄, model, ps₄, dx) + @test typeof(ps₁) == typeof(ps₂) == typeof(ps₃) == typeof(ps₄) == typeof(ps) + @test ps₁[1].PQ.head_1 ≉ ps₂[1].PQ.head_1 ≉ ps₃[1].PQ.head_1 ≉ ps₄[1].PQ.head_1 ≉ ps[1].PQ.head_1 end transformer_gradient_test(Float32, 10, 5, 4) \ No newline at end of file diff --git a/test/transformer_related/transformer_setup.jl b/test/transformer_related/transformer_setup.jl index 42ac78995..1311ce26c 100644 --- a/test/transformer_related/transformer_setup.jl +++ b/test/transformer_related/transformer_setup.jl @@ -1,5 +1,8 @@ using Test, KernelAbstractions, GeometricMachineLearning +@doc raw""" +This function tests the setup of the transformer with Stiefel weights. +""" function transformer_setup_test(dim, n_heads, L, T) model = Transformer(dim, n_heads, L, Stiefel=true) ps = initialparameters(KernelAbstractions.CPU(), T, model) From 35b3db6aacb115ff14d7fbf9034eddcc1c204613 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 19 Dec 2023 13:07:38 +0100 Subject: [PATCH 29/96] Renamed file to something more descriptive. --- .../{data_loader.jl => data_loader_optimization_step.jl} | 3 +++ 1 file changed, 3 insertions(+) rename test/data_loader/{data_loader.jl => data_loader_optimization_step.jl} (84%) diff --git a/test/data_loader/data_loader.jl b/test/data_loader/data_loader_optimization_step.jl similarity index 84% rename from test/data_loader/data_loader.jl rename to test/data_loader/data_loader_optimization_step.jl index 880212f5b..0ac67a648 100644 --- a/test/data_loader/data_loader.jl +++ b/test/data_loader/data_loader_optimization_step.jl @@ -1,5 +1,8 @@ using GeometricMachineLearning, Test, Zygote +@doc raw""" +This tests the gradient optimizer called together with the `DataLoader` (applied to a tensor). +""" function test_data_loader(sys_dim, n_time_steps, n_params, T=Float32) data = randn(T, sys_dim, n_time_steps, n_params) dl = DataLoader(data) From 66f3279c3125b35e7995727215e44eb6a676c26d Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 19 Dec 2023 13:16:21 +0100 Subject: [PATCH 30/96] Renamed file to something meaningful and made the test more readable. --- test/data_loader/batch.jl | 33 ----------------- .../optimizer_functor_with_adam.jl | 37 +++++++++++++++++++ 2 files changed, 37 insertions(+), 33 deletions(-) delete mode 100644 test/data_loader/batch.jl create mode 100644 test/data_loader/optimizer_functor_with_adam.jl diff --git a/test/data_loader/batch.jl b/test/data_loader/batch.jl deleted file mode 100644 index 4b7c17309..000000000 --- a/test/data_loader/batch.jl +++ /dev/null @@ -1,33 +0,0 @@ -using AbstractNeuralNetworks: AbstractExplicitLayer, Chain -using GeometricMachineLearning, Test - -""" -This creates a dummy MNIST data set. - -TODO: include tests to check if all elements are batched! 
-""" -function create_dummy_mnist(;T=Float32, dim₁=6, dim₂=6, n_images=10) - rand(T, dim₁, dim₂, n_images), Int.(floor.(10*rand(T, n_images))) -end - -dl = DataLoader(create_dummy_mnist()...; patch_length=3) -# batch size is equal to two -batch = Batch(2) - -# this function should be made part of AbstractNeuralNetworks !!! -function Chain(c::Chain, d::AbstractExplicitLayer) - Chain(c.layers..., d) -end - -# input dim is 3^2 = 9 -model = Chain(Transformer(dl.input_dim, 3, 2; Stiefel=true), Classification(dl.input_dim, 10, σ)) -ps = initialparameters(CPU(), Float32, model) - -loss₁ = GeometricMachineLearning.loss(model, ps, dl) - -opt = Optimizer(AdamOptimizer(), ps) -loss_average = optimize_for_one_epoch!(opt, model, ps, dl, batch) - -loss₃ = GeometricMachineLearning.loss(model, ps, dl) - -@test loss₁ > loss_average > loss₃ \ No newline at end of file diff --git a/test/data_loader/optimizer_functor_with_adam.jl b/test/data_loader/optimizer_functor_with_adam.jl new file mode 100644 index 000000000..2caf538b2 --- /dev/null +++ b/test/data_loader/optimizer_functor_with_adam.jl @@ -0,0 +1,37 @@ +using AbstractNeuralNetworks: AbstractExplicitLayer, Chain +using GeometricMachineLearning, Test + +# this function should be made part of AbstractNeuralNetworks !!! +function Chain(c::Chain, d::AbstractExplicitLayer) + Chain(c.layers..., d) +end + +""" +This creates a dummy MNIST data set; i.e. its output are two tensors that look similarly to the ones in the MNIST data set. +""" +function create_dummy_mnist(;T=Float32, dim₁=6, dim₂=6, n_images=10) + rand(T, dim₁, dim₂, n_images), Int.(floor.(10*rand(T, n_images))) +end + +function test_optimizer_functor_with_adam(;T=Float32, dim₁=6, dim₂=6, n_images=10, patch_length=patch_length) + dl = DataLoader(create_dummy_mnist(T=T, dim₁=dim₁, dim₂=dim₂, n_images=n_images)...; patch_length=3) + + # batch size is equal to two + batch = Batch(2) + + # input dim is dim₁ / patch_length * dim₂ / pach_length; the transformer is called with dim₁ / patch_length and two layers + model = Chain(Transformer(dl.input_dim, dim₁ / patch_length, 2; Stiefel=true), Classification(dl.input_dim, 10, σ)) + ps = initialparameters(CPU(), Float32, model) + + loss₁ = GeometricMachineLearning.loss(model, ps, dl) + + opt = Optimizer(AdamOptimizer(), ps) + loss_average = optimize_for_one_epoch!(opt, model, ps, dl, batch) + + loss₃ = GeometricMachineLearning.loss(model, ps, dl) + + #check if the loss decreases during optimization + @test loss₁ > loss_average > loss₃ +end + +test_optimizer_functor_with_adam() \ No newline at end of file From 257aeeea0f46e1bcace46852c4bfd98dcd550a26 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 19 Dec 2023 17:24:55 +0100 Subject: [PATCH 31/96] Added descriptions and a wrapper for using the custom loss function with NeuralNetwork. --- test/data_loader/mnist_utils.jl | 61 ++++++++++++++++++++------------- 1 file changed, 38 insertions(+), 23 deletions(-) diff --git a/test/data_loader/mnist_utils.jl b/test/data_loader/mnist_utils.jl index fe685c647..f6c637a58 100644 --- a/test/data_loader/mnist_utils.jl +++ b/test/data_loader/mnist_utils.jl @@ -7,9 +7,8 @@ using GeometricMachineLearning: index_conversion import Zygote """ -This function tests if all the patch nubmers are assigned correctly, i.e. tests patch_index. +This function tests is used to test if all the patch nubmers are assigned correctly with `index_conversion`, i.e. tests `patch_index` by inverting it. 
""" -### Test if mapping is invertible function reverse_index(i::Integer, j::Integer, patch_length=7) opt_i = i%patch_length==0 ? 1 : 0 within_patch_index = i%patch_length + opt_i*patch_length, (i÷patch_length - opt_i + 1) @@ -20,41 +19,57 @@ function reverse_index(i::Integer, j::Integer, patch_length=7) (patch_index[1]-1)*patch_length + within_patch_index[1], (patch_index[2]-1)*patch_length + within_patch_index[2] end -# test if this is the inverse of the other batch index conversion! -patch_lengths = (2, 4, 7, 14) -for patch_length in patch_lengths - number_of_patches = (28÷patch_length)^2 - for i in 1:28 - for j in 1:28 - @test reverse_index(index_conversion(i, j, patch_length, number_of_patches)..., patch_length) == (i, j) +""" +This function uses `reverse_index` to test `index_conversion`, i.e. checks if the functions are invertible. +""" +function test_index_conversion(patch_lengths=(2, 4, 7, 14)) + for patch_length in patch_lengths + number_of_patches = (28÷patch_length)^2 + for i in 1:28 + for j in 1:28 + @test reverse_index(index_conversion(i, j, patch_length, number_of_patches)..., patch_length) == (i, j) + end end end end +""" +This function tests if `onehotbatch` does what it should; i.e. convert a vector of integers to a one-hot-tensor. +""" function test_onehotbatch(V::AbstractVector{T}) where {T<:Integer} V_encoded = onehotbatch(V) - for i in length(V) + for (i, v) in zip(length(V), V) @test sum(V_encoded[:,1,i]) == 1 + @test V_encoded[v, 1, i] == 1 end end test_onehotbatch([1, 2, 5, 0]) -####### MNIST-like data set -train_x = rand(Float32, 28,28,100) -train_y = Int.(ceil.(10*rand(Float32, 100))) .- 1 +@doc raw""" +Generates an MNIST-like dummy data set. +""" +function generate_dummy_mnist(dim₁=28, dim₂=18, number_images=100, T=Float32) + train_x = rand(T, dim₁, dim₂, number_images) + train_y = Int.(ceil.(10 * rand(T, number_images))) .- 1 + train_x, train_y +end +function test_optimizer_for_classification_layer(; dim₁=28, dim₂=28, number_images=100, patch_length=7, T=Float32) + dl = DataLoader(generate_dummy_mnist(dim₁, dim₂, number_images, T); patch_length=patch_length) -dl = DataLoader(train_x, train_y) + activation_function(x) = tanh.(x) + model = Classification((dim₁ ÷ patch_length) * (dim₂ ÷ patch_length), 10, activation_function) + ps = initialparameters(CPU(), T, model) + loss₁ = GeometricMachineLearning.loss(model, ps, dl) -activation_function(x) = tanh.(x) -model = Classification(49, 10, activation_function) -ps = initialparameters(CPU(), Float32, model) -loss₁ = GeometricMachineLearning.loss(model, ps, dl) + opt = Optimizer(GradientOptimizer(), ps) + dx = Zygote.gradient(ps -> GeometricMachineLearning.loss(model, ps, dl), ps)[1] + optimization_step!(opt, model, ps, dx) + loss₂ = GeometricMachineLearning.loss(model, ps, dl) -opt = Optimizer(GradientOptimizer(), ps) -dx = Zygote.gradient(ps -> GeometricMachineLearning.loss(model, ps, dl), ps)[1] -optimization_step!(opt, model, ps, dx) -loss₂ = GeometricMachineLearning.loss(model, ps, dl) + @test loss₂ < loss₁ +end -@test loss₂ < loss₁ +test_index_conversion() +test_optimizer_for_classification_layer() \ No newline at end of file From 9ca60ed7280a213ebff112f2be18b229d0eccc20 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 19 Dec 2023 17:25:26 +0100 Subject: [PATCH 32/96] Added descriptions and a wrapper for using the custom loss function with NeuralNetwork. 
--- src/data_loader/data_loader.jl | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/src/data_loader/data_loader.jl b/src/data_loader/data_loader.jl index ae6a45089..24fb90a21 100644 --- a/src/data_loader/data_loader.jl +++ b/src/data_loader/data_loader.jl @@ -93,25 +93,35 @@ It takes as input: """ function loss(model::Union{Chain, AbstractExplicitLayer}, ps::Union{Tuple, NamedTuple}, input::AT, output::BT) where {T, T1, AT<:AbstractArray{T, 3}, BT<:AbstractArray{T1, 3}} output_estimate = model(input, ps) - norm(output - output_estimate)/norm(output) # /T(sqrt(size(output, 2)*size(output, 3))) + norm(output - output_estimate) / norm(output) # /T(sqrt(size(output, 2)*size(output, 3))) end +@doc raw""" +The *autoencoder loss*. +""" function loss(model::Chain, ps::Tuple, input::BT) where {T, BT<:AbstractArray{T, 3}} output_estimate = model(input, ps) - norm(output_estimate - input)/norm(input) # /T(sqrt(size(input, 2)*size(input, 3))) + norm(output_estimate - input) / norm(input) # /T(sqrt(size(input, 2)*size(input, 3))) end function loss(model::Chain, ps::Tuple, input::BT) where {T, BT<:AbstractArray{T, 2}} output_estimate = model(input, ps) - norm(output_estimate - input)/norm(input) # /T(sqrt(size(input, 2))) + norm(output_estimate - input) / norm(input) # /T(sqrt(size(input, 2))) end nt_diff(A, B) = (q = A.q - B.q, p = A.p - B.p) nt_norm(A) = norm(A.q) + norm(A.p) +@doc raw""" +Loss function that takes a `NamedTuple` as input. This should be used with a SympNet (or other neural network-based integrator). It computes: + +```math +\mathtt{loss}(\mathcal{NN}, \mathtt{ps}, \begin{pmatrix} q \\ p \end{pmatrix}, \begin{pmatrix} q' \\ p' \end{pmatrix}) \mapsto \left|| \mathcal{NN}(\begin{pmatrix} q \\ p \end{pmatrix}) - \begin{pmatrix} q' \\ p' \end{pmatrix} \right|| / \left|| \begin{pmatrix} q \\ p \end{pmatrix} \right|| +``` +""" function loss(model::Chain, ps::Tuple, input::NamedTuple, output::NamedTuple) output_estimate = model(input, ps) - nt_norm(nt_diff(output_estimate, output))/nt_norm(input) + nt_norm(nt_diff(output_estimate, output)) / nt_norm(input) end @doc raw""" @@ -136,6 +146,13 @@ function loss(model::Chain, ps::Tuple, dl::DataLoader{T, BT}) where {T, BT<:Name loss(model, ps, dl.data) end +@doc raw""" +Wrapper if we deal with a neural network. +""" +function loss(nn::NeuralNetwork, dl::DataLoader) + loss(nn.model, nn.params, dl) +end + @doc raw""" Computes the accuracy (as opposed to the loss) of a neural network classifier. From fced8d80298e34b304284cb00af470487d980f5c Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 19 Dec 2023 17:26:04 +0100 Subject: [PATCH 33/96] Added documentation. --- src/data_loader/batch.jl | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/src/data_loader/batch.jl b/src/data_loader/batch.jl index bbb9e4e39..9a76d0b26 100644 --- a/src/data_loader/batch.jl +++ b/src/data_loader/batch.jl @@ -15,6 +15,9 @@ end hasseqlength(::Batch{<:Integer}) = true hasseqlength(::Batch{<:Nothing}) = false +@doc raw""" +The functor for batch is called with an instance on `DataLoader`. It then returns a tuple of batch indices: ``(\mathcal{I}_1, \ldots, \mathcal{I}_{\lceil\mathtt{dl.n\_params/batch\_size}\rceil})``, where the index runs from 1 to the number of batches, which is the number of parameters divided by the batch size (rounded up). 
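+
+For illustration, a short sketch of how this is called (the sizes are made up):
+
+```julia
+dl = DataLoader(rand(Float32, 4, 100, 20))  # system dim 4, 100 time steps, 20 parameters
+batch = Batch(7)
+batch(dl)  # ⌈20 / 7⌉ = 3 index batches that together cover all 20 parameters
+```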
+""" function (batch::Batch{<:Nothing})(dl::DataLoader{T, AT}) where {T, AT<:AbstractArray{T, 3}} indices = shuffle(1:dl.n_params) n_batches = Int(ceil(dl.n_params/batch.batch_size)) @@ -28,6 +31,9 @@ function (batch::Batch{<:Nothing})(dl::DataLoader{T, AT}) where {T, AT<:Abstract batches end +@doc raw""" +The functor for batch is called with an instance on `DataLoader`. It then returns a tuple of batch indices: ``(\mathcal{I}_1, \ldots, \mathcal{I}_{\lceil\mathtt{(dl.input\_time\_steps-1)/batch\_size}\rceil})``, where the index runs from 1 to the number of batches, which is the number of input time steps (minus one) divided by the batch size (and rounded up). +""" function (batch::Batch{<:Nothing})(dl::DataLoader{T, AT}) where {T, BT<:AbstractMatrix{T}, AT<:Union{BT, NamedTuple{(:q, :p), Tuple{BT, BT}}}} indices = shuffle(1:dl.input_time_steps) n_batches = Int(ceil((dl.input_time_steps-1)/batch.batch_size)) @@ -88,7 +94,7 @@ function optimize_for_one_epoch!(opt::Optimizer, nn::NeuralNetwork, dl::DataLoad end """ -TODO: Add ProgressMeter!!! +This routine is called if a `DataLoader` storing *symplectic data* (i.e. a `NamedTuple`) is supplied. """ function optimize_for_one_epoch!(opt::Optimizer, model, ps::Union{Tuple, NamedTuple}, dl::DataLoader{T, AT}, batch::Batch, loss) where {T, AT<:NamedTuple} count = 0 @@ -107,7 +113,16 @@ function optimize_for_one_epoch!(opt::Optimizer, model, ps::Union{Tuple, NamedTu total_error/count end - +@doc raw""" +A functor for `Optimizer`. It is called with: + - `nn::NeuralNetwork` + - `dl::DataLoader` + - `batch::Batch` + - `n_epochs::Int` + - `loss` + +The last argument is a function through which `Zygote` differentiates. This argument is optional; if it is not supplied `GeometricMachineLearning` defaults to an appropriate loss for the `DataLoader`. +""" function (o::Optimizer)(nn::NeuralNetwork, dl::DataLoader, batch::Batch, n_epochs::Int, loss) progress_object = ProgressMeter.Progress(n_epochs; enabled=true) loss_array = zeros(n_epochs) From d95268daec76b606953b0a2fd1dddbcb00bf9f22 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 19 Dec 2023 17:53:14 +0100 Subject: [PATCH 34/96] Added constructor for optimizer if input arguments are flipped. --- src/optimizers/optimizer.jl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/optimizers/optimizer.jl b/src/optimizers/optimizer.jl index 7cd78d037..e6e25cdaa 100644 --- a/src/optimizers/optimizer.jl +++ b/src/optimizers/optimizer.jl @@ -22,6 +22,8 @@ function Optimizer(m::OptimizerMethod, nn::NeuralNetwork) Optimizer(m, nn.params) end +Optimizer(nn::NeuralNetwork, m::OptimizerMethod) = Optimizer(m, nn) + ####################################################################################### # optimization step function From 2abed8793ec2e604911eadcae999c31bf73c593a Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 19 Dec 2023 18:09:44 +0100 Subject: [PATCH 35/96] Added a comment saying that the constructor can be called with DataLoader as input. --- src/architectures/sympnet.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/architectures/sympnet.jl b/src/architectures/sympnet.jl index 8e017f125..b07d5d84c 100644 --- a/src/architectures/sympnet.jl +++ b/src/architectures/sympnet.jl @@ -7,7 +7,7 @@ TODO: abstract type SympNet{AT} <: Architecture end @doc raw""" -`LASympNet` is called with **a single input argument**, the **system dimension**. 
Optional input arguments are: +`LASympNet` is called with **a single input argument**, the **system dimension**, or with an instance of `DataLoader`. Optional input arguments are: - `depth::Int`: The number of linear layers that are applied. The default is 5. - `nhidden::Int`: The number of hidden layers (i.e. layers that are **not** input or output layers). The default is 2. - `activation`: The activation function that is applied. By default this is `tanh`. @@ -32,7 +32,7 @@ end @inline AbstractNeuralNetworks.dim(arch::SympNet) = arch.dim @doc raw""" -`GSympNet` is called with **a single input argument**, the **system dimension**. Optional input arguments are: +`GSympNet` is called with **a single input argument**, the **system dimension**, or with an instance of `DataLoader`. Optional input arguments are: - `upscaling_dimension::Int`: The *upscaling dimension* of the gradient layer. See the documentation for `GradientLayerQ` and `GradientLayerP` for further explanation. The default is `2*dim`. - `nhidden::Int`: The number of hidden layers (i.e. layers that are **not** input or output layers). The default is 2. - `activation`: The activation function that is applied. By default this is `tanh`. From 0f62b8f2e96949b442b1ad2b009818ffe4b9d520 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 19 Dec 2023 18:10:14 +0100 Subject: [PATCH 36/96] Added default for number of epochs. --- src/data_loader/batch.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/data_loader/batch.jl b/src/data_loader/batch.jl index 9a76d0b26..10b434ffd 100644 --- a/src/data_loader/batch.jl +++ b/src/data_loader/batch.jl @@ -133,6 +133,6 @@ function (o::Optimizer)(nn::NeuralNetwork, dl::DataLoader, batch::Batch, n_epoch loss_array end -function (o::Optimizer)(nn::NeuralNetwork, dl::DataLoader, batch::Batch, n_epochs::Int) +function (o::Optimizer)(nn::NeuralNetwork, dl::DataLoader, batch::Batch, n_epochs::Int=1) o(nn, dl, batch, n_epochs, loss) end \ No newline at end of file From 3ab42f2c9874e83edaa54ce3c809e7d2332be8f4 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 19 Dec 2023 18:11:05 +0100 Subject: [PATCH 37/96] Combined matrix and tensor routines into one. Added another loss for autoencoder qp data. --- src/data_loader/data_loader.jl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/data_loader/data_loader.jl b/src/data_loader/data_loader.jl index 24fb90a21..2beb691b9 100644 --- a/src/data_loader/data_loader.jl +++ b/src/data_loader/data_loader.jl @@ -99,19 +99,19 @@ end @doc raw""" The *autoencoder loss*. """ -function loss(model::Chain, ps::Tuple, input::BT) where {T, BT<:AbstractArray{T, 3}} +function loss(model::Chain, ps::Tuple, input::BT) where {T, BT<:AbstractArray{T}} output_estimate = model(input, ps) norm(output_estimate - input) / norm(input) # /T(sqrt(size(input, 2)*size(input, 3))) end -function loss(model::Chain, ps::Tuple, input::BT) where {T, BT<:AbstractArray{T, 2}} - output_estimate = model(input, ps) - norm(output_estimate - input) / norm(input) # /T(sqrt(size(input, 2))) -end - nt_diff(A, B) = (q = A.q - B.q, p = A.p - B.p) nt_norm(A) = norm(A.q) + norm(A.p) +function loss(model::Chain, ps::Tuple, input::NT) where {T, AT<:AbstractArray{T}, NT<:NamedTuple{(:q, :p,), Tuple{AT, AT}}} + output_estimate = model(input, ps) + nt_norm(output_estimate - input) / nt_norm(input) +end + @doc raw""" Loss function that takes a `NamedTuple` as input. This should be used with a SympNet (or other neural network-based integrator). 
It computes: From 5d22855e9a98e50625fcc9f66e426fe9fd0ed8d3 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 19 Dec 2023 18:12:14 +0100 Subject: [PATCH 38/96] Commented out a section that is probably not needed. --- src/data_loader/tensor_assign.jl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/data_loader/tensor_assign.jl b/src/data_loader/tensor_assign.jl index 3faad0a99..4aac9bfc0 100644 --- a/src/data_loader/tensor_assign.jl +++ b/src/data_loader/tensor_assign.jl @@ -55,6 +55,7 @@ function assign_output_estimate(full_output::AbstractArray{T, 3}, prediction_win output_estimate end +#= """ This function draws random time steps and parameters and based on these assign the batch and the output. @@ -101,6 +102,7 @@ function draw_batch!(batch::AT, output::BT, data::AT, target::BT) where {T, T2, assign_batch!(batch, data, params, time_steps, ndrange=size(batch)) assign_batch!(output, target, params, time_steps, ndrange=size(output)) end +=# """ Used for differentiating assign_output_estimate (this appears in the loss). From 86485adb054c3c90df2c5ed725e974d44843e4f2 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 19 Dec 2023 18:12:41 +0100 Subject: [PATCH 39/96] Test data loader for qp data. --- test/data_loader/batch_data_loader_qp_test.jl | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 test/data_loader/batch_data_loader_qp_test.jl diff --git a/test/data_loader/batch_data_loader_qp_test.jl b/test/data_loader/batch_data_loader_qp_test.jl new file mode 100644 index 000000000..9e1273e49 --- /dev/null +++ b/test/data_loader/batch_data_loader_qp_test.jl @@ -0,0 +1,33 @@ +using GeometricMachineLearning +using Test + +function dummy_qp_data_matrix(dim=2, number_data_points=200, T=Float32) + (q = rand(T, dim, number_data_points), p = (rand(T, dim, number_data_points))) +end + +function dummy_qp_data_tensor(dim=2, number_of_time_steps=100, number_of_parameters=20, T=Float32) + (q = rand(T, dim, number_of_time_steps, number_of_parameters), p = (rand(T, dim, number_of_time_steps, number_of_parameters))) +end + +function test_data_loader(dim=2, number_of_time_steps=100, number_of_parameters=20, batch_size, T=Float32) + dl1 = DataLoader(dummy_qp_data_matrix(dim, number_of_time_steps, T)) + dl2 = DataLoader(dummy_qp_data_tensor(dim, number_of_time_steps, number_of_parameters)) + + arch1 = GSympNet(dl1) + arch2 = GSympNet(dl2) + + nn1 = NeuralNetwork(arch1, CPU(), T) + nn2 = NeuralNetwork(arch2, CPU(), T) + + loss1 = loss(nn1, dl1) + loss2 = loss(nn2, dl2) + + batch = Batch(batch_size) + o₁ = Optimizer(GradientOptimizer(), nn1) + o₂ = Optimizer(GradientOptimizer(), nn2) + + o₁(nn1, dl1, batch) + o₂(nn2, dl2, batch) +end + +test_data_loader() \ No newline at end of file From cac1f185da9ce28e6e3e63f0e9d9d68fae33439c Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 19 Dec 2023 18:13:08 +0100 Subject: [PATCH 40/96] Renamed and added tests. 
--- test/runtests.jl | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/test/runtests.jl b/test/runtests.jl index af35e6063..ae9307766 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -34,6 +34,8 @@ using SafeTestsets @safetestset "Training " begin include("train!/test_training.jl") end @safetestset "NeuralNetSolution " begin include("train!/test_neuralnet_solution.jl") end @safetestset "Problem & Integrators " begin include("integrator/test_integrator.jl") end -@safetestset "Data Loader #1 " begin include("data_loader/data_loader.jl") end -@safetestset "Data Loader #2 " begin include("data_loader/mnist_utils.jl") end -@safetestset "Data Loader #3 (Batch struct) " begin include("data_loader/batch.jl") end \ No newline at end of file + +@safetestset "Test data loader for q and p data " begin include("data_loader/batch_data_loader_qp_test.jl") end +@safetestset "Test mnist_utils. " begin include("data_loader/mnist_utils.jl") end +@safetestset "Test the data loader in combination with optimization_step! " begin include("data_loader/data_loader_optimization_step.jl") end +@safetestset "Optimizer functor with data loader for Adam " begin include("data_loader/optimizer_functor_with_adam.jl") \ No newline at end of file From 4528053bc135b3f495ac732804cd913e4187cc96 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 19 Dec 2023 18:14:24 +0100 Subject: [PATCH 41/96] Adjusted symplectic matrix. --- src/arrays/symplectic.jl | 80 ++++++++++++---------------------------- 1 file changed, 24 insertions(+), 56 deletions(-) diff --git a/src/arrays/symplectic.jl b/src/arrays/symplectic.jl index 1dcc59658..3a4f138d6 100644 --- a/src/arrays/symplectic.jl +++ b/src/arrays/symplectic.jl @@ -1,75 +1,43 @@ @doc raw""" - `SymplecticMatrix(n)` +`SymplecticPotential(n)` Returns a symplectic matrix of size 2n x 2n ```math \begin{pmatrix} -0 & & & 1 & & & \\ -& \ddots & & & \ddots & & \\ -& & 0 & & & 1 \\ --1 & & & 0 & & & \\ -& \ddots & & & \ddots & & \\ -& & -1 & & 0 & \\ +\mathbb{O} & \mathbb{I} \\ +\mathbb{O} & -\mathbb{I} \\ \end{pmatrix} ``` - - `SymplecticProjection(N,n)` -Returns the symplectic projection matrix E of the Stiefel manifold, i.e. π: Sp(2N) → Sp(2n,2N), A ↦ AE - """ -#= -function SymplecticMatrix(n::Int, T::DataType=Float64) - BandedMatrix((n => ones(T,n), -n => -ones(T,n)), (2n,2n)) -end - -SymplecticMatrix(T::DataType, n::Int) = SymplecticMatrix(n, T) - -@doc raw""" -```math -\begin{pmatrix} -I & 0 \\ -0 & 0 \\ -0 & I \\ -0 & 0 \\ -\end{pmatrix} -``` -""" -=# - -function SymplecticPotential(n::Int, T::DataType=Float64) - J = zeros(T, 2*n, 2*n) - J[1:n, (n+1):2*n] = one(ones(T, n, n)) - J[(n+1):2*n, 1:n] = -one(ones(T, n, n)) +function SymplecticPotential(backend, n2::Int, T::DataType=Float64) + @assert iseven(n2) + n = n2÷2 + J = KernelAbstractions.zeros(backend, T, 2*n, 2*n) + assign_ones_for_symplectic_potential! = assign_ones_for_symplectic_potential_kernel!(backend) + assign_ones_for_symplectic_potential!(J, n, ndrange=n) J end +SymplecticPotential(n::Int, T::DataType=Float64) = SymplecticPotential(CPU(), n, T) +SymplecticPotential(bakend, T::DataType, n::Int) = SymplecticPotential(backend, n, T) + SymplecticPotential(T::DataType, n::Int) = SymplecticPotential(n, T) -struct SymplecticProjection{T} <: AbstractMatrix{T} - N::Int - n::Int - SymplecticProjection(N, n, T = Float64) = new{T}(N,n) +@kernel function assign_ones_for_symplectic_potential_kernel!(J::AbstractMatrix{T}, n::Int) where T + i = @index(Global) + J[map_index_for_symplectic_potential(i, n)...] 
= i ≤ n ? one(T) : -one(T) end -function Base.getindex(E::SymplecticProjection,i,j) - if i ≤ E.n - if j == i - return 1. - end - return 0. - end - if j > E.n - if (j-E.n) == (i-E.N) - return 1. - end - return 0. +""" +This assigns the right index for the symplectic potential. To be used with `assign_ones_for_symplectic_potential_kernel!`. +""" +function map_index_for_symplectic_potential(i::Int, n::Int) + if i ≤ n + return (i, i+n) + else + return (i, i-n) end - return 0. -end - - -Base.parent(E::SymplecticProjection) = (E.N,E.n) -Base.size(E::SymplecticProjection) = (2*E.N,2*E.n) +end \ No newline at end of file From 6ed5c6259c3d91c3e833d31dc54f16766fff618c Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 19 Dec 2023 13:07:38 +0100 Subject: [PATCH 42/96] Renamed file to something more descriptive. --- .../{data_loader.jl => data_loader_optimization_step.jl} | 3 +++ 1 file changed, 3 insertions(+) rename test/data_loader/{data_loader.jl => data_loader_optimization_step.jl} (84%) diff --git a/test/data_loader/data_loader.jl b/test/data_loader/data_loader_optimization_step.jl similarity index 84% rename from test/data_loader/data_loader.jl rename to test/data_loader/data_loader_optimization_step.jl index 880212f5b..0ac67a648 100644 --- a/test/data_loader/data_loader.jl +++ b/test/data_loader/data_loader_optimization_step.jl @@ -1,5 +1,8 @@ using GeometricMachineLearning, Test, Zygote +@doc raw""" +This tests the gradient optimizer called together with the `DataLoader` (applied to a tensor). +""" function test_data_loader(sys_dim, n_time_steps, n_params, T=Float32) data = randn(T, sys_dim, n_time_steps, n_params) dl = DataLoader(data) From f4ddd3f6f3b8e1f98918146f6211ef0cc9d1afa8 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 19 Dec 2023 13:16:21 +0100 Subject: [PATCH 43/96] Renamed file to something meaningful and made the test more readable. --- test/data_loader/batch.jl | 33 ----------------- .../optimizer_functor_with_adam.jl | 37 +++++++++++++++++++ 2 files changed, 37 insertions(+), 33 deletions(-) delete mode 100644 test/data_loader/batch.jl create mode 100644 test/data_loader/optimizer_functor_with_adam.jl diff --git a/test/data_loader/batch.jl b/test/data_loader/batch.jl deleted file mode 100644 index 4b7c17309..000000000 --- a/test/data_loader/batch.jl +++ /dev/null @@ -1,33 +0,0 @@ -using AbstractNeuralNetworks: AbstractExplicitLayer, Chain -using GeometricMachineLearning, Test - -""" -This creates a dummy MNIST data set. - -TODO: include tests to check if all elements are batched! -""" -function create_dummy_mnist(;T=Float32, dim₁=6, dim₂=6, n_images=10) - rand(T, dim₁, dim₂, n_images), Int.(floor.(10*rand(T, n_images))) -end - -dl = DataLoader(create_dummy_mnist()...; patch_length=3) -# batch size is equal to two -batch = Batch(2) - -# this function should be made part of AbstractNeuralNetworks !!! 
-function Chain(c::Chain, d::AbstractExplicitLayer) - Chain(c.layers..., d) -end - -# input dim is 3^2 = 9 -model = Chain(Transformer(dl.input_dim, 3, 2; Stiefel=true), Classification(dl.input_dim, 10, σ)) -ps = initialparameters(CPU(), Float32, model) - -loss₁ = GeometricMachineLearning.loss(model, ps, dl) - -opt = Optimizer(AdamOptimizer(), ps) -loss_average = optimize_for_one_epoch!(opt, model, ps, dl, batch) - -loss₃ = GeometricMachineLearning.loss(model, ps, dl) - -@test loss₁ > loss_average > loss₃ \ No newline at end of file diff --git a/test/data_loader/optimizer_functor_with_adam.jl b/test/data_loader/optimizer_functor_with_adam.jl new file mode 100644 index 000000000..2caf538b2 --- /dev/null +++ b/test/data_loader/optimizer_functor_with_adam.jl @@ -0,0 +1,37 @@ +using AbstractNeuralNetworks: AbstractExplicitLayer, Chain +using GeometricMachineLearning, Test + +# this function should be made part of AbstractNeuralNetworks !!! +function Chain(c::Chain, d::AbstractExplicitLayer) + Chain(c.layers..., d) +end + +""" +This creates a dummy MNIST data set; i.e. its output are two tensors that look similarly to the ones in the MNIST data set. +""" +function create_dummy_mnist(;T=Float32, dim₁=6, dim₂=6, n_images=10) + rand(T, dim₁, dim₂, n_images), Int.(floor.(10*rand(T, n_images))) +end + +function test_optimizer_functor_with_adam(;T=Float32, dim₁=6, dim₂=6, n_images=10, patch_length=patch_length) + dl = DataLoader(create_dummy_mnist(T=T, dim₁=dim₁, dim₂=dim₂, n_images=n_images)...; patch_length=3) + + # batch size is equal to two + batch = Batch(2) + + # input dim is dim₁ / patch_length * dim₂ / pach_length; the transformer is called with dim₁ / patch_length and two layers + model = Chain(Transformer(dl.input_dim, dim₁ / patch_length, 2; Stiefel=true), Classification(dl.input_dim, 10, σ)) + ps = initialparameters(CPU(), Float32, model) + + loss₁ = GeometricMachineLearning.loss(model, ps, dl) + + opt = Optimizer(AdamOptimizer(), ps) + loss_average = optimize_for_one_epoch!(opt, model, ps, dl, batch) + + loss₃ = GeometricMachineLearning.loss(model, ps, dl) + + #check if the loss decreases during optimization + @test loss₁ > loss_average > loss₃ +end + +test_optimizer_functor_with_adam() \ No newline at end of file From 92ac2fe8415f49ed72c2e2f86d1423af321f234a Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 19 Dec 2023 17:24:55 +0100 Subject: [PATCH 44/96] Added descriptions and a wrapper for using the custom loss function with NeuralNetwork. --- test/data_loader/mnist_utils.jl | 61 ++++++++++++++++++++------------- 1 file changed, 38 insertions(+), 23 deletions(-) diff --git a/test/data_loader/mnist_utils.jl b/test/data_loader/mnist_utils.jl index fe685c647..f6c637a58 100644 --- a/test/data_loader/mnist_utils.jl +++ b/test/data_loader/mnist_utils.jl @@ -7,9 +7,8 @@ using GeometricMachineLearning: index_conversion import Zygote """ -This function tests if all the patch nubmers are assigned correctly, i.e. tests patch_index. +This function tests is used to test if all the patch nubmers are assigned correctly with `index_conversion`, i.e. tests `patch_index` by inverting it. """ -### Test if mapping is invertible function reverse_index(i::Integer, j::Integer, patch_length=7) opt_i = i%patch_length==0 ? 
1 : 0 within_patch_index = i%patch_length + opt_i*patch_length, (i÷patch_length - opt_i + 1) @@ -20,41 +19,57 @@ function reverse_index(i::Integer, j::Integer, patch_length=7) (patch_index[1]-1)*patch_length + within_patch_index[1], (patch_index[2]-1)*patch_length + within_patch_index[2] end -# test if this is the inverse of the other batch index conversion! -patch_lengths = (2, 4, 7, 14) -for patch_length in patch_lengths - number_of_patches = (28÷patch_length)^2 - for i in 1:28 - for j in 1:28 - @test reverse_index(index_conversion(i, j, patch_length, number_of_patches)..., patch_length) == (i, j) +""" +This function uses `reverse_index` to test `index_conversion`, i.e. checks if the functions are invertible. +""" +function test_index_conversion(patch_lengths=(2, 4, 7, 14)) + for patch_length in patch_lengths + number_of_patches = (28÷patch_length)^2 + for i in 1:28 + for j in 1:28 + @test reverse_index(index_conversion(i, j, patch_length, number_of_patches)..., patch_length) == (i, j) + end end end end +""" +This function tests if `onehotbatch` does what it should; i.e. convert a vector of integers to a one-hot-tensor. +""" function test_onehotbatch(V::AbstractVector{T}) where {T<:Integer} V_encoded = onehotbatch(V) - for i in length(V) + for (i, v) in zip(length(V), V) @test sum(V_encoded[:,1,i]) == 1 + @test V_encoded[v, 1, i] == 1 end end test_onehotbatch([1, 2, 5, 0]) -####### MNIST-like data set -train_x = rand(Float32, 28,28,100) -train_y = Int.(ceil.(10*rand(Float32, 100))) .- 1 +@doc raw""" +Generates an MNIST-like dummy data set. +""" +function generate_dummy_mnist(dim₁=28, dim₂=18, number_images=100, T=Float32) + train_x = rand(T, dim₁, dim₂, number_images) + train_y = Int.(ceil.(10 * rand(T, number_images))) .- 1 + train_x, train_y +end +function test_optimizer_for_classification_layer(; dim₁=28, dim₂=28, number_images=100, patch_length=7, T=Float32) + dl = DataLoader(generate_dummy_mnist(dim₁, dim₂, number_images, T); patch_length=patch_length) -dl = DataLoader(train_x, train_y) + activation_function(x) = tanh.(x) + model = Classification((dim₁ ÷ patch_length) * (dim₂ ÷ patch_length), 10, activation_function) + ps = initialparameters(CPU(), T, model) + loss₁ = GeometricMachineLearning.loss(model, ps, dl) -activation_function(x) = tanh.(x) -model = Classification(49, 10, activation_function) -ps = initialparameters(CPU(), Float32, model) -loss₁ = GeometricMachineLearning.loss(model, ps, dl) + opt = Optimizer(GradientOptimizer(), ps) + dx = Zygote.gradient(ps -> GeometricMachineLearning.loss(model, ps, dl), ps)[1] + optimization_step!(opt, model, ps, dx) + loss₂ = GeometricMachineLearning.loss(model, ps, dl) -opt = Optimizer(GradientOptimizer(), ps) -dx = Zygote.gradient(ps -> GeometricMachineLearning.loss(model, ps, dl), ps)[1] -optimization_step!(opt, model, ps, dx) -loss₂ = GeometricMachineLearning.loss(model, ps, dl) + @test loss₂ < loss₁ +end -@test loss₂ < loss₁ +test_index_conversion() +test_optimizer_for_classification_layer() \ No newline at end of file From cb19f5bb8f50952d720e058b8c9d2c652b0c82bd Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 19 Dec 2023 17:25:26 +0100 Subject: [PATCH 45/96] Added descriptions and a wrapper for using the custom loss function with NeuralNetwork. 
--- src/data_loader/data_loader.jl | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/src/data_loader/data_loader.jl b/src/data_loader/data_loader.jl index ae6a45089..24fb90a21 100644 --- a/src/data_loader/data_loader.jl +++ b/src/data_loader/data_loader.jl @@ -93,25 +93,35 @@ It takes as input: """ function loss(model::Union{Chain, AbstractExplicitLayer}, ps::Union{Tuple, NamedTuple}, input::AT, output::BT) where {T, T1, AT<:AbstractArray{T, 3}, BT<:AbstractArray{T1, 3}} output_estimate = model(input, ps) - norm(output - output_estimate)/norm(output) # /T(sqrt(size(output, 2)*size(output, 3))) + norm(output - output_estimate) / norm(output) # /T(sqrt(size(output, 2)*size(output, 3))) end +@doc raw""" +The *autoencoder loss*. +""" function loss(model::Chain, ps::Tuple, input::BT) where {T, BT<:AbstractArray{T, 3}} output_estimate = model(input, ps) - norm(output_estimate - input)/norm(input) # /T(sqrt(size(input, 2)*size(input, 3))) + norm(output_estimate - input) / norm(input) # /T(sqrt(size(input, 2)*size(input, 3))) end function loss(model::Chain, ps::Tuple, input::BT) where {T, BT<:AbstractArray{T, 2}} output_estimate = model(input, ps) - norm(output_estimate - input)/norm(input) # /T(sqrt(size(input, 2))) + norm(output_estimate - input) / norm(input) # /T(sqrt(size(input, 2))) end nt_diff(A, B) = (q = A.q - B.q, p = A.p - B.p) nt_norm(A) = norm(A.q) + norm(A.p) +@doc raw""" +Loss function that takes a `NamedTuple` as input. This should be used with a SympNet (or other neural network-based integrator). It computes: + +```math +\mathtt{loss}(\mathcal{NN}, \mathtt{ps}, \begin{pmatrix} q \\ p \end{pmatrix}, \begin{pmatrix} q' \\ p' \end{pmatrix}) \mapsto \left|| \mathcal{NN}(\begin{pmatrix} q \\ p \end{pmatrix}) - \begin{pmatrix} q' \\ p' \end{pmatrix} \right|| / \left|| \begin{pmatrix} q \\ p \end{pmatrix} \right|| +``` +""" function loss(model::Chain, ps::Tuple, input::NamedTuple, output::NamedTuple) output_estimate = model(input, ps) - nt_norm(nt_diff(output_estimate, output))/nt_norm(input) + nt_norm(nt_diff(output_estimate, output)) / nt_norm(input) end @doc raw""" @@ -136,6 +146,13 @@ function loss(model::Chain, ps::Tuple, dl::DataLoader{T, BT}) where {T, BT<:Name loss(model, ps, dl.data) end +@doc raw""" +Wrapper if we deal with a neural network. +""" +function loss(nn::NeuralNetwork, dl::DataLoader) + loss(nn.model, nn.params, dl) +end + @doc raw""" Computes the accuracy (as opposed to the loss) of a neural network classifier. From cb7725f380a7ec9158183799fad262e4a33bc415 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 19 Dec 2023 17:26:04 +0100 Subject: [PATCH 46/96] Added documentation. --- src/data_loader/batch.jl | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/src/data_loader/batch.jl b/src/data_loader/batch.jl index bbb9e4e39..9a76d0b26 100644 --- a/src/data_loader/batch.jl +++ b/src/data_loader/batch.jl @@ -15,6 +15,9 @@ end hasseqlength(::Batch{<:Integer}) = true hasseqlength(::Batch{<:Nothing}) = false +@doc raw""" +The functor for batch is called with an instance on `DataLoader`. It then returns a tuple of batch indices: ``(\mathcal{I}_1, \ldots, \mathcal{I}_{\lceil\mathtt{dl.n\_params/batch\_size}\rceil})``, where the index runs from 1 to the number of batches, which is the number of parameters divided by the batch size (rounded up). 
+""" function (batch::Batch{<:Nothing})(dl::DataLoader{T, AT}) where {T, AT<:AbstractArray{T, 3}} indices = shuffle(1:dl.n_params) n_batches = Int(ceil(dl.n_params/batch.batch_size)) @@ -28,6 +31,9 @@ function (batch::Batch{<:Nothing})(dl::DataLoader{T, AT}) where {T, AT<:Abstract batches end +@doc raw""" +The functor for batch is called with an instance on `DataLoader`. It then returns a tuple of batch indices: ``(\mathcal{I}_1, \ldots, \mathcal{I}_{\lceil\mathtt{(dl.input\_time\_steps-1)/batch\_size}\rceil})``, where the index runs from 1 to the number of batches, which is the number of input time steps (minus one) divided by the batch size (and rounded up). +""" function (batch::Batch{<:Nothing})(dl::DataLoader{T, AT}) where {T, BT<:AbstractMatrix{T}, AT<:Union{BT, NamedTuple{(:q, :p), Tuple{BT, BT}}}} indices = shuffle(1:dl.input_time_steps) n_batches = Int(ceil((dl.input_time_steps-1)/batch.batch_size)) @@ -88,7 +94,7 @@ function optimize_for_one_epoch!(opt::Optimizer, nn::NeuralNetwork, dl::DataLoad end """ -TODO: Add ProgressMeter!!! +This routine is called if a `DataLoader` storing *symplectic data* (i.e. a `NamedTuple`) is supplied. """ function optimize_for_one_epoch!(opt::Optimizer, model, ps::Union{Tuple, NamedTuple}, dl::DataLoader{T, AT}, batch::Batch, loss) where {T, AT<:NamedTuple} count = 0 @@ -107,7 +113,16 @@ function optimize_for_one_epoch!(opt::Optimizer, model, ps::Union{Tuple, NamedTu total_error/count end - +@doc raw""" +A functor for `Optimizer`. It is called with: + - `nn::NeuralNetwork` + - `dl::DataLoader` + - `batch::Batch` + - `n_epochs::Int` + - `loss` + +The last argument is a function through which `Zygote` differentiates. This argument is optional; if it is not supplied `GeometricMachineLearning` defaults to an appropriate loss for the `DataLoader`. +""" function (o::Optimizer)(nn::NeuralNetwork, dl::DataLoader, batch::Batch, n_epochs::Int, loss) progress_object = ProgressMeter.Progress(n_epochs; enabled=true) loss_array = zeros(n_epochs) From 90bbaa7856743c165c54e9d6871b5987903a6ed7 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 19 Dec 2023 17:53:14 +0100 Subject: [PATCH 47/96] Added constructor for optimizer if input arguments are flipped. --- src/optimizers/optimizer.jl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/optimizers/optimizer.jl b/src/optimizers/optimizer.jl index 7cd78d037..e6e25cdaa 100644 --- a/src/optimizers/optimizer.jl +++ b/src/optimizers/optimizer.jl @@ -22,6 +22,8 @@ function Optimizer(m::OptimizerMethod, nn::NeuralNetwork) Optimizer(m, nn.params) end +Optimizer(nn::NeuralNetwork, m::OptimizerMethod) = Optimizer(m, nn) + ####################################################################################### # optimization step function From 137d64018c0926e1a965d363c56db9dfa10d024f Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 19 Dec 2023 18:09:44 +0100 Subject: [PATCH 48/96] Added a comment saying that the constructor can be called with DataLoader as input. --- src/architectures/sympnet.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/architectures/sympnet.jl b/src/architectures/sympnet.jl index 8e017f125..b07d5d84c 100644 --- a/src/architectures/sympnet.jl +++ b/src/architectures/sympnet.jl @@ -7,7 +7,7 @@ TODO: abstract type SympNet{AT} <: Architecture end @doc raw""" -`LASympNet` is called with **a single input argument**, the **system dimension**. 
Optional input arguments are: +`LASympNet` is called with **a single input argument**, the **system dimension**, or with an instance of `DataLoader`. Optional input arguments are: - `depth::Int`: The number of linear layers that are applied. The default is 5. - `nhidden::Int`: The number of hidden layers (i.e. layers that are **not** input or output layers). The default is 2. - `activation`: The activation function that is applied. By default this is `tanh`. @@ -32,7 +32,7 @@ end @inline AbstractNeuralNetworks.dim(arch::SympNet) = arch.dim @doc raw""" -`GSympNet` is called with **a single input argument**, the **system dimension**. Optional input arguments are: +`GSympNet` is called with **a single input argument**, the **system dimension**, or with an instance of `DataLoader`. Optional input arguments are: - `upscaling_dimension::Int`: The *upscaling dimension* of the gradient layer. See the documentation for `GradientLayerQ` and `GradientLayerP` for further explanation. The default is `2*dim`. - `nhidden::Int`: The number of hidden layers (i.e. layers that are **not** input or output layers). The default is 2. - `activation`: The activation function that is applied. By default this is `tanh`. From b57d1488a29a9e29a16a5a57f0cfb5828e2ac588 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 19 Dec 2023 18:10:14 +0100 Subject: [PATCH 49/96] Added default for number of epochs. --- src/data_loader/batch.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/data_loader/batch.jl b/src/data_loader/batch.jl index 9a76d0b26..10b434ffd 100644 --- a/src/data_loader/batch.jl +++ b/src/data_loader/batch.jl @@ -133,6 +133,6 @@ function (o::Optimizer)(nn::NeuralNetwork, dl::DataLoader, batch::Batch, n_epoch loss_array end -function (o::Optimizer)(nn::NeuralNetwork, dl::DataLoader, batch::Batch, n_epochs::Int) +function (o::Optimizer)(nn::NeuralNetwork, dl::DataLoader, batch::Batch, n_epochs::Int=1) o(nn, dl, batch, n_epochs, loss) end \ No newline at end of file From 3100b0b77401b42e0edc1857527cbf051dea65ac Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 19 Dec 2023 18:11:05 +0100 Subject: [PATCH 50/96] Combined matrix and tensor routines into one. Added another loss for autoencoder qp data. --- src/data_loader/data_loader.jl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/data_loader/data_loader.jl b/src/data_loader/data_loader.jl index 24fb90a21..2beb691b9 100644 --- a/src/data_loader/data_loader.jl +++ b/src/data_loader/data_loader.jl @@ -99,19 +99,19 @@ end @doc raw""" The *autoencoder loss*. """ -function loss(model::Chain, ps::Tuple, input::BT) where {T, BT<:AbstractArray{T, 3}} +function loss(model::Chain, ps::Tuple, input::BT) where {T, BT<:AbstractArray{T}} output_estimate = model(input, ps) norm(output_estimate - input) / norm(input) # /T(sqrt(size(input, 2)*size(input, 3))) end -function loss(model::Chain, ps::Tuple, input::BT) where {T, BT<:AbstractArray{T, 2}} - output_estimate = model(input, ps) - norm(output_estimate - input) / norm(input) # /T(sqrt(size(input, 2))) -end - nt_diff(A, B) = (q = A.q - B.q, p = A.p - B.p) nt_norm(A) = norm(A.q) + norm(A.p) +function loss(model::Chain, ps::Tuple, input::NT) where {T, AT<:AbstractArray{T}, NT<:NamedTuple{(:q, :p,), Tuple{AT, AT}}} + output_estimate = model(input, ps) + nt_norm(output_estimate - input) / nt_norm(input) +end + @doc raw""" Loss function that takes a `NamedTuple` as input. This should be used with a SympNet (or other neural network-based integrator). 
It computes: From 718174fb847fb93bc54ad4ca96568d400da229e2 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 19 Dec 2023 18:12:14 +0100 Subject: [PATCH 51/96] Commented out a section that is probably not needed. --- src/data_loader/tensor_assign.jl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/data_loader/tensor_assign.jl b/src/data_loader/tensor_assign.jl index 3faad0a99..4aac9bfc0 100644 --- a/src/data_loader/tensor_assign.jl +++ b/src/data_loader/tensor_assign.jl @@ -55,6 +55,7 @@ function assign_output_estimate(full_output::AbstractArray{T, 3}, prediction_win output_estimate end +#= """ This function draws random time steps and parameters and based on these assign the batch and the output. @@ -101,6 +102,7 @@ function draw_batch!(batch::AT, output::BT, data::AT, target::BT) where {T, T2, assign_batch!(batch, data, params, time_steps, ndrange=size(batch)) assign_batch!(output, target, params, time_steps, ndrange=size(output)) end +=# """ Used for differentiating assign_output_estimate (this appears in the loss). From 370ee05c4ab154beb44694d79a96694987400db6 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 19 Dec 2023 18:12:41 +0100 Subject: [PATCH 52/96] Test data loader for qp data. --- test/data_loader/batch_data_loader_qp_test.jl | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 test/data_loader/batch_data_loader_qp_test.jl diff --git a/test/data_loader/batch_data_loader_qp_test.jl b/test/data_loader/batch_data_loader_qp_test.jl new file mode 100644 index 000000000..9e1273e49 --- /dev/null +++ b/test/data_loader/batch_data_loader_qp_test.jl @@ -0,0 +1,33 @@ +using GeometricMachineLearning +using Test + +function dummy_qp_data_matrix(dim=2, number_data_points=200, T=Float32) + (q = rand(T, dim, number_data_points), p = (rand(T, dim, number_data_points))) +end + +function dummy_qp_data_tensor(dim=2, number_of_time_steps=100, number_of_parameters=20, T=Float32) + (q = rand(T, dim, number_of_time_steps, number_of_parameters), p = (rand(T, dim, number_of_time_steps, number_of_parameters))) +end + +function test_data_loader(dim=2, number_of_time_steps=100, number_of_parameters=20, batch_size, T=Float32) + dl1 = DataLoader(dummy_qp_data_matrix(dim, number_of_time_steps, T)) + dl2 = DataLoader(dummy_qp_data_tensor(dim, number_of_time_steps, number_of_parameters)) + + arch1 = GSympNet(dl1) + arch2 = GSympNet(dl2) + + nn1 = NeuralNetwork(arch1, CPU(), T) + nn2 = NeuralNetwork(arch2, CPU(), T) + + loss1 = loss(nn1, dl1) + loss2 = loss(nn2, dl2) + + batch = Batch(batch_size) + o₁ = Optimizer(GradientOptimizer(), nn1) + o₂ = Optimizer(GradientOptimizer(), nn2) + + o₁(nn1, dl1, batch) + o₂(nn2, dl2, batch) +end + +test_data_loader() \ No newline at end of file From 14d6ae2130d45b80ee21a631ef9eddf912d139c2 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 19 Dec 2023 18:13:08 +0100 Subject: [PATCH 53/96] Renamed and added tests. 
--- test/runtests.jl | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/test/runtests.jl b/test/runtests.jl index e94ad1242..b0069ae2d 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -34,6 +34,8 @@ using SafeTestsets @safetestset "Training " begin include("train!/test_training.jl") end @safetestset "NeuralNetSolution " begin include("train!/test_neuralnet_solution.jl") end @safetestset "Problem & Integrators " begin include("integrator/test_integrator.jl") end -@safetestset "Data Loader #1 " begin include("data_loader/data_loader.jl") end -@safetestset "Data Loader #2 " begin include("data_loader/mnist_utils.jl") end -@safetestset "Data Loader #3 (Batch struct) " begin include("data_loader/batch.jl") end \ No newline at end of file + +@safetestset "Test data loader for q and p data " begin include("data_loader/batch_data_loader_qp_test.jl") end +@safetestset "Test mnist_utils. " begin include("data_loader/mnist_utils.jl") end +@safetestset "Test the data loader in combination with optimization_step! " begin include("data_loader/data_loader_optimization_step.jl") end +@safetestset "Optimizer functor with data loader for Adam " begin include("data_loader/optimizer_functor_with_adam.jl") \ No newline at end of file From 9edec7be2edb767966ab05477fe7866209ab2a3f Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 19 Dec 2023 18:14:24 +0100 Subject: [PATCH 54/96] Adjusted symplectic matrix. --- src/arrays/symplectic.jl | 80 ++++++++++++---------------------------- 1 file changed, 24 insertions(+), 56 deletions(-) diff --git a/src/arrays/symplectic.jl b/src/arrays/symplectic.jl index 1dcc59658..3a4f138d6 100644 --- a/src/arrays/symplectic.jl +++ b/src/arrays/symplectic.jl @@ -1,75 +1,43 @@ @doc raw""" - `SymplecticMatrix(n)` +`SymplecticPotential(n)` Returns a symplectic matrix of size 2n x 2n ```math \begin{pmatrix} -0 & & & 1 & & & \\ -& \ddots & & & \ddots & & \\ -& & 0 & & & 1 \\ --1 & & & 0 & & & \\ -& \ddots & & & \ddots & & \\ -& & -1 & & 0 & \\ +\mathbb{O} & \mathbb{I} \\ +\mathbb{O} & -\mathbb{I} \\ \end{pmatrix} ``` - - `SymplecticProjection(N,n)` -Returns the symplectic projection matrix E of the Stiefel manifold, i.e. π: Sp(2N) → Sp(2n,2N), A ↦ AE - """ -#= -function SymplecticMatrix(n::Int, T::DataType=Float64) - BandedMatrix((n => ones(T,n), -n => -ones(T,n)), (2n,2n)) -end - -SymplecticMatrix(T::DataType, n::Int) = SymplecticMatrix(n, T) - -@doc raw""" -```math -\begin{pmatrix} -I & 0 \\ -0 & 0 \\ -0 & I \\ -0 & 0 \\ -\end{pmatrix} -``` -""" -=# - -function SymplecticPotential(n::Int, T::DataType=Float64) - J = zeros(T, 2*n, 2*n) - J[1:n, (n+1):2*n] = one(ones(T, n, n)) - J[(n+1):2*n, 1:n] = -one(ones(T, n, n)) +function SymplecticPotential(backend, n2::Int, T::DataType=Float64) + @assert iseven(n2) + n = n2÷2 + J = KernelAbstractions.zeros(backend, T, 2*n, 2*n) + assign_ones_for_symplectic_potential! = assign_ones_for_symplectic_potential_kernel!(backend) + assign_ones_for_symplectic_potential!(J, n, ndrange=n) J end +SymplecticPotential(n::Int, T::DataType=Float64) = SymplecticPotential(CPU(), n, T) +SymplecticPotential(bakend, T::DataType, n::Int) = SymplecticPotential(backend, n, T) + SymplecticPotential(T::DataType, n::Int) = SymplecticPotential(n, T) -struct SymplecticProjection{T} <: AbstractMatrix{T} - N::Int - n::Int - SymplecticProjection(N, n, T = Float64) = new{T}(N,n) +@kernel function assign_ones_for_symplectic_potential_kernel!(J::AbstractMatrix{T}, n::Int) where T + i = @index(Global) + J[map_index_for_symplectic_potential(i, n)...] 
= i ≤ n ? one(T) : -one(T) end -function Base.getindex(E::SymplecticProjection,i,j) - if i ≤ E.n - if j == i - return 1. - end - return 0. - end - if j > E.n - if (j-E.n) == (i-E.N) - return 1. - end - return 0. +""" +This assigns the right index for the symplectic potential. To be used with `assign_ones_for_symplectic_potential_kernel!`. +""" +function map_index_for_symplectic_potential(i::Int, n::Int) + if i ≤ n + return (i, i+n) + else + return (i, i-n) end - return 0. -end - - -Base.parent(E::SymplecticProjection) = (E.N,E.n) -Base.size(E::SymplecticProjection) = (2*E.N,2*E.n) +end \ No newline at end of file From 2acdd2487d64ec2d04d6e2caabd252f3424bbaa9 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 19 Dec 2023 18:45:28 +0100 Subject: [PATCH 55/96] dim was missing for specifying default. --- src/architectures/sympnet.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/architectures/sympnet.jl b/src/architectures/sympnet.jl index b07d5d84c..dba449c4e 100644 --- a/src/architectures/sympnet.jl +++ b/src/architectures/sympnet.jl @@ -49,7 +49,7 @@ struct GSympNet{AT, InitUpper} <: SympNet{AT} where {InitUpper} end - function GSympNet(dl::DataLoader; upscaling_dimension=2*dim, nhidden=2, activation=tanh, init_upper=true) + function GSympNet(dl::DataLoader; upscaling_dimension=2*dl.input_dim, nhidden=2, activation=tanh, init_upper=true) new{typeof(activation), init_upper}(dl.input_dim, upscaling_dimension, nhidden, activation) end end From f8b8fff1211ce88fd88c4fa7a39dd6e0a919d97d Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 19 Dec 2023 18:46:01 +0100 Subject: [PATCH 56/96] dl has no field data. --- src/data_loader/data_loader.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/data_loader/data_loader.jl b/src/data_loader/data_loader.jl index 2beb691b9..5117bbc23 100644 --- a/src/data_loader/data_loader.jl +++ b/src/data_loader/data_loader.jl @@ -109,7 +109,7 @@ nt_norm(A) = norm(A.q) + norm(A.p) function loss(model::Chain, ps::Tuple, input::NT) where {T, AT<:AbstractArray{T}, NT<:NamedTuple{(:q, :p,), Tuple{AT, AT}}} output_estimate = model(input, ps) - nt_norm(output_estimate - input) / nt_norm(input) + nt_norm(nt_diff(output_estimate, input)) / nt_norm(input) end @doc raw""" @@ -143,7 +143,7 @@ function loss(model::Chain, ps::Tuple, dl::DataLoader{T, BT, Nothing}) where {T, end function loss(model::Chain, ps::Tuple, dl::DataLoader{T, BT}) where {T, BT<:NamedTuple} - loss(model, ps, dl.data) + loss(model, ps, dl.input) end @doc raw""" From 8c4c73b1e9faa8e826fcd8463558664c058edd4e Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 19 Dec 2023 18:46:27 +0100 Subject: [PATCH 57/96] Fixed typos. 
--- test/data_loader/batch_data_loader_qp_test.jl | 10 +++++----- test/data_loader/mnist_utils.jl | 6 +++--- test/data_loader/optimizer_functor_with_adam.jl | 6 +++--- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/test/data_loader/batch_data_loader_qp_test.jl b/test/data_loader/batch_data_loader_qp_test.jl index 9e1273e49..6258c3eef 100644 --- a/test/data_loader/batch_data_loader_qp_test.jl +++ b/test/data_loader/batch_data_loader_qp_test.jl @@ -9,7 +9,7 @@ function dummy_qp_data_tensor(dim=2, number_of_time_steps=100, number_of_paramet (q = rand(T, dim, number_of_time_steps, number_of_parameters), p = (rand(T, dim, number_of_time_steps, number_of_parameters))) end -function test_data_loader(dim=2, number_of_time_steps=100, number_of_parameters=20, batch_size, T=Float32) +function test_data_loader(dim=2, number_of_time_steps=100, number_of_parameters=20, batch_size=10, T=Float32) dl1 = DataLoader(dummy_qp_data_matrix(dim, number_of_time_steps, T)) dl2 = DataLoader(dummy_qp_data_tensor(dim, number_of_time_steps, number_of_parameters)) @@ -19,15 +19,15 @@ function test_data_loader(dim=2, number_of_time_steps=100, number_of_parameters= nn1 = NeuralNetwork(arch1, CPU(), T) nn2 = NeuralNetwork(arch2, CPU(), T) - loss1 = loss(nn1, dl1) - loss2 = loss(nn2, dl2) + loss1 = GeometricMachineLearning.loss(nn1, dl1) + loss2 = GeometricMachineLearning.loss(nn2, dl2) batch = Batch(batch_size) o₁ = Optimizer(GradientOptimizer(), nn1) - o₂ = Optimizer(GradientOptimizer(), nn2) + # o₂ = Optimizer(GradientOptimizer(), nn2) o₁(nn1, dl1, batch) - o₂(nn2, dl2, batch) + # o₂(nn2, dl2, batch) end test_data_loader() \ No newline at end of file diff --git a/test/data_loader/mnist_utils.jl b/test/data_loader/mnist_utils.jl index f6c637a58..502739756 100644 --- a/test/data_loader/mnist_utils.jl +++ b/test/data_loader/mnist_utils.jl @@ -49,17 +49,17 @@ test_onehotbatch([1, 2, 5, 0]) @doc raw""" Generates an MNIST-like dummy data set. 
""" -function generate_dummy_mnist(dim₁=28, dim₂=18, number_images=100, T=Float32) +function generate_dummy_mnist(dim₁=28, dim₂=28, number_images=100, T=Float32) train_x = rand(T, dim₁, dim₂, number_images) train_y = Int.(ceil.(10 * rand(T, number_images))) .- 1 train_x, train_y end function test_optimizer_for_classification_layer(; dim₁=28, dim₂=28, number_images=100, patch_length=7, T=Float32) - dl = DataLoader(generate_dummy_mnist(dim₁, dim₂, number_images, T); patch_length=patch_length) + dl = DataLoader(generate_dummy_mnist(dim₁, dim₂, number_images, T)...; patch_length=patch_length) activation_function(x) = tanh.(x) - model = Classification((dim₁ ÷ patch_length) * (dim₂ ÷ patch_length), 10, activation_function) + model = Classification(patch_length * patch_length, 10, activation_function) ps = initialparameters(CPU(), T, model) loss₁ = GeometricMachineLearning.loss(model, ps, dl) diff --git a/test/data_loader/optimizer_functor_with_adam.jl b/test/data_loader/optimizer_functor_with_adam.jl index 2caf538b2..c2e3be6a8 100644 --- a/test/data_loader/optimizer_functor_with_adam.jl +++ b/test/data_loader/optimizer_functor_with_adam.jl @@ -13,14 +13,14 @@ function create_dummy_mnist(;T=Float32, dim₁=6, dim₂=6, n_images=10) rand(T, dim₁, dim₂, n_images), Int.(floor.(10*rand(T, n_images))) end -function test_optimizer_functor_with_adam(;T=Float32, dim₁=6, dim₂=6, n_images=10, patch_length=patch_length) - dl = DataLoader(create_dummy_mnist(T=T, dim₁=dim₁, dim₂=dim₂, n_images=n_images)...; patch_length=3) +function test_optimizer_functor_with_adam(;T=Float32, dim₁=6, dim₂=6, n_images=10, patch_length=3) + dl = DataLoader(create_dummy_mnist(T=T, dim₁=dim₁, dim₂=dim₂, n_images=n_images)...; patch_length=patch_length) # batch size is equal to two batch = Batch(2) # input dim is dim₁ / patch_length * dim₂ / pach_length; the transformer is called with dim₁ / patch_length and two layers - model = Chain(Transformer(dl.input_dim, dim₁ / patch_length, 2; Stiefel=true), Classification(dl.input_dim, 10, σ)) + model = Chain(Transformer(dl.input_dim, patch_length, 2; Stiefel=true), Classification(dl.input_dim, 10, σ)) ps = initialparameters(CPU(), Float32, model) loss₁ = GeometricMachineLearning.loss(model, ps, dl) From d0725941779e31494db573bc947c66cdf95d43be Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 19 Dec 2023 18:57:19 +0100 Subject: [PATCH 58/96] Removed method that appeared twice for some reason. --- src/data_loader/data_loader.jl | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/data_loader/data_loader.jl b/src/data_loader/data_loader.jl index 4c5eeff2b..5117bbc23 100644 --- a/src/data_loader/data_loader.jl +++ b/src/data_loader/data_loader.jl @@ -153,13 +153,6 @@ function loss(nn::NeuralNetwork, dl::DataLoader) loss(nn.model, nn.params, dl) end -@doc raw""" -Wrapper if we deal with a neural network. -""" -function loss(nn::NeuralNetwork, dl::DataLoader) - loss(nn.model, nn.params, dl) -end - @doc raw""" Computes the accuracy (as opposed to the loss) of a neural network classifier. From d071839f6891fdbf7d5d33cdae2c289719298ea9 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 19 Dec 2023 19:02:03 +0100 Subject: [PATCH 59/96] Forgot to commit before. 
--- test/data_loader/batch_data_loader_qp_test.jl | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/test/data_loader/batch_data_loader_qp_test.jl b/test/data_loader/batch_data_loader_qp_test.jl index 6194bbc39..e189aca2f 100644 --- a/test/data_loader/batch_data_loader_qp_test.jl +++ b/test/data_loader/batch_data_loader_qp_test.jl @@ -9,11 +9,8 @@ function dummy_qp_data_tensor(dim=2, number_of_time_steps=100, number_of_paramet (q = rand(T, dim, number_of_time_steps, number_of_parameters), p = (rand(T, dim, number_of_time_steps, number_of_parameters))) end -<<<<<<< HEAD function test_data_loader(dim=2, number_of_time_steps=100, number_of_parameters=20, batch_size=10, T=Float32) -======= -function test_data_loader(dim=2, number_of_time_steps=100, number_of_parameters=20, batch_size, T=Float32) ->>>>>>> 4528053bc135b3f495ac732804cd913e4187cc96 + dl1 = DataLoader(dummy_qp_data_matrix(dim, number_of_time_steps, T)) dl2 = DataLoader(dummy_qp_data_tensor(dim, number_of_time_steps, number_of_parameters)) @@ -23,7 +20,6 @@ function test_data_loader(dim=2, number_of_time_steps=100, number_of_parameters= nn1 = NeuralNetwork(arch1, CPU(), T) nn2 = NeuralNetwork(arch2, CPU(), T) -<<<<<<< HEAD loss1 = GeometricMachineLearning.loss(nn1, dl1) loss2 = GeometricMachineLearning.loss(nn2, dl2) @@ -33,17 +29,6 @@ function test_data_loader(dim=2, number_of_time_steps=100, number_of_parameters= o₁(nn1, dl1, batch) # o₂(nn2, dl2, batch) -======= - loss1 = loss(nn1, dl1) - loss2 = loss(nn2, dl2) - - batch = Batch(batch_size) - o₁ = Optimizer(GradientOptimizer(), nn1) - o₂ = Optimizer(GradientOptimizer(), nn2) - - o₁(nn1, dl1, batch) - o₂(nn2, dl2, batch) ->>>>>>> 4528053bc135b3f495ac732804cd913e4187cc96 end test_data_loader() \ No newline at end of file From 2bb3f2994d8f228937d1929f7855568e4e04d737 Mon Sep 17 00:00:00 2001 From: CompatHelper Julia Date: Wed, 20 Dec 2023 00:35:47 +0000 Subject: [PATCH 60/96] CompatHelper: bump compat for GPUArrays to 10, (keep existing compat) --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 16c3b1e15..1936d4dd8 100644 --- a/Project.toml +++ b/Project.toml @@ -44,7 +44,7 @@ ChainRulesTestUtils = "1" Distances = "0.10" Documenter = "0.27, 1" ForwardDiff = "0.10" -GPUArrays = "8, 9" +GPUArrays = "8, 9, 10" GeometricBase = "0.9" GeometricEquations = "0.14" GeometricIntegrators = "0.13" From d160bcfa6860d55cc5d259a290973f567665999b Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 13 Dec 2023 17:22:18 +0100 Subject: [PATCH 61/96] Fixed docs for optimize_for_one_epoch --- src/data_loader/batch.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/data_loader/batch.jl b/src/data_loader/batch.jl index 10b434ffd..449fa05a8 100644 --- a/src/data_loader/batch.jl +++ b/src/data_loader/batch.jl @@ -64,7 +64,7 @@ With the optional argument: The output of `optimize_for_one_epoch!` is the average loss over all batches of the epoch: ```math -output = \frac{1}{mathtt{steps\_per\_epoch}}\sum_{t=1}^mathtt{steps\_per\_epoch}loss(\theta^{(t-1)}). +output = \frac{1}{\mathtt{steps\_per\_epoch}}\sum_{t=1}^\mathtt{steps\_per\_epoch}loss(\theta^{(t-1)}). ``` This is done because any **reverse differentiation** routine always has two outputs: a pullback and the value of the function it is differentiating. In the case of zygote: `loss_value, pullback = Zygote.pullback(ps -> loss(ps), ps)` (if the loss only depends on the parameters). 
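For a single batch this is used roughly as follows (an illustrative sketch, not the exact implementation; `input_batch` and `output_batch` stand for the data drawn for one batch):

```julia
loss_value, pullback = Zygote.pullback(ps -> loss(model, ps, input_batch, output_batch), ps)
dp = pullback(one(loss_value))[1]      # gradient with respect to the parameters
optimization_step!(opt, model, ps, dp)
```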
""" From b35e5e2c0941008a2cd539bd4defbdea3db56012 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Thu, 14 Dec 2023 14:07:41 +0100 Subject: [PATCH 62/96] Added descriptions for some functions in the docs. --- src/layers/multi_head_attention.jl | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/layers/multi_head_attention.jl b/src/layers/multi_head_attention.jl index 05c3c53cc..e07820cb7 100644 --- a/src/layers/multi_head_attention.jl +++ b/src/layers/multi_head_attention.jl @@ -125,14 +125,23 @@ function (d::MultiHeadAttention{M, M, Stiefel, Retraction, false})(x::AbstractAr end import ChainRules +""" +This has to be extended to tensors; you should probably do a PR in ChainRules for this. +""" function ChainRules._adjoint_mat_pullback(y::AbstractArray{T, 3}, proj) where T (NoTangent(), proj(tensor_transpose(y))) end +""" +Extend `mat_tensor_mul` to a multiplication by the adjoint of an element of `StiefelManifold`. +""" function mat_tensor_mul(Y::AT, x::AbstractArray{T, 3}) where {T, BT <: AbstractArray{T}, ST <: StiefelManifold{T, BT}, AT <: Adjoint{T, ST}} mat_tensor_mul(Y.parent.A', x) end +""" +Extend `mat_tensor_mul` to a multiplication by an element of `StiefelManifold`. +""" function mat_tensor_mul(Y::StiefelManifold, x::AbstractArray{T, 3}) where T mat_tensor_mul(Y.A, x) end \ No newline at end of file From 8fe48325becd86aea9bf3a9c4f8c6a2968df5679 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Thu, 14 Dec 2023 15:08:41 +0100 Subject: [PATCH 63/96] Added eltype to AbstractCache. --- src/optimizers/bfgs_cache.jl | 4 ++-- src/optimizers/optimizer_caches.jl | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/optimizers/bfgs_cache.jl b/src/optimizers/bfgs_cache.jl index 26f1c0b96..96561f8be 100644 --- a/src/optimizers/bfgs_cache.jl +++ b/src/optimizers/bfgs_cache.jl @@ -5,7 +5,7 @@ It stores an array for the previous time step `B` and the inverse of the Hessian It is important to note that setting up this cache already requires a derivative! This is not the case for the other optimizers. """ -struct BFGSCache{T, AT<:AbstractArray{T}} <: AbstractCache +struct BFGSCache{T, AT<:AbstractArray{T}} <: AbstractCache{T} B::AT S::AT H::AbstractMatrix{T} @@ -19,7 +19,7 @@ In order to initialize `BGGSCache` we first need gradient information. This is w NOTE: we may not need this. """ -struct BFGSDummyCache{T, AT<:AbstractArray{T}} <: AbstractCache +struct BFGSDummyCache{T, AT<:AbstractArray{T}} <: AbstractCache{T} function BFGSDummyCache(B::AbstractArray) new{eltype(B), typeof(zero(B))}() end diff --git a/src/optimizers/optimizer_caches.jl b/src/optimizers/optimizer_caches.jl index b8a21cb0a..189ef7ae6 100644 --- a/src/optimizers/optimizer_caches.jl +++ b/src/optimizers/optimizer_caches.jl @@ -7,12 +7,12 @@ AbstractCache has subtypes: All of them can be initialized with providing an array (also supporting manifold types). 
""" -abstract type AbstractCache end +abstract type AbstractCache{T} end ############################################################################# # All the definitions of the caches -struct AdamCache{T, AT <: AbstractArray{T}} <: AbstractCache +struct AdamCache{T, AT <: AbstractArray{T}} <: AbstractCache{T} B₁::AT B₂::AT function AdamCache(Y::AbstractArray) @@ -20,15 +20,15 @@ struct AdamCache{T, AT <: AbstractArray{T}} <: AbstractCache end end -struct MomentumCache{T, AT <: AbstractArray{T}} <:AbstractCache +struct MomentumCache{T, AT <: AbstractArray{T}} <:AbstractCache{T} B::AT function MomentumCache(Y::AbstractArray) new{eltype(Y), typeof(zero(Y))}(zero(Y)) end end -struct GradientCache <: AbstractCache end -GradientCache(::AbstractArray) = GradientCache() +struct GradientCache{T} <: AbstractCache{T} end +GradientCache(::AbstractArray{T}) where T = GradientCache{T}() ############################################################################# # All the setup_cache functions From ebc5f71f02cbd75b1fbcfdf778a4d19eff475e42 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Thu, 14 Dec 2023 15:13:27 +0100 Subject: [PATCH 64/96] Removed comments that seem to have become unnecessary. --- src/optimizers/optimizer_caches.jl | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/optimizers/optimizer_caches.jl b/src/optimizers/optimizer_caches.jl index 189ef7ae6..398eaa562 100644 --- a/src/optimizers/optimizer_caches.jl +++ b/src/optimizers/optimizer_caches.jl @@ -33,11 +33,6 @@ GradientCache(::AbstractArray{T}) where T = GradientCache{T}() ############################################################################# # All the setup_cache functions -# I don't really understand what we need these for ??? -# setup_adam_cache(B::AbstractArray) = reshape([setup_adam_cache(b) for b in B], size(B)) -# setup_momentum_cache(B::AbstractArray) = reshape([setup_momentum_cache(b) for b in B], size(B)) -# setup_gradient_cache(B::AbstractArray) = reshape([setup_gradient_cache(b) for b in B], size(B)) - setup_adam_cache(ps::NamedTuple) = apply_toNT(setup_adam_cache, ps) setup_momentum_cache(ps::NamedTuple) = apply_toNT(setup_momentum_cache, ps) setup_gradient_cache(ps::NamedTuple) = apply_toNT(setup_gradient_cache, ps) From 3c967e8e9d02acb85a126234560fadd1ddd11118 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 20 Dec 2023 08:41:58 +0100 Subject: [PATCH 65/96] Resolved merge conflict with array_tests.jl (i.e. added vectorization test). --- test/arrays/array_tests.jl | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/test/arrays/array_tests.jl b/test/arrays/array_tests.jl index c0da5eba9..1c372344e 100644 --- a/test/arrays/array_tests.jl +++ b/test/arrays/array_tests.jl @@ -76,6 +76,12 @@ function stiefel_lie_alg_add_sub_test(N, n) @test all(abs.(projection(W₁ - W₂) .- S₄) .< 1e-10) end + +function stiefel_lie_alg_vectorization_test(N, n; T=Float32) + A = rand(StiefelLieAlgHorMatrix{T}, N, n) + @test isapprox(StiefelLieAlgHorMatrix(vec(A), N, n), A) +end + # TODO: tests for ADAM functions # test everything for different n & N values @@ -96,4 +102,5 @@ for (N, n) ∈ zip(N_vec, n_vec) skew_mat_mul_test2(N) stiefel_proj_test(N,n) stiefel_lie_alg_add_sub_test(N,n) + stiefel_lie_alg_vectorization_test(N, n) end From da77a967fa26d914cbb62031ceb7f81cc3548d08 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 20 Dec 2023 08:45:03 +0100 Subject: [PATCH 66/96] Added sampling test for custom arrays (resolved conflict). 
--- test/runtests.jl | 16 +++--- ...ulti_head_attention_stiefel_optim_cache.jl | 51 ++++++++++++------- ...multi_head_attention_stiefel_retraction.jl | 51 +++++++++---------- .../multi_head_attention_stiefel_setup.jl | 33 ++++++++---- .../transformer_application.jl | 9 ++-- .../transformer_gradient.jl | 3 ++ .../transformer_optimizer.jl | 9 +++- test/transformer_related/transformer_setup.jl | 3 ++ 8 files changed, 109 insertions(+), 66 deletions(-) diff --git a/test/runtests.jl b/test/runtests.jl index 033b04f16..1b6cb1b4d 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -11,13 +11,15 @@ using SafeTestsets @safetestset "Manifold Neural Network Layers " begin include("layers/manifold_layers.jl") end @safetestset "Custom AD rules for kernels " begin include("custom_ad_rules/kernel_pullbacks.jl") end @safetestset "ResNet " begin include("layers/resnet_tests.jl") end -@safetestset "Transformer Networks #1 " begin include("transformer_related/multi_head_attention_stiefel_optim_cache.jl") end -@safetestset "Transformer Networks #2 " begin include("transformer_related/multi_head_attention_stiefel_retraction.jl") end -@safetestset "Transformer Networks #3 " begin include("transformer_related/multi_head_attention_stiefel_setup.jl") end -@safetestset "Transformer Networks #4 " begin include("transformer_related/transformer_setup.jl") end -@safetestset "Transformer Networks #5 " begin include("transformer_related/transformer_application.jl") end -@safetestset "Transformer Networks #6 " begin include("transformer_related/transformer_gradient.jl") end -@safetestset "Transformer Networks #7 " begin include("transformer_related/transformer_optimizer.jl") end + +# transformer-related tests +@safetestset "Test setup of MultiHeadAttention layer Stiefel weights " begin include("transformer_related/multi_head_attention_stiefel_setup.jl") end +@safetestset "Test geodesic and Cayley retr for the MultiHeadAttention layer w/ St weights " begin include("transformer_related/multi_head_attention_stiefel_retraction.jl") end +@safetestset "Test the correct setup of the various optimizer caches for MultiHeadAttention " begin include("transformer_related/multi_head_attention_stiefel_optim_cache.jl") end +@safetestset "Check if the transformer can be applied to a tensor. " begin include("transformer_related/transformer_application.jl") end +@safetestset "Check if the gradient/pullback of MultiHeadAttention changes type in St case " begin include("transformer_related/transformer_gradient.jl") end +@safetestset "Check if the optimization_step! 
changes the parameters of the transformer " begin include("transformer_related/transformer_optimizer.jl") end + @safetestset "Attention layer #1 " begin include("attention_layer/attention_setup.jl") end @safetestset "(MultiHead)Attention " begin include("attention_layer/apply_multi_head_attention.jl") end @safetestset "Classification layer " begin include("layers/classification.jl") end diff --git a/test/transformer_related/multi_head_attention_stiefel_optim_cache.jl b/test/transformer_related/multi_head_attention_stiefel_optim_cache.jl index ca4209c39..c7614fc52 100644 --- a/test/transformer_related/multi_head_attention_stiefel_optim_cache.jl +++ b/test/transformer_related/multi_head_attention_stiefel_optim_cache.jl @@ -2,19 +2,10 @@ using GeometricMachineLearning, Test import Lux, Random, LinearAlgebra -dim = 64 -n_heads = 8 -Dₕ = dim÷8 -tol = eps(Float32) - -model = Chain(MultiHeadAttention(dim, n_heads), MultiHeadAttention(dim, n_heads, Stiefel=true)) -ps = initialparameters(CPU(), Float32, model) - -o₁ = Optimizer(AdamOptimizer(), ps) -o₂ = Optimizer(MomentumOptimizer(), ps) -o₃ = Optimizer(GradientOptimizer(), ps) - -function check_adam_cache(C::AbstractCache) +@doc raw""" +This checks if the Adam cache was set up in the right way +""" +function check_adam_cache(C::AbstractCache{T}, tol= T(10) * eps(T)) where T @test typeof(C) <: AdamCache @test propertynames(C) == (:B₁, :B₂) @test typeof(C.B₁) <: StiefelLieAlgHorMatrix @@ -24,7 +15,10 @@ function check_adam_cache(C::AbstractCache) end check_adam_cache(B::NamedTuple) = apply_toNT(check_adam_cache, B) -function check_momentum_cache(C::AbstractCache) +@doc raw""" +This checks if the momentum cache was set up in the right way +""" +function check_momentum_cache(C::AbstractCache{T}, tol= T(10) * eps(T)) where T @test typeof(C) <: MomentumCache @test propertynames(C) == (:B,) @test typeof(C.B) <: StiefelLieAlgHorMatrix @@ -32,12 +26,33 @@ function check_momentum_cache(C::AbstractCache) end check_momentum_cache(B::NamedTuple) = apply_toNT(check_momentum_cache, B) -function check_gradient_cache(C::AbstractCache) +@doc raw""" +This checks if the gradient cache was set up in the right way +""" +function check_gradient_cache(C::AbstractCache{T}) where T @test typeof(C) <: GradientCache @test propertynames(C) == () end check_gradient_cache(B::NamedTuple) = apply_toNT(check_gradient_cache, B) -check_adam_cache(o₁.cache[2]) -check_momentum_cache(o₂.cache[2]) -check_gradient_cache(o₃.cache[2]) +@doc raw""" +This checks if all the caches are set up in the right way for the `MultiHeadAttention` layer with Stiefel weights. + +TODO: +- [ ] `BFGSOptimizer` !! 
+""" +function test_cache_setups_for_optimizer_for_multihead_attention_layer(T::Type, dim::Int, n_heads::Int) + @assert dim % n_heads == 0 + model = MultiHeadAttention(dim, n_heads, Stiefel=true) + ps = initialparameters(CPU(), T, model) + + o₁ = Optimizer(AdamOptimizer(), ps) + o₂ = Optimizer(MomentumOptimizer(), ps) + o₃ = Optimizer(GradientOptimizer(), ps) + + check_adam_cache(o₁.cache) + check_momentum_cache(o₂.cache) + check_gradient_cache(o₃.cache) +end + +test_cache_setups_for_optimizer_for_multihead_attention_layer(Float32, 64, 8) \ No newline at end of file diff --git a/test/transformer_related/multi_head_attention_stiefel_retraction.jl b/test/transformer_related/multi_head_attention_stiefel_retraction.jl index 842069da5..54f6214e8 100644 --- a/test/transformer_related/multi_head_attention_stiefel_retraction.jl +++ b/test/transformer_related/multi_head_attention_stiefel_retraction.jl @@ -1,7 +1,3 @@ -""" -This is a test for that checks if the retractions (geodesic and Cayley for now) map from StiefelLieAlgHorMatrix to StiefelManifold when used with MultiHeadAttention. -""" - import Random, Test, Lux, LinearAlgebra, KernelAbstractions using GeometricMachineLearning, Test @@ -9,37 +5,40 @@ using GeometricMachineLearning: geodesic using GeometricMachineLearning: cayley using GeometricMachineLearning: init_optimizer_cache -dim = 64 -n_heads = 8 -Dₕ = dim÷8 -tol = eps(Float32) -T = Float32 -backend = KernelAbstractions.CPU() - -model = MultiHeadAttention(dim, n_heads, Stiefel=true) - -ps = initialparameters(backend, T, model) - -cache = init_optimizer_cache(MomentumOptimizer(), ps) - -E = StiefelProjection(dim, Dₕ, T) -function check_retraction_geodesic(A::AbstractMatrix) +@doc raw""" +This function computes the geodesic retraction of an element of `StiefelLieAlgHorMatrix` and then checks if the resulting element is `StiefelProjection`. +""" +function check_retraction_geodesic(A::AbstractMatrix{T}, tol=eps(T)) where T A_retracted = geodesic(A) @test typeof(A_retracted) <: StiefelManifold - @test LinearAlgebra.norm(A_retracted - E) < tol + @test LinearAlgebra.norm(A_retracted - StiefelProjection(A_retracted)) < tol end check_retraction_geodesic(cache::NamedTuple) = apply_toNT(check_retraction_geodesic, cache) check_retraction_geodesic(B::MomentumCache) = check_retraction_geodesic(B.B) -check_retraction_geodesic(cache) - -E = StiefelProjection(dim, Dₕ) -function check_retraction_cayley(A::AbstractMatrix) +@doc raw""" +This function computes the cayley retraction of an element of `StiefelLieAlgHorMatrix` and then checks if the resulting element is `StiefelProjection`. +""" +function check_retraction_cayley(A::AbstractMatrix{T}, tol=eps(T)) where T A_retracted = cayley(A) @test typeof(A_retracted) <: StiefelManifold - @test LinearAlgebra.norm(A_retracted - E) < tol + @test LinearAlgebra.norm(A_retracted - StiefelProjection(A_retracted)) < tol end check_retraction_cayley(cache::NamedTuple) = apply_toNT(check_retraction_cayley, cache) check_retraction_cayley(B::MomentumCache) = check_retraction_cayley(B.B) -check_retraction_cayley(cache) +@doc raw""" +This is a test for that checks if the retractions (geodesic and Cayley for now) map from `StiefelLieAlgHorMatrix` to `StiefelManifold` when used with `MultiHeadAttention`. 
+""" +function test_multi_head_attention_retraction(T::Type, dim, n_heads, tol=eps(T), backend=KernelAbstractions.CPU()) + model = MultiHeadAttention(dim, n_heads, Stiefel=true) + + ps = initialparameters(backend, T, model) + cache = init_optimizer_cache(MomentumOptimizer(), ps) + + check_retraction_geodesic(cache) + + check_retraction_cayley(cache) +end + +test_multi_head_attention_retraction(Float32, 64, 8) \ No newline at end of file diff --git a/test/transformer_related/multi_head_attention_stiefel_setup.jl b/test/transformer_related/multi_head_attention_stiefel_setup.jl index 3b8cc6cbf..742839331 100644 --- a/test/transformer_related/multi_head_attention_stiefel_setup.jl +++ b/test/transformer_related/multi_head_attention_stiefel_setup.jl @@ -3,25 +3,36 @@ import Random, Test, Lux, LinearAlgebra, KernelAbstractions using GeometricMachineLearning, Test using GeometricMachineLearning: init_optimizer_cache -T = Float32 -model = MultiHeadAttention(64, 8, Stiefel=true) -ps = initialparameters(KernelAbstractions.CPU(), T, model) -tol = 10*eps(T) - -function check_setup(A::AbstractMatrix) +@doc raw""" +This checks for an arbitrary matrix ``A\in\mathbb{R}^{N\times{}n}`` if ``A\in{}St(n,N)``. +""" +function check_setup(A::AbstractMatrix{T}, tol=T(10)*eps(T)) where T @test typeof(A) <: StiefelManifold @test check(A) < tol end check_setup(ps::NamedTuple) = apply_toNT(check_setup, ps) -check_setup(ps) -######## check if the gradients are set up the correct way -function check_grad_setup(B::AbstractMatrix) +@doc raw""" +This checks for an arbitrary matrix ``B\in\mathbb{R}^{N\times{}N}`` if ``B\in\mathfrak{g}^\mathrm{hor}``. +""" +function check_grad_setup(B::AbstractMatrix{T}, tol=T(10)*eps(T)) where T @test typeof(B) <: StiefelLieAlgHorMatrix @test LinearAlgebra.norm(B) < tol end check_grad_setup(gx::NamedTuple) = apply_toNT(check_grad_setup, gx) check_grad_setup(B::MomentumCache) = check_grad_setup(B.B) -gx = init_optimizer_cache(MomentumOptimizer(), ps) -check_grad_setup(gx) +@doc raw""" +Check if `initialparameters` and `init_optimizer_cache` do the right thing for `MultiHeadAttentionLayer`. 
+""" +function check_multi_head_attention_stiefel_setup(T::Type, N::Int, n::Int) + model = MultiHeadAttention(N, n, Stiefel=true) + ps = initialparameters(KernelAbstractions.CPU(), T, model) + + check_setup(ps) + + gx = init_optimizer_cache(MomentumOptimizer(), ps) + check_grad_setup(gx) +end + +check_multi_head_attention_stiefel_setup(Float32, 64, 8) \ No newline at end of file diff --git a/test/transformer_related/transformer_application.jl b/test/transformer_related/transformer_application.jl index 7fc159a8b..6729c889e 100644 --- a/test/transformer_related/transformer_application.jl +++ b/test/transformer_related/transformer_application.jl @@ -1,5 +1,8 @@ using Test, KernelAbstractions, GeometricMachineLearning +@doc raw""" +This tests if the size of the input array is kept constant when fed into the transformer (for a matrix and a tensor) +""" function transformer_application_test(T, dim, n_heads, L, seq_length=8, batch_size=10) model₁ = Chain(Transformer(dim, n_heads, L, Stiefel=false), ResNet(dim)) model₂ = Chain(Transformer(dim, n_heads, L, Stiefel=true), ResNet(dim)) @@ -8,11 +11,11 @@ function transformer_application_test(T, dim, n_heads, L, seq_length=8, batch_si ps₂ = initialparameters(KernelAbstractions.CPU(), T, model₂) input₁ = rand(T, dim, seq_length, batch_size) - input₂ = rand(T, dim, seq_length, batch_size) - + input₂ = rand(T, dim, seq_length) + @test size(model₁(input₁, ps₁)) == size(input₁) - @test size(model₁(input₂, ps₁)) == size(input₂) @test size(model₂(input₁, ps₂)) == size(input₁) + @test size(model₁(input₂, ps₁)) == size(input₂) @test size(model₂(input₂, ps₂)) == size(input₂) end diff --git a/test/transformer_related/transformer_gradient.jl b/test/transformer_related/transformer_gradient.jl index e049933a6..2a53673fc 100644 --- a/test/transformer_related/transformer_gradient.jl +++ b/test/transformer_related/transformer_gradient.jl @@ -1,5 +1,8 @@ using Test, KernelAbstractions, GeometricMachineLearning, Zygote, LinearAlgebra +@doc raw""" +This checks if the gradients of the transformer change the type in case of the Stiefel manifold, and checks if they stay the same in the case of regular weights. +""" function transformer_gradient_test(T, dim, n_heads, L, seq_length=8, batch_size=10) model₁ = Chain(Transformer(dim, n_heads, L, Stiefel=false), ResNet(dim)) model₂ = Chain(Transformer(dim, n_heads, L, Stiefel=true), ResNet(dim)) diff --git a/test/transformer_related/transformer_optimizer.jl b/test/transformer_related/transformer_optimizer.jl index aa3968c62..e6421bdb7 100644 --- a/test/transformer_related/transformer_optimizer.jl +++ b/test/transformer_related/transformer_optimizer.jl @@ -1,5 +1,8 @@ using Test, KernelAbstractions, GeometricMachineLearning, Zygote, LinearAlgebra +@doc raw""" +This function tests if the `GradientOptimzier`, `MomentumOptimizer`, `AdamOptimizer` and `BFGSOptimizer` act on the neural network weights via `optimization_step!`. 
+""" function transformer_gradient_test(T, dim, n_heads, L, seq_length=8, batch_size=10) model = Chain(Transformer(dim, n_heads, L, Stiefel=true), ResNet(dim)) model = Transformer(dim, n_heads, L, Stiefel=true) @@ -14,15 +17,19 @@ function transformer_gradient_test(T, dim, n_heads, L, seq_length=8, batch_size= o₁ = Optimizer(GradientOptimizer(), ps) o₂ = Optimizer(MomentumOptimizer(), ps) o₃ = Optimizer(AdamOptimizer(), ps) + o₄ = Optimizer(BFGSOptimizer(), ps) ps₁ = deepcopy(ps) ps₂ = deepcopy(ps) ps₃ = deepcopy(ps) + ps₄ = deepcopy(ps) optimization_step!(o₁, model, ps₁, dx) optimization_step!(o₂, model, ps₂, dx) optimization_step!(o₃, model, ps₃, dx) - @test typeof(ps₁) == typeof(ps₂) == typeof(ps₃) == typeof(ps) + optimization_step!(o₄, model, ps₄, dx) + @test typeof(ps₁) == typeof(ps₂) == typeof(ps₃) == typeof(ps₄) == typeof(ps) + @test ps₁[1].PQ.head_1 ≉ ps₂[1].PQ.head_1 ≉ ps₃[1].PQ.head_1 ≉ ps₄[1].PQ.head_1 ≉ ps[1].PQ.head_1 end transformer_gradient_test(Float32, 10, 5, 4) \ No newline at end of file diff --git a/test/transformer_related/transformer_setup.jl b/test/transformer_related/transformer_setup.jl index 42ac78995..1311ce26c 100644 --- a/test/transformer_related/transformer_setup.jl +++ b/test/transformer_related/transformer_setup.jl @@ -1,5 +1,8 @@ using Test, KernelAbstractions, GeometricMachineLearning +@doc raw""" +This function tests the setup of the transformer with Stiefel weights. +""" function transformer_setup_test(dim, n_heads, L, T) model = Transformer(dim, n_heads, L, Stiefel=true) ps = initialparameters(KernelAbstractions.CPU(), T, model) From cfc1b5fd52fd243f425fad8c68be57dea4cc3c94 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 13 Dec 2023 16:36:39 +0100 Subject: [PATCH 67/96] Fixed docs and put type dependency for . --- src/data_loader/tensor_assign.jl | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/src/data_loader/tensor_assign.jl b/src/data_loader/tensor_assign.jl index 4aac9bfc0..299e52b84 100644 --- a/src/data_loader/tensor_assign.jl +++ b/src/data_loader/tensor_assign.jl @@ -35,18 +35,22 @@ end end @doc raw""" -The function `assign_output_estimate` is closely related to the transformer. It takes the last prediction_window columns of the output and uses is for the final prediction. +The function `assign_output_estimate` is closely related to the transformer. It takes the last `prediction_window` columns of the output and uses them for the final prediction. i.e. 
```math -\mathbb{R}^{N\times\mathtt{pw}}\to\mathbb{R}^{N\times\mathtt{pw}}, \begin{bmatrix} z^{(1)}_1 & \cdots z^{(T)}_1 \\ - \cdots & \cdots \\ - z^{(T - \mathtt{pw})}_n & \cdots & z^{(T})_n\end{bmatrix} \mapsto - \begin{bmatrix} z^{(1)}_1 & \cdots z^{(T)}_1 \\ - \cdots & \cdots \\ - z^{(T - \mathtt{pw})}_n & \cdots & z^{(T})_n\end{bmatrix} +\mathbb{R}^{N\times\mathtt{pw}}\to\mathbb{R}^{N\times\mathtt{pw}}, +\begin{bmatrix} + z^{(1)}_1 & \cdots & z^{(T)}_1 \\ + \cdots & \cdots & \cdots \\ + z^{(1)}_n & \cdots & z^{(T})_n + \end{bmatrix} \mapsto + \begin{bmatrix} + z^{(T - \mathtt{pw})}_1 & \cdots & z^{(T)}_1 \\ + \cdots & \cdots & \cdots \\ + z^{(T - \mathtt{pw})}_n & \cdots & z^{(T})_n\end{bmatrix} ``` """ -function assign_output_estimate(full_output::AbstractArray{T, 3}, prediction_window) where T +function assign_output_estimate(full_output::AbstractArray{T, 3}, prediction_window::Int) where T sys_dim, seq_length, batch_size = size(full_output) backend = KernelAbstractions.get_backend(full_output) output_estimate = KernelAbstractions.allocate(backend, T, sys_dim, prediction_window, batch_size) From 9773e453a956b776a376920d16ac68d00b60f0c6 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 13 Dec 2023 17:04:22 +0100 Subject: [PATCH 68/96] Fixed docs for rgrad. --- src/manifolds/stiefel_manifold.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/manifolds/stiefel_manifold.jl b/src/manifolds/stiefel_manifold.jl index ec1c3fb26..a98e22079 100644 --- a/src/manifolds/stiefel_manifold.jl +++ b/src/manifolds/stiefel_manifold.jl @@ -64,7 +64,7 @@ function Base.:*(Y::Adjoint{T, StiefelManifold{T, AT}}, B::AbstractMatrix) where end @doc raw""" -Computes the Riemannian gradient for the Stiefel manifold given an element ``Y\in{}St(N,n)`` and a matrix ``\nabla{}L\in\mahbb{R}^{N\times{}n}`` (the Euclidean gradient). It computes the Riemannian gradient with respect to the canonical metric (see the documentation for the function `metric` for an explanation of this). +Computes the Riemannian gradient for the Stiefel manifold given an element ``Y\in{}St(N,n)`` and a matrix ``\nabla{}L\in\mathbb{R}^{N\times{}n}`` (the Euclidean gradient). It computes the Riemannian gradient with respect to the canonical metric (see the documentation for the function `metric` for an explanation of this). The precise form of the mapping is: ```math \mathtt{rgrad}(Y, \nabla{}L) \mapsto \nabla{}L - Y(\nabla{}L)^TY From dbbe3fee8f57df651224554b069f9bfd67cf844f Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 13 Dec 2023 17:12:59 +0100 Subject: [PATCH 69/96] Added additional constructor for the case when the type is the first input argument. --- src/arrays/stiefel_projection.jl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/arrays/stiefel_projection.jl b/src/arrays/stiefel_projection.jl index 82c04c2ac..236948a09 100644 --- a/src/arrays/stiefel_projection.jl +++ b/src/arrays/stiefel_projection.jl @@ -41,6 +41,8 @@ Outer constructor for `StiefelProjection`. 
This works with two integers as input """ StiefelProjection(N::Integer, n::Integer, T::Type=Float64) = StiefelProjection(CPU(), T, N, n) +StiefelProjection(T::Type, N::Integer, n::Integer) = StiefelProjection(N, n, T) + Base.size(E::StiefelProjection) = (E.N, E.n) Base.getindex(E::StiefelProjection, i, j) = getindex(E.A, i, j) Base.:+(E::StiefelProjection, A::AbstractMatrix) = E.A + A From 4e6f2640a3ad0f92b524fee1e434a30e0ae42953 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 20 Dec 2023 08:52:39 +0100 Subject: [PATCH 70/96] Resolved merge conflict. --- test/arrays/array_tests.jl | 12 ++++++++++++ test/runtests.jl | 1 + 2 files changed, 13 insertions(+) diff --git a/test/arrays/array_tests.jl b/test/arrays/array_tests.jl index c0da5eba9..abd21fc5f 100644 --- a/test/arrays/array_tests.jl +++ b/test/arrays/array_tests.jl @@ -76,6 +76,14 @@ function stiefel_lie_alg_add_sub_test(N, n) @test all(abs.(projection(W₁ - W₂) .- S₄) .< 1e-10) end +<<<<<<< HEAD +======= +function stiefel_lie_alg_vectorization_test(N, n; T=Float32) + A = rand(StiefelLieAlgHorMatrix{T}, N, n) + @test isapprox(StiefelLieAlgHorMatrix(vec(A), N, n), A) +end + +>>>>>>> b05c66c (Removed the symplectic arrays.) # TODO: tests for ADAM functions # test everything for different n & N values @@ -96,4 +104,8 @@ for (N, n) ∈ zip(N_vec, n_vec) skew_mat_mul_test2(N) stiefel_proj_test(N,n) stiefel_lie_alg_add_sub_test(N,n) +<<<<<<< HEAD +======= + stiefel_lie_alg_vectorization_test(N, n) +>>>>>>> b05c66c (Removed the symplectic arrays.) end diff --git a/test/runtests.jl b/test/runtests.jl index 033b04f16..92afd85ef 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -4,6 +4,7 @@ using SafeTestsets @safetestset "Check parameterlength " begin include("parameterlength/check_parameterlengths.jl") end @safetestset "Arrays #1 " begin include("arrays/array_tests.jl") end @safetestset "Sampling of arrays " begin include("arrays/random_generation_of_custom_arrays.jl") end +@safetestset "Test constructors for custom arrays " begin include("arrays/constructor_tests_for_custom_arrays.jl") end @safetestset "Manifolds (Grassmann): " begin include("manifolds/grassmann_manifold.jl") end @safetestset "Gradient Layer " begin include("layers/gradient_layer_tests.jl") end @safetestset "Test symplecticity of upscaling layer " begin include("layers/sympnet_layers_test.jl") end From 1f04af95a82d187b4ca90da28bccb67c917daf83 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 13 Dec 2023 17:14:04 +0100 Subject: [PATCH 71/96] Added tests for the various constructors for custom arrays. --- .../constructor_tests_for_custom_arrays.jl | 37 +++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 test/arrays/constructor_tests_for_custom_arrays.jl diff --git a/test/arrays/constructor_tests_for_custom_arrays.jl b/test/arrays/constructor_tests_for_custom_arrays.jl new file mode 100644 index 000000000..452f850f8 --- /dev/null +++ b/test/arrays/constructor_tests_for_custom_arrays.jl @@ -0,0 +1,37 @@ +using GeometricMachineLearning, Test +using LinearAlgebra: I + +@doc raw""" +This tests various constructor for custom arrays, e.g. if calling `SymmetricMatrix` on a matrix ``A`` does +```math +A \mapsto \frac{1}{2}(A + A^T). 
+``` +""" +function test_constructors_for_custom_arrays(n::Int, N::Int, T::Type) + A = rand(T, n, n) + B = rand(T, N, N) + + # SymmetricMatrix + @test Matrix{T}(SymmetricMatrix(A)) ≈ T(.5) * (A + A') + + # SkewSymMatrix + @test Matrix{T}(SkewSymMatrix(A)) ≈ T(.5) * (A - A') + + # StiefelLieAlgHorMatrix + B_shor = StiefelLieAlgHorMatrix(SkewSymMatrix(B), n) + B_shor2 = Matrix{T}(SkewSymMatrix(B)) + B_shor2[(n+1):N, (n+1):N] .= zero(T) + @test Matrix{T}(B_shor) ≈ B_shor2 + + # GrassmannLieAlgHorMatrix + B_ghor = GrassmannLieAlgHorMatrix(SkewSymMatrix(B), n) + B_ghor2 = copy(B_shor2) + B_ghor2[1:n, 1:n] .= zero(T) + @test Matrix{T}(B_ghor) ≈ B_ghor2 + + # StiefelProjection + E = StiefelProjection(T, N, n) + @test Matrix{T}(E) ≈ vcat(I(n), zeros(T, (N-n), n)) +end + +test_constructors_for_custom_arrays(5, 10, Float32) \ No newline at end of file From 7aff01654a3138289225742dcc252a8d01cbbef7 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 13 Dec 2023 13:25:49 +0100 Subject: [PATCH 72/96] Added new routine for multiplying symmetric matrix from the right onto a matrix. --- src/arrays/symmetric.jl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/arrays/symmetric.jl b/src/arrays/symmetric.jl index 928b90ab5..3b81466a1 100644 --- a/src/arrays/symmetric.jl +++ b/src/arrays/symmetric.jl @@ -209,6 +209,8 @@ function Base.:*(A::SymmetricMatrix{T}, B::AbstractMatrix{T}) where T C end +Base.:*(B::AbstractMatrix{T}, A::SymmetricMatrix{T}) where T = (A * B')' + function Base.:*(A::SymmetricMatrix{T}, b::AbstractVector{T}) where T backend = KernelAbstractions.get_backend(A.S) c = KernelAbstractions.allocate(backend, T, A.n) From 3321d4b90f40817f1c8107f5f4ee3ae107f853cb Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 20 Dec 2023 09:01:48 +0100 Subject: [PATCH 73/96] Resolved merge conflict. --- .../addition_tests_for_custom_arrays.jl | 40 ++++++++++++++----- 1 file changed, 31 insertions(+), 9 deletions(-) diff --git a/test/arrays/addition_tests_for_custom_arrays.jl b/test/arrays/addition_tests_for_custom_arrays.jl index 9413c5884..eb0ec6cd9 100644 --- a/test/arrays/addition_tests_for_custom_arrays.jl +++ b/test/arrays/addition_tests_for_custom_arrays.jl @@ -1,12 +1,34 @@ -#= -This tests addition for all custom arrays. Note that these tests will also have to be performed on GPU! -=# +using GeometricMachineLearning, Test -using LinearAlgebra -using Random -using Test -using GeometricMachineLearning +@doc raw""" +This function tests addition for various custom arrays, i.e. if \(A + B\) is performed in the correct way. 
+""" -function test_addition_for_symmetric_matrix(n::Int, T::Type) - A = rand(SymmetricMatrix{T}, n) +function addition_tests_for_custom_arrays(n::Int, N::Int, T::Type) + A = rand(T, n, n) + B = rand(T, n, n) + + # SymmetricMatrix + AB_sym = SymmetricMatrix(A + B) + AB_sym2 = SymmetricMatrix(A) + SymmetricMatrix(B) + @test AB_sym ≈ AB_sym2 + @test typeof(AB_sym) <: SymmetricMatrix{T} + @test typeof(AB_sym2) <: SymmetricMatrix{T} + + # SkewSymMatrix + AB_skew = SkewSymMatrix(A + B) + AB_skew2 = SkewSymMatrix(A) + SkewSymMatrix(B) + @test A_skew ≈ A_skew2 + @test typeof(AB_skew) <: SkewSymMatrix{T} + @test typeof(AB_skew2) <: SkewSymMatrix{T} + + C = rand(T, N, N) + D = rand(T, N, N) + + # StiefelLieAlgHorMatrix + CD_slahm = StiefelLieAlgHorMatrix(A + B, n) + CD_slahm2 = StiefelLieAlgHorMatrix(A, n) + StiefelLieAlgHorMatrix(B, n) + @test CD_slahm ≈ CD_slahm2 + @test typeof(CD_slahm) <: StiefelLieAlgHorMatrix{T} + @test typeof(CD_slahm2) <: StiefelLieAlgHorMatrix{T} end \ No newline at end of file From 65ee5c0528a6e2dc3f9cb820e77ed3a28a7d8ac1 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 20 Dec 2023 09:03:16 +0100 Subject: [PATCH 74/96] Resolved conflict (removed triangular matrices). --- src/GeometricMachineLearning.jl | 1 + src/arrays/symmetric.jl | 12 ++++++++++++ 2 files changed, 13 insertions(+) diff --git a/src/GeometricMachineLearning.jl b/src/GeometricMachineLearning.jl index b7b9e9f92..2b6813f35 100644 --- a/src/GeometricMachineLearning.jl +++ b/src/GeometricMachineLearning.jl @@ -88,6 +88,7 @@ module GeometricMachineLearning include("arrays/abstract_lie_algebra_horizontal.jl") include("arrays/stiefel_lie_algebra_horizontal.jl") include("arrays/grassmann_lie_algebra_horizontal.jl") + include("arrays/symmetric.jl") export SymmetricMatrix, SymplecticPotential, SkewSymMatrix export StiefelLieAlgHorMatrix diff --git a/src/arrays/symmetric.jl b/src/arrays/symmetric.jl index 3b81466a1..761e8dd72 100644 --- a/src/arrays/symmetric.jl +++ b/src/arrays/symmetric.jl @@ -211,6 +211,10 @@ end Base.:*(B::AbstractMatrix{T}, A::SymmetricMatrix{T}) where T = (A * B')' +function Base.:*(A::SymmetricMatrix{T}, B::SymmetricMatrix{T}) where T + A * (B * one(B)) +end + function Base.:*(A::SymmetricMatrix{T}, b::AbstractVector{T}) where T backend = KernelAbstractions.get_backend(A.S) c = KernelAbstractions.allocate(backend, T, A.n) @@ -218,6 +222,14 @@ function Base.:*(A::SymmetricMatrix{T}, b::AbstractVector{T}) where T c end +function Base.one(A::SymmetricMatrix{T}) where T + backend = KernelAbstractions.get_backend(A.S) + unit_matrix = KernelAbstractions.zeros(backend, T, A.n, A.n) + write_ones! = write_ones_kernel!(backend) + write_ones!(unit_matrix, ndrange=A.n) + unit_matrix +end + # define routines for generalizing ChainRulesCore to SymmetricMatrix ChainRulesCore.ProjectTo(A::SymmetricMatrix) = ProjectTo{SymmetricMatrix}(; symmetric=ProjectTo(A.S)) (project::ProjectTo{SymmetricMatrix})(dA::AbstractMatrix) = SymmetricMatrix(project.symmetric(map_to_S(dA)), size(dA, 2)) From 0f958d6d97455e5664fd67fc660dc69cf17369eb Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 13 Dec 2023 14:20:49 +0100 Subject: [PATCH 75/96] Slightly improved readability. 
--- src/arrays/skew_symmetric.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/arrays/skew_symmetric.jl b/src/arrays/skew_symmetric.jl index e1ab85fe2..3c14b9e45 100644 --- a/src/arrays/skew_symmetric.jl +++ b/src/arrays/skew_symmetric.jl @@ -187,7 +187,7 @@ end # the first matrix is multiplied onto A2 in order for it to not be SkewSymMatrix! function Base.:*(A1::SkewSymMatrix{T}, A2::SkewSymMatrix{T}) where T - A1*(one(A2)*A2) + A1 * (one(A2) * A2) end @doc raw""" From 250c7013b76cd987fe7c489a1a92ffb2f50abade Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 20 Dec 2023 09:04:15 +0100 Subject: [PATCH 76/96] Resolved conflict. --- test/arrays/addition_tests_for_custom_arrays.jl | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/test/arrays/addition_tests_for_custom_arrays.jl b/test/arrays/addition_tests_for_custom_arrays.jl index eb0ec6cd9..669ccc0ef 100644 --- a/test/arrays/addition_tests_for_custom_arrays.jl +++ b/test/arrays/addition_tests_for_custom_arrays.jl @@ -3,7 +3,6 @@ using GeometricMachineLearning, Test @doc raw""" This function tests addition for various custom arrays, i.e. if \(A + B\) is performed in the correct way. """ - function addition_tests_for_custom_arrays(n::Int, N::Int, T::Type) A = rand(T, n, n) B = rand(T, n, n) @@ -26,9 +25,15 @@ function addition_tests_for_custom_arrays(n::Int, N::Int, T::Type) D = rand(T, N, N) # StiefelLieAlgHorMatrix - CD_slahm = StiefelLieAlgHorMatrix(A + B, n) - CD_slahm2 = StiefelLieAlgHorMatrix(A, n) + StiefelLieAlgHorMatrix(B, n) + CD_slahm = StiefelLieAlgHorMatrix(C + D, n) + CD_slahm2 = StiefelLieAlgHorMatrix(C, n) + StiefelLieAlgHorMatrix(D, n) @test CD_slahm ≈ CD_slahm2 @test typeof(CD_slahm) <: StiefelLieAlgHorMatrix{T} @test typeof(CD_slahm2) <: StiefelLieAlgHorMatrix{T} + + CD_glahm = GrassmannLieAlgHorMatrix(C + D, n) + CD_glahm2 = GrassmannLieAlgHorMatrix(C, n) + GrassmannLieAlgHorMatrix(D, n) + @test CD_glahm ≈ CD_glahm2 + @test tyepof(CD_glahm) <: GrassmannLieAlgHorMatrix{T} + @test typeof(CD_glahm2) <: GrassmannLieAlgHorMatrix{T} end \ No newline at end of file From b320aaf664062cfc1c11195ecda2cb9728f2f9cc Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 20 Dec 2023 09:05:19 +0100 Subject: [PATCH 77/96] Resolved conflict (accepted both changes). 
---
 test/runtests.jl | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/test/runtests.jl b/test/runtests.jl
index 033b04f16..e03f65f0f 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -4,6 +4,9 @@ using SafeTestsets
 @safetestset "Check parameterlength " begin include("parameterlength/check_parameterlengths.jl") end
 @safetestset "Arrays #1 " begin include("arrays/array_tests.jl") end
 @safetestset "Sampling of arrays " begin include("arrays/random_generation_of_custom_arrays.jl") end
+@safetestset "Addition tests for custom arrays " begin include("arrays/addition_tests_for_custom_arrays.jl") end
+@safetestset "Scalar multiplication tests for custom arrays " begin include("arrays/scalar_multiplication_for_custom_arrays.jl") end
+@safetestset "Matrix multiplication tests for custom arrays " begin include("arrays/matrix_multiplication_for_custom_arrays.jl") end
 @safetestset "Manifolds (Grassmann): " begin include("manifolds/grassmann_manifold.jl") end
 @safetestset "Gradient Layer " begin include("layers/gradient_layer_tests.jl") end
 @safetestset "Test symplecticity of upscaling layer " begin include("layers/sympnet_layers_test.jl") end

From 5ae3067f9e9c579f091746f0a09541f7100e2338 Mon Sep 17 00:00:00 2001
From: benedict-96
Date: Wed, 13 Dec 2023 14:52:14 +0100
Subject: [PATCH 78/96] Added tests for addition, scalar multiplication and
 matrix multiplication for custom arrays.

---
 .../addition_tests_for_custom_arrays.jl       |  4 +-
 ...matrix_multiplication_for_custom_arrays.jl | 21 ++++++++++
 ...scalar_multiplication_for_custom_arrays.jl | 40 +++++++++++++++++++
 3 files changed, 64 insertions(+), 1 deletion(-)
 create mode 100644 test/arrays/matrix_multiplication_for_custom_arrays.jl
 create mode 100644 test/arrays/scalar_multiplication_for_custom_arrays.jl

diff --git a/test/arrays/addition_tests_for_custom_arrays.jl b/test/arrays/addition_tests_for_custom_arrays.jl
index 669ccc0ef..5fd2bbcfc 100644
--- a/test/arrays/addition_tests_for_custom_arrays.jl
+++ b/test/arrays/addition_tests_for_custom_arrays.jl
@@ -36,4 +36,6 @@ function addition_tests_for_custom_arrays(n::Int, N::Int, T::Type)
     @test CD_glahm ≈ CD_glahm2
     @test tyepof(CD_glahm) <: GrassmannLieAlgHorMatrix{T}
     @test typeof(CD_glahm2) <: GrassmannLieAlgHorMatrix{T}
-end
\ No newline at end of file
+end
+
+addition_tests_for_custom_arrays(5, 10, Float32)
\ No newline at end of file

diff --git a/test/arrays/matrix_multiplication_for_custom_arrays.jl b/test/arrays/matrix_multiplication_for_custom_arrays.jl
new file mode 100644
index 000000000..db051ec3e
--- /dev/null
+++ b/test/arrays/matrix_multiplication_for_custom_arrays.jl
@@ -0,0 +1,21 @@
+using GeometricMachineLearning, Test
+
+@doc raw"""
+This function tests matrix multiplication for various custom arrays, i.e. if \((A,B) \mapsto AB\) is performed in the correct way.
+""" +function matrix_multiplication_tests_for_custom_arrays(n::Int, N::Int, T::Type) + A = rand(T, n, n) + B = rand(T, n, N) + + # SymmetricMatrix + A_sym = SymmetricMatrix(A) + @test A_sym * B ≈ Matrix{T}(A_sym) * B + @test B' * A_sym ≈ B' * Matrix{T}(A_sym) + + # SkewSymMatrix + A_skew = SkewSymMatrix(A) + @test A_skew * B ≈ Matrix{T}(A_skew) * B + @test B' * A_skew ≈ B' * Matrix{T}(A_skew) +end + +matrix_multiplication_tests_for_custom_arrays(5, 10, Float32) \ No newline at end of file diff --git a/test/arrays/scalar_multiplication_for_custom_arrays.jl b/test/arrays/scalar_multiplication_for_custom_arrays.jl new file mode 100644 index 000000000..c6214a3e1 --- /dev/null +++ b/test/arrays/scalar_multiplication_for_custom_arrays.jl @@ -0,0 +1,40 @@ +using GeometricMachineLearning, Test + +@doc raw""" +This function tests scalar multiplication for various custom arrays, i.e. if \((A,\alpha) \mapsto \alpha{}A\) is performed in the correct way. +""" +function scalar_multiplication_for_custom_arrays(n::Int, N::Int, T::Type) + A = rand(T, n, n) + α = rand(T) + + # SymmetricMatrix + Aα_sym = SymmetricMatrix(α * A) + Aα_sym2 = α * SymmetricMatrix(A) + @test Aα_sym ≈ Aα_sym2 + @test typeof(Aα_sym) <: SymmetricMatrix{T} + @test typeof(Aα_sym2) <: SymmetricMatrix{T} + + # SkewSymMatrix + Aα_skew = SkewSymMatrix(α * A) + Aα_skew2 = α * SkewSymMatrix(A) + @test Aα_skew ≈ Aα_skew2 + @test typeof(Aα_skew) <: SkewSymMatrix{T} + @test typeof(Aα_skew2) <: SkewSymMatrix{T} + + C = rand(T, N, N) + + # StiefelLieAlgHorMatrix + Cα_slahm = StiefelLieAlgHorMatrix(α * C, n) + Cα_slahm2 = α * StiefelLieAlgHorMatrix(C, n) + @test Cα_slahm ≈ Cα_slahm2 + @test typeof(Cα_slahm) <: StiefelLieAlgHorMatrix{T} + @test typeof(Cα_slahm2) <: StiefelLieAlgHorMatrix{T} + + Cα_glahm = GrassmannLieAlgHorMatrix(α * C, n) + Cα_glahm2 = α * GrassmannLieAlgHorMatrix(C, n) + @test Cα_glahm ≈ Cα_glahm2 + @test tyepof(Cα_glahm) <: GrassmannLieAlgHorMatrix{T} + @test typeof(Cα_glahm2) <: GrassmannLieAlgHorMatrix{T} +end + +scalar_multiplication_for_custom_arrays(5, 10, Float32) \ No newline at end of file From d4bbb6c80de27188f62c102ed66373d16a28f6d1 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 20 Dec 2023 09:06:50 +0100 Subject: [PATCH 79/96] Resolved merge conflict. --- src/GeometricMachineLearning.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/src/GeometricMachineLearning.jl b/src/GeometricMachineLearning.jl index 2b6813f35..b7b9e9f92 100644 --- a/src/GeometricMachineLearning.jl +++ b/src/GeometricMachineLearning.jl @@ -88,7 +88,6 @@ module GeometricMachineLearning include("arrays/abstract_lie_algebra_horizontal.jl") include("arrays/stiefel_lie_algebra_horizontal.jl") include("arrays/grassmann_lie_algebra_horizontal.jl") - include("arrays/symmetric.jl") export SymmetricMatrix, SymplecticPotential, SkewSymMatrix export StiefelLieAlgHorMatrix From f25e3faf6a18cb9dbfbbb1401db6d09996e40aa8 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 20 Dec 2023 09:07:54 +0100 Subject: [PATCH 80/96] Fixed (apparent) conflict. --- test/arrays/array_tests.jl | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/test/arrays/array_tests.jl b/test/arrays/array_tests.jl index c0da5eba9..292eae52f 100644 --- a/test/arrays/array_tests.jl +++ b/test/arrays/array_tests.jl @@ -54,6 +54,10 @@ function skew_mat_mul_test2(n, T=Float64) @test isapprox(AS1, AS2) end +<<<<<<< HEAD +======= + +>>>>>>> c6a350e (Fixed typo.) 
# test Stiefel manifold projection test function stiefel_proj_test(N,n) In = I(n) @@ -61,6 +65,10 @@ function stiefel_proj_test(N,n) @test all(abs.((E'*E) .- In) .< 1e-10) end +<<<<<<< HEAD +======= + +>>>>>>> c6a350e (Fixed typo.) function stiefel_lie_alg_add_sub_test(N, n) E = StiefelProjection(N, n) projection(W::SkewSymMatrix) = W - (I - E*E')*W*(I - E*E') @@ -76,6 +84,14 @@ function stiefel_lie_alg_add_sub_test(N, n) @test all(abs.(projection(W₁ - W₂) .- S₄) .< 1e-10) end +<<<<<<< HEAD +======= +function stiefel_lie_alg_vectorization_test(N, n; T=Float32) + A = rand(StiefelLieAlgHorMatrix{T}, N, n) + @test isapprox(StiefelLieAlgHorMatrix(vec(A), N, n), A) +end + +>>>>>>> c6a350e (Fixed typo.) # TODO: tests for ADAM functions # test everything for different n & N values @@ -96,4 +112,8 @@ for (N, n) ∈ zip(N_vec, n_vec) skew_mat_mul_test2(N) stiefel_proj_test(N,n) stiefel_lie_alg_add_sub_test(N,n) +<<<<<<< HEAD +======= + stiefel_lie_alg_vectorization_test(N, n) +>>>>>>> c6a350e (Fixed typo.) end From e0cc2d2aa03e8ff7e50a1c670b8ab26a681539a2 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 13 Dec 2023 15:05:50 +0100 Subject: [PATCH 81/96] Fixed typo. --- test/arrays/addition_tests_for_custom_arrays.jl | 4 ++-- test/arrays/scalar_multiplication_for_custom_arrays.jl | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/test/arrays/addition_tests_for_custom_arrays.jl b/test/arrays/addition_tests_for_custom_arrays.jl index 5fd2bbcfc..44fc878c1 100644 --- a/test/arrays/addition_tests_for_custom_arrays.jl +++ b/test/arrays/addition_tests_for_custom_arrays.jl @@ -17,7 +17,7 @@ function addition_tests_for_custom_arrays(n::Int, N::Int, T::Type) # SkewSymMatrix AB_skew = SkewSymMatrix(A + B) AB_skew2 = SkewSymMatrix(A) + SkewSymMatrix(B) - @test A_skew ≈ A_skew2 + @test AB_skew ≈ AB_skew2 @test typeof(AB_skew) <: SkewSymMatrix{T} @test typeof(AB_skew2) <: SkewSymMatrix{T} @@ -34,7 +34,7 @@ function addition_tests_for_custom_arrays(n::Int, N::Int, T::Type) CD_glahm = GrassmannLieAlgHorMatrix(C + D, n) CD_glahm2 = GrassmannLieAlgHorMatrix(C, n) + GrassmannLieAlgHorMatrix(D, n) @test CD_glahm ≈ CD_glahm2 - @test tyepof(CD_glahm) <: GrassmannLieAlgHorMatrix{T} + @test typeof(CD_glahm) <: GrassmannLieAlgHorMatrix{T} @test typeof(CD_glahm2) <: GrassmannLieAlgHorMatrix{T} end diff --git a/test/arrays/scalar_multiplication_for_custom_arrays.jl b/test/arrays/scalar_multiplication_for_custom_arrays.jl index c6214a3e1..3a5e56205 100644 --- a/test/arrays/scalar_multiplication_for_custom_arrays.jl +++ b/test/arrays/scalar_multiplication_for_custom_arrays.jl @@ -33,7 +33,7 @@ function scalar_multiplication_for_custom_arrays(n::Int, N::Int, T::Type) Cα_glahm = GrassmannLieAlgHorMatrix(α * C, n) Cα_glahm2 = α * GrassmannLieAlgHorMatrix(C, n) @test Cα_glahm ≈ Cα_glahm2 - @test tyepof(Cα_glahm) <: GrassmannLieAlgHorMatrix{T} + @test typeof(Cα_glahm) <: GrassmannLieAlgHorMatrix{T} @test typeof(Cα_glahm2) <: GrassmannLieAlgHorMatrix{T} end From 1e659d7c5f6e423824a4cb216ce507f39aefb1e6 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 20 Dec 2023 09:13:53 +0100 Subject: [PATCH 82/96] Fixed comments from previous merge conflict. 
--- test/arrays/array_tests.jl | 27 --------------------------- 1 file changed, 27 deletions(-) diff --git a/test/arrays/array_tests.jl b/test/arrays/array_tests.jl index dadc11665..8621ddda7 100644 --- a/test/arrays/array_tests.jl +++ b/test/arrays/array_tests.jl @@ -53,14 +53,6 @@ function skew_mat_mul_test2(n, T=Float64) AS2 = A*Matrix{T}(S) @test isapprox(AS1, AS2) end - -<<<<<<< HEAD -<<<<<<< HEAD -======= -======= ->>>>>>> c6a350ed471420a4b4583c45978658d85b1f253e - ->>>>>>> c6a350e (Fixed typo.) # test Stiefel manifold projection test function stiefel_proj_test(N,n) In = I(n) @@ -68,13 +60,6 @@ function stiefel_proj_test(N,n) @test all(abs.((E'*E) .- In) .< 1e-10) end -<<<<<<< HEAD -<<<<<<< HEAD -======= -======= ->>>>>>> c6a350ed471420a4b4583c45978658d85b1f253e - ->>>>>>> c6a350e (Fixed typo.) function stiefel_lie_alg_add_sub_test(N, n) E = StiefelProjection(N, n) projection(W::SkewSymMatrix) = W - (I - E*E')*W*(I - E*E') @@ -90,17 +75,11 @@ function stiefel_lie_alg_add_sub_test(N, n) @test all(abs.(projection(W₁ - W₂) .- S₄) .< 1e-10) end -<<<<<<< HEAD -<<<<<<< HEAD -======= -======= ->>>>>>> c6a350ed471420a4b4583c45978658d85b1f253e function stiefel_lie_alg_vectorization_test(N, n; T=Float32) A = rand(StiefelLieAlgHorMatrix{T}, N, n) @test isapprox(StiefelLieAlgHorMatrix(vec(A), N, n), A) end ->>>>>>> c6a350e (Fixed typo.) # TODO: tests for ADAM functions # test everything for different n & N values @@ -121,11 +100,5 @@ for (N, n) ∈ zip(N_vec, n_vec) skew_mat_mul_test2(N) stiefel_proj_test(N,n) stiefel_lie_alg_add_sub_test(N,n) -<<<<<<< HEAD -<<<<<<< HEAD -======= -======= ->>>>>>> c6a350ed471420a4b4583c45978658d85b1f253e stiefel_lie_alg_vectorization_test(N, n) ->>>>>>> c6a350e (Fixed typo.) end From 57108602a71ef405c802309fbba126aa9ed45556 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 13 Dec 2023 16:36:39 +0100 Subject: [PATCH 83/96] Fixed docs and put type dependency for . --- src/data_loader/tensor_assign.jl | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/src/data_loader/tensor_assign.jl b/src/data_loader/tensor_assign.jl index 4aac9bfc0..299e52b84 100644 --- a/src/data_loader/tensor_assign.jl +++ b/src/data_loader/tensor_assign.jl @@ -35,18 +35,22 @@ end end @doc raw""" -The function `assign_output_estimate` is closely related to the transformer. It takes the last prediction_window columns of the output and uses is for the final prediction. +The function `assign_output_estimate` is closely related to the transformer. It takes the last `prediction_window` columns of the output and uses them for the final prediction. i.e. 
```math
-\mathbb{R}^{N\times\mathtt{pw}}\to\mathbb{R}^{N\times\mathtt{pw}}, \begin{bmatrix} z^{(1)}_1 & \cdots z^{(T)}_1 \\
- \cdots & \cdots \\
- z^{(T - \mathtt{pw})}_n & \cdots & z^{(T})_n\end{bmatrix} \mapsto
- \begin{bmatrix} z^{(1)}_1 & \cdots z^{(T)}_1 \\
- \cdots & \cdots \\
- z^{(T - \mathtt{pw})}_n & \cdots & z^{(T})_n\end{bmatrix}
+\mathbb{R}^{N\times{}T}\to\mathbb{R}^{N\times\mathtt{pw}},
+\begin{bmatrix}
+ z^{(1)}_1 & \cdots & z^{(T)}_1 \\
+ \cdots & \cdots & \cdots \\
+ z^{(1)}_n & \cdots & z^{(T)}_n
+ \end{bmatrix} \mapsto
+ \begin{bmatrix}
+ z^{(T - \mathtt{pw})}_1 & \cdots & z^{(T)}_1 \\
+ \cdots & \cdots & \cdots \\
+ z^{(T - \mathtt{pw})}_n & \cdots & z^{(T)}_n\end{bmatrix}
 ```
 """
-function assign_output_estimate(full_output::AbstractArray{T, 3}, prediction_window) where T
+function assign_output_estimate(full_output::AbstractArray{T, 3}, prediction_window::Int) where T
     sys_dim, seq_length, batch_size = size(full_output)
     backend = KernelAbstractions.get_backend(full_output)
     output_estimate = KernelAbstractions.allocate(backend, T, sys_dim, prediction_window, batch_size)

From ce78b93d03c8365f165598090da6588b474b618e Mon Sep 17 00:00:00 2001
From: benedict-96
Date: Wed, 13 Dec 2023 17:04:22 +0100
Subject: [PATCH 84/96] Fixed docs for rgrad.

---
 src/manifolds/stiefel_manifold.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/manifolds/stiefel_manifold.jl b/src/manifolds/stiefel_manifold.jl
index ec1c3fb26..a98e22079 100644
--- a/src/manifolds/stiefel_manifold.jl
+++ b/src/manifolds/stiefel_manifold.jl
@@ -64,7 +64,7 @@ function Base.:*(Y::Adjoint{T, StiefelManifold{T, AT}}, B::AbstractMatrix) where
 end

 @doc raw"""
-Computes the Riemannian gradient for the Stiefel manifold given an element ``Y\in{}St(N,n)`` and a matrix ``\nabla{}L\in\mahbb{R}^{N\times{}n}`` (the Euclidean gradient). It computes the Riemannian gradient with respect to the canonical metric (see the documentation for the function `metric` for an explanation of this).
+Computes the Riemannian gradient for the Stiefel manifold given an element ``Y\in{}St(N,n)`` and a matrix ``\nabla{}L\in\mathbb{R}^{N\times{}n}`` (the Euclidean gradient). It computes the Riemannian gradient with respect to the canonical metric (see the documentation for the function `metric` for an explanation of this).
 The precise form of the mapping is:
 ```math
 \mathtt{rgrad}(Y, \nabla{}L) \mapsto \nabla{}L - Y(\nabla{}L)^TY

From f6dda17c48cbe298949abfaae231af52fa54daa0 Mon Sep 17 00:00:00 2001
From: benedict-96
Date: Wed, 13 Dec 2023 17:12:59 +0100
Subject: [PATCH 85/96] Added additional constructor for the case when the
 type is the first input argument.

---
 src/arrays/stiefel_projection.jl | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/arrays/stiefel_projection.jl b/src/arrays/stiefel_projection.jl
index 82c04c2ac..236948a09 100644
--- a/src/arrays/stiefel_projection.jl
+++ b/src/arrays/stiefel_projection.jl
@@ -41,6 +41,8 @@ Outer constructor for `StiefelProjection`.
This works with two integers as input """ StiefelProjection(N::Integer, n::Integer, T::Type=Float64) = StiefelProjection(CPU(), T, N, n) +StiefelProjection(T::Type, N::Integer, n::Integer) = StiefelProjection(N, n, T) + Base.size(E::StiefelProjection) = (E.N, E.n) Base.getindex(E::StiefelProjection, i, j) = getindex(E.A, i, j) Base.:+(E::StiefelProjection, A::AbstractMatrix) = E.A + A From 99e83e6c0b206be4ab64e16051fa1872f41eb735 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 20 Dec 2023 09:17:25 +0100 Subject: [PATCH 86/96] Resolved conflict (forgot to resolve another one before). --- test/arrays/array_tests.jl | 1 + test/runtests.jl | 1 + 2 files changed, 2 insertions(+) diff --git a/test/arrays/array_tests.jl b/test/arrays/array_tests.jl index c2badb77b..1c372344e 100644 --- a/test/arrays/array_tests.jl +++ b/test/arrays/array_tests.jl @@ -76,6 +76,7 @@ function stiefel_lie_alg_add_sub_test(N, n) @test all(abs.(projection(W₁ - W₂) .- S₄) .< 1e-10) end + function stiefel_lie_alg_vectorization_test(N, n; T=Float32) A = rand(StiefelLieAlgHorMatrix{T}, N, n) @test isapprox(StiefelLieAlgHorMatrix(vec(A), N, n), A) diff --git a/test/runtests.jl b/test/runtests.jl index 1b6cb1b4d..1db601285 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -4,6 +4,7 @@ using SafeTestsets @safetestset "Check parameterlength " begin include("parameterlength/check_parameterlengths.jl") end @safetestset "Arrays #1 " begin include("arrays/array_tests.jl") end @safetestset "Sampling of arrays " begin include("arrays/random_generation_of_custom_arrays.jl") end +@safetestset "Test constructors for custom arrays " begin include("arrays/constructor_tests_for_custom_arrays.jl") end @safetestset "Manifolds (Grassmann): " begin include("manifolds/grassmann_manifold.jl") end @safetestset "Gradient Layer " begin include("layers/gradient_layer_tests.jl") end @safetestset "Test symplecticity of upscaling layer " begin include("layers/sympnet_layers_test.jl") end From 02b5b31170b651eeb63700d6ebec91284ce8e781 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 13 Dec 2023 17:14:04 +0100 Subject: [PATCH 87/96] Added tests for the various constructors for custom arrays. --- .../constructor_tests_for_custom_arrays.jl | 37 +++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 test/arrays/constructor_tests_for_custom_arrays.jl diff --git a/test/arrays/constructor_tests_for_custom_arrays.jl b/test/arrays/constructor_tests_for_custom_arrays.jl new file mode 100644 index 000000000..452f850f8 --- /dev/null +++ b/test/arrays/constructor_tests_for_custom_arrays.jl @@ -0,0 +1,37 @@ +using GeometricMachineLearning, Test +using LinearAlgebra: I + +@doc raw""" +This tests various constructor for custom arrays, e.g. if calling `SymmetricMatrix` on a matrix ``A`` does +```math +A \mapsto \frac{1}{2}(A + A^T). 
+``` +""" +function test_constructors_for_custom_arrays(n::Int, N::Int, T::Type) + A = rand(T, n, n) + B = rand(T, N, N) + + # SymmetricMatrix + @test Matrix{T}(SymmetricMatrix(A)) ≈ T(.5) * (A + A') + + # SkewSymMatrix + @test Matrix{T}(SkewSymMatrix(A)) ≈ T(.5) * (A - A') + + # StiefelLieAlgHorMatrix + B_shor = StiefelLieAlgHorMatrix(SkewSymMatrix(B), n) + B_shor2 = Matrix{T}(SkewSymMatrix(B)) + B_shor2[(n+1):N, (n+1):N] .= zero(T) + @test Matrix{T}(B_shor) ≈ B_shor2 + + # GrassmannLieAlgHorMatrix + B_ghor = GrassmannLieAlgHorMatrix(SkewSymMatrix(B), n) + B_ghor2 = copy(B_shor2) + B_ghor2[1:n, 1:n] .= zero(T) + @test Matrix{T}(B_ghor) ≈ B_ghor2 + + # StiefelProjection + E = StiefelProjection(T, N, n) + @test Matrix{T}(E) ≈ vcat(I(n), zeros(T, (N-n), n)) +end + +test_constructors_for_custom_arrays(5, 10, Float32) \ No newline at end of file From 36bb2a9a39b0ec8cf560084403e734ff340a610e Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 20 Dec 2023 09:18:53 +0100 Subject: [PATCH 88/96] Removed the symplectic arrays. --- test/arrays/array_tests.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/test/arrays/array_tests.jl b/test/arrays/array_tests.jl index 1c372344e..c2badb77b 100644 --- a/test/arrays/array_tests.jl +++ b/test/arrays/array_tests.jl @@ -76,7 +76,6 @@ function stiefel_lie_alg_add_sub_test(N, n) @test all(abs.(projection(W₁ - W₂) .- S₄) .< 1e-10) end - function stiefel_lie_alg_vectorization_test(N, n; T=Float32) A = rand(StiefelLieAlgHorMatrix{T}, N, n) @test isapprox(StiefelLieAlgHorMatrix(vec(A), N, n), A) From e6b2d34990d15a214bee225c065fb23ed3c75a62 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 13 Dec 2023 16:36:39 +0100 Subject: [PATCH 89/96] Fixed docs and put type dependency for . --- src/data_loader/tensor_assign.jl | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/src/data_loader/tensor_assign.jl b/src/data_loader/tensor_assign.jl index 4aac9bfc0..299e52b84 100644 --- a/src/data_loader/tensor_assign.jl +++ b/src/data_loader/tensor_assign.jl @@ -35,18 +35,22 @@ end end @doc raw""" -The function `assign_output_estimate` is closely related to the transformer. It takes the last prediction_window columns of the output and uses is for the final prediction. +The function `assign_output_estimate` is closely related to the transformer. It takes the last `prediction_window` columns of the output and uses them for the final prediction. i.e. 
```math
-\mathbb{R}^{N\times\mathtt{pw}}\to\mathbb{R}^{N\times\mathtt{pw}}, \begin{bmatrix} z^{(1)}_1 & \cdots z^{(T)}_1 \\
- \cdots & \cdots \\
- z^{(T - \mathtt{pw})}_n & \cdots & z^{(T})_n\end{bmatrix} \mapsto
- \begin{bmatrix} z^{(1)}_1 & \cdots z^{(T)}_1 \\
- \cdots & \cdots \\
- z^{(T - \mathtt{pw})}_n & \cdots & z^{(T})_n\end{bmatrix}
+\mathbb{R}^{N\times{}T}\to\mathbb{R}^{N\times\mathtt{pw}},
+\begin{bmatrix}
+ z^{(1)}_1 & \cdots & z^{(T)}_1 \\
+ \cdots & \cdots & \cdots \\
+ z^{(1)}_n & \cdots & z^{(T)}_n
+ \end{bmatrix} \mapsto
+ \begin{bmatrix}
+ z^{(T - \mathtt{pw})}_1 & \cdots & z^{(T)}_1 \\
+ \cdots & \cdots & \cdots \\
+ z^{(T - \mathtt{pw})}_n & \cdots & z^{(T)}_n\end{bmatrix}
 ```
 """
-function assign_output_estimate(full_output::AbstractArray{T, 3}, prediction_window) where T
+function assign_output_estimate(full_output::AbstractArray{T, 3}, prediction_window::Int) where T
     sys_dim, seq_length, batch_size = size(full_output)
     backend = KernelAbstractions.get_backend(full_output)
     output_estimate = KernelAbstractions.allocate(backend, T, sys_dim, prediction_window, batch_size)

From f28680f132622ac2b28282d145bf2f2e042603c2 Mon Sep 17 00:00:00 2001
From: benedict-96
Date: Wed, 13 Dec 2023 17:04:22 +0100
Subject: [PATCH 90/96] Fixed docs for rgrad.

---
 src/manifolds/stiefel_manifold.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/manifolds/stiefel_manifold.jl b/src/manifolds/stiefel_manifold.jl
index ec1c3fb26..a98e22079 100644
--- a/src/manifolds/stiefel_manifold.jl
+++ b/src/manifolds/stiefel_manifold.jl
@@ -64,7 +64,7 @@ function Base.:*(Y::Adjoint{T, StiefelManifold{T, AT}}, B::AbstractMatrix) where
 end

 @doc raw"""
-Computes the Riemannian gradient for the Stiefel manifold given an element ``Y\in{}St(N,n)`` and a matrix ``\nabla{}L\in\mahbb{R}^{N\times{}n}`` (the Euclidean gradient). It computes the Riemannian gradient with respect to the canonical metric (see the documentation for the function `metric` for an explanation of this).
+Computes the Riemannian gradient for the Stiefel manifold given an element ``Y\in{}St(N,n)`` and a matrix ``\nabla{}L\in\mathbb{R}^{N\times{}n}`` (the Euclidean gradient). It computes the Riemannian gradient with respect to the canonical metric (see the documentation for the function `metric` for an explanation of this).
 The precise form of the mapping is:
 ```math
 \mathtt{rgrad}(Y, \nabla{}L) \mapsto \nabla{}L - Y(\nabla{}L)^TY

From 3456fc5db1a56089eb600aa32febb6cab89a6354 Mon Sep 17 00:00:00 2001
From: benedict-96
Date: Wed, 13 Dec 2023 17:12:59 +0100
Subject: [PATCH 91/96] Added additional constructor for the case when the
 type is the first input argument.

---
 src/arrays/stiefel_projection.jl | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/arrays/stiefel_projection.jl b/src/arrays/stiefel_projection.jl
index 82c04c2ac..236948a09 100644
--- a/src/arrays/stiefel_projection.jl
+++ b/src/arrays/stiefel_projection.jl
@@ -41,6 +41,8 @@ Outer constructor for `StiefelProjection`.
This works with two integers as input """ StiefelProjection(N::Integer, n::Integer, T::Type=Float64) = StiefelProjection(CPU(), T, N, n) +StiefelProjection(T::Type, N::Integer, n::Integer) = StiefelProjection(N, n, T) + Base.size(E::StiefelProjection) = (E.N, E.n) Base.getindex(E::StiefelProjection, i, j) = getindex(E.A, i, j) Base.:+(E::StiefelProjection, A::AbstractMatrix) = E.A + A From f202264cfe85ffaeb7521578fb9f2ef867b3a6ab Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 20 Dec 2023 14:27:47 +0100 Subject: [PATCH 92/96] Resolved conflict (forgot to resolve another one before). --- test/arrays/array_tests.jl | 1 + test/runtests.jl | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/test/arrays/array_tests.jl b/test/arrays/array_tests.jl index 8621ddda7..9c00adc75 100644 --- a/test/arrays/array_tests.jl +++ b/test/arrays/array_tests.jl @@ -75,6 +75,7 @@ function stiefel_lie_alg_add_sub_test(N, n) @test all(abs.(projection(W₁ - W₂) .- S₄) .< 1e-10) end + function stiefel_lie_alg_vectorization_test(N, n; T=Float32) A = rand(StiefelLieAlgHorMatrix{T}, N, n) @test isapprox(StiefelLieAlgHorMatrix(vec(A), N, n), A) diff --git a/test/runtests.jl b/test/runtests.jl index 672ae9123..e67116b3b 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -4,9 +4,13 @@ using SafeTestsets @safetestset "Check parameterlength " begin include("parameterlength/check_parameterlengths.jl") end @safetestset "Arrays #1 " begin include("arrays/array_tests.jl") end @safetestset "Sampling of arrays " begin include("arrays/random_generation_of_custom_arrays.jl") end +<<<<<<< HEAD @safetestset "Addition tests for custom arrays " begin include("arrays/addition_tests_for_custom_arrays.jl") end @safetestset "Scalar multiplication tests for custom arrays " begin include("arrays/scalar_multiplication_for_custom_arrays.jl") end @safetestset "Matrix multiplication tests for custom arrays " begin include("arrays/matrix_multiplication_for_custom_arrays.jl") end +======= +@safetestset "Test constructors for custom arrays " begin include("arrays/constructor_tests_for_custom_arrays.jl") end +>>>>>>> 99e83e6 (Resolved conflict (forgot to resolve another one before).) @safetestset "Manifolds (Grassmann): " begin include("manifolds/grassmann_manifold.jl") end @safetestset "Gradient Layer " begin include("layers/gradient_layer_tests.jl") end @safetestset "Test symplecticity of upscaling layer " begin include("layers/sympnet_layers_test.jl") end From c4cdd5126a9261d86b2beba4afb3dc3a7a7f0c58 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 13 Dec 2023 17:14:04 +0100 Subject: [PATCH 93/96] Added tests for the various constructors for custom arrays. --- .../constructor_tests_for_custom_arrays.jl | 37 +++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 test/arrays/constructor_tests_for_custom_arrays.jl diff --git a/test/arrays/constructor_tests_for_custom_arrays.jl b/test/arrays/constructor_tests_for_custom_arrays.jl new file mode 100644 index 000000000..452f850f8 --- /dev/null +++ b/test/arrays/constructor_tests_for_custom_arrays.jl @@ -0,0 +1,37 @@ +using GeometricMachineLearning, Test +using LinearAlgebra: I + +@doc raw""" +This tests various constructor for custom arrays, e.g. if calling `SymmetricMatrix` on a matrix ``A`` does +```math +A \mapsto \frac{1}{2}(A + A^T). 
+``` +""" +function test_constructors_for_custom_arrays(n::Int, N::Int, T::Type) + A = rand(T, n, n) + B = rand(T, N, N) + + # SymmetricMatrix + @test Matrix{T}(SymmetricMatrix(A)) ≈ T(.5) * (A + A') + + # SkewSymMatrix + @test Matrix{T}(SkewSymMatrix(A)) ≈ T(.5) * (A - A') + + # StiefelLieAlgHorMatrix + B_shor = StiefelLieAlgHorMatrix(SkewSymMatrix(B), n) + B_shor2 = Matrix{T}(SkewSymMatrix(B)) + B_shor2[(n+1):N, (n+1):N] .= zero(T) + @test Matrix{T}(B_shor) ≈ B_shor2 + + # GrassmannLieAlgHorMatrix + B_ghor = GrassmannLieAlgHorMatrix(SkewSymMatrix(B), n) + B_ghor2 = copy(B_shor2) + B_ghor2[1:n, 1:n] .= zero(T) + @test Matrix{T}(B_ghor) ≈ B_ghor2 + + # StiefelProjection + E = StiefelProjection(T, N, n) + @test Matrix{T}(E) ≈ vcat(I(n), zeros(T, (N-n), n)) +end + +test_constructors_for_custom_arrays(5, 10, Float32) \ No newline at end of file From f388a4c12308c0ff4ccc875263eb9c47c9712754 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 20 Dec 2023 09:18:53 +0100 Subject: [PATCH 94/96] Removed the symplectic arrays. --- test/arrays/array_tests.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/test/arrays/array_tests.jl b/test/arrays/array_tests.jl index 9c00adc75..8621ddda7 100644 --- a/test/arrays/array_tests.jl +++ b/test/arrays/array_tests.jl @@ -75,7 +75,6 @@ function stiefel_lie_alg_add_sub_test(N, n) @test all(abs.(projection(W₁ - W₂) .- S₄) .< 1e-10) end - function stiefel_lie_alg_vectorization_test(N, n; T=Float32) A = rand(StiefelLieAlgHorMatrix{T}, N, n) @test isapprox(StiefelLieAlgHorMatrix(vec(A), N, n), A) From 882fa043e651f681d12766119df14b976b2d0148 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 20 Dec 2023 14:29:15 +0100 Subject: [PATCH 95/96] Resolved merge conflict. --- test/arrays/array_tests.jl | 1 + test/runtests.jl | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/test/arrays/array_tests.jl b/test/arrays/array_tests.jl index 8621ddda7..9c00adc75 100644 --- a/test/arrays/array_tests.jl +++ b/test/arrays/array_tests.jl @@ -75,6 +75,7 @@ function stiefel_lie_alg_add_sub_test(N, n) @test all(abs.(projection(W₁ - W₂) .- S₄) .< 1e-10) end + function stiefel_lie_alg_vectorization_test(N, n; T=Float32) A = rand(StiefelLieAlgHorMatrix{T}, N, n) @test isapprox(StiefelLieAlgHorMatrix(vec(A), N, n), A) diff --git a/test/runtests.jl b/test/runtests.jl index e67116b3b..d1d0ad577 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -5,12 +5,16 @@ using SafeTestsets @safetestset "Arrays #1 " begin include("arrays/array_tests.jl") end @safetestset "Sampling of arrays " begin include("arrays/random_generation_of_custom_arrays.jl") end <<<<<<< HEAD +<<<<<<< HEAD @safetestset "Addition tests for custom arrays " begin include("arrays/addition_tests_for_custom_arrays.jl") end @safetestset "Scalar multiplication tests for custom arrays " begin include("arrays/scalar_multiplication_for_custom_arrays.jl") end @safetestset "Matrix multiplication tests for custom arrays " begin include("arrays/matrix_multiplication_for_custom_arrays.jl") end ======= @safetestset "Test constructors for custom arrays " begin include("arrays/constructor_tests_for_custom_arrays.jl") end >>>>>>> 99e83e6 (Resolved conflict (forgot to resolve another one before).) +======= +@safetestset "Test constructors for custom arrays " begin include("arrays/constructor_tests_for_custom_arrays.jl") end +>>>>>>> 4e6f264 (Resolved merge conflict.) 
@safetestset "Manifolds (Grassmann): " begin include("manifolds/grassmann_manifold.jl") end @safetestset "Gradient Layer " begin include("layers/gradient_layer_tests.jl") end @safetestset "Test symplecticity of upscaling layer " begin include("layers/sympnet_layers_test.jl") end From 2aa69196d17ad70686b47b0ab90926b0c43713c6 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 20 Dec 2023 14:30:09 +0100 Subject: [PATCH 96/96] Removed the symplectic arrays. --- test/arrays/array_tests.jl | 1 + test/runtests.jl | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/test/arrays/array_tests.jl b/test/arrays/array_tests.jl index 9c00adc75..1c372344e 100644 --- a/test/arrays/array_tests.jl +++ b/test/arrays/array_tests.jl @@ -53,6 +53,7 @@ function skew_mat_mul_test2(n, T=Float64) AS2 = A*Matrix{T}(S) @test isapprox(AS1, AS2) end + # test Stiefel manifold projection test function stiefel_proj_test(N,n) In = I(n) diff --git a/test/runtests.jl b/test/runtests.jl index d1d0ad577..e2f94167a 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -3,6 +3,7 @@ using SafeTestsets @safetestset "Check parameterlength " begin include("parameterlength/check_parameterlengths.jl") end @safetestset "Arrays #1 " begin include("arrays/array_tests.jl") end +<<<<<<< HEAD @safetestset "Sampling of arrays " begin include("arrays/random_generation_of_custom_arrays.jl") end <<<<<<< HEAD <<<<<<< HEAD @@ -15,6 +16,9 @@ using SafeTestsets ======= @safetestset "Test constructors for custom arrays " begin include("arrays/constructor_tests_for_custom_arrays.jl") end >>>>>>> 4e6f264 (Resolved merge conflict.) +======= +@safetestset "Test constructors for custom arrays " begin include("arrays/constructor_tests_for_custom_arrays.jl") end +>>>>>>> b05c66c (Removed the symplectic arrays.) @safetestset "Manifolds (Grassmann): " begin include("manifolds/grassmann_manifold.jl") end @safetestset "Gradient Layer " begin include("layers/gradient_layer_tests.jl") end @safetestset "Test symplecticity of upscaling layer " begin include("layers/sympnet_layers_test.jl") end
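For reference, the array test files touched in this series are self-contained
scripts: each one loads the package, defines its test function and calls it at
the bottom (e.g. `addition_tests_for_custom_arrays(5, 10, Float32)`). A minimal
sketch of how to run just these files from the repository root, outside of
`runtests.jl` (assuming an instantiated checkout of GeometricMachineLearning.jl;
the environment setup itself is not part of these patches):

    using Pkg
    Pkg.activate(".")      # the GeometricMachineLearning.jl project
    Pkg.instantiate()

    include("test/arrays/addition_tests_for_custom_arrays.jl")
    include("test/arrays/scalar_multiplication_for_custom_arrays.jl")
    include("test/arrays/matrix_multiplication_for_custom_arrays.jl")
    include("test/arrays/constructor_tests_for_custom_arrays.jl")

Each `include` executes the `@test` calls in the corresponding file directly,
without going through the `@safetestset` wrappers registered in `runtests.jl`.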