diff --git a/src/architectures/sympnet.jl b/src/architectures/sympnet.jl
index 8e017f125..dba449c4e 100644
--- a/src/architectures/sympnet.jl
+++ b/src/architectures/sympnet.jl
@@ -7,7 +7,7 @@ TODO:
 abstract type SympNet{AT} <: Architecture end
 
 @doc raw"""
-`LASympNet` is called with **a single input argument**, the **system dimension**. Optional input arguments are:
+`LASympNet` is called with **a single input argument**, the **system dimension**, or with an instance of `DataLoader`. Optional input arguments are:
 - `depth::Int`: The number of linear layers that are applied. The default is 5.
 - `nhidden::Int`: The number of hidden layers (i.e. layers that are **not** input or output layers). The default is 2.
 - `activation`: The activation function that is applied. By default this is `tanh`.
@@ -32,7 +32,7 @@ end
 @inline AbstractNeuralNetworks.dim(arch::SympNet) = arch.dim
 
 @doc raw"""
-`GSympNet` is called with **a single input argument**, the **system dimension**. Optional input arguments are:
+`GSympNet` is called with **a single input argument**, the **system dimension**, or with an instance of `DataLoader`. Optional input arguments are:
 - `upscaling_dimension::Int`: The *upscaling dimension* of the gradient layer. See the documentation for `GradientLayerQ` and `GradientLayerP` for further explanation. The default is `2*dim`.
 - `nhidden::Int`: The number of hidden layers (i.e. layers that are **not** input or output layers). The default is 2.
 - `activation`: The activation function that is applied. By default this is `tanh`.
@@ -49,7 +49,7 @@ struct GSympNet{AT, InitUpper} <: SympNet{AT} where {InitUpper}
 end
 
-    function GSympNet(dl::DataLoader; upscaling_dimension=2*dim, nhidden=2, activation=tanh, init_upper=true)
+    function GSympNet(dl::DataLoader; upscaling_dimension=2*dl.input_dim, nhidden=2, activation=tanh, init_upper=true)
         new{typeof(activation), init_upper}(dl.input_dim, upscaling_dimension, nhidden, activation)
     end
 end
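With the change above, a `GSympNet` (and analogously an `LASympNet`) can be built directly from a `DataLoader`, with `upscaling_dimension` defaulting to `2*dl.input_dim`. A minimal sketch of the intended call sequence, mirroring the new test file `batch_data_loader_qp_test.jl` further down; the data sizes are made up for illustration:

```julia
using GeometricMachineLearning

# dummy (q, p) data: system dimension 2, 200 snapshots
data = (q = rand(Float32, 2, 200), p = rand(Float32, 2, 200))
dl = DataLoader(data)

# the architecture reads its dimension from the DataLoader;
# upscaling_dimension defaults to 2 * dl.input_dim
arch = GSympNet(dl)
nn = NeuralNetwork(arch, CPU(), Float32)
```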
diff --git a/src/arrays/symplectic.jl b/src/arrays/symplectic.jl
index 1dcc59658..3a4f138d6 100644
--- a/src/arrays/symplectic.jl
+++ b/src/arrays/symplectic.jl
@@ -1,75 +1,43 @@
 @doc raw"""
-    `SymplecticMatrix(n)`
+`SymplecticPotential(n)`
 
 Returns a symplectic matrix of size 2n x 2n
 
 ```math
 \begin{pmatrix}
-0 & & & 1 & & & \\
-& \ddots & & & \ddots & & \\
-& & 0 & & & 1 \\
--1 & & & 0 & & & \\
-& \ddots & & & \ddots & & \\
-& & -1 & & 0 & \\
+\mathbb{O} & \mathbb{I} \\
+-\mathbb{I} & \mathbb{O} \\
 \end{pmatrix}
 ```
 
-- `SymplecticProjection(N,n)`
-Returns the symplectic projection matrix E of the Stiefel manifold, i.e. π: Sp(2N) → Sp(2n,2N), A ↦ AE
-
 """
-#=
-function SymplecticMatrix(n::Int, T::DataType=Float64)
-    BandedMatrix((n => ones(T,n), -n => -ones(T,n)), (2n,2n))
-end
-
-SymplecticMatrix(T::DataType, n::Int) = SymplecticMatrix(n, T)
-
-@doc raw"""
-```math
-\begin{pmatrix}
-I & 0 \\
-0 & 0 \\
-0 & I \\
-0 & 0 \\
-\end{pmatrix}
-```
-"""
-=#
-
-function SymplecticPotential(n::Int, T::DataType=Float64)
-    J = zeros(T, 2*n, 2*n)
-    J[1:n, (n+1):2*n] = one(ones(T, n, n))
-    J[(n+1):2*n, 1:n] = -one(ones(T, n, n))
+function SymplecticPotential(backend, n2::Int, T::DataType=Float64)
+    @assert iseven(n2)
+    n = n2÷2
+    J = KernelAbstractions.zeros(backend, T, 2*n, 2*n)
+    assign_ones_for_symplectic_potential! = assign_ones_for_symplectic_potential_kernel!(backend)
+    assign_ones_for_symplectic_potential!(J, n, ndrange=2*n)
     J
 end
 
+SymplecticPotential(n::Int, T::DataType=Float64) = SymplecticPotential(CPU(), n, T)
+SymplecticPotential(backend, T::DataType, n::Int) = SymplecticPotential(backend, n, T)
+
 SymplecticPotential(T::DataType, n::Int) = SymplecticPotential(n, T)
 
-struct SymplecticProjection{T} <: AbstractMatrix{T}
-    N::Int
-    n::Int
-    SymplecticProjection(N, n, T = Float64) = new{T}(N,n)
+@kernel function assign_ones_for_symplectic_potential_kernel!(J::AbstractMatrix{T}, n::Int) where T
+    i = @index(Global)
+    J[map_index_for_symplectic_potential(i, n)...] = i ≤ n ? one(T) : -one(T)
 end
 
-function Base.getindex(E::SymplecticProjection,i,j)
-    if i ≤ E.n
-        if j == i
-            return 1.
-        end
-        return 0.
-    end
-    if j > E.n
-        if (j-E.n) == (i-E.N)
-            return 1.
-        end
-        return 0.
+"""
+This function returns the correct index pair for the symplectic potential. It is meant to be used with `assign_ones_for_symplectic_potential_kernel!`.
+"""
+function map_index_for_symplectic_potential(i::Int, n::Int)
+    if i ≤ n
+        return (i, i+n)
+    else
+        return (i, i-n)
     end
-    return 0.
-end
-
-
-Base.parent(E::SymplecticProjection) = (E.N,E.n)
-Base.size(E::SymplecticProjection) = (2*E.N,2*E.n)
+end
\ No newline at end of file
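With the kernel launched over all `2n` rows, `SymplecticPotential` produces the canonical symplectic matrix with the identity block in the upper right and its negative in the lower left. A small sketch of a call to the new backend method; note that it takes the full (even) dimension, the function is qualified here since it may not be exported, and the commented matrix is what the index map implies rather than verified output:

```julia
using GeometricMachineLearning

# the backend method takes the full (even) dimension, here 2n = 4
J = GeometricMachineLearning.SymplecticPotential(CPU(), 4, Float64)

# expected structure according to map_index_for_symplectic_potential (n = 2):
#   0  0  1  0
#   0  0  0  1
#  -1  0  0  0
#   0 -1  0  0
```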
diff --git a/src/data_loader/batch.jl b/src/data_loader/batch.jl
index bbb9e4e39..10b434ffd 100644
--- a/src/data_loader/batch.jl
+++ b/src/data_loader/batch.jl
@@ -15,6 +15,9 @@ end
 hasseqlength(::Batch{<:Integer}) = true
 hasseqlength(::Batch{<:Nothing}) = false
 
+@doc raw"""
+The functor for `Batch` is called with an instance of `DataLoader`. It then returns a tuple of batch indices ``(\mathcal{I}_1, \ldots, \mathcal{I}_{\lceil\mathtt{dl.n\_params/batch\_size}\rceil})``, where the index runs from 1 to the number of batches, i.e. the number of parameters divided by the batch size (rounded up).
+"""
 function (batch::Batch{<:Nothing})(dl::DataLoader{T, AT}) where {T, AT<:AbstractArray{T, 3}}
     indices = shuffle(1:dl.n_params)
     n_batches = Int(ceil(dl.n_params/batch.batch_size))
@@ -28,6 +31,9 @@ function (batch::Batch{<:Nothing})(dl::DataLoader{T, AT}) where {T, AT<:Abstract
     batches
 end
 
+@doc raw"""
+The functor for `Batch` is called with an instance of `DataLoader`. It then returns a tuple of batch indices ``(\mathcal{I}_1, \ldots, \mathcal{I}_{\lceil\mathtt{(dl.input\_time\_steps-1)/batch\_size}\rceil})``, where the index runs from 1 to the number of batches, i.e. the number of input time steps (minus one) divided by the batch size (rounded up).
+"""
 function (batch::Batch{<:Nothing})(dl::DataLoader{T, AT}) where {T, BT<:AbstractMatrix{T}, AT<:Union{BT, NamedTuple{(:q, :p), Tuple{BT, BT}}}}
     indices = shuffle(1:dl.input_time_steps)
     n_batches = Int(ceil((dl.input_time_steps-1)/batch.batch_size))
@@ -88,7 +94,7 @@ function optimize_for_one_epoch!(opt::Optimizer, nn::NeuralNetwork, dl::DataLoad
 end
 
 """
-TODO: Add ProgressMeter!!!
+This routine is called if a `DataLoader` storing *symplectic data* (i.e. a `NamedTuple`) is supplied.
 """
 function optimize_for_one_epoch!(opt::Optimizer, model, ps::Union{Tuple, NamedTuple}, dl::DataLoader{T, AT}, batch::Batch, loss) where {T, AT<:NamedTuple}
     count = 0
@@ -107,7 +113,16 @@ function optimize_for_one_epoch!(opt::Optimizer, model, ps::Union{Tuple, NamedTu
     total_error/count
 end
 
-
+@doc raw"""
+A functor for `Optimizer`. It is called with:
+- `nn::NeuralNetwork`
+- `dl::DataLoader`
+- `batch::Batch`
+- `n_epochs::Int`
+- `loss`
+
+The last argument is a function through which `Zygote` differentiates. This argument is optional; if it is not supplied, `GeometricMachineLearning` defaults to an appropriate loss for the given `DataLoader`.
+"""
 function (o::Optimizer)(nn::NeuralNetwork, dl::DataLoader, batch::Batch, n_epochs::Int, loss)
     progress_object = ProgressMeter.Progress(n_epochs; enabled=true)
     loss_array = zeros(n_epochs)
@@ -118,6 +133,6 @@ function (o::Optimizer)(nn::NeuralNetwork, dl::DataLoader, batch::Batch, n_epoch
     loss_array
 end
 
-function (o::Optimizer)(nn::NeuralNetwork, dl::DataLoader, batch::Batch, n_epochs::Int)
+function (o::Optimizer)(nn::NeuralNetwork, dl::DataLoader, batch::Batch, n_epochs::Int=1)
     o(nn, dl, batch, n_epochs, loss)
 end
\ No newline at end of file
diff --git a/src/data_loader/data_loader.jl b/src/data_loader/data_loader.jl
index ae6a45089..5117bbc23 100644
--- a/src/data_loader/data_loader.jl
+++ b/src/data_loader/data_loader.jl
@@ -93,25 +93,35 @@ It takes as input:
 """
 function loss(model::Union{Chain, AbstractExplicitLayer}, ps::Union{Tuple, NamedTuple}, input::AT, output::BT) where {T, T1, AT<:AbstractArray{T, 3}, BT<:AbstractArray{T1, 3}}
     output_estimate = model(input, ps)
-    norm(output - output_estimate)/norm(output) # /T(sqrt(size(output, 2)*size(output, 3)))
+    norm(output - output_estimate) / norm(output) # /T(sqrt(size(output, 2)*size(output, 3)))
 end
 
-function loss(model::Chain, ps::Tuple, input::BT) where {T, BT<:AbstractArray{T, 3}}
+@doc raw"""
+The *autoencoder loss*.
+"""
+function loss(model::Chain, ps::Tuple, input::BT) where {T, BT<:AbstractArray{T}}
     output_estimate = model(input, ps)
-    norm(output_estimate - input)/norm(input) # /T(sqrt(size(input, 2)*size(input, 3)))
+    norm(output_estimate - input) / norm(input) # /T(sqrt(size(input, 2)*size(input, 3)))
 end
 
-function loss(model::Chain, ps::Tuple, input::BT) where {T, BT<:AbstractArray{T, 2}}
+nt_diff(A, B) = (q = A.q - B.q, p = A.p - B.p)
+nt_norm(A) = norm(A.q) + norm(A.p)
+
+function loss(model::Chain, ps::Tuple, input::NT) where {T, AT<:AbstractArray{T}, NT<:NamedTuple{(:q, :p,), Tuple{AT, AT}}}
     output_estimate = model(input, ps)
-    norm(output_estimate - input)/norm(input) # /T(sqrt(size(input, 2)))
+    nt_norm(nt_diff(output_estimate, input)) / nt_norm(input)
 end
 
-nt_diff(A, B) = (q = A.q - B.q, p = A.p - B.p)
-nt_norm(A) = norm(A.q) + norm(A.p)
+@doc raw"""
+Loss function for a SympNet (or another neural network-based integrator). It takes two `NamedTuple`s, the input ``(q, p)`` and the target ``(q', p')``, and computes the relative error:
+```math
+\mathtt{loss}(\mathcal{NN}, \mathtt{ps}, \begin{pmatrix} q \\ p \end{pmatrix}, \begin{pmatrix} q' \\ p' \end{pmatrix}) = \left\| \mathcal{NN}\left(\begin{pmatrix} q \\ p \end{pmatrix}\right) - \begin{pmatrix} q' \\ p' \end{pmatrix} \right\| / \left\| \begin{pmatrix} q \\ p \end{pmatrix} \right\|
+```
+"""
 function loss(model::Chain, ps::Tuple, input::NamedTuple, output::NamedTuple)
     output_estimate = model(input, ps)
-    nt_norm(nt_diff(output_estimate, output))/nt_norm(input)
+    nt_norm(nt_diff(output_estimate, output)) / nt_norm(input)
 end
 
 @doc raw"""
@@ -133,7 +143,14 @@ function loss(model::Chain, ps::Tuple, dl::DataLoader{T, BT, Nothing}) where {T,
 end
 
 function loss(model::Chain, ps::Tuple, dl::DataLoader{T, BT}) where {T, BT<:NamedTuple}
-    loss(model, ps, dl.data)
+    loss(model, ps, dl.input)
 end
 
+@doc raw"""
+Wrapper for the case when a `NeuralNetwork` is supplied instead of a model together with its parameters.
+""" +function loss(nn::NeuralNetwork, dl::DataLoader) + loss(nn.model, nn.params, dl) end @doc raw""" diff --git a/src/data_loader/tensor_assign.jl b/src/data_loader/tensor_assign.jl index 3faad0a99..4aac9bfc0 100644 --- a/src/data_loader/tensor_assign.jl +++ b/src/data_loader/tensor_assign.jl @@ -55,6 +55,7 @@ function assign_output_estimate(full_output::AbstractArray{T, 3}, prediction_win output_estimate end +#= """ This function draws random time steps and parameters and based on these assign the batch and the output. @@ -101,6 +102,7 @@ function draw_batch!(batch::AT, output::BT, data::AT, target::BT) where {T, T2, assign_batch!(batch, data, params, time_steps, ndrange=size(batch)) assign_batch!(output, target, params, time_steps, ndrange=size(output)) end +=# """ Used for differentiating assign_output_estimate (this appears in the loss). diff --git a/src/optimizers/optimizer.jl b/src/optimizers/optimizer.jl index 7cd78d037..e6e25cdaa 100644 --- a/src/optimizers/optimizer.jl +++ b/src/optimizers/optimizer.jl @@ -22,6 +22,8 @@ function Optimizer(m::OptimizerMethod, nn::NeuralNetwork) Optimizer(m, nn.params) end +Optimizer(nn::NeuralNetwork, m::OptimizerMethod) = Optimizer(m, nn) + ####################################################################################### # optimization step function diff --git a/test/data_loader/batch.jl b/test/data_loader/batch.jl deleted file mode 100644 index 4b7c17309..000000000 --- a/test/data_loader/batch.jl +++ /dev/null @@ -1,33 +0,0 @@ -using AbstractNeuralNetworks: AbstractExplicitLayer, Chain -using GeometricMachineLearning, Test - -""" -This creates a dummy MNIST data set. - -TODO: include tests to check if all elements are batched! -""" -function create_dummy_mnist(;T=Float32, dim₁=6, dim₂=6, n_images=10) - rand(T, dim₁, dim₂, n_images), Int.(floor.(10*rand(T, n_images))) -end - -dl = DataLoader(create_dummy_mnist()...; patch_length=3) -# batch size is equal to two -batch = Batch(2) - -# this function should be made part of AbstractNeuralNetworks !!! 
-function Chain(c::Chain, d::AbstractExplicitLayer)
-    Chain(c.layers..., d)
-end
-
-# input dim is 3^2 = 9
-model = Chain(Transformer(dl.input_dim, 3, 2; Stiefel=true), Classification(dl.input_dim, 10, σ))
-ps = initialparameters(CPU(), Float32, model)
-
-loss₁ = GeometricMachineLearning.loss(model, ps, dl)
-
-opt = Optimizer(AdamOptimizer(), ps)
-loss_average = optimize_for_one_epoch!(opt, model, ps, dl, batch)
-
-loss₃ = GeometricMachineLearning.loss(model, ps, dl)
-
-@test loss₁ > loss_average > loss₃
\ No newline at end of file
diff --git a/test/data_loader/batch_data_loader_qp_test.jl b/test/data_loader/batch_data_loader_qp_test.jl
new file mode 100644
index 000000000..e189aca2f
--- /dev/null
+++ b/test/data_loader/batch_data_loader_qp_test.jl
@@ -0,0 +1,34 @@
+using GeometricMachineLearning
+using Test
+
+function dummy_qp_data_matrix(dim=2, number_data_points=200, T=Float32)
+    (q = rand(T, dim, number_data_points), p = (rand(T, dim, number_data_points)))
+end
+
+function dummy_qp_data_tensor(dim=2, number_of_time_steps=100, number_of_parameters=20, T=Float32)
+    (q = rand(T, dim, number_of_time_steps, number_of_parameters), p = (rand(T, dim, number_of_time_steps, number_of_parameters)))
+end
+
+function test_data_loader(dim=2, number_of_time_steps=100, number_of_parameters=20, batch_size=10, T=Float32)
+
+    dl1 = DataLoader(dummy_qp_data_matrix(dim, number_of_time_steps, T))
+    dl2 = DataLoader(dummy_qp_data_tensor(dim, number_of_time_steps, number_of_parameters))
+
+    arch1 = GSympNet(dl1)
+    arch2 = GSympNet(dl2)
+
+    nn1 = NeuralNetwork(arch1, CPU(), T)
+    nn2 = NeuralNetwork(arch2, CPU(), T)
+
+    loss1 = GeometricMachineLearning.loss(nn1, dl1)
+    loss2 = GeometricMachineLearning.loss(nn2, dl2)
+
+    batch = Batch(batch_size)
+    o₁ = Optimizer(GradientOptimizer(), nn1)
+    # o₂ = Optimizer(GradientOptimizer(), nn2)
+
+    o₁(nn1, dl1, batch)
+    # o₂(nn2, dl2, batch)
+end
+
+test_data_loader()
\ No newline at end of file
diff --git a/test/data_loader/data_loader.jl b/test/data_loader/data_loader_optimization_step.jl
similarity index 84%
rename from test/data_loader/data_loader.jl
rename to test/data_loader/data_loader_optimization_step.jl
index 880212f5b..0ac67a648 100644
--- a/test/data_loader/data_loader.jl
+++ b/test/data_loader/data_loader_optimization_step.jl
@@ -1,5 +1,8 @@
 using GeometricMachineLearning, Test, Zygote
 
+@doc raw"""
+This tests the gradient optimizer called together with the `DataLoader` (applied to a tensor).
+"""
 function test_data_loader(sys_dim, n_time_steps, n_params, T=Float32)
     data = randn(T, sys_dim, n_time_steps, n_params)
     dl = DataLoader(data)
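The new q-p test above goes through the `Batch` functor documented in `src/data_loader/batch.jl`. A short sketch of what calling it on a tensor-valued `DataLoader` is expected to return, based on that docstring; the sizes are illustrative:

```julia
using GeometricMachineLearning

# tensor data: (system dimension, time steps, number of parameters)
dl = DataLoader(rand(Float32, 2, 100, 20))
batch = Batch(8)

batches = batch(dl)
# according to the docstring in batch.jl, batches holds one index set per batch,
# i.e. length(batches) == Int(ceil(dl.n_params / batch.batch_size)) == 3 for these sizes
```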
diff --git a/test/data_loader/mnist_utils.jl b/test/data_loader/mnist_utils.jl
index fe685c647..171941711 100644
--- a/test/data_loader/mnist_utils.jl
+++ b/test/data_loader/mnist_utils.jl
@@ -7,9 +7,8 @@ using GeometricMachineLearning: index_conversion
 import Zygote
 
 """
-This function tests if all the patch nubmers are assigned correctly, i.e. tests patch_index.
+This function is used to test if all the patch numbers are assigned correctly with `index_conversion`, i.e. it tests `patch_index` by inverting it.
 """
-### Test if mapping is invertible
 function reverse_index(i::Integer, j::Integer, patch_length=7)
     opt_i = i%patch_length==0 ? 1 : 0
     within_patch_index = i%patch_length + opt_i*patch_length, (i÷patch_length - opt_i + 1)
@@ -20,41 +19,59 @@ function reverse_index(i::Integer, j::Integer, patch_length=7)
     (patch_index[1]-1)*patch_length + within_patch_index[1], (patch_index[2]-1)*patch_length + within_patch_index[2]
 end
 
-# test if this is the inverse of the other batch index conversion!
-patch_lengths = (2, 4, 7, 14)
-for patch_length in patch_lengths
-    number_of_patches = (28÷patch_length)^2
-    for i in 1:28
-        for j in 1:28
-            @test reverse_index(index_conversion(i, j, patch_length, number_of_patches)..., patch_length) == (i, j)
+"""
+This function uses `reverse_index` to test `index_conversion`, i.e. it checks that the two functions are inverses of each other.
+"""
+function test_index_conversion(patch_lengths=(2, 4, 7, 14))
+    for patch_length in patch_lengths
+        number_of_patches = (28÷patch_length)^2
+        for i in 1:28
+            for j in 1:28
+                @test reverse_index(index_conversion(i, j, patch_length, number_of_patches)..., patch_length) == (i, j)
+            end
         end
     end
 end
 
+"""
+This function tests if `onehotbatch` does what it should, i.e. converts a vector of integers into a one-hot tensor.
+"""
 function test_onehotbatch(V::AbstractVector{T}) where {T<:Integer}
     V_encoded = onehotbatch(V)
-    for i in length(V)
+    for (i, v) in enumerate(V)
         @test sum(V_encoded[:,1,i]) == 1
+        @test V_encoded[v + 1, 1, i] == 1  # assumes that label ℓ is encoded in row ℓ + 1
     end
 end
 
 test_onehotbatch([1, 2, 5, 0])
 
-####### MNIST-like data set
-train_x = rand(Float32, 28,28,100)
-train_y = Int.(ceil.(10*rand(Float32, 100))) .- 1
+@doc raw"""
+Generates an MNIST-like dummy data set.
+"""
+function generate_dummy_mnist(dim₁=28, dim₂=28, number_images=100, T=Float32)
+    train_x = rand(T, dim₁, dim₂, number_images)
+    train_y = Int.(ceil.(10 * rand(T, number_images))) .- 1
+    train_x, train_y
+end
+
+function test_optimizer_for_classification_layer(; dim₁=28, dim₂=28, number_images=100, patch_length=7, T=Float32)
+    dl = DataLoader(generate_dummy_mnist(dim₁, dim₂, number_images, T)...; patch_length=patch_length)
 
-dl = DataLoader(train_x, train_y)
+    activation_function(x) = tanh.(x)
+    model = Classification(patch_length * patch_length, 10, activation_function)
 
-activation_function(x) = tanh.(x)
-model = Classification(49, 10, activation_function)
-ps = initialparameters(CPU(), Float32, model)
-loss₁ = GeometricMachineLearning.loss(model, ps, dl)
+    ps = initialparameters(CPU(), T, model)
+    loss₁ = GeometricMachineLearning.loss(model, ps, dl)
 
-opt = Optimizer(GradientOptimizer(), ps)
-dx = Zygote.gradient(ps -> GeometricMachineLearning.loss(model, ps, dl), ps)[1]
-optimization_step!(opt, model, ps, dx)
-loss₂ = GeometricMachineLearning.loss(model, ps, dl)
+    opt = Optimizer(GradientOptimizer(), ps)
+    dx = Zygote.gradient(ps -> GeometricMachineLearning.loss(model, ps, dl), ps)[1]
+    optimization_step!(opt, model, ps, dx)
+    loss₂ = GeometricMachineLearning.loss(model, ps, dl)
+
+    @test loss₂ < loss₁
+end
 
-@test loss₂ < loss₁
+test_index_conversion()
+test_optimizer_for_classification_layer()
\ No newline at end of file
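The losses compared in these tests are the relative errors defined in `src/data_loader/data_loader.jl`; for `(q, p)` data they are built from `nt_diff` and `nt_norm`. The following restates that computation by hand, without calling the package, to make the docstring formula concrete; the arrays are random placeholders:

```julia
using LinearAlgebra: norm

# restatement of nt_diff / nt_norm from src/data_loader/data_loader.jl
nt_diff(A, B) = (q = A.q - B.q, p = A.p - B.p)
nt_norm(A) = norm(A.q) + norm(A.p)

input      = (q = rand(2, 10), p = rand(2, 10))   # current state (q, p)
target     = (q = rand(2, 10), p = rand(2, 10))   # next state (q', p')
prediction = (q = rand(2, 10), p = rand(2, 10))   # stand-in for model(input, ps)

relative_error = nt_norm(nt_diff(prediction, target)) / nt_norm(input)
```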
diff --git a/test/data_loader/optimizer_functor_with_adam.jl b/test/data_loader/optimizer_functor_with_adam.jl
new file mode 100644
index 000000000..b4e48dfef
--- /dev/null
+++ b/test/data_loader/optimizer_functor_with_adam.jl
@@ -0,0 +1,39 @@
+using AbstractNeuralNetworks: AbstractExplicitLayer, Chain
+using GeometricMachineLearning, Test
+
+# this function should be made part of AbstractNeuralNetworks !!!
+function Chain(c::Chain, d::AbstractExplicitLayer)
+    Chain(c.layers..., d)
+end
+
+"""
+This creates a dummy MNIST data set, i.e. its output is an image tensor and a label vector that look similar to the ones in the MNIST data set.
+"""
+function create_dummy_mnist(;T=Float32, dim₁=6, dim₂=6, n_images=10)
+    rand(T, dim₁, dim₂, n_images), Int.(floor.(10*rand(T, n_images)))
+end
+
+
+function test_optimizer_functor_with_adam(;T=Float32, dim₁=6, dim₂=6, n_images=10, patch_length=3)
+    dl = DataLoader(create_dummy_mnist(T=T, dim₁=dim₁, dim₂=dim₂, n_images=n_images)...; patch_length=patch_length)
+
+    # batch size is equal to two
+    batch = Batch(2)
+
+    # the input dimension is patch_length^2; the transformer is called with patch_length and two layers
+    model = Chain(Transformer(dl.input_dim, patch_length, 2; Stiefel=true), Classification(dl.input_dim, 10, σ))
+
+    ps = initialparameters(CPU(), Float32, model)
+
+    loss₁ = GeometricMachineLearning.loss(model, ps, dl)
+
+    opt = Optimizer(AdamOptimizer(), ps)
+    loss_average = optimize_for_one_epoch!(opt, model, ps, dl, batch)
+
+    loss₃ = GeometricMachineLearning.loss(model, ps, dl)
+
+    # check if the loss decreases during optimization
+    @test loss₁ > loss_average > loss₃
+end
+
+test_optimizer_functor_with_adam()
\ No newline at end of file
diff --git a/test/runtests.jl b/test/runtests.jl
index e94ad1242..033b04f16 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -34,6 +34,8 @@ using SafeTestsets
 @safetestset "Training " begin include("train!/test_training.jl") end
 @safetestset "NeuralNetSolution " begin include("train!/test_neuralnet_solution.jl") end
 @safetestset "Problem & Integrators " begin include("integrator/test_integrator.jl") end
-@safetestset "Data Loader #1 " begin include("data_loader/data_loader.jl") end
-@safetestset "Data Loader #2 " begin include("data_loader/mnist_utils.jl") end
-@safetestset "Data Loader #3 (Batch struct) " begin include("data_loader/batch.jl") end
\ No newline at end of file
+
+@safetestset "Test data loader for q and p data " begin include("data_loader/batch_data_loader_qp_test.jl") end
+@safetestset "Test mnist_utils. " begin include("data_loader/mnist_utils.jl") end
+@safetestset "Test the data loader in combination with optimization_step! " begin include("data_loader/data_loader_optimization_step.jl") end
+@safetestset "Optimizer functor with data loader for Adam " begin include("data_loader/optimizer_functor_with_adam.jl") end
\ No newline at end of file
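Taken together, the changes in this patch allow the following end-to-end training sketch: build a SympNet from a `DataLoader`, wrap it in an `Optimizer`, and call the optimizer functor, which now accepts an optional number of epochs (defaulting to 1) and falls back to the default loss. The sizes and the choice of `AdamOptimizer` below are illustrative:

```julia
using GeometricMachineLearning

data = (q = rand(Float32, 2, 200), p = rand(Float32, 2, 200))
dl = DataLoader(data)

nn = NeuralNetwork(GSympNet(dl), CPU(), Float32)
o = Optimizer(AdamOptimizer(), nn)   # Optimizer(nn, AdamOptimizer()) is now equivalent

batch = Batch(10)
loss_array = o(nn, dl, batch, 50)    # 50 epochs; n_epochs defaults to 1 if omitted
```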