
Merge pull request #129 from JuliaGNI/loss_routines
Loss routines
michakraus authored Apr 10, 2024
2 parents af93bf5 + 7bbf45e commit 8825732
Showing 7 changed files with 189 additions and 53 deletions.
6 changes: 4 additions & 2 deletions src/GeometricMachineLearning.jl
@@ -61,9 +61,11 @@ module GeometricMachineLearning
# from GeometricBase to print docs
export description

# the functionality in this script doesn't require anything else defined in GML, but some of the other scripts in that folder do.
include("data_loader/data_loader.jl")

include("loss/loss_routines.jl")
include("loss/losses.jl")

include("kernels/assign_q_and_p.jl")
include("kernels/tensor_mat_mul.jl")
include("kernels/tensor_tensor_mul.jl")
@@ -381,5 +383,5 @@ module GeometricMachineLearning

export ReducedSystem, compute_reduction_error, compute_projection_error, reduced_vector_field_from_full_explicit_vector_field, perform_integration_reduced, perform_integration_full

include("loss/loss_routines.jl")
include("data_loader/optimize.jl")
end
50 changes: 0 additions & 50 deletions src/data_loader/batch.jl
@@ -84,40 +84,6 @@ function number_of_batches(dl::DataLoader{T, AT}, batch::Batch{<:Integer}) where
Int(ceil((dl.input_time_steps - batch.seq_length) * dl.n_params / batch.batch_size))
end

@doc raw"""
Optimize for an entire epoch. For this you have to supply:
- an instance of the optimizer.
- the neural network model
- the parameters of the model
- the data (in form of `DataLoader`)
- an instance of `Batch` that contains `batch_size` (and optionally `seq_length`)
With the optional argument:
- the loss, which takes the `model`, the parameters `ps`, an input batch and an output batch as arguments.
The output of `optimize_for_one_epoch!` is the average loss over all batches of the epoch:
```math
output = \frac{1}{\mathtt{steps\_per\_epoch}}\sum_{t=1}^\mathtt{steps\_per\_epoch}loss(\theta^{(t-1)}).
```
The average loss comes at no extra cost because any **reverse differentiation** routine always has two outputs: a pullback and the value of the function it is differentiating. In the case of Zygote: `loss_value, pullback = Zygote.pullback(ps -> loss(ps), ps)` (if the loss only depends on the parameters).
"""
function optimize_for_one_epoch!(opt::Optimizer, model, ps::Union{Tuple, NamedTuple}, dl::DataLoader{T, AT, BT}, batch::Batch, loss) where {T, T1, AT<:AbstractArray{T, 3}, BT<:AbstractArray{T1, 3}}
count = 0
total_error = T(0)
batches = batch(dl)
@views for batch_indices in batches
count += 1
# these `copy`s should not be necessary! coming from a Zygote problem!
input_batch = copy(dl.input[:, :, batch_indices])
output_batch = copy(dl.output[:, :, batch_indices])
loss_value, pullback = Zygote.pullback(ps -> loss(model, ps, input_batch, output_batch), ps)
total_error += loss_value
dp = pullback(one(loss_value))[1]
optimization_step!(opt, model, ps, dp)
end
total_error / count
end

@kernel function assign_input_from_vector_of_tuples_kernel!(q_input::AT, p_input::AT, input::NamedTuple{(:q, :p), Tuple{AT, AT}}, indices::AbstractArray{Int, 2}) where {T, AT<:AbstractArray{T, 3}}
i, j, k = @index(Global, NTuple)

@@ -198,22 +164,6 @@ function convert_input_and_batch_indices_to_array(dl::DataLoader{T, BT}, batch::
input, output
end

function optimize_for_one_epoch!(opt::Optimizer, model, ps::Union{Tuple, NamedTuple}, dl::DataLoader{T, CT, Nothing}, batch::Batch, loss) where {T, AT<:AbstractArray{T, 3}, BT<:NamedTuple{(:q, :p), Tuple{AT, AT}}, CT<:Union{AT, BT}}
count = 0
total_error = T(0)
batches = batch(dl)
@views for batch_indices in batches
count += 1
# these `copy`s should not be necessary! coming from a Zygote problem!
input_nt, output_nt = convert_input_and_batch_indices_to_array(dl, batch, batch_indices)
loss_value, pullback = Zygote.pullback(ps -> loss(model, ps, input_nt, output_nt), ps)
total_error += loss_value
dp = pullback(one(loss_value))[1]
optimization_step!(opt, model, ps, dp)
end
total_error / count
end

function optimize_for_one_epoch!(opt::Optimizer, model, ps::Union{Tuple, NamedTuple}, dl::DataLoader{T, AT, BT}, batch::Batch) where {T, T1, AT<:AbstractArray{T, 3}, BT<:AbstractArray{T1, 3}}
optimize_for_one_epoch!(opt, model, ps, dl, batch, loss)
end
83 changes: 83 additions & 0 deletions src/data_loader/optimize.jl
@@ -0,0 +1,83 @@
@doc raw"""
Optimize for an entire epoch. For this you have to supply:
- an instance of the optimizer.
- the neural network model
- the parameters of the model
- the data (in form of `DataLoader`)
- an instance of `Batch` that contains `batch_size` (and optionally `seq_length`)
With the optional argument:
- the loss, which takes the `model`, the parameters `ps`, an input batch and an output batch as arguments.
The output of `optimize_for_one_epoch!` is the average loss over all batches of the epoch:
```math
output = \frac{1}{\mathtt{steps\_per\_epoch}}\sum_{t=1}^\mathtt{steps\_per\_epoch}loss(\theta^{(t-1)}).
```
The average loss comes at no extra cost because any **reverse differentiation** routine always has two outputs: a pullback and the value of the function it is differentiating. In the case of Zygote: `loss_value, pullback = Zygote.pullback(ps -> loss(ps), ps)` (if the loss only depends on the parameters).
"""
function optimize_for_one_epoch!(opt::Optimizer, model, ps::Union{Tuple, NamedTuple}, dl::DataLoader{T, AT, BT}, batch::Batch, loss) where {T, T1, AT<:AbstractArray{T, 3}, BT<:AbstractArray{T1, 3}}
count = 0
total_error = T(0)
batches = batch(dl)
@views for batch_indices in batches
count += 1
# these `copy`s should not be necessary! coming from a Zygote problem!
input_batch = copy(dl.input[:, :, batch_indices])
output_batch = copy(dl.output[:, :, batch_indices])
loss_value, pullback = Zygote.pullback(ps -> loss(model, ps, input_batch, output_batch), ps)
total_error += loss_value
dp = pullback(one(loss_value))[1]
optimization_step!(opt, model, ps, dp)
end
total_error / count
end

function optimize_for_one_epoch!(opt::Optimizer, model, ps::Union{Tuple, NamedTuple}, dl::DataLoader{T, CT, Nothing}, batch::Batch, loss::NetworkLoss) where {T, AT<:AbstractArray{T, 3}, BT<:NamedTuple{(:q, :p), Tuple{AT, AT}}, CT<:Union{AT, BT}}
count = 0
total_error = T(0)
batches = batch(dl)
@views for batch_indices in batches
count += 1
# these `copy`s should not be necessary! coming from a Zygote problem!
input_nt, output_nt = convert_input_and_batch_indices_to_array(dl, batch, batch_indices)
loss_value, pullback = Zygote.pullback(ps -> loss(model, ps, input_nt, output_nt), ps)
total_error += loss_value
dp = pullback(one(loss_value))[1]
optimization_step!(opt, model, ps, dp)
end
total_error / count
end
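
As an aside to the docstring above: the reverse-mode pattern it describes can be sketched in isolation with a toy loss (all names and shapes below are purely illustrative, not part of this file):

```julia
using Zygote

# toy parameters, data and loss; purely illustrative
ps = (W = randn(2, 2), b = randn(2))
x, y = randn(2), randn(2)
toy_loss(p) = sum(abs2, p.W * x .+ p.b .- y)

# a single reverse pass returns the loss value together with the pullback ...
loss_value, pullback = Zygote.pullback(toy_loss, ps)
# ... and seeding the pullback with one(loss_value) gives the gradients w.r.t. ps
dp = pullback(one(loss_value))[1]
```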

@doc raw"""
A functor for `Optimizer`. It is called with:
- `nn::NeuralNetwork`
- `dl::DataLoader`
- `batch::Batch`
- `n_epochs::Int`
- `loss`
The last argument is the loss (an instance of `NetworkLoss`) through which `Zygote` differentiates. This argument is optional; if it is not supplied, `GeometricMachineLearning` defaults to an appropriate loss for the `DataLoader`.
"""
function (o::Optimizer)(nn::NeuralNetwork, dl::DataLoader, batch::Batch, n_epochs::Int, loss::NetworkLoss)
progress_object = ProgressMeter.Progress(n_epochs; enabled=true)
loss_array = zeros(n_epochs)
for i in 1:n_epochs
loss_array[i] = optimize_for_one_epoch!(o, nn.model, nn.params, dl, batch, loss)
ProgressMeter.next!(progress_object; showvalues = [(:TrainingLoss, loss_array[i])])
end
loss_array
end

#=
function (o::Optimizer)(nn::NeuralNetwork{<:TransformerIntegrator}, dl::DataLoader, batch::Batch{Int}, n_epochs::Int=1)
loss = TransformerLoss(batch)
o(nn, dl, batch, n_epochs, loss)
end
function (o::Optimizer)(nn::NeuralNetwork{<:NeuralNetworkIntegrator}, dl::DataLoader, batch::Batch{Int}, n_epochs::Int=1)
loss = FeedForwardLoss()
o(nn, dl, batch, n_epochs, loss)
end
(o::Optimizer)(nn::NeuralNetwork{<:NeuralNetworkIntegrator}, dl::DataLoader, batch::Batch{Nothing}, n_epochs::Int=1) = o(nn, dl, Batch(batch.batch_size, 1), n_epochs)
=#
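
Putting the pieces together, a schematic training call with the new functor could look as follows (it mirrors the test added below; the architecture, sizes and epoch count are placeholders):

```julia
using GeometricMachineLearning
using GeometricMachineLearning: FeedForwardLoss

# placeholder data and network; see test/network_losses/losses_and_optimization.jl
dl    = DataLoader(reshape(sin.(0:0.01:2π), 1, :, 1))
nn    = NeuralNetwork(Chain(Dense(1, 5, tanh), Dense(5, 1, identity)), CPU(), eltype(dl))
o     = Optimizer(AdamOptimizer(), nn)
batch = Batch(5, 1)

loss_array = o(nn, dl, batch, 5, FeedForwardLoss())   # average loss per epoch
```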
32 changes: 31 additions & 1 deletion src/loss/loss_routines.jl
@@ -12,7 +12,7 @@ It takes as input:
- `input::Union{Array, NamedTuple}`
- `output::Union{Array, NamedTuple}`
"""
function loss(model::Union{Chain, AbstractExplicitLayer}, ps::Union{Tuple, NamedTuple}, input::AT, output::BT) where {T, T1, AT<:AbstractArray{T, 3}, BT<:AbstractArray{T1, 3}}
function loss(model::Union{Chain, AbstractExplicitLayer}, ps::Union{Tuple, NamedTuple}, input::AT, output::BT) where {T, T1, AT<:AbstractArray{T}, BT<:AbstractArray{T1}}
output_estimate = model(input, ps)
norm(output - output_estimate) / norm(output)
end
@@ -46,6 +46,36 @@ function loss(model::Union{Chain, AbstractExplicitLayer}, ps::Union{Tuple, Named
nt_norm(nt_diff(output_estimate, input)) / nt_norm(input)
end

@doc raw"""
The transformer loss works similarly to the regular loss, but with the difference that ``\mathcal{NN}(input)`` and ``output`` may have different sizes.
It takes as input:
- `model::Union{Chain, AbstractExplicitLayer}`
- `ps::Union{Tuple, NamedTuple}`
- `input::Union{Array, NamedTuple}`
- `output::Union{Array, NamedTuple}`
"""
function transformer_loss(model::Union{Chain, AbstractExplicitLayer}, ps::Union{Tuple, NamedTuple}, input::BT, output::BT) where {T, BT<:AbstractArray{T}}
norm(
crop_array_for_transformer_loss(model(input, ps), output) -
output
) /
norm(input)
end

function transformer_loss(model::Union{Chain, AbstractExplicitLayer}, ps::Union{Tuple, NamedTuple}, input::NT, output::NT) where {T, AT<:AbstractArray{T}, NT<:NamedTuple{(:q, :p,), Tuple{AT, AT}}}
output_estimate = model(input, ps)

nt_norm(
nt_diff(
(q = crop_array_for_transformer_loss(output_estimate.q, output.q),
p = crop_array_for_transformer_loss(output_estimate.p, output.p)),
output
)
) /
nt_norm(input)
end


@doc raw"""
Alternative call of the loss function. This takes as input:
42 changes: 42 additions & 0 deletions src/loss/losses.jl
@@ -0,0 +1,42 @@
abstract type NetworkLoss end

function (loss::NetworkLoss)(nn::NeuralNetwork, input::AT, output::AT) where {AT <: AbstractArray}
loss(nn.model, nn.params, input, output)
end

@doc raw"""
The loss for a transformer network (especially a transformer integrator). The constructor is called with:
- `seq_length::Int`
- `prediction_window::Int` (default is 1).
"""
struct TransformerLoss <: NetworkLoss
seq_length::Int
prediction_window::Int
end

TransformerLoss(seq_length::Int) = TransformerLoss(seq_length, 1)

@doc raw"""
This crops the output array of the neural network so that it conforms with the output it should be compared to. This is needed for the transformer loss.
"""
function crop_array_for_transformer_loss(nn_output::AT, output::AT) where {T, AT<:AbstractArray{T, 3}}
@view nn_output[axes(output, 1), axes(output, 2) .+ size(nn_output, 2) .- size(output, 2), axes(output, 3)]
end
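
For a concrete picture of what this view selects, assume the sizes below (chosen only for illustration, not fixed by the code):

```julia
nn_output = rand(2, 5, 3)   # (dim, seq_length, batch_size)
output    = rand(2, 1, 3)   # (dim, prediction_window, batch_size)

cropped = @view nn_output[axes(output, 1),
                          axes(output, 2) .+ size(nn_output, 2) .- size(output, 2),
                          axes(output, 3)]

size(cropped) == (2, 1, 3)  # true: only the last prediction_window time steps are kept
```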

function (loss::TransformerLoss)(model::Chain, ps::Union{Tuple, NamedTuple}, input::AT, output::AT) where {T, AT <: Union{AbstractArray{T, 2}, AbstractArray{T, 3}}}
input_dim, input_seq_length = size(input)
output_dim, output_prediction_window = size(output)
@assert input_dim == output_dim
@assert input_seq_length == loss.seq_length
@assert output_prediction_window == loss.prediction_window

predicted_output_uncropped = model(input, ps)
predicted_output_cropped = crop_array_for_transformer_loss(predicted_output_uncropped, output)
norm(predicted_output_cropped - output) / norm(output)
end

struct FeedForwardLoss <: NetworkLoss end

function (loss::FeedForwardLoss)(model::Chain, ps::Union{Tuple, NamedTuple}, input::AT, output::AT) where {AT <: AbstractArray}
norm(model(input, ps) - output) / norm(output)
end
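
Since every concrete loss only has to provide the four-argument method, user-defined losses can plug into the same `NetworkLoss` interface; a hypothetical sketch (assuming it is defined where `NetworkLoss` and `Chain` are in scope):

```julia
# hypothetical example, not part of this file
struct MeanAbsoluteErrorLoss <: NetworkLoss end

function (loss::MeanAbsoluteErrorLoss)(model::Chain, ps::Union{Tuple, NamedTuple}, input::AT, output::AT) where {AT <: AbstractArray}
    # mean absolute deviation between network prediction and target
    sum(abs, model(input, ps) - output) / length(output)
end
```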
27 changes: 27 additions & 0 deletions test/network_losses/losses_and_optimization.jl
@@ -0,0 +1,27 @@
using GeometricMachineLearning
using GeometricMachineLearning: FeedForwardLoss
using Test
import Random

Random.seed!(123)

const sin_vector = sin.(0:0.01:2π)
const dl = DataLoader(reshape(sin_vector, 1, length(sin_vector), 1))

function setup_network(dl::DataLoader{T}) where T
arch = Chain(Dense(1, 5, tanh), ResNet(5, tanh), Dense(5, 1, identity))
NeuralNetwork(arch, CPU(), T)
end

function train_network(; n_epochs=5)
nn = setup_network(dl)
loss = FeedForwardLoss()

o = Optimizer(AdamOptimizer(), nn)
batch = Batch(5, 1)
loss_array = o(nn, dl, batch, n_epochs, loss)
T = eltype(dl)
@test loss_array[end] / loss_array[1] < T(0.1)
end

train_network()
2 changes: 2 additions & 0 deletions test/runtests.jl
@@ -46,6 +46,8 @@ using SafeTestsets
@safetestset "Optimizer functor with data loader for Adam " begin include("data_loader/optimizer_functor_with_adam.jl") end
@safetestset "Test data loader for a tensor (q and p data) " begin include("data_loader/draw_batch_for_tensor_test.jl") end

@safetestset "Test NetworkLoss + Optimizer " begin include("network_losses/losses_and_optimization.jl") end

@safetestset "Test parallel inverses " begin include("kernels/tensor_inverse.jl") end
@safetestset "Test parallel Cayley " begin include("kernels/tensor_cayley.jl") end

