Merge pull request #150 from JuliaGNI/linear_symplectic_transformer
Linear symplectic transformer
michakraus authored May 16, 2024
2 parents 14a9295 + 8eecb10 commit 8a2f2ef
Showing 54 changed files with 1,348 additions and 160 deletions.
24 changes: 20 additions & 4 deletions docs/Makefile
@@ -11,11 +11,13 @@ latex: latex_no_pdf
$(MAKE) compile_tex;
$(MAKE) compile_tex

latex_no_pdf_no_images: install_brenier_two_fluid latex_docs_no_pdf remove_brenier_two_fluid put_figures_outside_of_minted_environment
latex_no_pdf_no_images: install_brenier_two_fluid latex_docs_no_pdf put_figures_outside_of_minted_environment do_correct_quotation_marks make_correct_thrm_and_dfntn_environment

latex_no_pdf: latex_images latex_no_pdf_no_images

html: html_images install_brenier_two_fluid test_docs documenter remove_brenier_two_fluid
html: html_images html_no_images

html_no_images: install_brenier_two_fluid test_docs documenter

test_docs:
cd ..; julia --project=docs -e '; \
@@ -56,7 +58,7 @@ latex_docs_no_pdf:

compile_tex:
cd build; \
xelatex -shell-escape G*.tex;
lualatex -shell-escape G*.tex;

put_figures_outside_of_minted_environment:
sed -i'' -e 's/\"\\\\begin{figure}\\n\\\\includegraphics/DeleteThisAndTheLineBefore\n\\begin{figure}[H]\n\\centering\n\\includegraphics/g' build/G*.tex;
@@ -69,4 +71,18 @@ put_figures_outside_of_minted_environment:
sed -ni'' -e '/DeleteThisAndTheLineBefore/{x;d;};1h;1!{x;p;};${x;p;}' build/G*.tex;
sed -i'' -e '/DeleteThisAndTheLineAfter/{N;s/\n.*//;}' build/G*.tex;
sed -i'' -e '/DeleteThisAndTheLineBefore/d' build/G*.tex;
sed -i'' -e '/DeleteThisAndTheLineAfter/d' build/G*.tex;
sed -i'' -e '/DeleteThisAndTheLineAfter/d' build/G*.tex;
sed -i'' -e 's/\\\\texttt/\\texttt/g' build/G*.tex;
sed -i'' -e 's/\\\\_/\\_/g' build/G*.tex;

make_correct_thrm_and_dfntn_environment:
sed -i'' -e 's/{\\textbackslash}begin\\{thrm\\}/\\begin{thrm}/g' build/G*.tex;
sed -i'' -e 's/{\\textbackslash}end\\{thrm\\}/\\end{thrm}/g' build/G*.tex;
sed -i'' -e 's/{\\textbackslash}label\\{th:\([a-zA-Z]*\)\\}/\\label{th:\1}/g' build/G*.tex;
sed -i'' -e 's/{\\textbackslash}begin\\{dfntn\\}/\\begin{dfntn}/g' build/G*.tex;
sed -i'' -e 's/{\\textbackslash}end\\{dfntn\\}/\\end{dfntn}/g' build/G*.tex;
sed -i'' -e 's/{\\textbackslash}label\\{def:\([a-zA-Z]*\)\\}/\\label{th:\1}/g' build/G*.tex;

do_correct_quotation_marks:
sed -i'' -e 's/{\\textquotedbl}/"/g' build/G*.tex;
sed -i'' -e 's/ "/ ``/g' build/G*.tex
1 change: 1 addition & 0 deletions docs/Project.toml
@@ -4,5 +4,6 @@ DocumenterCitations = "daee34ce-89f3-4625-b898-19384cb65244"
GeometricIntegrators = "dcce2d33-59f6-5b8d-9047-0defad88ae06"
GeometricMachineLearning = "194d25b2-d3f5-49f0-af24-c124f4aa80cc"
GeometricProblems = "18cb22b4-ad41-5c80-9c5f-710df63fbdc9"
LaTeXStrings = "b964fa9f-0449-5b57-a5c2-d3ea65f4040f"
Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
56 changes: 50 additions & 6 deletions docs/make.jl
@@ -1,6 +1,7 @@
using GeometricMachineLearning
using Documenter
using DocumenterCitations
using Markdown
# using Weave

# this is necessary to avoid warnings. See https://documenter.juliadocs.org/dev/man/syntax/
@@ -33,8 +34,12 @@ const output_type = isempty(ARGS) ? :html : ARGS[1] == "html_output" ? :html : :
const format = output_type == :html ? html_format : latex_format

function html_graphics(path::String; kwargs...)
light_string = """<object type="image/svg+xml" class="display-light-only" data=$(joinpath(buildpath, path * ".png"))></object>"""
dark_string = """<object type="image/svg+xml" class="display-dark-only" data=$(joinpath(buildpath, path * "_dark.png"))></object>"""
light_path = joinpath(path * ".png")
dark_path = joinpath(path * "_dark.png")
light_string = """<object type="image/svg+xml" class="display-light-only" data=$(joinpath(buildpath, light_path))></object>"""
dark_string = """<object type="image/svg+xml" class="display-dark-only" data=$(joinpath(buildpath, dark_path))></object>"""
@assert isfile(light_path) "No file found for " * light_path * "!"
@assert isfile(dark_path) "No file found for " * dark_path * "!"
Docs.HTML(light_string, dark_string)
end

@@ -53,6 +58,37 @@ function include_graphics(path::String; kwargs...)
Main.output_type == :html ? html_graphics(path; kwargs...) : latex_graphics(path; kwargs...)
end

function theorem(statement::String, name::Nothing; label::Union{Nothing, String} = nothing)
if Main.output_type == :html
Markdown.parse("__Theorem:__ *" * statement * "*")
else
theorem_label = isnothing(label) ? "" : raw"\label{th:" * label * raw"}"
Markdown.parse(raw"\begin{thrm}" * statement * theorem_label * raw"\end{thrm}")
end
end

function theorem(statement::String, name::String; label::Union{Nothing, String} = nothing)
if Main.output_type == :html
Markdown.parse("__Theorem (" * name * "):__ *" * statement * "*")
else
theorem_label = isnothing(label) ? "" : raw"\label{th:" * label * raw"}"
Markdown.parse(raw"\begin{thrm}[" * name * "]" * statement * theorem_label * raw"\end{thrm}")
end
end

function theorem(statement::String; name::Union{Nothing, String} = nothing, label::Union{Nothing, String} = nothing)
theorem(statement, name; label = label)
end

function definition(statement::String; label::Union{Nothing, String} = nothing)
if Main.output_type == :html
Markdown.parse("__Definition:__ *" * statement * "*")
else
theorem_label = isnothing(label) ? "" : raw"\label{def:" * label * raw"}"
Markdown.parse(raw"\begin{dfntn}" * statement * theorem_label * raw"\end{dfntn}")
end
end

makedocs(;
plugins = [bib],
modules = [GeometricMachineLearning],
@@ -62,10 +98,6 @@ makedocs(;
format = format,
pages=[
"Home" => "index.md",
"Architectures" => [
"SympNet" => "architectures/sympnet.md",
"Symplectic Autoencoders" => "architectures/symplectic_autoencoder.md",
],
"Manifolds" => [
"Concepts from General Topology" => "manifolds/basic_topology.md",
"General Theory on Manifolds" => "manifolds/manifolds.md",
@@ -96,9 +128,20 @@ makedocs(;
"BFGS Optimizer" => "optimizers/bfgs_optimizer.md",
],
"Special Neural Network Layers" => [
"Sympnet Gradient Layers" => "layers/sympnet_gradient.md",
"Volume-Preserving Layers" => "layers/volume_preserving_feedforward.md",
"Attention" => "layers/attention_layer.md",
"Multihead Attention" => "layers/multihead_attention_layer.md",
"Linear Symplectic Attention" => "layers/linear_symplectic_attention.md",
],
"Architectures" => [
"Symplectic Autoencoders" => "architectures/symplectic_autoencoder.md",
"Neural Network Integrators" => "architectures/neural_network_integrators.md",
"SympNet" => "architectures/sympnet.md",
"Volume-Preserving FeedForward" => "architectures/volume_preserving_feedforward.md",
"Standard Transformer" => "architectures/transformer.md",
"Volume-Preserving Transformer" => "architectures/volume_preserving_transformer.md",
"Linear Symplectic Transformer" => "architectures/linear_symplectic_transformer.md",
],
"Data Loader" =>[
"Routines" => "data_loader/data_loader.md",
@@ -116,6 +159,7 @@ makedocs(;
"MNIST" => "tutorials/mnist_tutorial.md",
"Grassmann manifold" => "tutorials/grassmann_layer.md",
"Volume-Preserving Attention" => "tutorials/volume_preserving_attention.md",
"Linear Symplectic Transformer" => "tutorials/linear_symplectic_transformer.md",
],
"References" => "references.md",
"Library" => "library.md",
72 changes: 72 additions & 0 deletions docs/src/GeometricMachineLearning.bib
@@ -152,6 +152,34 @@ @article{vaswani2017attention
year={2017}
}

@article{patwardhan2023transformers,
title={Transformers in the real world: A survey on nlp applications},
author={Patwardhan, Narendra and Marrone, Stefano and Sansone, Carlo},
journal={Information},
volume={14},
number={4},
pages={242},
year={2023},
publisher={MDPI}
}

@article{hemmasian2023reduced,
title={Reduced-order modeling of fluid flows with transformers},
author={Hemmasian, AmirPouya and Barati Farimani, Amir},
journal={Physics of Fluids},
volume={35},
number={5},
year={2023},
publisher={AIP Publishing}
}

@article{solera2023beta,
title={$\beta$-Variational autoencoders and transformers for reduced-order modelling of fluid flows},
author={Solera-Rico, Alberto and Vila, Carlos Sanmiguel and G{\'o}mez, MA and Wang, Yuning and Almashjary, Abdulrahman and Dawson, Scott and Vinuesa, Ricardo},
journal={arXiv preprint arXiv:2304.03571},
year={2023}
}

@article{brantner2023symplectic,
title={Symplectic autoencoders for Model Reduction of Hamiltonian Systems},
author={Brantner, Benedikt and Kraus, Michael},
@@ -166,6 +194,13 @@ @article{brantner2023structure
year = {2023}
}

@article{brantner2024volume,
author = {Brantner, Benedikt and de Romemont, Guillaume and Kraus, Michael and Li, Zeyuan},
title = {Volume-Preserving Transformers for Learning Time Series Data with Structure},
journal = {arXiv preprint arXiv:2312.11166v2},
year = {2024}
}

@article{lin2008riemannian,
title={Riemannian manifold learning},
author={Lin, Tong and Zha, Hongbin},
@@ -273,4 +308,41 @@ @article{feng1998step
pages={193--202},
year={1998},
publisher={JSTOR}
}

@inproceedings{feng1987symplectic,
title={The symplectic methods for the computation of Hamiltonian equations},
author={Feng, Kang and Qin, Meng-zhao},
booktitle={Numerical Methods for Partial Differential Equations: Proceedings of a Conference held in Shanghai, PR China, March 25--29, 1987},
pages={1--37},
year={1987},
organization={Springer}
}

@article{ge1988approximation,
title={On the approximation of linear Hamiltonian systems},
author={Ge, Zhong and Feng, Kang},
journal={Journal of Computational Mathematics},
pages={88--97},
year={1988},
publisher={JSTOR}
}

@misc{Kraus:2020:GeometricIntegrators,
title={GeometricIntegrators.jl: Geometric Numerical Integration in Julia},
author={Kraus, Michael},
year={2020},
howpublished={\url{https://github.com/JuliaGNI/GeometricIntegrators.jl}},
doi={10.5281/zenodo.3648325}
}

@article{hochreiter1997long,
title={Long short-term memory},
author={Hochreiter, Sepp and Schmidhuber, J{\"u}rgen},
journal={Neural computation},
volume={9},
number={8},
pages={1735--1780},
year={1997},
publisher={MIT press}
}
13 changes: 13 additions & 0 deletions docs/src/architectures/linear_symplectic_transformer.md
@@ -0,0 +1,13 @@
# Linear Symplectic Transformer

The linear symplectic transformer consists of a combination of [linear symplectic attention](@ref "Linear Symplectic Attention") and [gradient](@ref "SympNet Gradient Layer") layers and is visualized below:

```@example
Main.include_graphics("../tikz/linear_symplectic_transformer"; caption = raw"Visualization of the linear symplectic transformer architecutre. \texttt{n\_sympnet} refers to the number of SympNet layers (\texttt{n\_sympnet=2} in this figure) and \texttt{L} refers to the number of transformer blocks (\texttt{L=1} in this figure).", width = .3) # hide
```

## Library Functions

```@docs; canonical=false
LinearSymplecticTransformer
```
66 changes: 66 additions & 0 deletions docs/src/architectures/neural_network_integrators.md
@@ -0,0 +1,66 @@
# Neural Network Integrators

In `GeometricMachineLearning` we can divide most neural network architectures (that are used for applications to physical systems) into two categories: autoencoders and integrators. *Integrator* in its most general form refers to an approximation of the flow of an ODE (see [the section on the existence and uniqueness theorem](@ref "The Existence-And-Uniqueness Theorem")) by a numerical scheme. Traditionally these numerical schemes were constructed by defining certain relationships between a known time step ``z^{(t)}`` and a future unknown one ``z^{(t+1)}`` [hairer2006geometric, leimkuhler2004simulating](@cite):

```math
f(z^{(t)}, z^{(t+1)}) = 0.
```

One usually refers to such a relationship as an "integration scheme". If this relationship can be reformulated as

```math
z^{(t+1)} = g(z^{(t)}),
```

then we refer to the scheme as *explicit*; if it cannot be reformulated in such a way, we refer to it as *implicit*. Implicit schemes are typically more expensive to solve than explicit ones. The `Julia` library `GeometricIntegrators` [Kraus:2020:GeometricIntegrators](@cite) offers a wide variety of integration schemes, both implicit and explicit.
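
As a simple illustration (two standard textbook schemes, used here only as examples): for an ODE ``\dot{z} = F(z)`` with step size ``h``, the explicit Euler method ``z^{(t+1)} = z^{(t)} + hF(z^{(t)})`` is already of the explicit form above, whereas the implicit midpoint rule

```math
z^{(t+1)} = z^{(t)} + hF\left(\frac{z^{(t)} + z^{(t+1)}}{2}\right)
```

contains ``z^{(t+1)}`` on both sides and in general has to be solved iteratively.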

The neural network integrators in `GeometricMachineLearning` (the corresponding type is [`NeuralNetworkIntegrator`](@ref)) are all explicit integration schemes where the function ``g`` above is modeled with a neural network.

Neural networks, as an alternative to traditional methods, are employed because of (i) potentially superior performance and (ii) an ability to learn unknown dynamics from data.
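
The following minimal sketch (plain `Julia` with made-up weights and dimensions; not the library's [`NeuralNetworkIntegrator`](@ref) implementation) illustrates what such an explicit neural-network update looks like:

```julia
# Explicit one-step update z⁽ᵗ⁺¹⁾ = g(z⁽ᵗ⁾), where g is a small feedforward map.
dim, width = 2, 10
W₁, b₁ = 0.1 * randn(width, dim), zeros(width)
W₂, b₂ = 0.1 * randn(dim, width), zeros(dim)

g(z) = z .+ W₂ * tanh.(W₁ * z .+ b₁) .+ b₂    # no equation has to be solved for z⁽ᵗ⁺¹⁾

function integrate(g, z, n_steps)
    trajectory = [z]
    for _ in 1:n_steps
        z = g(z)                              # each new step depends only on the previous one
        push!(trajectory, z)
    end
    trajectory
end

integrate(g, [1.0, 0.0], 5)
```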

## Multi-step methods

The term *multi-step method* [feng1987symplectic, ge1988approximation](@cite) refers to schemes of the form[^1]:

[^1]: We again assume that all the steps up to and including ``t`` are known.

```math
f(z^{(t - \mathtt{sl} + 1)}, z^{(t - \mathtt{sl} + 2)}, \ldots, z^{(t)}, z^{(t + 1)}, \ldots, z^{(t + \mathtt{pw})}) = 0,
```
where `sl` is short for *sequence length* and `pw` is short for *prediction window*. In contrast to traditional single-step methods, `sl` and `pw` can be greater than 1. An explicit multi-step method has the following form:

```math
[z^{(t+1)}, \ldots, z^{(t+\mathtt{pw})}] = g(z^{(t - \mathtt{sl} + 1)}, \ldots, z^{(t)}).
```
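
A hedged sketch of the shapes involved (plain `Julia`; the linear map ``A`` is only a stand-in for a trained multi-step network):

```julia
sl, pw, dim = 4, 2, 3                      # sequence length, prediction window, system dimension

A = 0.1 * randn(dim * pw, dim * sl)        # hypothetical learned map (stand-in for a network)
g(history) = reshape(A * vec(history), dim, pw)

history    = randn(dim, sl)                # columns are the known steps z⁽ᵗ⁻ˢˡ⁺¹⁾, …, z⁽ᵗ⁾
prediction = g(history)                    # columns are the predicted steps z⁽ᵗ⁺¹⁾, …, z⁽ᵗ⁺ᵖʷ⁾
size(prediction)                           # (dim, pw)
```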

There are essentially two ways to construct multi-step methods with neural networks: the older one uses recurrent neural networks such as long short-term memory cells (LSTMs, [hochreiter1997long](@cite)) and the newer one uses transformer neural networks [vaswani2017attention](@cite). Both approaches have been successfully employed to learn multi-step methods (see [fresca2021comprehensive, lee2020model](@cite) for the former and [hemmasian2023reduced, solera2023beta, brantner2024volume](@cite) for the latter), but because the transformer architecture exhibits superior performance on modern hardware and can be imbued with geometric properties, it is recommended to always use a transformer-derived architecture when dealing with time series[^2].

[^2]: `GeometricMachineLearning` also has an LSTM implementation, but this may be deprecated in the future.

Explicit multi-step methods derived from the transformer are always subtypes of [`TransformerIntegrator`](@ref) in `GeometricMachineLearning`. Currently the [standard transformer](@ref "Standard Transformer"), the [volume-preserving transformer](@ref "Volume-Preserving Transformer") and the [linear symplectic transformer](@ref "Linear Symplectic Transformer") are implemented.

## Library Functions

```@docs; canonical=false
NeuralNetworkIntegrator
TransformerIntegrator
```

## References

```@bibliography
Pages = []
Canonical = false
hairer2006geometric
leimkuhler2004simulating
Kraus:2020:GeometricIntegrators
feng1987symplectic
ge1988approximation
hochreiter1997long
vaswani2017attention
fresca2021comprehensive
lee2020model
hemmasian2023reduced
solera2023beta
brantner2024volume
```
2 changes: 1 addition & 1 deletion docs/src/architectures/sympnet.md
@@ -1,4 +1,4 @@
# SympNet
# SympNet Architecture

This document discusses the SympNet architecture and its implementation in `GeometricMachineLearning.jl`.

Expand Down
22 changes: 22 additions & 0 deletions docs/src/architectures/transformer.md
@@ -0,0 +1,22 @@
# Standard Transformer

The transformer is a relatively modern neural network architecture [vaswani2017attention](@cite) that has come to dominate the field of natural language processing (NLP, [patwardhan2023transformers](@cite)) and has replaced the previously dominant long short-term memory cells (LSTMs, [hochreiter1997long](@cite)). Its success is due to a variety of factors:
- unlike LSTMs, it consists of very simple building blocks and hence is easier to interpret mathematically,
- it is very flexible in its application and the data it is fed do not have to conform to a rigid pattern,
- transformers utilize modern hardware (especially GPUs) very effectively.

The transformer architecture is sketched below:

```@example
Main.include_graphics("../tikz/transformer_encoder") # hide
```

It is nothing more than a combination of a [multihead attention layer](@ref "Multihead Attention") and a residual neural network[^1] (ResNet).

[^1]: A ResNet is nothing more than a neural network to whose output we again add the input, i.e. every ResNet is of the form ``\mathrm{ResNet}(x) = x + \mathcal{NN}(x)``.
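
A minimal sketch of this composition idea (plain `Julia`; `inner` is an arbitrary stand-in for the attention or feedforward part, not a library layer):

```julia
dim = 4
W, b = 0.1 * randn(dim, dim), zeros(dim)

inner(x)  = tanh.(W * x .+ b)     # stand-in network NN
resnet(x) = x .+ inner(x)         # ResNet(x) = x + NN(x), as in the footnote

resnet(randn(dim))
```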

## Library Functions

```@docs; canonical=false
StandardTransformerIntegrator
```
23 changes: 23 additions & 0 deletions docs/src/architectures/volume_preserving_feedforward.md
@@ -0,0 +1,23 @@
# Volume-Preserving Feedforward Neural Network

## Neural network architecture

The constructor produces the following architecture[^1]:

[^1]: Based on the input arguments `n_linear` and `n_blocks`. In this example `init_upper` is set to false, which means that the first layer is of type *lower* followed by a layer of type *upper*.

```@example
Main.include_graphics("../tikz/vp_feedforward") # hide
```

Here *LinearLowerLayer* performs ``x \mapsto x + Lx`` and *NonLinearLowerLayer* performs ``x \mapsto x + \sigma(Lx + b)``. The activation function ``\sigma`` is the fourth input argument to the constructor and defaults to `tanh`.
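
A hedged sketch of these two maps (plain `Julia`, not the library implementation; for illustration ``L`` is taken to be strictly lower-triangular, in which case the Jacobians of both maps are lower-triangular with unit diagonal and hence volume-preserving):

```julia
n, σ = 3, tanh
L = [i > j ? randn() : 0.0 for i in 1:n, j in 1:n]   # strictly lower-triangular matrix
b = randn(n)

linear_lower(x)    = x .+ L * x               # x ↦ x + Lx
nonlinear_lower(x) = x .+ σ.(L * x .+ b)      # x ↦ x + σ(Lx + b)

x = randn(n)
linear_lower(x), nonlinear_lower(x)
```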

## Note on SympNets

As [SympNets](@ref "SympNet Architecture") are symplectic maps, they also conserve phase space volume and therefore form a subcategory of volume-preserving feedforward layers.

## Library Functions

```@docs; canonical=false
VolumePreservingFeedForward
```