
Commit

Merge pull request #154 from JuliaGNI/rework_optimizer_docs
Rework optimizer docs
michakraus authored Jun 14, 2024
2 parents 87e1f5d + f075a20 commit e21cc9e
Showing 73 changed files with 2,037 additions and 653 deletions.
13 changes: 11 additions & 2 deletions .github/workflows/Documenter.yml
@@ -20,18 +20,27 @@ jobs:
sudo apt-get install texlive-science
- name: Make tikz images
run: make all -C docs/src/tikz
# NOTE: Python is necessary for the pre-rendering (minification) step; this block was copied from another workflow and may not be strictly needed
- name: Install python
uses: actions/setup-python@v5
with:
python-version: '3.8'
- name: Install binary dependencies
run: sudo apt-get update && sudo apt-get install -y xorg-dev mesa-utils xvfb libgl1 freeglut3-dev libxrandr-dev libxinerama-dev libxcursor-dev libxi-dev libxext-dev
- uses: julia-actions/setup-julia@latest
with:
version: '1'
- name: Install BrenierTwoFluid package
run: |
cd docs
make install_brenier_two_fluid test_docs
DISPLAY=:0 xvfb-run -s '-screen 0 1024x768x24' make install_brenier_two_fluid test_docs
cd ..
- name: Make docs (call julia documenter)
run: julia --project=docs docs/make.jl html_output
run: DISPLAY=:0 xvfb-run -s '-screen 0 1024x768x24' julia --project=docs docs/make.jl html_output
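# the xvfb-run wrappers above provide a virtual X display so that GLMakie can create an OpenGL context on the headless CI runner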
- uses: julia-actions/julia-buildpkg@v1
- uses: julia-actions/julia-docdeploy@v1
with:
prefix: xvfb-run
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
DOCUMENTER_KEY: ${{ secrets.DOCUMENTER_KEY }}
7 changes: 4 additions & 3 deletions .github/workflows/Latex.yml
@@ -25,15 +25,16 @@ jobs:
- name: install BrenierTwoFluid
run: |
cd docs
make install_brenier_two_fluid test_docs
DISPLAY=:0 xvfb-run -s '-screen 0 1024x768x24' make install_brenier_two_fluid test_docs
cd ..
- name: make tex document
run: julia --project=docs --threads=2 docs/make.jl latex_output
run: DISPLAY=:0 xvfb-run -s '-screen 0 1024x768x24' julia --project=docs --threads=2 docs/make.jl latex_output
- name: Some sed magic
run: |
make copy_png_files -C docs
make put_figures_outside_of_minted_environment -C docs
make do_correct_quotation_marks -C docs
make make_correct_thrm_and_dfntn_and_xmpl_and_proof_environment -C docs
make make_correct_thrm_and_dfntn_and_xmpl_and_rmrk_and_proof_environment -C docs
- name: compile tex document
run: |
cd docs/build
2 changes: 2 additions & 0 deletions Project.toml
@@ -18,6 +18,7 @@ GeometricSolutions = "7843afe4-64f4-4df4-9231-049495c56661"
HDF5 = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f"
InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
LazyArrays = "5078a376-72f3-5289-bfd5-ec5146d43c02"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
NNlib = "872c559c-99b0-510c-b3b7-b6c96a88d5cd"
Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
@@ -27,6 +28,7 @@ SafeTestsets = "1bc83da4-3b8d-516f-aca4-4fe02f6d838f"
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
TimerOutputs = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f"
UpdateJulia = "770da0de-323d-4d28-9202-0e205c1e0aff"
Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"

[compat]
15 changes: 11 additions & 4 deletions docs/Makefile
@@ -11,7 +11,7 @@ latex: latex_no_pdf
$(MAKE) compile_tex;
$(MAKE) compile_tex

latex_no_pdf_no_images: install_brenier_two_fluid latex_docs_no_pdf put_figures_outside_of_minted_environment do_correct_quotation_marks make_correct_thrm_and_dfntn_and_xmpl_and_proof_environment
latex_no_pdf_no_images: install_brenier_two_fluid latex_docs_no_pdf copy_png_files put_figures_outside_of_minted_environment do_correct_quotation_marks make_correct_thrm_and_dfntn_and_xmpl_and_rmrk_and_proof_environment

latex_no_pdf: latex_images latex_no_pdf_no_images

@@ -74,9 +74,8 @@ put_figures_outside_of_minted_environment:
sed -i'' -e '/DeleteThisAndTheLineAfter/d' build/G*.tex;
sed -i'' -e 's/\\\\texttt/\\texttt/g' build/G*.tex;
sed -i'' -e 's/\\\\_/\\_/g' build/G*.tex;
sed -i'' -e 's/tangent_space.png/manifolds\/tangent_space.png/g' build/G*.tex;

make_correct_thrm_and_dfntn_and_xmpl_and_proof_environment:
make_correct_thrm_and_dfntn_and_xmpl_and_rmrk_and_proof_environment:
sed -i'' -e 's/{\\textbackslash}begin\\{thrm\\}/\\begin{thrm}/g' build/G*.tex;
sed -i'' -e 's/{\\textbackslash}end\\{thrm\\}/\\end{thrm}/g' build/G*.tex;
sed -i'' -e 's/{\\textbackslash}label\\{th:\([a-zA-Z]*\)\\}/\\label{th:\1}/g' build/G*.tex;
@@ -88,7 +87,15 @@ make_correct_thrm_and_dfntn_and_xmpl_and_proof_environment:
sed -i'' -e 's/{\\textbackslash}label\\{xmpl:\([a-zA-Z]*\)\\}/\\label{xmpl:\1}/g' build/G*.tex;
sed -i'' -e 's/{\\textbackslash}begin\\{proof\\}/\\begin{proof}/g' build/G*.tex;
sed -i'' -e 's/{\\textbackslash}end\\{proof\\}/\\end{proof}/g' build/G*.tex;
sed -i'' -e 's/{\\textbackslash}begin\\{rmrk\\}/\\begin{rmrk}/g' build/G*.tex;
sed -i'' -e 's/{\\textbackslash}end\\{rmrk\\}/\\end{rmrk}/g' build/G*.tex;
sed -i'' -e 's/{\\textbackslash}label\\{rmrk:\([a-zA-Z]*\)\\}/\\label{rmrk:\1}/g' build/G*.tex;
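# The three rmrk rules above mirror the existing thrm/dfntn/xmpl rules: Documenter's LaTeX
# writer escapes the remark environment to {\textbackslash}begin\{rmrk\} ... {\textbackslash}end\{rmrk\},
# and these sed calls turn it back into \begin{rmrk} ... \end{rmrk} so that the custom
# remark environment compiles in the PDF build.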

do_correct_quotation_marks:
sed -i'' -e 's/{\\textquotedbl}/"/g' build/G*.tex;
sed -i'' -e 's/ "/ ``/g' build/G*.tex
sed -i'' -e 's/ "/ ``/g' build/G*.tex

copy_png_files:
find build/manifolds -name \*.png -exec cp {} build \; ;
find build/optimizers/manifold_related -name \*.png -exec cp {} build \; ;
find build/tutorials -name \*.png -exec cp {} build \;
2 changes: 2 additions & 0 deletions docs/Project.toml
@@ -1,9 +1,11 @@
[deps]
Bibliography = "f1be7e48-bf82-45af-a471-ae754a193061"
BrenierTwoFluid = "698bc5df-bacc-4e45-9592-41ae9e406d75"
CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0"
Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
DocumenterCitations = "daee34ce-89f3-4625-b898-19384cb65244"
ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
GLMakie = "e9467ef8-e4e7-5192-8a1a-b1aee30e663a"
GeometricIntegrators = "dcce2d33-59f6-5b8d-9047-0defad88ae06"
GeometricMachineLearning = "194d25b2-d3f5-49f0-af24-c124f4aa80cc"
GeometricProblems = "18cb22b4-ad41-5c80-9c5f-710df63fbdc9"
27 changes: 27 additions & 0 deletions docs/gl_makie_transparent_background_hack.jl
@@ -0,0 +1,27 @@
# taken from https://docs.makie.org/stable/how-to/save-figure-with-transparency
function calculate_rgba(rgb1, rgb2, rgba_bg)::RGBAf
rgb1 == rgb2 && return RGBAf(GLMakie.red(rgb1), GLMakie.green(rgb1), GLMakie.blue(rgb1), 1)
c1 = Float64.((GLMakie.red(rgb1), GLMakie.green(rgb1), GLMakie.blue(rgb1)))
c2 = Float64.((GLMakie.red(rgb2), GLMakie.green(rgb2), GLMakie.blue(rgb2)))
alphas_fg = 1 .+ c1 .- c2
alpha_fg = clamp(sum(alphas_fg) / 3, 0, 1)
alpha_fg == 0 && return rgba_bg
rgb_fg = clamp.((c1 ./ alpha_fg), 0, 1)
rgb_bg = Float64.((rgba_bg.r, rgba_bg.g, rgba_bg.b))
alpha_final = alpha_fg + (1 - alpha_fg) * rgba_bg.alpha
rgb_final = @. 1 / alpha_final * (alpha_fg * rgb_fg + (1 - alpha_fg) * rgba_bg.alpha * rgb_bg)
return RGBAf(rgb_final..., alpha_final)
end

function alpha_colorbuffer(figure)
scene = figure.scene
bg = scene.backgroundcolor[]
scene.backgroundcolor[] = RGBAf(0, 0, 0, 1)
b1 = copy(colorbuffer(scene))
scene.backgroundcolor[] = RGBAf(1, 1, 1, 1)
b2 = colorbuffer(scene)
scene.backgroundcolor[] = bg
return map(b1, b2) do b1, b2
calculate_rgba(b1, b2, bg)
end
end
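The helper above implements the standard Makie transparency trick: the scene is rendered once over a black and once over a white background, and `calculate_rgba` recovers a per-pixel alpha from the difference. A minimal usage sketch (illustration only, not part of this commit; the plot contents and output filename are made up, and writing the raw image matrix assumes FileIO together with an image backend such as ImageIO):

```julia
using GLMakie
using FileIO: save                     # assumes ImageIO (or another PNG writer) is installed

include("gl_makie_transparent_background_hack.jl")

# build an arbitrary figure
fig = Figure()
ax = Axis(fig[1, 1])
xs = range(0, 2π; length = 200)
lines!(ax, xs, sin.(xs))

# render twice (black/white background) and recover per-pixel transparency
img = alpha_colorbuffer(fig)           # matrix of RGBAf values
save("transparent_plot.png", img)
```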
28 changes: 18 additions & 10 deletions docs/make.jl
@@ -3,6 +3,7 @@ using Documenter
using DocumenterCitations
using Markdown
using Bibliography
using LaTeXStrings
# using Weave

# this is necessary to avoid warnings. See https://documenter.juliadocs.org/dev/man/syntax/
@@ -106,6 +107,16 @@ function example(statement::String; label::Union{Nothing, String} = nothing)
end
end

function remark(statement::String; label::Union{Nothing, String} = nothing)
if Main.output_type == :html
Markdown.parse("""!!! info "Remark"
\t $(statement)""")
else
theorem_label = isnothing(label) ? "" : raw"\label{rmrk:" * label * raw"}"
Markdown.parse(raw"\begin{rmrk}" * statement * theorem_label * raw"\end{rmrk}")
end
end
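# A hypothetical call site (illustration, not part of this commit), analogous to how
# `Main.definition` is used from the documentation pages: inside an ```@eval``` block
# one would write, e.g.,
#
#     Main.remark(raw"A retraction only needs to approximate the geodesic to first order.";
#                 label = "retraction")
#
# which renders as an "info" admonition in the HTML output and as a `rmrk` environment
# carrying the label `rmrk:retraction` in the LaTeX output.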

function proof(statement::String)
if Main.output_type == :html
Markdown.parse("""!!! details "Proof"
@@ -146,22 +157,17 @@ makedocs(;
"Riemannian Manifolds" => "manifolds/riemannian_manifolds.md",
"Homogeneous Spaces" => "manifolds/homogeneous_spaces.md",
],
"Special Arrays" => [
"Special Arrays and AD" => [
"Symmetric and Skew-Symmetric Matrices" => "arrays/skew_symmetric_matrix.md",
"Global Tangent Spaces" => "arrays/global_tangent_spaces.md",
],
"Optimizer Framework" => [
"Optimizers" => "Optimizer.md",
"General Optimization" => "optimizers/general_optimization.md",
"Pullbacks" => "pullbacks/computation_of_pullbacks.md",
],
"Optimizer Functions" => [
"Horizontal Lift" => "optimizers/manifold_related/horizontal_lift.md",
"Optimizers" => [
"Optimizers" => "optimizers/optimizer_framework.md",
"Global Sections" => "optimizers/manifold_related/global_sections.md",
"Retractions" => "optimizers/manifold_related/retractions.md",
"Geodesic Retraction" => "optimizers/manifold_related/geodesic.md",
"Cayley Retraction" => "optimizers/manifold_related/cayley.md",
"Adam Optimizer" => "optimizers/adam_optimizer.md",
"Parallel Transport" => "optimizers/manifold_related/parallel_transport.md",
"Optimizer Methods" => "optimizers/optimizer_methods.md",
"BFGS Optimizer" => "optimizers/bfgs_optimizer.md",
],
"Special Neural Network Layers" => [
@@ -197,6 +203,8 @@
"Grassmann manifold" => "tutorials/grassmann_layer.md",
"Volume-Preserving Attention" => "tutorials/volume_preserving_attention.md",
"Linear Symplectic Transformer" => "tutorials/linear_symplectic_transformer.md",
"Adjusting the Loss Function" => "tutorials/adjusting_the_loss_function.md",
"Comparing Optimizers" => "tutorials/optimizer_comparison.md",
],
"References" => "references.md",
"Library" => "library.md",
69 changes: 66 additions & 3 deletions docs/src/GeometricMachineLearning.bib
@@ -61,15 +61,23 @@ @book{lipschutz1965general
author={Seymour Lipschutz},
year={1965},
publisher={McGraw-Hill Book Company},
location={New York City, New York}
address={New York City, New York}
}

@book{bishop1980tensor,
title={Tensor Analysis on Manifolds},
author={Richard L. Bishop and Samuel I. Goldberg},
year={1980},
publisher={Dover Publications},
location={Mineola, New York}
address={Mineola, New York}
}

@book{o1983semi,
title={Semi-Riemannian geometry with applications to relativity},
author={O'neill, Barrett},
year={1983},
publisher={Academic press},
address={New York City, New York}
}

@book{do1992riemannian,
Expand All @@ -85,7 +93,7 @@ @book{wright2006numerical
author={Stephen J. Wright and Jorge Nocedal},
year={2006},
publisher={Springer Science+Business Media},
location={New York, NY}
address={New York, NY}
}

@article{fresca2021comprehensive,
@@ -385,4 +393,59 @@ @article{hochreiter1997long
pages={1735--1780},
year={1997},
publisher={MIT press}
}

@article{celledoni2000approximating,
title={Approximating the exponential from a Lie algebra to a Lie group},
author={Celledoni, Elena and Iserles, Arieh},
journal={Mathematics of Computation},
volume={69},
number={232},
pages={1457--1480},
year={2000}
}

@inproceedings{fraikin2007optimization,
title={Optimization over the Stiefel manifold},
author={Fraikin, Catherine and H{\"u}per, K and Dooren, P Van},
booktitle={PAMM: Proceedings in Applied Mathematics and Mechanics},
volume={7},
number={1},
pages={1062205--1062206},
year={2007},
organization={Wiley Online Library}
}

@article{schlarb2024covariant,
title={Covariant Derivatives on Homogeneous Spaces: Horizontal Lifts and Parallel Transport},
author={Schlarb, Markus},
journal={The Journal of Geometric Analysis},
volume={34},
number={5},
pages={1--43},
year={2024},
publisher={Springer}
}

@article{kong2023momentum,
title={Momentum stiefel optimizer, with applications to suitably-orthogonal attention, and optimal transport},
author={Kong, Lingkai and Wang, Yuqing and Tao, Molei},
journal={arXiv preprint arXiv:2205.14173v3},
year={2023}
}

@MISC{2279304,
TITLE = {Quasi-newton methods: Understanding DFP updating formula},
AUTHOR = {A.G. (https://math.stackexchange.com/users/253273/a-\%ce\%93)},
HOWPUBLISHED = {Mathematics Stack Exchange},
NOTE = {URL:https://math.stackexchange.com/q/2279304 (version: 2017-05-13)}
}

@inproceedings{huang2016riemannian,
title={A Riemannian BFGS method for nonconvex optimization problems},
author={Huang, Wen and Absil, P-A and Gallivan, Kyle A},
booktitle={Numerical Mathematics and Advanced Applications ENUMATH 2015},
pages={627--634},
year={2016},
organization={Springer}
}
26 changes: 0 additions & 26 deletions docs/src/Optimizer.md

This file was deleted.

2 changes: 1 addition & 1 deletion docs/src/architectures/sympnet.md
@@ -190,7 +190,7 @@ There are many $r$-finite activation functions commonly used in neural networks,
- sigmoid $\sigma(x)=\frac{1}{1+e^{-x}}$ for any positive integer $r$,
- tanh $\tanh(x)=\frac{e^x-e^{-x}}{e^x+e^{-x}}$ for any positive integer $r$.

The universal approximation theorems state that we can, in principle, get arbitrarily close to any symplectomorphism defined on $\mathbb{R}^{2d}$. But this does not tell us anything about how to optimize the network. This can be done with any common [neural network optimizer](../Optimizer.md) and these neural network optimizers always rely on a corresponding loss function.
The universal approximation theorems state that we can, in principle, get arbitrarily close to any symplectomorphism defined on $\mathbb{R}^{2d}$. But this does not tell us anything about how to optimize the network. This can be done with any common [neural network optimizer](@ref "Neural Network Optimizers") and these neural network optimizers always rely on a corresponding loss function.
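To make that last point concrete, here is a generic sketch in plain Julia with Zygote (illustration only; this is not GeometricMachineLearning's training API, and the toy model, loss and step size are made up): every optimizer turns the gradient of a loss with respect to the network parameters into a parameter update.

```julia
using Zygote                            # already a dependency of the package

# toy "network": a single linear map with parameters W and b
loss(ps, q, p) = sum(abs2, ps.W * q .+ ps.b .- p)

function train(ps, q, p; steps = 100, η = 0.1)
    for _ in 1:steps
        grad = Zygote.gradient(θ -> loss(θ, q, p), ps)[1]
        # plain gradient descent; Adam, BFGS etc. only differ in how this update is built
        ps = (W = ps.W .- η .* grad.W, b = ps.b .- η .* grad.b)
    end
    return ps
end

ps = train((W = randn(2, 2), b = zeros(2)), randn(2), randn(2))
```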

## Loss function

2 changes: 1 addition & 1 deletion docs/src/arrays/global_tangent_spaces.md
@@ -29,7 +29,7 @@ We should note that we have written all Lie group and Lie algebra actions as sim
Note that the theorem above requires us to find an element ``A\in{}G`` such that ``AE = Y``. If we can find a mapping ``\lambda:\mathcal{M}\to{}G`` we call such a mapping a *global section*.

```@eval
Main.theorem(raw"We call a mapping from ``\lambda:\mathcal{M} \to G`` a homogeneous space to its associated Lie group a **global section** if it satisfies:
Main.definition(raw"We call a mapping from ``\lambda:\mathcal{M} \to G`` a homogeneous space to its associated Lie group a **global section** if it satisfies:
" * Main.indentation * raw"```math
" * Main.indentation * raw"\lambda(Y)E = Y,
" * Main.indentation * raw"```
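As a concrete illustration of this definition (a standard example, not taken from the collapsed part of this file): on the Stiefel manifold the distinct element ``E`` consists of the first ``n`` columns of the identity matrix, and any orthonormal completion ``Y_\perp`` of ``Y`` yields a global section:

```latex
% illustration only: a global section for the Stiefel manifold St(n, N);
% flip the sign of one column of Y_perp if a determinant of +1 is required.
\[
\lambda(Y) = \begin{pmatrix} Y & Y_\perp \end{pmatrix} \in O(N),
\qquad
\lambda(Y)E = \begin{pmatrix} Y & Y_\perp \end{pmatrix}
              \begin{pmatrix} \mathbb{I}_n \\ \mathbb{O} \end{pmatrix}
            = Y.
\]
```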
2 changes: 1 addition & 1 deletion docs/src/manifolds/homogeneous_spaces.md
@@ -22,7 +22,7 @@ The tangent spaces of ``\mathcal{M}`` are of the form ``T_Y\mathcal{M} = \mathfr
Based on this we can perform a splitting of ``\mathfrak{g}`` into two parts:

```@eval
Main.definition(raw"A **splitting of the Lie algebra** ``mathfrak{g}`` at an element of a homogeneous space ``Y`` is a decomposition into a **vertical** and a **horizontal** component, denoted by ``\mathfrak{g} = \mathfrak{g}^{\mathrm{ver},Y} \oplus \mathfrak{g}^{\mathrm{hor},Y}`` such that
Main.definition(raw"A **splitting of the Lie algebra** ``\mathfrak{g}`` at an element of a homogeneous space ``Y`` is a decomposition into a **vertical** and a **horizontal** component, denoted by ``\mathfrak{g} = \mathfrak{g}^{\mathrm{ver},Y} \oplus \mathfrak{g}^{\mathrm{hor},Y}`` such that
" * Main.indentation * raw"1. The *vertical component* ``\mathfrak{g}^{\mathrm{ver},Y}`` is the kernel of the map ``\mathfrak{g}\to{}T_Y\mathcal{M}, V \mapsto VY``, i.e. ``\mathfrak{g}^{\mathrm{ver},Y} = \{V\in\mathfrak{g}:VY = 0\}.``
" * Main.indentation * raw"2. The *horizontal component* ``\mathfrak{g}^{\mathrm{hor},Y}`` is the orthogonal complement of ``\mathfrak{g}^{\mathrm{ver},Y}`` in ``\mathfrak{g}``. It is isomorphic to ``T_Y\mathcal{M}``.
")
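A concrete instance of this splitting (a standard example, given for illustration and not taken from the collapsed hunk above): for the Stiefel manifold with ``\mathfrak{g} = \mathfrak{so}(N)`` and ``Y = E`` given by the first ``n`` columns of the identity, the two components take a simple block form with respect to the Frobenius inner product:

```latex
% illustration only: splitting of so(N) at E (Stiefel case);
% A and C are skew-symmetric, B is an arbitrary (N-n) x n matrix.
\[
\mathfrak{g}^{\mathrm{ver},E}
  = \left\{ \begin{pmatrix} \mathbb{O} & \mathbb{O} \\ \mathbb{O} & C \end{pmatrix} :
            C \in \mathfrak{so}(N-n) \right\},
\qquad
\mathfrak{g}^{\mathrm{hor},E}
  = \left\{ \begin{pmatrix} A & -B^T \\ B & \mathbb{O} \end{pmatrix} :
            A \in \mathfrak{so}(n),\ B \in \mathbb{R}^{(N-n)\times n} \right\}.
\]
```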