diff --git a/.github/workflows/Documenter.yml b/.github/workflows/Documenter.yml index e3df5d1d8..e44e57f1f 100644 --- a/.github/workflows/Documenter.yml +++ b/.github/workflows/Documenter.yml @@ -4,7 +4,6 @@ on: push: branches: - main - tags: '*' pull_request: jobs: @@ -13,31 +12,27 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 + - run: | + sudo apt-get install imagemagick + sudo apt-get install poppler-utils + sudo apt-get install texlive-xetex + sudo apt-get install texlive-science + mkdir docs/src/assets + make all -C docs/src/tikz - uses: julia-actions/setup-julia@latest - - name: Install dependencies - run: | + - run: | julia --project=docs -e ' using Pkg Pkg.develop(PackageSpec(path=pwd())) Pkg.instantiate() Pkg.build() - Pkg.precompile()' - sudo apt-get install imagemagick - sudo apt-get install poppler-utils - sudo apt-get install texlive-xetex - sudo apt-get install texlive-science - -name: Generate tikz pictures - run: | - make all -C src/tikz - - name: Run doctests - run: | - julia --project=docs -e ' + Pkg.precompile() using Documenter: doctest using GeometricMachineLearning doctest(GeometricMachineLearning)' - - name: Build and deploy Documentation - run: julia --project make.jl - working-directory: docs + julia --project=docs docs/make.jl + - uses: julia-actions/julia-buildpkg@v1 + - uses: julia-actions/julia-docdeploy@v1 env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - DOCUMENTER_KEY: ${{ secrets.DOCUMENTER_KEY }} + DOCUMENTER_KEY: ${{ secrets.DOCUMENTER_KEY }} \ No newline at end of file diff --git a/README.md b/README.md index 12eeb3b2b..0f0d2d088 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,9 @@ -
- - -
+ + [![Stable](https://img.shields.io/badge/docs-stable-blue.svg)](https://juliagni.github.io/GeometricMachineLearning.jl/stable) [![Latest](https://img.shields.io/badge/docs-latest-blue.svg)](https://juliagni.github.io/GeometricMachineLearning.jl/latest) @@ -55,4 +57,4 @@ plot(trajectory_to_plot) The optimization of the first layer is done on the Stiefel Manifold $St(n, N)$, and the optimizer used is the manifold version of Adam (see (Brantner, 2023)). ## References -- Brantner B. Generalizing Adam To Manifolds For Efficiently Training Transformers[J]. arXiv preprint arXiv:2305.16901, 2023. \ No newline at end of file +- Brantner B. Generalizing Adam To Manifolds For Efficiently Training Transformers[J]. arXiv preprint arXiv:2305.16901, 2023. diff --git a/docs/Project.toml b/docs/Project.toml index 1dec04c32..ed78630a5 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -1,4 +1,21 @@ [deps] +AbstractNeuralNetworks = "60874f82-5ada-4c70-bd1c-fa6be7711c8a" +BandedMatrices = "aae01518-5342-5314-be14-df237901396f" +ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" +Distances = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7" Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" +DocumenterCitations = "daee34ce-89f3-4625-b898-19384cb65244" +ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" +GeometricBase = "9a0b12b7-583b-4f04-aa1f-d8551b6addc9" +GeometricEquations = "c85262ba-a08a-430a-b926-d29770767bf2" +GeometricIntegrators = "dcce2d33-59f6-5b8d-9047-0defad88ae06" GeometricMachineLearning = "194d25b2-d3f5-49f0-af24-c124f4aa80cc" +InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240" +KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c" +LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +NNlib = "872c559c-99b0-510c-b3b7-b6c96a88d5cd" Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" +ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca" +Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +TimerOutputs = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f" +Zygote = 
"e88e6eb3-aa80-5325-afca-941959d7151f" diff --git a/docs/make.jl b/docs/make.jl index e71674e4e..555e6f329 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -27,14 +27,22 @@ makedocs(; "SympNet" => "architectures/sympnet.md", ], "Manifolds" => [ + "Concepts from General Topology" => "manifolds/basic_topology.md", + "General Theory on Manifolds" => "manifolds/manifolds.md", + "The Inverse Function Theorem" => "manifolds/inverse_function_theorem.md", + "The Submersion Theorem" => "manifolds/submersion_theorem.md", "Homogeneous Spaces" => "manifolds/homogeneous_spaces.md", "Stiefel" => "manifolds/stiefel_manifold.md", "Grassmann" => "manifolds/grassmann_manifold.md", + "Differential Equations and the EAU theorem" => "manifolds/existence_and_uniqueness_theorem.md", ], "Arrays" => [ "Global Tangent Space" => "arrays/stiefel_lie_alg_horizontal.md", ], - "Optimizer Framework" => "Optimizer.md", + "Optimizer Framework" => [ + "Optimizers" => "Optimizer.md", + "General Optimization" => "optimizers/general_optimization.md", + ], "Optimizer Functions" => [ "Horizontal Lift" => "optimizers/manifold_related/horizontal_lift.md", "Global Sections" => "optimizers/manifold_related/global_sections.md", @@ -42,6 +50,7 @@ makedocs(; "Geodesic Retraction" => "optimizers/manifold_related/geodesic.md", "Cayley Retraction" => "optimizers/manifold_related/cayley.md", "Adam Optimizer" => "optimizers/adam_optimizer.md", + "BFGS Optimizer" => "optimizers/bfgs_optimizer.md", ], "Special Neural Network Layers" => [ "Attention" => "layers/attention_layer.md", diff --git a/docs/src/GeometricMachineLearning.bib b/docs/src/GeometricMachineLearning.bib index 9f711ef09..67cabf9f1 100644 --- a/docs/src/GeometricMachineLearning.bib +++ b/docs/src/GeometricMachineLearning.bib @@ -38,3 +38,116 @@ @book{leimkuhler2004simulating year={2004}, publisher={Cambridge university press} } + +@book{lang2012fundamentals, + title={Fundamentals of differential geometry}, + author={Lang, Serge}, + volume={191}, + 
year={2012}, + publisher={Springer Science \& Business Media} +} + +@book{lipschutz1965general, + title={General Topology}, + author={Seymour Lipschutz}, + year={1965}, + publisher={McGraw-Hill Book Company}, + location={New York City, New York} +} + +@book{bishop1980tensor, + title={Tensor Analysis on Manifolds}, + author={Bishop, Richard L. and Goldberg, Samuel I.}, + year={1980}, + publisher={Dover Publications}, + location={Mineola, New York} +} + +@book{wright2006numerical, + title={Numerical optimization}, + author={Nocedal, Jorge and Wright, Stephen J.}, + year={2006}, + publisher={Springer Science+Business Media}, + location={New York, NY} +} + +@article{fresca2021comprehensive, + title={A comprehensive deep learning-based approach to reduced order modeling of nonlinear time-dependent parametrized PDEs}, + author={Fresca, Stefania and Dede’, Luca and Manzoni, Andrea}, + journal={Journal of Scientific Computing}, + volume={87}, + pages={1--36}, + year={2021}, + publisher={Springer} +} + +@article{buchfink2023symplectic, + title={Symplectic model reduction of Hamiltonian systems on nonlinear manifolds and approximation with weakly symplectic autoencoder}, + author={Buchfink, Patrick and Glas, Silke and Haasdonk, Bernard}, + journal={SIAM Journal on Scientific Computing}, + volume={45}, + number={2}, + pages={A289--A311}, + year={2023}, + publisher={SIAM} +} + +@article{peng2016symplectic, + title={Symplectic model reduction of Hamiltonian systems}, + author={Peng, Liqian and Mohseni, Kamran}, + journal={SIAM Journal on Scientific Computing}, + volume={38}, + number={1}, + pages={A1--A27}, + year={2016}, + publisher={SIAM} +} + +@article{luong2015effective, + title={Effective approaches to attention-based neural machine translation}, + author={Luong, Minh-Thang and Pham, Hieu and Manning, Christopher D}, + journal={arXiv preprint arXiv:1508.04025}, + year={2015} +} + +@article{bahdanau2014neural, + title={Neural machine translation by jointly learning to align and 
translate}, + author={Bahdanau, Dzmitry and Cho, Kyunghyun and Bengio, Yoshua}, + journal={arXiv preprint arXiv:1409.0473}, + year={2014} +} + +@article{greif2019decay, + title={Decay of the Kolmogorov N-width for wave problems}, + author={Greif, Constantin and Urban, Karsten}, + journal={Applied Mathematics Letters}, + volume={96}, + pages={216--222}, + year={2019}, + publisher={Elsevier} +} + +@article{blickhan2023registration, + title={A registration method for reduced basis problems using linear optimal transport}, + author={Blickhan, Tobias}, + journal={arXiv preprint arXiv:2304.14884}, + year={2023} +} + +@article{lee2020model, + title={Model reduction of dynamical systems on nonlinear manifolds using deep convolutional autoencoders}, + author={Lee, Kookjin and Carlberg, Kevin T}, + journal={Journal of Computational Physics}, + volume={404}, + pages={108973}, + year={2020}, + publisher={Elsevier} +} + +@article{vaswani2017attention, + title={Attention is all you need}, + author={Vaswani, Ashish and Shazeer, Noam and Parmar, Niki and Uszkoreit, Jakob and Jones, Llion and Gomez, Aidan N and Kaiser, Lukasz and Polosukhin, Illia}, + journal={Advances in neural information processing systems}, + volume={30}, + year={2017} +} diff --git a/docs/src/Optimizer.md b/docs/src/Optimizer.md index f7cbe5fae..2b6005a28 100644 --- a/docs/src/Optimizer.md +++ b/docs/src/Optimizer.md @@ -4,6 +4,8 @@ In order to generalize neural network optimizers to [homogeneous spaces](manifol Starting from an element of the tangent space $T_Y\mathcal{M}$[^1], we need to perform two mappings to arrive at $\mathfrak{g}^\mathrm{hor}$, which we refer to by $\Omega$ and a red horizontal arrow: +[^1]: In practice this is obtained by first using an AD routine on a loss function $L$, and then computing the Riemannian gradient based on this. See the section of the [Stiefel manifold](manifolds/stiefel_manifold.md) for an example of this. 
+ ![](tikz/general_optimization_with_boundary.png) Here the mapping $\Omega$ is a [horizontal lift](optimizers/manifold_related/horizontal_lift.md) from the tangent space onto the **horizontal component of the Lie algebra at $Y$**. @@ -13,6 +15,10 @@ The red line maps the horizontal component at $Y$, i.e. $\mathfrak{g}^{\mathrm{h The $\mathrm{cache}$ stores information about previous optimization steps and is dependent on the optimizer. The elements of the $\mathrm{cache}$ are also in $\mathfrak{g}^\mathrm{hor}$. Based on this the optimer ([Adam](optimizers/adam_optimizer.md) in this case) computes a final velocity, which is the input of a [retraction](optimizers/manifold_related/retractions.md). Because this *update* is done for $\mathfrak{g}^{\mathrm{hor}}\equiv{}T_Y\mathcal{M}$, we still need to perform a mapping, called `apply_section` here, that then finally updates the network parameters. The two red lines are described in [global sections](optimizers/manifold_related/global_sections.md). ## References -- Brantner B. Generalizing Adam To Manifolds For Efficiently Training Transformers[J]. arXiv preprint arXiv:2305.16901, 2023. -[^1]: In practice this is obtained by first using an AD routine on a loss function $L$, and then computing the Riemannian gradient based on this. See the section of the [Stiefel manifold](manifolds/stiefel_manifold.md) for an example of this. 
\ No newline at end of file +```@bibliography +Pages = [] +Canonical = false + +brantner2023generalizing +``` \ No newline at end of file diff --git a/docs/src/assets/logo.png b/docs/src/assets/logo.png deleted file mode 100644 index ebece58bf..000000000 Binary files a/docs/src/assets/logo.png and /dev/null differ diff --git a/docs/src/assets/logo_dark.png b/docs/src/assets/logo_dark.png deleted file mode 100644 index b66bed417..000000000 Binary files a/docs/src/assets/logo_dark.png and /dev/null differ diff --git a/docs/src/layers/attention_layer.md b/docs/src/layers/attention_layer.md index f72adc979..fce2da1a5 100644 --- a/docs/src/layers/attention_layer.md +++ b/docs/src/layers/attention_layer.md @@ -84,5 +84,11 @@ Attention was used before, but always in connection with **recurrent neural netw ## References -- Luong M T, Pham H, Manning C D. Effective approaches to attention-based neural machine translation[J]. arXiv preprint arXiv:1508.04025, 2015. -- Bahdanau D, Cho K, Bengio Y. Neural machine translation by jointly learning to align and translate[J]. arXiv preprint arXiv:1409.0473, 2014. \ No newline at end of file + +```@bibliography +Pages = [] +Canonical = false + +bahdanau2014neural +luong2015effective +``` \ No newline at end of file diff --git a/docs/src/layers/multihead_attention_layer.md b/docs/src/layers/multihead_attention_layer.md index d117ba4ea..c97193316 100644 --- a/docs/src/layers/multihead_attention_layer.md +++ b/docs/src/layers/multihead_attention_layer.md @@ -51,4 +51,10 @@ Because the main task of the $W_i^V$, $W_i^K$ and $W_i^Q$ matrices here is for t ## References -- Vaswani, Ashish, et al. "Attention is all you need." Advances in neural information processing systems 30 (2017). 
\ No newline at end of file + +```@bibliography +Pages = [] +Canonical = false + +vaswani2017attention +``` \ No newline at end of file diff --git a/docs/src/manifolds/basic_topology.md b/docs/src/manifolds/basic_topology.md new file mode 100644 index 000000000..ada034814 --- /dev/null +++ b/docs/src/manifolds/basic_topology.md @@ -0,0 +1,65 @@ +# Basic Concepts of General Topology + +On this page we discuss basic notions of topology that are necessary to define and work with [manifolds](manifolds.md). Here we largely omit concrete examples and only define concepts that are necessary for defining a manifold[^1], namely the properties of being *Hausdorff* and *second countable*. For a wide range of examples and a detailed discussion of the theory see e.g. [lipschutz1965general](@cite). The theory presented here is also (rudimentarily) covered in most differential geometry books such as [lang2012fundamentals](@cite) and [bishop1980tensor](@cite). + + +[^1]: Some authors (see e.g. [lang2012fundamentals](@cite)) do not require these properties. But since they constitute very weak restrictions and are always satisfied by the manifolds relevant for our purposes we require them here. + +__Definition__: A **topological space** is a set ``\mathcal{M}`` for which we define a collection of subsets of ``\mathcal{M}``, which we denote by ``\mathcal{T}`` and call the *open subsets*. ``\mathcal{T}`` further has to satisfy the following three conditions: +1. The empty set and ``\mathcal{M}`` belong to ``\mathcal{T}``. +2. Any union of an arbitrary number of elements of ``\mathcal{T}`` again belongs to ``\mathcal{T}``. +3. Any intersection of a finite number of elements of ``\mathcal{T}`` again belongs to ``\mathcal{T}``. 
+ +Based on this definition of a topological space we can now define what it means to be *Hausdorff*: +__Definition__: A topological space ``\mathcal{M}`` is said to be **Hausdorff** if for any two distinct points ``x,y\in\mathcal{M}`` we can find two open sets ``U_x,U_y\in\mathcal{T}`` s.t. ``x\in{}U_x, y\in{}U_y`` and ``U_x\cap{}U_y=\{\}``. + +We now give the second definition that we need for defining manifolds, that of *second countability*: +__Definition__: A topological space ``\mathcal{M}`` is said to be **second-countable** if we can find a countable subcollection of ``\mathcal{T}`` called ``\mathcal{U}`` s.t. ``\forall{}U\in\mathcal{T}`` and ``x\in{}U`` we can find an element ``V\in\mathcal{U}`` for which ``x\in{}V\sub{}U``. + +We now give a few definitions and results that are needed for the [inverse function theorem](inverse_function_theorem.md) which is essential for practical applications of manifold theory. + +__Definition__: A mapping ``f`` between topological spaces ``\mathcal{M}`` and ``\mathcal{N}`` is called **continuous** if the preimage of every open set is again an open set, i.e. if ``f^{-1}\{U\}\in\mathcal{T}`` for ``U`` open in ``\mathcal{N}`` and ``\mathcal{T}`` the topology on ``\mathcal{M}``. + +__Definition__: A **closed set** of a topological space ``\mathcal{M}`` is one whose complement is an open set, i.e. ``F`` is closed if ``F^c\in\mathcal{T}``, where the superscript ``{}^c`` indicates the complement. For closed sets we thus have the following three properties: +1. The empty set and ``\mathcal{M}`` are closed sets. +2. Any union of a finite number of closed sets is again closed. +3. Any intersection of an arbitrary number of closed sets is again closed. + +__Theorem__: The definition of continuity is equivalent to the following, second definition: ``f:\mathcal{M}\to\mathcal{N}`` is continuous if ``f^{-1}\{F\}\sub\mathcal{M}`` is a closed set for each closed set ``F\sub\mathcal{N}``. 
+ +__Proof__: First assume that ``f`` is continuous according to the first definition and not to the second. Then there is a closed set ``F`` for which ``f^{-1}\{F\}`` is not closed but ``f^{-1}\{F^c\}`` is open. But ``f^{-1}\{F^c\} = \{x\in\mathcal{M}:f(x)\notin{}F\} = (f^{-1}\{F\})^c`` cannot be open, else ``f^{-1}\{F\}`` would be closed. The implication of the first definition under assumption of the second can be shown analogously. + +__Theorem__: The property of a set ``F`` being closed is equivalent to the following statement: If a point ``y`` is such that for every open set ``U`` containing it we have ``U\cap{}F\neq\{\}`` then this point is contained in ``F``. + +__Proof__: We first prove that if a set is closed then the statement holds. Consider a closed set ``F`` and a point ``y\notin{}F`` s.t. every open set containing ``y`` has nonempty intersection with ``F``. But the complement ``F^c`` also is such a set, which is a clear contradiction. Now assume the above statement for a set ``F`` and further assume ``F`` is not closed. Its complement ``F^c`` is thus not open. Now consider the *interior* of this set: ``\mathrm{int}(F^c):=\cup\{U:U\sub{}F^c\}``, i.e. the biggest open set contained within ``F^c``. Hence there must be a point ``y`` which is in ``F^c`` but is not in its interior, else ``F^c`` would be equal to its interior, i.e. would be open. We further must be able to find an open set ``U`` that contains ``y`` but is also contained in ``F^c``, else ``y`` would be an element of ``F``. A contradiction. + +__Definition__: An **open cover** of a topological space ``\mathcal{M}`` is a (not necessarily countable) collection of open sets ``\{U_i\}_{i\in\mathcal{I}}`` s.t. their union contains ``\mathcal{M}``. A **finite open cover** is a collection of a finite number of open sets that cover ``\mathcal{M}``. We say that an open cover is **reducible** to a finite cover if we can find a finite number of elements in the open cover whose union still contains ``\mathcal{M}``. 
+ +__Definition__: A topological space ``\mathcal{M}`` is called **compact** if every open cover is reducible to a finite cover. + +__Theorem__: Consider a continuous function ``f:\mathcal{M}\to\mathcal{N}`` and a compact set ``K\subset\mathcal{M}``. Then ``f(K)`` is also compact. + +__Proof__: Consider an open cover of ``f(K)``: ``\{U_i\}_{i\in\mathcal{I}}``. Then ``\{f^{-1}\{U_i\}\}_{i\in\mathcal{I}}`` is an open cover of ``K`` and hence reducible to a finite cover ``\{f^{-1}\{U_i\}\}_{i\in\{i_1,\ldots,i_n\}}``. But then ``\{U_i\}_{i\in\{i_1,\ldots,i_n\}}`` also covers ``f(K)``. + +__Theorem__: A closed subset of a compact space is compact. + +__Proof__: Call the closed set ``F`` and consider an open cover of this set: ``\{U_i\}_{i\in\mathcal{I}}``. Then this open cover combined with ``F^c`` is an open cover for the entire compact space, hence reducible to a finite cover. Removing ``F^c`` from this finite cover (if it is part of it) leaves finitely many elements of the original cover that still cover ``F``. + +__Theorem__: A compact subset of a Hausdorff space is closed. + +__Proof__: Consider a compact subset ``K``. If ``K`` is not closed, then there has to be a point ``y\notin{}K`` s.t. every open set containing ``y`` intersects ``K``. Because the surrounding space is Hausdorff we can now find the following two collections of open sets: ``\{(U_z, U_{z,y}): z\in{}U_z,\, y\in{}U_{z,y},\, U_z\cap{}U_{z,y}=\{\}\}_{z\in{}K}``. The open cover ``\{U_z\}_{z\in{}K}`` is then reducible to a finite cover ``\{U_z\}_{z\in\{z_1, \ldots, z_n\}}``. The intersection ``\cap_{z\in\{z_1, \ldots, z_n\}}U_{z,y}`` is then an open set that contains ``y`` but has no intersection with ``K``. A contradiction. + +__Theorem__: If ``\mathcal{M}`` is compact and ``\mathcal{N}`` is Hausdorff, then the inverse of a continuous bijective function ``f:\mathcal{M}\to\mathcal{N}`` is again continuous, i.e. ``f(V)`` is an open set in ``\mathcal{N}`` for ``V\in\mathcal{T}``. + +__Proof__: We can equivalently show that every closed set is mapped to a closed set. First consider a closed set ``K\subset\mathcal{M}``; as a closed subset of a compact space it is compact. Its image is again compact and hence closed because ``\mathcal{N}`` is Hausdorff. 
+ +## References + +```@bibliography +Pages = [] +Canonical = false + +bishop1980tensor +lang2012fundamentals +lipschutz1965general +``` diff --git a/docs/src/manifolds/existence_and_uniqueness_theorem.md b/docs/src/manifolds/existence_and_uniqueness_theorem.md new file mode 100644 index 000000000..4aa89da45 --- /dev/null +++ b/docs/src/manifolds/existence_and_uniqueness_theorem.md @@ -0,0 +1,24 @@ +# The Existence-And-Uniqueness Theorem + +In order to prove the existence-and-uniqueness theorem we first need another theorem, the **Banach fixed-point theorem** for which we also need another definition. + +__Definition__: A **contraction mapping** is a map ``T:\mathbb{R}^N\to\mathbb{R}^N`` for which there exists ``q\in[0,1)`` s.t. ``\forall{}x,y\in\mathbb{R}^N,\,||T(x)-T(y)||\leq{}q||x-y||``. + +__Theorem (Banach fixed-point theorem)__: Every **contraction mapping** ``T`` admits a unique fixed point ``x^*`` (i.e. a point ``x^*`` s.t. ``T(x^*)=x^*``) and this point can be found by taking an arbitrary point ``x_0\in\mathbb{R}^N`` and taking the limit ``\lim_{n\to\infty}T^n(x_0)``. + +__Proof (Banach fixed-point theorem)__: Take an arbitrary point ``x_0\in\mathbb{R}^N`` and consider the sequence ``(x_n)_{n\in\mathbb{N}}`` with ``x_n:=T^n(x_0)``. Then it holds that (for ``m>n``): +```math +\begin{aligned} +|x_m - x_n| & \leq |x_m - x_{m-1}| + |x_{m-1} - x_{m-2}| + \cdots + |x_{m-(m-n-1)}-x_{n}| \\ + & = |x_{n+(m-n)} - x_{n+(m-n-1)}| + \cdots + |x_{n+1} - x_n| \\ + & \leq \sum_{i=0}^{m-n-1}q^i|x_{n+1} - x_n| \\ + & \leq \sum_{i=0}^{m-n-1}q^iq^n|x_1 - x_0| \\ + & = q^n|x_1 -x_0|\sum_{i=0}^{m-n-1}q^i, +\end{aligned} +``` +where we have used the triangle inequality in the first line. If we now first let ``m`` go to infinity in the sum on the right-hand side then we get +```math +|x_m-x_n| \leq q^n|x_1 -x_0|\sum_{i=0}^{\infty}q^i + =q^n|x_1 -x_0| \frac{1}{1-q}, +``` +proving that the sequence is Cauchy. 
Because ``\mathbb{R}^N`` is a complete metric space we get that ``(x_n)_{n\in\mathbb{N}}`` is a convergent sequence. We call the limit of this sequence ``x^*``. Since ``T`` is continuous we have ``T(x^*)=\lim_{n\to\infty}T(x_n)=\lim_{n\to\infty}x_{n+1}=x^*``, so ``x^*`` is a fixed point; it is unique because for two fixed points ``x^*`` and ``y^*`` the inequality ``||x^*-y^*||=||T(x^*)-T(y^*)||\leq{}q||x^*-y^*||`` with ``q<1`` forces ``x^*=y^*``. This completes the proof of the Banach fixed-point theorem. diff --git a/docs/src/manifolds/inverse_function_theorem.md b/docs/src/manifolds/inverse_function_theorem.md new file mode 100644 index 000000000..52648d9a1 --- /dev/null +++ b/docs/src/manifolds/inverse_function_theorem.md @@ -0,0 +1,29 @@ +# The Inverse Function Theorem + +The **inverse function theorem** gives a sufficient condition on a vector-valued function to be invertible in a neighborhood of a specific point. This theorem is critical in developing a theory of [manifolds](manifolds.md) and serves as a basis for the [submersion theorem](submersion_theorem.md). Here we first state the theorem and then give a proof. + +__Theorem (Inverse function theorem)__: Consider a vector-valued differentiable function ``F:\mathbb{R}^N\to\mathbb{R}^N`` and assume its Jacobian is non-degenerate at a point ``x\in\mathbb{R}^N``. Then there exists a neighborhood ``U`` that contains ``F(x)`` and on which ``F`` is invertible, i.e. ``\exists{}H:U\to\mathbb{R}^N`` s.t. ``\forall{}y\in{}U,\,F\circ{}H(y) = y`` and the inverse is differentiable. + +__Proof__: Consider a mapping ``F:\mathbb{R}^N\to\mathbb{R}^N`` and assume its Jacobian has full rank at point ``x``, i.e. ``\det{}F'(x)\neq0``. Now consider a ball around ``x`` whose radius ``r`` we do not yet fix and two points ``y`` and ``z`` in that ball: ``y,z\in{}B(x,r)``. We further introduce the function ``G(y):=F(x)-F'(x)y``. By the *mean value theorem* we have ``|G(z) - G(y)|\leq|z-y|\sup_{0