diff --git a/docs/Makefile b/docs/Makefile
index c9bc39fc4..93127ef55 100644
--- a/docs/Makefile
+++ b/docs/Makefile
@@ -11,5 +11,4 @@ images:
 
 clean:
 	$(MAKE) empty -C src/tikz
-	rm -Rf build
-	rm -Rf src/tutorial
\ No newline at end of file
+	rm -Rf build
\ No newline at end of file
diff --git a/docs/make.jl b/docs/make.jl
index 555e6f329..13643738d 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -17,7 +17,7 @@ makedocs(;
     format=Documenter.HTML(;
         prettyurls=get(ENV, "CI", "false") == "true",
         canonical="https://juliagni.github.io/GeometricMachineLearning.jl",
-        assets=String[],
+        assets=["assets/extra_styles.css"],
         # specifies that we do not display the package name again (it's already in the logo)
         sidebar_sitename=false,
     ),
diff --git a/docs/src/architectures/sympnet.md b/docs/src/architectures/sympnet.md
index 12d66519d..6c326583a 100644
--- a/docs/src/architectures/sympnet.md
+++ b/docs/src/architectures/sympnet.md
@@ -6,7 +6,7 @@ This document discusses the SympNet architecture and its implementation in `Geom
 
 ### Principle
 
-SympNets (see [jin2020sympnets](@cite) for the eponymous paper) are a type of neural network that can model the trajectory of a Hamiltonian system in phase space. Take $(q^T,p^T)^T=(q_1,\ldots,q_d,p_1,\ldots,p_d)^T\in \mathbb{R}^{2d}$ as the coordinates in phase space, where $q=(q_1, \ldots, q_d)^T\in \mathbb{R}^{d}$ is refered to as the *position* and $p=(p_1, \ldots, p_d)^T\in \mathbb{R}^{d}$ the *momentum*. Given a point $(q^T,p^T)^T$ in $\mathbb{R}^{2d}$ the SympNet aims to compute the *next position* $((q')^T,(p')^T)^T$ and thus predicts the trajectory while preserving the *symplectic structure* of the system.
+SympNets (see [jin2020sympnets](@cite) for the eponymous paper) are a type of neural network that can model the trajectory of a Hamiltonian system in phase space. Take ``(q^T,p^T)^T=(q_1,\ldots,q_d,p_1,\ldots,p_d)^T\in \mathbb{R}^{2d}`` as the coordinates in phase space, where ``q=(q_1, \ldots, q_d)^T\in \mathbb{R}^{d}`` is referred to as the *position* and ``p=(p_1, \ldots, p_d)^T\in \mathbb{R}^{d}`` the *momentum*. Given a point ``(q^T,p^T)^T`` in ``\mathbb{R}^{2d}`` the SympNet aims to compute the *next position* ``((q')^T,(p')^T)^T`` and thus predicts the trajectory while preserving the *symplectic structure* of the system.
 SympNets are enforcing symplecticity strongly, meaning that this property is hard-coded into the network architecture. The layers are reminiscent of traditional neural network feedforward layers, but have a strong restriction imposed on them in order to be symplectic.
 
 SympNets can be viewed as a "symplectic integrator" (see [hairer2006geometric](@cite) and [leimkuhler2004simulating](@cite)). Their goal is to predict, based on an initial condition $((q^{(0)})^T,(p^{(0)})^T)^T$, a sequence of points in phase space that fit the training data as well as possible:
@@ -15,8 +15,10 @@ SympNets can be viewed as a "symplectic integrator" (see [hairer2006geometric](@
 ```
 The tilde in the above equation indicates *predicted data*. The time step between predictions is not a parameter we can choose but is related to the *temporal frequency of the training data*. This means that if data is recorded in an interval of e.g. 0.1 seconds, then this will be the time step of our integrator.
 
-### Architecture of SympNets
-![](../tikz/sympnet_architecture.png)
+```@raw html
+<img class="display-light-only" src="../tikz/sympnet_architecture.png" alt="SympNet Architecture">
+<img class="display-dark-only" src="../tikz/sympnet_architecture_dark.png" alt="SympNet Architecture">
+```
 
 There are two types of SympNet architectures: $LA$-SympNets and $G$-SympNets. 
  
diff --git a/docs/src/assets/extra_styles.css b/docs/src/assets/extra_styles.css
new file mode 100644
index 000000000..b0446dcd9
--- /dev/null
+++ b/docs/src/assets/extra_styles.css
@@ -0,0 +1,4 @@
+.display-light-only {display: block;} /* light/dark image switch: by default the light variant is rendered as a block */
+.display-dark-only {display: none;} /* ...and the dark variant is hidden */
+.theme--documenter-dark .display-light-only {display: none;} /* inside a .theme--documenter-dark ancestor the light variant is hidden */
+.theme--documenter-dark .display-dark-only {display: block;} /* ...and the dark variant is rendered instead; NOTE(review): only this one theme class is handled - confirm no other dark theme is in use */
\ No newline at end of file
diff --git a/docs/src/tikz/Makefile b/docs/src/tikz/Makefile
index 3a889ec42..d4036f848 100644
--- a/docs/src/tikz/Makefile
+++ b/docs/src/tikz/Makefile
@@ -5,7 +5,9 @@ pdf:
 	xelatex -shell-escape transformer_encoder
 	xelatex -shell-escape third_degree_spline
 	xelatex -shell-escape sympnet_architecture
+	xelatex -shell-escape sympnet_architecture_dark
 	xelatex -shell-escape structs_visualization
+	xelatex -shell-escape structs_visualization_dark
 	xelatex -shell-escape logo 
 	xelatex -shell-escape symplectic_autoencoder
 	xelatex -shell-escape solution_manifold_2
@@ -18,7 +20,9 @@ png:
 	pdftocairo  -png -r 150 -transp -singlefile  transformer_encoder.pdf		        transformer_encoder   
 	pdftocairo  -png -r 150 -transp -singlefile  third_degree_spline.pdf	           	third_degree_spline          
 	pdftocairo	-png -r 150 -transp -singlefile	 sympnet_architecture.pdf				sympnet_architecture
+	pdftocairo	-png -r 150 -transp -singlefile	 sympnet_architecture_dark.pdf			sympnet_architecture_dark
 	pdftocairo 	-png -r 150 -transp -singlefile  structs_visualization.pdf				structs_visualization
+	pdftocairo 	-png -r 150 -transp -singlefile  structs_visualization_dark.pdf			structs_visualization_dark
 	pdftocairo 	-png -r 150 -transp -singlefile	 logo.pdf								logo 
 	pdftocairo  -png -r 150 -transp -singlefile  symplectic_autoencoder.pdf				symplectic_autoencoder
 	pdftocairo  -png -r 150 -transp -singlefile  solution_manifold_2.pdf				solution_manifold_2
@@ -31,7 +35,6 @@ logo:
 	pdftocairo 	-png -r 500 -transp -singlefile	 logo_with_name.pdf						logo_with_name 
 	pdftocairo 	-png -r 500 -transp -singlefile  logo_with_name_dark.pdf 				logo_with_name_dark
 
-	mkdir -p ../assets
 	cp logo_with_name.png 		../assets/logo.png
 	cp logo_with_name_dark.png 	../assets/logo-dark.png
 
@@ -47,6 +50,5 @@ empty: clean
 	rm -f *.png
 	rm -f *.svg
 	rm -f ../assets/*.png
-	rm -f ../assets
 
 all: pdf png logo clean
\ No newline at end of file
diff --git a/docs/src/tikz/structs_visualization_dark.tex b/docs/src/tikz/structs_visualization_dark.tex
new file mode 100644
index 000000000..fa0bae25e
--- /dev/null
+++ b/docs/src/tikz/structs_visualization_dark.tex
@@ -0,0 +1,112 @@
+\documentclass[crop, tikz]{standalone}
+% Dark variant of the struct/type overview figure: solid and dashed arrows and the two package frames are white, constructor arrows are orange.
+\usepackage{tikz}
+\usepackage{amsmath}
+\usepackage{amssymb}
+\usepackage[mode=buildnew]{standalone}
+
+\usepackage{xcolor}
+
+
+\usetikzlibrary{positioning}
+\usetikzlibrary{calc}
+\usetikzlibrary{fit}
+%\usepackage{nicematrix}
+
+\tikzset{set/.style={draw,circle,inner sep=0pt,align=center}}
+% Colour palette: tinted fills for the node styles, morange for constructor arrows, mpurple/mgreen for the package labels.
+\definecolor{morange}{RGB}{255,127,14}
+\definecolor{mblue}{RGB}{31,119,180}
+\definecolor{mred}{RGB}{214,39,40}
+\definecolor{mpurple}{RGB}{148,103,189}
+\definecolor{mgreen}{RGB}{44,160,44}
+
+\begin{document}
+\begin{tikzpicture}[module/.style={draw, very thick, rounded corners, minimum width=8ex},
+    abstract_type/.style={module, fill=mred!30},
+    chain/.style={module, fill=mgreen!30},
+    constructor/.style={module, fill=morange!30},
+    struct/.style={module, fill=mblue!30},
+    arrow_exp/.style={-stealth, thick, rounded corners, white},
+    arrow_imp/.style={-stealth, thick, rounded corners, dashed, white},
+    arrow_constructor/.style={arrow_exp, morange},
+]
+%\node[module] (ann) {\texttt{AbstractNeuralNetworks.jl}};
+%\node[module, right of=ann, xshift=5cm] (gml) {\texttt{GeometricMachineLearning.jl}};
+% Nodes: abstract_type boxes are red-tinted, struct boxes blue-tinted, constructor boxes orange-tinted (see the styles above).
+\node[abstract_type] (architecture) {\texttt{Architecture}};
+
+\node[abstract_type, right of=architecture, xshift=1cm] (model) {\texttt{Model}};
+\node[abstract_type, below of=model] (al) {\texttt{AbstractLayer}};
+\node[abstract_type, below of=al] (ael) {\texttt{AbstractExplicitLayer}};
+
+\node[abstract_type, left of=architecture, xshift=-3cm] (ann) {\texttt{AbstractNeuralNetwork}};
+\node[struct, below of=ann] (nn) {\texttt{NeuralNetwork}};
+
+\node[abstract_type, below of=ael, xshift=12cm, yshift=2cm] (sympnetlayer) {\texttt{SympNetLayer}};
+\node[struct, below of=sympnetlayer, xshift=-6cm] (gradient) {\texttt{GradientLayer}};
+\node[struct, below of=sympnetlayer] (activation) {\texttt{ActivationLayer}}; 
+\node[struct, below of=sympnetlayer, xshift=6.4cm] (linear) {\texttt{LinearLayer}};
+
+\node[constructor, below of=gradient, xshift=-1.5cm] (gradientq) {\texttt{GradientLayerQ}}; 
+\node[constructor, below of=gradient, xshift=1.5cm] (gradientp) {\texttt{GradientLayerP}}; 
+\node[constructor, below of=activation, xshift=-1.3cm] (activationq) {\texttt{ActivationLayerQ}};
+\node[constructor, below of=activation, xshift=2.0cm] (activationp) {\texttt{ActivationLayerP}};
+\node[constructor, below of=linear, xshift=-1.3cm] (linearq) {\texttt{LinearLayerQ}};
+\node[constructor, below of=linear, xshift=1.3cm] (linearp) {\texttt{LinearLayerP}};
+
+\node[abstract_type, below of=activationq, xshift=1cm, yshift=-2cm] (sympnetnetwork) {\texttt{SympNet}};
+\node[struct, below of=sympnetnetwork, xshift=-1cm] (gsympnet) {\texttt{GSympNet}};
+\node[struct, below of=sympnetnetwork, xshift=1.1cm] (lasympnet) {\texttt{LASympNet}};
+% Solid white arrows (arrow_exp); NOTE(review): these presumably denote explicit subtyping - confirm.
+\draw[arrow_exp] (model) -- (al);
+\draw[arrow_exp] (al) -- (ael); 
+
+\draw[arrow_exp] (ael) -- (sympnetlayer);
+\draw[arrow_exp] (sympnetlayer) -- (gradient);
+\draw[arrow_exp] (sympnetlayer) -- (activation); 
+\draw[arrow_exp] (sympnetlayer) -- (linear); 
+
+% arrows for the constructor
+\draw[arrow_constructor] (gradientq) -- (gradient);
+\draw[arrow_constructor] (gradientp) -- (gradient); 
+\draw[arrow_constructor] (activationq) -- (activation); 
+\draw[arrow_constructor] (activationp) -- (activation);
+\draw[arrow_constructor] (linearq) -- (linear);
+\draw[arrow_constructor] (linearp) -- (linear);
+% Auxiliary coordinates for routing the bent arrows; right_of_linearp is not referenced again in this file.
+\coordinate[right of=linearp, yshift=-.3cm] (right_of_linearp);
+\coordinate[below of=sympnetnetwork, yshift=-.5cm] (below_of_sympnet);
+\coordinate[left of=ael, xshift=-1.4cm, yshift=-5.5cm] (left_of_ael);
+
+\draw[arrow_exp] (architecture.west)--(left_of_ael)--(below_of_sympnet)--(sympnetnetwork);
+\draw[arrow_exp] (sympnetnetwork) -- (gsympnet);
+\draw[arrow_exp] (sympnetnetwork) -- (lasympnet); 
+
+\coordinate[right of=sympnetnetwork] (right_of_sympnet);
+% Dashed white arrows (arrow_imp) from the layer constructors to the SympNet struct they point at, and from those structs to NeuralNetwork.
+\draw[arrow_imp] (linearq) -- (lasympnet);
+\draw[arrow_imp] (linearp) -- (lasympnet);
+\draw[arrow_imp] (activationq)--(right_of_sympnet)--(lasympnet);
+\draw[arrow_imp] (activationp) -- (lasympnet);
+\draw[arrow_imp] (gradientq) -- (gsympnet);
+\draw[arrow_imp] (gradientp) -- (gsympnet);
+
+\draw[arrow_exp] (ann) -- (nn);
+
+\coordinate[left of=gsympnet, xshift=-10cm] (left_of_gsympnet);
+
+\draw[arrow_imp] (gsympnet.west)--(left_of_gsympnet)--(nn);
+
+\coordinate[below of=lasympnet, yshift=.6cm] (below_of_lasympnet);
+\coordinate[below of=left_of_gsympnet, yshift=.6cm] (left_of_gsympnet2);
+\coordinate[left of=nn, xshift=.8cm, yshift=-.25cm] (nn2);
+
+\draw[arrow_imp] (lasympnet.south)--(below_of_lasympnet)--(left_of_gsympnet2)--(nn2);
+% White package frames with coloured labels; the frames get their own names (annpkg, gml) so that (ann) keeps referring to the AbstractNeuralNetwork node.
+\node[fit=(architecture)(model)(al)(ael)(ann), label=below:\color{mpurple}\hspace{5cm}\texttt{AbstractNeuralNetworks.jl}, draw, rounded corners, color=white] (annpkg) {};
+\node[fit=(sympnetnetwork)(gradient)(gradientq)(gsympnet)(lasympnet)(linearp)(linear)(sympnetlayer)(below_of_sympnet), label=below:\color{mgreen}\texttt{GeometricMachineLearning.jl}, draw, rounded corners, color=white] (gml) {}; 
+
+
+\end{tikzpicture}
+\end{document}
\ No newline at end of file
diff --git a/docs/src/tikz/sympnet_architecture_dark.tex b/docs/src/tikz/sympnet_architecture_dark.tex
new file mode 100644
index 000000000..1a450893b
--- /dev/null
+++ b/docs/src/tikz/sympnet_architecture_dark.tex
@@ -0,0 +1,63 @@
+\documentclass[crop, tikz]{standalone}
+% Dark variant of the SympNet architecture figure: arrows (with their labels), the dots and the SympNet frame are drawn in white.
+\usepackage{tikz}
+\usepackage{amsmath}
+\usepackage{amssymb}
+\usepackage[mode=buildnew]{standalone}
+
+\usepackage{xcolor}
+
+% calc provides the $(a)!0.5!(b)$ midpoint coordinates used for the arrow ends; fit provides the frame node.
+\usetikzlibrary{positioning}
+\usetikzlibrary{calc}
+\usetikzlibrary{fit}
+%\usepackage{nicematrix}
+
+\tikzset{set/.style={draw,circle,inner sep=0pt,align=center}}
+% Colour palette; only mblue is referenced in this picture.
+\definecolor{mred}{RGB}{214,39,40}
+\definecolor{mgreen}{RGB}{44,160,44}
+\definecolor{mblue}{RGB}{31,119,180}
+
+
+\begin{document}
+
+\begin{tikzpicture}[module/.style={draw, very thick, rounded corners, minimum width=4ex},
+    ffnnmodule/.style={module, fill=mblue!20},
+    arrow/.style={-stealth, thick, rounded corners, white},
+]
+% Boxes holding q^(i) above p^(i), laid out left to right, with an unboxed white \cdots node between index 2 and index k+1.
+\node[ffnnmodule, align=center] (qp0) {$q^{(0)}$\\\\$p^{(0)}$};
+\node[ffnnmodule, right of=qp0, align=center, xshift=5ex] (qp1) {$q^{(1)}$ \\ \\ $p^{(1)}$};
+\node[ffnnmodule, right of=qp1, align=center, xshift=5ex] (qp2) {$q^{(2)}$ \\ \\ $p^{(2)}$};
+\node[right of=qp2, align=center, xshift=5ex] (dots) { \\ {\color{white}$\cdots$} \\ };
+\node[right of=dots, ffnnmodule, align=center, xshift=5ex] (qpn) {$q^{(k+1)}$ \\ \\ $p^{(k+1)}$};
+% Helper coordinates that are included in the fit list of the frame below.
+\coordinate[right of=qp0, xshift=1.5ex] (leftofqp1);
+\coordinate[right of=dots] (rightofdots);
+% White frame labelled SympNet; its fit list contains qp1, qp2, dots and the two helper coordinates, but neither qp0 nor qpn.
+\node[fit=(leftofqp1)(qp1)(qp2)(dots)(rightofdots), draw, ultra thick, rounded corners, label=below:{\color{white}{SympNet}}, color=white] (sympnet) {};
+% Horizontal arrows labelled id: upper half to upper half and lower half to lower half of neighbouring boxes.
+\draw[arrow] ($(qp0.east)!0.5!(qp0.north east)$) -- ($(qp1.west)!0.5!(qp1.north west)$) node[midway, above] {\tiny id};
+\draw[arrow] ($(qp0.east)!0.5!(qp0.south east)$) -- ($(qp1.west)!0.5!(qp1.south west)$) node[midway, below] {\tiny id};
+
+\draw[arrow] ($(qp1.east)!0.5!(qp1.north east)$) -- ($(qp2.west)!0.5!(qp2.north west)$) node[midway, above] {\tiny id};
+\draw[arrow] ($(qp1.east)!0.5!(qp1.south east)$) -- ($(qp2.west)!0.5!(qp2.south west)$) node[midway, below] {\tiny id};
+
+\draw[arrow] ($(qp2.east)!0.5!(qp2.north east)$) -- ([xshift=7ex] $(qp2.east)!0.5!(qp2.north east)$) node[midway, above] {\tiny id};
+\draw[arrow] ($(qp2.east)!0.5!(qp2.south east)$) -- ([xshift=7ex] $(qp2.east)!0.5!(qp2.south east)$) node[midway, below] {\tiny id};
+
+\draw[arrow] ([xshift=10ex] $(qp2.east)!0.5!(qp2.north east)$) -- ($(qpn.west)!0.5!(qpn.north west)$) node[midway, above] {\tiny id};
+\draw[arrow] ([xshift=10ex] $(qp2.east)!0.5!(qp2.south east)$) -- ($(qpn.west)!0.5!(qpn.south west)$) node[midway, below] {\tiny id};
+% Diagonal arrows T_0, T_1, T_2, T_k alternate between upper-to-lower and lower-to-upper; the .3ex yshift presumably keeps their tips off the id arrow tips (confirm visually).
+\draw[arrow] ($(qp0.east)!0.5!(qp0.north east)$) -- ([yshift=.3ex] $(qp1.west)!0.5!(qp1.south west)$) node[midway, below] {\tiny $T_0$};
+
+\draw[arrow] ($(qp1.east)!0.5!(qp1.south east)$) -- ([yshift=-.3ex] $(qp2.west)!0.5!(qp2.north west)$) node[midway, above] {\tiny $T_1$};
+
+\draw[arrow] ($(qp2.east)!0.5!(qp2.north east)$) -- ([yshift=.3ex, xshift=7ex] $(qp2.east)!0.5!(qp2.south east)$) node[midway, below] {\tiny $T_2$};
+
+\draw[arrow] ([xshift=10ex] $(qp2.east)!0.5!(qp2.south east)$) -- ([yshift=-.3ex] $(qpn.west)!0.5!(qpn.north west)$) node[midway, above] {\tiny $T_k$};
+
+\end{tikzpicture}
+
+\end{document}
\ No newline at end of file
diff --git a/docs/src/tutorials/sympnet_tutorial.md b/docs/src/tutorials/sympnet_tutorial.md
index fc6704a14..a149fb6ca 100644
--- a/docs/src/tutorials/sympnet_tutorial.md
+++ b/docs/src/tutorials/sympnet_tutorial.md
@@ -49,7 +49,10 @@ The loss function described in the [theory section](../architectures/sympnet.md)
 
 ## Data Structures in `GeometricMachineLearning.jl`
 
-![](../tikz/structs_visualization.png)
+```@raw html
+<img class="display-light-only" src="../tikz/structs_visualization.png" alt="Structs Visualization">
+<img class="display-dark-only" src="../tikz/structs_visualization_dark.png" alt="Structs Visualization">
+```
 
 ## Examples