diff --git a/tex/presentation.tex b/tex/presentation.tex
index 027b22d..9b512cd 100755
--- a/tex/presentation.tex
+++ b/tex/presentation.tex
@@ -188,34 +188,6 @@
-\begin{frame}{Notations}
-  \begin{itemize}
-  \item $\mathbf{p}$: The original, content image
-  \item $\mathbf{a}$: The original, artwork image
-  \item $\mathbf{x}$: The image to be generated. It is initiated as a
-      random noise image.
-  \item $F^l$: \textbf{Feature Map} at level l, is the result of applying
-      filters at level $l$. If $N_l$ filters are applier at level $l$,
-      then this feature map has a depth of $N_l$.
-  \item $N_l$: The number of filters applier at level $l$. This is
-              the same as the depths of the feature map at level
-              $l$.
-  \item $M_l$: the dimension of the feature map at level l, which
-      is equal to $N_l \times M_l$.
-  \end{itemize}
-\end{frame}
-
-
-
-\begin{frame}{Notations}
-  \begin{figure}[H]
-    \centering
-    \includegraphics[width=.8\textwidth]{img/levels}
-  \end{figure}
-\end{frame}
-
-
-
 \begin{frame}{VGG19}
 \begin{figure}[H]
 \centering
@@ -246,6 +218,34 @@
 
 
 
+\begin{frame}{Notations}
+  \begin{itemize}
+  \item $\mathbf{p}$: The original content image
+  \item $\mathbf{a}$: The original artwork image
+  \item $\mathbf{x}$: The image to be generated. It is initialized as a
+      random noise image.
+  \item $F^l$: The \textbf{feature map} at level $l$, the result of applying
+      the filters at level $l$. If $N_l$ filters are applied at level $l$,
+      this feature map has a depth of $N_l$.
+  \item $N_l$: The number of filters applied at level $l$; this is
+      the same as the depth of the feature map at level $l$.
+  \item $M_l$: The spatial size (height $\times$ width) of the feature map
+      at level $l$, so $F^l$ can be stored as an $N_l \times M_l$ matrix.
+  \end{itemize}
+\end{frame}
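+
+
+
+% Illustrative shapes for the notation above
+\begin{frame}[fragile]{Notations}
+\framesubtitle{Feature Map as a Matrix (Sketch)}
+A minimal NumPy sketch of the shapes involved; the sizes are
+illustrative, not taken from VGG19.
+\begin{verbatim}
+import numpy as np
+
+N_l, H, W = 64, 56, 56  # N_l filters over an H x W grid
+features = np.random.rand(N_l, H, W)
+
+M_l = H * W  # spatial size of the feature map
+F = features.reshape(N_l, M_l)  # F^l as an N_l x M_l matrix
+\end{verbatim}
+\end{frame}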
+
+
+
+\begin{frame}{Notations}
+  \begin{figure}[H]
+    \centering
+    \includegraphics[width=.8\textwidth]{img/levels}
+  \end{figure}
+\end{frame}
 
 
 
 \begin{frame}{Content Representation}
 \begin{itemize}
 \item Perform gradient descent optimization on a white noise image
@@ -277,76 +277,20 @@
 
 
 
-\begin{frame}{Style Representation}
-Style representation is achieved via the ``Gram Matrix'' $G$. Gram matrix is
-an $N_l \times N_l$ matrix which calculates the correlations between
-different filter responses.
-
-\begin{equation}
-  \mathbf{G^l}_{ij} = \mathbf{{F^l}^T}_i \times \mathbf{F^l}_j
-  = (\mathbf{{F^l}^T} \times \mathbf{F^l})_{ij}
-\end{equation}
-\end{frame}
-
-
-
-\begin{frame}{Style Representation}
-Given $G_x^l$ and $G_a^l$ as respective Gram matrices of the noise image and
-the original image, our goal is to reduce the overall difference between
-$G_x^l$ and $G_a^l$. In this sense, Contribution of layer $l$ to the total
-loss is
-
-\begin{equation}
-  E_l = \frac{1}{4N_l^2M_l^2} \sum_{i}^{N_l}\sum_{j}^{N_l}{((G^l_x)_{ij} - (G_a^l)_{ij})^2}
-  = \mathbf{1}^T(\mathbf{G^l_x} - \mathbf{G^l_a})(\mathbf{G^l_x} - \mathbf{G^l_a})^T
-\end{equation}
-
-\end{frame}
-
-
-
-\begin{frame}{Style Representation}
-The total style loss is:
-\begin{equation}
-  \mathcal{L}_{style}(\mathbf{a}, \mathbf{x}) = \sum_{l=0}^L {w_l E_l }
-\end{equation}
-\begin{equation}
-  \frac{\partial \mathcal{L}_{style}}{\partial (F_x^l)_{ij}} = \frac{\partial E_l}{\partial (F^l_x)_{ij}} =
-  (4(\mathbf{G_x}^l - \mathbf{G_a}^l) \times \mathbf{F_x}^l)_{ij}
-\end{equation}
-\end{frame}
-
-
-
-\begin{frame}{Style Representation}
-\begin{equation}
-  \frac{\partial \mathcal{L}_{style}}{\partial (F^l_x)_{ij}} = \frac{\partial E_l}{\partial (F^l_x)_{ij}} =
-  (4(\mathbf{G_x}^l - \mathbf{G_a}^l) \times \mathbf{F}_x^l)_{ij}
-\end{equation}
-  \begin{figure}
-  \begin{tikzpicture}[scale=.84, every node/.style={scale=.7}, transform shape]
-    \node (a) at (-5, 0) {};
-    \node [mystyle] (b) [right=2cm of a] {$ \times X^T$};
-    \draw [myEdgeStyle] (a.east) to node [auto] (ab) {$\mathbf{F^l}_{N_l \times M_l}$}(b.west);
-    \draw [myEdgeStyle] (a.east) to node [red] [below] (ab) {$2(\mathbf{G} - \mathbf{A}) \times 2\mathbf{F}$}(b.west);
-    \node [mystyle, ellipse, text height = .5cm, text width = 2cm] (c) [right=2cm of b] {$ (\mathbf{X} - \mathbf{A}_l)^2$};
-    \draw [myEdgeStyle] (b.east) to node [auto] (bc) {$\mathbf{G}_{N_l \times N_l}$} (c.west) ;
-    \draw [myEdgeStyle] (b.east) to node [red][below] (bc) {$2(\mathbf{G} - \mathbf{A})_{N_l \times N_l}$} (c.west) ;
-    \node [mystyle] (d) [right=2cm of c] {$\times$};
-    \draw [myEdgeStyle] (c.east) to node [auto] (cd) {$(G - A)^2_{N_l \times N_l}$} (d.west) ;
-    \draw [myEdgeStyle] (c.east) to node [red][below] (cd) {$1_{N_l \times N_l}$} (d.west) ;
-    \node (bcd) [below=1cm of cd] {};
-    \draw [myEdgeStyle] (bcd.east) -| node [above left= .1and .4cm] {$\mathbf{1}_{N_l \times 1}$} (d.south) ;
-    \node [mystyle] (e) [right=2cm of d] {$\times$};
-    \draw [myEdgeStyle] (d.east) to node [above] (de) {$E'_{N_l \times 1}$} (e.west) ;
-    \draw [myEdgeStyle] (d.east) to node [red][below] (de) {$\mathbf{1}_{N_l \times 1}$} (e.west) ;
-    \node (bde) [below=1cm of de] {};
-    \draw [myEdgeStyle] (bde.east) -| node [above left= .1and .4cm] {$\mathbf{1}^T_{1 \times N_l}$} (e.south) ;
-    \node (f) [right=1.5cm of e] {};
-    \draw [myEdgeStyle] (e.east) to node [auto] (ef) {$E$} (f.west) ;
-    \draw [myEdgeStyle] (e.east) to node [red][below] (ef2) {$1$} (f.west) ;
-  \end{tikzpicture}
-  \end{figure}
+\begin{frame}{Content Reconstruction}
+\begin{figure}[ht]
+  \begin{minipage}[b]{0.45\linewidth}
+    \centering
+    \includegraphics[width=\textwidth]{img/content/noise}
+    \caption*{White Noise Image $\mathbf{x}$}
+  \end{minipage}
+  \hspace{0.5cm}
+  \begin{minipage}[b]{0.45\linewidth}
+    \centering
+    \includegraphics[width=\textwidth]{img/content/tubingen}
+    \caption*{Content Image $\mathbf{p}$}
+  \end{minipage}
+\end{figure}
 \end{frame}
 
@@ -444,6 +388,99 @@
 
 
 
+
+\begin{frame}{Style Representation}
+Style representation is achieved via the ``Gram matrix'' $G^l$, an
+$N_l \times N_l$ matrix that captures the correlations between the
+responses of different filters.
+
+\begin{equation}
+  G^l_{ij} = \sum_{k}^{M_l} F^l_{ik} F^l_{jk}
+  = (\mathbf{F}^l {\mathbf{F}^l}^T)_{ij}
+\end{equation}
+\end{frame}
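+
+
+
+% Illustrative computation of the Gram matrix
+\begin{frame}[fragile]{Style Representation}
+\framesubtitle{Computing the Gram Matrix (Sketch)}
+A minimal NumPy sketch of $\mathbf{G}^l = \mathbf{F}^l {\mathbf{F}^l}^T$
+from the previous frame; the sizes are illustrative.
+\begin{verbatim}
+import numpy as np
+
+N_l, M_l = 64, 56 * 56
+F = np.random.rand(N_l, M_l)  # feature matrix F^l
+
+G = F @ F.T  # Gram matrix, N_l x N_l
+# G[i, j] is the correlation between the responses
+# of filters i and j at level l.
+\end{verbatim}
+\end{frame}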
+
+
+
+\begin{frame}{Style Representation}
+Given $G^l_x$ and $G^l_a$ as the respective Gram matrices of the generated
+image and the artwork image, the goal is to reduce the overall difference
+between them. The contribution of layer $l$ to the total loss is
+
+\begin{equation}
+  E_l = \frac{1}{4N_l^2M_l^2} \sum_{i}^{N_l}\sum_{j}^{N_l}{((G^l_x)_{ij} - (G^l_a)_{ij})^2}
+  = \frac{1}{4N_l^2M_l^2}
+    \operatorname{tr}\!\left((\mathbf{G}^l_x - \mathbf{G}^l_a)(\mathbf{G}^l_x - \mathbf{G}^l_a)^T\right)
+\end{equation}
+
+\end{frame}
+
+
+
+\begin{frame}{Style Representation}
+The total style loss is:
+\begin{equation}
+  \mathcal{L}_{style}(\mathbf{a}, \mathbf{x}) = \sum_{l=0}^L {w_l E_l}
+\end{equation}
+\begin{equation}
+  \frac{\partial \mathcal{L}_{style}}{\partial (F^l_x)_{ij}}
+  = w_l \frac{\partial E_l}{\partial (F^l_x)_{ij}}
+  = \frac{w_l}{N_l^2 M_l^2}
+    \left((\mathbf{G}^l_x - \mathbf{G}^l_a)\,\mathbf{F}^l_x\right)_{ij}
+\end{equation}
+\end{frame}
+
+
+
+\begin{frame}{Style Representation}
+\begin{equation}
+  \frac{\partial E_l}{\partial (F^l_x)_{ij}}
+  = \frac{1}{N_l^2 M_l^2}
+    \left((\mathbf{G}^l_x - \mathbf{G}^l_a)\,\mathbf{F}^l_x\right)_{ij}
+\end{equation}
+  \begin{figure}
+  \begin{tikzpicture}[scale=.84, every node/.style={scale=.7}, transform shape]
+    \node (a) at (-5, 0) {};
+    \node [mystyle] (b) [right=2cm of a] {$ \times X^T$};
+    \draw [myEdgeStyle] (a.east) to node [auto] (ab) {$\mathbf{F^l}_{N_l \times M_l}$}(b.west);
+    \draw [myEdgeStyle] (a.east) to node [red] [below] (ab) {$2(\mathbf{G} - \mathbf{A}) \times 2\mathbf{F}$}(b.west);
+    \node [mystyle, ellipse, text height = .5cm, text width = 2cm] (c) [right=2cm of b] {$ (\mathbf{X} - \mathbf{A}_l)^2$};
+    \draw [myEdgeStyle] (b.east) to node [auto] (bc) {$\mathbf{G}_{N_l \times N_l}$} (c.west) ;
+    \draw [myEdgeStyle] (b.east) to node [red][below] (bc) {$2(\mathbf{G} - \mathbf{A})_{N_l \times N_l}$} (c.west) ;
+    \node [mystyle] (d) [right=2cm of c] {$\times$};
+    \draw [myEdgeStyle] (c.east) to node [auto] (cd) {$(G - A)^2_{N_l \times N_l}$} (d.west) ;
+    \draw [myEdgeStyle] (c.east) to node [red][below] (cd) {$1_{N_l \times N_l}$} (d.west) ;
+    \node (bcd) [below=1cm of cd] {};
+    \draw [myEdgeStyle] (bcd.east) -| node [above left=.1cm and .4cm] {$\mathbf{1}_{N_l \times 1}$} (d.south) ;
+    \node [mystyle] (e) [right=2cm of d] {$\times$};
+    \draw [myEdgeStyle] (d.east) to node [above] (de) {$E'_{N_l \times 1}$} (e.west) ;
+    \draw [myEdgeStyle] (d.east) to node [red][below] (de) {$\mathbf{1}_{N_l \times 1}$} (e.west) ;
+    \node (bde) [below=1cm of de] {};
+    \draw [myEdgeStyle] (bde.east) -| node [above left=.1cm and .4cm] {$\mathbf{1}^T_{1 \times N_l}$} (e.south) ;
+    \node (f) [right=1.5cm of e] {};
+    \draw [myEdgeStyle] (e.east) to node [auto] (ef) {$E$} (f.west) ;
+    \draw [myEdgeStyle] (e.east) to node [red][below] (ef2) {$1$} (f.west) ;
+  \end{tikzpicture}
+  \end{figure}
+\end{frame}
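+
+
+
+% Illustrative layer-wise style loss and its gradient
+\begin{frame}[fragile]{Style Representation}
+\framesubtitle{Layer Loss $E_l$ and Its Gradient (Sketch)}
+A minimal NumPy sketch of $E_l$ and $\partial E_l / \partial F^l_x$ as
+derived above; the function and variable names are illustrative.
+\begin{verbatim}
+import numpy as np
+
+def style_layer_loss(F_x, F_a):
+    # F_x, F_a: (N_l, M_l) feature matrices of the
+    # generated image and the artwork at level l.
+    N_l, M_l = F_x.shape
+    diff = F_x @ F_x.T - F_a @ F_a.T  # G_x - G_a
+    E_l = (diff ** 2).sum() / (4 * N_l**2 * M_l**2)
+    grad = (diff @ F_x) / (N_l**2 * M_l**2)
+    return E_l, grad
+\end{verbatim}
+\end{frame}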
+
+
+
+\begin{frame}{Style Reconstruction}
+\begin{figure}[ht]
+  \begin{minipage}[b]{0.45\linewidth}
+    \centering
+    \includegraphics[width=\textwidth]{img/style/noise}
+    \caption*{White Noise Image $\mathbf{x}$}
+  \end{minipage}
+  \hspace{0.5cm}
+  \begin{minipage}[b]{0.45\linewidth}
+    \centering
+    \includegraphics[width=\textwidth]{img/style/the-starry-night}
+    \caption*{Artwork Image $\mathbf{a}$}
+  \end{minipage}
+\end{figure}
+\end{frame}
+
+
+
 % 1:1
 \begin{frame}{Style Reconstruction}
 \begin{figure}[ht]
@@ -552,27 +589,6 @@
 \end{frame}
 
-% VGG19 for style transfer
-\begin{frame}{Style Transfer}
-\begin{figure}[ht]
-\centering
-\caption*{Content and Style Loss Layers for Style Transfer}
-\includegraphics[width=0.9\textwidth]{img/vgg19/transfer/layers}
-\end{figure}
-\end{frame}
-
-
-
-% Gatys et al. visualize of network passes
-\begin{frame}{Style Transfer}
-\begin{figure}[ht]
-\centering
-\caption*{Style Transfer Architecture}
-\includegraphics[width=\textwidth]{img/style-transfer}
-\end{figure}
-\end{frame}
-
-
 
 % white noise and style representation
 \begin{frame}{Style Transfer}
@@ -599,68 +615,24 @@
 
 
 
-\begin{frame}[allowframebreaks]{Note on Optimization methods}
-
-  \begin{center}
-    $\mathbf{g}_k = \nabla f_{\theta}(\theta_k) $ \hspace{10mm}
-    $\mathbf{H}_k = \nabla^{2} f_{\theta}(\theta_k)$
-  \end{center}
-  \textbf{Methods}:
-  \begin{enumerate}
-    \item \textbf{Gradient}: $\boldsymbol{\theta}_{k+1} =
-      \boldsymbol{\theta}_k - \eta_k \mathbf{g}_k$
-    \item \textbf{Hessian}: $\boldsymbol{\theta}_{k+1} = \boldsymbol{\theta}_k - d_k$
-      where $\mathbf{d}_k = \mathbf{H}_k^{-1} \mathbf{g}_k$ \\
-      Rather than computing $\mathbf{d}_k = \mathbf{H}_k^{-1} \mathbf{g}_k$ directly,
-      we can solve the linear systems of equations
-      $\mathbf{H}_k \mathbf{d}_k = -\mathbf{g}_k$ for $\mathbf{d}_k$.
-  \end{enumerate}
-
-  \newpage
-
-  \begin{center}
-    $\mathbf{s}_k = \mathbf {x} _{k+1}-\mathbf {x} _{k}$\\
-    $\mathbf{y}_k = \nabla f(\mathbf {x} _{k+1})-
-      \nabla f(\mathbf {x} _{k})
-      = \mathbf{g}_{k+1} - \mathbf{g}_{k}$. \\
-  \end{center}
-  However calculating $H^{-1}_k$ is extensive both in terms of computation
-  and memory. Approximation methods have been proposed:
-  \begin{enumerate}
-    \item Imposing quasi-Newtonian condition: \\
-      $H_{k+1}(\mathbf{s}_k)=y_k$\\
-    \item \textbf{BFGS}: After some math magic we have:
-      $H_{k+1}=H_{k}+{\frac {\mathbf {y} _{k}\mathbf {y} _{k}
-      ^{\mathrm {T} }}{\mathbf {y} _{k}^{\mathrm {T} }
-      \mathbf {s} _{k}}}-{\frac {H_{k}\mathbf {s} _{k}\mathbf {s} _{k}^
-      {\mathrm {T} }H_{k}^{\mathrm {T} }}{\mathbf {s} _{k}^
-      {\mathrm {T} }H_{k}\mathbf {s} _{k}}}$\\
-      $H^{-1}_{k+1}=(I-\rho _{k}s_{k}y_{k}^{\top })
-      H^{-1}_{k}(I-\rho _{k}y_{k}s_{k}^{\top })+\rho _{k}s_{k}s_{k}^{\top }$\\
-      where $\rho_k = \rho_k = \frac{1}{y^{\rm T}_k s_k} $.
-    \item \textbf{L-BFGS}: Instead of estimating the Hessian at each
-      iteration the value of $\mathbf{d}_k$ is calculated directly from
-      a history the past m steps $\mathbf{s}_k$s.
-  \end{enumerate}
-
-  \begin{table}[]
-    \centering
-    \caption{My caption}
-    \label{my-label}
-    \begin{tabular}{l|l|l}
-      Gradient Descent & BFGS & L-BFGS \\ \hline
-      $\theta(N^)$ & $\theta(N)$ & $\theta(m)$ \\
-      $\theta(N^)$ & $\theta(N)$ & $\theta(m)$
-    \end{tabular}
-  \end{table}
-
-  \begin{figure}
-    \centering
-    \caption*{Minimizing $\mathcal{L}_{total}$ With Different Optimizers}
-    \includegraphics[width=.8\textwidth]{img/loss/plot}
-  \end{figure}
-
-\end{frame}
+% VGG19 for style transfer
+\begin{frame}{Style Transfer}
+\begin{figure}[ht]
+\centering
+\caption*{Content and Style Loss Layers for Style Transfer}
+\includegraphics[width=0.9\textwidth]{img/vgg19/transfer/layers}
+\end{figure}
+\end{frame}
+
+
+
+% Gatys et al.'s visualization of the network passes
+\begin{frame}{Style Transfer}
+\begin{figure}[ht]
+\centering
+\caption*{Style Transfer Architecture}
+\includegraphics[width=\textwidth]{img/style-transfer}
+\end{figure}
+\end{frame}
 
@@ -748,6 +720,105 @@
 
 
 
+\begin{frame}[allowframebreaks]{Note on Optimization Methods}
+
+  \begin{center}
+    $\mathbf{g}_k = \nabla f(\boldsymbol{\theta}_k) $ \hspace{10mm}
+    $\mathbf{H}_k = \nabla^{2} f(\boldsymbol{\theta}_k)$
+  \end{center}
+  \textbf{Methods}:
+  \begin{enumerate}
+    \item \textbf{Gradient}: $\boldsymbol{\theta}_{k+1} =
+      \boldsymbol{\theta}_k - \eta_k \mathbf{g}_k$
+    \item \textbf{Newton (Hessian)}: $\boldsymbol{\theta}_{k+1} = \boldsymbol{\theta}_k - \mathbf{d}_k$,
+      where $\mathbf{d}_k = \mathbf{H}_k^{-1} \mathbf{g}_k$. \\
+      Rather than computing $\mathbf{d}_k = \mathbf{H}_k^{-1} \mathbf{g}_k$ directly,
+      we can solve the linear system
+      $\mathbf{H}_k \mathbf{d}_k = \mathbf{g}_k$ for $\mathbf{d}_k$.
+  \end{enumerate}
+
+  \newpage
+
+  \begin{center}
+    $\mathbf{s}_k = \boldsymbol{\theta}_{k+1} - \boldsymbol{\theta}_{k}$ \hspace{10mm}
+    $\mathbf{y}_k = \nabla f(\boldsymbol{\theta}_{k+1}) - \nabla f(\boldsymbol{\theta}_{k})
+    = \mathbf{g}_{k+1} - \mathbf{g}_{k}$
+  \end{center}
+  However, computing $\mathbf{H}^{-1}_k$ is expensive in both computation
+  and memory. Approximation methods have been proposed:
+  \begin{enumerate}
+    \item Imposing the quasi-Newton (secant) condition: \\
+      $H_{k+1}\mathbf{s}_k = \mathbf{y}_k$
+    \item \textbf{BFGS}: After some algebra we obtain:
+      $H_{k+1} = H_k
+      + \frac{\mathbf{y}_k \mathbf{y}_k^{\mathrm{T}}}{\mathbf{y}_k^{\mathrm{T}} \mathbf{s}_k}
+      - \frac{H_k \mathbf{s}_k \mathbf{s}_k^{\mathrm{T}} H_k^{\mathrm{T}}}{\mathbf{s}_k^{\mathrm{T}} H_k \mathbf{s}_k}$\\
+      $H^{-1}_{k+1} = (I - \rho_k \mathbf{s}_k \mathbf{y}_k^{\top})
+      H^{-1}_k (I - \rho_k \mathbf{y}_k \mathbf{s}_k^{\top})
+      + \rho_k \mathbf{s}_k \mathbf{s}_k^{\top}$\\
+      where $\rho_k = \frac{1}{\mathbf{y}_k^{\mathrm{T}} \mathbf{s}_k}$.
+    \item \textbf{L-BFGS}: Instead of storing a full Hessian approximation,
+      $\mathbf{d}_k$ is computed directly from a history of the past $m$
+      pairs $(\mathbf{s}_i, \mathbf{y}_i)$ (see the sketch on the next frame).
+  \end{enumerate}
+
+  \begin{table}[]
+    \centering
+    \caption{Per-Iteration Cost for $N$ Parameters and History Size $m$}
+    \label{tab:optimizer-cost}
+    \begin{tabular}{l|l|l|l}
+      & Gradient Descent & BFGS & L-BFGS \\ \hline
+      Computation & $\Theta(N)$ & $\Theta(N^2)$ & $\Theta(mN)$ \\
+      Memory & $\Theta(N)$ & $\Theta(N^2)$ & $\Theta(mN)$
+    \end{tabular}
+  \end{table}
+
+  \begin{figure}
+    \centering
+    \caption*{Minimizing $\mathcal{L}_{total}$ With Different Optimizers}
+    \includegraphics[width=.8\textwidth]{img/loss/plot}
+  \end{figure}
+
+\end{frame}
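+
+
+
+% Illustrative sketch of the L-BFGS two-loop recursion
+\begin{frame}[fragile]{Note on Optimization Methods}
+\framesubtitle{L-BFGS Two-Loop Recursion (Sketch)}
+A minimal NumPy sketch of how $\mathbf{d}_k$ can be recovered from the
+stored pairs $(\mathbf{s}_i, \mathbf{y}_i)$; names and details are
+illustrative, not the implementation used for the results here.
+\begin{verbatim}
+import numpy as np
+
+def lbfgs_direction(g, s_hist, y_hist):
+    # s_hist, y_hist: lists of the last m pairs (s_i, y_i),
+    # oldest first; assumes at least one stored pair.
+    q, rec = g.copy(), []
+    for s, y in zip(reversed(s_hist), reversed(y_hist)):
+        rho = 1.0 / y.dot(s)
+        a = rho * s.dot(q)
+        q -= a * y
+        rec.append((a, rho, s, y))
+    # Initial Hessian guess H_0 = gamma * I.
+    gamma = s_hist[-1].dot(y_hist[-1]) / y_hist[-1].dot(y_hist[-1])
+    r = gamma * q
+    for a, rho, s, y in reversed(rec):
+        b = rho * y.dot(r)
+        r += (a - b) * s
+    return r  # d_k ~ H_k^{-1} g_k; update is theta_{k+1} = theta_k - d_k
+\end{verbatim}
+\end{frame}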
+
+
+
+\begin{frame}{Optimizers}
+\framesubtitle{Gradient Descent}
+\begin{figure}[ht]
+\centering
+\includegraphics[width=\textwidth]{img/loss/SGD}
+\caption*{Samford Hall Styled like \textit{Seated Nude} Using \textbf{Gradient Descent}}
+\end{figure}
+\end{frame}
+
+
+
+\begin{frame}{Optimizers}
+\framesubtitle{L-BFGS}
+\begin{figure}[ht]
+\centering
+\includegraphics[width=\textwidth]{img/loss/L_BFGS}
+\caption*{Samford Hall Styled like \textit{Seated Nude} Using \textbf{L-BFGS}}
+\end{figure}
+\end{frame}
+
+
+
+\begin{frame}{Optimizers}
+\framesubtitle{Adam}
+\begin{figure}[ht]
+\centering
+\includegraphics[width=\textwidth]{img/loss/Adam}
+\caption*{Samford Hall Styled like \textit{Seated Nude} Using \textbf{Adam}}
+\end{figure}
+\end{frame}
+
+
+
 \begin{frame}{Literature Review}
 Comparable to generative Adversarial Networks \cite{dosovitskiy2016generating}.