diff --git a/.gitignore b/.gitignore index 766efe6..1ad6cb1 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,8 @@ .DS_Store _build/ html/ +# Ignore directory created by vscode. +.vscode/ +# ignore commonly used venv directories within the source repo. +venv*/ +.venv*/ \ No newline at end of file diff --git a/_static/css/custom.css b/_static/css/custom.css index 88c2bf4..3113571 100644 --- a/_static/css/custom.css +++ b/_static/css/custom.css @@ -5,6 +5,9 @@ --deep-blue-color: #054C91; --cool-grey-10-color: #63666A; --cool-grey-6-color: #A7A8AA; + --cool-grey-5-color: #b1b3b3; + --cool-grey-4-color: #bbbcbc; + --cool-grey-3-color: #c8c9c7; --cool-grey-2-color: #D0D0CE; --cool-grey-2-lighter-color: #E1E1E0; /* equivalent of Cool grey 2 with alpha 0.5 */ --cool-grey-2-lightest-color: #F1F1F0; /* equivalent of Cool grey 2 with alpha 0.3 */ @@ -16,6 +19,10 @@ body { color: var(--cool-grey-10-color); } +blockquote { + font-size: 17px; +} + h1, .h1, h2, .h2, h3, .h3 { color: var(--deep-blue-color); } @@ -142,6 +149,50 @@ code { text-align: right; } +/* Custom toctree highlighting.*/ + +.bs-sidenav { + /* Remove padding so link formatting behaves as intended. */ + padding-top: 0px; + padding-bottom: 0px; + overflow: hidden; +} + +.bs-sidenav .nav.current li.current { + background: var(--cool-grey-2-color); + background: var(--cool-grey-2-lighter-color); +} + +.bs-sidenav li.toctree-l1.current > a, +.bs-sidenav li.toctree-l1.current li.toctree-l2 > a, +.bs-sidenav li.toctree-l1 .nav-list li.toctree-l2 > a { + background: var(--cool-grey-2-lighter-color); +} + +.bs-sidenav li.toctree-l2.current > a, +.bs-sidenav li.toctree-l2.current li.toctree-l3 > a, +.bs-sidenav li.toctree-l2 .nav-list li.toctree-l3 > a { + background: var(--cool-grey-3-color); +} + +.bs-sidenav li.toctree-l3.current > a, +.bs-sidenav li.toctree-l3.current li.toctree-l4 > a, +.bs-sidenav li.toctree-l3 .nav-list li.toctree-l4 > a { + background: var(--cool-grey-4-color); +} + +.bs-sidenav .nav > li > a:hover, +.bs-sidenav .nav > li > a:focus { + background-color: var(--cool-grey-2-lighter-color); +} + +.bs-sidenav .nav li.current > a { + /* Mark current items in the tree bolder */ + font-weight: 900; +} + + +/* Change colours of alert blocks */ .alert-info { background-color: var(--deep-blue-color); border-color: var(--deep-blue-color); @@ -249,7 +300,7 @@ select:-webkit-autofill:focus { color: #9e0000; /* A dark red which provides good contrast against the orange background) */ } - +/* Media query for screens <= 1160px */ @media (max-width: 1160px) { .navbar-header { float: none; diff --git a/_static/js/custom.js b/_static/js/custom.js new file mode 100644 index 0000000..b29072c --- /dev/null +++ b/_static/js/custom.js @@ -0,0 +1,20 @@ +// The sphinx-bootstrap-theme handles the {{ toctree() }} content generates different markup for global and local toctree content. This JS applies the `current` css class to list items for internal references on page load and if any internal links are clicked. 
+$(document).ready(function() { + // On page load, mark localtoc elements as current if appropriate + $('.bs-sidenav .nav li > a.reference.internal').each(function() { + if (this.href === window.location.href) { + $(this).parent().addClass('current'); + $(this).parents('.bs-sidenav li').addClass('current') + } + }); + // on click of an internal reference in the toctree, adjust use of the current css class in the sidebar as appropriate + $('.bs-sidenav .nav li > a.reference.internal').click(function() { + // Remove the current class from others + $('.nav li').has("a.reference.internal").removeClass('current'); + // Mark the new selected link as current. + $(this).parent().addClass("current"); + // Mark parents + $(this).parents('.bs-sidenav li').addClass('current') + + }); +}); \ No newline at end of file diff --git a/_templates/sidebartoc.html b/_templates/sidebartoc.html new file mode 100644 index 0000000..1b99f11 --- /dev/null +++ b/_templates/sidebartoc.html @@ -0,0 +1,3 @@ +{{ toctree(collapse=True, maxdepth=-1, includehidden=False) }} + + \ No newline at end of file diff --git a/conf.py b/conf.py index 12cf838..1278f63 100644 --- a/conf.py +++ b/conf.py @@ -11,7 +11,12 @@ # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. -extensions = [] +extensions = [ + 'sphinxcontrib.fulltoc', + "sphinxext.rediraffe", + 'sphinx.ext.mathjax', + 'sphinx_copybutton' +] # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] @@ -26,7 +31,10 @@ # General information about the project. project = u'Bede Documentation' -copyright = u'2020, N8 CIR' +# Extract the year from the current time. +import datetime +year = datetime.datetime.now().year +copyright = f'{year}, N8 CIR' author = u'N8 CIR' # The version info for the project you're documenting, acts as replacement for @@ -48,7 +56,7 @@ # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This patterns also effect to html_static_path and html_extra_path -exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store', 'README.rst', "common/*.rst"] +exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store', 'README.rst', "common/*.rst", '**.inc.rst', 'venv*', '.venv*'] # The name of the Pygments (syntax highlighting) style to use. pygments_style = 'sphinx' @@ -57,7 +65,6 @@ todo_include_todos = False ## Added by CA to get MathJax rendering loaded -extensions = ['sphinx.ext.mathjax'] mathjax_path='https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js' @@ -73,9 +80,9 @@ ("Usage", "usage/index"), ("Hardware", "hardware/index"), ("Software", "software/index"), - ("Profiling", "profiling/index"), + ("Guides", "guides/index"), ("Training", "training/index"), - ("User Group", "bug/index"), + ("User Group", "user-group/index"), ("FAQ", "faq/index"), ], @@ -86,25 +93,23 @@ 'navbar_pagenav': False, 'source_link_position': "footer", 'bootswatch_theme': "flatly", + 'globaltoc_depth': 2, + } html_static_path = ['_static'] -# Belt and braces: use both old and new sphinx syntax for custom CSS to make sure it loads +# add custom css files html_css_files = [ 'css/custom.css', ] -html_context = { - 'css_files': [ - '_static/css/custom.css' - ] -} +# Add custom js files html_js_files = [ 'https://use.fontawesome.com/c79ff27dd1.js', 'js/rtd-versions.js', + 'js/custom.js' ] - # (Optional) Logo. Should be small enough to fit the navbar (ideally 24x24). 
# Path should be relative to the ``_static`` files directory. html_logo = '_static/images/logo-cmyk.png' @@ -165,11 +170,19 @@ 'Miscellaneous'), ] - +# html_sidebars issues warnings if multiple wildcard selectors match, so this is more verbose than it ideally would be. html_sidebars = { - '**': ['localtoc.html', 'relations.html'], + '**': ['sidebartoc.html'], 'index': [], - 'search': [] + 'search': [], + 'usage/index': ['localtoc.html'], + 'hardware/index': [], + 'training/index': ['localtoc.html'], + 'guides/index': ['sidebartoc.html'], + 'faq/index': ['localtoc.html'], + 'user-group/index': [], + 'glossary/index': ['localtoc.html'], + 'rhel8/index': ['localtoc.html'], } def setup(app): @@ -177,3 +191,13 @@ def setup(app): if on_rtd: app.add_javascript('https://use.fontawesome.com/c79ff27dd1.js') app.add_javascript('js/rtd-versions.js') + +# Control use of the sphinx-rediraffe plugin to generate redirect files for moved documentation. +# This is only viable for whole pages, not for any bookmarks within a page unfortunately. +rediraffe_redirects = { + "bug/index.rst": "user-group/index.rst", + "profiling/index.rst": "guides/nvidia-profiling-tools.rst", + "software/resnet50/bede-README-sbatch.rst": "software/applications/wmlce.rst", + "software/wanderings/wanderings-in-CUDALand.rst": "guides/wanderings/wanderings-in-CUDALand.rst", + "software/wanderings/Estimating-pi-in-CUDALand.rst": "guides/wanderings/Estimating-pi-in-CUDALand.rst", +} diff --git a/faq/index.rst b/faq/index.rst index a81f733..844eed6 100644 --- a/faq/index.rst +++ b/faq/index.rst @@ -16,7 +16,7 @@ How can I acknowledge Bede in published or presented work? ---------------------------------------------------------------- You can acknowledge Bede using the standard text that we provide. You can -find this `here `__. +find this :ref:`here `. How can I check my home directory quota? ---------------------------------------- @@ -34,7 +34,7 @@ You can use the following command: This tells me that, in this case, I have a limit of :code:`20480M`, and am currently using :code:`79544K` across 1071 files. You can find more information -about the Bede filesystems `here `__. +about the Bede filesystems :ref:`here `. Where should I put project data? -------------------------------- @@ -53,14 +53,14 @@ How do I get started with Bede? ------------------------------- The 'Using Bede' page runs through how to get registered, how to log in to the -machine and how to run jobs, a link to this page can be found `here -`__. +machine and how to run jobs, a link to this page can be found :ref:`here +`. How can I add my own software? ------------------------------ It is recommended that you use spack to extend the installed software on the -system, there are instructions on how to do this `here `__, +system, there are instructions on how to do this :ref:`here `, along with further information about alternatives. diff --git a/guides/cuda-investigations.rst b/guides/cuda-investigations.rst new file mode 100644 index 0000000..269dc56 --- /dev/null +++ b/guides/cuda-investigations.rst @@ -0,0 +1,8 @@ +Initial investigations with CUDA (under development) +---------------------------------------------------- + +.. toctree:: + :maxdepth: -1 + + wanderings/wanderings-in-CUDALand + wanderings/Estimating-pi-in-CUDALand \ No newline at end of file diff --git a/guides/index.rst b/guides/index.rst new file mode 100644 index 0000000..a3cc4ce --- /dev/null +++ b/guides/index.rst @@ -0,0 +1,18 @@ +.. 
_guides: + +Guides +====== + +These provide guidance related to the use of Bede, such as the use of profiling tools. + + +If you notice any omissions, errors or have any suggested changes to the documentation please create an `Issue `__ or open a `Pull Request `__ on GitHub. + +.. include:: ../common/rhel8-status.rst + +.. toctree:: + :maxdepth: -1 + :glob: + + nvidia-profiling-tools.rst + ./* \ No newline at end of file diff --git a/profiling/index.rst b/guides/nvidia-profiling-tools.rst similarity index 94% rename from profiling/index.rst rename to guides/nvidia-profiling-tools.rst index cecbd38..9478537 100644 --- a/profiling/index.rst +++ b/guides/nvidia-profiling-tools.rst @@ -1,10 +1,5 @@ -:tocdepth: 3 - -Profiling Tools -=============== - NVIDIA Profiling Tools ----------------------- +====================== Nvidia provide a suite of profiling tools which can be used to profile applications running on the Volta and Turing architecture Nvidia GPUs within Bede. @@ -12,7 +7,7 @@ Nvidia provide a suite of profiling tools which can be used to profile applicati The `NVIDIA Visual Profiler `_ is the legacy Nvidia profiling tool. It is recommended to use the newer tools where possible. Preparing your Application -~~~~~~~~~~~~~~~~~~~~~~~~~~ +-------------------------- To improve the effectiveness of the Nvidia profiling tools, several steps can be taken. @@ -25,7 +20,7 @@ The :ref:` NVIDIA Tools Extension` can be used to mark regions of code. This can Nsight Systems -~~~~~~~~~~~~~~ +-------------- Nsight Systems is a system-wide performance analysis tool designed to visualize an application’s algorithms and identify the largest opportunities to optimize. It supports Pascal (SM 60) and newer GPUs. @@ -55,7 +50,7 @@ Once this file has been downloaded to your local machine, it can be opened in `` Cluster Modules -^^^^^^^^^^^^^^^ +~~~~~~~~~~~~~~~ ``nsys`` is available through the following Bede modules: @@ -63,7 +58,7 @@ Cluster Modules * ``nvhpc/20.9`` More Information -^^^^^^^^^^^^^^^^ +~~~~~~~~~~~~~~~~ * `Nsight Systems `_ * `OLCF: Nsight Systems Tutorial `_ @@ -71,7 +66,7 @@ More Information * Use the following `Nsight report files `_ to follow the tutorial. Nsight Compute -~~~~~~~~~~~~~~ +-------------- Nsight Compute is a kernel profiler for CUDA applications, which can also be used for API debugging. It supports Volta architecture GPUs and newer (SM 70+). @@ -100,7 +95,7 @@ Once the ``.ncu-rep`` file has been downloaded locally, it can be imported into Cluster Modules -^^^^^^^^^^^^^^^ +~~~~~~~~~~~~~~~ ``ncu`` is available through the following Bede modules: @@ -109,7 +104,7 @@ Cluster Modules More Information -^^^^^^^^^^^^^^^^ +~~~~~~~~~~~~~~~~ * `Nsight Compute `_ * `OLCF: Nsight Compute Tutorial `_ @@ -118,7 +113,7 @@ More Information Nvidia Visual Profiler (legacy) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +------------------------------- The Visual Profiler is NVIDIA's legacy profiler, which fills some of the roles of bother Nsight Systems and Nsight Compute, but is no longer actively developed. It is still provided to enable profiling of older GPU architectures not supported by the newer tools. 
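+For illustration, a typical pair of ``nvprof`` invocations to generate a timeline file and a metrics file for the Visual Profiler might look like the following, where ``./myapplication`` is a placeholder for your own executable (see ``nvprof --help`` for the full list of options):
+
+.. code-block:: bash
+
+   # Capture an application timeline
+   nvprof -o timeline.nvprof ./myapplication
+   # Capture analysis metrics (slower, as kernels are replayed)
+   nvprof --analysis-metrics -o analysis.nvprof ./myapplication
+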
@@ -151,7 +146,8 @@ Once these files are downloaded to your local machine, Import them into the Visu * Add ``analysis.nvprof`` to ``Event/Metric data files`` Cluster Modules -^^^^^^^^^^^^^^^ +~~~~~~~~~~~~~~~ + ``nvprof`` is available through the following Bede modules: * ``cuda/10.1.243`` @@ -159,13 +155,13 @@ Cluster Modules * ``nvhpc/20.9`` Documentation -^^^^^^^^^^^^^ +~~~~~~~~~~~~~ + `Nvprof Documentation `_ NVIDIA Tools Extension -~~~~~~~~~~~~~~~~~~~~~~ +---------------------- `NVIDIA Tools Extension (NVTX) `__ is a C-based API for annotating events and ranges in applications. These markers and ranges can be used to increase the usability of the NVIDIA profiling tools. @@ -184,14 +180,15 @@ The NVIDIA Developer blog contains several posts on using NVTX: CMake support -^^^^^^^^^^^^^ +~~~~~~~~~~~~~ + +From CMake 3.17, the `FindCUDAToolkit module `_ can be used to find the tools extension and select the appropriate include directory. -From CMake 3.17, the ```FindCUDAToolkit `_`` can be used to find the tools extension and select the appropriate include directory. If support for older CMake versions is required custom ``find_package`` modules can be used, e.g. `ptheywood/cuda-cmake-NVTX on GitHub `_. Documentation -^^^^^^^^^^^^^ +~~~~~~~~~~~~~ * `NVTX Documentation `_ * `NVTX 3 on GitHub `_ diff --git a/software/wanderings/Estimating-pi-in-CUDALand.rst b/guides/wanderings/Estimating-pi-in-CUDALand.rst similarity index 100% rename from software/wanderings/Estimating-pi-in-CUDALand.rst rename to guides/wanderings/Estimating-pi-in-CUDALand.rst diff --git a/software/wanderings/Unit-circle.jpg b/guides/wanderings/Unit-circle.jpg similarity index 100% rename from software/wanderings/Unit-circle.jpg rename to guides/wanderings/Unit-circle.jpg diff --git a/software/wanderings/add_grid.cu b/guides/wanderings/add_grid.cu similarity index 100% rename from software/wanderings/add_grid.cu rename to guides/wanderings/add_grid.cu diff --git a/software/wanderings/add_grid_init.cu b/guides/wanderings/add_grid_init.cu similarity index 100% rename from software/wanderings/add_grid_init.cu rename to guides/wanderings/add_grid_init.cu diff --git a/software/wanderings/cuda_global_reduce.cu b/guides/wanderings/cuda_global_reduce.cu similarity index 100% rename from software/wanderings/cuda_global_reduce.cu rename to guides/wanderings/cuda_global_reduce.cu diff --git a/software/wanderings/cupi.cu b/guides/wanderings/cupi.cu similarity index 100% rename from software/wanderings/cupi.cu rename to guides/wanderings/cupi.cu diff --git a/software/wanderings/matadd.cu b/guides/wanderings/matadd.cu similarity index 100% rename from software/wanderings/matadd.cu rename to guides/wanderings/matadd.cu diff --git a/software/wanderings/matorthog.cu b/guides/wanderings/matorthog.cu similarity index 100% rename from software/wanderings/matorthog.cu rename to guides/wanderings/matorthog.cu diff --git a/software/wanderings/pigreco.cu b/guides/wanderings/pigreco.cu similarity index 100% rename from software/wanderings/pigreco.cu rename to guides/wanderings/pigreco.cu diff --git a/software/wanderings/sgemm-basic.cpp b/guides/wanderings/sgemm-basic.cpp similarity index 100% rename from software/wanderings/sgemm-basic.cpp rename to guides/wanderings/sgemm-basic.cpp diff --git a/software/wanderings/sgemm-unified.cu b/guides/wanderings/sgemm-unified.cu similarity index 100% rename from software/wanderings/sgemm-unified.cu rename to guides/wanderings/sgemm-unified.cu diff --git a/software/wanderings/sgemm-unifiedorthog.cu 
b/guides/wanderings/sgemm-unifiedorthog.cu similarity index 100% rename from software/wanderings/sgemm-unifiedorthog.cu rename to guides/wanderings/sgemm-unifiedorthog.cu diff --git a/software/wanderings/sgemm-unifiedorthogV2a.cu b/guides/wanderings/sgemm-unifiedorthogV2a.cu similarity index 100% rename from software/wanderings/sgemm-unifiedorthogV2a.cu rename to guides/wanderings/sgemm-unifiedorthogV2a.cu diff --git a/software/wanderings/transpose.cu b/guides/wanderings/transpose.cu similarity index 100% rename from software/wanderings/transpose.cu rename to guides/wanderings/transpose.cu diff --git a/software/wanderings/unified-test.cu b/guides/wanderings/unified-test.cu similarity index 100% rename from software/wanderings/unified-test.cu rename to guides/wanderings/unified-test.cu diff --git a/software/wanderings/wanderings-in-CUDALand.rst b/guides/wanderings/wanderings-in-CUDALand.rst similarity index 100% rename from software/wanderings/wanderings-in-CUDALand.rst rename to guides/wanderings/wanderings-in-CUDALand.rst diff --git a/hardware/index.rst b/hardware/index.rst index c565efe..7b86416 100644 --- a/hardware/index.rst +++ b/hardware/index.rst @@ -19,7 +19,7 @@ uniquely positioned for: There are: -- 2x “login” nodes, each containing: +- 2x ``login`` nodes, each containing: - 2x POWER9 CPUs @ 2.4GHz (40 cores total and 4 hardware threads per core), with NVLink 2.0 @@ -27,7 +27,7 @@ There are: - 4x Tesla V100 32G NVLink 2.0 - 1x Mellanox EDR (100Gbit/s) InfiniBand port -- 32x “gpu” nodes, each containing: +- 32x ``gpu`` nodes, each containing: - 2x POWER9 CPUs @ 2.7GHz (32 cores total and 4 hardware threads per core), with NVLink 2.0 @@ -35,7 +35,7 @@ There are: - 4x Tesla V100 32G NVLink 2.0 - 2x Mellanox EDR (100Gbit/s) InfiniBand ports -- 4x “infer” nodes, each containing: +- 4x ``infer`` nodes, each containing: - 2x POWER9 CPUs @ 2.9GHz (40 cores total and 4 hardware threads per core) @@ -44,7 +44,7 @@ There are: - 1x Mellanox EDR (100Gbit/s) InfiniBand port The Mellanox EDR InfiniBand interconnect is organised in a 2:1 block fat -tree topology. GPUDirect RDMA transfers are supported on the 32 “gpu” +tree topology. GPUDirect RDMA transfers are supported on the 32 ``gpu`` nodes only, as this requires an InfiniBand port per POWER9 CPU socket. Storage is provided by a 2PB Lustre filesystem capable of reaching diff --git a/index.rst b/index.rst index e40e963..16ad1d8 100644 --- a/index.rst +++ b/index.rst @@ -12,8 +12,8 @@ Please note that the system is still under active development, and so some funct .. include:: common/rhel8-status.rst -Site Contents -============= + +.. The TOC is required on the root document, but as we have alternate navigation it is not required, so can be hidden? .. toctree:: :maxdepth: 2 @@ -21,9 +21,9 @@ Site Contents usage/index hardware/index software/index - profiling/index + guides/index training/index faq/index - bug/index + user-group/index glossary/index rhel8/index diff --git a/requirements.txt b/requirements.txt index e79845f..d0452c8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,6 @@ -sphinx==1.5.3 # should be same version as currently used by ReadTheDocs -sphinx-bootstrap-theme>=0.7.1 +sphinx==4.3.1 +sphinx-bootstrap-theme sphinx-autobuild +sphinxext-rediraffe +sphinxcontrib.fulltoc +sphinx-copybutton diff --git a/software/applications/amber.rst b/software/applications/amber.rst new file mode 100644 index 0000000..d98ef01 --- /dev/null +++ b/software/applications/amber.rst @@ -0,0 +1,28 @@ +.. 
_software-applications-amber: + +AMBER +------- + +`AMBER `__ is a suite of biomolecular simulation programs. It began in the late 1970's, and is maintained by an active development community. + +On Bede, AMBER is made available through the :ref:`HECBioSim Project `. + + +.. code-block:: bash + + # Load the hecbiosim project + module load hecbiosim + # Load the desired version of amber, for the appropriate OS image + # RHEL 8: + module load amber/20-rhel8 + # RHEL 7 + module load amber/20 + + +The HECBioSim project also provide `example bede job submission scripts for AMBER on their website `__. + +For more information see the `AMBER documentation `__ and `information on GPU acceleration within AMBER `__. + + + + diff --git a/software/applications/conda.rst b/software/applications/conda.rst new file mode 100644 index 0000000..1e66b4e --- /dev/null +++ b/software/applications/conda.rst @@ -0,0 +1,103 @@ +.. _software-applications-conda: + +Conda +----- + +`Conda `__ is an open source package management system and environment management system that runs on Windows, macOS and Linux. Conda quickly installs, runs and updates packages and their dependencies. + +Installing Miniconda +~~~~~~~~~~~~~~~~~~~~ + +The simplest way to install Conda for use on Bede is through the `miniconda `__ installer. + +.. note:: + + You may wish to install conda into the ``/nobackup/projects//$USER`` (where ``project`` is the project code for your project) directory rather than your ``home`` directory as it may consume considerable disk space + +.. code-block:: bash + + export CONDADIR=/nobackup/projects//$USER # Update this with your code. + mkdir -p $CONDADIR + pushd $CONDADIR + + # Download the latest miniconda installer for ppcle64 + wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-ppc64le.sh + # Validate the file checksum matches is listed on https://docs.conda.io/en/latest/miniconda_hashes.html. + sha256sum Miniconda3-latest-Linux-ppc64le.sh + + sh Miniconda3-latest-Linux-ppc64le.sh -b -p ./miniconda + source miniconda/bin/activate + conda update conda -y + +On subsequent sessions, or in job scripts you may need to re-source miniconda. Alternatively you could add this to your bash environment. I.e. + +.. code-block:: bash + + export CONDADIR=/nobackup/projects//$USER # Update this with your code. + source $CONDADIR/miniconda/bin/activate + +Creating a new Conda Environment +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +With miniconda installed and activated, new `conda environments `__ can be created using ``conda create``. + +I.e. to create a new conda environment named `example`, with `python 3.9` you can run the following. + +.. code-block:: bash + + conda create -y --name example python==3.9 + +Once created, the environment can be activated using ``conda activate``. + +.. code-block:: bash + + conda activate example + +Listing and Activating existing Conda Environments +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Existing conda environments can be listed via: + +.. code-block:: bash + + conda env list + +``conda activate`` can then be used to activate one of the listed environments. + +Installing Conda Packages +~~~~~~~~~~~~~~~~~~~~~~~~~ + +Conda packages can then be installed using ``conda install ``. + +I.e. to install the conda package ``pylint`` into the active conda environment: + +.. code-block:: bash + + conda install -y pylint + +.. note:: + + Only Conda packages with support for ``ppc64le`` will be installable. 
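+To check whether a package is available for ``ppc64le`` before attempting to install it, and to record an environment so that it can be recreated later (for example from a job script or on another system), ``conda search`` and ``conda env export`` can be used. The package, file and environment names below are illustrative:
+
+.. code-block:: bash
+
+   # Search the configured channels for a package
+   conda search pylint
+   # Export the active environment to a file
+   conda env export > environment.yml
+   # Recreate the environment from that file at a later date
+   conda env create -f environment.yml --name example-copy
+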
+ +Deleting Conda Environments +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +You may need to delete conda environments when they are no longer required, to free up disk space. +This can be achieved using ``conda env remove``. +For example, to remove the ``example`` conda environment created earlier: + +.. code-block:: bash + + conda env remove -n example + +Further Information +~~~~~~~~~~~~~~~~~~~ + +See the `Conda Documentation `__ for further information. + +Alternatively, conda provides its own help information for the main ``conda`` executable and all subcommands, such as ``conda list``: + +.. code-block:: bash + + conda -h + conda list -h diff --git a/software/applications/eman2.rst b/software/applications/eman2.rst new file mode 100644 index 0000000..f2877cb --- /dev/null +++ b/software/applications/eman2.rst @@ -0,0 +1,22 @@ +.. _software-applications-eman2: + +EMAN2 +===== + +`EMAN2 `__ is a broadly based greyscale scientific image processing suite with a primary focus on processing data from transmission electron microscopes. + +On Bede, EMAN2 is provided by the :ref:`IBM Collaboration project `. + + +To access this software, you must first have a local conda installation set up and activated. +See :ref:`Conda ` for instructions on how to install and enable conda. + +The EMAN2 conda environment can then be loaded using: + +.. code-block:: bash + + conda activate /projects/bddir04/ibm-lfsapp/Eman2 + +Once loaded, the included software applications can then be used. + +For more information on the use of EMAN2, please visit the `EMAN2 website `__ which includes the online documentation. \ No newline at end of file diff --git a/software/applications/grace.rst b/software/applications/grace.rst new file mode 100644 index 0000000..06435bf --- /dev/null +++ b/software/applications/grace.rst @@ -0,0 +1,15 @@ +.. _software-applications-grace: + +Grace +----- + +`Grace `__ is a WYSIWYG 2D plotting tool for the X Window System. + +On Bede, the batch-printing component of Grace, ``gracebat``, is provided via an environment module: + +.. code-block:: bash + + module load grace + module load grace/5.1.25 + +For more information see the `Grace documentation `__. diff --git a/software/applications/gromacs.rst b/software/applications/gromacs.rst new file mode 100644 index 0000000..1ff07ec --- /dev/null +++ b/software/applications/gromacs.rst @@ -0,0 +1,41 @@ +.. _software-applications-gromacs: + +GROMACS +------- + +`GROMACS `__ is a versatile package for molecular dynamics simulation. +It is primarily designed for biochemical molecules like proteins, lipids and nucleic acids that have a lot of complicated bonded interactions, but since GROMACS is extremely fast at calculating the nonbonded interactions (that usually dominate simulations) many groups are also using it for research on non-biological systems, e.g. polymers. + +CUDA-based GPU acceleration has been available since GROMACS 4.6, for Nvidia GPUs of compute capability >= 2.0 (e.g. Fermi or later). + + +On Bede, GROMACS is made available through the :ref:`HECBioSim Project `. + + +.. code-block:: bash + + # Load the hecbiosim project + module load hecbiosim + + # Load the desired version of gromacs, for the appropriate RHEL image. 
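+   # Module versions below are examples; run `module avail gromacs` to list the currently installed versions.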
+ # RHEL 8 + module load gromacs/2020.4-plumed-2.6.2-rhel8 + module load gromacs/2021.1 + module load gromacs/2021.1-plumed-2.7.2-rhel8 + module load gromacs/2021.2-plumed-2.7.1-rhel8 + module load gromacs/2021.2-plumed-2.7.2-rhel8 + module load gromacs/2021.4-plumed-2.7.3-rhel8 + + # RHEL 7 + gromacs/2020.3 + gromacs/2020.4-plumed-2.6.2 + gromacs/2021.2 + + +The HECBioSim project also provide `example bede job submission scripts for GROMACS on their website `__. + +For more information see the `GROMACS documentation `__ and `information on GPU acceleration within GROMACS `__. + + + + diff --git a/software/applications/index.rst b/software/applications/index.rst new file mode 100644 index 0000000..950b035 --- /dev/null +++ b/software/applications/index.rst @@ -0,0 +1,14 @@ +.. _software-applications: + +Applications +============ + +These pages list the software applications available on Bede. + +If you notice any omissions, errors or have any suggested changes to the documentation please create an `Issue `__ or open a `Pull Request `__ on GitHub. + +.. toctree:: + :maxdepth: 1 + :glob: + + * \ No newline at end of file diff --git a/software/applications/namd.rst b/software/applications/namd.rst new file mode 100644 index 0000000..82bf3d0 --- /dev/null +++ b/software/applications/namd.rst @@ -0,0 +1,25 @@ +.. _software-applications-namd: + +NAMD +---- + +`NAMD `__ is a parallel molecular dynamics code designed for high-performance simulation of large biomolecular systems. +Based on Charm++ parallel objects, NAMD scales to hundreds of cores for typical simulations and beyond 500,000 cores for the largest simulations. + +On Bede, NAMD is made available through the :ref:`HECBioSim Project `. + + +.. code-block:: bash + + # Load the hecbiosim project + module load hecbiosim + # Load the desired version of namd + module load namd + module load namd/2.14-smp + module load namd/3.0-alpha7-singlenode + module load namd/3.0-alpha9-singlenode-rhel8 + + +For more information see the `NAMD User's Guide `__. + + diff --git a/software/applications/openmm.rst b/software/applications/openmm.rst new file mode 100644 index 0000000..0168325 --- /dev/null +++ b/software/applications/openmm.rst @@ -0,0 +1,21 @@ +.. _software-applications-openmm: + +OpenMM +------ + +`OpenMM `__ is a high-performance toolkit for molecular simulation. +It can be used as an application, a library, or a flexible programming environment +and includes extensive language bindings for Python, C, C++, and even Fortran. + +On Bede, OpenMM is made available through the :ref:`HECBioSim Project `. + + +.. code-block:: bash + + # Load the hecbiosim project + module load hecbiosim + # Load the desired version of openmm + module load openmm + module load openmm/7.4.1-python3.7 + +For more information see the `OpenMM Documentation `__. \ No newline at end of file diff --git a/software/applications/python.rst b/software/applications/python.rst new file mode 100644 index 0000000..e46cbbf --- /dev/null +++ b/software/applications/python.rst @@ -0,0 +1,59 @@ +.. _software-python: + +Python +====== + +`Python `__ is an interpreted, interactive, object-oriented programming language with dynamic typing. + +Python 3.6 is available by default on Bede, as ``python3``, however, consider using :ref:`Conda ` for your python dependency management. 
+ +Conda is a cross-platform package and environment management system, which can provide alternate python versions than distributed centrally, and is more-suitable for managing packages which include non-python dependencies. + +Python 2 is also available, but is no longer an officially supported version of python. +If you are still using python 2, upgrade to python 3 as soon as possible. + +.. note:: + + The ``python`` executable refers to ``python2`` on RHEL 7, but ``python3`` on RHEL 8 images. Consider using the more specific ``python3`` command. + +If you wish to use non-conda python, you should use `virtual environments `__ to isolate your python environment(s) from the system-wide environment. +This will allow you to install your own python dependencies via pip. + + +For instance, to create and install `sphinx` (the python package used to create this documentation) into a python environment in your home directory: + +.. code-block:: bash + + # Create a directory for your venvs if it does not exist + mkdir -p ~/.venvs + # Create a python3 venv named sphinx, located at ~/.venvs/sphinx + python3 -m venv ~/.venvs/sphinx + # Activate the virtual environment. You will need to do this any time you with to use the environment + source ~/.venvs/sphinx/bin/activate + # Verify the location of your python3 + which python3 + # Use pip to install sphinx into the environment + python3 -m pip install sphinx + +.. note:: + + Python virtual environments can become large if large python packages such as TensorFlow are installed. + Consider placing your python virtual environments in your project directories to avoid filling your home directory. + + +Python virtual environments can be deactivated using the ``deactivate`` command + +.. code-block:: bash + + deactivate + +They can be deleted by recursively deleting the directory. + +I.e. to delete a python virtual environment located at ``~/.venvs/sphinx`` + +.. code-block:: bash + + rm -r ~/.venvs/sphinx/ + + +For further information on please see the `Python Online Documentation `__. diff --git a/software/applications/pytorch.rst b/software/applications/pytorch.rst new file mode 100644 index 0000000..5200520 --- /dev/null +++ b/software/applications/pytorch.rst @@ -0,0 +1,49 @@ +.. _software-applications-pytorch: + +PyTorch +------- + +`PyTorch `__ is an end-to-end machine learning framework. +PyTorch enables fast, flexible experimentation and efficient production through a user-friendly front-end, distributed training, and ecosystem of tools and libraries. + +The main method of distribution for PyTorch is via :ref:`Conda `. + +For more information on the usage of PyTorch, see the `Online Documentation `__. + +PyTorch Quickstart +~~~~~~~~~~~~~~~~~~ + +The following should get you set up with a working conda environment (replacing with your project code): + +.. 
code-block:: bash + + export DIR=/nobackup/projects//$USER + # rm -rf ~/.conda ~/.condarc $DIR/miniconda # Uncomment if you want to remove old env + mkdir $DIR + pushd $DIR + + wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-ppc64le.sh + + sh Miniconda3-latest-Linux-ppc64le.sh -b -p $DIR/miniconda + source miniconda/bin/activate + conda update conda -y + conda config --set channel_priority strict + + conda config --prepend channels \ + https://public.dhe.ibm.com/ibmdl/export/pub/software/server/ibm-ai/conda/ + + conda config --prepend channels \ + https://opence.mit.edu + + conda create --name opence pytorch=1.7.1 -y + conda activate opence + + +This has some limitations such as not supporting large model support. +If you require LMS, please see the :ref:`WMLCE ` page. + + +Further Information +~~~~~~~~~~~~~~~~~~~ + +For more information on the usage of PyTorch, see the `Online Documentation `__. \ No newline at end of file diff --git a/software/applications/r.rst b/software/applications/r.rst new file mode 100644 index 0000000..ccfa5da --- /dev/null +++ b/software/applications/r.rst @@ -0,0 +1,14 @@ +.. _software-applications-R: + +R +- + +`R `__ is a free software environment for statistical computing and graphics. +It is provided on the system by the ``r`` module(s), which make ``R`` and ``Rscript`` available for use. + +.. code-block:: bash + + module load r + module load r/4.0.3 + +For more information, run ``man R``, ``man RScript`` or see the `R Manuals `__ online. diff --git a/software/applications/tensorflow.rst b/software/applications/tensorflow.rst new file mode 100644 index 0000000..bd7160d --- /dev/null +++ b/software/applications/tensorflow.rst @@ -0,0 +1,42 @@ +.. _software-python-tensorflow: + +TensorFlow +---------- + +`TensorFlow `__ is an end-to-end open source platform for machine learning. It has a comprehensive, flexible ecosystem of tools, libraries and community resources that lets researchers push the state-of-the-art in ML and developers easily build and deploy ML powered applications. + +TensorFlow Quickstart +~~~~~~~~~~~~~~~~~~~~~ + +The following should get you set up with a working conda environment (replacing ```` with your project code): + +.. code-block:: bash + + export DIR=/nobackup/projects//$USER + # rm -rf ~/.conda ~/.condarc $DIR/miniconda # Uncomment if you want to remove old env + mkdir $DIR + pushd $DIR + + wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-ppc64le.sh + + sh Miniconda3-latest-Linux-ppc64le.sh -b -p $DIR/miniconda + source miniconda/bin/activate + conda update conda -y + + conda config --prepend channels \ + https://public.dhe.ibm.com/ibmdl/export/pub/software/server/ibm-ai/conda/ + + conda config --prepend channels \ + https://opence.mit.edu + + conda create --name opence tensorflow -y + conda activate opence + +.. note:: + + This conflicts with the :ref:`PyTorch ` instructions as they set the conda channel_priority to be strict which seems to cause issues when installing TensorFlow. + +Further Information +~~~~~~~~~~~~~~~~~~~ + +For further information on TensorFlow features and usage, please refer to the `TensorFlow Documentation `__. \ No newline at end of file diff --git a/software/applications/wmlce.rst b/software/applications/wmlce.rst new file mode 100644 index 0000000..92cecdf --- /dev/null +++ b/software/applications/wmlce.rst @@ -0,0 +1,255 @@ +.. 
_software-applications-wmlce: + +IBM WMLCE +========= + +`IBM WMLCE `__ is the Watson Machine Learning Community Edition, a software distribution for machine learning which included some technology previews such as `Large Model Support for TensorFlow `__. + +.. warning:: + + WMLCE was archived by IBM on 2020-11-10 and is no longer updated or maintained. + + It has been replaced by `Open-CE `__, a community driven software distribution for machine learning, which does not support all features of WMLCE. + + The remainder of this document refers to WMLCE, so may be considered out of date. + +.. warning:: + + WMLCE 1.7 may not be compatible with RHEL 8. + +PyTorch and TensorFlow: IBM PowerAI and wmlce [Possibly Out of Date] +-------------------------------------------------------------------- + +IBM have done a lot of work to port common Machine Learning tools to the +POWER9 system, and to take advantage of the GPUs abililty to directly +access main system memory on the POWER9 architecture using its "Large +Model Support". + +This has been packaged up into what is variously known as IBM Watson +Machine Learning Community Edition (wmlce) or the catchier name PowerAI. + +Documentation on wmlce can be found here: +https://www.ibm.com/support/pages/get-started-ibm-wml-ce + +Installation is via the IBM channel of the anaconda package management tool. **Note: +if you do not use this channel you will not find all of the available packages.** +First install anaconda (can be quite large - so using the /nobackup area): + +.. code-block:: bash + + cd /nobackup/projects/ + + wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-ppc64le.sh + sh Miniconda3-latest-Linux-ppc64le.sh + conda update conda + conda config --set channel_priority strict + conda config --prepend channels https://public.dhe.ibm.com/ibmdl/export/pub/software/server/ibm-ai/conda/ + conda create --name wmlce + +Then login again and install wmlce (GPU version by default - substitute +``powerai-cpu`` for ``powerai`` for the CPU version): + +.. code-block:: bash + + conda activate wmlce + conda install powerai ipython + +Running ``ipython`` on the login node will then allow you to experiment +with this feature using an interactive copy of Python and the GPUs on +the login node. Demanding work should be packaged into a job and +launched with the ``python`` command. + +If a single node with 4 GPUs and 512GB RAM isn't enough, the Distributed +Deep Learning feature of PowerAI should allow you to write code that can +take advantage of multiple nodes. + +WMLCE resnet50 benchmark [Possibly out of date] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This Bede specific README file is based upon options laid out in the README.MD file in the WMLCE +resnet50 benchmark directory. The necessary data from ImageNet has been downloaded and processed. +It is stored in /nobackup/datasets/resnet50/TFRecords and is universally readable. + +NOTE: As written, the associated sbatch script must be run in a directory that is writable +by the user. It creates a directory with the default name run_results into which it will write +the results of the computation. The results data will use up to 1.2GB of space. The run +directory must also be accessible by the compute nodes, so using /tmp on a login node is not +suitable. + +The main WMLCE README.MD file suggests the following parameters are appropriate for a 4 node +(possibly 16 GPU) run: + +.. 
code-block:: bash + + # Run a training job + ddlrun -H host1,host2,host3,host4 python benchmarks/tensorflow-benchmarks/resnet50/main.py \ + --mode=train_and_evaluate --iter_unit=epoch --num_iter=50 --batch_size=256 --warmup_steps=100 \ + --use_cosine_lr --label_smoothing 0.1 --lr_init=0.256 --lr_warmup_epochs=8 --momentum=0.875 \ + --weight_decay=3.0517578125e-05 --data_dir=/data/imagenetTF/ --results_dir=run_results \ + --use_xla --precision=fp16 --loss_scale=1024 --use_static_loss_scaling + +ddlrun by itself is not integrated with Slurm and will not run directly on Bede. A wrapper-script +called bede-ddlrun is available and that is what is used in the following. + +It is easy to define a single GPU run based on the above set of parameters (basically +remove the ddlrun command at the front and specify the correct paths). The associated run +takes about 16 hours to complete. + +The related sbatch script ( :ref:`sbatch_resnet50base.sh `) is configured to use 4 GPUs on one node. +Changing the script to use 4 nodes, 16 GPUs, requires changing one line. + + +The sbatch script specifies: + +.. code-block:: bash + + # ... + #SBATCH -p gpu + #SBATCH --gres=gpu:4 + #SBATCH -N1 + # ... + + module load slurm/dflt + export PYTHON_HOME=/opt/software/apps/anaconda3/ + source $PYTHON_HOME/bin/activate wmlce_env + + export OMP_NUM_THREADS=1 # Disable multithreading + + bede-ddlrun python $PYTHON_HOME/envs/wmlce_env/tensorflow-benchmarks/resnet50/main.py \ + --mode=train_and_evaluate --iter_unit=epoch --num_iter=50 --batch_size=256 \ + --warmup_steps=100 --use_cosine_lr --label_smoothing 0.1 --lr_init=0.256 \ + --lr_warmup_epochs=8 --momentum=0.875 --weight_decay=3.0517578125e-05 \ + --data_dir=/nobackup/datasets/resnet50/TFRecords/ --results_dir=run_results \ + --use_xla --precision=fp16 --loss_scale=1024 --use_static_loss_scaling + + + +The resulting job should run for about 4 hours and will keep all 4 GPUs at nearly +100% utilisation. + +The first few lines of output should look similar to: + +.. code-block:: + + [WARN DDL-2-17] Not performing connection tests. Cannot find 'mpitool' executabl + e. This could be because you are using a version of mpi that does not ship with + mpitool. + Please see /tmp/DDLRUN/DDLRUN.j9SmSKzaKGEL/ddlrun.log for detailed log. + + /opt/software/apps/anaconda3/envs/wmlce_env/bin/mpirun -x PATH -x LD_LIBRARY_P + ATH -disable_gdr -gpu -mca plm_rsh_num_concurrent 1 --rankfile /tmp/DDLRUN/DDLRU + N.j9SmSKzaKGEL/RANKFILE -n 4 -x DDL_HOST_PORT=2200 -x "DDL_HOST_LIST=gpu025.bede + .dur.ac.uk:0,1,2,3" -x "DDL_OPTIONS=-mode p:4x1x1x1 " bash -c 'source /opt/softw + are/apps/anaconda3/etc/profile.d/conda.sh && conda activate /opt/software/apps/a + naconda3/envs/wmlce_env > /dev/null 2>&1 && python /opt/software/apps/anaconda3/ + envs/wmlce_env/tensorflow-benchmarks/resnet50/main.py --mode=train_and_evaluate + --iter_unit=epoch --num_iter=50 --batch_size=256 --warmup_steps=100 --use_cosine + _lr --label_smoothing 0.1 --lr_init=0.256 --lr_warmup_epochs=8 --momentum=0.875 + --weight_decay=3.0517578125e-05 --data_dir=/nobackup/datasets/resnet50/TFRecords + / --results_dir=run_results --use_xla --precision=fp16 --loss_scale=1024 --use_s + tatic_loss_scaling' + 2020-11-17 15:39:49.410620: I tensorflow/stream_executor/platform/default/dso_lo + ader.cc:44] Successfully opened dynamic library libcudart.so.10.2 + +There are a number of configuration / compiler type messages and then you should +start to see messages like: + +.. 
code-block:: + + :::NVLOGv0.2.3 resnet 1605627653.398838758 (training_hooks.py:100) iteration: 0 + :::NVLOGv0.2.3 resnet 1605627653.400741577 (training_hooks.py:101) imgs_per_sec: + 37.5667719118656 + :::NVLOGv0.2.3 resnet 1605627653.402500391 (training_hooks.py:102) cross_entropy + : 9.02121639251709 + :::NVLOGv0.2.3 resnet 1605627653.404244661 (training_hooks.py:103) l2_loss: 0.74 + 98071789741516 + :::NVLOGv0.2.3 resnet 1605627653.405992270 (training_hooks.py:104) total_loss: 9 + .771023750305176 + :::NVLOGv0.2.3 resnet 1605627653.407735109 (training_hooks.py:105) learning_rate + : 0.0 + :::NVLOGv0.2.3 resnet 1605627671.803228855 (training_hooks.py:100) iteration: 10 + :::NVLOGv0.2.3 resnet 1605627671.805866718 (training_hooks.py:101) imgs_per_sec: + 4526.812526349517 + :::NVLOGv0.2.3 resnet 1605627671.807682991 (training_hooks.py:102) cross_entropy + : 8.204719543457031 + +The most relevant line is the value after ``imgs_per_sec``: + +Once things start running, you should see something like 4500 images per second as +the rate on 4 GPUs. + +After about 4 hours, the training has converged and you should see the last few lines like: + +.. code-block:: + + transpose_before=resnet50_v1.5/input_reshape/transpose pad=resnet50_v1.5/conv2d/Pad transpose_after=resnet50_v1.5/conv2d/conv2d/Conv2D-0-TransposeNCHWToNHWC-LayoutOptimizer + :::NVLOGv0.2.3 resnet 1605641981.781752110 (runner.py:610) Top-1 Accuracy: 75.863 + :::NVLOGv0.2.3 resnet 1605641981.782602310 (runner.py:611) Top-5 Accuracy: 92.823 + :::NVLOGv0.2.3 resnet 1605641981.783382177 (runner.py:630) Ending Model Evaluation ... + +It is easy to modify the script to use 4 nodes and hence 16 GPUs. The run time will +be a just over an hour and during the 16 GPU run, about 18000 images per second will +be processed. + +Unfortunately, the basic parameters used with the resnet50 run do not allow this +job to scale much beyond 16 GPUs. Indeed, there is no speedup with this configuration +using 32 GPUs. Improving scalability is left as an exercise for the user. + + + +.. _sbatch_resenet50base.sh: + +sbatch_resent50base.sh +^^^^^^^^^^^^^^^^^^^^^^ + +.. code-block:: bash + + #!/bin/bash -l + #SBATCH -A bdXXXYY + #SBATCH -p gpu + #SBATCH --gres=gpu:4 + #SBATCH -N1 + #SBATCH -o multix1.o%j + #SBATCH -t 4:20:00 + # + # Author: C. Addison + # Initial version: 2020-11-19 + # + # Please read the file bede-README-batch.txt for details on this + # script. 
+ # + echo ========================================================= + echo SLURM job: submitted date = `date` + date_start=`date +%s` + + echo Nodes involved: + echo $SLURM_NODELIST + echo ========================================================= + echo Job output begins + echo ----------------- + echo + module load slurm/dflt + export PYTHON_HOME=/opt/software/apps/anaconda3/ + source $PYTHON_HOME/bin/activate wmlce_env + + export OMP_NUM_THREADS=1 # Disable multithreading + + bede-ddlrun python $PYTHON_HOME/envs/wmlce_env/tensorflow-benchmarks/resnet50/main.py \ + --mode=train_and_evaluate --iter_unit=epoch --num_iter=50 --batch_size=256 \ + --warmup_steps=100 --use_cosine_lr --label_smoothing 0.1 --lr_init=0.256 \ + --lr_warmup_epochs=8 --momentum=0.875 --weight_decay=3.0517578125e-05 \ + --data_dir=/nobackup/datasets/resnet50/TFRecords/ --results_dir=run_results \ + --use_xla --precision=fp16 --loss_scale=1024 --use_static_loss_scaling + + echo + echo --------------- + echo Job output ends + date_end=`date +%s` + seconds=$((date_end-date_start)) + minutes=$((seconds/60)) + seconds=$((seconds-60*minutes)) + hours=$((minutes/60)) + minutes=$((minutes-60*hours)) + echo ========================================================= + echo SLURM job: finished date = `date` + echo Total run time : $hours Hours $minutes Minutes $seconds Seconds + echo ========================================================= \ No newline at end of file diff --git a/software/compilers/gcc.rst b/software/compilers/gcc.rst new file mode 100644 index 0000000..477b9d7 --- /dev/null +++ b/software/compilers/gcc.rst @@ -0,0 +1,30 @@ +GCC +--- + +The `GNU Compiler Collection (GCC) `__ is available on Bede including C, C++ and Fortran compilers. + +The copies of GCC available as modules have been compiled with CUDA +offload support: + +.. code-block:: bash + + module load gcc/10.2.0 + module load gcc/8.4.0 + +The version of GCC which is distributed with RHEL is also packaged as the ``gcc/native`` module. +On RHEL 7 nodes, this is GCC ``4.8.5``. +On RHEL 8 nodes, this is GCC ``8.5.0``. + +.. code-block:: bash + + # The GCC version provided by this module is RHEL specific. + module load gcc/native + +.. note:: + Note that the default GCC provided by Red Hat Enterprise Linux 7 (4.8.5) + is quite old, will not optimise for the POWER9 processor (either use + POWER8 tuning options or use a later compiler), and does not have + CUDA/GPU offload support compiled in. The module ``gcc/native`` has been + provided to point to this copy of GCC. + +For further information please see the `GCC online documentation `__. diff --git a/software/compilers/ibmxl.rst b/software/compilers/ibmxl.rst new file mode 100644 index 0000000..06b483e --- /dev/null +++ b/software/compilers/ibmxl.rst @@ -0,0 +1,18 @@ +IBM XL +------ + +The `IBM XL C and C++ compiler family `__ and `IBM XL Fortran compiler family `__ are available on Bede. + +On RHEL 7, the IBM compilers are part of the default environment. + +On RHEL 8, the IBM compilers are provided by the ``xl`` module family: + +.. code-block:: bash + + # RHEL 8 Only + module load xl + module load xl/16.1.1 + +For further information please see the `IBM XL C and C++ documentation `__ and `IBM XL Fortran documentation `__. + + diff --git a/software/compilers/index.rst b/software/compilers/index.rst new file mode 100644 index 0000000..a1767a6 --- /dev/null +++ b/software/compilers/index.rst @@ -0,0 +1,23 @@ +.. 
_software-compilers: + +Compilers +========= + +Most compiler modules set the ``CC``, ``CXX``, ``FC``, ``F90`` environment variables to appropriate values. These are commonly used by tools such as CMake and autoconf, so that by loading a compiler module its compilers are used by default. + +This can also be done in your own build scripts and makefiles, e.g.: + +.. code-block:: bash + + module load gcc + $CC -o myprog myprog.c + +.. toctree:: + :maxdepth: 1 + :glob: + + gcc + llvm + ibmxl + nvhpc + nvcc diff --git a/software/compilers/llvm.rst b/software/compilers/llvm.rst new file mode 100644 index 0000000..56576a4 --- /dev/null +++ b/software/compilers/llvm.rst @@ -0,0 +1,19 @@ +LLVM +---- + +LLVM has been provided for use on the system by the ``llvm`` module. +It has been built with CUDA GPU offloading support, allowing OpenMP +regions to run on a GPU using the ``target`` directive. + +Note that, as of LLVM 11.0.0, it provides a Fortran compiler called +``flang``. Although this has been compiled and can be used for +experimentation, it is still immature and ultimately relies on +``gfortran`` for its code generation. The ``llvm/11.0.0`` module therefore +defaults to using the operating system provided ``gfortran`` instead. + +.. code-block:: bash + + module load llvm + module load llvm/11.0.0 + +For further information please see the `LLVM Releases `__ for versioned documentation. \ No newline at end of file diff --git a/software/compilers/nvcc.rst b/software/compilers/nvcc.rst new file mode 100644 index 0000000..b9c663c --- /dev/null +++ b/software/compilers/nvcc.rst @@ -0,0 +1,54 @@ +.. _software-compilers-nvcc: + +CUDA and NVCC +------------- + +`CUDA `__ and the ``nvcc`` CUDA/C++ compiler are provided for use on the system by the ``cuda`` modules. + +Unlike other compiler modules, the cuda modules do not set ``CC`` or ``CXX`` environment variables. This is because ``nvcc`` can be used to compile device CUDA code in conjunction with a range of host compilers, such as GCC or LLVM clang. + + +.. code-block:: bash + + module load cuda + + # RHEL 8 only + module load cuda/11.5.1 + module load cuda/11.4.1 + module load cuda/11.3.1 + module load cuda/11.2.2 + + # RHEL 7 or RHEL 8 + module load cuda/10.2.89 + module load cuda/10.1.243 + +For further information please see the `CUDA Toolkit Archive `__. + + +GPU Code Generation Options +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The ``-gencode``, or ``-arch`` and ``-code``, NVCC compiler options allow for architecture-specific optimisation of generated code, for NVCC's `two-stage compilation process `__. + +Bede contains NVIDIA Tesla V100 and Tesla T4 GPUs, which are `compute capability `__ ``7.0`` and ``7.5`` respectively. + +To generate optimised code for both GPU models in Bede, the following ``-gencode`` options can be passed to ``nvcc``: + +.. code-block:: bash + + -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 + +Alternatively, to reduce compile time and binary size a single ``-gencode`` option can be passed. + +If only compute capability ``70`` is selected, code will be optimised for Volta GPUs, but will execute on Volta and Turing GPUs. + +If only compute capability ``75`` is selected, code will be optimised for Turing GPUs, but it will not be executable on Volta GPUs. + +.. 
code-block:: bash + + # Optimise for V100 GPUs, executable on T4 GPUs + -gencode=arch=compute_70,code=sm_70 + # Optimise for T4 GPUs, not executable on V100 GPUs + -gencode=arch=compute_75,code=sm_75 + +For more information on the use of ``-gencode``, ``-arch`` and ``-code`` please see the `NVCC Documentation `__. \ No newline at end of file diff --git a/software/compilers/nvhpc.rst b/software/compilers/nvhpc.rst new file mode 100644 index 0000000..380c2fe --- /dev/null +++ b/software/compilers/nvhpc.rst @@ -0,0 +1,22 @@ +.. _software-compilers-nvhpc: + +NVIDIA HPC SDK +-------------- + +The `NVIDIA HPC SDK `__, otherwise referred to as ``nvhpc``, is a suite of compilers, libraries and tools for HPC. +It provides C, C++ and Fortran compilers, which include features enabling GPU acceleration through standard C++ and Fortran, OpenACC directives and CUDA. + +It is provided for use on the system by the ``nvhpc`` module(s). +It provides the ``nvc``, ``nvc++`` and ``nvfortran`` compilers. + +This module also provides the `NCCL `__ and `NVSHMEM `__ libraries, as well as the suite of math libraries typically included with the CUDA Toolkit, such as ``cublas``, ``cufft`` and ``nvblas``. + +.. code-block:: bash + + module load nvhpc + # RHEL 7 only + module load nvhpc/20.9 + # RHEL 8 only + module load nvhpc/21.5 + +For further information please see the `NVIDIA HPC SDK Documentation Archive `__. diff --git a/software/Cryo-EM_Bede.pdf b/software/environments/Cryo-EM_Bede.pdf similarity index 100% rename from software/Cryo-EM_Bede.pdf rename to software/environments/Cryo-EM_Bede.pdf diff --git a/software/environments/cryo-em.rst b/software/environments/cryo-em.rst new file mode 100644 index 0000000..5f67e1e --- /dev/null +++ b/software/environments/cryo-em.rst @@ -0,0 +1,28 @@ +.. _software-environments-cryoem: + +Cryo-EM Software Environment +============================ + +Documentation on the the Cryo-EM Software Environment for Life Sciences is available :download:`here `. +Note that this document is mainly based on the installation on `Satori `_ and might have some inconsistencies with the Bede installation. + +The Cryo-EM software package is provided by the :ref:`IBM Collaboration project `. + +It is a conda environment which provides the following software packages: + +* `RELION `__ +* `CTFfind4 `__ +* `MotionCor2 `__ +* `crYOLO `__ +* `ResMap `__ + +To access these packages, first you must have a local conda installation set up and activated. +See :ref:`Conda ` for instructions on how to install and enable conda. + +The CyroEM conda environment can then be loaded using: + +.. code-block:: bash + + conda activate /projects/bddir04/ibm-lfsapp/CryoEM + +Once loaded, the included software applications can then be used. diff --git a/software/environments/easybuild.rst b/software/environments/easybuild.rst new file mode 100644 index 0000000..cd70e20 --- /dev/null +++ b/software/environments/easybuild.rst @@ -0,0 +1,46 @@ +Easybuild +========= + +.. note:: + + Not currently recommended. + +The central Easybuild modules are available when a user executes the +following command and then logs in again: + +.. code-block:: bash + + echo easybuild > ~/.application_environment + +A user can create their own Easybuild installation to supplement (or +override) the packages provided by the central install by: + +.. 
code-block:: bash + + echo 'export EASYBUILD_INSTALLPATH=$HOME/eb' >> ~/.bash_profile + echo 'export EASYBUILD_BUILDPATH=/tmp' >> ~/.bash_profile + echo 'export EASYBUILD_MODULES_TOOL=Lmod' >> ~/.bash_profile + echo 'export EASYBUILD_PARALLEL=8' >> ~/.bash_profile + echo 'export MODULEPATH=$HOME/eb/modules/all:$MODULEPATH' >> ~/.bash_profile + +Login again, and then: + +.. code-block:: bash + + wget https://raw.githubusercontent.com/easybuilders/easybuild-framework/develop/easybuild/scripts/bootstrap_eb.py + python bootstrap_eb.py $EASYBUILD_INSTALLPATH + +Verify install by checking sensible output from: + +.. code-block:: bash + + module avail # should show an EasyBuild module under user's home directory + module load EasyBuild + which eb # should show a path under the user's home directory + +Software can now be installed into the new Easybuild area using +``eb `` + +Project Easybuild installations can be created using a similar method. +In this case, a central module to add the project’s modules to a user’s +environment is helpful, and can be done on request. \ No newline at end of file diff --git a/software/environments/index.rst b/software/environments/index.rst new file mode 100644 index 0000000..9171f61 --- /dev/null +++ b/software/environments/index.rst @@ -0,0 +1,100 @@ +.. _software-environments: + +Environments +============ + +The default software environment on Bede is called "builder". This uses +the modules system normally used on HPC systems, but provides a system +of intelligent modules. To see a list of what is available, executing +the command ``module avail``. + +In this scheme, modules providing access to compilers and libraries +examine other modules that are also loaded and make the most appropriate +copy (or "flavour") of the software available. This minimises the +problem of knowing what modules to choose whilst providing access to all +the combinations of how a library can be built. + +For example, the following command gives you access to a copy of FFTW +3.3.8 that has been built against GCC 8.4.0: + +:: + + $ module load gcc/8.4.0 fftw/3.3.8 + $ which fftw-wisdom + /opt/software/builder/developers/libraries/fftw/3.3.8/1/gcc-8.4.0/bin/fftw-wisdom + +If you then load an MPI library, your environment will be automatically +updated to point at a copy of FFTW 3.3.8 that has been built against GCC +8.4.0 and OpenMPI 4.0.5: + +:: + + $ module load openmpi/4.0.5 + $ which fftw-wisdom + /opt/software/builder/developers/libraries/fftw/3.3.8/1/gcc-8.4.0-openmpi-4.0.5/bin/fftw-wisdom + +Similarly, if you then load CUDA, the MPI library will be replaced by +one built against it: + +:: + + $ which mpirun + /opt/software/builder/developers/libraries/openmpi/4.0.5/1/gcc-8.4.0/bin/mpirun + $ module load cuda/10.2.89 + $ which mpirun + /opt/software/builder/developers/libraries/openmpi/4.0.5/1/gcc-8.4.0-cuda-10.2.89/bin/mpirun + +Modules follow certain conventions: + +- Logs of software builds can be found under ``/opt/software/builder/logs/``. +- Installation recipes for modules can be found under directory ``/home/builder/builder/``. +- Although modules do their best to configure your environment so + that you can use the software, it is sometimes useful to know where the + software is installed on disk. This is provided by the ``_HOME`` + environment variable, e.g. if the ``gcc/8.4.0`` module is loaded, + environment variable ``GCC_HOME`` points to the directory containing + its files. +- Software provided by modules sometimes use other modules for their + functionality. 
It is not normally required to explicitly load
+  these prerequisites but it can be useful, for example to mirror R's
+  build environment when installing an R library. Where this occurs,
+  a list of modules is provided by the ``_BUILD_MODULES``
+  environment variable, e.g. the ``r`` module sets environment variable
+  ``R_BUILD_MODULES``.
+
+Software can be built on top of these modules in the following ways:
+
+- Traditional - loading appropriate modules, manually unpacking,
+  configuring, building and installing the new software
+  (e.g. ``./configure; make; make install``)
+- `Spack `__ - automated method of
+  installing software. Spack will automatically find the multiple
+  flavours (or variants, in spack-speak) of libraries provided by
+  builder, minimising the number of packages needing to be built.
+
+With Builder and Spack, the opportunity arises for a project to inherit
+and supplement software, and for users to then inherit and supplement
+that in turn. In this way, the centre can concentrate on providing core
+software of general use and allow projects and users to concentrate on
+specialist software elements that support their work.
+
+In addition, there are two other types of software environment on Bede,
+which are not currently recommended:
+
+- The vendor-supplied set of modules that originally came with the
+  machine. To use these, execute:
+  ``echo ocf > ~/.application_environment`` and then login again.
+- Easybuild - an automated method of installing software, rival to
+  Spack. To use this, execute:
+  ``echo easybuild > ~/.application_environment`` and then login again.
+
+In both cases, executing ``rm ~/.application_environment`` and login
+again will return you to the default software environment.
+
+.. toctree::
+   :maxdepth: 1
+   :glob:
+
+   spack
+   easybuild
+   cryo-em
diff --git a/software/environments/spack.rst b/software/environments/spack.rst
new file mode 100644
index 0000000..277db03
--- /dev/null
+++ b/software/environments/spack.rst
@@ -0,0 +1,52 @@
+.. _software-spack:
+
+Spack
+=====
+
+`Spack `__ can be used to extend the installed software on the system,
+without requiring specialist knowledge on how to build particular pieces
+of software. Documentation for the project is here:
+https://spack.readthedocs.io/
+
+To install spack, execute the following and then login again:
+
+.. code-block:: bash
+
+   $ git clone https://github.com/spack/spack.git $HOME/spack
+
+   $ echo 'export SPACK_ROOT=$HOME/spack' >> ~/.bash_profile
+   $ echo 'source $SPACK_ROOT/share/spack/setup-env.sh' >> ~/.bash_profile
+
+Example usage, installing an MPI-aware, GPU version of gromacs and then
+loading it into your environment to use (once built, execute
+``spack load gromacs`` before using):
+
+.. code-block:: bash
+
+   $ spack install gromacs +mpi +cuda
+
+Other useful spack commands:
+
+* ``spack find`` - show what packages have been installed
+* ``spack list`` - show what packages spack knows how to build
+* ``spack compilers`` - show what compilers spack can use
+* ``spack info `` - details about a package, and the different ways it can be built
+* ``spack spec `` - what pieces of software a package depends on
+
+If a project wishes to create a spack installation, for example under
+``/projects//spack`` and you would like an easy way for your
+users to add it to their environment, please contact us and we can make
+a module.
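+
+As an illustrative sketch (the shell variable ``$PROJECT`` below is a placeholder for your
+project code), such a shared installation is created in the same way as the personal
+installation above, just cloned into the project area rather than ``$HOME``:
+
+.. code-block:: bash
+
+   # Clone Spack into the shared project area ($PROJECT is a placeholder)
+   $ git clone https://github.com/spack/spack.git /projects/$PROJECT/spack
+
+   # Project members can then activate it from their shell profile
+   $ echo "source /projects/$PROJECT/spack/share/spack/setup-env.sh" >> ~/.bash_profile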
+ +If you are a user who wishes to supplement your project’s spack +installation, follow the installation instructions above and then tell +it where your project’s copy of spack is: + +.. code-block:: bash + + cat > $SPACK_ROOT/etc/spack/upstreams.yaml </spack + modules: + tcl: /projects//spack/share/spack/modules + EOF \ No newline at end of file diff --git a/software/index.rst b/software/index.rst index 4d799c5..2c64d96 100644 --- a/software/index.rst +++ b/software/index.rst @@ -1,489 +1,23 @@ -Software -======== +.. _software: -.. include:: ../common/rhel8-status.rst - - -Environments ------------- - -The default software environment on Bede is called “builder”. This uses -the modules system normally used on HPC systems, but provides a system -of intelligent modules. To see a list of what is available, executing -the command ``module avail``. - -In this scheme, modules providing access to compilers and libraries -examine other modules that are also loaded and make the most appropriate -copy (or “flavour”) of the software available. This minimises the -problem of knowing what modules to choose whilst providing access to all -the combinations of how a library can be built. - -For example, the following command gives you access to a copy of FFTW -3.3.8 that has been built against GCC 8.4.0: - -:: - - $ module load gcc/8.4.0 fftw/3.3.8 - $ which fftw-wisdom - /opt/software/builder/developers/libraries/fftw/3.3.8/1/gcc-8.4.0/bin/fftw-wisdom - -If you then load an MPI library, your environment will be automatically -updated to point at a copy of FFTW 3.3.8 that has been built against GCC -8.4.0 and OpenMPI 4.0.5: - -:: - - $ module load openmpi/4.0.5 - $ which fftw-wisdom - /opt/software/builder/developers/libraries/fftw/3.3.8/1/gcc-8.4.0-openmpi-4.0.5/bin/fftw-wisdom - -Similarly, if you then load CUDA, the MPI library will be replaced by -one built against it: - -:: - - $ which mpirun - /opt/software/builder/developers/libraries/openmpi/4.0.5/1/gcc-8.4.0/bin/mpirun - $ module load cuda/10.2.89 - $ which mpirun - /opt/software/builder/developers/libraries/openmpi/4.0.5/1/gcc-8.4.0-cuda-10.2.89/bin/mpirun - -Modules follow certain conventions: - -- Logs of software builds can be found under ``/opt/software/builder/logs/``. -- Installation recipes for modules can be found under directory ``/home/builder/builder/``. -- Although modules do their best to configure your environment so - that you can use the software, it is sometimes useful to know where the - software is installed on disk. This is provided by the ``_HOME`` - environment variable, e.g. if the ``gcc/8.4.0`` module is loaded, - environment variable ``GCC_HOME`` points to the directory containing - its files. -- Software provided by modules sometimes use other modules for their - functionality. It is not normally required to explicitly load - these prerequisites but it can be useful, for example to mirror R's - buld environment when installing an R library. Where this occurs, - a list of modules is provided by the ``_BUILD_MODULES`` - environment variable, e.g. the ``r`` module sets environment variable - ``R_BUILD_MODULES``. - -Software can be built on top of these modules in the following ways: - -- Traditional - loading appropriate modules, manually unpacking, - configuring, building and installing the new software - (e.g. ``./configure; make; make install``) -- `Spack `__ - automated method of - installing software. 
Spack will automatically find the multiple - flavours (or variants, in spack-speak) of libraries provided by - builder, minimising the number of packages needing to be built. - -With Builder and Spack, the opportunity arises for a project to inherit -and supplement software, and for users to then inherit and supplement -that in turn. In this way, the centre can concentrate on providing core -software of general use and allow projects and users to concentrate on -specialist software elements that support their work. - -In addition, there are two other types of software environment on Bede, -which are not currently recommended: - -- The vendor-supplied set of modules that originally came with the - machine. To use these, execute: - ``echo ocf > ~/.application_environment`` and then login again. -- Easybuild - an automated method of installing software, rival to - Spack. To use this, execute: - ``echo builder > ~/.application_environment`` and then login again. - -In both cases, executing ``rm ~/.application_environment`` and login -again will return you to the default software environment. - -.. _software-spack: - -Spack -~~~~~ - -Spack can be used to extend the installed software on the system, -without requiring specialist knowledge on how to build particular pieces -of software. Documentation for the project is here: -https://spack.readthedocs.io/ - -To install spack, execute the following and then login again: - -:: - - $ git clone https://github.com/spack/spack.git $HOME/spack - - $ echo 'export SPACK_ROOT=$HOME/spack' >> ~/.bash_profile - $ echo 'source $SPACK_ROOT/share/spack/setup-env.sh' >> ~/.bash_profile - -Example usage, installing an MPI aware, GPU version of gromacs and than -loading it into your environment to use (once built, execute -``spack load gromacs`` before using): - -:: - - $ spack install gromacs +mpi +cuda - -Other useful spack commands: \* ``spack find`` - show what packages have -been installed \* ``spack list`` - show what packages spack knows how to -build \* ``spack compilers`` - show what compilers spack can use \* -``spack info `` - details about a package, and the different -ways it can be built \* ``spack spec `` - what pieces of -software a package depends on - -If a project wishes to create a spack installation, for example under -``/projects//spack`` and you would like an easy way for your -users to add it to their environment, please contact us and we can make -a module. - -If you are a user who wishes to supplement your project’s spack -installation, follow the installation instructions above and then tell -it where your project’s copy of spack is: - -:: - - cat > $SPACK_ROOT/etc/spack/upstreams.yaml </spack - modules: - tcl: /projects//spack/share/spack/modules - EOF - -Easybuild -~~~~~~~~~ - -Not currently recommended. 
- -The central Easybuild modules are available when a user executes the -following command and then logs in again: - -:: - - echo easybuild > ~/.application_environment - -A user can create their own Easybuild installation to supplement (or -override) the packages provided by the central install by: - -:: - - echo 'export EASYBUILD_INSTALLPATH=$HOME/eb' >> ~/.bash_profile - echo 'export EASYBUILD_BUILDPATH=/tmp' >> ~/.bash_profile - echo 'export EASYBUILD_MODULES_TOOL=Lmod' >> ~/.bash_profile - echo 'export EASYBUILD_PARALLEL=8' >> ~/.bash_profile - echo 'export MODULEPATH=$HOME/eb/modules/all:$MODULEPATH' >> ~/.bash_profile - -Login again, and then: - -:: - - wget https://raw.githubusercontent.com/easybuilders/easybuild-framework/develop/easybuild/scripts/bootstrap_eb.py - python bootstrap_eb.py $EASYBUILD_INSTALLPATH - -Verify install by checking sensible output from: - -:: - - module avail # should show an EasyBuild module under user's home directory - module load EasyBuild - which eb # should show a path under the user's home directory - -Software can now be installed into the new Easybuild area using -``eb `` - -Project Easybuild installations can be created using a similar method. -In this case, a central module to add the project’s modules to a user’s -environment is helpful, and can be done on request. - - -Compilers ---------- - -Most compiler modules set the ``CC``, ``CXX``, ``FC``, ``F90`` environment variables to appropriate values. These are commonly used by tools such as CMake and autoconf, so that by loading a compiler module its compilers are used by default. - -This can also be done in your own build scripts and make files. e.g. - -:: - - module load gcc - $CC -o myprog myprog.c - -GCC -~~~ - -Note that the default GCC provided by Red Hat Enterprise Linux 7 (4.8.5) -is quite old, will not optimise for the POWER9 processor (either use -POWER8 tuning options or use a later compiler), and does not have -CUDA/GPU offload support compiled in. The module ``gcc/native`` has been -provided to point to this copy of GCC. - -The copies of GCC available as modules have been compiled with CUDA -offload support: - -:: - - module load gcc/10.2.0 - -For further information please see the `GCC online documentation `__. - -LLVM -~~~~ - -LLVM has been provided for use on the system by the ``llvm`` module. -It has been built with CUDA GPU offloading support, allowing OpenMP -regions to run on a GPU using the ``target`` directive. - -Note that, as from LLVM 11.0.0, it provides a Fortran compiler called -``flang``. Although this has been compiled and can be used for -experimentation, it is still immature and ultimately relies on -``gfortran`` for its code generation. The ``lvm/11.0.0`` module therefore -defaults to using the operating system provided ``gfortran``, instead. - -:: - - module load llvm/11.0.0 - -For further information please see the `LLVM Releases `__ for versioned documentation. - -NVIDIA HPC SDK -~~~~~~~~~~~~~~ - -The `NVIDIA HPC SDK `__, otherwise referred to as `nvhpc`, is a suite of compilers, libraries and tools for HPC. -It provides C, C++ and Fortran compilers, which include features enabling GPU acceleration through standard C++ and Fortran, OpenACC directives and CUDA. - -It is provided for use on the system by the ``nvhpc`` module(s), such as ``nvhpc/20.9``, and provides the ``nvc``, ``nvc++`` and ``nvfortran`` compilers. 
- -This module also provides the `NCCL `__ and `NVSHMEM `__ libraries, as well as the suite of math libraries typically included with the CUDA Toolkit, such as ``cublas``, ``cufft`` and ``nvblas``. - -:: - - module load nvhpc/20.9 - -For further information please see the `NVIDIA HPC SDK Documentation Archive `__. - -CUDA / NVCC -~~~~~~~~~~~ +Software on Bede +================ -`CUDA `__ and the ``nvcc`` CUDA/C++ compiler are provided for use on the system by the `cuda` modules. +These pages list software available on bessemer and/or instructions on how to install and use software which is not centrally installed. -Unlike other compiler modules, the cuda modules do not set ``CC`` or ``CXX`` environment variables. This is because ``nvcc`` can be used to compile device CUDA code in conjunction with a range of host compilers, such as GCC or LLVM clang. -:: +If you notice any omissions, errors or have any suggested changes to the documentation please create an `Issue `__ or open a `Pull Request `__ on GitHub. - module load cuda/10.2.89 - module load cuda/10.1.243 - -For further information please see the `CUDA Toolkit Archive `__. - -BLAS/LAPACK ------------ - -The following numerical libraries provide optimised CPU implementations of BLAS and LAPACK on the system: - -- ESSL (IBM Engineering and Scientific Subroutine Library) -- OpenBLAS - -The modules for each of these libraries provide some convenience environment variables: ``N8CIR_LINALG_CFLAGS`` contains the compiler arguments to link BLAS and LAPACK to C code; ``N8CIR_LINALG_FFLAGS`` contains the same to link to Fortran. When used with variables such as ``CC``, commands to build software can become entirely independent of what compilers and numerical libraries you have loaded, e.g. - -:: - - module load gcc essl/6.2 - $CC -o myprog myprog.c $N8CIR_LINALG_CFLAGS - - -MPI ---- - -The main supported MPI on the system is OpenMPI. - -For access to a cuda-enabled MPI: ``module load gcc cuda openmpi`` - -We commit to the following convention for all MPIs we provide as modules: - -- The wrapper to compile C programs is called ``mpicc`` -- The wrapper to compile C++ programs is called ``mpicxx`` -- The wrapper to compile Fortran programs is called ``mpif90`` - - -HDF5 ----- - -When loaded in conjunction with an MPI module such as ``openmpi``, the -``hdf5`` module provides both the serial and parallel versions of the -library. The parallel functionality relies on a technology called MPI-IO, -which is currently subject to the following known issue on Bede: - -- HDF5 does not pass all of its parallel tests with OpenMPI 4.x. If - you are using this MPI and your application continues to run but does - not return from a call to the HDF5 library, you may have hit a similar - issue. The current workaround is to instruct OpenMPI to use an alternative - MPI-IO implementation with the command: ``export OMPI_MCA_io=ompio`` - The trade off is that, in some areas, this alternative is extremely slow - and so should be used with caution. - - -NetCDF ------- - -The ``netcdf`` module provides the C, C++ and Fortran bindings for this -file format library. When an MPI module is loaded, parallel support is -enabled through the PnetCDF and HDF5 libraries. - -Use of NetCDF's parallel functionality can use HDF5, and so is subject -to its known issues on Bede (see above). 
- -Python ------- - -PyTorch Quickstart -~~~~~~~~~~~~~~~~~~ -The following should get you set up with a working conda environment (replacing with your project code): - -:: - - export DIR=/nobackup/projects//$USER - # rm -rf ~/.conda ~/.condarc $DIR/miniconda # Uncomment if you want to remove old env - mkdir $DIR - pushd $DIR - - wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-ppc64le.sh - - sh Miniconda3-latest-Linux-ppc64le.sh -b -p $DIR/miniconda - source miniconda/bin/activate - conda update conda -y - conda config --set channel_priority strict - - conda config --prepend channels \ - https://public.dhe.ibm.com/ibmdl/export/pub/software/server/ibm-ai/conda/ - - conda config --prepend channels \ - https://opence.mit.edu - - conda create --name opence pytorch=1.7.1 -y - conda activate opence - - -This has some limitations such as not supporting large model support. If you require this you can try the instructions below, these provide an older version of PyTorch however. - -TensorFlow Quickstart -~~~~~~~~~~~~~~~~~~~~~ -The following should get you set up with a working conda environment (replacing with your project code): - - -:: - export DIR=/nobackup/projects//$USER - # rm -rf ~/.conda .condarc $DIR/miniconda # Uncomment if you want to remove old env - mkdir $DIR - pushd $DIR - - wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-ppc64le.sh - - sh Miniconda3-latest-Linux-ppc64le.sh -b -p $DIR/miniconda - source miniconda/bin/activate - conda update conda -y - - conda config --prepend channels \ - https://public.dhe.ibm.com/ibmdl/export/pub/software/server/ibm-ai/conda/ - - conda config --prepend channels \ - https://opence.mit.edu - - conda create --name opence tensorflow -y - conda activate opence - - -Note: This conflicts with the PyTorch instructions above as they set the conda channel_priority to be strict which seems to cause issues when installing TensorFlow. - - -PyTorch and TensorFlow: IBM PowerAI and wmlce [Possibly Out of Date] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -IBM have done a lot of work to port common Machine Learning tools to the -POWER9 system, and to take advantage of the GPUs abililty to directly -access main system memory on the POWER9 architecture using its “Large -Model Support”. - -This has been packaged up into what is variously known as IBM Watson -Machine Learning Community Edition (wmlce) or the catchier name PowerAI. - -Documentation on wmlce can be found here: -https://www.ibm.com/support/pages/get-started-ibm-wml-ce - -Installation is via the IBM channel of the anaconda package management tool. **Note: -if you do not use this channel you will not find all of the available packages.** -First install anaconda (can be quite large - so using the /nobackup area): - -:: - - cd /nobackup/projects/ - - wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-ppc64le.sh - sh Miniconda3-latest-Linux-ppc64le.sh - conda update conda - conda config --set channel_priority strict - conda config --prepend channels https://public.dhe.ibm.com/ibmdl/export/pub/software/server/ibm-ai/conda/ - conda create --name wmlce - -Then login again and install wmlce (GPU version by default - substitute -``powerai-cpu`` for ``powerai`` for the CPU version): - -:: - - conda activate wmlce - conda install powerai ipython - -Running ``ipython`` on the login node will then allow you to experiment -with this feature using an interactive copy of Python and the GPUs on -the login node. 
Demanding work should be packaged into a job and -launched with the ``python`` command. - -If a single node with 4 GPUs and 512GB RAM isn't enough, the Distributed -Deep Learning feature of PowerAI should allow you to write code that can -take advantage of multiple nodes. - - - -WMLCE resnet50 benchmark [Possibly out of date] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - -.. toctree:: - :maxdepth: -1 - - resnet50/bede-README-sbatch - -R -- - -`R `__ is a free software environment for statistical computing and graphics. -It is provided on the system by the `r` module(s), which make ``R`` and ``Rscript`` available for use. - -:: - - module load r/4.0.3 - -For more information, run ``man R``, ``man RScript`` or see the `R Manuals `__ online. - -Initial investigations with CUDA (under development) ----------------------------------------------------- +.. include:: ../common/rhel8-status.rst .. toctree:: - :maxdepth: -1 - - wanderings/wanderings-in-CUDALand - wanderings/Estimating-pi-in-CUDALand - -Cryo-EM Software Environment ----------------------------- - -Documentation on the the Cryo-EM Software Environment for Life Sciences is available :download:`here `. Note that this document is mainly based on the installation on `Satori `_ and might have some inconsistencies with the Bede installation. - - - - - -To use the modules, execute - -:: - - conda activate /projects/bddir04/ibm-lfsapp/CryoEM - -with a working conda installation. + :maxdepth: 3 + :titlesonly: + :name: softwaretoc + + applications/index + projects/index + environments/index + compilers/index + libraries/index + tools/index diff --git a/software/libraries/blas-lapack.rst b/software/libraries/blas-lapack.rst new file mode 100644 index 0000000..0bef279 --- /dev/null +++ b/software/libraries/blas-lapack.rst @@ -0,0 +1,25 @@ +.. _software-libraries-blas-lapack: + +BLAS/LAPACK +=========== + +The following numerical libraries provide optimised CPU implementations of BLAS and LAPACK on the system: + +- `ESSL `__ (IBM Engineering and Scientific Subroutine Library) +- `OpenBLAS `__ + +The modules for each of these libraries provide some convenience environment variables: ``N8CIR_LINALG_CFLAGS`` contains the compiler arguments to link BLAS and LAPACK to C code; ``N8CIR_LINALG_FFLAGS`` contains the same to link to Fortran. When used with variables such as ``CC``, commands to build software can become entirely independent of what compilers and numerical libraries you have loaded, eg. for ESSL: + +.. code-block:: bash + + module load gcc essl/6.2 + $CC -o myprog myprog.c $N8CIR_LINALG_CFLAGS + + +Or for OpenBLAS: + +.. code-block:: bash + + module load gcc openblas/6.2 + $CC -o myprog myprog.c $N8CIR_LINALG_CFLAGS + diff --git a/software/libraries/boost.rst b/software/libraries/boost.rst new file mode 100644 index 0000000..aaabc38 --- /dev/null +++ b/software/libraries/boost.rst @@ -0,0 +1,11 @@ +Boost +===== + +`Boost `__ provides free peer-reviewed portable C++ source libraries, that work well with the C++ Standard Library. + +A centrally-installed version is available via the modules system, which can be loaded as follows: + +.. code-block:: bash + + module load boost + module load boost/1.74.0 diff --git a/software/libraries/fftw.rst b/software/libraries/fftw.rst new file mode 100644 index 0000000..5d777e4 --- /dev/null +++ b/software/libraries/fftw.rst @@ -0,0 +1,13 @@ +.. 
_software-libraries-fftw: + +FFTW +==== + +`FFTW `__ is a C subroutine library for computing the discrete Fourier transform (DFT) in one or more dimensions, of arbitrary input size, and of both real and complex data. + +A centrally-installed version of FFTW can be loaded via ``module``: + +.. code-block:: bash + + module load fftw + module load fftw/3.3.8 diff --git a/software/libraries/hdf5.rst b/software/libraries/hdf5.rst new file mode 100644 index 0000000..61b97d0 --- /dev/null +++ b/software/libraries/hdf5.rst @@ -0,0 +1,29 @@ +.. _software-libraries-hdf5: + +HDF5 +==== + +When loaded in conjunction with an MPI module such as ``openmpi``, the +``hdf5`` module provides both the serial and parallel versions of the +library. + +.. code-block:: bash + + module load hdf5 + module load hdf5/1.10.7 + +.. _software-libraries-hdf5-known-issues: + +Known issues +------------ + +The parallel functionality relies on a technology called MPI-IO, +which is currently subject to the following known issue on Bede: + +- HDF5 does not pass all of its parallel tests with OpenMPI 4.x. If + you are using this MPI and your application continues to run but does + not return from a call to the HDF5 library, you may have hit a similar + issue. The current workaround is to instruct OpenMPI to use an alternative + MPI-IO implementation with the command: ``export OMPI_MCA_io=ompio`` + The trade off is that, in some areas, this alternative is extremely slow + and so should be used with caution. diff --git a/software/libraries/index.rst b/software/libraries/index.rst new file mode 100644 index 0000000..dcbaa29 --- /dev/null +++ b/software/libraries/index.rst @@ -0,0 +1,14 @@ +.. _software-libraries: + +Libraries +========= + +These pages list software libraries which are centrally installed on Bede. + +If you notice any omissions, errors or have any suggested changes to the documentation please create an `Issue `__ or open a `Pull Request `__ on GitHub. + +.. toctree:: + :maxdepth: 1 + :glob: + + ./* \ No newline at end of file diff --git a/software/libraries/mpi.rst b/software/libraries/mpi.rst new file mode 100644 index 0000000..d80be02 --- /dev/null +++ b/software/libraries/mpi.rst @@ -0,0 +1,43 @@ +.. _software-libraries-MPI: + +MPI +=== + +`OpenMPI `__ and `MVAPICH `__ are provided as alternate Message Passing Interface (MPI) implementations on Bede. + +OpenMPI is the main supported MPI on bede. + +We commit to the following convention for all MPIs we provide as modules: + +- The wrapper to compile C programs is called ``mpicc`` +- The wrapper to compile C++ programs is called ``mpicxx`` +- The wrapper to compile Fortran programs is called ``mpif90`` + +CUDA-enabled MPI is available through OpenMPI, when a cuda module is loaded alongside ``openmpi``, I.e. + +.. code-block:: bash + + module load gcc cuda openmpi + +OpenMPI is provided by the ``openmpi`` module(s): + +.. code-block:: bash + + module load openmpi + module load openmpi/4.0.5 + + +MVAPICH2 is provided by the `mvapich2` module(s): + +.. code-block:: bash + + module load mvapich2 + module load mvapich2/2.3.5 + module load mvapich2/2.3.5-2 + + +.. note:: + + The ``mvapich2/2.3.5-2`` module should be used rather than ``mvapich2/2.3.5``, which is only provided to support existing projects which depend on it. + + Under RHEL 8, the ``mvapich2/2.3.5`` module is removed. 
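+
+As a minimal illustration of the wrapper naming convention above (the source file names are
+placeholders), the same commands are used whichever of the provided MPI modules is loaded:
+
+.. code-block:: bash
+
+   # Load a compiler and an MPI implementation (add cuda for the CUDA-enabled OpenMPI)
+   module load gcc openmpi
+
+   # Compile C, C++ and Fortran MPI programs via the wrapper commands
+   mpicc -o hello_mpi hello_mpi.c
+   mpicxx -o hello_mpi_cxx hello_mpi.cpp
+   mpif90 -o hello_mpi_f hello_mpi.f90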
\ No newline at end of file diff --git a/software/libraries/netcdf.rst b/software/libraries/netcdf.rst new file mode 100644 index 0000000..68b899d --- /dev/null +++ b/software/libraries/netcdf.rst @@ -0,0 +1,20 @@ +.. _software-libraries-netcdf: + +NetCDF +====== + +`Network Common Data Form (NetCDF) `__ is a set of software libraries and machine independent data formats for array-orientated scientific data. + +A centrally installed version of NetCDF is provided on Bede by the ``netcdf`` module(s). +It provides the C, C++ and Fortran bindings for this file format library. +When an :ref:`MPI ` module is loaded, parallel file support is enabled through the PnetCDF and :ref:`HDF5 ` libraries. + +.. code-block:: bash + + module load netcdf + module load netcdf/4.7.4 + +.. note:: + + NetCDF's parallel functionality can use HDF5, and so is subject + to its known issues on Bede (see :ref:`software-libraries-hdf5-known-issues`). diff --git a/software/libraries/nvtoolsext.rst b/software/libraries/nvtoolsext.rst new file mode 100644 index 0000000..b2e7740 --- /dev/null +++ b/software/libraries/nvtoolsext.rst @@ -0,0 +1,39 @@ +.. _software-libraries-nvtoolsext: + +NVIDIA Tools Extension +~~~~~~~~~~~~~~~~~~~~~~ + +`NVIDIA Tools Extension (NVTX) `__ is a C-based API for annotating events and ranges in applications. +These markers and ranges can be used to increase the usability of the NVIDIA profiling tools. + +* For CUDA ``>= 10.0``, NVTX version ``3`` is distributed as a header only library. +* For CUDA ``< 10.0``, NVTX is distributed as a shared library. + +The location of the headers and shared libraries may vary between Operating Systems, and CUDA installation (i.e. CUDA toolkit, PGI compilers or HPC SDK). + +On Bede, nvToolsExt is provided by the :ref:`CUDA ` and :ref:`NVHPC ` modules: + +.. code-block:: bash + + module load cuda + module load nvhpc + +The NVIDIA Developer blog contains several posts on using NVTX: + +* `Generate Custom Application Profile Timelines with NVTX (Jiri Kraus) `_ +* `Track MPI Calls In The NVIDIA Visual Profiler (Jeff Larkin) `_ +* `Customize CUDA Fortran Profiling with NVTX (Massimiliano Fatica) `_ + + +CMake support +^^^^^^^^^^^^^ + +From CMake 3.17, the ```FindCUDAToolkit `_`` can be used to find the tools extension and select the appropriate include directory. +If support for older CMake versions is required custom ``find_package`` modules can be used, e.g. `ptheywood/cuda-cmake-NVTX on GitHub `_. + + +Documentation +^^^^^^^^^^^^^ + +* `NVTX Documentation `_ +* `NVTX 3 on GitHub `_ diff --git a/software/libraries/plumed.rst b/software/libraries/plumed.rst new file mode 100644 index 0000000..7b71e7b --- /dev/null +++ b/software/libraries/plumed.rst @@ -0,0 +1,33 @@ +.. _software-libraries-plumed: + +PLUMED +------ + +`PLUMED `__, the community-developed PLUgin for MolEcular Dynamics, is a an open-source, community-developed library that provides a wide range of different methods, which include: + +* enhanced-sampling algorithms +* free-energy methods +* tools to analyze the vast amounts of data produced by molecular dynamics (MD) simulations. + +PLUMED works together with some of the most popular MD engines, including :ref:`GROMACS `, :ref:`NAMD ` and :ref:`OpenMM ` which are available on Bede. + + +On Bede, PLUMED is made available through the :ref:`HECBioSim Project `. + + +.. 
code-block:: bash
+
+   # Load the hecbiosim project
+   module load hecbiosim
+   # Load the desired version of PLUMED
+   module load plumed
+   module load plumed/2.7.2-rhel8
+   module load plumed/2.6.2-rhel8
+   module load plumed/2.6.2
+
+
+For more information see the `PLUMED Documentation `__.
+
+
+
+
diff --git a/software/libraries/vtk.rst b/software/libraries/vtk.rst
new file mode 100644
index 0000000..ac992b3
--- /dev/null
+++ b/software/libraries/vtk.rst
@@ -0,0 +1,13 @@
+.. _software-libraries-vtk:
+
+VTK
+---
+
+`The Visualization Toolkit (VTK) `__ is open source software for manipulating and displaying scientific data.
+
+The ``vtk`` module can be loaded by one of the following:
+
+.. code-block:: bash
+
+   module load vtk
+   module load vtk/9.0.1
diff --git a/software/projects/hecbiosim.rst b/software/projects/hecbiosim.rst
new file mode 100644
index 0000000..dafc1ad
--- /dev/null
+++ b/software/projects/hecbiosim.rst
@@ -0,0 +1,28 @@
+.. _software-projects-hecbiosim:
+
+HECBioSim
+=========
+
+The `HEC BioSim consortium `__ focusses on molecular simulations, at a variety of time and length scales but based on well-defined physics to complement experiment.
+The unique insight they can provide gives molecular level understanding of how biological macromolecules function.
+Simulations are crucial in analysing protein folding, mechanisms of biological catalysis, and how membrane proteins interact with lipid bilayers.
+A particular challenge is the integration of simulations across length and timescales: different types of simulation method are required for different types of problems.
+
+On Bede, the HECBioSim project provides several software packages for molecular simulation, including:
+
+* :ref:`AMBER `
+* :ref:`GROMACS `
+* :ref:`NAMD `
+* :ref:`OpenMM `
+* :ref:`PLUMED `
+
+Once the ``hecbiosim`` module has been loaded, it is possible to load versions of the provided packages.
+
+.. code-block:: bash
+
+   # Load the hecbiosim module
+   module load hecbiosim
+   # Once loaded, modules provided by hecbiosim are available, such as gromacs
+   module load gromacs
+
+For more information on the HEC BioSim consortium please see the `HECBioSim Website `__.
diff --git a/software/projects/ibm-collaboration.rst b/software/projects/ibm-collaboration.rst
new file mode 100644
index 0000000..ecc721a
--- /dev/null
+++ b/software/projects/ibm-collaboration.rst
@@ -0,0 +1,24 @@
+.. _software-projects-ibm-collaboration:
+
+IBM Collaboration
+=================
+
+On Bede, the ``ibm-collaboration`` project provides several software packages which were produced in collaboration with the system vendor `IBM `__.
+
+* :ref:`Cryo-EM ` - a collection of software packages for life sciences including:
+
+  * `RELION `__
+  * `CTFfind4 `__
+  * `MotionCor2 `__
+  * `crYOLO `__
+  * `ResMap `__
+
+* :ref:`EMAN2 ` - a scientific image processing suite with a primary focus on processing data from transmission electron microscopes.
+
+For instructions on how to use these projects please see the :ref:`Cryo-EM ` and :ref:`EMAN2 ` pages.
+
+.. warning::
+
+   The ``ibm-collaboration`` module does not currently provide valid Environment Modules to load the included software packages. This will be addressed in the future.
+
+   Instead, follow the instructions listed for the included projects to load the software packages via conda.
\ No newline at end of file
diff --git a/software/projects/index.rst b/software/projects/index.rst
new file mode 100644
index 0000000..1175232
--- /dev/null
+++ b/software/projects/index.rst
@@ -0,0 +1,14 @@
+.. 
_software-projects: + +Projects +======== + +These pages list several ``projects`` on Bede which provide software packages that are not installed by the Bede system administrators. + +If you notice any omissions, errors or have any suggested changes to the documentation please create an `Issue `__ or open a `Pull Request `__ on GitHub. + +.. toctree:: + :maxdepth: 3 + + hecbiosim + ibm-collaboration diff --git a/software/resnet50/bede-README-sbatch.rst b/software/resnet50/bede-README-sbatch.rst deleted file mode 100644 index be24377..0000000 --- a/software/resnet50/bede-README-sbatch.rst +++ /dev/null @@ -1,134 +0,0 @@ -************************************************************ -Watson Machine Learning Community Edition resnet50 benchmark -************************************************************ - - -This Bede specific README file is based upon options laid out in the README.MD file in the WMLCE -resnet50 benchmark directory. The necessary data from ImageNet has been downloaded and processed. -It is stored in /nobackup/datasets/resnet50/TFRecords and is universally readable. - -NOTE: As written, the associated sbatch script must be run in a directory that is writable -by the user. It creates a directory with the default name run_results into which it will write -the results of the computation. The results data will use up to 1.2GB of space. The run -directory must also be accessible by the compute nodes, so using /tmp on a login node is not -suitable. - -The main WMLCE README.MD file suggests the following parameters are appropriate for a 4 node -(possibly 16 GPU) run: - - -:: - - # Run a training job - ddlrun -H host1,host2,host3,host4 python benchmarks/tensorflow-benchmarks/resnet50/main.py \ - --mode=train_and_evaluate --iter_unit=epoch --num_iter=50 --batch_size=256 --warmup_steps=100 \ - --use_cosine_lr --label_smoothing 0.1 --lr_init=0.256 --lr_warmup_epochs=8 --momentum=0.875 \ - --weight_decay=3.0517578125e-05 --data_dir=/data/imagenetTF/ --results_dir=run_results \ - --use_xla --precision=fp16 --loss_scale=1024 --use_static_loss_scaling - -ddlrun by itself is not integrated with Slurm and will not run directly on Bede. A wrapper-script -called bede-ddlrun is available and that is what is used in the following. - -It is easy to define a single GPU run based on the above set of parameters (basically -remove the ddlrun command at the front and specify the correct paths). The associated run -takes about 16 hours to complete. - -The related sbatch script ( :download:`sbatch_resnet50base.sh `) is configured to use 4 GPUs on one node. -Changing the script to use 4 nodes, 16 GPUs, requires changing one line. - - -The sbatch script specifies: - -:: - - ... - #SBATCH -p gpu - #SBATCH --gres=gpu:4 - #SBATCH -N1 - ... - - module load slurm/dflt - export PYTHON_HOME=/opt/software/apps/anaconda3/ - source $PYTHON_HOME/bin/activate wmlce_env - - export OMP_NUM_THREADS=1 # Disable multithreading - - bede-ddlrun python $PYTHON_HOME/envs/wmlce_env/tensorflow-benchmarks/resnet50/main.py \ - --mode=train_and_evaluate --iter_unit=epoch --num_iter=50 --batch_size=256 \ - --warmup_steps=100 --use_cosine_lr --label_smoothing 0.1 --lr_init=0.256 \ - --lr_warmup_epochs=8 --momentum=0.875 --weight_decay=3.0517578125e-05 \ - --data_dir=/nobackup/datasets/resnet50/TFRecords/ --results_dir=run_results \ - --use_xla --precision=fp16 --loss_scale=1024 --use_static_loss_scaling - - - -The resulting job should run for about 4 hours and will keep all 4 GPUs at nearly -100% utilisation. 
- -The first few lines of output should look similar to: -:: - - [WARN DDL-2-17] Not performing connection tests. Cannot find 'mpitool' executabl - e. This could be because you are using a version of mpi that does not ship with - mpitool. - Please see /tmp/DDLRUN/DDLRUN.j9SmSKzaKGEL/ddlrun.log for detailed log. - + /opt/software/apps/anaconda3/envs/wmlce_env/bin/mpirun -x PATH -x LD_LIBRARY_P - ATH -disable_gdr -gpu -mca plm_rsh_num_concurrent 1 --rankfile /tmp/DDLRUN/DDLRU - N.j9SmSKzaKGEL/RANKFILE -n 4 -x DDL_HOST_PORT=2200 -x "DDL_HOST_LIST=gpu025.bede - .dur.ac.uk:0,1,2,3" -x "DDL_OPTIONS=-mode p:4x1x1x1 " bash -c 'source /opt/softw - are/apps/anaconda3/etc/profile.d/conda.sh && conda activate /opt/software/apps/a - naconda3/envs/wmlce_env > /dev/null 2>&1 && python /opt/software/apps/anaconda3/ - envs/wmlce_env/tensorflow-benchmarks/resnet50/main.py --mode=train_and_evaluate - --iter_unit=epoch --num_iter=50 --batch_size=256 --warmup_steps=100 --use_cosine - _lr --label_smoothing 0.1 --lr_init=0.256 --lr_warmup_epochs=8 --momentum=0.875 - --weight_decay=3.0517578125e-05 --data_dir=/nobackup/datasets/resnet50/TFRecords - / --results_dir=run_results --use_xla --precision=fp16 --loss_scale=1024 --use_s - tatic_loss_scaling' - 2020-11-17 15:39:49.410620: I tensorflow/stream_executor/platform/default/dso_lo - ader.cc:44] Successfully opened dynamic library libcudart.so.10.2 - -There are a number of configuration / compiler type messages and then you should -start to see messages like: - -:: - - :::NVLOGv0.2.3 resnet 1605627653.398838758 (training_hooks.py:100) iteration: 0 - :::NVLOGv0.2.3 resnet 1605627653.400741577 (training_hooks.py:101) imgs_per_sec: - 37.5667719118656 - :::NVLOGv0.2.3 resnet 1605627653.402500391 (training_hooks.py:102) cross_entropy - : 9.02121639251709 - :::NVLOGv0.2.3 resnet 1605627653.404244661 (training_hooks.py:103) l2_loss: 0.74 - 98071789741516 - :::NVLOGv0.2.3 resnet 1605627653.405992270 (training_hooks.py:104) total_loss: 9 - .771023750305176 - :::NVLOGv0.2.3 resnet 1605627653.407735109 (training_hooks.py:105) learning_rate - : 0.0 - :::NVLOGv0.2.3 resnet 1605627671.803228855 (training_hooks.py:100) iteration: 10 - :::NVLOGv0.2.3 resnet 1605627671.805866718 (training_hooks.py:101) imgs_per_sec: - 4526.812526349517 - :::NVLOGv0.2.3 resnet 1605627671.807682991 (training_hooks.py:102) cross_entropy - : 8.204719543457031 - -The most relevant line is the value after imgs_per_sec: - -Once things start running, you should see something like 4500 images per second as -the rate on 4 GPUs. - -After about 4 hours, the training has converged and you should see the last few lines like: - -:: - - transpose_before=resnet50_v1.5/input_reshape/transpose pad=resnet50_v1.5/conv2d/Pad transpose_after=resnet50_v1.5/conv2d/conv2d/Conv2D-0-TransposeNCHWToNHWC-LayoutOptimizer - :::NVLOGv0.2.3 resnet 1605641981.781752110 (runner.py:610) Top-1 Accuracy: 75.863 - :::NVLOGv0.2.3 resnet 1605641981.782602310 (runner.py:611) Top-5 Accuracy: 92.823 - :::NVLOGv0.2.3 resnet 1605641981.783382177 (runner.py:630) Ending Model Evaluation ... - -It is easy to modify the script to use 4 nodes and hence 16 GPUs. The run time will -be a just over an hour and during the 16 GPU run, about 18000 images per second will -be processed. - -Unfortunately, the basic parameters used with the resnet50 run do not allow this -job to scale much beyond 16 GPUs. Indeed, there is no speedup with this configuration -using 32 GPUs. Improving scalability is left as an exercise for the user. 
- - diff --git a/software/resnet50/sbatch_resnet50base.sh b/software/resnet50/sbatch_resnet50base.sh deleted file mode 100644 index 65f5d23..0000000 --- a/software/resnet50/sbatch_resnet50base.sh +++ /dev/null @@ -1,54 +0,0 @@ -#!/bin/bash -l -#SBATCH -A bdXXXYY -#SBATCH -p gpu -#SBATCH --gres=gpu:4 -#SBATCH -N1 -#SBATCH -o multix1.o%j -#SBATCH -t 4:20:00 -# -# Author: C. Addison -# Initial version: 2020-11-19 -# -# Please read the file bede-README-batch.txt for details on this -# script. -# -echo ========================================================= -echo SLURM job: submitted date = `date` -date_start=`date +%s` - -echo Nodes involved: -echo $SLURM_NODELIST -echo ========================================================= -echo Job output begins -echo ----------------- -echo -module load slurm/dflt -export PYTHON_HOME=/opt/software/apps/anaconda3/ -source $PYTHON_HOME/bin/activate wmlce_env - -export OMP_NUM_THREADS=1 # Disable multithreading - - -bede-ddlrun python $PYTHON_HOME/envs/wmlce_env/tensorflow-benchmarks/resnet50/main.py \ ---mode=train_and_evaluate --iter_unit=epoch --num_iter=50 --batch_size=256 \ ---warmup_steps=100 --use_cosine_lr --label_smoothing 0.1 --lr_init=0.256 \ ---lr_warmup_epochs=8 --momentum=0.875 --weight_decay=3.0517578125e-05 \ ---data_dir=/nobackup/datasets/resnet50/TFRecords/ --results_dir=run_results \ ---use_xla --precision=fp16 --loss_scale=1024 --use_static_loss_scaling - - - -echo -echo --------------- -echo Job output ends -date_end=`date +%s` -seconds=$((date_end-date_start)) -minutes=$((seconds/60)) -seconds=$((seconds-60*minutes)) -hours=$((minutes/60)) -minutes=$((minutes-60*hours)) -echo ========================================================= -echo SLURM job: finished date = `date` -echo Total run time : $hours Hours $minutes Minutes $seconds Seconds -echo ========================================================= - diff --git a/software/tools/cmake.rst b/software/tools/cmake.rst new file mode 100644 index 0000000..8b17a0a --- /dev/null +++ b/software/tools/cmake.rst @@ -0,0 +1,20 @@ +.. _software-tools-cmake: + +CMake +===== + +`CMake `__ is an open-source, cross-platform family of tools designed to build, test and package software. +CMake is used to control the software compilation process using simple platform and compiler independent configuration files, and generate native makefiles and workspaces that can be used in the compiler environment of your choice. +The suite of CMake tools were created by `Kitware `__ in response to the need for a powerful, cross-platform build environment for open-source projects such as ITK and VTK. + +CMake is part of Kitware’s collection of commercially supported `open-source platforms `__ for software development. + + +.. code-block:: bash + + module load cmake + module load cmake/3.18.4 + +Once loaded, the ``cmake``, ``ccmake``, ``cpack`` and ``ctest`` binaries are available for use, to configure, build and test software which uses CMake as the build system. + +For more information, see the `online documentation `__. \ No newline at end of file diff --git a/software/tools/index.rst b/software/tools/index.rst new file mode 100644 index 0000000..04bc007 --- /dev/null +++ b/software/tools/index.rst @@ -0,0 +1,14 @@ +.. _software-tools: + +Tools +================ + +These pages list developer tools available on bessemer and / or instructions on how to install and use tools which are not centrally installed. 
+
+If you notice any omissions, errors or have any suggested changes to the documentation please create an `Issue `__ or open a `Pull Request `__ on GitHub.
+
+.. toctree::
+   :maxdepth: 1
+   :glob:
+
+   ./*
\ No newline at end of file
diff --git a/software/tools/make.rst b/software/tools/make.rst
new file mode 100644
index 0000000..112609d
--- /dev/null
+++ b/software/tools/make.rst
@@ -0,0 +1,19 @@
+.. _software-tools-make:
+
+Make
+====
+
+`GNU Make `__ is a tool which controls the generation of executables and other non-source files of a program from the program's source files.
+
+Make gets its knowledge of how to build your program from a file called the makefile, which lists each of the non-source files and how to compute it from other files. When you write a program, you should write a makefile for it, so that it is possible to use Make to build and install the program.
+
+
+On Bede, ``make 3.82`` is provided by default (``4.2`` under RHEL8).
+A more recent version of ``make`` is provided by the ``make`` family of modules.
+
+.. code-block:: bash
+
+   module load make
+   module load make/4.3
+
+For more information on the usage of ``make``, see the `online documentation `__ or run ``man make`` after loading the module.
\ No newline at end of file
diff --git a/software/tools/nsight-compute.rst b/software/tools/nsight-compute.rst
new file mode 100644
index 0000000..ba18056
--- /dev/null
+++ b/software/tools/nsight-compute.rst
@@ -0,0 +1,56 @@
+.. _software-tools-nsight-compute:
+
+Nsight Compute
+==============
+
+`Nsight Compute `__ is a kernel profiler for CUDA applications, which can also be used for API debugging.
+It supports Volta architecture GPUs and newer (SM 70+).
+
+On Bede, Nsight Compute is provided by a number of modules, with differing versions of ``ncu``.
+You should use a version of ``ncu`` that is at least as new as the CUDA toolkit used to compile your application (if appropriate).
+
+.. code-block:: bash
+
+   module load nsight-compute/2020.2.1 # provides ncu 2020.2.1
+
+   # RHEL 7 only
+   module load nvhpc/20.5 # provides ncu 2020.1.0
+
+   # RHEL 8 only
+   module load cuda/11.5.1 # provides ncu 2021.3.1
+   module load cuda/11.4.1 # provides ncu 2021.2.1
+   module load cuda/11.3.1 # provides ncu 2021.1.1
+   module load cuda/11.2.2 # provides ncu 2020.3.1
+   module load nvhpc/21.5 # provides ncu 2021.1.0
+
+
+Consider compiling your CUDA application using ``nvcc`` with ``-lineinfo`` or ``--generate-line-info`` to generate line-level profile information.
+
+A common use-case for using Nsight Compute on HPC systems is to capture all available profiling metrics for a run of a target application, storing the information to a file on disk. This file can then be interrogated on a local machine using the Nsight Compute GUI.
+
+For example, the following command captures the full set of metrics for an application using the command line tool ``ncu``.
+
+.. code-block:: bash
+
+   ncu -o profile --set full ./myapplication
+
+Capturing the full set of metrics can lead to very long run times, as each kernel is replayed many times.
+Rather than capturing the full set of metrics, a subset may be captured using the ``--set``, ``--section`` and ``--metrics`` flags as described in the `Nsight Compute Profile Command Line Options table `_.
+
+The scope of the profile can also be reduced using `NVTX Filtering `_; or by targeting specific kernels using ``--kernel-id``, ``--kernel-regex`` and/or ``--launch-skip`` (see the `CLI docs for more information `_).
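+
+As an illustrative example (the kernel name ``mykernel`` is a placeholder), the following
+reduced-scope capture collects only the ``SpeedOfLight`` section, for kernels whose names
+match ``mykernel``, skipping the first 10 kernel launches:
+
+.. code-block:: bash
+
+   # Reduced-scope capture: one section, one kernel name pattern, skip early launches
+   ncu -o profile --section SpeedOfLight --kernel-regex mykernel --launch-skip 10 ./myapplication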
+
+
+Once the ``.ncu-rep`` file has been downloaded locally, it can be imported into the local Nsight Compute GUI ``ncu-ui`` via ``ncu-ui profile.ncu-rep`` **or** ``File > Open > profile.ncu-rep`` in the GUI.
+
+.. note::
+   Older versions of Nsight Compute (CUDA < v11.0.194) provided ``nv-nsight-cu-cli`` and ``nv-nsight-cu`` rather than ``ncu`` and ``ncu-ui`` respectively.
+
+   The generated report file used the ``.nsight-cuprof-report`` extension rather than ``.ncu-rep``.
+
+More Information
+^^^^^^^^^^^^^^^^
+
+* `Nsight Compute `_
+* `OLCF: Nsight Compute Tutorial `_
+
+  * Use the following `Nsight report files `_ to follow the tutorial.
\ No newline at end of file
diff --git a/software/tools/nsight-systems.rst b/software/tools/nsight-systems.rst
new file mode 100644
index 0000000..dc1c2f5
--- /dev/null
+++ b/software/tools/nsight-systems.rst
@@ -0,0 +1,54 @@
+.. _software-tools-nsight-systems:
+
+Nsight Systems
+==============
+
+`Nsight Systems `__ is a system-wide performance analysis tool designed to visualize an application’s algorithms and identify the largest opportunities to optimize.
+It supports Pascal (SM 60) and newer GPUs.
+
+A common use-case for Nsight Systems is to generate application timelines via the command line, which can later be visualised on a local computer using the GUI component.
+
+On Bede, Nsight Systems is provided by a number of modules, with differing versions of ``nsys``.
+You should use a version of ``nsys`` that is at least as new as the CUDA toolkit used to compile your application (if appropriate).
+
+.. code-block:: bash
+
+   module load nsight-systems/2020.3.1 # provides nsys 2020.3.1
+
+   # RHEL 7 only
+   module load nvhpc/20.5 # provides nsys 2020.3.1
+
+   # RHEL 8 only
+   module load cuda/11.5.1 # provides nsys 2021.3.3
+   module load cuda/11.4.1 # provides nsys 2021.2.4
+   module load cuda/11.3.1 # provides nsys 2021.1.3
+   module load cuda/11.2.2 # provides nsys 2020.4.3
+   module load nvhpc/21.5 # provides nsys 2021.2.1
+
+To generate an application timeline with Nsight Systems CLI (``nsys``):
+
+.. code-block:: bash
+
+   nsys profile -o timeline ./myapplication
+
+Nsight Systems can trace multiple APIs, such as CUDA and OpenACC.
+Use the ``--trace`` argument to specify which APIs should be traced.
+See the `nsys profiling command switch options `__ for further information.
+
+.. code-block:: bash
+
+   nsys profile -o timeline --trace cuda,nvtx,osrt,openacc ./myapplication
+
+.. note::
+   On Power9 systems such as Bede the ``--trace`` option ``osrt`` can lead to ``SIGILL`` errors with some versions of ``nsys``. As ``osrt`` is part of the default trace set, consider passing ``--trace cuda,nvtx`` instead.
+
+Once the ``timeline.qdrep`` file has been downloaded to your local machine, it can be opened in ``nsys-ui``/``nsight-sys`` via ``File > Open > timeline.qdrep``.
+
+
+More Information
+^^^^^^^^^^^^^^^^
+
+* `Nsight Systems `_
+* `OLCF: Nsight Systems Tutorial `_
+
+  * Use the following `Nsight report files `_ to follow the tutorial.
\ No newline at end of file
diff --git a/software/tools/nvidia-smi.rst b/software/tools/nvidia-smi.rst
new file mode 100644
index 0000000..5b1580b
--- /dev/null
+++ b/software/tools/nvidia-smi.rst
@@ -0,0 +1,56 @@
+.. _software-tools-nvidia-smi:
+
+NVIDIA-SMI
+===========
+
+``nvidia-smi`` is the NVIDIA System Management Interface.
+It is a command line tool which provides monitoring information for NVIDIA GPUs.
+ +It is available for use by default in interactive and batch sessions on Bede, but operations which would require root will not be usable by regular Bede users. + +Most Bede users will not need to interact with ``nvidia-smi``, however, it can be used to gather information about GPUs in a system and how they are connected to one another, which may be useful when reporting any performance results. + +Using ``nvidia-smi`` +-------------------- + +Running the ``nvidia-smi`` tool without any arguments will present summary information about the available GPUs on the current node that are accessible by the user, and information about the GPU driver in use. + +.. code-block:: bash + + nvidia-smi + +Detailed information per GPU can be queried using the ``-q`` and ``-i`` options: + +.. code-block:: bash + + # View detailed information about device 0 + nvidia-smi -i 0 -q + +V100 GPUs within ``gpu`` nodes in Bede are connected to one another and the CPU via NVLink connections, while T4 GPUs in ``infer`` nodes are not. +How GPUs within a node are connected to one another can be queried via the ``topo`` subcommand. +This may be useful when using multi-GPU applications. + +.. code-block:: bash + + # View the GPUDirect communication matrix via -m / --matrix + nvidia-smi topo -m + # View how GPUs 0 and 1 are connected, in a session with >= 2 GPUs + nvidia-smi topo -i 0,1 -p + +The ``nvlink`` subcommand can be used to query the status of each NVlink connection: + +.. code-block:: bash + + # View the status of each nvlink for device 0 + nvidia-smi nvlink -i 0 -s + # View how GPUs 0 and 1 are connected, in a session with >= 2 GPUs + nvidia-smi topo -i 0,1 -p + + + +Full usage documentation can be found via the ``--help`` option: + + +.. code-block:: bash + + nvidia-smi --help \ No newline at end of file diff --git a/software/tools/singularity.rst b/software/tools/singularity.rst new file mode 100644 index 0000000..6555d7f --- /dev/null +++ b/software/tools/singularity.rst @@ -0,0 +1,23 @@ +Apptainer /Singularity +---------------------- + +`Apptainer `__ (formerly `Singularity `__) is a container platform similar to `Docker `__. +Singularity is the most widely used container system for HPC. +It allows you to create and run containers that package up pieces of software in a way that is portable and reproducible. + +Container platforms allow users to create and use container images, which are self-contained software stacks. + +.. note:: + As Bede is a Power 9 Architecture (``ppc64le``) machine, containers created on more common ``x86_64`` machines may not be compatible. + + +Under RHEL 8, Singularity is provided in the default environment, and can be used without loading any modules. + +Under RHEL 7, singularity is provided by a module: + +.. code-block:: bash + + module load singularity + module load singularity/3.6.4 + +For more information on how to use singularity, please see the `Singularity Documentation `__. diff --git a/training/index.rst b/training/index.rst index 10293ea..a5c253d 100644 --- a/training/index.rst +++ b/training/index.rst @@ -1,7 +1,7 @@ .. _training: -Useful Training Material -======================== +Training Material +================= Information on support available for Bede Users can be found `on this website `_ . diff --git a/usage/index.rst b/usage/index.rst index d9a2ad5..7dcb03c 100644 --- a/usage/index.rst +++ b/usage/index.rst @@ -1,3 +1,5 @@ +.. 
_using-bede: + Using Bede ========== @@ -11,14 +13,13 @@ Registering Access to the machine is based around projects: -- For information on how to register a new project, please see https://n8cir.org.uk/supporting-research/facilities/bede/docs/bede_registrations/ +- For information on how to register a new project, please see `the N8CIR website `__. - To create an account to use the system: - Identify an existing project, or register a new one. - - Create an EPCC SAFE account and login to the SAFE system at: - https://safe.epcc.ed.ac.uk/ - - Once there, select “Project->Request access” from the web + - Create an EPCC SAFE account and login to the SAFE system at `https://safe.epcc.ed.ac.uk/ `__ + - Once there, select "Project->Request access" from the web interface and then register against your project Login @@ -37,7 +38,9 @@ acceptable. Most of the computational power of the system is accessed through the batch scheduler, and so demanding applications should be submitted to it -(see “Running Jobs”). +(see "Running Jobs"). + +.. _usage-acknowledging-bede: Acknowledging Bede ------------------ @@ -47,15 +50,17 @@ wherever the work is presented. We provide the following acknowledgement text, and strongly encourage its use: -*"This work made use of the facilities of the N8 Centre of Excellence in -Computationally Intensive Research (N8 CIR) provided and funded by the N8 -research partnership and EPSRC (Grant No. EP/T022167/1). The Centre is -co-ordinated by the Universities of Durham, Manchester and York."* + "This work made use of the facilities of the N8 Centre of Excellence in + Computationally Intensive Research (N8 CIR) provided and funded by the N8 + research partnership and EPSRC (Grant No. EP/T022167/1). The Centre is + co-ordinated by the Universities of Durham, Manchester and York." Acknowledgement of Bede provides data that can be used to assess the facility's success and influences future funding decisions, so please ensure that you are acknowledging where appropriate. +.. _usage-file-storage: + File Storage ------------ @@ -66,7 +71,7 @@ Each project has access to the following shared storage: - Intended for project files to be backed up (note: backups not currently in place) - Modest performance - - A default quota of 20GB + - A default quota of ``20GB`` - Project Lustre directory (``/nobackup/projects/``) @@ -83,7 +88,7 @@ In addition, each user has: - Intended for per-user configuration files. - Modest performance - - A default quota of 20GB + - A default quota of ``20GB`` Please note that, as access to Bede is driven by project use, no personal data should be stored on the system. @@ -126,7 +131,7 @@ Part of, or an entire node Example job script for programs written to take advantage of a GPU or multiple GPUs on a single computer: -:: +.. code-block:: bash #!/bin/bash @@ -156,7 +161,7 @@ Multiple nodes (MPI) Example job script for programs using MPI to take advantage of multiple CPUs/GPUs across one or more machines: -:: +.. code-block:: bash #!/bin/bash @@ -196,7 +201,7 @@ process) or ``none`` (set ``OMP_NUM_THREADS=1``) Examples: -:: +.. code-block:: bash # - One MPI rank per node: bede-mpirun --bede-par 1ppn @@ -216,7 +221,7 @@ Multiple nodes (IBM PowerAI DDL) IBM PowerAI DDL (Distributed Deep Learning) is a method of using the GPUs in more than one node to perform calculations. Example job script: -:: +.. code-block:: bash #!/bin/bash @@ -242,6 +247,10 @@ GPUs in more than one node to perform calculations. Example job script: echo "end of job" +.. 
warning:: + + IBM PowerAI DDL is only supported on RHEL 7 + Maximum Job Runtime ~~~~~~~~~~~~~~~~~~~ @@ -255,14 +264,14 @@ requested: * - Partition Name - Default Job Time - Maximum Job Time - * - infer - - 01:00:00 - - 2-00:00:00 - * - gpu - - 01:00:00 - - 2-00:00:00 - -Where, for example, `2-00:00:00` means 'two days, zero hours, zero minutes, + * - ``infer`` + - ``01:00:00`` + - ``2-00:00:00`` + * - ``gpu`` + - ``01:00:00`` + - ``2-00:00:00`` + +Where, for example, ``2-00:00:00`` means 'two days, zero hours, zero minutes, and zero seconds'. These job time limits affect what will and won't be accepted -in the `--time` field of your job script: `--time` values above the partition +in the ``--time`` field of your job script: ``--time`` values above the partition maximum will result in your job submission being rejected. diff --git a/bug/index.rst b/user-group/index.rst similarity index 100% rename from bug/index.rst rename to user-group/index.rst