diff --git a/.readthedocs.yml b/.readthedocs.yml new file mode 100644 index 00000000000..9e2728c0935 --- /dev/null +++ b/.readthedocs.yml @@ -0,0 +1,7 @@ +# .readthedocs.yml + +build: + image: latest + +python: + version: 3.7 diff --git a/CMakeLists.txt b/CMakeLists.txt index eee3e65bf73..4dfb77a0e19 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.8) +cmake_minimum_required(VERSION 3.12) project(LBANN CXX) @@ -48,8 +48,8 @@ endif () # set(LBANN_VERSION_MAJOR 0) -set(LBANN_VERSION_MINOR 98) -set(LBANN_VERSION_PATCH 1) +set(LBANN_VERSION_MINOR 99) +set(LBANN_VERSION_PATCH 0) set(LBANN_VERSION "${LBANN_VERSION_MAJOR}.${LBANN_VERSION_MINOR}.${LBANN_VERSION_PATCH}") @@ -100,7 +100,7 @@ option(LBANN_WITH_ALUMINUM "Enable Aluminum all-reduce library" OFF) option(LBANN_WITH_CNPY "Include cnpy" ON) -option(LBANN_WITH_CONDUIT "Enable Conduit library" OFF) +option(LBANN_WITH_CONDUIT "Enable Conduit library" ON) option(LBANN_WITH_CUDNN "Include Nvidia cuDNN" ON) @@ -110,12 +110,17 @@ option(LBANN_WITH_HWLOC option(LBANN_WITH_NVPROF "Enable NVTX-based instrumentation for nvprof" OFF) -option(LBANN_WITH_TBINF "Include Tensorboard interface" ON) +option(LBANN_WITH_PYTHON + "Install Python frontend and enable embedded Python" ON) +option(LBANN_WITH_TBINF "Include Tensorboard interface" ON) option(LBANN_WITH_VTUNE "Link the Intel VTune profiling library" OFF) +option(LBANN_WITH_UNIT_TESTING + "Enable the unit testing framework (requires Catch2)" OFF) + # Enable parallel random matrix generation, if possible option(LBANN_DETERMINISTIC "Use deterministic algorithms as much as possible." OFF) @@ -167,12 +172,12 @@ set(LBANN_HAS_CEREAL ${CEREAL_FOUND}) # The imported target is just called "cereal". Super. # Setup the linear algebra library -find_package(Hydrogen 1.1.0 NO_MODULE QUIET +find_package(Hydrogen 1.2.0 NO_MODULE QUIET HINTS ${Hydrogen_DIR} ${HYDROGEN_DIR} $ENV{Hydrogen_DIR} $ENV{HYDROGEN_DIR} PATH_SUFFIXES lib/cmake/hydrogen NO_DEFAULT_PATH) if (NOT Hydrogen_FOUND) - find_package(Hydrogen 1.1.0 NO_MODULE QUIET REQUIRED) + find_package(Hydrogen 1.2.0 NO_MODULE QUIET REQUIRED) endif () message(STATUS "Found Hydrogen: ${Hydrogen_DIR}") set(LBANN_HAS_HYDROGEN ${Hydrogen_FOUND}) @@ -209,13 +214,13 @@ endif () if (LBANN_WITH_ALUMINUM) # Aluminum may have already been found by Hydrogen if (NOT Aluminum_FOUND) - find_package(Aluminum NO_MODULE QUIET + find_package(Aluminum 0.2.0 NO_MODULE QUIET HINTS ${Aluminum_DIR} ${ALUMINUM_DIR} ${AL_DIR} $ENV{Aluminum_DIR} $ENV{ALUMINUM_DIR} $ENV{AL_DIR} PATH_SUFFIXES lib64/cmake/aluminum lib/cmake/aluminum NO_DEFAULT_PATH) if (NOT Aluminum_FOUND) - find_package(Aluminum NO_MODULE QUIET) + find_package(Aluminum 0.2.0 NO_MODULE QUIET) endif () endif () set(LBANN_HAS_ALUMINUM ${Aluminum_FOUND}) @@ -287,6 +292,29 @@ if (LBANN_WITH_TBINF) add_subdirectory(external/TBinf) endif () +# Find Python +# Note: This uses the Python module in cmake/modules, not the module +# that comes included with CMake. See the file for a discussion of the +# differences. 
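For orientation, the configure-time behavior the hunk below adds can be mirrored in plain Python. This is an illustrative sketch, not part of the patch; it only reproduces the Python-3 gate (CMake raises `FATAL_ERROR` on Python 2) and the default `CMAKE_INSTALL_PYTHONDIR` string the CMake code constructs:

```python
import sys

# Mirrors the configure-time guard in the hunk below: LBANN's CMake
# stops with FATAL_ERROR when the interpreter it finds is not Python 3.
if sys.version_info[0] != 3:
    raise RuntimeError("Python 2 is not supported.")

# Mirrors the CMAKE_INSTALL_PYTHONDIR default: a site-packages-style
# relative path built from the interpreter version.
pythondir = "lib/python{}.{}/site-packages".format(*sys.version_info[:2])
print(pythondir)  # e.g. lib/python3.7/site-packages
```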
+if (LBANN_WITH_PYTHON) + find_package(Python REQUIRED) + set(LBANN_HAS_PYTHON "${Python_FOUND}") + if (NOT Python_VERSION_MAJOR EQUAL 3) + set(LBANN_HAS_PYTHON FALSE) + message(FATAL_ERROR "Python 2 is not supported.") + endif () + + # Setup the installation stuff + set(PYTHON_INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX}" + CACHE PATH "The prefix for the python installation") + + set(CMAKE_INSTALL_PYTHONDIR + "lib/python${Python_VERSION_MAJOR}.${Python_VERSION_MINOR}/site-packages" + CACHE PATH + "Relative path from PYTHON_INSTALL_PREFIX to the python package install") + +endif (LBANN_WITH_PYTHON) + if (LBANN_WITH_VTUNE) find_package(VTune MODULE) @@ -305,7 +333,7 @@ if (LBANN_WITH_VTUNE) endif (VTune_FOUND) endif (LBANN_WITH_VTUNE) -if (LBANN_WITH_NVPROF) +if (LBANN_WITH_CUDA AND LBANN_WITH_NVPROF) set(LBANN_NVPROF TRUE) endif () @@ -336,15 +364,15 @@ if (LBANN_WITH_CONDUIT) message(STATUS "Found HDF5: ${HDF5_DIR}") endif () - find_package(CONDUIT CONFIG QUIET - HINTS ${CONDUIT_DIR} $ENV{CONDUIT_DIR} + find_package(Conduit CONFIG QUIET + HINTS ${Conduit_DIR} $ENV{Conduit_DIR} ${CONDUIT_DIR} $ENV{CONDUIT_DIR} PATH_SUFFIXES lib64/cmake lib/cmake NO_DEFAULT_PATH) - if (NOT CONDUIT_FOUND) - find_package(CONDUIT CONFIG QUIET REQUIRED + if (NOT Conduit_FOUND) + find_package(Conduit CONFIG QUIET REQUIRED PATH_SUFFIXES lib64/cmake lib/cmake) endif () - message(STATUS "Found CONDUIT: ${CONDUIT_DIR}") + message(STATUS "Found CONDUIT: ${Conduit_DIR}") # Ugh. I don't like that this requires intimate knowledge of # specific targets that CONDUIT exports. It should support @@ -402,9 +430,28 @@ if (LBANN_WITH_CONDUIT) "${_conduit_interface_link_libs}") set(CONDUIT_LIBRARIES conduit::conduit) - set(LBANN_HAS_CONDUIT ${CONDUIT_FOUND}) + set(LBANN_HAS_CONDUIT ${Conduit_FOUND}) endif (LBANN_WITH_CONDUIT) +if (LBANN_WITH_UNIT_TESTING) + find_package(Catch2 2.0.0 CONFIG QUIET + HINTS ${CATCH2_DIR} $ENV{CATCH2_DIR} ${CATCH_DIR} $ENV{CATCH_DIR} + PATH_SUFFIXES lib64/cmake/Catch2 lib/cmake/Catch2 + NO_DEFAULT_PATH) + if (NOT Catch2_FOUND) + find_package(Catch2 2.0.0 CONFIG QUIET REQUIRED) + endif () + message(STATUS "Found Catch2: ${Catch2_DIR}") + + # Now that Catch2 has been found, start adding the unit tests + include(CTest) + include(Catch) + add_subdirectory(src/utils/unit_test) + + # Add this one last + add_subdirectory(unit_test) +endif (LBANN_WITH_UNIT_TESTING) + # Handle the documentation add_subdirectory(docs) @@ -430,6 +477,10 @@ target_include_directories(lbann PUBLIC $ $) +if (LBANN_HAS_PYTHON) + target_include_directories(lbann PUBLIC ${Python_INCLUDE_DIRS}) +endif () + # Use the IMPORTED targets when possible. 
target_link_libraries(lbann PUBLIC LbannProto) target_link_libraries(lbann PUBLIC cereal) @@ -460,6 +511,10 @@ if (LBANN_HAS_VTUNE) target_link_libraries(lbann PUBLIC ${VTUNE_STATIC_LIB}) endif () +if (LBANN_HAS_PYTHON) + target_link_libraries(lbann PUBLIC ${Python_LIBRARIES}) +endif () + if (TARGET LBANN_CXX_FLAGS_werror) target_link_libraries(lbann PUBLIC LBANN_CXX_FLAGS_werror) endif () @@ -516,8 +571,8 @@ export(EXPORT LBANNTargets NAMESPACE LBANN:: FILE LBANNTargets.cmake) # Write the configure file for the install tree set(INCLUDE_INSTALL_DIRS include) -set(LIB_INSTALL_DIR lib) -set(CMAKE_INSTALL_DIR lib/cmake/lbann) +set(LIB_INSTALL_DIR ${CMAKE_INSTALL_LIBDIR}) +set(CMAKE_INSTALL_DIR ${LIB_INSTALL_DIR}/cmake/lbann) set(EXTRA_CMAKE_MODULE_DIR) configure_package_config_file(cmake/configure_files/LBANNConfig.cmake.in "${CMAKE_BINARY_DIR}/LBANNConfig.cmake.install" @@ -559,6 +614,64 @@ install( FILES "${PROJECT_BINARY_DIR}/lbann_config.hpp" DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}") +# Install Python frontend +# Note (tym): Python best practices are to put setup.py at the package +# root and setuptools only accepts relative paths. However, we need to +# insert a config file containing install-specific file paths and make +# sure setup.py can pick it up. I see three approaches for the build +# process: +# 1) Inject the config file into a known location in the source +# directory so that setup.py can pick it up. +# 2) Copy the Python source tree into the build directory and insert +# setup.py and the config file. +# 3) Create setup.py and the config file in the build directory and +# pass the source directory as a relative path. +# We go for option 3 since it's simple and lightweight, but it runs +# counter to the intent of setuptools. If we learn about any nicer +# approaches, we should use them. +if (LBANN_HAS_PYTHON) + + # Construct config file + # NOTE (trb): python_config.ini is installed by setup.py + set(_PYTHON_CONFIG_INI ${CMAKE_BINARY_DIR}/python_config.ini) + set(_LBANN_PB2_PY ${PYTHON_INSTALL_PREFIX}/${CMAKE_INSTALL_PYTHONDIR}/lbann_pb2.py) + set(_LBANN_EXE ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_BINDIR}/lbann) + configure_file( + "${CMAKE_SOURCE_DIR}/cmake/configure_files/python_config.ini.in" + "${_PYTHON_CONFIG_INI}" + @ONLY) + + # Construct setup.py + set(_SETUP_PY ${CMAKE_BINARY_DIR}/setup.py) + set(_LBANN_PYTHON_DIR "${CMAKE_SOURCE_DIR}/python") + configure_file( + "${CMAKE_SOURCE_DIR}/cmake/configure_files/setup.py.in" + "${_SETUP_PY}" + @ONLY) + + # Install Python package with setuptools + set(_PY_INSTALL_DIR "${PYTHON_INSTALL_PREFIX}/${CMAKE_INSTALL_PYTHONDIR}") + set(_SETUP_PY_ARGS + "${_SETUP_PY_ARGS} --root ${_PY_INSTALL_DIR} --install-lib . --install-data .") + install(CODE + "execute_process(COMMAND ${Python_EXECUTABLE} ${_SETUP_PY} install ${_SETUP_PY_ARGS})") + + set(_PY_INSTALL_MSG + " +\n********************************************************************** + +A Python package has been installed to ${_PY_INSTALL_DIR}. 
To use +this package, be sure to add this directory to your PYTHONPATH, e.g.: + + export PYTHONPATH=${_PY_INSTALL_DIR}:\\$\{PYTHONPATH\} + +**********************************************************************\n +") + install(CODE + "execute_process(COMMAND ${CMAKE_COMMAND} -E echo \"${_PY_INSTALL_MSG}\")") + +endif (LBANN_HAS_PYTHON) + # Install contributor list, license, readme install( FILES "${PROJECT_SOURCE_DIR}/CONTRIBUTORS" @@ -583,8 +696,10 @@ macro(append_str_tf STRING_VAR) math(EXPR _num_spaces "${_max_length} - ${_var_length}") lbann_get_space_string(_spaces ${_num_spaces}) if (${var}) + set(${var} "TRUE") string(APPEND ${STRING_VAR} " ${var}:" "${_spaces}" "TRUE\n") else () + set(${var} "FALSE") string(APPEND ${STRING_VAR} " ${var}:" "${_spaces}" "FALSE\n") endif () endforeach() @@ -632,10 +747,33 @@ append_str_tf(_str LBANN_HAS_DOXYGEN LBANN_HAS_LBANN_PROTO LBANN_HAS_ALUMINUM - LBANN_HAS_CONDUIT) + LBANN_HAS_CONDUIT + LBANN_HAS_PYTHON) string(APPEND _str "\n== End LBANN Configuration Summary ==\n") # Output to stdout execute_process(COMMAND ${CMAKE_COMMAND} -E echo "${_str}") set(_str) + +# +# Write a basic modulefile +# +set(LBANN_MODULEFILE_NAME "lbann-${LBANN_VERSION}.lua" + CACHE STRING + "The name of the LBANN modulefile to install. Must end in .lua.") + +if (NOT (LBANN_MODULEFILE_NAME MATCHES ".+\.lua")) + message(WARNING + "LBANN_MODULEFILE_NAME must have extension \".lua\". Appending.") + set(LBANN_MODULEFILE_NAME "${LBANN_MODULEFILE_NAME}.lua" + CACHE STRING "" FORCE) +endif () + +configure_file( + "${CMAKE_SOURCE_DIR}/cmake/configure_files/lbann_module.lua.in" + "${CMAKE_BINARY_DIR}/lbann_module.lua.install" + @ONLY) +install(FILES "${CMAKE_BINARY_DIR}/lbann_module.lua.install" + RENAME "${LBANN_MODULEFILE_NAME}" + DESTINATION "${CMAKE_INSTALL_SYSCONFDIR}/modulefiles") diff --git a/LICENSE b/LICENSE index ebd51e42629..68681ede2ee 100644 --- a/LICENSE +++ b/LICENSE @@ -1,5 +1,5 @@ -Copyright (c) 2014-2016, Lawrence Livermore National Security, LLC. -Produced at the Lawrence Livermore National Laboratory. +Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC. +Produced at the Lawrence Livermore National Laboratory. Written by the LBANN Research Team (B. Van Essen, et al.) listed in the CONTRIBUTORS file. @@ -8,7 +8,7 @@ All rights reserved. This file is part of LBANN: Livermore Big Artificial Neural Network Toolkit. For details, see http://software.llnl.gov/LBANN or -https://github.com/LLNL/LBANN. +https://github.com/LLNL/LBANN. Licensed under the Apache License, Version 2.0 (the "Licensee"); you may not use this file except in compliance with the License. You may @@ -21,4 +21,3 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the license. - diff --git a/README.md b/README.md index 10d3c8e1b7b..8afb7203cfe 100644 --- a/README.md +++ b/README.md @@ -21,9 +21,17 @@ methods. ## Building LBANN -A few options for building LBANN are documented -[here](docs/BuildingLBANN.md#top). +The preferred method for users to install LBANN is with +[Spack](https://github.com/llnl/spack). After some system +configuration, this should be as straightforward as +```bash +spack install lbann +``` + +More detailed instructions for building and installing LBANN are +available at the [main LBANN +documentation](https://lbann.readthedocs.io/en/latest/index.html).
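Once the installed package directory from the CMake message above is on `PYTHONPATH`, the Python front-end summarized in the v0.99 release notes below can be imported directly. A minimal, hedged sketch follows; the layer-graph style matches the release notes, but the exact class names and keyword arguments are assumptions, not code taken from this diff:

```python
# Hedged sketch of the lbann Python namespace described in the v0.99
# release notes: build a small layer graph for a LeNet-style classifier.
# Class names and keyword arguments are assumptions.
import lbann

images = lbann.Input()                                  # data layer
fc1 = lbann.FullyConnected(images, num_neurons=500, has_bias=True)
act1 = lbann.Relu(fc1)
fc2 = lbann.FullyConnected(act1, num_neurons=10, has_bias=True)
probs = lbann.Softmax(fc2)
# Objective functions, metrics, callbacks, and optimizers are composed
# similarly and handed to a model before submitting a SLURM/LSF job.
```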
## Running LBANN The basic template for running LBANN is @@ -42,8 +50,12 @@ optimized for the case in which one assigns one GPU per MPI the MPI launcher. More details about running LBANN are documented -[here](docs/RunningLBANN.md#top). +[here](https://lbann.readthedocs.io/en/latest/running_lbann.html). + +## Publications +A list of publications, presentations, and posters is available +[here](https://lbann.readthedocs.io/en/latest/publications.html). ## Reporting issues Issues, questions, and bugs can be raised on the [Github issue diff --git a/ReleaseNotes.txt b/ReleaseNotes.txt index fea20150a3f..13418207629 100644 --- a/ReleaseNotes.txt +++ b/ReleaseNotes.txt @@ -1,21 +1,90 @@ -============================== (Pending) Release Notes: v0.99 ============================== +============================== (Pending) Release Notes: v1.00 ============================== Support for new training algorithms: Support for new network structures: Support for new layers: +Python front-end: + +Performance optimizations: + +Model portability & usability: + +Internal features: + +I/O & data readers: + +Build system: + +Bug fixes: + +Retired features: + +============================== Release Notes: v0.99 ============================== Support for new training algorithms: + - Improvements to LTFB infrastructure (including transfer of SGD and Adam hyperparameters) Support for new network structures: + - Support for Wide ResNets Support for new layers: Python front-end: + - Python front-end for generating neural network architectures (lbann namespace), + including layers, objective functions, callbacks, metrics, and optimizers. + - Python interface for launching (SLURM or LSF) jobs on HPC systems + - Support for running LBANN experiments and capturing experimental output + - Network templates for AlexNet, LeNet, arbitrary ResNet models, and Wide ResNet models + - Python scripts for LeNet, AlexNet, and (Wide) ResNets in model zoo. Performance optimizations: + - GPU implementation of RMSprop optimizer. + - cuDNN convolution algorithms are determined by empirically measuring + performance rather than using heuristics. + - Avoid setting up unused bias weights. + - Perform gradient accumulations in-place when possible. Model portability & usability: Internal features: + - Weight gradient allreduces are in-place rather than on a staging buffer. + - Fully connected and convolution layers only create bias weights when + needed. + - Optimizer exposes gradient buffers so they can be updated in-place. + - Added callback support to explicitly save models + - Min-max metric for reporting on multiple LTFB trainers + - Cleanup of Hydrogen interface to match Hydrogen v1.2.0 + - Added type-erased matrix class for internal refactoring + - Make CUB always log performance-critical events I/O & data readers: + - Python data reader that interacts with an embedded Python session. + - Optimized data store to provide preload option + - Extended data store to operate with Cosmoflow-numpy data reader Build system: + - Added documentation for how users can use Spack to install LBANN + either directly or via environments. + - Conduit is a required dependency. + - Provided Spack environment for installing LBANN as a user + - Improved documentation on lbann.readthedocs.io + - CMake installs a module file in the installation directory that + sets up PATH and PYTHONPATH variables appropriately + +Bug fixes: + - Models can now be copied or set up multiple times. + - Fixed incorrect weight initialization with multiple trainers.
+ - Updated I/O random number generators to be C++ thread safe (rather than OpenMP) + - Added an I/O random number generator for preprocessing that is independent + of the data sequence RNG. + - Fixed initialization order of RNGs and multiple models / trainers. + - General fixes for I/O and LTFB interaction. Retired features: + - "Zero" layer (hack for early GAN implementation). + - Removed data reader specific implementations of data store (in favor of Conduit-based + data store) ============================== Release Notes: v0.98.1 ============================== Bug Fixes: diff --git a/bamboo/README.md b/bamboo/README.md index 4ad8b6508be..c317c496379 100644 --- a/bamboo/README.md +++ b/bamboo/README.md @@ -47,13 +47,13 @@ As an alternative to splitting the file, errors can be searched for with `grep - Bamboo agent properties are used to specify requirements for each job. -| Agents (jobs) | `agent_owner` | `architecture` | `cluster` | `gpu_architecture` | `sys_type` | -| --- | --- | --- | --- | --- | --- | -| Catalyst Agents (x86_cpu) | `lbannusr` | `x86_64` | `catalyst` | `none` | `toss_3_x86_64_ib` | -| Pascal Agents | `lbannusr` | `x86_64` | `pascal` | `pascal` | `chaos_6_x86_64_ib` | -| Quartz Agents (x86_cpu) | `lbannusr` | `x86_64` | `quartz` | `none` | `toss_3_x86_64_ib` | -| Ray Agents (ppc64le_gpu) | `lbannusr` | `ppc64_le` | `ray` | `pascal` | `blueos_3_ppc64le_ib` | -| Surface Agents (x86_gpu) | `lbannusr` | `x86_64` | `surface` | `kepler` | `chaos_5_x86_64_ib` | +| Agents (jobs) | `agent_owner` | `architecture` | `cluster` | `gpu_architecture` | `sys_type` | +| --- | --- | --- | --- | --- | --- | +| Catalyst Agents (x86_cpu) | `lbannusr` | `x86_64` | `catalyst` | `none` | `toss_3_x86_64_ib` | +| Pascal Agents (x86_gpu_pascal) | `lbannusr` | `x86_64` | `pascal` | `pascal` | `chaos_6_x86_64_ib` | +| Quartz Agents (x86_cpu) | `lbannusr` | `x86_64` | `quartz` | `none` | `toss_3_x86_64_ib` | +| Ray Agents (ppc64le_gpu) | `lbannusr` | `ppc64_le` | `ray` | `pascal` | `blueos_3_ppc64le_ib` | +| Surface Agents (x86_gpu) | `lbannusr` | `x86_64` | `surface` | `kepler` | `chaos_5_x86_64_ib` | Currently, `agent_owner`, `architecture`, and `gpu_architecture` are used to determine agents to run a job. 
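For orientation before the test diffs that follow: `tools.get_command` assembles a complete scheduler-plus-LBANN command line from keyword arguments. A minimal invocation, using the same placeholder values as `test_command_catalyst` below (run from the repository root; `pytest` must be importable since `tools.py` imports it):

```python
import sys
sys.path.insert(0, 'bamboo/common_python')  # make tools.py importable
import tools

# Compose (but do not execute) a SLURM launch command for catalyst.
command = tools.get_command(
    cluster='catalyst', executable='exe', num_nodes=20,
    partition='pdebug', time_limit=30, num_processes=40, dir_name='dir',
    data_filedir_default='lscratchh/filedir', data_reader_name='mnist',
    data_reader_percent=0.10, exit_after_setup=True, mini_batch_size=15,
    model_folder='models/folder', model_name='lenet', num_epochs=7,
    optimizer_name='adagrad', processes_per_model=10,
    check_executable_existence=False)
print(command)  # 'salloc --nodes=20 ... srun --ntasks=40 exe ...'
```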
diff --git a/bamboo/common_python/test_tools.py b/bamboo/common_python/test_tools.py index 0fdbf044160..6cafbb39bd6 100644 --- a/bamboo/common_python/test_tools.py +++ b/bamboo/common_python/test_tools.py @@ -5,27 +5,27 @@ # Run locally with python -m pytest -s def test_command_catalyst(): - actual = tools.get_command(cluster='catalyst', executable='exe', num_nodes=20, partition='pdebug', time_limit=30, num_processes=40, dir_name='dir', data_filedir_default='lscratchh/filedir', data_reader_name='mnist', data_reader_percent=0.10, exit_after_setup=True, mini_batch_size=15, model_folder='models/folder', model_name='lenet', num_epochs=7, optimizer_name='adagrad', processes_per_model=10, output_file_name='output_file', error_file_name='error_file', check_executable_existance=False) + actual = tools.get_command(cluster='catalyst', executable='exe', num_nodes=20, partition='pdebug', time_limit=30, num_processes=40, dir_name='dir', data_filedir_default='lscratchh/filedir', data_reader_name='mnist', data_reader_percent=0.10, exit_after_setup=True, mini_batch_size=15, model_folder='models/folder', model_name='lenet', num_epochs=7, optimizer_name='adagrad', processes_per_model=10, output_file_name='output_file', error_file_name='error_file', check_executable_existence=False) expected = 'salloc --nodes=20 --partition=pdebug --time=30 srun --ntasks=40 exe --reader=dir/model_zoo/data_readers/data_reader_mnist.prototext --data_reader_percent=0.100000 --exit_after_setup --mini_batch_size=15 --model=dir/model_zoo/models/folder/model_lenet.prototext --num_epochs=7 --optimizer=dir/model_zoo/optimizers/opt_adagrad.prototext --procs_per_model=10 > output_file 2> error_file' assert actual == expected def test_command_pascal(): - actual = tools.get_command(cluster='pascal', executable='exe', num_nodes=20, partition='pdebug', time_limit=30, num_processes=40, dir_name='dir', data_filedir_default='lscratchh/filedir', data_reader_name='mnist', data_reader_percent=0.10, exit_after_setup=True, mini_batch_size=15, model_folder='models/folder', model_name='lenet', num_epochs=7, optimizer_name='adagrad', processes_per_model=10, output_file_name='output_file', error_file_name='error_file', check_executable_existance=False) + actual = tools.get_command(cluster='pascal', executable='exe', num_nodes=20, partition='pdebug', time_limit=30, num_processes=40, dir_name='dir', data_filedir_default='lscratchh/filedir', data_reader_name='mnist', data_reader_percent=0.10, exit_after_setup=True, mini_batch_size=15, model_folder='models/folder', model_name='lenet', num_epochs=7, optimizer_name='adagrad', processes_per_model=10, output_file_name='output_file', error_file_name='error_file', check_executable_existence=False) expected = 'salloc --nodes=20 --partition=pdebug --time=30 srun --ntasks=40 exe --reader=dir/model_zoo/data_readers/data_reader_mnist.prototext --data_reader_percent=0.100000 --exit_after_setup --mini_batch_size=15 --model=dir/model_zoo/models/folder/model_lenet.prototext --num_epochs=7 --optimizer=dir/model_zoo/optimizers/opt_adagrad.prototext --procs_per_model=10 > output_file 2> error_file' assert actual == expected def test_command_quartz(): - actual = tools.get_command(cluster='quartz', executable='exe', num_nodes=20, partition='pdebug', time_limit=30, num_processes=40, dir_name='dir', data_filedir_default='lscratchh/filedir', data_reader_name='mnist', data_reader_percent=0.10, exit_after_setup=True, mini_batch_size=15, model_folder='models/folder', model_name='lenet', num_epochs=7, optimizer_name='adagrad', 
processes_per_model=10, output_file_name='output_file', error_file_name='error_file', check_executable_existance=False) + actual = tools.get_command(cluster='quartz', executable='exe', num_nodes=20, partition='pdebug', time_limit=30, num_processes=40, dir_name='dir', data_filedir_default='lscratchh/filedir', data_reader_name='mnist', data_reader_percent=0.10, exit_after_setup=True, mini_batch_size=15, model_folder='models/folder', model_name='lenet', num_epochs=7, optimizer_name='adagrad', processes_per_model=10, output_file_name='output_file', error_file_name='error_file', check_executable_existence=False) expected = 'salloc --nodes=20 --partition=pdebug --time=30 srun --ntasks=40 exe --data_filedir=lscratchh/filedir --reader=dir/model_zoo/data_readers/data_reader_mnist.prototext --data_reader_percent=0.100000 --exit_after_setup --mini_batch_size=15 --model=dir/model_zoo/models/folder/model_lenet.prototext --num_epochs=7 --optimizer=dir/model_zoo/optimizers/opt_adagrad.prototext --procs_per_model=10 > output_file 2> error_file' assert actual == expected def test_command_surface(): - actual = tools.get_command(cluster='surface', executable='exe', num_nodes=20, partition='pdebug', time_limit=30, num_processes=40, dir_name='dir', data_filedir_default='lscratchh/filedir', data_reader_name='mnist', data_reader_percent=0.10, exit_after_setup=True, mini_batch_size=15, model_folder='models/folder', model_name='lenet', num_epochs=7, optimizer_name='adagrad', processes_per_model=10, output_file_name='output_file', error_file_name='error_file', check_executable_existance=False) + actual = tools.get_command(cluster='surface', executable='exe', num_nodes=20, partition='pdebug', time_limit=30, num_processes=40, dir_name='dir', data_filedir_default='lscratchh/filedir', data_reader_name='mnist', data_reader_percent=0.10, exit_after_setup=True, mini_batch_size=15, model_folder='models/folder', model_name='lenet', num_epochs=7, optimizer_name='adagrad', processes_per_model=10, output_file_name='output_file', error_file_name='error_file', check_executable_existence=False) expected = 'salloc --nodes=20 --partition=pbatch --time=30 srun --ntasks=40 exe --reader=dir/model_zoo/data_readers/data_reader_mnist.prototext --data_reader_percent=0.100000 --exit_after_setup --mini_batch_size=15 --model=dir/model_zoo/models/folder/model_lenet.prototext --num_epochs=7 --optimizer=dir/model_zoo/optimizers/opt_adagrad.prototext --procs_per_model=10 > output_file 2> error_file' assert actual == expected def test_command_ray(): - actual = tools.get_command(cluster='ray', executable='exe', num_nodes=20, partition='pdebug', time_limit=30, num_processes=40, dir_name='dir', data_filedir_default='lscratchh/filedir', data_reader_name='mnist', data_reader_percent=0.10, exit_after_setup=True, mini_batch_size=15, model_folder='models/folder', model_name='lenet', num_epochs=7, optimizer_name='adagrad', processes_per_model=10, output_file_name='output_file', error_file_name='error_file', check_executable_existance=False) + actual = tools.get_command(cluster='ray', executable='exe', num_nodes=20, partition='pdebug', time_limit=30, num_processes=40, dir_name='dir', data_filedir_default='lscratchh/filedir', data_reader_name='mnist', data_reader_percent=0.10, exit_after_setup=True, mini_batch_size=15, model_folder='models/folder', model_name='lenet', num_epochs=7, optimizer_name='adagrad', processes_per_model=10, output_file_name='output_file', error_file_name='error_file', check_executable_existence=False) expected = 'bsub -x -G guests 
-Is -n 40 -q pdebug -R "span[ptile=2]" -W 30 mpirun -np 40 -N 2 exe --data_filedir=gscratchr/filedir --reader=dir/model_zoo/data_readers/data_reader_mnist.prototext --data_reader_percent=0.100000 --exit_after_setup --mini_batch_size=15 --model=dir/model_zoo/models/folder/model_lenet.prototext --num_epochs=7 --optimizer=dir/model_zoo/optimizers/opt_adagrad.prototext --procs_per_model=10 > output_file 2> error_file' assert actual == expected @@ -33,112 +33,112 @@ def test_command_ray(): def test_blacklisted_substrings(): try: - tools.get_command('ray', 'exe', partition=';', optimizer_path='--model=new_model', check_executable_existance=False) - except Exception, e: + tools.get_command('ray', 'exe', partition=';', optimizer_path='--model=new_model', check_executable_existence=False) + except Exception as e: actual = str(e) expected = 'Invalid character(s): ; contains ; , --model=new_model contains --' assert actual == expected def test_unsupported_cluster(): try: - tools.get_command('quartz', 'exe', check_executable_existance=False) - except Exception, e: + tools.get_command('quartz', 'exe', check_executable_existence=False) + except Exception as e: actual = str(e) expected = 'Unsupported Cluster: quartz' assert actual == expected def test_bad_model_1(): try: - tools.get_command('ray', 'exe', dir_name='dir', model_folder='folder', model_name='name', model_path='path', check_executable_existance=False) - except Exception, e: + tools.get_command('ray', 'exe', dir_name='dir', model_folder='folder', model_name='name', model_path='path', check_executable_existence=False) + except Exception as e: actual = str(e) expected = 'Invalid Usage: model_path is set but so is at least one of model folder and model_name' assert actual == expected def test_bad_model_2(): try: - tools.get_command('ray', 'exe', dir_name='dir', model_folder='folder', model_path='path', check_executable_existance=False) - except Exception, e: + tools.get_command('ray', 'exe', dir_name='dir', model_folder='folder', model_path='path', check_executable_existence=False) + except Exception as e: actual = str(e) expected = 'Invalid Usage: model_path is set but so is at least one of model folder and model_name' assert actual == expected def test_bad_model_3(): try: - tools.get_command('ray', 'exe', dir_name='dir', model_name='name', model_path='path', check_executable_existance=False) - except Exception, e: + tools.get_command('ray', 'exe', dir_name='dir', model_name='name', model_path='path', check_executable_existence=False) + except Exception as e: actual = str(e) expected = 'Invalid Usage: model_path is set but so is at least one of model folder and model_name' assert actual == expected def test_bad_model_4(): try: - tools.get_command('ray', 'exe', dir_name='dir', model_folder='folder', check_executable_existance=False) - except Exception, e: + tools.get_command('ray', 'exe', dir_name='dir', model_folder='folder', check_executable_existence=False) + except Exception as e: actual = str(e) expected = 'Invalid Usage: model_folder set but not model_name.' assert actual == expected def test_bad_model_5(): try: - tools.get_command('ray', 'exe', dir_name='dir', model_name='name', check_executable_existance=False) - except Exception, e: + tools.get_command('ray', 'exe', dir_name='dir', model_name='name', check_executable_existence=False) + except Exception as e: actual = str(e) expected = 'Invalid Usage: model_name set but not model_folder.' 
assert actual == expected def test_bad_data_reader(): try: - tools.get_command('catalyst', 'exe', dir_name='dir', data_reader_name='name', data_reader_path='path', check_executable_existance=False) - except Exception, e: + tools.get_command('catalyst', 'exe', dir_name='dir', data_reader_name='name', data_reader_path='path', check_executable_existence=False) + except Exception as e: actual = str(e) expected = 'Invalid Usage: data_reader_path is set but so is data_reader_name , data_reader_name or data_reader_path is set but not data_filedir_default. If a data reader is provided, the default filedir must be set. This allows for determining what the filedir should be on each cluster. Alternatively, some or all of [data_filedir_train_default, data_filename_train_default, data_filedir_test_default, data_filename_test_default] can be set.' assert actual == expected def test_bad_optimizer(): try: - tools.get_command('ray', 'exe', dir_name='dir', optimizer_name='name', optimizer_path='path', check_executable_existance=False) - except Exception, e: + tools.get_command('ray', 'exe', dir_name='dir', optimizer_name='name', optimizer_path='path', check_executable_existence=False) + except Exception as e: actual = str(e) expected = 'Invalid Usage: optimizer_path is set but so is optimizer_name' assert actual == expected def test_bad_dir_name_1(): try: - tools.get_command('ray', 'exe', dir_name='dir', check_executable_existance=False) - except Exception, e: + tools.get_command('ray', 'exe', dir_name='dir', check_executable_existence=False) + except Exception as e: actual = str(e) expected = 'Invalid Usage: dir_name set but none of model_folder, model_name, data_reader_name, optimizer_name are.' assert actual == expected def test_bad_dir_name_2(): try: - tools.get_command('ray', 'exe', model_folder='folder', check_executable_existance=False) - except Exception, e: + tools.get_command('ray', 'exe', model_folder='folder', check_executable_existence=False) + except Exception as e: actual = str(e) expected = 'Invalid Usage: dir_name is not set but at least one of model_folder, model_name, data_reader_name, optimizer_name is.' assert actual == expected def test_bad_dir_name_3(): try: - tools.get_command('ray', 'exe', model_name='name', check_executable_existance=False) - except Exception, e: + tools.get_command('ray', 'exe', model_name='name', check_executable_existence=False) + except Exception as e: actual = str(e) expected = 'Invalid Usage: dir_name is not set but at least one of model_folder, model_name, data_reader_name, optimizer_name is.' assert actual == expected def test_bad_dir_name_4(): try: - tools.get_command('catalyst', 'exe', data_reader_name='name', check_executable_existance=False) - except Exception, e: + tools.get_command('catalyst', 'exe', data_reader_name='name', check_executable_existence=False) + except Exception as e: actual = str(e) expected = 'Invalid Usage: dir_name is not set but at least one of model_folder, model_name, data_reader_name, optimizer_name is. , data_reader_name or data_reader_path is set but not data_filedir_default. If a data reader is provided, the default filedir must be set. This allows for determining what the filedir should be on each cluster. Alternatively, some or all of [data_filedir_train_default, data_filename_train_default, data_filedir_test_default, data_filename_test_default] can be set.' 
assert actual == expected def test_bad_dir_name_5(): try: - tools.get_command('ray', 'exe', optimizer_name='name', check_executable_existance=False) - except Exception, e: + tools.get_command('ray', 'exe', optimizer_name='name', check_executable_existence=False) + except Exception as e: actual = str(e) expected = 'Invalid Usage: dir_name is not set but at least one of model_folder, model_name, data_reader_name, optimizer_name is.' assert actual == expected @@ -146,8 +146,8 @@ def test_bad_dir_name_5(): def test_bad_data_filedir_1(): try: tools.get_command('ray', 'exe', dir_name='dir', data_reader_name='name', data_filedir_default='filedir', data_filedir_train_default='a', - check_executable_existance=False) - except Exception, e: + check_executable_existence=False) + except Exception as e: actual = str(e) expected = 'Invalid Usage: data_fildir_default set but so is at least one of [data_filedir_train_default, data_filename_train_default, data_filedir_test_default, data_filename_test_default]' assert actual == expected @@ -155,8 +155,8 @@ def test_bad_data_filedir_1(): def test_bad_data_filedir_2(): try: tools.get_command('ray', 'exe', dir_name='dir', data_reader_name='name', data_filedir_default='filedir', data_filename_train_default='b', - check_executable_existance=False) - except Exception, e: + check_executable_existence=False) + except Exception as e: actual = str(e) expected = 'Invalid Usage: data_fildir_default set but so is at least one of [data_filedir_train_default, data_filename_train_default, data_filedir_test_default, data_filename_test_default]' assert actual == expected @@ -165,8 +165,8 @@ def test_bad_data_filedir_2(): def test_bad_data_filedir_3(): try: tools.get_command('ray', 'exe', dir_name='dir', data_reader_name='name', data_filedir_default='filedir', data_filedir_test_default='c', - check_executable_existance=False) - except Exception, e: + check_executable_existence=False) + except Exception as e: actual = str(e) expected = 'Invalid Usage: data_fildir_default set but so is at least one of [data_filedir_train_default, data_filename_train_default, data_filedir_test_default, data_filename_test_default]' assert actual == expected @@ -174,24 +174,24 @@ def test_bad_data_filedir_3(): def test_bad_data_filedir_4(): try: tools.get_command('ray', 'exe', dir_name='dir', data_reader_name='name', data_filedir_default='filedir', data_filename_test_default='d', - check_executable_existance=False) - except Exception, e: + check_executable_existence=False) + except Exception as e: actual = str(e) expected = 'Invalid Usage: data_fildir_default set but so is at least one of [data_filedir_train_default, data_filename_train_default, data_filedir_test_default, data_filename_test_default]' assert actual == expected def test_bad_data_filedir_5(): try: - tools.get_command('ray', 'exe', data_reader_path='path', data_filedir_default='filedir', data_filedir_train_default='e', check_executable_existance=False) - except Exception, e: + tools.get_command('ray', 'exe', data_reader_path='path', data_filedir_default='filedir', data_filedir_train_default='e', check_executable_existence=False) + except Exception as e: actual = str(e) expected = 'Invalid Usage: data_fildir_default set but so is at least one of [data_filedir_train_default, data_filename_train_default, data_filedir_test_default, data_filename_test_default]' assert actual == expected def test_bad_data_filedir_6(): try: - tools.get_command('ray', 'exe', data_reader_path='path', data_filedir_default='filedir', 
data_filename_train_default='f', check_executable_existance=False) - except Exception, e: + tools.get_command('ray', 'exe', data_reader_path='path', data_filedir_default='filedir', data_filename_train_default='f', check_executable_existence=False) + except Exception as e: actual = str(e) expected = 'Invalid Usage: data_fildir_default set but so is at least one of [data_filedir_train_default, data_filename_train_default, data_filedir_test_default, data_filename_test_default]' assert actual == expected @@ -199,48 +199,48 @@ def test_bad_data_filedir_6(): def test_bad_data_filedir_7(): try: - tools.get_command('ray', 'exe', data_reader_path='path', data_filedir_default='filedir', data_filedir_test_default='g', check_executable_existance=False) - except Exception, e: + tools.get_command('ray', 'exe', data_reader_path='path', data_filedir_default='filedir', data_filedir_test_default='g', check_executable_existence=False) + except Exception as e: actual = str(e) expected = 'Invalid Usage: data_fildir_default set but so is at least one of [data_filedir_train_default, data_filename_train_default, data_filedir_test_default, data_filename_test_default]' assert actual == expected def test_bad_data_filedir_8(): try: - tools.get_command('ray', 'exe', data_reader_path='path', data_filedir_default='filedir', data_filename_test_default='h', check_executable_existance=False) - except Exception, e: + tools.get_command('ray', 'exe', data_reader_path='path', data_filedir_default='filedir', data_filename_test_default='h', check_executable_existence=False) + except Exception as e: actual = str(e) expected = 'Invalid Usage: data_fildir_default set but so is at least one of [data_filedir_train_default, data_filename_train_default, data_filedir_test_default, data_filename_test_default]' assert actual == expected def test_bad_data_filedir_9(): try: - tools.get_command('ray', 'exe', dir_name='dir', data_reader_name='name', check_executable_existance=False) - except Exception, e: + tools.get_command('ray', 'exe', dir_name='dir', data_reader_name='name', check_executable_existence=False) + except Exception as e: actual = str(e) expected = 'Invalid Usage: data_reader_name or data_reader_path is set but not data_filedir_default. If a data reader is provided, the default filedir must be set. This allows for determining what the filedir should be on each cluster. Alternatively, some or all of [data_filedir_train_default, data_filename_train_default, data_filedir_test_default, data_filename_test_default] can be set.' assert actual == expected def test_bad_data_filedir_10(): try: - tools.get_command('ray', 'exe', data_reader_path='path', check_executable_existance=False) - except Exception, e: + tools.get_command('ray', 'exe', data_reader_path='path', check_executable_existence=False) + except Exception as e: actual = str(e) expected = 'Invalid Usage: data_reader_name or data_reader_path is set but not data_filedir_default. If a data reader is provided, the default filedir must be set. This allows for determining what the filedir should be on each cluster. Alternatively, some or all of [data_filedir_train_default, data_filename_train_default, data_filedir_test_default, data_filename_test_default] can be set.' 
assert actual == expected def test_bad_data_filedir_11(): try: - tools.get_command('ray', 'exe', data_filedir_default='filedir', check_executable_existance=False) - except Exception, e: + tools.get_command('ray', 'exe', data_filedir_default='filedir', check_executable_existence=False) + except Exception as e: actual = str(e) expected = 'Invalid Usage: data_filedir_default set but neither data_reader_name or data_reader_path are.' assert actual == expected def test_bad_data_filedir_12(): try: - tools.get_command('ray', 'exe', data_filedir_train_default='a', check_executable_existance=False) - except Exception, e: + tools.get_command('ray', 'exe', data_filedir_train_default='a', check_executable_existence=False) + except Exception as e: actual = str(e) expected = 'Invalid Usage: At least one of [data_filedir_train_default, data_filename_train_default, data_filedir_test_default, data_filename_test_default] is set, but neither data_reader_name or data_reader_path are.' assert actual == expected @@ -248,8 +248,8 @@ def test_bad_data_filedir_12(): def test_bad_data_filedir_13(): try: - tools.get_command('ray', 'exe', data_filename_train_default='b', check_executable_existance=False) - except Exception, e: + tools.get_command('ray', 'exe', data_filename_train_default='b', check_executable_existence=False) + except Exception as e: actual = str(e) expected = 'Invalid Usage: At least one of [data_filedir_train_default, data_filename_train_default, data_filedir_test_default, data_filename_test_default] is set, but neither data_reader_name or data_reader_path are.' assert actual == expected @@ -257,8 +257,8 @@ def test_bad_data_filedir_13(): def test_bad_data_filedir_14(): try: - tools.get_command('ray', 'exe', data_filedir_test_default='c', check_executable_existance=False) - except Exception, e: + tools.get_command('ray', 'exe', data_filedir_test_default='c', check_executable_existence=False) + except Exception as e: actual = str(e) expected = 'Invalid Usage: At least one of [data_filedir_train_default, data_filename_train_default, data_filedir_test_default, data_filename_test_default] is set, but neither data_reader_name or data_reader_path are.' assert actual == expected @@ -266,8 +266,8 @@ def test_bad_data_filedir_14(): def test_bad_data_filedir_15(): try: - tools.get_command('ray', 'exe', data_filename_test_default='e', check_executable_existance=False) - except Exception, e: + tools.get_command('ray', 'exe', data_filename_test_default='e', check_executable_existence=False) + except Exception as e: actual = str(e) expected = 'Invalid Usage: At least one of [data_filedir_train_default, data_filename_train_default, data_filedir_test_default, data_filename_test_default] is set, but neither data_reader_name or data_reader_path are.' 
assert actual == expected diff --git a/bamboo/common_python/tools.py b/bamboo/common_python/tools.py index 4a9508c8b3a..7110ddc9a67 100644 --- a/bamboo/common_python/tools.py +++ b/bamboo/common_python/tools.py @@ -1,14 +1,16 @@ import pytest import math, os, re + def check_list(substrings, strings): errors = [] for string in strings: for substring in substrings: if (string != None) and (substring in string): - errors.append('%s contains %s' % (string, substring)) + errors.append('%s contains %s' % (string, substring)) return errors + def get_command(cluster, executable, num_nodes=None, @@ -37,29 +39,30 @@ def get_command(cluster, output_file_name=None, error_file_name=None, return_tuple=False, - check_executable_existance=True, + check_executable_existence=True, skip_no_exe=True): # Check parameters for black-listed characters like semi-colons that # would terminate the command and allow for an extra command blacklist = [';', '--'] - strings = [partition, dir_name, data_filedir_default, data_filedir_train_default, + strings = [partition, dir_name, data_filedir_default, + data_filedir_train_default, data_filename_train_default, data_filedir_test_default, data_filename_test_default, data_reader_name, data_reader_path, model_folder, model_name, model_path, optimizer_name, optimizer_path, output_file_name, error_file_name] invalid_character_errors = check_list(blacklist, strings) if invalid_character_errors != []: - raise Exception('Invalid character(s): %s' % ' , '.join(invalid_character_errors)) + raise Exception('Invalid character(s): %s' % ' , '.join( + invalid_character_errors)) - # Check executable existance - if check_executable_existance: - executable_exists = os.path.exists(executable) - if not executable_exists: - error_string = 'Executable does not exist: %s' % executable - if skip_no_exe: - pytest.skip(error_string) - else: - raise Exception(error_string) + # Never give lbannusr an allocation of more than 6 hours. + strict_time_limit = 60*6 # 6 hours, in minutes. + if (time_limit is not None) and (time_limit > strict_time_limit): + time_limit = strict_time_limit + + # Check executable existence + if check_executable_existence: + process_executable_existence(executable, skip_no_exe) # Determine scheduler if cluster in ['catalyst', 'pascal', 'quartz', 'surface']: @@ -81,20 +84,21 @@ def get_command(cluster, option_num_nodes = '' option_partition = '' option_time_limit = '' - if num_nodes != None: + if num_nodes is not None: # --nodes= => # Request that a minimum of minnodes nodes be allocated to this # job. A maximum node count may also be specified with # maxnodes. option_num_nodes = ' --nodes=%d' % num_nodes - if partition != None: + if partition is not None: # Surface does not have pdebug, so switch to pbatch - if (cluster == 'surface') and (partition == 'pdebug'): + if (cluster in ['surface', 'pascal']) and \ + (partition == 'pdebug'): partition = 'pbatch' # --partition => Request a specific partition for the resource # allocation. option_partition = ' --partition=%s' % partition - if time_limit != None: + if time_limit is not None: # --time => Set a limit on the total run time of the job # allocation. # Time limit in minutes @@ -109,7 +113,7 @@ def get_command(cluster, else: command_run = ' srun --mpibind=off' option_num_processes = '' - if num_processes != None: + if num_processes is not None: # --ntasks => Specify the number of tasks to run.
# Number of processes to run => MPI Rank option_num_processes = ' --ntasks=%d' % num_processes @@ -120,7 +124,7 @@ def get_command(cluster, command_allocate = '' # Allocate a node if we don't have one already # Running the tests manually allows for already having a node allocated - if os.getenv('LSB_HOSTS') == None: + if os.getenv('LSB_HOSTS') is None: command_allocate = 'bsub' # x => Puts the host running your job into exclusive execution # mode. @@ -135,19 +139,19 @@ def get_command(cluster, option_partition = '' option_processes_per_node = '' option_time_limit = '' - if num_processes != None: + if num_processes is not None: # n => Submits a parallel job and specifies the number of # tasks in the job. option_num_processes = ' -n %d' % num_processes - if (num_nodes != None) and (num_nodes != 0): + if (num_nodes is not None) and (num_nodes != 0): # R => Runs the job on a host that meets the specified # resource requirements. option_processes_per_node = ' -R "span[ptile=%d]"' % int( math.ceil(float(num_processes)/num_nodes)) - if partition != None: + if partition is not None: # q => Submits the job to one of the specified queues. option_partition = ' -q %s' % partition - if time_limit != None: + if time_limit is not None: if cluster == 'ray': max_ray_time = 480 if time_limit > max_ray_time: @@ -166,10 +170,10 @@ def get_command(cluster, command_run = ' mpirun' option_num_processes = '' option_processes_per_node = '' - if num_processes != None: + if num_processes is not None: # -np => Run this many copies of the program on the given nodes. option_num_processes = ' -np %d' % num_processes - if (num_nodes != None) and (num_nodes != 0): + if (num_nodes is not None) and (num_nodes != 0): option_processes_per_node = ' -N %d' % int( math.ceil(float(num_processes)/num_nodes)) command_run = '%s%s%s' % ( @@ -194,57 +198,68 @@ def get_command(cluster, option_optimizer = '' option_processes_per_model = '' lbann_errors = [] - if model_path != None: + if model_path is not None: # If model_folder and/or model_name are set, an exception will be # raised later. option_model = ' --model=%s' % model_path - if data_reader_path != None: + if data_reader_path is not None: # If data_reader_name is set, an exception will be raised later. option_data_reader = ' --reader=%s' % data_reader_path - if optimizer_path != None: + if optimizer_path is not None: # If optimizer_name is set, an exception will be raised later. 
option_optimizer_name = ' --optimizer=%s' % optimizer_path - if dir_name != None: - if model_path != None: - if (model_folder != None) or (model_name != None): + if dir_name is not None: + if model_path is not None: + if (model_folder is not None) or (model_name is not None): lbann_errors.append( - 'model_path is set but so is at least one of model folder and model_name') + ('model_path is set but so is at least one of model' + ' folder and model_name')) else: - if (model_folder != None) and (model_name != None): - option_model = ' --model=%s/model_zoo/%s/model_%s.prototext' % (dir_name, model_folder, model_name) - elif model_folder != None: + if (model_folder is not None) and (model_name is not None): + option_model = ' --model=%s/model_zoo/%s/model_%s.prototext' % ( + dir_name, model_folder, model_name) + elif model_folder is not None: lbann_errors.append('model_folder set but not model_name.') - elif model_name != None: + elif model_name is not None: lbann_errors.append('model_name set but not model_folder.') - if data_reader_name != None: - if data_reader_path != None: - lbann_errors.append('data_reader_path is set but so is data_reader_name') + if data_reader_name is not None: + if data_reader_path is not None: + lbann_errors.append(('data_reader_path is set but so is' + ' data_reader_name')) else: option_data_reader = ' --reader=%s/model_zoo/data_readers/data_reader_%s.prototext' % (dir_name, data_reader_name) - if optimizer_name != None: - if optimizer_path != None: - lbann_errors.append('optimizer_path is set but so is optimizer_name') + if optimizer_name is not None: + if optimizer_path is not None: + lbann_errors.append(('optimizer_path is set but so is' + ' optimizer_name')) else: option_optimizer = ' --optimizer=%s/model_zoo/optimizers/opt_%s.prototext' % (dir_name, optimizer_name) - if (model_folder == None) and (model_name == None) and (data_reader_name == None) and (optimizer_name == None): - lbann_errors.append('dir_name set but none of model_folder, model_name, data_reader_name, optimizer_name are.') - elif (model_folder != None) or (model_name != None) or (data_reader_name != None) or (optimizer_name != None): + if (model_folder is None) and (model_name is None) and \ + (data_reader_name is None) and (optimizer_name is None): + lbann_errors.append( + ('dir_name set but none of model_folder, model_name,' + ' data_reader_name, optimizer_name are.')) + elif (model_folder is not None) or (model_name is not None) or \ + (data_reader_name is not None) or (optimizer_name is not None): lbann_errors.append( - 'dir_name is not set but at least one of model_folder, model_name, data_reader_name, optimizer_name is.') + ('dir_name is not set but at least one of model_folder,' + ' model_name, data_reader_name, optimizer_name is.')) data_file_parameters = [data_filedir_train_default, data_filename_train_default, data_filedir_test_default, data_filename_test_default] # Determine data file paths # If there is no regex match, then re.sub keeps the original string - if data_filedir_default != None: + if data_filedir_default is not None: if cluster in ['catalyst', 'pascal', 'surface']: # option_data_filedir = data_filedir_default # lscratchh, presumably - pass # No need to pass in a parameter + pass # No need to pass in a parameter elif cluster == 'quartz': - option_data_filedir = ' --data_filedir=%s' % re.sub('[a-z]scratch[a-z]', 'lscratchh', data_filedir_default) + option_data_filedir = ' --data_filedir=%s' % re.sub( + '[a-z]scratch[a-z]', 'lscratchh', data_filedir_default) elif cluster == 
'ray': - option_data_filedir = ' --data_filedir=%s' % re.sub('[a-z]scratch[a-z]', 'gscratchr', data_filedir_default) + option_data_filedir = ' --data_filedir=%s' % re.sub( + '[a-z]scratch[a-z]', 'gscratchr', data_filedir_default) elif None not in data_file_parameters: if cluster in ['catalyst', 'pascal', 'surface']: # option_data_filedir_train = data_filedir_train_default @@ -262,35 +277,56 @@ def get_command(cluster, option_data_filename_train = ' --data_filename_train=%s' % re.sub('[a-z]scratch[a-z]', 'gscratchr', data_filename_train_default) option_data_filedir_test = ' --data_filedir_test=%s' % re.sub('[a-z]scratch[a-z]', 'gscratchr', data_filedir_test_default) option_data_filename_test = ' --data_filename_test=%s' % re.sub('[a-z]scratch[a-z]', 'gscratchr', data_filename_test_default) - if (data_reader_name != None) or (data_reader_path != None): - if data_filedir_default != None: - if data_file_parameters != [None, None, None, None]: # If any are not None - lbann_errors.append('data_fildir_default set but so is at least one of [data_filedir_train_default, data_filename_train_default, data_filedir_test_default, data_filename_test_default]') + if (data_reader_name is not None) or (data_reader_path is not None): + if data_filedir_default is not None: + # If any are not None + if data_file_parameters != [None, None, None, None]: + lbann_errors.append( + ('data_fildir_default set but so is at least one of' + ' [data_filedir_train_default, data_filename_train' + '_default, data_filedir_test_default,' + ' data_filename_test_default]')) # else: only data_filedir_default is set else: # if None in data_file_parameters: # If any are None if data_file_parameters == [None, None, None, None]: # If all are None - lbann_errors.append('data_reader_name or data_reader_path is set but not data_filedir_default. If a data reader is provided, the default filedir must be set. This allows for determining what the filedir should be on each cluster. Alternatively, some or all of [data_filedir_train_default, data_filename_train_default, data_filedir_test_default, data_filename_test_default] can be set.') + lbann_errors.append( + ('data_reader_name or data_reader_path is set but not' + ' data_filedir_default. If a data reader is provided,' + ' the default filedir must be set. This allows for' + ' determining what the filedir should be on each' + ' cluster. 
Alternatively, some or all of' + ' [data_filedir_train_default, data_filename_train' + '_default, data_filedir_test_default, data_filename' + '_test_default] can be set.')) # else: no data_file parameters are set else: - if data_filedir_default != None: - lbann_errors.append('data_filedir_default set but neither data_reader_name or data_reader_path are.') - elif filter(lambda x: x != None, data_file_parameters) != []: # If the list of non-None data_file parameters is not empty - lbann_errors.append('At least one of [data_filedir_train_default, data_filename_train_default, data_filedir_test_default, data_filename_test_default] is set, but neither data_reader_name or data_reader_path are.') + if data_filedir_default is not None: + lbann_errors.append( + ('data_filedir_default set but neither data_reader_name' + ' or data_reader_path are.')) + elif filter(lambda x: x is not None, data_file_parameters) != []: + # If the list of non-None data_file parameters is not empty + lbann_errors.append( + ('At least one of [data_filedir_train_default, data_filename' + '_train_default, data_filedir_test_default, data_filename' + '_test_default] is set, but neither data_reader_name or' + ' data_reader_path are.')) # else: no conflicts - if data_reader_percent != None: + if data_reader_percent is not None: option_data_reader_percent = ' --data_reader_percent=%f' % data_reader_percent if exit_after_setup: option_exit_after_setup = ' --exit_after_setup' - if mini_batch_size != None: + if mini_batch_size is not None: option_mini_batch_size = ' --mini_batch_size=%d' % mini_batch_size - if num_epochs != None: + if num_epochs is not None: option_num_epochs = ' --num_epochs=%d' % num_epochs - if processes_per_model != None: + if processes_per_model is not None: option_processes_per_model = ' --procs_per_model=%d' % processes_per_model - if ckpt_dir != None: + if ckpt_dir is not None: option_ckpt_dir = ' --ckpt_dir=%s' % ckpt_dir if lbann_errors != []: + print('lbann_errors={lbann_errors}.'.format(lbann_errors=lbann_errors)) raise Exception('Invalid Usage: ' + ' , '.join(lbann_errors)) command_lbann = '%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s' % ( executable, option_ckpt_dir, option_data_filedir, @@ -304,15 +340,84 @@ def get_command(cluster, # Create redirect command command_output = '' command_error = '' - if output_file_name != None: + if output_file_name is not None: command_output = ' > %s' % output_file_name - if error_file_name != None: + if error_file_name is not None: command_error = ' 2> %s' % error_file_name command_redirect = '%s%s' % (command_output, command_error) t = (command_allocate, command_run, command_lbann, command_redirect) if return_tuple: + print('command_tuple=' + str(t)) return t else: - return '%s%s %s%s' % t + command_string = '%s%s %s%s' % t + print('command_string=' + command_string) + return command_string + + +def process_executable_existence(executable, skip_no_exe=True): + executable_exists = os.path.exists(executable) + if not executable_exists: + error_string = 'Executable does not exist: %s' % executable + if skip_no_exe: + pytest.skip(error_string) + else: + raise Exception(error_string) + + +def get_spack_exes(default_dirname, cluster): + exes = {} + + exes['clang4'] = '%s/bamboo/compiler_tests/builds/%s_clang-4.0.0_rel/build/model_zoo/lbann' % (default_dirname, cluster) + exes['gcc4'] = '%s/bamboo/compiler_tests/builds/%s_gcc-4.9.3_rel/build/model_zoo/lbann' % (default_dirname, cluster) + exes['gcc7'] = '%s/bamboo/compiler_tests/builds/%s_gcc-7.1.0_rel/build/model_zoo/lbann' % 
(default_dirname, cluster) + exes['intel18'] = '%s/bamboo/compiler_tests/builds/%s_intel-18.0.0_rel/build/model_zoo/lbann' % (default_dirname, cluster) + + exes['clang4_debug'] = '%s/bamboo/compiler_tests/builds/%s_clang-4.0.0_debug/build/model_zoo/lbann' % (default_dirname, cluster) + exes['gcc4_debug'] = '%s/bamboo/compiler_tests/builds/%s_gcc-4.9.3_debug/build/model_zoo/lbann' % (default_dirname, cluster) + exes['gcc7_debug'] = '%s/bamboo/compiler_tests/builds/%s_gcc-7.1.0_debug/build/model_zoo/lbann' % (default_dirname, cluster) + exes['intel18_debug'] = '%s/bamboo/compiler_tests/builds/%s_intel-18.0.0_debug/build/model_zoo/lbann' % (default_dirname, cluster) + + return exes + + +def get_default_exes(default_dirname, cluster): + exes = get_spack_exes(default_dirname, cluster) + # Use build script as a backup if the Spack build doesn't work. + if not os.path.exists(exes['clang4']): + exes['clang4'] = '%s/build/clang.Release.%s.llnl.gov/install/bin/lbann' % (default_dirname, cluster) + if not os.path.exists(exes['gcc7']): + exes['gcc7'] = '%s/build/gnu.Release.%s.llnl.gov/install/bin/lbann' % (default_dirname, cluster) + if not os.path.exists(exes['intel18']): + exes['intel18'] = '%s/build/intel.Release.%s.llnl.gov/install/bin/lbann' % (default_dirname, cluster) + + if not os.path.exists(exes['clang4_debug']): + exes['clang4_debug'] = '%s/build/clang.Debug.%s.llnl.gov/install/bin/lbann' % (default_dirname, cluster) + if not os.path.exists(exes['gcc7_debug']): + exes['gcc7_debug'] = '%s/build/gnu.Debug.%s.llnl.gov/install/bin/lbann' % (default_dirname, cluster) + if not os.path.exists(exes['intel18_debug']): + exes['intel18_debug'] = '%s/build/intel.Debug.%s.llnl.gov/install/bin/lbann' % (default_dirname, cluster) + + default_exes = {} + default_exes['default'] = '%s/build/gnu.Release.%s.llnl.gov/install/bin/lbann' % (default_dirname, cluster) + if cluster in ['catalyst', 'quartz', 'pascal']: + # x86_cpu - catalyst, quartz + # x86_gpu_pascal - pascal + default_exes['clang4'] = exes['clang4'] + default_exes['gcc4'] = exes['gcc4'] + default_exes['gcc7'] = exes['gcc7'] + default_exes['intel18'] = exes['intel18'] + + default_exes['clang4_debug'] = exes['clang4_debug'] + default_exes['gcc4_debug'] = exes['gcc4_debug'] + default_exes['gcc7_debug'] = exes['gcc7_debug'] + default_exes['intel18_debug'] = exes['intel18_debug'] + elif cluster in ['surface']: + # x86_gpu - surface + default_exes['gcc4'] = exes['gcc4'] + default_exes['gcc4_debug'] = exes['gcc4_debug'] + + print('default_exes={d}'.format(d=default_exes)) + return default_exes diff --git a/bamboo/compiler_tests/build_script.sh b/bamboo/compiler_tests/build_script.sh index c52f239b5fa..07a19172f26 100755 --- a/bamboo/compiler_tests/build_script.sh +++ b/bamboo/compiler_tests/build_script.sh @@ -1,53 +1,7 @@ -set -e CLUSTER=$(hostname | sed 's/\([a-zA-Z][a-zA-Z]*\)[0-9]*/\1/g') -LBANN_DIR=$(git rev-parse --show-toplevel) -DEBUG='' if [ "${CLUSTER}" != 'surface' ]; then source /usr/share/lmod/lmod/init/bash source /etc/profile.d/00-modulepath.sh fi - -while :; do - case ${1} in - --compiler) - # Choose compiler - if [ -n "${2}" ]; then - COMPILER=${2} - shift - else - echo "\"${1}\" option requires a non-empty option argument" >&2 - exit 1 - fi - ;; - - -d|--debug) - # Debug mode - DEBUG='--debug' - ;; - *) - # Break loop if there are no more options - break - - esac - shift -done - -if [ "${COMPILER}" == 'clang' ]; then - module load clang/4.0.0 - ${LBANN_DIR}/scripts/build_lbann_lc.sh --compiler clang ${DEBUG} --reconfigure -fi - -if [ 
"${COMPILER}" == 'intel' ]; then - module load intel/18.0.0 - ${LBANN_DIR}/scripts/build_lbann_lc.sh --compiler intel ${DEBUG} --reconfigure -fi - -if [ "${COMPILER}" == 'gcc4' ]; then - module load gcc/4.9.3 - ${LBANN_DIR}/scripts/build_lbann_lc.sh --compiler gnu ${DEBUG} --reconfigure -fi - -if [ "${COMPILER}" == 'gcc7' ]; then - module load gcc/7.1.0 - ${LBANN_DIR}/scripts/build_lbann_lc.sh --compiler gnu ${DEBUG} --reconfigure -fi +LBANN_DIR=$(git rev-parse --show-toplevel) +${LBANN_DIR}/scripts/build_lbann_lc.sh --with-conduit diff --git a/bamboo/compiler_tests/build_script_specific.sh b/bamboo/compiler_tests/build_script_specific.sh new file mode 100755 index 00000000000..975d58ac4a1 --- /dev/null +++ b/bamboo/compiler_tests/build_script_specific.sh @@ -0,0 +1,53 @@ +set -e +CLUSTER=$(hostname | sed 's/\([a-zA-Z][a-zA-Z]*\)[0-9]*/\1/g') +LBANN_DIR=$(git rev-parse --show-toplevel) +DEBUG='' +if [ "${CLUSTER}" != 'surface' ]; then + source /usr/share/lmod/lmod/init/bash + source /etc/profile.d/00-modulepath.sh +fi + +while :; do + case ${1} in + --compiler) + # Choose compiler + if [ -n "${2}" ]; then + COMPILER=${2} + shift + else + echo "\"${1}\" option requires a non-empty option argument" >&2 + exit 1 + fi + ;; + + -d|--debug) + # Debug mode + DEBUG='--debug' + ;; + *) + # Break loop if there are no more options + break + + esac + shift +done + +if [ "${COMPILER}" == 'clang4' ]; then + module load clang/4.0.0 + ${LBANN_DIR}/scripts/build_lbann_lc.sh --compiler clang ${DEBUG} --reconfigure --with-conduit +fi + +if [ "${COMPILER}" == 'intel18' ]; then + module load intel/18.0.0 + ${LBANN_DIR}/scripts/build_lbann_lc.sh --compiler intel ${DEBUG} --reconfigure --with-conduit +fi + +if [ "${COMPILER}" == 'gcc4' ]; then + module load gcc/4.9.3 + ${LBANN_DIR}/scripts/build_lbann_lc.sh --compiler gnu ${DEBUG} --reconfigure --with-conduit +fi + +if [ "${COMPILER}" == 'gcc7' ]; then + module load gcc/7.1.0 + ${LBANN_DIR}/scripts/build_lbann_lc.sh --compiler gnu ${DEBUG} --reconfigure --with-conduit +fi diff --git a/bamboo/compiler_tests/conftest.py b/bamboo/compiler_tests/conftest.py index 6e07162c5d3..238b812e638 100644 --- a/bamboo/compiler_tests/conftest.py +++ b/bamboo/compiler_tests/conftest.py @@ -1,18 +1,23 @@ import pytest import re, subprocess + def pytest_addoption(parser): - cluster = re.sub('[0-9]+', '', subprocess.check_output('hostname'.split()).strip()) - default_dirname = subprocess.check_output('git rev-parse --show-toplevel'.split()).strip() + cluster = re.sub('[0-9]+', '', subprocess.check_output( + 'hostname'.split()).strip()) + default_dirname = subprocess.check_output( + 'git rev-parse --show-toplevel'.split()).strip() parser.addoption('--cluster', action='store', default=cluster, help='--cluster= to specify the cluster being run on, for the purpose of determing which commands to use. 
Default the current cluster') parser.addoption('--dirname', action='store', default=default_dirname, help='--dirname specifies the top-level directory') + @pytest.fixture def cluster(request): return request.config.getoption('--cluster') - + + @pytest.fixture def dirname(request): return request.config.getoption('--dirname') diff --git a/bamboo/compiler_tests/test_compiler.py b/bamboo/compiler_tests/test_compiler.py index 383c8701832..5682d11f3af 100644 --- a/bamboo/compiler_tests/test_compiler.py +++ b/bamboo/compiler_tests/test_compiler.py @@ -1,109 +1,167 @@ +# import sys +# sys.path.insert(0, '../common_python') +# import tools import pytest import os, re, subprocess -def build_script(cluster, dirname, compiler, debug): - if debug: - build = 'debug' + +def test_compiler_build_script(cluster, dirname): + if cluster in ['pascal']: + output_file_name = '%s/bamboo/compiler_tests/output/build_script_output.txt' % (dirname) + error_file_name = '%s/bamboo/compiler_tests/error/build_script_error.txt' % (dirname) + command = '%s/bamboo/compiler_tests/build_script.sh > %s 2> %s' % ( + dirname, output_file_name, error_file_name) + return_code = os.system(command) + if return_code != 0: + output_file = open(output_file_name, 'r') + for line in output_file: + print('%s: %s' % (output_file_name, line)) + error_file = open(error_file_name, 'r') + for line in error_file: + print('%s: %s' % (error_file_name, line)) + assert return_code == 0 else: - build = 'release' - output_file_name = '%s/bamboo/compiler_tests/output/%s_%s_%s_output.txt' % (dirname, cluster, compiler, build) - error_file_name = '%s/bamboo/compiler_tests/error/%s_%s_%s_error.txt' % (dirname, cluster, compiler, build) - command = '%s/bamboo/compiler_tests/build_script.sh --compiler %s %s> %s 2> %s' % (dirname, compiler, debug, output_file_name, error_file_name) - return_code = os.system(command) - if return_code != 0: - output_file = open(output_file_name, 'r') - for line in output_file: - print('%s: %s' % (output_file_name, line)) - error_file = open(error_file_name, 'r') - for line in error_file: - print('%s: %s' % (error_file_name, line)) - assert return_code == 0 + e = 'test_compiler_build_script: Unsupported Cluster %s' % cluster + print('Skip - ' + e) + pytest.skip(e) + def test_compiler_clang4_release(cluster, dirname): - #skeleton_clang4(cluster, dirname, False) - if cluster in ['ray', 'catalyst']: - build_script(cluster, dirname, 'clang', '') - else: - pytest.skip('Unsupported Cluster %s' % cluster) + try: + skeleton_clang4(cluster, dirname, False) + except AssertionError as e: + print(e) + build_script(cluster, dirname, 'clang4', False) + path = '%s/bamboo/compiler_tests/builds/%s_clang-4.0.0_rel/build/model_zoo/lbann' % (dirname, cluster) + if not os.path.exists(path): + path = '%s/build/clang.Release.%s.llnl.gov/install/bin/lbann' % (dirname, cluster) + assert os.path.exists(path) + def test_compiler_clang4_debug(cluster, dirname): - #skeleton_clang4(cluster, dirname, True) - if cluster in ['ray', 'catalyst']: - build_script(cluster, dirname, 'clang', '--debug') - else: - pytest.skip('Unsupported Cluster %s' % cluster) + try: + skeleton_clang4(cluster, dirname, True) + except AssertionError as e: + print(e) + build_script(cluster, dirname, 'clang4', True) + path = '%s/bamboo/compiler_tests/builds/%s_clang-4.0.0_debug/build/model_zoo/lbann' % (dirname, cluster) + if not os.path.exists(path): + path = '%s/build/clang.Debug.%s.llnl.gov/install/bin/lbann' % (dirname, cluster) + assert os.path.exists(path) + def 
test_compiler_gcc4_release(cluster, dirname): - #skeleton_gcc4(cluster, dirname, False) - build_script(cluster, dirname, 'gcc4', '') + try: + skeleton_gcc4(cluster, dirname, False) + except AssertionError as e: + print(e) + build_script(cluster, dirname, 'gcc4', False) + path = '%s/bamboo/compiler_tests/builds/%s_gcc-4.9.3_rel/build/model_zoo/lbann' % (dirname, cluster) + assert os.path.exists(path) + def test_compiler_gcc4_debug(cluster, dirname): - #skeleton_gcc4(cluster, dirname, True) - build_script(cluster, dirname, 'gcc4', '--debug') + try: + skeleton_gcc4(cluster, dirname, True) + except AssertionError as e: + print(e) + build_script(cluster, dirname, 'gcc4', True) + path = '%s/bamboo/compiler_tests/builds/%s_gcc-4.9.3_debug/build/model_zoo/lbann' % (dirname, cluster) + assert os.path.exists(path) + def test_compiler_gcc7_release(cluster, dirname): - #skeleton_gcc7(cluster, dirname, False) - if cluster == 'catalyst': - build_script(cluster, dirname, 'gcc7', '') - else: - pytest.skip('Unsupported Cluster %s' % cluster) + try: + skeleton_gcc7(cluster, dirname, False) + except AssertionError as e: + print(e) + build_script(cluster, dirname, 'gcc7', False) + path = '%s/bamboo/compiler_tests/builds/%s_gcc-7.1.0_rel/build/model_zoo/lbann' % (dirname, cluster) + if not os.path.exists(path): + path = '%s/build/gnu.Release.%s.llnl.gov/install/bin/lbann' % (dirname, cluster) + assert os.path.exists(path) + def test_compiler_gcc7_debug(cluster, dirname): - #skeleton_gcc7(cluster, dirname, True) - if cluster == 'catalyst': - build_script(cluster, dirname, 'gcc7', '--debug') - else: - pytest.skip('Unsupported Cluster %s' % cluster) + try: + skeleton_gcc7(cluster, dirname, True) + except AssertionError as e: + print(e) + build_script(cluster, dirname, 'gcc7', True) + path = '%s/bamboo/compiler_tests/builds/%s_gcc-7.1.0_debug/build/model_zoo/lbann' % (dirname, cluster) + if not os.path.exists(path): + path = '%s/build/gnu.Debug.%s.llnl.gov/install/bin/lbann' % (dirname, cluster) + assert os.path.exists(path) + def test_compiler_intel18_release(cluster, dirname): - #skeleton_intel18(cluster, dirname, False) - if cluster == 'catalyst': - build_script(cluster, dirname, 'intel', '') - else: - pytest.skip('Unsupported Cluster %s' % cluster) + try: + skeleton_intel18(cluster, dirname, False) + except AssertionError as e: + print(e) + build_script(cluster, dirname, 'intel18', False) + path = '%s/bamboo/compiler_tests/builds/%s_intel-18.0.0_rel/build/model_zoo/lbann' % (dirname, cluster) + if not os.path.exists(path): + path = '%s/build/intel.Release.%s.llnl.gov/install/bin/lbann' % (dirname, cluster) + assert os.path.exists(path) + def test_compiler_intel18_debug(cluster, dirname): - #skeleton_intel18(cluster, dirname, True) - if cluster == 'catalyst': - build_script(cluster, dirname, 'intel', '--debug') - else: - pytest.skip('Unsupported Cluster %s' % cluster) + try: + skeleton_intel18(cluster, dirname, True) + except AssertionError as e: + print(e) + build_script(cluster, dirname, 'intel18', True) + path = '%s/bamboo/compiler_tests/builds/%s_intel-18.0.0_debug/build/model_zoo/lbann' % (dirname, cluster) + if not os.path.exists(path): + path = '%s/build/intel.Debug.%s.llnl.gov/install/bin/lbann' % (dirname, cluster) + assert os.path.exists(path) + def skeleton_clang4(cluster, dir_name, debug, should_log=False): if cluster in ['catalyst', 'quartz']: spack_skeleton(dir_name, 'clang@4.0.0', 'mvapich2@2.2', debug, should_log) build_skeleton(dir_name, 'clang@4.0.0', debug, should_log) else: - 
pytest.skip('Unsupported Cluster %s' % cluster) + e = 'skeleton_clang4: Unsupported Cluster %s' % cluster + print('Skip - ' + e) + pytest.skip(e) + def skeleton_gcc4(cluster, dir_name, debug, should_log=False): - if cluster in ['catalyst', 'quartz', 'ray']: - if cluster in ['catalyst','quartz']: - mpi = 'mvapich2@2.2' - elif cluster in ['pascal', 'surface']: - mpi = 'mvapich2@2.2+cuda' - elif cluster == 'ray': - mpi = 'spectrum-mpi@2018.04.27' - else: - raise Exception('Unsupported Cluster %s' % cluster) - spack_skeleton(dir_name, 'gcc@4.9.3', mpi, debug, should_log) - build_skeleton(dir_name, 'gcc@4.9.3', debug, should_log) + if cluster in ['quartz']: # Taking out 'catalyst' + mpi = 'mvapich2@2.2' + elif cluster in ['surface']: # Taking out 'pascal' + mpi = 'mvapich2@2.2+cuda' + elif cluster == 'ray': + mpi = 'spectrum-mpi@2018.04.27' else: - pytest.skip('Unsupported Cluster %s' % cluster) + e = 'skeleton_gcc4: Unsupported Cluster %s' % cluster + print('Skip - ' + e) + pytest.skip(e) + spack_skeleton(dir_name, 'gcc@4.9.3', mpi, debug, should_log) + build_skeleton(dir_name, 'gcc@4.9.3', debug, should_log) + def skeleton_gcc7(cluster, dir_name, debug, should_log=False): if cluster in ['catalyst', 'quartz']: spack_skeleton(dir_name, 'gcc@7.1.0', 'mvapich2@2.2', debug, should_log) build_skeleton(dir_name, 'gcc@7.1.0', debug, should_log) else: - pytest.skip('Unsupported Cluster %s' % cluster) + e = 'skeleton_gcc7: Unsupported Cluster %s' % cluster + print('Skip - ' + e) + pytest.skip(e) + def skeleton_intel18(cluster, dir_name, debug, should_log=False): - if cluster in ['catalyst', 'quartz']: + if cluster in ['quartz']: # Taking out 'catalyst' spack_skeleton(dir_name, 'intel@18.0.0', 'mvapich2@2.2', debug, should_log) build_skeleton(dir_name, 'intel@18.0.0', debug, should_log) else: - pytest.skip('Unsupported Cluster %s' % cluster) + e = 'skeleton_intel18: Unsupported Cluster %s' % cluster + print('Skip - ' + e) + pytest.skip(e) + def spack_skeleton(dir_name, compiler, mpi_lib, debug, should_log): compiler_underscored = re.sub('[@\.]', '_', compiler) @@ -130,6 +188,7 @@ def spack_skeleton(dir_name, compiler, mpi_lib, debug, should_log): print('%s: %s' % (error_file_name, line)) assert return_code == 0 + def build_skeleton(dir_name, compiler, debug, should_log): compiler_underscored = re.sub('[@\.]', '_', compiler) if debug: @@ -142,7 +201,8 @@ def build_skeleton(dir_name, compiler, debug, should_log): #mpi_lib = mpi_lib.replace('@', '-') cluster = re.sub('[0-9]+', '', subprocess.check_output('hostname'.split()).strip()) # For reference: - # Commenting out for now. These additions to path name will likely return one day, so I am not removing them entirely + # Commenting out for now. These additions to path name will likely return + # one day, so I am not removing them entirely. 
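Note: the per-compiler artifact paths that these compiler tests assert all follow one naming convention. A minimal sketch that restates it (the helper name spack_build_path is hypothetical; the layout comes from the paths in get_spack_exes above):

    def spack_build_path(dirname, cluster, compiler, debug):
        # 'clang@4.0.0' -> 'clang-4.0.0', matching the directory names in get_spack_exes
        compiler_dashed = compiler.replace('@', '-')
        build = 'debug' if debug else 'rel'
        return ('%s/bamboo/compiler_tests/builds/%s_%s_%s/build/model_zoo/lbann'
                % (dirname, cluster, compiler_dashed, build))

    # spack_build_path('/lbann', 'catalyst', 'gcc@7.1.0', False)
    # -> '/lbann/bamboo/compiler_tests/builds/catalyst_gcc-7.1.0_rel/build/model_zoo/lbann'
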
# x86_64 <=> catalyst, pascal, quartz, surface # ppc64le <=> ray #architecture = subprocess.check_output('uname -m'.split()).strip() @@ -164,3 +224,27 @@ def build_skeleton(dir_name, compiler, debug, should_log): for line in error_file: print('%s: %s' % (error_file_name, line)) assert return_code == 0 + + +def build_script(cluster, dirname, compiler, debug): + print(('Running build_script for cluster={cluster},' + ' compiler={compiler}, debug={debug}.').format( + cluster=cluster, compiler=compiler, debug=debug)) + if debug: + build = 'debug' + debug_flag = '--debug' + else: + build = 'release' + debug_flag = '' + output_file_name = '%s/bamboo/compiler_tests/output/%s_%s_%s_build_script_output.txt' % (dirname, cluster, compiler, build) + error_file_name = '%s/bamboo/compiler_tests/error/%s_%s_%s_build_script_error.txt' % (dirname, cluster, compiler, build) + command = '%s/bamboo/compiler_tests/build_script_specific.sh --compiler %s %s> %s 2> %s' % (dirname, compiler, debug_flag, output_file_name, error_file_name) + return_code = os.system(command) + if return_code != 0: + output_file = open(output_file_name, 'r') + for line in output_file: + print('%s: %s' % (output_file_name, line)) + error_file = open(error_file_name, 'r') + for line in error_file: + print('%s: %s' % (error_file_name, line)) + assert return_code == 0 diff --git a/bamboo/integration_tests/common_code.py b/bamboo/integration_tests/common_code.py index 7a3cea95c71..0d0a4dda68e 100644 --- a/bamboo/integration_tests/common_code.py +++ b/bamboo/integration_tests/common_code.py @@ -3,6 +3,7 @@ import tools import collections, csv, os, pprint, re, time + # Set up the command ########################################################## def get_command(cluster, dir_name, model_folder, model_name, executable, output_file_name, error_file_name, compiler_name, weekly=False): @@ -23,7 +24,8 @@ def get_command(cluster, dir_name, model_folder, model_name, executable, optimizer_name='adagrad', output_file_name=output_file_name, error_file_name=error_file_name) elif model_name in ['conv_autoencoder_mnist', 'lenet_mnist']: - if (model_name == 'lenet_mnist') and (compiler_name in ['clang4', 'intel18']): + if (model_name == 'lenet_mnist') and \ + (compiler_name in ['clang4', 'intel18']): partition = 'pbatch' time_limit = 600 else: @@ -33,10 +35,10 @@ def get_command(cluster, dir_name, model_folder, model_name, executable, num_processes = 20 else: num_processes = 2 - command = tools.get_command( + command = tools.get_command( cluster=cluster, executable=executable, num_nodes=1, - partition=partition, time_limit=time_limit, num_processes=num_processes, - dir_name=dir_name, + partition=partition, time_limit=time_limit, + num_processes=num_processes, dir_name=dir_name, data_filedir_default='/p/lscratchh/brainusr/datasets/MNIST', data_reader_name='mnist', model_folder=model_folder, model_name=model_name, num_epochs=5, optimizer_name='adagrad', @@ -47,18 +49,23 @@ def get_command(cluster, dir_name, model_folder, model_name, executable, # Run LBANN ################################################################### -def run_lbann(command, model_name, output_file_name, error_file_name, should_log=False): + +def run_lbann(command, model_name, output_file_name, error_file_name, + should_log=False): print('About to run: %s' % command) - print('%s began waiting in the queue at ' % model_name + time.strftime('%H:%M:%S', time.localtime())) + print('%s began waiting in the queue at ' % model_name + + time.strftime('%H:%M:%S', time.localtime())) output_value = 
os.system(command) - print('%s finished at ' % model_name + time.strftime('%H:%M:%S', time.localtime())) + print('%s finished at ' % model_name + + time.strftime('%H:%M:%S', time.localtime())) lbann_exceptions = [] timed_out = False if should_log or (output_value != 0): output_file = open(output_file_name, 'r') for line in output_file: print('%s: %s' % (output_file_name, line)) - is_match = re.search('This lbann_exception is about to be thrown:(.*)', line) + is_match = re.search( + 'This lbann_exception is about to be thrown:(.*)', line) if is_match: lbann_exceptions.append(is_match.group(1)) is_match = re.search('CANCELLED AT (.*) DUE TO TIME LIMIT', line) @@ -67,15 +74,22 @@ def run_lbann(command, model_name, output_file_name, error_file_name, should_log error_file = open(error_file_name, 'r') for line in error_file: print('%s: %s' % (error_file_name, line)) + is_match = re.search('LBANN error on (.*)', line) + if is_match: + lbann_exceptions.append(is_match.group(1)) if output_value != 0: - error_string = 'Model %s crashed with output_value=%d, timed_out=%s, and lbann exceptions=%s. Command was: %s' % ( - model_name, output_value, str(timed_out), str(collections.Counter(lbann_exceptions)), command) + error_string = ('Model %s crashed with output_value=%d, timed_out=%s,' + ' and lbann exceptions=%s. Command was: %s') % ( + model_name, output_value, str(timed_out), + str(collections.Counter(lbann_exceptions)), command) raise Exception(error_string) return output_value # Extract data from output #################################################### -def populate_data_dict_epoch(regex, line, data_field, data_fields, data_dict, model_id): + +def populate_data_dict_epoch(regex, line, data_field, data_fields, data_dict, + model_id): is_match = re.search(regex, line) if is_match and (data_field in data_fields): if model_id not in data_dict[data_field].keys(): @@ -84,7 +98,9 @@ def populate_data_dict_epoch(regex, line, data_field, data_fields, data_dict, mo value = float(is_match.group(2)) data_dict[data_field][model_id][epoch_id] = value -def populate_data_dict_overall(regex, line, data_field, data_fields, data_dict, model_id): + +def populate_data_dict_overall(regex, line, data_field, data_fields, data_dict, + model_id): is_match = re.search(regex, line) if is_match and (data_field in data_fields): if model_id not in data_dict[data_field].keys(): @@ -92,6 +108,7 @@ def populate_data_dict_overall(regex, line, data_field, data_fields, data_dict, value = float(is_match.group(1)) data_dict[data_field][model_id]['overall'] = value + # data_dict[data_field][model_id][epoch_id] = float # data_fields is the list or set of data we're interested in. 
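Note: extract_data below fills a nested dict keyed data_field -> model_id -> epoch_id -> value. A minimal sketch of that population step, using an invented sample log line:

    import re

    data_dict = {'training_objective_function': {}}
    model_id = '0'  # captured earlier from a '^Model ([0-9]+)' match
    line = 'training epoch 3 objective function : 0.192867'  # invented sample
    m = re.search('training epoch ([0-9]+) objective function : ([0-9.]+)', line)
    if m:
        epoch_id, value = m.group(1), float(m.group(2))
        data_dict['training_objective_function'].setdefault(model_id, {})[epoch_id] = value
    # data_dict == {'training_objective_function': {'0': {'3': 0.192867}}}
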
def extract_data(output_file_name, data_fields, should_log): @@ -102,24 +119,31 @@ def extract_data(output_file_name, data_fields, should_log): for line in output_file: if should_log: - print('%s: %s' % (output_file_name, line)) + print('extract_data: %s: %s' % (output_file_name, line)) # Check if line is reporting model results is_model = re.search('^Model ([0-9]+)', line) + if not is_model: + is_model = re.search('^model([0-9]+)', line) if is_model: + print('extract_data: is_model={is_model}'.format(is_model=is_model)) model_id = is_model.group(1) regex = 'training epoch ([0-9]+) objective function : ([0-9.]+)' data_field = 'training_objective_function' - populate_data_dict_epoch(regex, line, data_field, data_fields, data_dict, model_id) + populate_data_dict_epoch(regex, line, data_field, data_fields, + data_dict, model_id) regex = 'training epoch ([0-9]+) run time : ([0-9.]+)' data_field = 'training_run_time' - populate_data_dict_epoch(regex, line, data_field, data_fields, data_dict, model_id) + populate_data_dict_epoch(regex, line, data_field, data_fields, + data_dict, model_id) regex = 'training epoch ([0-9]+) mini-batch time statistics : ([0-9.]+)s mean, ([0-9.]+)s max, ([0-9.]+)s min, ([0-9.]+)s stdev' is_match = re.search(regex, line) if is_match: + print('extract_data: is_mini-batch time statistics={is_match}'.format( + is_match=is_match)) epoch_id = is_match.group(1) mean_value = float(is_match.group(2)) max_value = float(is_match.group(3)) @@ -129,53 +153,66 @@ def extract_data(output_file_name, data_fields, should_log): if data_field in data_fields: if model_id not in data_dict[data_field].keys(): data_dict[data_field][model_id] = {} + print('extract_data: mean_value={mv}'.format(mv=mean_value)) data_dict[data_field][model_id][epoch_id] = mean_value data_field = 'training_max' if data_field in data_fields: if model_id not in data_dict[data_field].keys(): data_dict[data_field][model_id] = {} + print('extract_data: max_value={mv}'.format(mv=max_value)) data_dict[data_field][model_id][epoch_id] = max_value data_field = 'training_min' if data_field in data_fields: if model_id not in data_dict[data_field].keys(): data_dict[data_field][model_id] = {} + print('extract_data: min_value={mv}'.format(mv=min_value)) data_dict[data_field][model_id][epoch_id] = min_value data_field = 'training_stdev' if data_field in data_fields: if model_id not in data_dict[data_field].keys(): data_dict[data_field][model_id] = {} + print('extract_data: stdev={sv}'.format(sv=stdev_value)) data_dict[data_field][model_id][epoch_id] = stdev_value regex = 'test categorical accuracy : ([0-9.]+)' data_field = 'test_accuracy' - populate_data_dict_overall(regex, line, data_field, data_fields, data_dict, model_id) + populate_data_dict_overall(regex, line, data_field, data_fields, + data_dict, model_id) output_file.close() if should_log: + print('extract_data: Extracted Data below:') pprint.pprint(data_dict) return data_dict # Skeleton #################################################################### -def skeleton(cluster, dir_name, executable, model_folder, model_name, data_fields, should_log, compiler_name=None, weekly=False): - if compiler_name == None: + +def skeleton(cluster, dir_name, executable, model_folder, model_name, + data_fields, should_log, compiler_name=None, weekly=False): + if compiler_name is None: output_file_name = '%s/bamboo/integration_tests/output/%s_output.txt' % (dir_name, model_name) error_file_name = '%s/bamboo/integration_tests/error/%s_error.txt' % (dir_name, model_name) else: - 
output_file_name = '%s/bamboo/integration_tests/output/%s_%s_output.txt' %(dir_name, model_name, compiler_name) - error_file_name = '%s/bamboo/integration_tests/error/%s_%s_error.txt' %(dir_name, model_name, compiler_name) - command = get_command(cluster, dir_name, model_folder, model_name, executable, output_file_name, error_file_name, compiler_name, weekly=weekly) - run_lbann(command, model_name, output_file_name, error_file_name, should_log) # Don't need return value + output_file_name = '%s/bamboo/integration_tests/output/%s_%s_output.txt' % (dir_name, model_name, compiler_name) + error_file_name = '%s/bamboo/integration_tests/error/%s_%s_error.txt' % (dir_name, model_name, compiler_name) + command = get_command( + cluster, dir_name, model_folder, model_name, executable, + output_file_name, error_file_name, compiler_name, weekly=weekly) + run_lbann(command, model_name, output_file_name, + error_file_name, should_log) # Don't need return value return extract_data(output_file_name, data_fields, should_log) # Misc. functions ############################################################ + # csv_dict[row_header][column_header] = float def csv_to_dict(csv_path): - with open(csv_path, 'r') as csv_file: - reader = csv.reader(csv_file, skipinitialspace=True) - column_headers = reader.next() - values = {} - for row in reader: - row_header = row[0] - values[row_header] = dict(zip(column_headers[1:], map(float, row[1:]))) - return values + with open(csv_path, 'r') as csv_file: + reader = csv.reader(csv_file, skipinitialspace=True) + column_headers = reader.next() + values = {} + for row in reader: + row_header = row[0] + values[row_header] = dict( + zip(column_headers[1:], map(float, row[1:]))) + return values diff --git a/bamboo/integration_tests/conftest.py b/bamboo/integration_tests/conftest.py index 4039eeb7dac..da2ffc127be 100644 --- a/bamboo/integration_tests/conftest.py +++ b/bamboo/integration_tests/conftest.py @@ -1,31 +1,15 @@ -import pytest, os, re, subprocess +import sys +sys.path.insert(0, '../common_python') +import tools +import pytest, re, subprocess -def pytest_addoption(parser): - cluster = re.sub('[0-9]+', '', subprocess.check_output('hostname'.split()).strip()) - default_dirname = subprocess.check_output('git rev-parse --show-toplevel'.split()).strip() - default_exes = {} - default_exes['default'] = '%s/build/gnu.Release.%s.llnl.gov/install/bin/lbann' % (default_dirname, cluster) - if cluster in ['catalyst', 'quartz']: - default_exes['clang4'] = '%s/build/clang.Release.%s.llnl.gov/install/bin/lbann' % (default_dirname, cluster) #'%s/bamboo/compiler_tests/builds/%s_clang-4.0.0_rel/build/model_zoo/lbann' % (default_dirname, cluster) - #default_exes['gcc4'] = '%s/bamboo/compiler_tests/builds/%s_gcc-4.9.3_rel/build/model_zoo/lbann' % (default_dirname, cluster) - default_exes['gcc7'] = '%s/build/gnu.Release.%s.llnl.gov/install/bin/lbann' % (default_dirname, cluster) #'%s/bamboo/compiler_tests/builds/%s_gcc-7.1.0_rel/build/model_zoo/lbann' % (default_dirname, cluster) - default_exes['intel18'] = '%s/build/intel.Release.%s.llnl.gov/install/bin/lbann' % (default_dirname, cluster) #'%s/bamboo/compiler_tests/builds/%s_intel-18.0.0_rel/build/model_zoo/lbann' % (default_dirname, cluster) - - default_exes['clang4_debug'] = '%s/build/clang.Debug.%s.llnl.gov/install/bin/lbann' % (default_dirname, cluster) #'%s/bamboo/compiler_tests/builds/%s_clang-4.0.0_debug/build/model_zoo/lbann' % (default_dirname, cluster) - #default_exes['gcc4_debug'] = 
'%s/bamboo/compiler_tests/builds/%s_gcc-4.9.3_debug/build/model_zoo/lbann' % (default_dirname, cluster) - default_exes['gcc7_debug'] = '%s/build/gnu.Debug.%s.llnl.gov/install/bin/lbann' % (default_dirname, cluster) #'%s/bamboo/compiler_tests/builds/%s_gcc-7.1.0_debug/build/model_zoo/lbann' % (default_dirname, cluster) - default_exes['intel18_debug'] = '%s/build/intel.Debug.%s.llnl.gov/install/bin/lbann' % (default_dirname, cluster) #'%s/bamboo/compiler_tests/builds/%s_intel-18.0.0_debug/build/model_zoo/lbann' % (default_dirname, cluster) - - if cluster == 'ray': - default_exes['clang4'] = '%s/build/clang.Release.%s.llnl.gov/install/bin/lbann' % (default_dirname, cluster) - default_exes['gcc4'] = '%s/build/gnu.Release.%s.llnl.gov/install/bin/lbann' % (default_dirname, cluster) #'%s/bamboo/compiler_tests/builds/%s_gcc-4.9.3_rel/build/model_zoo/lbann' % (default_dirname, cluster) - default_exes['clang4_debug'] = '%s/build/clang.Debug.%s.llnl.gov/install/bin/lbann' % (default_dirname, cluster) - default_exes['gcc4_debug'] = '%s/build/gnu.Debug.%s.llnl.gov/install/bin/lbann' % (default_dirname, cluster) #'%s/bamboo/compiler_tests/builds/%s_gcc-4.9.3_debug/build/model_zoo/lbann' % (default_dirname, cluster) - - if cluster in ['surface', 'pascal']: - default_exes['gcc4'] = default_exes['default'] - default_exes['gcc4_debug'] = '%s/build/gnu.Debug.%s.llnl.gov/install/bin/lbann' % (default_dirname, cluster) +def pytest_addoption(parser): + cluster = re.sub('[0-9]+', '', subprocess.check_output( + 'hostname'.split()).strip()) + default_dirname = subprocess.check_output( + 'git rev-parse --show-toplevel'.split()).strip() + default_exes = tools.get_default_exes(default_dirname, cluster) parser.addoption('--cluster', action='store', default=cluster, help='--cluster= to specify the cluster being run on, for the purpose of determining which commands to use. 
Default the current cluster') @@ -40,26 +24,32 @@ def pytest_addoption(parser): # For local testing only parser.addoption('--exe', action='store', help='--exe=') + @pytest.fixture def cluster(request): return request.config.getoption('--cluster') + @pytest.fixture def debug(request): return request.config.getoption('--debug') + @pytest.fixture def dirname(request): return request.config.getoption('--dirname') + @pytest.fixture def exes(request): return request.config.getoption('--exes') + @pytest.fixture def weekly(request): return request.config.getoption('--weekly') + @pytest.fixture def exe(request): return request.config.getoption('--exe') diff --git a/bamboo/integration_tests/expected_values/catalyst/clang4/expected_performance.csv b/bamboo/integration_tests/expected_values/catalyst/clang4/expected_performance.csv index 2234e14d5ab..32551e8e70b 100644 --- a/bamboo/integration_tests/expected_values/catalyst/clang4/expected_performance.csv +++ b/bamboo/integration_tests/expected_values/catalyst/clang4/expected_performance.csv @@ -1,5 +1,5 @@ Model_name, training_run_time, training_mean, training_max, training_min, training_stdev, test_accuracy -alexnet_nightly, 353.48, 7.07, 21.57, 1.24, 4.21, 0.00 -alexnet_weekly, 882.26, 1.78, 4.68, 0.95, 0.21, 2.49 -cache_alexnet, 623.30, 1.27, 4.98, 0.66, 2.24, 0.57 -lenet_mnist, 33.55, 0.04, 0.09, 0.04, 0.01, 98.96 +alexnet_nightly, 56.00, 1.20, 5.00, 0.80, 0.40, 0.00 +alexnet_weekly, 0.00, 0.00, 0.00, 0.00, 0.00, 100.00 +cache_alexnet, 0.00, 0.00, 0.00, 0.00, 0.00, 100.00 +lenet_mnist, 88.00, 0.12, 0.40, 0.10, 0.09, 98.40 diff --git a/bamboo/integration_tests/expected_values/catalyst/gcc4/expected_conv_autoencoder_mnist_objective_functions.csv b/bamboo/integration_tests/expected_values/catalyst/gcc4/expected_conv_autoencoder_mnist_objective_functions.csv deleted file mode 100644 index 80c12b2b0ed..00000000000 --- a/bamboo/integration_tests/expected_values/catalyst/gcc4/expected_conv_autoencoder_mnist_objective_functions.csv +++ /dev/null @@ -1,6 +0,0 @@ -Epoch_number, training_objective_function -0, 0.207480 -1, 0.194710 -2, 0.193224 -3, 0.192867 -4, 0.192758 diff --git a/bamboo/integration_tests/expected_values/catalyst/gcc4/expected_performance.csv b/bamboo/integration_tests/expected_values/catalyst/gcc4/expected_performance.csv deleted file mode 100644 index 639e20aa5f4..00000000000 --- a/bamboo/integration_tests/expected_values/catalyst/gcc4/expected_performance.csv +++ /dev/null @@ -1,5 +0,0 @@ -Model_name, training_run_time, training_mean, training_max, training_min, training_stdev, test_accuracy -alexnet_nightly, 63.18, 1.27, 3.11, 0.79, 0.55, 0.00 -alexnet_weekly, 565.30, 1.14, 3.83, 0.76, 0.30, 3.11 -cache_alexnet, 623.30, 1.27, 4.98, 0.66, 2.24, 0.57 -lenet_mnist, 15.61, 0.02, 0.08, 0.02, 0.01, 98.96 diff --git a/bamboo/integration_tests/expected_values/catalyst/gcc7/expected_performance.csv b/bamboo/integration_tests/expected_values/catalyst/gcc7/expected_performance.csv index b315574f51d..d3ac7caa6b4 100644 --- a/bamboo/integration_tests/expected_values/catalyst/gcc7/expected_performance.csv +++ b/bamboo/integration_tests/expected_values/catalyst/gcc7/expected_performance.csv @@ -1,5 +1,5 @@ Model_name, training_run_time, training_mean, training_max, training_min, training_stdev, test_accuracy -alexnet_nightly, 71.14, 1.43, 3.20, 0.98, 0.51, 0.00 -alexnet_weekly, 691.96, 1.40, 4.53, 1.09, 0.22, 1.05 -cache_alexnet, 623.30, 1.27, 4.98, 0.66, 2.24, 0.57 -lenet_mnist, 15.51, 0.02, 0.06, 0.02, 0.01, 99.00 +alexnet_nightly, 57.00, 1.11, 
4.80, 0.37, 1.20, 0.00 +alexnet_weekly, 0.00, 0.00, 0.00, 0.00, 0.00, 100.00 +cache_alexnet, 0.00, 0.00, 0.00, 0.00, 0.00, 100.00 +lenet_mnist, 64.00, 0.10, 0.40, 0.08, 0.04, 98.92 diff --git a/bamboo/integration_tests/expected_values/catalyst/intel18/expected_conv_autoencoder_imagenet_objective_functions.csv b/bamboo/integration_tests/expected_values/catalyst/intel18/expected_conv_autoencoder_imagenet_objective_functions.csv deleted file mode 100644 index 003794fd557..00000000000 --- a/bamboo/integration_tests/expected_values/catalyst/intel18/expected_conv_autoencoder_imagenet_objective_functions.csv +++ /dev/null @@ -1,21 +0,0 @@ -Epoch_number, training_objective_function_nightly, training_objective_function_weekly -0, 0.675652, 0.608574 -1, 0.590008, 0.590008 -2, 0.587484, 0.587484 -3, 0.586305, 0.586305 -4, 0.585585, 0.585585 -5, 0.585036, 0.585036 -6, 0.584688, 0.584688 -7, 0.584348, 0.584348 -8, 0.584041, 0.584041 -9, 0.583865, 0.583865 -10, 0.583665, 0.583665 -11, 0.583521, 0.583521 -12, 0.583303, 0.583303 -13, 0.58328, 0.58328 -14, 0.5832, 0.5832 -15, 0.583134, 0.583134 -16, 0.583052, 0.583052 -17, 0.583039, 0.583039 -18, 0.582954, 0.582954 -19, 0.582936, 0.582936 diff --git a/bamboo/integration_tests/expected_values/catalyst/intel18/expected_conv_autoencoder_mnist_objective_functions.csv b/bamboo/integration_tests/expected_values/catalyst/intel18/expected_conv_autoencoder_mnist_objective_functions.csv deleted file mode 100644 index 80c12b2b0ed..00000000000 --- a/bamboo/integration_tests/expected_values/catalyst/intel18/expected_conv_autoencoder_mnist_objective_functions.csv +++ /dev/null @@ -1,6 +0,0 @@ -Epoch_number, training_objective_function -0, 0.207480 -1, 0.194710 -2, 0.193224 -3, 0.192867 -4, 0.192758 diff --git a/bamboo/integration_tests/expected_values/catalyst/intel18/expected_performance.csv b/bamboo/integration_tests/expected_values/catalyst/intel18/expected_performance.csv deleted file mode 100644 index 4fc534169fe..00000000000 --- a/bamboo/integration_tests/expected_values/catalyst/intel18/expected_performance.csv +++ /dev/null @@ -1,5 +0,0 @@ -Model_name, training_run_time, training_mean, training_max, training_min, training_stdev, test_accuracy -alexnet_nightly, 49.54, 0.96, 3.21, 1.00, 0.62, 0.00 -alexnet_weekly, 402.50, 0.82, 3.36, 0.47, 0.34, 3.27 -cache_alexnet, 623.30, 1.27, 4.98, 0.66, 2.24, 0.57 -lenet_mnist, 20.02, 0.03, 0.09, 0.03, 0.01, 98.91 diff --git a/bamboo/integration_tests/expected_values/catalyst/gcc4/expected_conv_autoencoder_imagenet_objective_functions.csv b/bamboo/integration_tests/expected_values/pascal/gcc7/expected_conv_autoencoder_imagenet_objective_functions.csv similarity index 100% rename from bamboo/integration_tests/expected_values/catalyst/gcc4/expected_conv_autoencoder_imagenet_objective_functions.csv rename to bamboo/integration_tests/expected_values/pascal/gcc7/expected_conv_autoencoder_imagenet_objective_functions.csv diff --git a/bamboo/integration_tests/expected_values/pascal/gcc7/expected_conv_autoencoder_mnist_objective_functions.csv b/bamboo/integration_tests/expected_values/pascal/gcc7/expected_conv_autoencoder_mnist_objective_functions.csv new file mode 100644 index 00000000000..8bcf25bb71d --- /dev/null +++ b/bamboo/integration_tests/expected_values/pascal/gcc7/expected_conv_autoencoder_mnist_objective_functions.csv @@ -0,0 +1,6 @@ +Epoch_number, training_objective_function +0, 0.207514 +1, 0.194710 +2, 0.193221 +3, 0.192864 +4, 0.192755 diff --git 
a/bamboo/integration_tests/expected_values/pascal/gcc7/expected_performance.csv b/bamboo/integration_tests/expected_values/pascal/gcc7/expected_performance.csv new file mode 100644 index 00000000000..cca3451efd2 --- /dev/null +++ b/bamboo/integration_tests/expected_values/pascal/gcc7/expected_performance.csv @@ -0,0 +1,5 @@ +Model_name, training_run_time, training_mean, training_max, training_min, training_stdev, test_accuracy +alexnet_nightly, 51.00, 1.20, 4.00, 0.50, 0.40, 0.17 +alexnet_weekly, 0.00, 0.00, 0.00, 0.00, 0.00, 100.00 +cache_alexnet, 0.00, 0.00, 0.00, 0.00, 0.00, 100.00 +lenet_mnist, 9.00, 0.01, 6.00, 0.01, 0.40, 98.40 diff --git a/bamboo/integration_tests/expected_values/ray/gcc4/expected_conv_autoencoder_imagenet_objective_functions.csv b/bamboo/integration_tests/expected_values/ray/gcc4/expected_conv_autoencoder_imagenet_objective_functions.csv deleted file mode 100644 index 28c4d8c9e98..00000000000 --- a/bamboo/integration_tests/expected_values/ray/gcc4/expected_conv_autoencoder_imagenet_objective_functions.csv +++ /dev/null @@ -1,21 +0,0 @@ -Epoch_number, training_objective_function_nightly, training_objective_function_weekly -0, 0.608574, 0.608574 -1, 0.590008, 0.590008 -2, 0.587484, 0.587484 -3, 0.586305, 0.586305 -4, 0.585585, 0.585585 -5, 0.585036, 0.585036 -6, 0.584688, 0.584688 -7, 0.584348, 0.584348 -8, 0.584041, 0.584041 -9, 0.583865, 0.583865 -10, 0.583665, 0.583665 -11, 0.583521, 0.583521 -12, 0.583303, 0.583303 -13, 0.58328, 0.58328 -14, 0.5832, 0.5832 -15, 0.583134, 0.583134 -16, 0.583052, 0.583052 -17, 0.583039, 0.583039 -18, 0.582954, 0.582954 -19, 0.582936, 0.582936 diff --git a/bamboo/integration_tests/expected_values/ray/gcc4/expected_conv_autoencoder_mnist_objective_functions.csv b/bamboo/integration_tests/expected_values/ray/gcc4/expected_conv_autoencoder_mnist_objective_functions.csv deleted file mode 100644 index 96a9ed9a8b7..00000000000 --- a/bamboo/integration_tests/expected_values/ray/gcc4/expected_conv_autoencoder_mnist_objective_functions.csv +++ /dev/null @@ -1,6 +0,0 @@ -Epoch_number, training_objective_function -0, 0.207587 -1, 0.194595 -2, 0.193141 -3, 0.192808 -4, 0.192716 diff --git a/bamboo/integration_tests/expected_values/ray/gcc4/expected_performance.csv b/bamboo/integration_tests/expected_values/ray/gcc4/expected_performance.csv deleted file mode 100644 index e3331534d90..00000000000 --- a/bamboo/integration_tests/expected_values/ray/gcc4/expected_performance.csv +++ /dev/null @@ -1,5 +0,0 @@ -Model_name, training_run_time, training_mean, training_max, training_min, training_stdev, test_accuracy -alexnet_nightly, 47.42, 0.95, 3.34, 0.54, 0.59, 0.00 -alexnet_weekly, 623.30, 1.27, 4.98, 0.66, 2.24, 0.57 -cache_alexnet, 623.30, 1.27, 4.98, 0.66, 2.24, 0.57 -lenet_mnist, 260.85, 0.31, 0.88, 0.28, 0.03, 98.66 diff --git a/bamboo/integration_tests/expected_values/surface/gcc4/expected_conv_autoencoder_imagenet_objective_functions.csv b/bamboo/integration_tests/expected_values/surface/gcc4/expected_conv_autoencoder_imagenet_objective_functions.csv deleted file mode 100644 index 32d30822dce..00000000000 --- a/bamboo/integration_tests/expected_values/surface/gcc4/expected_conv_autoencoder_imagenet_objective_functions.csv +++ /dev/null @@ -1,21 +0,0 @@ -Epoch_number, training_objective_function_nightly, training_objective_function_weekly -0, 0.608574, 0.608574 -1, 0.590008, 0.590008 -2, 0.587484, 0.587484 -3, 0.586305, 0.586305 -4, 0.585585, 0.585585 -5, 0.585036, 0.585036 -6, 0.584688, 0.584688 -7, 0.584348, 0.584348 -8, 0.584041, 0.584041 
-9, 0.583865, 0.583865 -10, 0.583665, 0.583665 -11, 0.583521, 0.583521 -12, 0.583303, 0.583303 -13, 0.58328, 0.58328 -14, 0.5832, 0.5832 -15, 0.583134, 0.583134 -16, 0.583052, 0.583052 -17, 0.583039, 0.583039 -18, 0.582954, 0.582954 -19, 0.582936, 0.582936 diff --git a/bamboo/integration_tests/expected_values/surface/gcc4/expected_conv_autoencoder_mnist_objective_functions.csv b/bamboo/integration_tests/expected_values/surface/gcc4/expected_conv_autoencoder_mnist_objective_functions.csv deleted file mode 100644 index 96a9ed9a8b7..00000000000 --- a/bamboo/integration_tests/expected_values/surface/gcc4/expected_conv_autoencoder_mnist_objective_functions.csv +++ /dev/null @@ -1,6 +0,0 @@ -Epoch_number, training_objective_function -0, 0.207587 -1, 0.194595 -2, 0.193141 -3, 0.192808 -4, 0.192716 diff --git a/bamboo/integration_tests/expected_values/surface/gcc4/expected_performance.csv b/bamboo/integration_tests/expected_values/surface/gcc4/expected_performance.csv deleted file mode 100644 index 0e8c11d0edc..00000000000 --- a/bamboo/integration_tests/expected_values/surface/gcc4/expected_performance.csv +++ /dev/null @@ -1,5 +0,0 @@ -Model_name, training_run_time, training_mean, training_max, training_min, training_stdev, test_accuracy -alexnet_nightly, 39.60, 0.80, 5.15, 0.37, 0.69, 0.00 -alexnet_weekly, 623.30, 1.27, 7.37, 0.66, 2.24, 0.15 -cache_alexnet, 623.30, 1.27, 4.98, 0.66, 2.24, 0.57 -lenet_mnist, 21.91, 0.04, 1.95, 0.04, 0.07, 98.66 diff --git a/bamboo/integration_tests/test_integration_autoencoders.py b/bamboo/integration_tests/test_integration_autoencoders.py index 4fbe0172d0f..5f021ce6f53 100644 --- a/bamboo/integration_tests/test_integration_autoencoders.py +++ b/bamboo/integration_tests/test_integration_autoencoders.py @@ -1,36 +1,47 @@ import pytest import common_code -def error_if(f, f_symbol, data_field, actual_values, expected_values, model_name, errors, all_values, frequency_str): + +def error_if(f, f_symbol, data_field, actual_values, expected_values, + model_name, errors, all_values, frequency_str): d = actual_values[data_field] for model_id in sorted(d.keys()): for epoch_id in sorted(d[model_id].keys()): actual_value = d[model_id][epoch_id] expected_value = expected_values[epoch_id][data_field + frequency_str] - if actual_value == None: + if actual_value is None: errors.append('d[%s][%s] == None' % (model_id, epoch_id)) - if expected_value == None: + if expected_value is None: errors.append('d[%s]([%s] == None' % (model_id, epoch_id)) if f(actual_value, expected_value): - errors.append('%f %s %f %s Model %s Epoch %s %s' % (actual_value, f_symbol, expected_value, model_name, model_id, epoch_id, data_field)) - all_values.append('%f %s Model %s Epoch %s %s' % (actual_value, model_name, model_id, epoch_id, data_field)) + errors.append('%f %s %f %s Model %s Epoch %s %s' % ( + actual_value, f_symbol, expected_value, model_name, model_id, + epoch_id, data_field)) + all_values.append('%f %s Model %s Epoch %s %s' % ( + actual_value, model_name, model_id, epoch_id, data_field)) + -def run_tests(actual_objective_functions, model_name, dir_name, cluster, should_log, compiler_name, frequency_str=''): - expected_objective_functions = common_code.csv_to_dict('%s/bamboo/integration_tests/expected_values/%s/%s/expected_%s_objective_functions.csv' % (dir_name, cluster, compiler_name, model_name)) +def run_tests(actual_objective_functions, model_name, dir_name, cluster, + should_log, compiler_name, frequency_str=''): + expected_objective_functions = common_code.csv_to_dict( + 
'%s/bamboo/integration_tests/expected_values/%s/%s/expected_%s_objective_functions.csv' % (dir_name, cluster, compiler_name, model_name)) errors = [] all_values = [] tolerance = 0.05 # Are we within tolerance * expected_value? - outside_tolerance = lambda x,y: abs(x - y) > abs(tolerance * y) - error_if(outside_tolerance, '!=', 'training_objective_function', actual_objective_functions, expected_objective_functions, model_name, errors, all_values, frequency_str) + outside_tolerance = lambda x, y: abs(x - y) > abs(tolerance * y) + error_if(outside_tolerance, '!=', 'training_objective_function', + actual_objective_functions, expected_objective_functions, + model_name, errors, all_values, frequency_str) print('Errors for: %s %s (%d)' % (model_name, compiler_name, len(errors))) for error in errors: print(error) if should_log: - print('All values for: %s %s (%d)' % (model_name, compiler_name, len(all_values))) + print('All values for: %s %s (%d)' % (model_name, compiler_name, + len(all_values))) for value in all_values: print(value) assert errors == [] @@ -39,81 +50,53 @@ def run_tests(actual_objective_functions, model_name, dir_name, cluster, should_ 'training_objective_function' ] -def skeleton_autoencoder_mnist(cluster, dir_name, executables, compiler_name): - if compiler_name not in executables: - pytest.skip('default_exes[%s] does not exist' % compiler_name) - model_folder = 'models/autoencoder_mnist' - model_name = 'conv_autoencoder_mnist' - should_log=False - actual_objective_functions = common_code.skeleton(cluster, dir_name, executables[compiler_name], model_folder, model_name, DATA_FIELDS, should_log, compiler_name=compiler_name) - run_tests(actual_objective_functions, model_name, dir_name, cluster, should_log, compiler_name) - -def skeleton_autoencoder_imagenet(cluster, dir_name, executables, compiler_name, weekly): - if cluster == 'surface': - pytest.skip('skeleton_autoencoder_imagenet does not run on surface') + +def skeleton_autoencoder_imagenet(cluster, dir_name, executables, compiler_name, + weekly): + if cluster in ['surface', 'pascal']: + e = 'skeleton_autoencoder_imagenet: does not run on GPU' + print('Skip - ' + e) + pytest.skip(e) if compiler_name not in executables: - pytest.skip('default_exes[%s] does not exist' % compiler_name) + e = 'skeleton_autoencoder_imagenet: default_exes[%s] does not exist' % compiler_name + print('Skip - ' + e) + pytest.skip(e) model_folder = 'models/autoencoder_imagenet' model_name = 'conv_autoencoder_imagenet' should_log = False - actual_objective_functions = common_code.skeleton(cluster, dir_name, executables[compiler_name], model_folder, model_name, DATA_FIELDS, should_log, compiler_name=compiler_name, weekly=weekly) + actual_objective_functions = common_code.skeleton( + cluster, dir_name, executables[compiler_name], model_folder, model_name, + DATA_FIELDS, should_log, compiler_name=compiler_name, weekly=weekly) frequency_str = '_nightly' if weekly: frequency_str = '_weekly' - run_tests(actual_objective_functions, model_name, dir_name, cluster, should_log, compiler_name, frequency_str) - -def test_integration_autoencoder_mnist_clang4(cluster, dirname, exes): - if cluster in ['catalyst', 'quartz']: - pytest.skip('FIXME') - # Catalyst Errors: - # 0.219298 != 0.207480 conv_autoencoder_mnist Model 0 Epoch 0 training_objective_function - skeleton_autoencoder_mnist(cluster, dirname, exes, 'clang4') - -def test_integration_autoencoder_imagenet_clang4(cluster, dirname, exes, weekly): - skeleton_autoencoder_imagenet(cluster, dirname, exes, 'clang4', 
weekly) - -def test_integration_autoencoder_mnist_gcc4(cluster, dirname, exes): - if cluster in ['catalyst', 'quartz', 'surface']: - pytest.skip('FIXME') - # Catalyst Errors: - # 0.219298 != 0.207480 conv_autoencoder_mnist Model 0 Epoch 0 training_objective_function - # Surface Errors: - # 0.053411 != 0.207587 conv_autoencoder_mnist Model 0 Epoch 0 training_objective_function - # 0.026719 != 0.194595 conv_autoencoder_mnist Model 0 Epoch 1 training_objective_function - # 0.024882 != 0.193141 conv_autoencoder_mnist Model 0 Epoch 2 training_objective_function - # 0.023039 != 0.192808 conv_autoencoder_mnist Model 0 Epoch 3 training_objective_function - # 0.023243 != 0.192716 conv_autoencoder_mnist Model 0 Epoch 4 training_objective_function - skeleton_autoencoder_mnist(cluster, dirname, exes, 'gcc4') + run_tests(actual_objective_functions, model_name, dir_name, cluster, + should_log, compiler_name, frequency_str) + + +def test_integration_autoencoder_imagenet_clang4(cluster, dirname, exes, + weekly): + skeleton_autoencoder_imagenet(cluster, dirname, exes, 'clang4', weekly) + def test_integration_autoencoder_imagenet_gcc4(cluster, dirname, exes, weekly): - skeleton_autoencoder_imagenet(cluster, dirname, exes, 'gcc4', weekly) - -def test_integration_autoencoder_mnist_gcc7(cluster, dirname, exes): - if cluster in ['catalyst', 'quartz']: - pytest.skip('FIXME') - # Catalyst Errors: - # 0.219383 != 0.207514 conv_autoencoder_mnist Model 0 Epoch 0 training_objective_function - skeleton_autoencoder_mnist(cluster, dirname, exes, 'gcc7') + skeleton_autoencoder_imagenet(cluster, dirname, exes, 'gcc4', weekly) + def test_integration_autoencoder_imagenet_gcc7(cluster, dirname, exes, weekly): - skeleton_autoencoder_imagenet(cluster, dirname, exes, 'gcc7', weekly) - -def test_integration_autoencoder_mnist_intel18(cluster, dirname, exes): - skeleton_autoencoder_mnist(cluster, dirname, exes, 'intel18') + skeleton_autoencoder_imagenet(cluster, dirname, exes, 'gcc7', weekly) + -def test_integration_autoencoder_imagenet_intel18(cluster, dirname, exes, weekly): - skeleton_autoencoder_imagenet(cluster, dirname, exes, 'intel18', weekly) +def test_integration_autoencoder_imagenet_intel18(cluster, dirname, exes, + weekly): + skeleton_autoencoder_imagenet(cluster, dirname, exes, 'intel18', weekly) -# Run with python -m pytest -s test_integration_autoencoder.py -k 'test_integration_autoencoder_mnist_exe' --exe= -def test_integration_autoencoder_mnist_exe(cluster, dirname, exe): - if exe == None: - pytest.skip('Non-local testing') - exes = {'exe' : exe} - skeleton_autoencoder_mnist(cluster, dirname, exes, 'exe', True) # Run with python -m pytest -s test_integration_autoencoder.py -k 'test_integration_autoencoder_imagenet_exe' --exe= def test_integration_autoencoder_imagenet_exe(cluster, dirname, exe): - if exe == None: - pytest.skip('Non-local testing') - exes = {'exe' : exe} + if exe is None: + e = 'test_integration_autoencoder_imagenet_exe: Non-local testing' + print('Skip - ' + e) + pytest.skip(e) + exes = {'exe': exe} skeleton_autoencoder_imagenet(cluster, dirname, exes, 'exe', True) diff --git a/bamboo/integration_tests/test_integration_debug.py b/bamboo/integration_tests/test_integration_debug.py index 1744e3243d4..c205dffb24c 100644 --- a/bamboo/integration_tests/test_integration_debug.py +++ b/bamboo/integration_tests/test_integration_debug.py @@ -2,15 +2,20 @@ sys.path.insert(0, '../common_python') import tools import pytest -import os import common_code -def skeleton_mnist_debug(cluster, dir_name, executables, 
compiler_name, weekly, debug, should_log=False): + +def skeleton_mnist_debug(cluster, dir_name, executables, compiler_name, weekly, + debug, should_log=False): # If weekly or debug are true, then run the test. if (not weekly) and (not debug): - pytest.skip('Not doing weekly or debug testing') + e = 'skeleton_mnist_debug: Not doing weekly or debug testing' + print('Skip - ' + e) + pytest.skip(e) if compiler_name not in executables: - pytest.skip('default_exes[%s] does not exist' % compiler_name) + e = 'skeleton_mnist_debug: default_exes[%s] does not exist' % compiler_name + print('Skip - ' + e) + pytest.skip(e) model_name = 'lenet_mnist' output_file_name = '%s/bamboo/integration_tests/output/%s_%s_output.txt' %(dir_name, model_name, compiler_name) error_file_name = '%s/bamboo/integration_tests/error/%s_%s_error.txt' %(dir_name, model_name, compiler_name) @@ -24,14 +29,22 @@ def skeleton_mnist_debug(cluster, dir_name, executables, compiler_name, weekly, output_value = common_code.run_lbann(command, model_name, output_file_name, error_file_name) assert output_value == 0 -def skeleton_cifar_debug(cluster, dir_name, executables, compiler_name, weekly, debug, should_log=False): + +def skeleton_cifar_debug(cluster, dir_name, executables, compiler_name, weekly, + debug, should_log=False): # If weekly or debug are true, then run the test. if (not weekly) and (not debug): - pytest.skip('Not doing weekly or debug testing') + e = 'skeleton_cifar_debug: Not doing weekly or debug testing' + print('Skip - ' + e) + pytest.skip(e) if cluster == 'ray': - pytest.skip('cifar not operational on Ray') + e = 'skeleton_cifar_debug: cifar not operational on Ray' + print('Skip - ' + e) + pytest.skip(e) if compiler_name not in executables: - pytest.skip('default_exes[%s] does not exist' % compiler_name) + e = 'skeleton_cifar_debug: default_exes[%s] does not exist' % compiler_name + print('Skip - ' + e) + pytest.skip(e) model_name = 'autoencoder_cifar10' output_file_name = '%s/bamboo/integration_tests/output/%s_%s_output.txt' %(dir_name, model_name, compiler_name) error_file_name = '%s/bamboo/integration_tests/error/%s_%s_error.txt' %(dir_name, model_name, compiler_name) @@ -46,40 +59,54 @@ def skeleton_cifar_debug(cluster, dir_name, executables, compiler_name, weekly, output_value = common_code.run_lbann(command, model_name, output_file_name, error_file_name) assert output_value == 0 + def test_integration_mnist_clang4_debug(cluster, dirname, exes, weekly, debug): skeleton_mnist_debug(cluster, dirname, exes, 'clang4_debug', weekly, debug) + def test_integration_cifar_clang4_debug(cluster, dirname, exes, weekly, debug): skeleton_cifar_debug(cluster, dirname, exes, 'clang4_debug', weekly, debug) + def test_integration_mnist_gcc4_debug(cluster, dirname, exes, weekly, debug): skeleton_mnist_debug(cluster, dirname, exes, 'gcc4_debug', weekly, debug) + def test_integration_cifar_gcc4_debug(cluster, dirname, exes, weekly, debug): skeleton_cifar_debug(cluster, dirname, exes, 'gcc4_debug', weekly, debug) + def test_integration_mnist_gcc7_debug(cluster, dirname, exes, weekly, debug): skeleton_mnist_debug(cluster, dirname, exes, 'gcc7_debug', weekly, debug) + def test_integration_cifar_gcc7_debug(cluster, dirname, exes, weekly, debug): skeleton_cifar_debug(cluster, dirname, exes, 'gcc7_debug', weekly, debug) + def test_integration_mnist_intel18_debug(cluster, dirname, exes, weekly, debug): skeleton_mnist_debug(cluster, dirname, exes, 'intel18_debug', weekly, debug) + def test_integration_cifar_intel18_debug(cluster, dirname, 
exes, weekly, debug): skeleton_cifar_debug(cluster, dirname, exes, 'intel18_debug', weekly, debug) + # Run with python -m pytest -s test_integration_debug.py -k 'test_integration_mnist_exe' --exe= def test_integration_mnist_exe(cluster, dirname, exe): - if exe == None: - pytest.skip('Non-local testing') - exes = {'exe' : exe} + if exe is None: + e = 'test_integration_mnist_exe: Non-local testing' + print('Skip - ' + e) + pytest.skip(e) + exes = {'exe': exe} skeleton_mnist_debug(cluster, dirname, exes, 'exe', True, True) + # Run with python -m pytest -s test_integration_debug.py -k 'test_integration_cifar_exe' --exe= def test_integration_cifar_exe(cluster, dirname, exe): if exe == None: - pytest.skip('Non-local testing') - exes = {'exe' : exe} + e = 'test_integration_cifar_exe: Non-local testing' + print('Skip - ' + e) + pytest.skip(e) + exes = {'exe': exe} skeleton_cifar_debug(cluster, dirname, exes, 'exe', True, True) diff --git a/bamboo/integration_tests/test_integration_io_buffers.py b/bamboo/integration_tests/test_integration_io_buffers.py deleted file mode 100644 index 9132b36ba83..00000000000 --- a/bamboo/integration_tests/test_integration_io_buffers.py +++ /dev/null @@ -1,125 +0,0 @@ -import sys -sys.path.insert(0, '../common_python') -import tools -import pytest -import os, sys -import common_code - -def skeleton_io_buffers(cluster, dir_name, executables, compiler_name, weekly): - if not weekly: - pytest.skip('Not doing weekly testing') - if cluster == 'surface': - pytest.skip('skeleton_io_buffers does not run on surface') - if compiler_name not in executables: - pytest.skip('default_exes[%s] does not exist' % compiler_name) - max_mb = 300 - # Printing output from 6*6*2=72 runs of LBANN makes the logs too slow. - # Output from run_lbann is still printed - if there is a failure. - should_log = False - partitioned = 'mnist_partitioned_io' - distributed = 'mnist_distributed_io' - model_names = [partitioned, distributed] - accuracies = {} - errors = [] - all_values = [] - fatal_errors = [] - overall_min_partitioned_accuracy = float('inf') - overall_min_distributed_accuracy = float('inf') - for mini_batch_size in [300, 150, 100, 75, 60, 50]: - num_models = max_mb / mini_batch_size - for procs_per_model in [1, 2, 3, 4, 5, 6]: - num_ranks = procs_per_model * num_models - for model_name in model_names: - output_file_name = '%s/bamboo/integration_tests/output/%s_%d_%d_output.txt' % (dir_name, model_name, mini_batch_size, procs_per_model) - error_file_name = '%s/bamboo/integration_tests/error/%s_%d_%d_error.txt' % (dir_name, model_name, mini_batch_size, procs_per_model) - command = tools.get_command( - cluster=cluster, executable=executables[compiler_name], num_nodes=2, - num_processes=num_ranks, dir_name=dir_name, - data_filedir_default='/p/lscratchh/brainusr/datasets/MNIST', - data_reader_name='mnist', mini_batch_size=mini_batch_size, - model_folder='tests', model_name=model_name, num_epochs=5, - optimizer_name='adagrad', - processes_per_model=procs_per_model, - output_file_name=output_file_name, error_file_name=error_file_name) - try: - common_code.run_lbann(command, model_name, output_file_name, error_file_name, should_log) # Don't need return value - accuracy_dict = common_code.extract_data(output_file_name, ['test_accuracy'], should_log) - accuracies[model_name] = accuracy_dict['test_accuracy'] - except Exception: - # We want to keep running to see if any other mini_batch_size & procs_per_model combination crashes. - # However, it is now pointless to compare accuracies. 
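Note: the accuracy comparison in this deleted test, like the outside_tolerance lambda in run_tests above, is a relative-tolerance check; restated standalone for reference:

    def outside_tolerance(actual, expected, tolerance=0.05):
        # True when actual differs from expected by more than 5% of expected
        return abs(actual - expected) > abs(tolerance * expected)

    # outside_tolerance(98.0, 98.4) -> False (within 5%, passes)
    # outside_tolerance(88.0, 98.4) -> True  (flagged as an error)
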
- fatal_errors.append('Crashed running %s with mini_batch_size=%d, procs_per_model=%d' % (model_name, mini_batch_size, procs_per_model)) - # End model name loop - if fatal_errors == []: - partitioned_num_models = len(accuracies[partitioned].keys()) - distributed_num_models = len(accuracies[distributed].keys()) - assert partitioned_num_models == distributed_num_models - - min_partitioned_accuracy = float('inf') - min_distributed_accuracy = float('inf') - for model_num in sorted(accuracies[partitioned].keys()): - partitioned_accuracy = accuracies[partitioned][model_num]['overall'] - distributed_accuracy = accuracies[distributed][model_num]['overall'] - if partitioned_accuracy < min_partitioned_accuracy: - min_partitioned_accuracy = partitioned_accuracy - if distributed_accuracy < min_distributed_accuracy: - min_distributed_accuracy = distributed_accuracy - tolerance = 0.05 - # Are we within tolerance * expected_value? - if abs(partitioned_accuracy - distributed_accuracy) > abs(tolerance * min(partitioned_accuracy, distributed_accuracy)): - errors.append('partitioned = %f != %f = distributed; model_num=%s mini_batch_size=%d procs_per_model=%d' % (partitioned_accuracy, distributed_accuracy, model_num, mini_batch_size, procs_per_model)) - all_values.append('partitioned = %f, %f = distributed; model_num=%s mini_batch_size=%d procs_per_model=%d' % (partitioned_accuracy, distributed_accuracy, model_num, mini_batch_size, procs_per_model)) - # End model_num loop - if min_partitioned_accuracy < overall_min_partitioned_accuracy: - overall_min_partitioned_accuracy = min_partitioned_accuracy - if min_distributed_accuracy < overall_min_distributed_accuracy: - overall_min_distributed_accuracy = min_distributed_accuracy - # End fatal_errors == [] block - # End procs_per_model loop - # End mini_batch_size loop - for fatal_error in fatal_errors: - print(fatal_error) - assert fatal_errors == [] - # If there were no fatal errors, archive the accuracies. 
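Note: the archiving idiom in the block above (and reused in test_integration_performance below) gates on the lbannusr account and an archiving-enabled Bamboo plan before appending one record per build. A condensed sketch; the helper name archive_record is hypothetical:

    import os

    def archive_record(archive_file, values):
        # Only archive from the lbannusr account under an archiving-enabled plan.
        if os.environ.get('LOGNAME') != 'lbannusr':
            return
        plan = os.environ.get('bamboo_planKey')
        if plan not in ['LBANN-NIGHTD', 'LBANN-WD']:
            return
        with open(archive_file, 'a') as archive:
            archive.write(', '.join(str(v) for v in values) + '\n')
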
- if os.environ['LOGNAME'] == 'lbannusr': - key = 'bamboo_planKey' - if key in os.environ: - plan = os.environ[key] - if plan in ['LBANN-NIGHTD', 'LBANN-WD']: - archive_file = '/usr/workspace/wsb/lbannusr/archives/%s/%s/%s/io_buffers.txt' % (plan, cluster, compiler_name) - with open(archive_file, 'a') as archive: - archive.write('%s, %f, %f\n' % (os.environ['bamboo_buildNumber'], overall_min_partitioned_accuracy, overall_min_distributed_accuracy)) - else: - print('The plan %s does not have archiving activated' % plan) - else: - print('%s is not in os.environ' % key) - else: - print('os.environ["LOGNAME"]=%s' % os.environ['LOGNAME']) - - print('Errors for: partitioned_and_distributed %s (%d)' % (compiler_name, len(errors))) - for error in errors: - print(error) - if should_log: - print('All values for: partitioned_and_distributed %s (%d)' % (compiler_name, len(all_values))) - for value in all_values: - print(value) - assert errors == [] - -def test_integration_io_buffers_clang4(cluster, dirname, exes, weekly): - skeleton_io_buffers(cluster, dirname, exes, 'clang4', weekly) - -def test_integration_io_buffers_gcc4(cluster, dirname, exes, weekly): - skeleton_io_buffers(cluster, dirname, exes, 'gcc4', weekly) - -def test_integration_io_buffers_gcc7(cluster, dirname, exes, weekly): - skeleton_io_buffers(cluster, dirname, exes, 'gcc7', weekly) - -def test_integration_io_buffers_intel18(cluster, dirname, exes, weekly): - skeleton_io_buffers(cluster, dirname, exes, 'intel18', weekly) - -# Run with python -m pytest -s test_integration_io_buffers.py -k 'test_integration_io_buffers_exe' --exe= -def test_integration_performance_io_buffers_exe(cluster, dirname, exe): - if exe == None: - pytest.skip('Non-local testing') - exes = {'exe' : exe} - skeleton_io_buffers(cluster, dirname, exes, 'exe', True) diff --git a/bamboo/integration_tests/test_integration_performance.py b/bamboo/integration_tests/test_integration_performance.py index da5e6472762..a171184ba5e 100644 --- a/bamboo/integration_tests/test_integration_performance.py +++ b/bamboo/integration_tests/test_integration_performance.py @@ -2,7 +2,9 @@ import operator, os import common_code -def error_if(f, f_symbol, data_field, actual_values, expected_values, model_name, errors, all_values, frequency_str): + +def error_if(f, f_symbol, data_field, actual_values, expected_values, + model_name, errors, all_values, frequency_str): d = actual_values[data_field] if f_symbol == '<': # Every time a value is smaller, update archive_value @@ -17,40 +19,60 @@ def error_if(f, f_symbol, data_field, actual_values, expected_values, model_name actual_value = d[model_id][epoch_id] expected_value = expected_values[model_name + frequency_str][data_field] - if actual_value == None: - errors.append('d[%s][%s] == None' % (model_id, epoch_id)) - if expected_value == None: - errors.append('d[%s]([%s] == None' % (model_id, epoch_id)) - - if f(actual_value, expected_value): - errors.append('%f %s %f %s Model %s Epoch %s %s' % (actual_value, f_symbol, expected_value, model_name, model_id, epoch_id, data_field)) - all_values.append('%f %s Model %s Epoch %s %s' % (actual_value, model_name, model_id, epoch_id, data_field)) - - if f(actual_value, archive_value): - archive_value = actual_value + if actual_value is None: + errors.append('actual_value: d[%s][%s] is None' % (model_id, epoch_id)) + else: + print('actual_value={av}'.format(av=actual_value)) + if expected_value is None: + errors.append( + 'expected_value: d[%s][%s] is None' % (model_id, epoch_id)) + else: + 
print('expected_value={ev}'.format(ev=expected_value)) + + if (actual_value is not None) and (expected_value is not None): + if f(actual_value, expected_value): + errors.append('%f %s %f %s Model %s Epoch %s %s' % ( + actual_value, f_symbol, expected_value, model_name, model_id, + epoch_id, data_field)) + all_values.append('%f %s Model %s Epoch %s %s' % ( + actual_value, model_name, model_id, epoch_id, data_field)) + + if f(actual_value, archive_value): + archive_value = actual_value + else: + print('archiving: either actual_value or expected_value is None.') return archive_value -def run_tests(actual_performance, model_name, dir_name, should_log, compiler_name, cluster, frequency_str=''): - expected_performance = common_code.csv_to_dict('%s/bamboo/integration_tests/expected_values/%s/%s/expected_performance.csv' % (dir_name, cluster, compiler_name)) + +def run_tests(actual_performance, model_name, dir_name, should_log, + compiler_name, cluster, frequency_str=''): + expected_performance = common_code.csv_to_dict( + '%s/bamboo/integration_tests/expected_values/%s/%s/expected_performance.csv' % (dir_name, cluster, compiler_name)) errors = [] all_values = [] greater_than = operator.gt less_than = operator.lt max_run_time = error_if(greater_than, '>', 'training_run_time', actual_performance, expected_performance, model_name, errors, all_values, frequency_str) - max_mean = error_if(greater_than, '>', 'training_mean', actual_performance, expected_performance, model_name, errors, all_values, frequency_str) - max_max = error_if(greater_than, '>', 'training_max', actual_performance, expected_performance, model_name, errors, all_values, frequency_str) - max_min = error_if(greater_than, '>', 'training_min', actual_performance, expected_performance, model_name, errors, all_values, frequency_str) - max_stdev = error_if(greater_than, '>', 'training_stdev', actual_performance, expected_performance, model_name, errors, all_values, frequency_str) + max_mean = error_if(greater_than, '>', 'training_mean', actual_performance, expected_performance, model_name, errors, all_values, frequency_str) + max_max = error_if(greater_than, '>', 'training_max', actual_performance, expected_performance, model_name, errors, all_values, frequency_str) + max_min = error_if(greater_than, '>', 'training_min', actual_performance, expected_performance, model_name, errors, all_values, frequency_str) + max_stdev = error_if(greater_than, '>', 'training_stdev', actual_performance, expected_performance, model_name, errors, all_values, frequency_str) min_accuracy = error_if(less_than, '<', 'test_accuracy', actual_performance, expected_performance, model_name, errors, all_values, frequency_str) + archival_string = '%s, %f, %f, %f, %f, %f, %f\n' % ( + os.environ['bamboo_buildNumber'], max_run_time, max_mean, max_max, max_min, + max_stdev, min_accuracy) + print('archival_string: ' + archival_string) if os.environ['LOGNAME'] == 'lbannusr': key = 'bamboo_planKey' if key in os.environ: plan = os.environ[key] if plan in ['LBANN-NIGHTD', 'LBANN-WD']: archive_file = '/usr/workspace/wsb/lbannusr/archives/%s/%s/%s/performance_%s.txt' % (plan, cluster, compiler_name, model_name) + print('Archive file: ' + archive_file) with open(archive_file, 'a') as archive: - archive.write('%s, %f, %f, %f, %f, %f, %f\n' % (os.environ['bamboo_buildNumber'], max_run_time, max_mean, max_max, max_min, max_stdev, min_accuracy)) + print('Archiving to file.') + archive.write(archival_string) else: print('The plan %s does not have archiving activated' % plan) else: @@ 
-62,7 +84,8 @@ def run_tests(actual_performance, model_name, dir_name, should_log, compiler_nam for error in errors: print(error) if should_log: - print('All values for: %s %s (%d)' % (model_name, compiler_name, len(all_values))) + print('All values for: %s %s (%d)' % ( + model_name, compiler_name, len(all_values))) for value in all_values: print(value) assert errors == [] @@ -76,133 +99,154 @@ def run_tests(actual_performance, model_name, dir_name, should_log, compiler_nam 'test_accuracy' ] -def skeleton_performance_lenet_mnist(cluster, dir_name, executables, compiler_name): + +def skeleton_performance_lenet_mnist(cluster, dir_name, executables, + compiler_name): if compiler_name not in executables: - pytest.skip('default_exes[%s] does not exist' % compiler_name) + e = 'skeleton_performance_lenet_mnist: default_exes[%s] does not exist' % compiler_name + print('Skip - ' + e) + pytest.skip(e) executable = executables[compiler_name] model_name = 'lenet_mnist' model_folder = 'models/' + model_name - should_log = False - actual_performance = common_code.skeleton(cluster, dir_name, executable, model_folder, model_name, DATA_FIELDS, should_log, compiler_name=compiler_name) - run_tests(actual_performance, model_name, dir_name, should_log, compiler_name, cluster) + should_log = True + actual_performance = common_code.skeleton( + cluster, dir_name, executable, model_folder, model_name, DATA_FIELDS, + should_log, compiler_name=compiler_name) + run_tests(actual_performance, model_name, dir_name, should_log, + compiler_name, cluster) + -def skeleton_performance_alexnet(cluster, dir_name, executables, compiler_name, weekly): +def skeleton_performance_alexnet(cluster, dir_name, executables, compiler_name, + weekly): if compiler_name not in executables: - pytest.skip('default_exes[%s] does not exist' % compiler_name) + e = 'skeleton_performance_alexnet: default_exes[%s] does not exist' % compiler_name + print('Skip - ' + e) + pytest.skip(e) executable = executables[compiler_name] model_name = 'alexnet' model_folder = 'models/' + model_name - should_log = False - actual_performance = common_code.skeleton(cluster, dir_name, executable, model_folder, model_name, DATA_FIELDS, should_log, compiler_name=compiler_name, weekly=weekly) + should_log = True + actual_performance = common_code.skeleton( + cluster, dir_name, executable, model_folder, model_name, DATA_FIELDS, + should_log, compiler_name=compiler_name, weekly=weekly) frequency_str = '_nightly' if weekly: frequency_str = '_weekly' - run_tests(actual_performance, model_name, dir_name, should_log, compiler_name, cluster, frequency_str) + run_tests(actual_performance, model_name, dir_name, should_log, + compiler_name, cluster, frequency_str) + -def skeleton_performance_full_alexnet(cluster, dir_name, executables, compiler_name, weekly): +def skeleton_performance_full_alexnet(cluster, dir_name, executables, + compiler_name, weekly): if not weekly: - pytest.skip('Not doing weekly testing') + e = 'skeleton_performance_full_alexnet: Not doing weekly testing' + print('Skip - ' + e) + pytest.skip(e) if compiler_name not in executables: - pytest.skip('default_exes[%s] does not exist' % compiler_name) + e = 'skeleton_performance_full_alexnet: default_exes[%s] does not exist' % compiler_name + print('Skip - ' + e) + pytest.skip(e) executable = executables[compiler_name] if not os.path.exists(executable): pytest.skip('Executable does not exist: %s' % executable) model_name = 'full_alexnet' - should_log = False + should_log = True output_file_name = 
'%s/bamboo/integration_tests/output/%s_%s_output.txt' %(dir_name, model_name, compiler_name) error_file_name = '%s/bamboo/integration_tests/error/%s_%s_error.txt' %(dir_name, model_name, compiler_name) - if (cluster in ['catalyst', 'surface']): + if cluster in ['catalyst', 'surface']: command = 'salloc %s/bamboo/integration_tests/%s.sh > %s' % (dir_name, model_name, output_file_name) elif cluster == 'ray': - pytest.skip('Ray is unsupported for skeleton_performance_full_alexnet') + e = 'skeleton_performance_full_alexnet: Ray is unsupported for skeleton_performance_full_alexnet' + print('Skip - ' + e) + pytest.skip(e) else: raise Exception('Unsupported Cluster %s' % cluster) - common_code.run_lbann(command, model_name, output_file_name, error_file_name, should_log) # Don't need return value - actual_performance = common_code.extract_data(output_file_name, DATA_FIELDS, should_log) - run_tests(actual_performance, model_name, dirname, should_log, compiler_name, cluster) + common_code.run_lbann(command, model_name, output_file_name, error_file_name, + should_log) # Don't need return value + actual_performance = common_code.extract_data(output_file_name, DATA_FIELDS, + should_log) + run_tests(actual_performance, model_name, dir_name, should_log, compiler_name, + cluster) + def test_integration_performance_lenet_mnist_clang4(cluster, dirname, exes): - if cluster in ['catalyst', 'quartz']: - pytest.skip('FIXME') - # Catalyst Errors: - # 0.104416 > 0.090000 lenet_mnist Model 0 Epoch 0 training_max - # 98.770000 < 98.960000 lenet_mnist Model 0 Epoch overall test_accuracy skeleton_performance_lenet_mnist(cluster, dirname, exes, 'clang4') - + + def test_integration_performance_alexnet_clang4(cluster, dirname, exes, weekly): skeleton_performance_alexnet(cluster, dirname, exes, 'clang4', weekly) -def test_integration_performance_full_alexnet_clang4(cluster, dirname, exes, weekly): + +def test_integration_performance_full_alexnet_clang4(cluster, dirname, exes, + weekly): skeleton_performance_full_alexnet(cluster, dirname, exes, 'clang4', weekly) - + + def test_integration_performance_lenet_mnist_gcc4(cluster, dirname, exes): - if cluster in ['catalyst', 'quartz', 'surface']: - pytest.skip('FIXME') - # Catalyst Errors: - # 15.634300 > 15.610000 lenet_mnist Model 0 Epoch 3 training_run_time - # 15.655200 > 15.610000 lenet_mnist Model 0 Epoch 4 training_run_time - # 98.770000 < 98.960000 lenet_mnist Model 0 Epoch overall test_accuracy - # Surface Errors: - # [surface145:mpi_rank_0][error_sighandler] Caught error: Segmentation fault (signal 11) - # srun: error: surface145: task 0: Segmentation fault (core dumped) skeleton_performance_lenet_mnist(cluster, dirname, exes, 'gcc4') + def test_integration_performance_alexnet_gcc4(cluster, dirname, exes, weekly): - if cluster in ['surface']: - pytest.skip('FIXME') - # Surface Errors: - # [surface59:mpi_rank_0][error_sighandler] Caught error: Segmentation fault (signal 11) - # srun: error: surface59: task 0: Segmentation fault (core dumped) skeleton_performance_alexnet(cluster, dirname, exes, 'gcc4', weekly) + def test_integration_performance_full_alexnet_gcc4(cluster, dirname, exes, weekly): skeleton_performance_full_alexnet(cluster, dirname, exes, 'gcc4', weekly) + def test_integration_performance_lenet_mnist_gcc7(cluster, dirname, exes): - if cluster in ['catalyst', 'quartz']: - pytest.skip('FIXME') - # Catalyst Errors: - # 15.522700 > 15.510000 lenet_mnist Model 0 Epoch 4 training_run_time - # 98.950000 < 99.000000 lenet_mnist Model 0 Epoch overall test_accuracy 
skeleton_performance_lenet_mnist(cluster, dirname, exes, 'gcc7') + def test_integration_performance_alexnet_gcc7(cluster, dirname, exes, weekly): - if cluster in ['catalyst', 'quartz']: - pytest.skip('FIXME') - # Catalyst Errors: - # 0.546884 > 0.510000 alexnet Model 0 Epoch 17 training_stdev skeleton_performance_alexnet(cluster, dirname, exes, 'gcc7', weekly) -def test_integration_performance_full_alexnet_gcc7(cluster, dirname, exes, weekly): + +def test_integration_performance_full_alexnet_gcc7(cluster, dirname, exes, + weekly): skeleton_performance_full_alexnet(cluster, dirname, exes, 'gcc7', weekly) + def test_integration_performance_lenet_mnist_intel18(cluster, dirname, exes): skeleton_performance_lenet_mnist(cluster, dirname, exes, 'intel18') -def test_integration_performance_alexnet_intel18(cluster, dirname, exes, weekly): + +def test_integration_performance_alexnet_intel18(cluster, dirname, exes, + weekly): skeleton_performance_alexnet(cluster, dirname, exes, 'intel18', weekly) -def test_integration_performance_full_alexnet_intel18(cluster, dirname, exes, weekly): + +def test_integration_performance_full_alexnet_intel18(cluster, dirname, exes, + weekly): skeleton_performance_full_alexnet(cluster, dirname, exes, 'intel18', weekly) # Run with python -m pytest -s test_integration_performance.py -k 'test_integration_performance_lenet_mnist_exe' --exe= def test_integration_performance_lenet_mnist_exe(cluster, dirname, exe): - if exe == None: - pytest.skip('Non-local testing') - exes = {'exe' : exe} + if exe is None: + e = 'test_integration_performance_lenet_mnist_exe: Non-local testing' + print('Skip - ' + e) + pytest.skip(e) + exes = {'exe': exe} skeleton_performance_lenet_mnist(cluster, dirname, exes, 'exe') + # Run with python -m pytest -s test_integration_performance.py -k 'test_integration_performance_alexnet_exe' --exe= def test_integration_performance_alexnet_exe(cluster, dirname, exe): - if exe == None: - pytest.skip('Non-local testing') - exes = {'exe' : exe} + if exe is None: + e = 'test_integration_performance_alexnet_exe: Non-local testing' + print('Skip - ' + e) + pytest.skip(e) + exes = {'exe': exe} skeleton_performance_alexnet(cluster, dirname, exes, 'exe', True) + # Run with python -m pytest -s test_integration_performance.py -k 'test_integration_performance_full_alexnet_exe' --exe= def test_integration_performance_full_alexnet_exe(cluster, dirname, exe): - if exe == None: - pytest.skip('Non-local testing') - exes = {'exe' : exe} + if exe is None: + e = 'test_integration_performance_full_alexnet_exe: Non-local testing' + print('Skip - ' + e) + pytest.skip(e) + exes = {'exe': exe} skeleton_performance_full_alexnet(cluster, dirname, exes, 'exe', True) diff --git a/bamboo/unit_tests/conftest.py b/bamboo/unit_tests/conftest.py index 5e5cce7d2f7..eda975da95a 100644 --- a/bamboo/unit_tests/conftest.py +++ b/bamboo/unit_tests/conftest.py @@ -1,22 +1,14 @@ -import pytest, os, re, subprocess +import sys +sys.path.insert(0, '../common_python') +import tools +import pytest, re, subprocess def pytest_addoption(parser): - cluster = re.sub('[0-9]+', '', subprocess.check_output('hostname'.split()).strip()) - default_dirname = subprocess.check_output('git rev-parse --show-toplevel'.split()).strip() - default_exes = {} - default_exes['default'] = '%s/build/gnu.Release.%s.llnl.gov/install/bin/lbann' % (default_dirname, cluster) - if cluster in ['catalyst', 'quartz']: - default_exes['clang4'] = '%s/build/clang.Release.%s.llnl.gov/install/bin/lbann' % (default_dirname, cluster) 
#'%s/bamboo/compiler_tests/builds/%s_clang-4.0.0_rel/build/model_zoo/lbann' % (default_dirname, cluster) - #default_exes['gcc4'] = '%s/bamboo/compiler_tests/builds/%s_gcc-4.9.3_rel/build/model_zoo/lbann' % (default_dirname, cluster) - default_exes['gcc7'] = '%s/build/gnu.Release.%s.llnl.gov/install/bin/lbann' % (default_dirname, cluster) #'%s/bamboo/compiler_tests/builds/%s_gcc-7.1.0_rel/build/model_zoo/lbann' % (default_dirname, cluster) - default_exes['intel18'] = '%s/build/intel.Release.%s.llnl.gov/install/bin/lbann' % (default_dirname, cluster) #'%s/bamboo/compiler_tests/builds/%s_intel-18.0.0_rel/build/model_zoo/lbann' % (default_dirname, cluster) - - if cluster == 'ray': - default_exes['gcc4'] = '%s/build/gnu.Release.%s.llnl.gov/install/bin/lbann' % (default_dirname, cluster) #'%s/bamboo/compiler_tests/builds/%s_gcc-4.9.3_rel/build/model_zoo/lbann' % (default_dirname, cluster) - default_exes['clang4'] = '%s/build/clang.Release.%s.llnl.gov/install/bin/lbann' % (default_dirname, cluster) - - if cluster in ['surface', 'pascal']: - default_exes['gcc4'] = default_exes['default'] + cluster = re.sub('[0-9]+', '', subprocess.check_output( + 'hostname'.split()).strip()) + default_dirname = subprocess.check_output( + 'git rev-parse --show-toplevel'.split()).strip() + default_exes = tools.get_default_exes(default_dirname, cluster) parser.addoption('--cluster', action='store', default=cluster, help='--cluster= to specify the cluster being run on, for the purpose of determining which commands to use. Defaults to the current cluster') diff --git a/bamboo/unit_tests/prototext/model_mnist_simple_1.prototext b/bamboo/unit_tests/prototext/model_mnist_simple_1.prototext index c2c6477837b..77a1c7ed256 100644 --- a/bamboo/unit_tests/prototext/model_mnist_simple_1.prototext +++ b/bamboo/unit_tests/prototext/model_mnist_simple_1.prototext @@ -1,19 +1,17 @@ model { - name: "sequential_model" data_layout: "data_parallel" mini_batch_size: 64 block_size: 256 num_epochs: 3 num_parallel_readers: 0 - procs_per_model: 0 - num_gpus: -1 + procs_per_trainer: 0 ################################################### # Objective function ################################################### objective_function { - cross_entropy {} + layer_term { layer: "cross_entropy" } l2_weight_regularization { scale_factor: 1e-4 } @@ -24,7 +22,11 @@ model { ################################################### metric { - categorical_accuracy {} + layer_metric { + name: "categorical accuracy" + layer: "accuracy" + unit: "%" + } } ################################################### @@ -52,13 +54,25 @@ model { layer { name: "data" + children: "image label" data_layout: "data_parallel" - input { - io_buffer: "partitioned" - } + input {} + } + layer { + parents: "data" + name: "image" + data_layout: "data_parallel" + split {} + } + layer { + parents: "data" + name: "label" + data_layout: "data_parallel" + split {} } layer { + parents: "image" name: "ip1" data_layout: "model_parallel" fully_connected { @@ -68,12 +82,14 @@ model { } layer { + parents: "ip1" name: "relu1" data_layout: "model_parallel" relu {} } layer { + parents: "relu1" name: "ip2" data_layout: "model_parallel" fully_connected { @@ -83,18 +99,24 @@ model { } layer { + parents: "ip2" name: "prob" - data_layout: "model_parallel" + data_layout: "data_parallel" softmax {} } layer { - name: "target" + parents: "prob label" + name: "cross_entropy" data_layout: "data_parallel" - target { - io_buffer: "partitioned" - shared_data_reader: true - } + cross_entropy {} + } + + layer { + parents: 
"prob label" + name: "accuracy" + data_layout: "data_parallel" + categorical_accuracy {} } } diff --git a/bamboo/unit_tests/prototext/model_mnist_simple_2.prototext b/bamboo/unit_tests/prototext/model_mnist_simple_2.prototext index d265acf7276..c89c171566f 100644 --- a/bamboo/unit_tests/prototext/model_mnist_simple_2.prototext +++ b/bamboo/unit_tests/prototext/model_mnist_simple_2.prototext @@ -1,19 +1,17 @@ model { - name: "sequential_model" data_layout: "data_parallel" mini_batch_size: 64 block_size: 256 num_epochs: 3 num_parallel_readers: 0 - procs_per_model: 0 - num_gpus: -1 + procs_per_trainer: 0 ################################################### # Objective function ################################################### objective_function { - cross_entropy {} + layer_term { layer: "cross_entropy" } l2_weight_regularization { scale_factor: 1e-4 } @@ -24,7 +22,11 @@ model { ################################################### metric { - categorical_accuracy {} + layer_metric { + name: "categorical accuracy" + layer: "accuracy" + unit: "%" + } } ################################################### @@ -52,13 +54,25 @@ model { layer { name: "data" + children: "image label" data_layout: "data_parallel" - input { - io_buffer: "partitioned" - } + input {} + } + layer { + parents: "data" + name: "image" + data_layout: "data_parallel" + split {} + } + layer { + parents: "data" + name: "label" + data_layout: "data_parallel" + split {} } layer { + parents: "image" name: "ip1" data_layout: "model_parallel" fully_connected { @@ -68,12 +82,14 @@ model { } layer { + parents: "ip1" name: "relu1" data_layout: "model_parallel" relu {} } layer { + parents: "relu1" name: "ip3" data_layout: "model_parallel" fully_connected { @@ -83,11 +99,13 @@ model { } layer { + parents: "ip3" name: "relu3" data_layout: "model_parallel" relu {} } layer { + parents: "relu3" name: "ip2" data_layout: "model_parallel" fully_connected { @@ -97,18 +115,24 @@ model { } layer { + parents: "ip2" name: "prob" - data_layout: "model_parallel" + data_layout: "data_parallel" softmax {} } layer { - name: "target" + parents: "prob label" + name: "cross_entropy" data_layout: "data_parallel" - target { - io_buffer: "partitioned" - shared_data_reader: true - } + cross_entropy {} + } + + layer { + parents: "prob label" + name: "accuracy" + data_layout: "data_parallel" + categorical_accuracy {} } } diff --git a/bamboo/unit_tests/prototext/opt_sgd.prototext b/bamboo/unit_tests/prototext/opt_sgd.prototext index 3ab5afd6406..8d066780476 100644 --- a/bamboo/unit_tests/prototext/opt_sgd.prototext +++ b/bamboo/unit_tests/prototext/opt_sgd.prototext @@ -1,8 +1,7 @@ optimizer { sgd { learn_rate: 0.01 - momentum: 0.9 - decay_rate: 0 + momentum: 0.9 nesterov: false - } + } } diff --git a/bamboo/unit_tests/test_unit_check_proto_models.py b/bamboo/unit_tests/test_unit_check_proto_models.py index 539d69d0b44..353fca3143a 100644 --- a/bamboo/unit_tests/test_unit_check_proto_models.py +++ b/bamboo/unit_tests/test_unit_check_proto_models.py @@ -2,11 +2,14 @@ sys.path.insert(0, '../common_python') import tools import pytest -import os, re, subprocess, sys +import os + def skeleton_models(cluster, dir_name, executables, compiler_name): if compiler_name not in executables: - pytest.skip('default_exes[%s] does not exist' % compiler_name) + e = 'skeleton_models: default_exes[%s] does not exist' % compiler_name + print('Skip - ' + e) + pytest.skip(e) opt = 'sgd' node_count = 1 time_limit = 1 @@ -16,17 +19,14 @@ def skeleton_models(cluster, dir_name, executables, 
compiler_name): for file_name in files: if file_name.endswith('.prototext') and "model" in file_name: model_path = subdir + '/' + file_name - print('Attempting model setup for: ' + file_name ) + print('Attempting model setup for: ' + file_name) data_filedir_default = None data_filedir_train_default=None data_filename_train_default=None data_filedir_test_default=None data_filename_test_default=None data_reader_path=None - if 'motif' in file_name: - print('Skipping %s because motifs are deprecated' % model_path) - continue - elif 'mnist' in file_name: + if 'mnist' in file_name: data_filedir_default = '/p/lscratchh/brainusr/datasets/MNIST' data_reader_name = 'mnist' elif 'adversarial' in file_name: @@ -38,6 +38,9 @@ def skeleton_models(cluster, dir_name, executables, compiler_name): data_reader_path = '%s/model_zoo/models/gan/mnist/discriminator_data.prototext' % (dir_name) data_reader_name = None elif 'triplet' in file_name: + # Disabling triplet test. + print('Skipping triplet tests.') + continue data_filedir_train_default = '/p/lscratchh/brainusr/datasets/ILSVRC2012/patches_84h_110x110_13x13-blur-ab_compact/' data_filename_train_default = '/p/lscratchh/brainusr/datasets/ILSVRC2012/patches_84h_110x110_13x13-blur-ab_compact/train/train_list_8h.nfl.npz' data_filedir_test_default = '/p/lscratchh/brainusr/datasets/ILSVRC2012/patches_84h_110x110_13x13-blur-ab_compact/' @@ -58,7 +61,7 @@ def skeleton_models(cluster, dir_name, executables, compiler_name): data_filename_test_default = '/p/lscratchh/brainusr/datasets/ILSVRC2012/labels/val.txt' data_reader_name = 'imagenet' node_count = 2 - if(cluster == 'ray'): + if cluster == 'ray': time_limit = 3 if 'resnet50' in file_name: node_count = 8 @@ -70,26 +73,35 @@ def skeleton_models(cluster, dir_name, executables, compiler_name): data_filedir_default = '/p/lscratchh/brainusr/datasets/tinyshakespeare/' data_reader_name = 'ascii' else: - print("Shared lbannusr account doesn't have access to dataset this model requires") + print( + "No access to the dataset that model={m} requires.".format( + m=file_name)) continue - if (cluster == 'ray') and (data_reader_name in ['cifar10', 'ascii']): + if (cluster == 'ray') and \ + (data_reader_name in ['cifar10', 'ascii']): print('Skipping %s because data is not available on ray' % model_path) - elif (cluster == 'ray') or (cluster == 'pascal') and ('conv_autoencoder' in file_name) or ('gan' in subdir): + elif ((cluster == 'ray') or (cluster == 'pascal')) and \ + (('conv_autoencoder' in file_name) or ('gan' in subdir)): print('Skipping %s because unpooling/noise is not implemented on gpu' % model_path) else: output_file_name = '%s/bamboo/unit_tests/output/check_proto_models_%s_%s_output.txt' % (dir_name, file_name, compiler_name) error_file_name = '%s/bamboo/unit_tests/error/check_proto_models_%s_%s_error.txt' % (dir_name, file_name, compiler_name) cmd = tools.get_command( - cluster=cluster, executable=executables[compiler_name], num_nodes=node_count, - partition='pbatch', time_limit=time_limit, dir_name=dir_name, + cluster=cluster, executable=executables[compiler_name], + num_nodes=node_count, + partition='pbatch', time_limit=time_limit, + dir_name=dir_name, data_filedir_default=data_filedir_default, data_filedir_train_default=data_filedir_train_default, data_filename_train_default=data_filename_train_default, data_filedir_test_default=data_filedir_test_default, data_filename_test_default=data_filename_test_default, - data_reader_name=data_reader_name, data_reader_path=data_reader_path, - exit_after_setup=True, 
model_path=model_path, optimizer_name=opt, - output_file_name=output_file_name, error_file_name=error_file_name) + data_reader_name=data_reader_name, + data_reader_path=data_reader_path, + exit_after_setup=True, model_path=model_path, + optimizer_name=opt, + output_file_name=output_file_name, + error_file_name=error_file_name) if os.system(cmd) != 0: print("Error detected in " + model_path) #defective_models.append(file_name) @@ -98,31 +110,35 @@ def skeleton_models(cluster, dir_name, executables, compiler_name): working_models.append(cmd) num_defective = len(defective_models) if num_defective != 0: - print('Working models: %d. Defective models: %d', len(working_models), num_defective) + print('Working models: %d. Defective models: %d' % ( + len(working_models), num_defective)) print('Errors for: The following models exited with errors %s' % compiler_name) for model in defective_models: print(model) assert num_defective == 0 + def test_unit_models_clang4(cluster, dirname, exes): skeleton_models(cluster, dirname, exes, 'clang4') + def test_unit_models_gcc4(cluster, dirname, exes): - if cluster in ['surface']: - pytest.skip('FIXME') - # Surface Errors: - # assert 8 == 0 skeleton_models(cluster, dirname, exes, 'gcc4') + def test_unit_models_gcc7(cluster, dirname, exes): skeleton_models(cluster, dirname, exes, 'gcc7') + def test_unit_models_intel18(cluster, dirname, exes): skeleton_models(cluster, dirname, exes, 'intel18') + # Run with python -m pytest -s test_unit_check_proto_models.py -k 'test_unit_models_exe' --exe= def test_unit_models_exe(cluster, dirname, exe): - if exe == None: - pytest.skip('Non-local testing') + if exe is None: + e = 'test_unit_models_exe: Non-local testing' + print('Skip - ' + e) + pytest.skip(e) exes = {'exe' : exe} skeleton_models(cluster, dirname, exes, 'exe') diff --git a/bamboo/unit_tests/test_unit_checkpoint.py b/bamboo/unit_tests/test_unit_checkpoint.py index 2b0912c5200..25ea6614e3b 100644 --- a/bamboo/unit_tests/test_unit_checkpoint.py +++ b/bamboo/unit_tests/test_unit_checkpoint.py @@ -4,10 +4,16 @@ import pytest import os -def skeleton_checkpoint_lenet_shared(cluster, executables, dir_name, compiler_name): + +def skeleton_checkpoint_lenet_shared(cluster, executables, dir_name, + compiler_name): if compiler_name not in executables: - pytest.skip('default_exes[%s] does not exist' % compiler_name) + e = 'skeleton_checkpoint_lenet_shared: default_exes[%s] does not exist' % compiler_name + print('Skip - ' + e) + pytest.skip(e) exe = executables[compiler_name] + + # No checkpointing, printing weights to files. output_file_name = '%s/bamboo/unit_tests/output/checkpoint_lenet_shared_no_checkpoint_%s_output.txt' % (dir_name, compiler_name) error_file_name = '%s/bamboo/unit_tests/error/checkpoint_lenet_shared_no_checkpoint_%s_error.txt' % (dir_name, compiler_name) command = tools.get_command( @@ -23,6 +29,7 @@ def skeleton_checkpoint_lenet_shared(cluster, executables, dir_name, compiler_na sys.exit(1) os.system('mv ckpt ckpt_baseline') + # Run to checkpoint, printing weights to files. 
output_file_name = '%s/bamboo/unit_tests/output/checkpoint_lenet_shared_checkpoint_%s_output.txt' % (dir_name, compiler_name) error_file_name = '%s/bamboo/unit_tests/error/checkpoint_lenet_shared_checkpoint_%s_error.txt' % (dir_name, compiler_name) command = tools.get_command( @@ -37,6 +44,7 @@ def skeleton_checkpoint_lenet_shared(cluster, executables, dir_name, compiler_na sys.stderr.write('LeNet (checkpoint) execution failed, exiting with error') sys.exit(1) + # Pick up from checkpoint, printing weights to files. output_file_name = '%s/bamboo/unit_tests/output/checkpoint_lenet_shared_restart_%s_output.txt' % (dir_name, compiler_name) error_file_name = '%s/bamboo/unit_tests/error/checkpoint_lenet_shared_restart_%s_error.txt' % (dir_name, compiler_name) command = tools.get_command( @@ -55,10 +63,16 @@ def skeleton_checkpoint_lenet_shared(cluster, executables, dir_name, compiler_na os.system('rm -rf ckpt*') assert diff_test == 0 -def skeleton_checkpoint_lenet_distributed(cluster, executables, dir_name, compiler_name): + +def skeleton_checkpoint_lenet_distributed(cluster, executables, dir_name, + compiler_name): if compiler_name not in executables: - pytest.skip('default_exes[%s] does not exist' % compiler_name) + e = 'skeleton_checkpoint_lenet_distributed: default_exes[%s] does not exist' % compiler_name + print('Skip - ' + e) + pytest.skip(e) exe = executables[compiler_name] + + # No checkpointing, printing weights to files. output_file_name = '%s/bamboo/unit_tests/output/checkpoint_lenet_distributed_no_checkpoint_%s_output.txt' % (dir_name, compiler_name) error_file_name = '%s/bamboo/unit_tests/error/checkpoint_lenet_distributed_no_checkpoint_%s_error.txt' % (dir_name, compiler_name) command = tools.get_command( @@ -74,6 +88,7 @@ def skeleton_checkpoint_lenet_distributed(cluster, executables, dir_name, compil sys.exit(1) os.system('mv ckpt ckpt_baseline') + # Run to checkpoint, printing weights to files. output_file_name = '%s/bamboo/unit_tests/output/checkpoint_lenet_distributed_checkpoint_%s_output.txt' % (dir_name, compiler_name) error_file_name = '%s/bamboo/unit_tests/error/checkpoint_lenet_distributed_checkpoint_%s_error.txt' % (dir_name, compiler_name) command = tools.get_command( @@ -88,6 +103,7 @@ def skeleton_checkpoint_lenet_distributed(cluster, executables, dir_name, compil sys.stderr.write('LeNet (checkpoint) execution failed, exiting with error') sys.exit(1) + # Pick up from checkpoint, printing weights to files. 
output_file_name = '%s/bamboo/unit_tests/output/checkpoint_lenet_distributed_restart_%s_output.txt' % (dir_name, compiler_name) error_file_name = '%s/bamboo/unit_tests/error/checkpoint_lenet_distributed_restart_%s_error.txt' % (dir_name, compiler_name) command = tools.get_command( @@ -106,26 +122,33 @@ def skeleton_checkpoint_lenet_distributed(cluster, executables, dir_name, compil os.system('rm -rf ckpt*') assert diff_test == 0 + def test_unit_checkpoint_lenet_clang4(cluster, exes, dirname): skeleton_checkpoint_lenet_shared(cluster, exes, dirname, 'clang4') skeleton_checkpoint_lenet_distributed(cluster, exes, dirname, 'clang4') + def test_unit_checkpoint_lenet_gcc4(cluster, exes, dirname): skeleton_checkpoint_lenet_shared(cluster, exes, dirname, 'gcc4') skeleton_checkpoint_lenet_distributed(cluster, exes, dirname, 'gcc4') + def test_unit_checkpoint_lenet_gcc7(cluster, exes, dirname): skeleton_checkpoint_lenet_shared(cluster, exes, dirname, 'gcc7') skeleton_checkpoint_lenet_distributed(cluster, exes, dirname, 'gcc7') + def test_unit_checkpoint_lenet_intel18(cluster, exes, dirname): skeleton_checkpoint_lenet_shared(cluster, exes, dirname, 'intel18') skeleton_checkpoint_lenet_distributed(cluster, exes, dirname, 'intel18') + # Run with python -m pytest -s test_unit_checkpoint.py -k 'test_unit_checkpoint_lenet_exe' --exe= def test_unit_checkpoint_lenet_exe(cluster, dirname, exe): - if exe == None: - pytest.skip('Non-local testing') - exes = {'exe' : exe} + if exe is None: + e = 'test_unit_checkpoint_lenet_exe: Non-local testing' + print('Skip - ' + e) + pytest.skip(e) + exes = {'exe': exe} skeleton_checkpoint_lenet_shared(cluster, exes, dirname, 'exe') skeleton_checkpoint_lenet_distributed(cluster, exes, dirname, 'exe') diff --git a/bamboo/unit_tests/test_unit_layer_clamp.py b/bamboo/unit_tests/test_unit_layer_clamp.py index 6ac7278ab30..8cd7d579374 100644 --- a/bamboo/unit_tests/test_unit_layer_clamp.py +++ b/bamboo/unit_tests/test_unit_layer_clamp.py @@ -4,38 +4,46 @@ import pytest import os + def skeleton_layer_clamp(cluster, executables, dir_name, compiler_name): if compiler_name not in executables: - pytest.skip('default_exes[%s] does not exist' % compiler_name) + e = 'skeleton_layer_clamp: default_exes[%s] does not exist' % compiler_name + print('Skip - ' + e) + pytest.skip(e) output_file_name = '%s/bamboo/unit_tests/output/layer_clamp_%s_output.txt' % (dir_name, compiler_name) error_file_name = '%s/bamboo/unit_tests/error/layer_clamp_%s_error.txt' % (dir_name, compiler_name) command = tools.get_command( - cluster=cluster, executable=executables[compiler_name], num_nodes=1, num_processes=2, dir_name=dir_name, + cluster=cluster, executable=executables[compiler_name], num_nodes=1, + num_processes=2, dir_name=dir_name, data_filedir_default='', data_reader_name='synthetic', - model_folder='tests/layer_tests', model_name='clamp', optimizer_name='sgd', + model_folder='tests/layer_tests', model_name='clamp', + optimizer_name='sgd', output_file_name=output_file_name, error_file_name=error_file_name) return_code = os.system(command) assert return_code == 0 + def test_unit_layer_clamp_clang4(cluster, exes, dirname): skeleton_layer_clamp(cluster, exes, dirname, 'clang4') + def test_unit_layer_clamp_gcc4_check(cluster, exes, dirname): - if cluster in ['surface']: - pytest.skip('FIXME') - # Surface Errors: - # assert 34304 == 0 skeleton_layer_clamp(cluster, exes, dirname, 'gcc4') + def test_unit_layer_clamp_gcc7(cluster, exes, dirname): skeleton_layer_clamp(cluster, exes, dirname, 'gcc7') + def 
test_unit_layer_clamp_intel18(cluster, exes, dirname): skeleton_layer_clamp(cluster, exes, dirname, 'intel18') + # Run with python -m pytest -s test_unit_layer_clamp.py -k 'test_unit_layer_clamp_exe' --exe= def test_unit_layer_clamp_exe(cluster, dirname, exe): - if exe == None: - pytest.skip('Non-local testing') - exes = {'exe' : exe} + if exe is None: + e = 'test_unit_layer_clamp_exe: Non-local testing' + print('Skip - ' + e) + pytest.skip(e) + exes = {'exe': exe} skeleton_layer_clamp(cluster, exes, dirname, 'exe') diff --git a/bamboo/unit_tests/test_unit_layer_covariance.py b/bamboo/unit_tests/test_unit_layer_covariance.py index 41bdb9d985f..e72bca4fb51 100644 --- a/bamboo/unit_tests/test_unit_layer_covariance.py +++ b/bamboo/unit_tests/test_unit_layer_covariance.py @@ -4,38 +4,46 @@ import pytest import os + def skeleton_layer_covariance(cluster, executables, dir_name, compiler_name): if compiler_name not in executables: - pytest.skip('default_exes[%s] does not exist' % compiler_name) + e = 'skeleton_layer_covariance: default_exes[%s] does not exist' % compiler_name + print('Skip - ' + e) + pytest.skip(e) output_file_name = '%s/bamboo/unit_tests/output/layer_covariance_%s_output.txt' % (dir_name, compiler_name) error_file_name = '%s/bamboo/unit_tests/error/layer_covariance_%s_error.txt' % (dir_name, compiler_name) command = tools.get_command( - cluster=cluster, executable=executables[compiler_name], num_nodes=1, num_processes=2, dir_name=dir_name, + cluster=cluster, executable=executables[compiler_name], num_nodes=1, + num_processes=2, dir_name=dir_name, data_filedir_default='', data_reader_name='synthetic', - model_folder='tests/layer_tests', model_name='covariance', optimizer_name='sgd', + model_folder='tests/layer_tests', model_name='covariance', + optimizer_name='sgd', output_file_name=output_file_name, error_file_name=error_file_name) return_code = os.system(command) assert return_code == 0 + def test_unit_layer_covariance_clang4(cluster, exes, dirname): skeleton_layer_covariance(cluster, exes, dirname, 'clang4') + def test_unit_layer_covariance_gcc4_check(cluster, exes, dirname): - if cluster in ['surface']: - pytest.skip('FIXME') - # Surface Errors: - # assert 34304 == 0 skeleton_layer_covariance(cluster, exes, dirname, 'gcc4') + def test_unit_layer_covariance_gcc7(cluster, exes, dirname): skeleton_layer_covariance(cluster, exes, dirname, 'gcc7') + def test_unit_layer_covariance_intel18(cluster, exes, dirname): skeleton_layer_covariance(cluster, exes, dirname, 'intel18') + # Run with python -m pytest -s test_unit_layer_covariance.py -k 'test_unit_layer_covariance_exe' --exe= def test_unit_layer_covariance_exe(cluster, dirname, exe): - if exe == None: - pytest.skip('Non-local testing') - exes = {'exe' : exe} + if exe is None: + e = 'test_unit_layer_covariance_exe: Non-local testing' + print('Skip - ' + e) + pytest.skip(e) + exes = {'exe': exe} skeleton_layer_covariance(cluster, exes, dirname, 'exe') diff --git a/bamboo/unit_tests/test_unit_layer_elu.py b/bamboo/unit_tests/test_unit_layer_elu.py index a121bfcb50f..66b10d1fc5b 100644 --- a/bamboo/unit_tests/test_unit_layer_elu.py +++ b/bamboo/unit_tests/test_unit_layer_elu.py @@ -4,38 +4,46 @@ import pytest import os + def skeleton_layer_elu(cluster, executables, dir_name, compiler_name): if compiler_name not in executables: - pytest.skip('default_exes[%s] does not exist' % compiler_name) + e = 'skeleton_layer_elu: default_exes[%s] does not exist' % compiler_name + print('Skip - ' + e) + pytest.skip(e) output_file_name = 
'%s/bamboo/unit_tests/output/layer_elu_%s_output.txt' % (dir_name, compiler_name) error_file_name = '%s/bamboo/unit_tests/error/layer_elu_%s_error.txt' % (dir_name, compiler_name) command = tools.get_command( - cluster=cluster, executable=executables[compiler_name], num_nodes=1, num_processes=2, dir_name=dir_name, + cluster=cluster, executable=executables[compiler_name], num_nodes=1, + num_processes=2, dir_name=dir_name, data_filedir_default='', data_reader_name='synthetic', - model_folder='tests/layer_tests', model_name='elu', optimizer_name='sgd', + model_folder='tests/layer_tests', model_name='elu', + optimizer_name='sgd', output_file_name=output_file_name, error_file_name=error_file_name) return_code = os.system(command) assert return_code == 0 + def test_unit_layer_elu_clang4(cluster, exes, dirname): skeleton_layer_elu(cluster, exes, dirname, 'clang4') + def test_unit_layer_elu_gcc4_check(cluster, exes, dirname): - if cluster in ['surface']: - pytest.skip('FIXME') - # Surface Errors: - # assert 34304 == 0 skeleton_layer_elu(cluster, exes, dirname, 'gcc4') + def test_unit_layer_elu_gcc7(cluster, exes, dirname): skeleton_layer_elu(cluster, exes, dirname, 'gcc7') + def test_unit_layer_elu_intel18(cluster, exes, dirname): skeleton_layer_elu(cluster, exes, dirname, 'intel18') + # Run with python -m pytest -s test_unit_layer_elu.py -k 'test_unit_layer_elu_exe' --exe= def test_unit_layer_elu_exe(cluster, dirname, exe): - if exe == None: - pytest.skip('Non-local testing') - exes = {'exe' : exe} + if exe is None: + e = 'test_unit_layer_elu_exe: Non-local testing' + print('Skip - ' + e) + pytest.skip(e) + exes = {'exe': exe} skeleton_layer_elu(cluster, exes, dirname, 'exe') diff --git a/bamboo/unit_tests/test_unit_layer_identity.py b/bamboo/unit_tests/test_unit_layer_identity.py index b26f4248d69..86568e946d5 100644 --- a/bamboo/unit_tests/test_unit_layer_identity.py +++ b/bamboo/unit_tests/test_unit_layer_identity.py @@ -4,38 +4,46 @@ import pytest import os + def skeleton_layer_identity(cluster, executables, dir_name, compiler_name): if compiler_name not in executables: - pytest.skip('default_exes[%s] does not exist' % compiler_name) + e = 'skeleton_layer_identity: default_exes[%s] does not exist' % compiler_name + print('Skip - ' + e) + pytest.skip(e) output_file_name = '%s/bamboo/unit_tests/output/layer_identity_%s_output.txt' % (dir_name, compiler_name) error_file_name = '%s/bamboo/unit_tests/error/layer_identity_%s_error.txt' % (dir_name, compiler_name) command = tools.get_command( - cluster=cluster, executable=executables[compiler_name], num_nodes=1, num_processes=2, dir_name=dir_name, + cluster=cluster, executable=executables[compiler_name], num_nodes=1, + num_processes=2, dir_name=dir_name, data_filedir_default='', data_reader_name='synthetic', - model_folder='tests/layer_tests', model_name='identity', optimizer_name='sgd', + model_folder='tests/layer_tests', model_name='identity', + optimizer_name='sgd', output_file_name=output_file_name, error_file_name=error_file_name) return_code = os.system(command) assert return_code == 0 + def test_unit_layer_identity_clang4(cluster, exes, dirname): skeleton_layer_identity(cluster, exes, dirname, 'clang4') + def test_unit_layer_identity_gcc4_check(cluster, exes, dirname): - if cluster in ['surface']: - pytest.skip('FIXME') - # Surface Errors: - # assert 34304 == 0 skeleton_layer_identity(cluster, exes, dirname, 'gcc4') + def test_unit_layer_identity_gcc7(cluster, exes, dirname): skeleton_layer_identity(cluster, exes, dirname, 'gcc7') + def 
test_unit_layer_identity_intel18(cluster, exes, dirname): skeleton_layer_identity(cluster, exes, dirname, 'intel18') + # Run with python -m pytest -s test_unit_layer_identity.py -k 'test_unit_layer_identity_exe' --exe= def test_unit_layer_identity_exe(cluster, dirname, exe): - if exe == None: - pytest.skip('Non-local testing') - exes = {'exe' : exe} + if exe is None: + e = 'test_unit_layer_identity_exe: Non-local testing' + print('Skip - ' + e) + pytest.skip(e) + exes = {'exe': exe} skeleton_layer_identity(cluster, exes, dirname, 'exe') diff --git a/bamboo/unit_tests/test_unit_layer_l1_norm.py b/bamboo/unit_tests/test_unit_layer_l1_norm.py index 1c1ab406106..9abcc2652ce 100644 --- a/bamboo/unit_tests/test_unit_layer_l1_norm.py +++ b/bamboo/unit_tests/test_unit_layer_l1_norm.py @@ -4,38 +4,46 @@ import pytest import os + def skeleton_layer_l1_norm(cluster, executables, dir_name, compiler_name): if compiler_name not in executables: - pytest.skip('default_exes[%s] does not exist' % compiler_name) + e = 'skeleton_layer_l1_norm: default_exes[%s] does not exist' % compiler_name + print('Skip - ' + e) + pytest.skip(e) output_file_name = '%s/bamboo/unit_tests/output/layer_l1_norm_%s_output.txt' % (dir_name, compiler_name) error_file_name = '%s/bamboo/unit_tests/error/layer_l1_norm_%s_error.txt' % (dir_name, compiler_name) command = tools.get_command( - cluster=cluster, executable=executables[compiler_name], num_nodes=1, num_processes=2, dir_name=dir_name, + cluster=cluster, executable=executables[compiler_name], num_nodes=1, + num_processes=2, dir_name=dir_name, data_filedir_default='', data_reader_name='synthetic', - model_folder='tests/layer_tests', model_name='l1_norm', optimizer_name='sgd', + model_folder='tests/layer_tests', model_name='l1_norm', + optimizer_name='sgd', output_file_name=output_file_name, error_file_name=error_file_name) return_code = os.system(command) assert return_code == 0 + def test_unit_layer_l1_norm_clang4(cluster, exes, dirname): skeleton_layer_l1_norm(cluster, exes, dirname, 'clang4') + def test_unit_layer_l1_norm_gcc4_check(cluster, exes, dirname): - if cluster in ['surface']: - pytest.skip('FIXME') - # Surface Errors: - # assert 34304 == 0 skeleton_layer_l1_norm(cluster, exes, dirname, 'gcc4') + def test_unit_layer_l1_norm_gcc7(cluster, exes, dirname): skeleton_layer_l1_norm(cluster, exes, dirname, 'gcc7') + def test_unit_layer_l1_norm_intel18(cluster, exes, dirname): skeleton_layer_l1_norm(cluster, exes, dirname, 'intel18') + # Run with python -m pytest -s test_unit_layer_l1_norm.py -k 'test_unit_layer_l1_norm_exe' --exe= def test_unit_layer_l1_norm_exe(cluster, dirname, exe): - if exe == None: - pytest.skip('Non-local testing') - exes = {'exe' : exe} + if exe is None: + e = 'test_unit_layer_l1_norm_exe: Non-local testing' + print('Skip - ' + e) + pytest.skip(e) + exes = {'exe': exe} skeleton_layer_l1_norm(cluster, exes, dirname, 'exe') diff --git a/bamboo/unit_tests/test_unit_layer_l2_norm2.py b/bamboo/unit_tests/test_unit_layer_l2_norm2.py index 29233e9ce18..cdbad231498 100644 --- a/bamboo/unit_tests/test_unit_layer_l2_norm2.py +++ b/bamboo/unit_tests/test_unit_layer_l2_norm2.py @@ -4,38 +4,46 @@ import pytest import os + def skeleton_layer_l2_norm2(cluster, executables, dir_name, compiler_name): if compiler_name not in executables: - pytest.skip('default_exes[%s] does not exist' % compiler_name) + e = 'skeleton_layer_l2_norm2: default_exes[%s] does not exist' % compiler_name + print('Skip - ' + e) + pytest.skip(e) output_file_name = 
'%s/bamboo/unit_tests/output/layer_l2_norm2_%s_output.txt' % (dir_name, compiler_name) error_file_name = '%s/bamboo/unit_tests/error/layer_l2_norm2_%s_error.txt' % (dir_name, compiler_name) command = tools.get_command( - cluster=cluster, executable=executables[compiler_name], num_nodes=1, num_processes=2, dir_name=dir_name, + cluster=cluster, executable=executables[compiler_name], num_nodes=1, + num_processes=2, dir_name=dir_name, data_filedir_default='', data_reader_name='synthetic', - model_folder='tests/layer_tests', model_name='l2_norm2', optimizer_name='sgd', + model_folder='tests/layer_tests', model_name='l2_norm2', + optimizer_name='sgd', output_file_name=output_file_name, error_file_name=error_file_name) return_code = os.system(command) assert return_code == 0 + def test_unit_layer_l2_norm2_clang4(cluster, exes, dirname): skeleton_layer_l2_norm2(cluster, exes, dirname, 'clang4') + def test_unit_layer_l2_norm2_gcc4_check(cluster, exes, dirname): - if cluster in ['surface']: - pytest.skip('FIXME') - # Surface Errors: - # assert 34304 == 0 skeleton_layer_l2_norm2(cluster, exes, dirname, 'gcc4') + def test_unit_layer_l2_norm2_gcc7(cluster, exes, dirname): skeleton_layer_l2_norm2(cluster, exes, dirname, 'gcc7') + def test_unit_layer_l2_norm2_intel18(cluster, exes, dirname): skeleton_layer_l2_norm2(cluster, exes, dirname, 'intel18') + # Run with python -m pytest -s test_unit_layer_l2_norm2.py -k 'test_unit_layer_l2_norm2_exe' --exe= def test_unit_layer_l2_norm2_exe(cluster, dirname, exe): - if exe == None: - pytest.skip('Non-local testing') - exes = {'exe' : exe} + if exe is None: + e = 'test_unit_layer_l2_norm2_exe: Non-local testing' + print('Skip - ' + e) + pytest.skip(e) + exes = {'exe': exe} skeleton_layer_l2_norm2(cluster, exes, dirname, 'exe') diff --git a/bamboo/unit_tests/test_unit_layer_leaky_relu.py b/bamboo/unit_tests/test_unit_layer_leaky_relu.py index d934987e76a..6c90b34ce78 100644 --- a/bamboo/unit_tests/test_unit_layer_leaky_relu.py +++ b/bamboo/unit_tests/test_unit_layer_leaky_relu.py @@ -4,38 +4,46 @@ import pytest import os + def skeleton_layer_leaky_relu(cluster, executables, dir_name, compiler_name): if compiler_name not in executables: - pytest.skip('default_exes[%s] does not exist' % compiler_name) + e = 'skeleton_layer_leaky_relu: default_exes[%s] does not exist' % compiler_name + print('Skip - ' + e) + pytest.skip(e) output_file_name = '%s/bamboo/unit_tests/output/layer_leaky_relu_%s_output.txt' % (dir_name, compiler_name) error_file_name = '%s/bamboo/unit_tests/error/layer_leaky_relu_%s_error.txt' % (dir_name, compiler_name) command = tools.get_command( - cluster=cluster, executable=executables[compiler_name], num_nodes=1, num_processes=2, dir_name=dir_name, + cluster=cluster, executable=executables[compiler_name], num_nodes=1, + num_processes=2, dir_name=dir_name, data_filedir_default='', data_reader_name='synthetic', - model_folder='tests/layer_tests', model_name='leaky_relu', optimizer_name='sgd', + model_folder='tests/layer_tests', model_name='leaky_relu', + optimizer_name='sgd', output_file_name=output_file_name, error_file_name=error_file_name) return_code = os.system(command) assert return_code == 0 + def test_unit_layer_leaky_relu_clang4(cluster, exes, dirname): skeleton_layer_leaky_relu(cluster, exes, dirname, 'clang4') + def test_unit_layer_leaky_relu_gcc4_check(cluster, exes, dirname): - if cluster in ['surface']: - pytest.skip('FIXME') - # Surface Errors: - # assert 34304 == 0 skeleton_layer_leaky_relu(cluster, exes, dirname, 'gcc4') + def 
test_unit_layer_leaky_relu_gcc7(cluster, exes, dirname): skeleton_layer_leaky_relu(cluster, exes, dirname, 'gcc7') + def test_unit_layer_leaky_relu_intel18(cluster, exes, dirname): skeleton_layer_leaky_relu(cluster, exes, dirname, 'intel18') + # Run with python -m pytest -s test_unit_layer_leaky_relu.py -k 'test_unit_layer_leaky_relu_exe' --exe= def test_unit_layer_leaky_relu_exe(cluster, dirname, exe): - if exe == None: - pytest.skip('Non-local testing') - exes = {'exe' : exe} + if exe is None: + e = 'test_unit_layer_leaky_relu_exe: Non-local testing' + print('Skip - ' + e) + pytest.skip(e) + exes = {'exe': exe} skeleton_layer_leaky_relu(cluster, exes, dirname, 'exe') diff --git a/bamboo/unit_tests/test_unit_layer_log_sigmoid.py b/bamboo/unit_tests/test_unit_layer_log_sigmoid.py index bda8dab5b98..9a47d55754d 100644 --- a/bamboo/unit_tests/test_unit_layer_log_sigmoid.py +++ b/bamboo/unit_tests/test_unit_layer_log_sigmoid.py @@ -4,38 +4,46 @@ import pytest import os + def skeleton_layer_log_sigmoid(cluster, executables, dir_name, compiler_name): if compiler_name not in executables: - pytest.skip('default_exes[%s] does not exist' % compiler_name) + e = 'skeleton_layer_log_sigmoid: default_exes[%s] does not exist' % compiler_name + print('Skip - ' + e) + pytest.skip(e) output_file_name = '%s/bamboo/unit_tests/output/layer_log_sigmoid_%s_output.txt' % (dir_name, compiler_name) error_file_name = '%s/bamboo/unit_tests/error/layer_log_sigmoid_%s_error.txt' % (dir_name, compiler_name) command = tools.get_command( - cluster=cluster, executable=executables[compiler_name], num_nodes=1, num_processes=2, dir_name=dir_name, + cluster=cluster, executable=executables[compiler_name], num_nodes=1, + num_processes=2, dir_name=dir_name, data_filedir_default='', data_reader_name='synthetic', - model_folder='tests/layer_tests', model_name='log_sigmoid', optimizer_name='sgd', + model_folder='tests/layer_tests', model_name='log_sigmoid', + optimizer_name='sgd', output_file_name=output_file_name, error_file_name=error_file_name) return_code = os.system(command) assert return_code == 0 + def test_unit_layer_log_sigmoid_clang4(cluster, exes, dirname): skeleton_layer_log_sigmoid(cluster, exes, dirname, 'clang4') + def test_unit_layer_log_sigmoid_gcc4_check(cluster, exes, dirname): - if cluster in ['surface']: - pytest.skip('FIXME') - # Surface Errors: - # assert 34304 == 0 skeleton_layer_log_sigmoid(cluster, exes, dirname, 'gcc4') + def test_unit_layer_log_sigmoid_gcc7(cluster, exes, dirname): skeleton_layer_log_sigmoid(cluster, exes, dirname, 'gcc7') + def test_unit_layer_log_sigmoid_intel18(cluster, exes, dirname): skeleton_layer_log_sigmoid(cluster, exes, dirname, 'intel18') + # Run with python -m pytest -s test_unit_layer_log_sigmoid.py -k 'test_unit_layer_log_sigmoid_exe' --exe= def test_unit_layer_log_sigmoid_exe(cluster, dirname, exe): - if exe == None: - pytest.skip('Non-local testing') - exes = {'exe' : exe} + if exe is None: + e = 'test_unit_layer_log_sigmoid_exe: Non-local testing' + print('Skip - ' + e) + pytest.skip(e) + exes = {'exe': exe} skeleton_layer_log_sigmoid(cluster, exes, dirname, 'exe') diff --git a/bamboo/unit_tests/test_unit_layer_log_softmax.py b/bamboo/unit_tests/test_unit_layer_log_softmax.py index 749cd34dc22..85a20790d31 100644 --- a/bamboo/unit_tests/test_unit_layer_log_softmax.py +++ b/bamboo/unit_tests/test_unit_layer_log_softmax.py @@ -4,38 +4,46 @@ import pytest import os + def skeleton_layer_log_softmax(cluster, executables, dir_name, compiler_name): if compiler_name not in 
executables: - pytest.skip('default_exes[%s] does not exist' % compiler_name) + e = 'skeleton_layer_log_softmax: default_exes[%s] does not exist' % compiler_name + print('Skip - ' + e) + pytest.skip(e) output_file_name = '%s/bamboo/unit_tests/output/layer_log_softmax_%s_output.txt' % (dir_name, compiler_name) error_file_name = '%s/bamboo/unit_tests/error/layer_log_softmax_%s_error.txt' % (dir_name, compiler_name) command = tools.get_command( - cluster=cluster, executable=executables[compiler_name], num_nodes=1, num_processes=2, dir_name=dir_name, + cluster=cluster, executable=executables[compiler_name], + num_nodes=1, num_processes=2, dir_name=dir_name, data_filedir_default='', data_reader_name='synthetic', - model_folder='tests/layer_tests', model_name='log_softmax', optimizer_name='sgd', + model_folder='tests/layer_tests', model_name='log_softmax', + optimizer_name='sgd', output_file_name=output_file_name, error_file_name=error_file_name) return_code = os.system(command) assert return_code == 0 + def test_unit_layer_log_softmax_clang4(cluster, exes, dirname): skeleton_layer_log_softmax(cluster, exes, dirname, 'clang4') + def test_unit_layer_log_softmax_gcc4_check(cluster, exes, dirname): - if cluster in ['surface']: - pytest.skip('FIXME') - # Surface Errors: - # assert 34304 == 0 skeleton_layer_log_softmax(cluster, exes, dirname, 'gcc4') + def test_unit_layer_log_softmax_gcc7(cluster, exes, dirname): skeleton_layer_log_softmax(cluster, exes, dirname, 'gcc7') + def test_unit_layer_log_softmax_intel18(cluster, exes, dirname): skeleton_layer_log_softmax(cluster, exes, dirname, 'intel18') + # Run with python -m pytest -s test_unit_layer_log_softmax.py -k 'test_unit_layer_log_softmax_exe' --exe= def test_unit_layer_log_softmax_exe(cluster, dirname, exe): - if exe == None: - pytest.skip('Non-local testing') - exes = {'exe' : exe} + if exe is None: + e = 'test_unit_layer_log_softmax_exe: Non-local testing' + print('Skip - ' + e) + pytest.skip(e) + exes = {'exe': exe} skeleton_layer_log_softmax(cluster, exes, dirname, 'exe') diff --git a/bamboo/unit_tests/test_unit_layer_mean_absolute_error.py b/bamboo/unit_tests/test_unit_layer_mean_absolute_error.py index 62768e6afe8..c21544ed295 100644 --- a/bamboo/unit_tests/test_unit_layer_mean_absolute_error.py +++ b/bamboo/unit_tests/test_unit_layer_mean_absolute_error.py @@ -4,38 +4,46 @@ import pytest import os + def skeleton_layer_mean_absolute_error(cluster, executables, dir_name, compiler_name): if compiler_name not in executables: - pytest.skip('default_exes[%s] does not exist' % compiler_name) + e = 'skeleton_layer_mean_absolute_error: default_exes[%s] does not exist' % compiler_name + print('Skip - ' + e) + pytest.skip(e) output_file_name = '%s/bamboo/unit_tests/output/layer_mean_absolute_error_%s_output.txt' % (dir_name, compiler_name) error_file_name = '%s/bamboo/unit_tests/error/layer_mean_absolute_error_%s_error.txt' % (dir_name, compiler_name) command = tools.get_command( - cluster=cluster, executable=executables[compiler_name], num_nodes=1, num_processes=2, dir_name=dir_name, + cluster=cluster, executable=executables[compiler_name], num_nodes=1, + num_processes=2, dir_name=dir_name, data_filedir_default='', data_reader_name='synthetic', - model_folder='tests/layer_tests', model_name='mean_absolute_error', optimizer_name='sgd', + model_folder='tests/layer_tests', model_name='mean_absolute_error', + optimizer_name='sgd', output_file_name=output_file_name, error_file_name=error_file_name) return_code = os.system(command) assert return_code == 0 
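Every test touched by this diff now follows the same print-then-skip idiom: the reason string is printed before pytest.skip is called, since pytest only reports skip reasons in its summary when invoked with -rs, and printing keeps the reason visible in raw Bamboo logs. If further deduplication is wanted later, the idiom could be hoisted into the shared tools module; a minimal sketch, where the helper name print_skip is hypothetical and not part of this diff:

import pytest

def print_skip(reason):
    # Echo the skip reason to stdout so it appears in Bamboo logs
    # even without pytest's -rs flag, then skip the current test.
    print('Skip - ' + reason)
    pytest.skip(reason)

A call site such as test_unit_layer_log_softmax_exe would then reduce to: if exe is None: print_skip('test_unit_layer_log_softmax_exe: Non-local testing').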
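Likewise, the skeleton_layer_* functions in these unit-test modules (including the relu, selu, and sigmoid tests that follow) differ only in the model name used for the output/error paths and for tools.get_command. A single parameterized driver could serve all of them; a minimal sketch under that assumption, where skeleton_layer_test is a hypothetical name and tools.get_command is assumed to keep the keyword signature used throughout these tests:

import os
import sys
sys.path.insert(0, '../common_python')
import tools
import pytest

def skeleton_layer_test(cluster, executables, dir_name, compiler_name,
                        layer_name):
    # Generic form of the per-layer skeletons: only layer_name varies.
    if compiler_name not in executables:
        e = 'skeleton_layer_%s: default_exes[%s] does not exist' % (
            layer_name, compiler_name)
        print('Skip - ' + e)
        pytest.skip(e)
    output_file_name = '%s/bamboo/unit_tests/output/layer_%s_%s_output.txt' % (
        dir_name, layer_name, compiler_name)
    error_file_name = '%s/bamboo/unit_tests/error/layer_%s_%s_error.txt' % (
        dir_name, layer_name, compiler_name)
    command = tools.get_command(
        cluster=cluster, executable=executables[compiler_name], num_nodes=1,
        num_processes=2, dir_name=dir_name,
        data_filedir_default='', data_reader_name='synthetic',
        model_folder='tests/layer_tests', model_name=layer_name,
        optimizer_name='sgd',
        output_file_name=output_file_name, error_file_name=error_file_name)
    return_code = os.system(command)
    assert return_code == 0

With such a driver, test_unit_layer_sigmoid_gcc7 would collapse to skeleton_layer_test(cluster, exes, dirname, 'gcc7', 'sigmoid').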
+ def test_unit_layer_mean_absolute_error_clang4(cluster, exes, dirname): skeleton_layer_mean_absolute_error(cluster, exes, dirname, 'clang4') + def test_unit_layer_mean_absolute_error_gcc4_check(cluster, exes, dirname): - if cluster in ['surface']: - pytest.skip('FIXME') - # Surface Errors: - # assert 34304 == 0 skeleton_layer_mean_absolute_error(cluster, exes, dirname, 'gcc4') + def test_unit_layer_mean_absolute_error_gcc7(cluster, exes, dirname): skeleton_layer_mean_absolute_error(cluster, exes, dirname, 'gcc7') + def test_unit_layer_mean_absolute_error_intel18(cluster, exes, dirname): skeleton_layer_mean_absolute_error(cluster, exes, dirname, 'intel18') + # Run with python -m pytest -s test_unit_ridge_regression.py -k 'test_unit_layer_mean_absolute_error_exe' --exe= def test_unit_layer_mean_absolute_error_exe(cluster, dirname, exe): - if exe == None: - pytest.skip('Non-local testing') - exes = {'exe' : exe} + if exe is None: + e = 'test_unit_layer_mean_absolute_error_exe: Non-local testing' + print('Skip - ' + e) + pytest.skip(e) + exes = {'exe': exe} skeleton_layer_mean_absolute_error(cluster, exes, dirname, 'exe') diff --git a/bamboo/unit_tests/test_unit_layer_relu.py b/bamboo/unit_tests/test_unit_layer_relu.py index 0b66c9fabb2..c904cce301f 100644 --- a/bamboo/unit_tests/test_unit_layer_relu.py +++ b/bamboo/unit_tests/test_unit_layer_relu.py @@ -4,38 +4,46 @@ import pytest import os + def skeleton_layer_relu(cluster, executables, dir_name, compiler_name): if compiler_name not in executables: - pytest.skip('default_exes[%s] does not exist' % compiler_name) + e = 'skeleton_layer_relu: default_exes[%s] does not exist' % compiler_name + print('Skip - ' + e) + pytest.skip(e) output_file_name = '%s/bamboo/unit_tests/output/layer_relu_%s_output.txt' % (dir_name, compiler_name) error_file_name = '%s/bamboo/unit_tests/error/layer_relu_%s_error.txt' % (dir_name, compiler_name) command = tools.get_command( - cluster=cluster, executable=executables[compiler_name], num_nodes=1, num_processes=2, dir_name=dir_name, + cluster=cluster, executable=executables[compiler_name], + num_nodes=1, num_processes=2, dir_name=dir_name, data_filedir_default='', data_reader_name='synthetic', - model_folder='tests/layer_tests', model_name='relu', optimizer_name='sgd', + model_folder='tests/layer_tests', model_name='relu', + optimizer_name='sgd', output_file_name=output_file_name, error_file_name=error_file_name) return_code = os.system(command) assert return_code == 0 + def test_unit_layer_relu_clang4(cluster, exes, dirname): skeleton_layer_relu(cluster, exes, dirname, 'clang4') + def test_unit_layer_relu_gcc4_check(cluster, exes, dirname): - if cluster in ['surface']: - pytest.skip('FIXME') - # Surface Errors: - # assert 34304 == 0 skeleton_layer_relu(cluster, exes, dirname, 'gcc4') + def test_unit_layer_relu_gcc7(cluster, exes, dirname): skeleton_layer_relu(cluster, exes, dirname, 'gcc7') + def test_unit_layer_relu_intel18(cluster, exes, dirname): skeleton_layer_relu(cluster, exes, dirname, 'intel18') + # Run with python -m pytest -s test_unit_layer_relu.py -k 'test_unit_layer_relu_exe' --exe= def test_unit_layer_relu_exe(cluster, dirname, exe): - if exe == None: - pytest.skip('Non-local testing') - exes = {'exe' : exe} + if exe is None: + e = 'test_unit_layer_relu_exe: Non-local testing' + print('Skip - ' + e) + pytest.skip(e) + exes = {'exe': exe} skeleton_layer_relu(cluster, exes, dirname, 'exe') diff --git a/bamboo/unit_tests/test_unit_layer_selu.py b/bamboo/unit_tests/test_unit_layer_selu.py index 
5fb4cef8d1e..b32f8c9eb71 100644 --- a/bamboo/unit_tests/test_unit_layer_selu.py +++ b/bamboo/unit_tests/test_unit_layer_selu.py @@ -4,38 +4,46 @@ import pytest import os + def skeleton_layer_selu(cluster, executables, dir_name, compiler_name): if compiler_name not in executables: - pytest.skip('default_exes[%s] does not exist' % compiler_name) + e = 'skeleton_layer_selu: default_exes[%s] does not exist' % compiler_name + print('Skip - ' + e) + pytest.skip(e) output_file_name = '%s/bamboo/unit_tests/output/layer_selu_%s_output.txt' % (dir_name, compiler_name) error_file_name = '%s/bamboo/unit_tests/error/layer_selu_%s_error.txt' % (dir_name, compiler_name) command = tools.get_command( - cluster=cluster, executable=executables[compiler_name], num_nodes=1, num_processes=2, dir_name=dir_name, + cluster=cluster, executable=executables[compiler_name], + num_nodes=1, num_processes=2, dir_name=dir_name, data_filedir_default='', data_reader_name='synthetic', - model_folder='tests/layer_tests', model_name='selu', optimizer_name='sgd', + model_folder='tests/layer_tests', model_name='selu', + optimizer_name='sgd', output_file_name=output_file_name, error_file_name=error_file_name) return_code = os.system(command) assert return_code == 0 + def test_unit_layer_selu_clang4(cluster, exes, dirname): skeleton_layer_selu(cluster, exes, dirname, 'clang4') + def test_unit_layer_selu_gcc4_check(cluster, exes, dirname): - if cluster in ['surface']: - pytest.skip('FIXME') - # Surface Errors: - # assert 34304 == 0 skeleton_layer_selu(cluster, exes, dirname, 'gcc4') + def test_unit_layer_selu_gcc7(cluster, exes, dirname): skeleton_layer_selu(cluster, exes, dirname, 'gcc7') + def test_unit_layer_selu_intel18(cluster, exes, dirname): skeleton_layer_selu(cluster, exes, dirname, 'intel18') + # Run with python -m pytest -s test_unit_layer_selu.py -k 'test_unit_layer_selu_exe' --exe= def test_unit_layer_selu_exe(cluster, dirname, exe): - if exe == None: - pytest.skip('Non-local testing') - exes = {'exe' : exe} + if exe is None: + e = 'test_unit_layer_selu_exe: Non-local testing' + print('Skip - ' + e) + pytest.skip(e) + exes = {'exe': exe} skeleton_layer_selu(cluster, exes, dirname, 'exe') diff --git a/bamboo/unit_tests/test_unit_layer_sigmoid.py b/bamboo/unit_tests/test_unit_layer_sigmoid.py index 2c0cc2d3d4e..268526b7644 100644 --- a/bamboo/unit_tests/test_unit_layer_sigmoid.py +++ b/bamboo/unit_tests/test_unit_layer_sigmoid.py @@ -4,38 +4,46 @@ import pytest import os + def skeleton_layer_sigmoid(cluster, executables, dir_name, compiler_name): if compiler_name not in executables: - pytest.skip('default_exes[%s] does not exist' % compiler_name) + e = 'skeleton_layer_sigmoid: default_exes[%s] does not exist' % compiler_name + print('Skip - ' + e) + pytest.skip(e) output_file_name = '%s/bamboo/unit_tests/output/layer_sigmoid_%s_output.txt' % (dir_name, compiler_name) error_file_name = '%s/bamboo/unit_tests/error/layer_sigmoid_%s_error.txt' % (dir_name, compiler_name) command = tools.get_command( - cluster=cluster, executable=executables[compiler_name], num_nodes=1, num_processes=2, dir_name=dir_name, + cluster=cluster, executable=executables[compiler_name], + num_nodes=1, num_processes=2, dir_name=dir_name, data_filedir_default='', data_reader_name='synthetic', - model_folder='tests/layer_tests', model_name='sigmoid', optimizer_name='sgd', + model_folder='tests/layer_tests', model_name='sigmoid', + optimizer_name='sgd', output_file_name=output_file_name, error_file_name=error_file_name) return_code = os.system(command) 
assert return_code == 0 + def test_unit_layer_sigmoid_clang4(cluster, exes, dirname): skeleton_layer_sigmoid(cluster, exes, dirname, 'clang4') + def test_unit_layer_sigmoid_gcc4_check(cluster, exes, dirname): - if cluster in ['surface']: - pytest.skip('FIXME') - # Surface Errors: - # assert 34304 == 0 skeleton_layer_sigmoid(cluster, exes, dirname, 'gcc4') + def test_unit_layer_sigmoid_gcc7(cluster, exes, dirname): skeleton_layer_sigmoid(cluster, exes, dirname, 'gcc7') + def test_unit_layer_sigmoid_intel18(cluster, exes, dirname): skeleton_layer_sigmoid(cluster, exes, dirname, 'intel18') + # Run with python -m pytest -s test_unit_layer_sigmoid.py -k 'test_unit_layer_sigmoid_exe' --exe= def test_unit_layer_sigmoid_exe(cluster, dirname, exe): - if exe == None: - pytest.skip('Non-local testing') - exes = {'exe' : exe} + if exe is None: + e = 'test_unit_layer_sigmoid_exe: Non-local testing' + print('Skip - ' + e) + pytest.skip(e) + exes = {'exe': exe} skeleton_layer_sigmoid(cluster, exes, dirname, 'exe') diff --git a/bamboo/unit_tests/test_unit_layer_softmax.py b/bamboo/unit_tests/test_unit_layer_softmax.py index dd1742a551c..dd4c3add193 100644 --- a/bamboo/unit_tests/test_unit_layer_softmax.py +++ b/bamboo/unit_tests/test_unit_layer_softmax.py @@ -4,38 +4,46 @@ import pytest import os + def skeleton_layer_softmax(cluster, executables, dir_name, compiler_name): if compiler_name not in executables: - pytest.skip('default_exes[%s] does not exist' % compiler_name) + e = 'skeleton_layer_softmax: default_exes[%s] does not exist' % compiler_name + print('Skip - ' + e) + pytest.skip(e) output_file_name = '%s/bamboo/unit_tests/output/layer_softmax_%s_output.txt' % (dir_name, compiler_name) error_file_name = '%s/bamboo/unit_tests/error/layer_softmax_%s_error.txt' % (dir_name, compiler_name) command = tools.get_command( - cluster=cluster, executable=executables[compiler_name], num_nodes=1, num_processes=2, dir_name=dir_name, + cluster=cluster, executable=executables[compiler_name], + num_nodes=1, num_processes=2, dir_name=dir_name, data_filedir_default='', data_reader_name='synthetic', - model_folder='tests/layer_tests', model_name='softmax', optimizer_name='sgd', + model_folder='tests/layer_tests', model_name='softmax', + optimizer_name='sgd', output_file_name=output_file_name, error_file_name=error_file_name) return_code = os.system(command) assert return_code == 0 + def test_unit_layer_softmax_clang4(cluster, exes, dirname): skeleton_layer_softmax(cluster, exes, dirname, 'clang4') + def test_unit_layer_softmax_gcc4_check(cluster, exes, dirname): - if cluster in ['surface']: - pytest.skip('FIXME') - # Surface Errors: - # assert 34304 == 0 skeleton_layer_softmax(cluster, exes, dirname, 'gcc4') + def test_unit_layer_softmax_gcc7(cluster, exes, dirname): skeleton_layer_softmax(cluster, exes, dirname, 'gcc7') + def test_unit_layer_softmax_intel18(cluster, exes, dirname): skeleton_layer_softmax(cluster, exes, dirname, 'intel18') + # Run with python -m pytest -s test_unit_ridge_regression.py -k 'test_unit_layer_softmax_exe' --exe= def test_unit_layer_softmax_exe(cluster, dirname, exe): - if exe == None: - pytest.skip('Non-local testing') - exes = {'exe' : exe} + if exe is None: + e = 'test_unit_layer_softmax_exe: Non-local testing' + print('Skip - ' + e) + pytest.skip(e) + exes = {'exe': exe} skeleton_layer_softmax(cluster, exes, dirname, 'exe') diff --git a/bamboo/unit_tests/test_unit_layer_softplus.py b/bamboo/unit_tests/test_unit_layer_softplus.py index bc7d5605988..0c017c6f93e 100644 --- 
a/bamboo/unit_tests/test_unit_layer_softplus.py +++ b/bamboo/unit_tests/test_unit_layer_softplus.py @@ -4,38 +4,46 @@ import pytest import os + def skeleton_layer_softplus(cluster, executables, dir_name, compiler_name): if compiler_name not in executables: - pytest.skip('default_exes[%s] does not exist' % compiler_name) + e = 'skeleton_layer_softplus: default_exes[%s] does not exist' % compiler_name + print('Skip - ' + e) + pytest.skip(e) output_file_name = '%s/bamboo/unit_tests/output/layer_softplus_%s_output.txt' % (dir_name, compiler_name) error_file_name = '%s/bamboo/unit_tests/error/layer_softplus_%s_error.txt' % (dir_name, compiler_name) command = tools.get_command( - cluster=cluster, executable=executables[compiler_name], num_nodes=1, num_processes=2, dir_name=dir_name, + cluster=cluster, executable=executables[compiler_name], num_nodes=1, + num_processes=2, dir_name=dir_name, data_filedir_default='', data_reader_name='synthetic', - model_folder='tests/layer_tests', model_name='softplus', optimizer_name='sgd', + model_folder='tests/layer_tests', model_name='softplus', + optimizer_name='sgd', output_file_name=output_file_name, error_file_name=error_file_name) return_code = os.system(command) assert return_code == 0 + def test_unit_layer_softplus_clang4(cluster, exes, dirname): skeleton_layer_softplus(cluster, exes, dirname, 'clang4') + def test_unit_layer_softplus_gcc4_check(cluster, exes, dirname): - if cluster in ['surface']: - pytest.skip('FIXME') - # Surface Errors: - # assert 34304 == 0 skeleton_layer_softplus(cluster, exes, dirname, 'gcc4') + def test_unit_layer_softplus_gcc7(cluster, exes, dirname): skeleton_layer_softplus(cluster, exes, dirname, 'gcc7') + def test_unit_layer_softplus_intel18(cluster, exes, dirname): skeleton_layer_softplus(cluster, exes, dirname, 'intel18') + # Run with python -m pytest -s test_unit_layer_softplus.py -k 'test_unit_layer_softplus_exe' --exe= def test_unit_layer_softplus_exe(cluster, dirname, exe): - if exe == None: - pytest.skip('Non-local testing') - exes = {'exe' : exe} + if exe is None: + e = 'test_unit_layer_softplus_exe: Non-local testing' + print('Skip - ' + e) + pytest.skip(e) + exes = {'exe': exe} skeleton_layer_softplus(cluster, exes, dirname, 'exe') diff --git a/bamboo/unit_tests/test_unit_layer_softsign.py b/bamboo/unit_tests/test_unit_layer_softsign.py index 667efb172c3..a7bed251425 100644 --- a/bamboo/unit_tests/test_unit_layer_softsign.py +++ b/bamboo/unit_tests/test_unit_layer_softsign.py @@ -4,38 +4,46 @@ import pytest import os + def skeleton_layer_softsign(cluster, executables, dir_name, compiler_name): if compiler_name not in executables: - pytest.skip('default_exes[%s] does not exist' % compiler_name) + e = 'skeleton_layer_softsign: default_exes[%s] does not exist' % compiler_name + print('Skip - ' + e) + pytest.skip(e) output_file_name = '%s/bamboo/unit_tests/output/layer_softsign_%s_output.txt' % (dir_name, compiler_name) error_file_name = '%s/bamboo/unit_tests/error/layer_softsign_%s_error.txt' % (dir_name, compiler_name) command = tools.get_command( - cluster=cluster, executable=executables[compiler_name], num_nodes=1, num_processes=2, dir_name=dir_name, + cluster=cluster, executable=executables[compiler_name], num_nodes=1, + num_processes=2, dir_name=dir_name, data_filedir_default='', data_reader_name='synthetic', - model_folder='tests/layer_tests', model_name='softsign', optimizer_name='sgd', + model_folder='tests/layer_tests', model_name='softsign', + optimizer_name='sgd', output_file_name=output_file_name, 
error_file_name=error_file_name) return_code = os.system(command) assert return_code == 0 + def test_unit_layer_softsign_clang4(cluster, exes, dirname): skeleton_layer_softsign(cluster, exes, dirname, 'clang4') + def test_unit_layer_softsign_gcc4_check(cluster, exes, dirname): - if cluster in ['surface']: - pytest.skip('FIXME') - # Surface Errors: - # assert 34304 == 0 skeleton_layer_softsign(cluster, exes, dirname, 'gcc4') + def test_unit_layer_softsign_gcc7(cluster, exes, dirname): skeleton_layer_softsign(cluster, exes, dirname, 'gcc7') + def test_unit_layer_softsign_intel18(cluster, exes, dirname): skeleton_layer_softsign(cluster, exes, dirname, 'intel18') + # Run with python -m pytest -s test_unit_layer_softsign.py -k 'test_unit_layer_softsign_exe' --exe= def test_unit_layer_softsign_exe(cluster, dirname, exe): - if exe == None: - pytest.skip('Non-local testing') - exes = {'exe' : exe} + if exe is None: + e = 'test_unit_layer_softsign_exe: Non-local testing' + print('Skip - ' + e) + pytest.skip(e) + exes = {'exe': exe} skeleton_layer_softsign(cluster, exes, dirname, 'exe') diff --git a/bamboo/unit_tests/test_unit_layer_squared_difference.py b/bamboo/unit_tests/test_unit_layer_squared_difference.py index 201267757d7..a05bbcc5082 100644 --- a/bamboo/unit_tests/test_unit_layer_squared_difference.py +++ b/bamboo/unit_tests/test_unit_layer_squared_difference.py @@ -4,38 +4,46 @@ import pytest import os + def skeleton_layer_squared_difference(cluster, executables, dir_name, compiler_name): if compiler_name not in executables: - pytest.skip('default_exes[%s] does not exist' % compiler_name) + e = 'skeleton_layer_squared_difference: default_exes[%s] does not exist' % compiler_name + print('Skip - ' + e) + pytest.skip(e) output_file_name = '%s/bamboo/unit_tests/output/layer_squared_difference_%s_output.txt' % (dir_name, compiler_name) error_file_name = '%s/bamboo/unit_tests/error/layer_squared_difference_%s_error.txt' % (dir_name, compiler_name) command = tools.get_command( - cluster=cluster, executable=executables[compiler_name], num_nodes=1, num_processes=2, dir_name=dir_name, + cluster=cluster, executable=executables[compiler_name], num_nodes=1, + num_processes=2, dir_name=dir_name, data_filedir_default='', data_reader_name='synthetic', - model_folder='tests/layer_tests', model_name='squared_difference', optimizer_name='sgd', + model_folder='tests/layer_tests', model_name='squared_difference', + optimizer_name='sgd', output_file_name=output_file_name, error_file_name=error_file_name) return_code = os.system(command) assert return_code == 0 + def test_unit_layer_squared_difference_clang4(cluster, exes, dirname): skeleton_layer_squared_difference(cluster, exes, dirname, 'clang4') + def test_unit_layer_squared_difference_gcc4_check(cluster, exes, dirname): - if cluster in ['surface']: - pytest.skip('FIXME') - # Surface Errors: - # assert 34304 == 0 skeleton_layer_squared_difference(cluster, exes, dirname, 'gcc4') + def test_unit_layer_squared_difference_gcc7(cluster, exes, dirname): skeleton_layer_squared_difference(cluster, exes, dirname, 'gcc7') + def test_unit_layer_squared_difference_intel18(cluster, exes, dirname): skeleton_layer_squared_difference(cluster, exes, dirname, 'intel18') + # Run with python -m pytest -s test_unit_layer_squared_difference.py -k 'test_unit_layer_squared_difference_exe' --exe= def test_unit_layer_squared_difference_exe(cluster, dirname, exe): - if exe == None: - pytest.skip('Non-local testing') - exes = {'exe' : exe} + if exe is None: + e = 
'test_unit_layer_squared_difference_exe: Non-local testing' + print('Skip - ' + e) + pytest.skip(e) + exes = {'exe': exe} skeleton_layer_squared_difference(cluster, exes, dirname, 'exe') diff --git a/bamboo/unit_tests/test_unit_layer_tessellate.py b/bamboo/unit_tests/test_unit_layer_tessellate.py index 25e30770c63..575bd894f89 100644 --- a/bamboo/unit_tests/test_unit_layer_tessellate.py +++ b/bamboo/unit_tests/test_unit_layer_tessellate.py @@ -4,38 +4,46 @@ import pytest import os + def skeleton_layer_tessellate(cluster, executables, dir_name, compiler_name): if compiler_name not in executables: - pytest.skip('default_exes[%s] does not exist' % compiler_name) + e = 'skeleton_layer_tessellate: default_exes[%s] does not exist' % compiler_name + print('Skip - ' + e) + pytest.skip(e) output_file_name = '%s/bamboo/unit_tests/output/layer_tessellate_%s_output.txt' % (dir_name, compiler_name) error_file_name = '%s/bamboo/unit_tests/error/layer_tessellate_%s_error.txt' % (dir_name, compiler_name) command = tools.get_command( - cluster=cluster, executable=executables[compiler_name], num_nodes=1, num_processes=2, dir_name=dir_name, + cluster=cluster, executable=executables[compiler_name], num_nodes=1, + num_processes=2, dir_name=dir_name, data_filedir_default='', data_reader_name='synthetic', - model_folder='tests/layer_tests', model_name='tessellate', optimizer_name='sgd', + model_folder='tests/layer_tests', model_name='tessellate', + optimizer_name='sgd', output_file_name=output_file_name, error_file_name=error_file_name) return_code = os.system(command) assert return_code == 0 + def test_unit_layer_tessellate_clang4(cluster, exes, dirname): skeleton_layer_tessellate(cluster, exes, dirname, 'clang4') + def test_unit_layer_tessellate_gcc4_check(cluster, exes, dirname): - if cluster in ['surface']: - pytest.skip('FIXME') - # Surface Errors: - # assert 34304 == 0 skeleton_layer_tessellate(cluster, exes, dirname, 'gcc4') + def test_unit_layer_tessellate_gcc7(cluster, exes, dirname): skeleton_layer_tessellate(cluster, exes, dirname, 'gcc7') + def test_unit_layer_tessellate_intel18(cluster, exes, dirname): skeleton_layer_tessellate(cluster, exes, dirname, 'intel18') + # Run with python -m pytest -s test_unit_layer_tessellate.py -k 'test_unit_layer_tessellate_exe' --exe= def test_unit_layer_tessellate_exe(cluster, dirname, exe): - if exe == None: - pytest.skip('Non-local testing') - exes = {'exe' : exe} + if exe is None: + e = 'test_unit_layer_tessellate_exe: Non-local testing' + print('Skip - ' + e) + pytest.skip(e) + exes = {'exe': exe} skeleton_layer_tessellate(cluster, exes, dirname, 'exe') diff --git a/bamboo/unit_tests/test_unit_layer_variance.py b/bamboo/unit_tests/test_unit_layer_variance.py index 4b476aedf5b..0db001567d5 100644 --- a/bamboo/unit_tests/test_unit_layer_variance.py +++ b/bamboo/unit_tests/test_unit_layer_variance.py @@ -4,38 +4,46 @@ import pytest import os + def skeleton_layer_variance(cluster, executables, dir_name, compiler_name): if compiler_name not in executables: - pytest.skip('default_exes[%s] does not exist' % compiler_name) + e = 'skeleton_layer_variance: default_exes[%s] does not exist' % compiler_name + print('Skip - ' + e) + pytest.skip(e) output_file_name = '%s/bamboo/unit_tests/output/layer_variance_%s_output.txt' % (dir_name, compiler_name) error_file_name = '%s/bamboo/unit_tests/error/layer_variance_%s_error.txt' % (dir_name, compiler_name) command = tools.get_command( - cluster=cluster, executable=executables[compiler_name], num_nodes=1, num_processes=2, 
dir_name=dir_name, + cluster=cluster, executable=executables[compiler_name], + num_nodes=1, num_processes=2, dir_name=dir_name, data_filedir_default='', data_reader_name='synthetic', - model_folder='tests/layer_tests', model_name='variance', optimizer_name='sgd', + model_folder='tests/layer_tests', model_name='variance', + optimizer_name='sgd', output_file_name=output_file_name, error_file_name=error_file_name) return_code = os.system(command) assert return_code == 0 + def test_unit_layer_variance_clang4(cluster, exes, dirname): skeleton_layer_variance(cluster, exes, dirname, 'clang4') + def test_unit_layer_variance_gcc4_check(cluster, exes, dirname): - if cluster in ['surface']: - pytest.skip('FIXME') - # Surface Errors: - # assert 34304 == 0 skeleton_layer_variance(cluster, exes, dirname, 'gcc4') + def test_unit_layer_variance_gcc7(cluster, exes, dirname): skeleton_layer_variance(cluster, exes, dirname, 'gcc7') + def test_unit_layer_variance_intel18(cluster, exes, dirname): skeleton_layer_variance(cluster, exes, dirname, 'intel18') + # Run with python -m pytest -s test_unit_ridge_regression.py -k 'test_unit_layer_variance_exe' --exe= def test_unit_layer_variance_exe(cluster, dirname, exe): - if exe == None: - pytest.skip('Non-local testing') - exes = {'exe' : exe} + if exe is None: + e = 'test_unit_layer_variance_exe: Non-local testing' + print('Skip - ' + e) + pytest.skip(e) + exes = {'exe': exe} skeleton_layer_variance(cluster, exes, dirname, 'exe') diff --git a/bamboo/unit_tests/test_unit_lbann2_reload.py b/bamboo/unit_tests/test_unit_lbann2_reload.py index 8bad2453fae..4b8491e248f 100644 --- a/bamboo/unit_tests/test_unit_lbann2_reload.py +++ b/bamboo/unit_tests/test_unit_lbann2_reload.py @@ -4,11 +4,21 @@ import pytest import os, sys + def skeleton_lbann2_reload(cluster, executables, dir_name, compiler_name): if compiler_name not in executables: - pytest.skip('default_exes[%s] does not exist' % compiler_name) + e = 'skeleton_lbann2_reload: default_exes[%s] does not exist' % compiler_name + print('Skip - ' + e) + pytest.skip(e) + lbann2 = executables[compiler_name] + '2' + + # Delete directories / files if they happen to be around from the + # previous build. + os.system('rm -rf ckpt') + os.system('rm -rf lbann2_*') + - lbann2 = executables[compiler_name] + '2' + # No checkpointing, printing weights to files. model_path = '{../../model_zoo/models/lenet_mnist/model_lenet_mnist.prototext,../../model_zoo/tests/model_lenet_mnist_lbann2ckpt.prototext}' output_file_name = '%s/bamboo/unit_tests/output/lbann2_no_checkpoint_%s_output.txt' % (dir_name, compiler_name) error_file_name = '%s/bamboo/unit_tests/error/lbann2_no_checkpoint_%s_error.txt' % (dir_name, compiler_name) @@ -22,6 +32,7 @@ def skeleton_lbann2_reload(cluster, executables, dir_name, compiler_name): num_epochs=2, output_file_name=output_file_name, error_file_name=error_file_name) + os.mkdir('lbann2_ckpt') return_code = os.system(command) if return_code != 0: @@ -30,6 +41,7 @@ def skeleton_lbann2_reload(cluster, executables, dir_name, compiler_name): os.system('mv lbann2_ckpt lbann2_nockpt') + # Run to checkpoint, printing weights to files. 
output_file_name = '%s/bamboo/unit_tests/output/lbann2_checkpoint_%s_output.txt' % (dir_name, compiler_name) error_file_name = '%s/bamboo/unit_tests/error/lbann2_checkpoint_%s_error.txt' % (dir_name, compiler_name) command = tools.get_command( @@ -42,9 +54,11 @@ def skeleton_lbann2_reload(cluster, executables, dir_name, compiler_name): error_file_name=error_file_name) return_code_ckpt_1 = os.system(command) if return_code_ckpt_1 != 0: - sys.stderr.write('LeNet (checkpoint) execution failed, exiting with error') + sys.stderr.write( + 'LeNet (checkpoint) execution failed, exiting with error') sys.exit(1) + # Pick up from checkpoint, printing weights to files. output_file_name = '%s/bamboo/unit_tests/output/lbann2_restart_%s_output.txt' % (dir_name, compiler_name) error_file_name = '%s/bamboo/unit_tests/error/lbann2_restart_%s_error.txt' % (dir_name, compiler_name) os.mkdir('lbann2_ckpt') @@ -59,30 +73,76 @@ def skeleton_lbann2_reload(cluster, executables, dir_name, compiler_name): error_file_name=error_file_name) return_code_ckpt_2 = os.system(command) if return_code_ckpt_2 != 0: - sys.stderr.write('LBANN2 LeNet weight reload failed, exiting with error') + sys.stderr.write( + 'LBANN2 LeNet weight reload failed, exiting with error') sys.exit(1) os.system('rm lbann2_ckpt/model0-epoch*') os.system('rm lbann2_nockpt/model0-epoch*') - diff_test = os.system('diff -rq lbann2_ckpt/ lbann2_nockpt/') + + diff_result = os.system('diff -rq lbann2_ckpt/ lbann2_nockpt/') + allow_epsilon_diff = False + if allow_epsilon_diff and (diff_result != 0): + equal_within_epsilon = True + ckpt_files = os.listdir('lbann2_ckpt') + for file_name in ckpt_files: + ckpt_file = open('lbann2_ckpt/' + file_name, 'r') + no_ckpt_file = open('lbann2_nockpt/' + file_name, 'r') + for ckpt_line in ckpt_file: + no_ckpt_line = next(no_ckpt_file) + if ckpt_line != no_ckpt_line: + error_string = ('ckpt_line={ckpt_line},' + ' nockpt_line={no_ckpt_line}').format( + ckpt_line=ckpt_line, no_ckpt_line=no_ckpt_line) + try: + ckpt_values = list(map(float, ckpt_line.split())) + no_ckpt_values = list(map(float, no_ckpt_line.split())) + num = len(ckpt_values) + if len(no_ckpt_values) == num: + for i in range(num): + if abs(ckpt_values[i] - no_ckpt_values[i]) > 0.5: + # Not equal within epsilon. + equal_within_epsilon = False + print(error_string) + else: + # Length of lists don't match. + equal_within_epsilon = False + print(error_string) + except ValueError: + # Non-numerical diff. 
+ equal_within_epsilon = False + print(error_string) + if equal_within_epsilon: + diff_result = 0 os.system('rm -rf ckpt') os.system('rm -rf lbann2_*') - assert diff_test == 0 + assert diff_result == 0 + def test_unit_lbann2_reload_clang4(cluster, exes, dirname): + if cluster == 'catalyst': # STILL ERRORS + pytest.skip('FIXME') skeleton_lbann2_reload(cluster, exes, dirname, 'clang4') + def test_unit_lbann2_reload_gcc4(cluster, exes, dirname): skeleton_lbann2_reload(cluster, exes, dirname, 'gcc4') + def test_unit_lbann2_reload_gcc7(cluster, exes, dirname): + if cluster in ['catalyst', 'pascal']: # STILL ERRORS + pytest.skip('FIXME') skeleton_lbann2_reload(cluster, exes, dirname, 'gcc7') + def test_unit_lbann2_reload_intel18(cluster, exes, dirname): skeleton_lbann2_reload(cluster, exes, dirname, 'intel18') + # Run with python -m pytest -s test_unit_lbann2_reload.py -k 'test_unit_lbann2_reload_exe' --exe= def test_unit_lbann2_reload_exe(cluster, dirname, exe): - if exe == None: - pytest.skip('Non-local testing') - exes = {'exe' : exe} + if exe is None: + e = 'test_unit_lbann2_reload_exe: Non-local testing' + print('Skip - ' + e) + pytest.skip(e) + exes = {'exe': exe} skeleton_lbann2_reload(cluster, exes, dirname, 'exe') diff --git a/bamboo/unit_tests/test_unit_lbann_invocation.py b/bamboo/unit_tests/test_unit_lbann_invocation.py index efaf7db4686..a002db49be4 100644 --- a/bamboo/unit_tests/test_unit_lbann_invocation.py +++ b/bamboo/unit_tests/test_unit_lbann_invocation.py @@ -1,10 +1,9 @@ import sys sys.path.insert(0, '../common_python') import tools -import pytest import os, sys -def test_unit_no_params_bad(cluster, exes, dirname): +def test_unit_no_params_bad(cluster, exes): exe = exes['gcc4'] sys.stderr.write('TESTING: run lbann with no params; lbann should throw exception\n') command = tools.get_command( @@ -12,7 +11,8 @@ def test_unit_no_params_bad(cluster, exes, dirname): return_code = os.system(command) assert return_code != 0 -def test_unit_one_model_bad(cluster, exes, dirname): + +def test_unit_one_model_bad(cluster, exes): exe = exes['gcc4'] sys.stderr.write('TESTING: run lbann with no optimizer or reader; lbann should throw exception\n') model_path = 'prototext/model_mnist_simple_1.prototext' @@ -22,7 +22,8 @@ def test_unit_one_model_bad(cluster, exes, dirname): return_code = os.system(command) assert return_code != 0 -def test_unit_two_models_bad(cluster, exes, dirname): + +def test_unit_two_models_bad(cluster, exes): exe = exes['gcc4'] sys.stderr.write('TESTING: run lbann with two models but no optimizer or reader; lbann should throw exception\n') model_path = '{prototext/model_mnist_simple_1.prototext,prototext/model_mnist_simple_1.prototext}' @@ -32,7 +33,8 @@ def test_unit_two_models_bad(cluster, exes, dirname): return_code = os.system(command) assert return_code != 0 -def test_unit_two_models_bad2(cluster, exes, dirname): + +def test_unit_two_models_bad2(cluster, exes): exe = exes['gcc4'] sys.stderr.write('TESTING: run lbann with two models with missing {; lbann should throw exception\n') model_path='prototext/model_mnist_simple_1.prototext,prototext/model_mnist_simple_1.prototext}' @@ -42,7 +44,8 @@ def test_unit_two_models_bad2(cluster, exes, dirname): return_code = os.system(command) assert return_code != 0 -def test_unit_missing_optimizer(cluster, exes, dirname): + +def test_unit_missing_optimizer(cluster, exes): exe = exes['gcc4'] sys.stderr.write('TESTING: run lbann with two models, reader, but no optimizer; lbann should throw exception\n') 
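For reference, the tolerance fallback added to `skeleton_lbann2_reload` above (the `allow_epsilon_diff` branch) amounts to the following standalone check — a sketch restating the patch's logic, where the directory names and the 0.5 tolerance come from the patch but the function itself is hypothetical:

```python
import os

def dirs_equal_within_epsilon(ckpt_dir='lbann2_ckpt',
                              nockpt_dir='lbann2_nockpt',
                              epsilon=0.5):
    # Compare the dumped weight files line by line; numeric values may
    # differ by at most epsilon. A non-numeric mismatch or a mismatch
    # in the number of values counts as a real difference.
    for file_name in os.listdir(ckpt_dir):
        with open(os.path.join(ckpt_dir, file_name)) as ckpt_file, \
             open(os.path.join(nockpt_dir, file_name)) as nockpt_file:
            for ckpt_line, nockpt_line in zip(ckpt_file, nockpt_file):
                if ckpt_line == nockpt_line:
                    continue
                try:
                    a = list(map(float, ckpt_line.split()))
                    b = list(map(float, nockpt_line.split()))
                except ValueError:
                    return False  # non-numeric difference
                if len(a) != len(b):
                    return False
                if any(abs(x - y) > epsilon for x, y in zip(a, b)):
                    return False
    return True
```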
model_path='{prototext/model_mnist_simple_1.prototext,prototext/model_mnist_simple_1.prototext}' @@ -54,7 +57,8 @@ def test_unit_missing_optimizer(cluster, exes, dirname): return_code = os.system(command) assert return_code != 0 -def test_unit_missing_reader(cluster, exes, dirname): + +def test_unit_missing_reader(cluster, exes): exe = exes['gcc4'] sys.stderr.write('TESTING: run lbann with two models, reader, but no reader; lbann should throw exception\n') model_path = '{prototext/model_mnist_simple_1.prototext,prototext/model_mnist_simple_1.prototext}' @@ -65,14 +69,16 @@ def test_unit_missing_reader(cluster, exes, dirname): return_code = os.system(command) assert return_code != 0 -def test_unit_bad_params(cluster, exes, dirname): + +def test_unit_bad_params(cluster, exes): exe = exes['gcc4'] sys.stderr.write('TESTING: run lbann with ill-formed param (missing -) lbann should throw exception\n') (command_allocate, command_run, _, _) = tools.get_command(cluster=cluster, executable=exe, return_tuple=True) return_code = os.system('%s%s %s -exit_after_setup --reader=prototext/data_reader_mnist.prototext --model={prototext/model_mnist_simple_1.prototext,prototext/model_mnist_simple_1.prototext} --optimizer=prototext/opt_sgd.prototext' % (command_allocate, command_run, exe)) assert return_code != 0 -def test_unit_should_work(cluster, exes, dirname): + +def test_unit_should_work(cluster, exes): exe = exes['gcc4'] sys.stderr.write('TESTING: run lbann with two models, reader, and optimizer; lbann should NOT throw exception\n') model_path = '{prototext/model_mnist_simple_1.prototext,prototext/model_mnist_simple_1.prototext}' diff --git a/bamboo/unit_tests/test_unit_mnist_conv_graph.py b/bamboo/unit_tests/test_unit_mnist_conv_graph.py index 3437f461273..65a7bd54ad0 100644 --- a/bamboo/unit_tests/test_unit_mnist_conv_graph.py +++ b/bamboo/unit_tests/test_unit_mnist_conv_graph.py @@ -4,40 +4,53 @@ import pytest import os + def skeleton_mnist_conv_graph(cluster, executables, dir_name, compiler_name): if compiler_name not in executables: - pytest.skip('default_exes[%s] does not exist' % compiler_name) + e = 'skeleton_mnist_conv_graph: default_exes[%s] does not exist' % compiler_name + print('Skip - ' + e) + pytest.skip(e) output_file_name = '%s/bamboo/unit_tests/output/mnist_conv_graph_%s_output.txt' % (dir_name, compiler_name) error_file_name = '%s/bamboo/unit_tests/error/mnist_conv_graph_%s_error.txt' % (dir_name, compiler_name) + if compiler_name == 'gcc7': + tl = 240 + else: + tl = None command = tools.get_command( - cluster=cluster, executable=executables[compiler_name], num_nodes=1, num_processes=1, - dir_name=dir_name, data_filedir_default='/p/lscratchh/brainusr/datasets/MNIST', - data_reader_name='mnist', model_folder='tests', model_name='mnist_conv_graph', + cluster=cluster, executable=executables[compiler_name], + num_nodes=1, time_limit=tl, num_processes=1, + dir_name=dir_name, + data_filedir_default='/p/lscratchh/brainusr/datasets/MNIST', + data_reader_name='mnist', model_folder='tests', + model_name='mnist_conv_graph', optimizer_name='adam', output_file_name=output_file_name, error_file_name=error_file_name) return_code = os.system(command) assert return_code == 0 + def test_unit_mnist_conv_graph_clang4(cluster, exes, dirname): skeleton_mnist_conv_graph(cluster, exes, dirname, 'clang4') + def test_unit_mnist_conv_graph_gcc4(cluster, exes, dirname): - if cluster in ['surface']: - pytest.skip('FIXME') - # Surface Errors: - # assert 35584 == 0 skeleton_mnist_conv_graph(cluster, exes, dirname, 
'gcc4') + def test_unit_mnist_conv_graph_gcc7(cluster, exes, dirname): skeleton_mnist_conv_graph(cluster, exes, dirname, 'gcc7') + def test_unit_mnist_conv_graph_intel18(cluster, exes, dirname): skeleton_mnist_conv_graph(cluster, exes, dirname, 'intel18') + # Run with python -m pytest -s test_unit_conv_graph.py -k 'test_unit_mnist_conv_graph_exe' --exe= def test_unit_mnist_conv_graph_exe(cluster, dirname, exe): - if exe == None: - pytest.skip('Non-local testing') - exes = {'exe' : exe} + if exe is None: + e = 'test_unit_mnist_conv_graph_exe: Non-local testing' + print('Skip - ' + e) + pytest.skip(e) + exes = {'exe': exe} skeleton_mnist_conv_graph(cluster, exes, dirname, 'exe') diff --git a/bamboo/unit_tests/test_unit_mnist_ridge_regression.py b/bamboo/unit_tests/test_unit_mnist_ridge_regression.py index 4390693d99b..0d4d3994837 100644 --- a/bamboo/unit_tests/test_unit_mnist_ridge_regression.py +++ b/bamboo/unit_tests/test_unit_mnist_ridge_regression.py @@ -4,38 +4,47 @@ import pytest import os + def skeleton_mnist_ridge_regression(cluster, executables, dir_name, compiler_name): if compiler_name not in executables: - pytest.skip('default_exes[%s] does not exist' % compiler_name) + e = 'skeleton_mnist_ridge_regression: default_exes[%s] does not exist' % compiler_name + print('Skip - ' + e) + pytest.skip(e) output_file_name = '%s/bamboo/unit_tests/output/mnist_ridge_regression_%s_output.txt' % (dir_name, compiler_name) error_file_name = '%s/bamboo/unit_tests/error/mnist_ridge_regression_%s_error.txt' % (dir_name, compiler_name) command = tools.get_command( - cluster=cluster, executable=executables[compiler_name], num_nodes=1, num_processes=1, dir_name=dir_name, - data_filedir_default='/p/lscratchh/brainusr/datasets/MNIST', data_reader_name='mnist', - model_folder='tests', model_name='mnist_ridge_regression', optimizer_name='adam', + cluster=cluster, executable=executables[compiler_name], num_nodes=1, + num_processes=1, dir_name=dir_name, + data_filedir_default='/p/lscratchh/brainusr/datasets/MNIST', + data_reader_name='mnist', + model_folder='tests', model_name='mnist_ridge_regression', + optimizer_name='adam', output_file_name=output_file_name, error_file_name=error_file_name) return_code = os.system(command) assert return_code == 0 + def test_unit_mnist_ridge_regression_clang4(cluster, exes, dirname): skeleton_mnist_ridge_regression(cluster, exes, dirname, 'clang4') + def test_unit_mnist_ridge_regression_gcc4(cluster, exes, dirname): - if cluster in ['surface']: - pytest.skip('FIXME') - # Surface Errors: - # assert 34304 == 0 skeleton_mnist_ridge_regression(cluster, exes, dirname, 'gcc4') + def test_unit_mnist_ridge_regression_gcc7(cluster, exes, dirname): skeleton_mnist_ridge_regression(cluster, exes, dirname, 'gcc7') + def test_unit_mnist_ridge_regression_intel18(cluster, exes, dirname): skeleton_mnist_ridge_regression(cluster, exes, dirname, 'intel18') + # Run with python -m pytest -s test_unit_ridge_regression.py -k 'test_unit_mnist_ridge_regression_exe' --exe= def test_unit_mnist_ridge_regression_exe(cluster, dirname, exe): - if exe == None: - pytest.skip('Non-local testing') - exes = {'exe' : exe} + if exe is None: + e = 'test_unit_mnist_ridge_regression_exe: Non-local testing' + print('Skip - ' + e) + pytest.skip(e) + exes = {'exe': exe} skeleton_mnist_ridge_regression(cluster, exes, dirname, 'exe') diff --git a/bamboo/unit_tests/test_unit_mnist_softmax_classifier.py b/bamboo/unit_tests/test_unit_mnist_softmax_classifier.py index e67ec7e8cb7..8718c0e5802 100644 --- 
a/bamboo/unit_tests/test_unit_mnist_softmax_classifier.py +++ b/bamboo/unit_tests/test_unit_mnist_softmax_classifier.py @@ -4,38 +4,47 @@ import pytest import os + def skeleton_mnist_softmax_classifier(cluster, executables, dir_name, compiler_name): if compiler_name not in executables: - pytest.skip('default_exes[%s] does not exist' % compiler_name) + e = 'skeleton_mnist_softmax_classifier: default_exes[%s] does not exist' % compiler_name + print('Skip - ' + e) + pytest.skip(e) output_file_name = '%s/bamboo/unit_tests/output/mnist_softmax_classifier_%s_output.txt' % (dir_name, compiler_name) error_file_name = '%s/bamboo/unit_tests/error/mnist_softmax_classifier_%s_error.txt' % (dir_name, compiler_name) command = tools.get_command( - cluster=cluster, executable=executables[compiler_name], num_nodes=1, num_processes=1, dir_name=dir_name, - data_filedir_default='/p/lscratchh/brainusr/datasets/MNIST', data_reader_name='mnist', - model_folder='tests', model_name='mnist_softmax_classifier', optimizer_name='adam', + cluster=cluster, executable=executables[compiler_name], num_nodes=1, + num_processes=1, dir_name=dir_name, + data_filedir_default='/p/lscratchh/brainusr/datasets/MNIST', + data_reader_name='mnist', + model_folder='tests', model_name='mnist_softmax_classifier', + optimizer_name='adam', output_file_name=output_file_name, error_file_name=error_file_name) return_code = os.system(command) assert return_code == 0 + def test_unit_mnist_softmax_classifier_clang4(cluster, exes, dirname): skeleton_mnist_softmax_classifier(cluster, exes, dirname, 'clang4') + def test_unit_mnist_softmax_classifier_gcc4(cluster, exes, dirname): - if cluster in ['surface']: - pytest.skip('FIXME') - # Surface Errors: - # assert 34304 == 0 skeleton_mnist_softmax_classifier(cluster, exes, dirname, 'gcc4') + def test_unit_mnist_softmax_classifier_gcc7(cluster, exes, dirname): skeleton_mnist_softmax_classifier(cluster, exes, dirname, 'gcc7') + def test_unit_mnist_softmax_classifier_intel18(cluster, exes, dirname): skeleton_mnist_softmax_classifier(cluster, exes, dirname, 'intel18') + # Run with python -m pytest -s test_unit_softmax_classifier.py -k 'test_unit_mnist_softmax_classifier_exe' --exe= def test_unit_mnist_softmax_classifier_exe(cluster, dirname, exe): - if exe == None: - pytest.skip('Non-local testing') - exes = {'exe' : exe} + if exe is None: + e = 'test_unit_mnist_softmax_classifier_exe: Non-local testing' + print('Skip - ' + e) + pytest.skip(e) + exes = {'exe': exe} skeleton_mnist_softmax_classifier(cluster, exes, dirname, 'exe') diff --git a/cmake/configure_files/lbann_config.hpp.in b/cmake/configure_files/lbann_config.hpp.in index bdf17666422..76b50bc920c 100644 --- a/cmake/configure_files/lbann_config.hpp.in +++ b/cmake/configure_files/lbann_config.hpp.in @@ -31,6 +31,7 @@ #cmakedefine LBANN_HAS_ALUMINUM #cmakedefine LBANN_ALUMINUM_MPI_PASSTHROUGH #cmakedefine LBANN_HAS_CONDUIT +#cmakedefine LBANN_HAS_PYTHON #cmakedefine LBANN_DETERMINISTIC @@ -42,6 +43,9 @@ #cmakedefine LBANN_SYS_SENDFILE_OK +#cmakedefine LBANN_HAS_STD_ANY +#cmakedefine LBANN_HAS_STD_MAKE_UNIQUE + // Define the LBANN datatype namespace lbann { diff --git a/cmake/configure_files/lbann_module.lua.in b/cmake/configure_files/lbann_module.lua.in new file mode 100644 index 00000000000..754d2c6106d --- /dev/null +++ b/cmake/configure_files/lbann_module.lua.in @@ -0,0 +1,69 @@ +-- LMod module file for LBANN + +-- CMAKE_INSTALL_PREFIX: @CMAKE_INSTALL_PREFIX@ +-- CMAKE_BUILD_TYPE: @CMAKE_BUILD_TYPE@ +-- CXX Compiler: @CMAKE_CXX_COMPILER@ +-- CXX 
FLAGS: @CMAKE_CXX_FLAGS@ +-- CXX FLAGS_DEBUG: @CMAKE_CXX_FLAGS_DEBUG@ +-- CXX FLAGS_RELWITHDEBINFO: @CMAKE_CXX_FLAGS_RELWITHDEBINFO@ +-- CXX FLAGS_RELEASE: @CMAKE_CXX_FLAGS_RELEASE@ +-- LBANN_GNU_LINUX: @LBANN_GNU_LINUX@ +-- LBANN_HAS_HYDROGEN: @LBANN_HAS_HYDROGEN@ +-- LBANN_HAS_OPENCV: @LBANN_HAS_OPENCV@ +-- LBANN_HAS_CEREAL: @LBANN_HAS_CEREAL@ +-- LBANN_HAS_CUDA: @LBANN_HAS_CUDA@ +-- LBANN_HAS_CUDNN: @LBANN_HAS_CUDNN@ +-- LBANN_HAS_NCCL2: @LBANN_HAS_NCCL2@ +-- LBANN_HAS_PROTOBUF: @LBANN_HAS_PROTOBUF@ +-- LBANN_HAS_CNPY: @LBANN_HAS_CNPY@ +-- LBANN_HAS_TBINF: @LBANN_HAS_TBINF@ +-- LBANN_HAS_VTUNE: @LBANN_HAS_VTUNE@ +-- LBANN_NVPROF: @LBANN_NVPROF@ +-- LBANN_HAS_DOXYGEN: @LBANN_HAS_DOXYGEN@ +-- LBANN_HAS_LBANN_PROTO: @LBANN_HAS_LBANN_PROTO@ +-- LBANN_HAS_ALUMINUM: @LBANN_HAS_ALUMINUM@ +-- LBANN_HAS_CONDUIT: @LBANN_HAS_CONDUIT@ +-- LBANN_HAS_PYTHON: @LBANN_HAS_PYTHON@ + +help( +[[ +LBANN version @LBANN_VERSION@. Livermore Big Artificial Neural Network +Toolkit. A distributed memory, HPC-optimized, model and data parallel +training toolkit for deep neural networks. +]]) + +whatis("Package: LBANN") +whatis("Version: @LBANN_VERSION@") +whatis("Description: Livermore Big Artificial Neural Network Toolkit. A distributed memory, HPC-optimized, model and data parallel training toolkit for deep neural networks.") +whatis("URL: https://github.com/llnl/lbann") +whatis("CMAKE_INSTALL_PREFIX: @CMAKE_INSTALL_PREFIX@") +whatis("CMAKE_BUILD_TYPE: @CMAKE_BUILD_TYPE@") +whatis("CXX Compiler: @CMAKE_CXX_COMPILER@") +whatis("CXX FLAGS: @CMAKE_CXX_FLAGS@") +whatis("CXX FLAGS_DEBUG: @CMAKE_CXX_FLAGS_DEBUG@") +whatis("CXX FLAGS_RELWITHDEBINFO: @CMAKE_CXX_FLAGS_RELWITHDEBINFO@") +whatis("CXX FLAGS_RELEASE: @CMAKE_CXX_FLAGS_RELEASE@") +whatis("LBANN_GNU_LINUX: @LBANN_GNU_LINUX@") +whatis("LBANN_HAS_HYDROGEN: @LBANN_HAS_HYDROGEN@") +whatis("LBANN_HAS_OPENCV: @LBANN_HAS_OPENCV@") +whatis("LBANN_HAS_CEREAL: @LBANN_HAS_CEREAL@") +whatis("LBANN_HAS_CUDA: @LBANN_HAS_CUDA@") +whatis("LBANN_HAS_CUDNN: @LBANN_HAS_CUDNN@") +whatis("LBANN_HAS_NCCL2: @LBANN_HAS_NCCL2@") +whatis("LBANN_HAS_PROTOBUF: @LBANN_HAS_PROTOBUF@") +whatis("LBANN_HAS_CNPY: @LBANN_HAS_CNPY@") +whatis("LBANN_HAS_TBINF: @LBANN_HAS_TBINF@") +whatis("LBANN_HAS_VTUNE: @LBANN_HAS_VTUNE@") +whatis("LBANN_NVPROF: @LBANN_NVPROF@") +whatis("LBANN_HAS_DOXYGEN: @LBANN_HAS_DOXYGEN@") +whatis("LBANN_HAS_LBANN_PROTO: @LBANN_HAS_LBANN_PROTO@") +whatis("LBANN_HAS_ALUMINUM: @LBANN_HAS_ALUMINUM@") +whatis("LBANN_HAS_CONDUIT: @LBANN_HAS_CONDUIT@") +whatis("LBANN_HAS_PYTHON: @LBANN_HAS_PYTHON@") + +prepend_path("PATH","@CMAKE_INSTALL_PREFIX@/@CMAKE_INSTALL_BINDIR@") +prepend_path("LD_LIBRARY_PATH","@CMAKE_INSTALL_PREFIX@/@CMAKE_INSTALL_LIBDIR@") +prepend_path("PYTHONPATH","@PYTHON_INSTALL_PREFIX@/@CMAKE_INSTALL_PYTHONDIR@") + +pushenv("LBANN_DIR","@CMAKE_INSTALL_PREFIX@/@CMAKE_INSTALL_DIR@") + diff --git a/cmake/configure_files/python_config.ini.in b/cmake/configure_files/python_config.ini.in new file mode 100644 index 00000000000..3ed3de25aef --- /dev/null +++ b/cmake/configure_files/python_config.ini.in @@ -0,0 +1,3 @@ +[Paths] +lbann_pb2.py = @_LBANN_PB2_PY@ +lbann_exe = @_LBANN_EXE@ diff --git a/cmake/configure_files/setup.py.in b/cmake/configure_files/setup.py.in new file mode 100644 index 00000000000..bd6dae0516b --- /dev/null +++ b/cmake/configure_files/setup.py.in @@ -0,0 +1,33 @@ +#!/usr/bin/env python3 +import os.path +import setuptools + +# Variables from CMake +version = '@LBANN_VERSION@' +src_dir = '@_LBANN_PYTHON_DIR@' +config_file = '@_PYTHON_CONFIG_INI@' + +# 
Get relative paths +# Note: setuptools does not accept absolute paths +current_dir = os.path.dirname(os.path.abspath(__file__)) +src_dir = os.path.relpath(os.path.abspath(src_dir), current_dir) +config_file = os.path.relpath(os.path.abspath(config_file), current_dir) + +# Setup package +setuptools.setup( + name='lbann', + description='LBANN: Livermore Big Artificial Neural Network', + version=version, + url='https://github.com/LLNL/lbann', + author='Lawrence Livermore National Security, LLC.', + license='Apache 2.0', + packages=setuptools.find_packages(src_dir), + package_dir={'': src_dir}, + data_files=[('lbann', [config_file])], + install_requires=['graphviz>=0.10.1', + 'matplotlib>=2.0.2', + 'numpy>=1.16.0', + 'onnx>=1.3.0', + 'pandas>=0.24.1', + 'protobuf>=3.6.1', + 'texttable>=1.4.0']) diff --git a/cmake/modules/FindPython.cmake b/cmake/modules/FindPython.cmake new file mode 100644 index 00000000000..62c7945174f --- /dev/null +++ b/cmake/modules/FindPython.cmake @@ -0,0 +1,90 @@ +# Detect Python interpreter and Python C API +# +# This makes several improvements over the FindPython.cmake module +# that comes included with CMake: +# - The stock version ignores user-provided hints if it thinks it has +# found a newer Python version. This is a problem if a virtual +# environment doesn't override the 'python<major>.<minor>' +# executable since that executable will take +# precedence. User-provided hints now take precedence. +# - Python C API objects are deduced by querying the Python +# interpreter rather than directly looking for files. This is +# helpful if a virtual environment doesn't create all the +# necessary copies or symlinks. +# +# Hint variables +# +# Python_EXECUTABLE +# Python_ROOT_DIR +# +# Exports the following variables +# +# Python_FOUND +# Python_EXECUTABLE +# Python_VERSION +# Python_VERSION_MAJOR +# Python_VERSION_MINOR +# Python_VERSION_PATCH +# Python_INCLUDE_DIRS +# Python_LIBRARIES +# + +set(Python_FOUND FALSE) + +# Find executable +if (NOT Python_EXECUTABLE) + if (Python_ROOT_DIR) + set(_HINT "${Python_ROOT_DIR}/bin") + endif (Python_ROOT_DIR) + find_program(Python_EXECUTABLE + NAMES python3 python + HINTS "${_HINT}") +endif (NOT Python_EXECUTABLE) +if (NOT Python_EXECUTABLE) + message(WARNING "Could not find Python executable") + return() +endif (NOT Python_EXECUTABLE) + +# Get version +execute_process( + COMMAND "${Python_EXECUTABLE}" "-c" + "import sys; sys.stdout.write('.'.join([str(x) for x in sys.version_info[:3]]))" + OUTPUT_VARIABLE Python_VERSION) +string(REGEX MATCHALL "[0-9]+" _VERSION_PARSED "${Python_VERSION}") +list(GET _VERSION_PARSED 0 Python_VERSION_MAJOR) +list(GET _VERSION_PARSED 1 Python_VERSION_MINOR) +list(GET _VERSION_PARSED 2 Python_VERSION_PATCH) + +# Find Python C API +execute_process( + COMMAND "${Python_EXECUTABLE}" "-c" + "import sys; from distutils.sysconfig import get_python_inc; sys.stdout.write(get_python_inc())" + OUTPUT_VARIABLE Python_INCLUDE_DIRS) +execute_process( + COMMAND "${Python_EXECUTABLE}" "-c" + "import sys; from distutils.sysconfig import get_config_var; sys.stdout.write(get_config_var('LIBDIR'))" + OUTPUT_VARIABLE _LIB_DIR) +if (BUILD_SHARED_LIBS) + set(_GLOB_EXPR "${_LIB_DIR}/libpython*${CMAKE_SHARED_LIBRARY_SUFFIX}") +ELSE (BUILD_SHARED_LIBS) + set(_GLOB_EXPR "${_LIB_DIR}/libpython*${CMAKE_STATIC_LIBRARY_SUFFIX}") +endif (BUILD_SHARED_LIBS) +FILE(GLOB _GLOB_RESULT "${_GLOB_EXPR}") +get_filename_component(Python_LIBRARIES "${_GLOB_RESULT}" ABSOLUTE) + +# Handle the find_package arguments +include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args( + Python + REQUIRED_VARS Python_EXECUTABLE Python_INCLUDE_DIRS Python_LIBRARIES + Python_VERSION_MAJOR Python_VERSION_MINOR Python_VERSION_PATCH + VERSION_VAR Python_VERSION) + +# Build the imported target +if (NOT TARGET Python::Python) + add_library(Python::Python INTERFACE IMPORTED) + set_property(TARGET Python::Python + PROPERTY INTERFACE_INCLUDE_DIRECTORIES "${Python_INCLUDE_DIRS}") + set_property(TARGET Python::Python + PROPERTY INTERFACE_LINK_LIBRARIES "${Python_LIBRARIES}") +endif (NOT TARGET Python::Python) diff --git a/cmake/modules/SetupCXX.cmake b/cmake/modules/SetupCXX.cmake index eeac1e6336e..ef2e1d9415c 100644 --- a/cmake/modules/SetupCXX.cmake +++ b/cmake/modules/SetupCXX.cmake @@ -74,7 +74,8 @@ endif () # Initialize C++ flags lbann_check_and_append_flag(CMAKE_CXX_FLAGS - -fPIC -g -Wall -Wextra -Wno-unused-parameter -Wnon-virtual-dtor -Wshadow) + -fPIC -g -Wall -Wextra -Wno-unused-parameter -Wnon-virtual-dtor -Wshadow + -Wno-deprecated-declarations) # Disable all optimization in debug for better viewing under debuggers # (cmake already adds -g) @@ -150,3 +151,16 @@ endif () # Check if we can use Linux's sys/sendfile.h check_include_file_cxx(sys/sendfile.h LBANN_SYS_SENDFILE_OK) + +# Testing for std::any +include(CheckCXXSourceCompiles) +set(_ANY_TEST_CODE + "#include <any> +int main(int, char* argv[]) { std::any x; }") +check_cxx_source_compiles("${_ANY_TEST_CODE}" LBANN_HAS_STD_ANY) + +set(_MAKE_UNIQUE_TEST_CODE + "#include <memory> +int main(int, char* argv[]) { auto x = std::make_unique<double>(); }") +check_cxx_source_compiles( + "${_MAKE_UNIQUE_TEST_CODE}" LBANN_HAS_STD_MAKE_UNIQUE) diff --git a/cmake/modules/SetupProtobuf.cmake b/cmake/modules/SetupProtobuf.cmake index 51fac7f9f33..cfe37b87b30 100644 --- a/cmake/modules/SetupProtobuf.cmake +++ b/cmake/modules/SetupProtobuf.cmake @@ -41,7 +41,7 @@ else () if(NOT Protobuf_FOUND) find_package(Protobuf "${PROTOBUF_MIN_VERSION}" CONFIG QUIET REQUIRED) endif () - message("Found Protobuf: ${Protobuf_DIR}") + message(STATUS "Found Protobuf: ${Protobuf_DIR}") endif () if (NOT Protobuf_FOUND) diff --git a/containers/README.md b/containers/README.md index 224510e9156..c0bceafbb54 100644 --- a/containers/README.md +++ b/containers/README.md @@ -1,42 +1,42 @@ ## Singularity - + [Singularity](http://singularity.lbl.gov/) - + First build a Singularity container with the lbann.def file: ``` sudo singularity build --writable lbann.img lbann.def ``` *Note: Building the image requires root access.* - + *Note: --writable allows users to make changes inside the container (Required for LC).* -This will create a container called lbann.img which can be used to invoke lbann on any system with singularity and openmpi installed. +This will create a container called lbann.img which can be used to invoke lbann on any system with singularity and openmpi installed. ### Customizing Configuration in lbann.def -Singularity is designed to take advantage of underlying HPC resources. The lbann.def file in this directory specifically installs packages necessary for infiniband interconnects (lines 15-19). It builds openmpi outside of the spack step to ensure it is built with infiniband support (lines 37-55). Experienced users should modify these sections to match with the underlying resources they intend to run on. This defintion file also builds gcc version 4.9.3, and uses it to build openmpi and lbann (lines 33-35). This is also customized to run on specific LC resources, and can be modified depending on the users system.
+Singularity is designed to take advantage of underlying HPC resources. The lbann.def file in this directory specifically installs packages necessary for infiniband interconnects (lines 15-19). It builds openmpi outside of the spack step to ensure it is built with infiniband support (lines 37-55). Experienced users should modify these sections to match the underlying resources they intend to run on. This definition file also builds gcc version 4.9.3, and uses it to build openmpi and lbann (lines 33-35). This is also customized to run on specific LC resources, and can be modified depending on the user's system. ### Running LBANN with Singularity To run LBANN, use mpirun and Singularity's exec command: ``` salloc -N2 - mpirun -np 4 singularity exec -B /p:/p lbann.img /lbann/spack_builds/singularity_optimizied_test/model_zoo/lbann mpirun -np 4 singularity exec -B /p:/p lbann.img /lbann/spack_builds/singularity/model_zoo/lbann --model=/lbann/model_zoo/tests/model_mnist_distributed_io.prototext --reader=/lbann/model_zoo/data_readers/data_reader_mnist.prototext --optimizer=/lbann/ model_zoo/optimizers/opt_adagrad.prototext +mpirun -np 4 singularity exec -B /p:/p lbann.img /lbann/spack_builds/singularity_optimizied_test/model_zoo/lbann mpirun -np 4 singularity exec -B /p:/p lbann.img /lbann/spack_builds/singularity/model_zoo/lbann --model=/lbann/model_zoo/models/lenet_mnist/model_lenet_mnist.prototext --reader=/lbann/model_zoo/data_readers/data_reader_mnist.prototext --optimizer=/lbann/ model_zoo/optimizers/opt_adagrad.prototext ``` *Note: The singularity -B flag binds directories from the surrounding filesystem to the container. Be sure to include any necessary files using this command (i.e., model prototext files, datasets, etc.). Alternatively, system admins are capable of allowing a singularity container to utilize the host's filesystem. This is done by changing MOUNT HOSTFS in the singularity config file.* ## Docker - + [Docker](https://www.docker.com/) - + First build a Docker image with the Dockerfile. From whichever directory contains the Dockerfile: ``` docker build -t dockban . ``` - + *Note: The -t flag specifies an identifying tag for this image. "dockban" can be changed to any desired tag.* - + ### Customizing Configuration in Dockerfile - The Dockerfile container defintion is less complicated than its Singularity counterpart. gcc 7.1.0 is built and registered with spack in lines 19-21. Users can change this, as well as LBANN specific build options in spack (line 22). For instance, to add gpu support a user can add "+gpu" to this line. - + The Dockerfile container definition is less complicated than its Singularity counterpart. gcc 7.1.0 is built and registered with spack in lines 19-21. Users can change this, as well as LBANN-specific build options in spack (line 22). For instance, to add gpu support a user can add "+gpu" to this line. + ### Running LBANN with Docker -This LBANN build also uses openmpi, so lbann can be launched with mpirun here as well. However, this example will just show the single process invocation. +This LBANN build also uses openmpi, so lbann can be launched with mpirun here as well. However, this example will just show the single-process invocation. Start a docker container from the previously created image, and attach to it.
Make sure to bind any necessary directories using -v: ``` @@ -44,5 +44,5 @@ docker run -it -v $HOME/MNIST:/MNIST dockban ``` Run LBANN as you would outside of a container: ``` -./spack_build/docker_build/model_zoo/lbann --model=model_zoo/models/lenet_mnist/model_lenet_mnist.prototext --reader=model_zoo/data_readers/data_reader_mnist.prototext --optimizer=model_zoo/optimizers/opt_sgd.prototext +./spack_build/docker_build/model_zoo/lbann --model=model_zoo/models/lenet_mnist/model_lenet_mnist.prototext --reader=model_zoo/data_readers/data_reader_mnist.prototext --optimizer=model_zoo/optimizers/opt_sgd.prototext ``` diff --git a/docs/BuildRSTDocs.py b/docs/BuildRSTDocs.py new file mode 100644 index 00000000000..5a783e493c6 --- /dev/null +++ b/docs/BuildRSTDocs.py @@ -0,0 +1,355 @@ +#from RSTDocsFlavorText import * + +import xml.etree.ElementTree as etree +import os, runpy + +rst_docs_globals = runpy.run_path("RSTDocsFlavorText.py") +lbann_rst_headers = rst_docs_globals["lbann_rst_headers"] +lbann_rst_flavor_text = rst_docs_globals["lbann_rst_flavor_text"] + +# Some globals cuz lazy +xml_root_dir = 'doxy_out/xml/' + +def strip_template(class_name): + ind = class_name.find('<') + if ind > 0: + return class_name[0:ind] + return class_name + +# This will return a list of length 2, [longest_namespace, class_name] +# E.g., split_namespace("A::B::C::myclass") will return ["A::B::C","myclass"]. +# +# If no namespace, the first entry will be empty string. +def split_namespace(class_name): + ind = class_name.rfind(':') + if ind > 0: + return class_name[0:ind-1], class_name[ind+1:] + return "", class_name + +def strip_namespace(class_name): + ind = class_name.rfind(':') + if ind > 0: + return class_name[ind+1:] + return class_name + +def get_known_subdirs(topdir, all_dirs): + subdirs = [] + for d in all_dirs: + if d == topdir: continue + commonprefix = os.path.commonprefix([topdir, d]) + if commonprefix == topdir: + if os.path.dirname(d) == topdir: + subdirs.append(d) + return subdirs + +def is_abstract_class_from_element(class_element): + abstract = class_element.get('abstract') + if abstract is None or abstract == 'no': + return False + return abstract == 'yes' + +def is_abstract_class(class_name, xml_file): + class_tree = etree.parse(xml_file) + class_root = class_tree.getroot() + + class_element = class_root.find('compounddef') + if class_element.findtext('compoundname') != class_name: + raise Exception('bad compoundname') + return is_abstract_class_from_element(class_element) + +def is_base_class_from_element(class_element): + base_element = class_element.find('basecompoundref') + if base_element is not None: + return False + return True + +def is_base_class(class_name, xml_file): + class_tree = etree.parse(xml_file) + class_root = class_tree.getroot() + + class_element = class_root.find('compounddef') + if class_element.findtext('compoundname') != class_name: + raise Exception('bad compoundname') + return is_base_class_from_element(class_element) + +def get_class_directory_from_element(class_element): + loc = class_element.find('location') + if loc is None: + raise Exception("Class has no location") + filename = loc.get('bodyfile') + if filename is None: + filename = loc.get('file') + if filename is None: + raise Exception("No file or bodyfile in location") + return os.path.dirname(filename) + +def get_class_directory(class_name, xml_file): + class_tree = etree.parse(xml_file) + class_root = class_tree.getroot() + class_name = strip_template(class_name) + + class_element = 
class_root.find('compounddef') + if class_element.findtext('compoundname') != class_name: + raise Exception('compoundname "' + class_element.findtext('compoundname') + '" does not match "' + class_name + '"') + return get_class_directory_from_element(class_element) + +def is_base_class_rel_to_dir(class_name, xml_file): + class_tree = etree.parse(xml_file) + class_root = class_tree.getroot() + + class_element = class_root.find('compounddef') + if class_element.findtext('compoundname') != class_name: + raise Exception('bad compoundname') + + this_class_dir = get_class_directory_from_element(class_element) + base_element = class_element.find('basecompoundref') + + if base_element is None: + return True + + base_name = base_element.text + base_class_refid = base_element.get('refid') + if base_class_refid is None: # Base class not found + return True + + base_class_xml = os.path.join( + xml_root_dir,base_class_refid+'.xml') + base_class_dir = get_class_directory(base_name, base_class_xml) + + return base_class_dir != this_class_dir + +def is_public_class_from_element(class_element): + return class_element.get('prot') == 'public' + +def is_public_class(class_name, xml_file): + class_tree = etree.parse(xml_file) + class_root = class_tree.getroot() + + class_element = class_root.find('compounddef') + if class_element.findtext('compoundname') != class_name: + raise Exception('bad compoundname') + + return is_public_class_from_element(class_element) + +# Write a simple RST file for a class +def write_class_rst_file(class_name, breathe_project_name, *args, **kwargs): + namespace = kwargs.get('namespace', '') + display_name = kwargs.get('display_name', class_name) + description = kwargs.get('description', '') + header_string = kwargs.get('header_string', '') + output_dir = kwargs.get('output_dir', os.getcwd()) + output_filename = kwargs.get('output_filename', '') + subclasses = kwargs.get('subclasses', {}) + + # Handle defaults more rigorously + if namespace == '': + namespace, class_name = split_namespace(class_name) + if not namespace == '': + namespace = namespace + '::' + + # Possibly rebuild the structure since breathe needs namespace + # information + full_class_name = namespace + class_name + + if output_filename == '': + output_filename = class_name + '.rst' + + if header_string == '': + header_string = "Documentation of "+display_name + + equal_string = '=' * (len(header_string) + 5) + + output_file = os.path.join(output_dir,output_filename) + with open(output_file, 'w') as f: + f.write(header_string + '\n') + f.write(equal_string + '\n\n') + if description != '': + f.write(description + '\n\n') + else: + f.write('\n') + f.write('.. doxygenclass:: ' + full_class_name + '\n') + f.write(' :project: ' + breathe_project_name + '\n') + f.write(' :members:\n\n') + if len(subclasses) > 0: + f.write('.. toctree::\n') + f.write(' :maxdepth: 1\n') + f.write(' :caption: Derived Classes\n\n') + for sc, sc_out_dir in subclasses.items(): + sc_no_ns = strip_namespace(sc) + if sc_out_dir == output_dir: + sc_rst_path = sc_no_ns + else: + sc_rst_path = os.path.join( + os.path.relpath(sc_out_dir, output_dir), + sc_no_ns); + f.write(' ' + sc_no_ns + ' <' + sc_rst_path + '>\n') + return + +# Adds things from rhs into lhs. 
Keys are anything, values are lists +def merge_dir_class_maps(lhs, rhs): + for d, cls in rhs.items(): + if d not in lhs: + lhs[d] = cls + else: + lhs[d] += cls + +# Writes a file called "strip_namespace(class_name).rst" in "output_dir" +def process_class(class_name, xml_file, output_root_dir): + + # Get the XML tree for this class + class_tree = etree.parse(xml_file) + class_root = class_tree.getroot() + + # Get the description of this class + compounds_in_file = class_root.findall('compounddef') + if len(compounds_in_file) > 1: + raise Exception("Found multiple compounds in file: "+xml_file) + + compound = compounds_in_file[0] + + # Ensure there's nothing funky in the file. + if compound.findtext('compoundname') != class_name: + raise Exception("Found unexpected compounddef \"" + + compound.findtext('compoundname') + + "\" in file: " + xml_file) + if compound.get('kind') != "class": + raise Exception("\"" + class_name + "\" does not have kind=\"class\". " + + "File: " + xml_file) + + # Build the output directory path + class_dir = get_class_directory_from_element(compound) + output_dir = os.path.relpath(class_dir, "../include/lbann/") + # Add the base prefix + file_output_dir = os.path.normpath( + os.path.join(output_root_dir, output_dir)) + if not os.path.exists(file_output_dir): + os.makedirs(file_output_dir) + + # Build output for all derived classes + subclasses = {} + output_dir_class_map = {} + for derived in compound.iter('derivedcompoundref'): + derived_name = strip_template(derived.text) + derived_xml = os.path.join(xml_root_dir, + derived.get('refid') + ".xml") + + sc_out_dir, sc_dir_class_map = process_class( + derived_name, derived_xml, output_root_dir) + + merge_dir_class_maps(output_dir_class_map, sc_dir_class_map) + subclasses[derived_name] = sc_out_dir + + # Write the RST for this class + header_string = "Documentation of " + class_name + write_class_rst_file(class_name, "lbann", + output_dir=file_output_dir, + subclasses=subclasses) + + # Add this class to the map + if file_output_dir not in output_dir_class_map: + output_dir_class_map[file_output_dir] = [class_name] + else: + output_dir_class_map[file_output_dir].append(class_name) + + return file_output_dir, output_dir_class_map + +# +# Actual code starts here +# Let's see if I can write everything +# + +# Set the XML output directory relative to this directory +xml_root_dir = 'doxy_out/xml/' +index_tree = etree.parse(xml_root_dir + 'index.xml') +index_root = index_tree.getroot() + +# Set the RST output directory relative to this directory +rst_base_dir = "lbann" + +# Find all classes in the index +class_to_file_map = {} +for neighbor in index_root.iter('compound'): + if neighbor.get('kind') == 'class': + class_to_file_map[neighbor.findtext('name')] \ + = os.path.join(xml_root_dir,neighbor.get('refid') + '.xml') + +# Build all of the class documentation +dir_all_class_map = {} +for cls, fn in class_to_file_map.items(): + if is_base_class(cls, fn) and is_public_class(cls, fn): + out_dir, all_out_dirs = process_class(cls, fn, rst_base_dir) + merge_dir_class_maps(dir_all_class_map, all_out_dirs) + +# Write the high-level files, one file per directory except where +# noted below +ignore_dirs = [ os.path.join(rst_base_dir,d) for d in + ['data_distributions','utils/impl']] + +# Remove the ignored dirs from the map +for d in ignore_dirs: + if d in dir_all_class_map: + del dir_all_class_map[d] + +all_dirs = list(dir_all_class_map.keys()) +for d in all_dirs: + dir_without_base = os.path.relpath(d, rst_base_dir) + if 
dir_without_base in lbann_rst_headers: + header_string = lbann_rst_headers[dir_without_base] + else: + header_string = os.path.basename(dir_without_base) + + equal_string = '=' * (len(header_string) + 5) + + if dir_without_base in lbann_rst_flavor_text: + flavor_text = lbann_rst_flavor_text[dir_without_base] + else: + flavor_text = None + + abstract_classes = [] + concrete_classes = [] + for c in dir_all_class_map[d]: + if is_abstract_class(c, class_to_file_map[c]): + abstract_classes.append(strip_namespace(c)) + else: + concrete_classes.append(strip_namespace(c)) + + abstract_classes.sort() + concrete_classes.sort() + + subdirs = [os.path.basename(d) for d in get_known_subdirs(d, all_dirs)] + subdirs.sort() + + if dir_without_base == '.': + filename = os.path.join(rst_base_dir, "lbann.rst") + else: + filename = os.path.join(d,os.path.basename(d)+'_dir.rst') + + with open(filename, 'w') as f: + f.write(header_string+'\n') + f.write(equal_string+'\n\n') + if flavor_text is not None: + f.write(flavor_text+'\n') + + if len(abstract_classes) > 0: + f.write('\n') + f.write('.. toctree::'+'\n') + f.write(' :maxdepth: 1'+'\n') + f.write(' :caption: Abstract Classes\n\n') + for cls in abstract_classes: + f.write(' class '+cls+' <'+cls+'>\n') + + if len(concrete_classes) > 0: + f.write('\n') + f.write('.. toctree::'+'\n') + f.write(' :maxdepth: 1'+'\n') + f.write(' :caption: Concrete Classes\n\n') + for cls in concrete_classes: + f.write(' class '+cls+' <'+cls+'>\n') + + if len(subdirs) > 0: + f.write('\n') + f.write('.. toctree::'+'\n') + f.write(' :maxdepth: 1'+'\n') + f.write(' :caption: Subdirectories\n\n') + for sdir in subdirs: + f.write(' '+sdir+'/'+sdir+'_dir\n')
diff --git a/docs/BuildingLBANN.md b/docs/BuildingLBANN.md deleted file mode 100644 index 4643c7d3354..00000000000 --- a/docs/BuildingLBANN.md +++ /dev/null @@ -1,533 +0,0 @@

# Building LBANN

## Download

LBANN source code can be obtained from the [Github repo](https://github.com/LLNL/lbann).

## Dependencies

The following packages and tools are required to build LBANN. All packages listed below may be installed using [Spack](https://github.com/llnl/spack). See below for more details on using Spack to build a complete LBANN environment.

The following basic tools are **required**.

+ A C++11-compliant compiler.
+ OpenMP, version 3.0 or newer.
+ An MPI-3.0 implementation.
+ [CEREAL](https://github.com/USCiLab/cereal) is used to handle complex serialization tasks.
+ [CMake](https://cmake.org), version 3.9 or newer.

The following LLNL-maintained packages are **required**.

+ [Hydrogen](https://github.com/llnl/elemental) is a fork of the [Elemental](https://github.com/elemental/elemental) distributed dense linear-algebra library and it may be installed via [Spack](https://github.com/llnl/spack) using the package name "hydrogen". If CUDA support is enabled in Hydrogen, LBANN will inherit this support.

The following third-party packages are **required**.

+ [CNPY](https://github.com/rogersce/cnpy.git) is used to ingest data in NumPy format. In principle this should be optional, but at the time of writing, LBANN will not build without it.
+ [OpenCV](https://github.com/opencv/opencv) is used to preprocess image data. For performance reasons, it is recommended to build OpenCV with [JPEG-turbo](https://github.com/libjpeg-turbo/libjpeg-turbo) for JPEG format support.
+ [ProtoBuf](https://github.com/protocolbuffers/protobuf) is used to express models in a portable format.
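As a rough sketch, and assuming the Spack package names `cereal`, `cnpy`, `opencv`, and `protobuf` correspond to the libraries above (an assumption, not verified here), the required third-party packages could be installed in one shot:
```bash
# Hypothetical one-shot install; exact specs, variants, and versions
# depend on your Spack configuration and system.
spack install cereal cnpy opencv protobuf
```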
The following LLNL-maintained packages are **optional**.

+ [Aluminum](https://github.com/llnl/aluminum) is a communication library optimized for machine learning and interaction with GPUs. We cannot recommend its use strongly enough. It can be built using [Spack](https://github.com/llnl/spack).
+ [CONDUIT](https://github.com/llnl/conduit) is used to ingest structured data produced by scientific simulations.

The following third-party packages are **optional**.

+ [CUDA](https://developer.nvidia.com/cuda-toolkit). The development team currently uses CUDA version 9.2. Building with CUDA support requires that Hydrogen has been built with CUDA support (see below).
  + [cuDNN](https://developer.nvidia.com/cudnn) is required if building LBANN with CUDA support. It is freely available as a binary distribution from NVIDIA.
+ [HWLOC](https://www.open-mpi.org/projects/hwloc/). HWLOC enables LBANN to make certain optimizations based on the hardware topology. Its use is strongly recommended.
+ NVTX. LBANN supports some improved annotations for NVPROF using NVTX. NVTX is provided as part of the CUDA toolkit.
+ VTune. LBANN supports some improved annotations for VTune.

## Building with [Spack](https://github.com/llnl/spack)

### Setup Spack and local base tools

1. Download and install [Spack](https://github.com/llnl/spack). Additionally, set up shell support as discussed [here](https://spack.readthedocs.io/en/latest/module_file_support.html#id2).

   ```bash
   . ${SPACK_ROOT}/share/spack/setup-env.sh
   ```

2. Set up your compiler and external software environment. For example, on LLNL's LC machines, one might load the following modules:
   ```bash
   ml gcc/7.3.0 mvapich2/2.3 cuda/10.0.130 # Pascal
   ```
   or
   ```bash
   ml gcc/7.3.1 cuda/9.2.148 spectrum-mpi/rolling-release # Lassen / Sierra
   ```

   + Note: to unload unwanted modules you can execute `ml` with package names prepended with a dash, e.g.: `ml -intel`. To unload all currently loaded modules, use `ml purge`.

### Building & Installing LBANN as a user

This section is a work in progress. For now, follow the developer instructions below. We are working to simplify this process.

### Building & Installing LBANN as a developer

Developers of LBANN will often need to interact with the source code and/or advanced configuration options for Aluminum, Hydrogen, and LBANN while the other dependencies remain constant. The Spack installation instructions below set up a Spack environment with the remaining dependencies, requiring the developer to build Aluminum, Hydrogen, and LBANN separately, by whatever means they choose.

1. Establish a Spack environment and install software dependencies. Note that there are four environments to pick from along two axes:

   1. developers or users
   2. x86_64 and ppc64le

   For example, if you are a developer and want to build inside the git repo, use the following instructions:
   ```bash
   export LBANN_HOME=/path/to/lbann/git/repo
   export LBANN_BUILD_DIR=/path/to/a/build/directory
   export LBANN_INSTALL_DIR=/path/to/an/install/directory
   cd ${LBANN_BUILD_DIR}
   spack env create -d . ${LBANN_HOME}/spack_environments/developer_release_<arch>_cuda_spack.yaml # where <arch> = x86_64 | ppc64le
   spack install
   spack env loads # Spack creates a file named loads that has all of the correct modules
   source loads
   unset LIBRARY_PATH
   ```

   + Note that the environments provided here have a set of external packages and compilers that are installed on an LLNL LC CZ system. Please update these for your system environment. Alternatively, you can create baseline versions of the user-level Spack configuration files and remove the externals and compilers from the `spack.yaml` file. More details are provided [here](spack_environment.md).

   + Note that the initial build of all of the standard packages in Spack will take a while.

   + Note that the Spack module files set the `LIBRARY_PATH` environment variable. This behavior allows autotools-based builds to pick up the correct libraries but interferes with the way that CMake sets up RPATHs. To correctly establish the RPATH, please unset the variable as noted above, or explicitly pass the RPATH fields to CMake using a command such as:
   ```bash
   cmake -DCMAKE_INSTALL_RPATH=$(sed 's/:/;/g' <<< "${LIBRARY_PATH}") \
         -DCMAKE_BUILD_RPATH=$(sed 's/:/;/g' <<< "${LIBRARY_PATH}") \
         ...
   ```

2. Build LBANN locally from source and build Hydrogen and Aluminum using the superbuild. See below for a list and descriptions of all CMake flags known to LBANN's "Superbuild" build system. A representative CMake command line that expects the `LBANN_HOME`, `LBANN_BUILD_DIR`, and `LBANN_INSTALL_DIR` environment variables might be:
   ```bash
   cd ${LBANN_BUILD_DIR}
   cmake \
     -G Ninja \
     -D LBANN_SB_BUILD_ALUMINUM=ON \
     -D ALUMINUM_ENABLE_MPI_CUDA=OFF \
     -D ALUMINUM_ENABLE_NCCL=ON \
     -D LBANN_SB_BUILD_HYDROGEN=ON \
     -D Hydrogen_ENABLE_CUDA=ON \
     -D LBANN_SB_BUILD_LBANN=ON \
     -D CMAKE_BUILD_TYPE:STRING=Release \
     -D LBANN_WITH_CUDA:BOOL=ON \
     -D LBANN_WITH_NVPROF:BOOL=ON \
     -D LBANN_DATATYPE:STRING=float \
     -D LBANN_WITH_TOPO_AWARE:BOOL=ON \
     -D LBANN_WITH_ALUMINUM:BOOL=ON \
     -D LBANN_WITH_CONDUIT:BOOL=ON \
     -D LBANN_WITH_CUDNN:BOOL=ON \
     -D LBANN_WITH_NCCL:BOOL=ON \
     -D LBANN_WITH_SOFTMAX_CUDA:BOOL=ON \
     -D LBANN_SEQUENTIAL_INITIALIZATION:BOOL=OFF \
     -D LBANN_WITH_TBINF=OFF \
     -D LBANN_WITH_VTUNE:BOOL=OFF \
     -D CMAKE_INSTALL_PREFIX:PATH=${LBANN_INSTALL_DIR} \
     ${LBANN_HOME}/superbuild

   ninja
   ```

## Building with [CMake](https://cmake.org)

LBANN uses [CMake](https://cmake.org) for its build system and a version newer than or equal to 3.9.0 is required. LBANN development is done primarily on UNIX-based platforms. As such, the build is tested regularly on Linux-based machines, occasionally on OSX, and never on Windows machines.

It is required that LBANN be built out-of-source. That is, CMake must not be invoked in a directory containing a CMakeLists.txt file.

### LBANN CMake options

The following options are exposed in the CMake build system.

+ `LBANN_WITH_ALUMINUM` (Default: `OFF`): Use the Aluminum communication package. This will be set to `ON` automatically if Hydrogen was built with Aluminum.

+ `LBANN_WITH_CNPY` (Default: `ON`): Build with support for CNPY for reading Numpy data.

+ `LBANN_WITH_CONDUIT` (Default: `OFF`): Build with support for CONDUIT.

+ `LBANN_WITH_NVPROF` (Default: `OFF`): Build with extra annotations for NVPROF.
+ `LBANN_WITH_TOPO_AWARE` (Default: `ON`): Use HWLOC for topology-aware choices.

+ `LBANN_WITH_TBINF` (Default: `ON`): Enable the Tensorboard interface.

+ `LBANN_WITH_VTUNE` (Default: `OFF`): Build with extra annotations for VTune.

+ `LBANN_DETERMINISTIC` (Default: `OFF`): Force as much of the code as possible to be deterministic. This is not a guarantee, as certain operations in third-party libraries cannot be forced into a deterministic mode, especially for CUDA-enabled builds.

+ `LBANN_SEQUENTIAL_INITIALIZATION` (Default: `OFF`): Force sequentially consistent initialization of data structures.

+ `LBANN_WARNINGS_AS_ERRORS` (Default: `OFF`): Promote compiler warnings to errors. This should be used by developers only. Developers are encouraged to build with this `ON` prior to merging any code into the repository.

+ `LBANN_USE_PROTOBUF_MODULE` (Default: `OFF`): Search for Protobuf using CMake's `FindProtobuf.cmake` module instead of the Protobuf config file. This is useful on platforms with differently architected compute nodes or when the config method is inexplicably failing.

The following variables may also be set:

+ `LBANN_DATATYPE` (Default: `float`): The datatype to use for training. Currently this must be `float` or `double`.

The following variable has been deprecated and removed:

+ `LBANN_WITH_CUDA`. The "CUDA-ness" of LBANN is now tied 1:1 with the "CUDA-ness" of Hydrogen. At present, it seems like unnecessary overhead to support the situation in which Hydrogen has CUDA support but LBANN doesn't want to use it, until a compelling use-case reveals itself.

### Controlling dependency resolution

The following variables may be set with CMake to identify dependencies that are not installed into the "typical" locations that CMake searches by default. They may be either exported into the environment used by CMake using whatever mechanisms are allowed by the shell, or passed to CMake as a cache variable (e.g., `cmake -DPKG_DIR=/path/to/pkg`). The latter option is recommended.

+ `Aluminum_DIR` or `ALUMINUM_DIR` or `AL_DIR`: The path to _either_ the Aluminum installation prefix _or_ the AluminumConfig.cmake file. If Hydrogen has not been built with Aluminum support, set `LBANN_WITH_ALUMINUM=ON` to enable Aluminum support.
+ `CEREAL_DIR`: The path to _either_ the CEREAL installation prefix _or_ the cereal-config.cmake file.
+ `CNPY_DIR`: The path to the CNPY installation prefix. Must set `LBANN_WITH_CNPY=ON` to enable CNPY support.
+ `Conduit_DIR` or `CONDUIT_DIR`: The path to _either_ the CONDUIT installation prefix _or_ the conduit.cmake file. Must set `LBANN_WITH_CONDUIT=ON` to enable CONDUIT support.
  + `HDF5_DIR`: The path to _either_ the HDF5 installation prefix _or_ the hdf5_config.cmake file. There is a known issue with CONDUIT that it may link to HDF5 but not properly export that dependency.
+ `HWLOC_DIR`: The path to the HWLOC installation prefix. Must set `LBANN_WITH_HWLOC=ON` to enable HWLOC support.
+ `Hydrogen_DIR` or `HYDROGEN_DIR`: The path to _either_ the Hydrogen installation prefix _or_ the HydrogenConfig.cmake file.
+ `NVTX_DIR`: The path to the prefix of NVTX. This should not be used except in circumstances in which one might want to link to a different NVTX installation than the CUDA toolkit. Under normal circumstances, if CUDA was found without issue, NVTX should be as well.
+ `OpenCV_DIR` or `OPENCV_DIR`: The path to _either_ the OpenCV installation prefix _or_ the OpenCVConfig.cmake file.
+ `Protobuf_DIR` or `PROTOBUF_DIR`: The path to _either_ the Protobuf installation prefix _or_ the protobuf-config.cmake file.
+ `VTUNE_DIR`: The path to the prefix of the VTune (or Intel compiler suite) installation.

Compilers, including CUDA compilers, are found using the default CMake mechanisms, as are OpenMP and MPI. Thus, the process of finding these tools can be manipulated using the usual CMake mechanisms and/or cache variables as [documented by CMake](https://cmake.org/documentation).

Except where otherwise noted, this list attempts to address the first level of dependencies of LBANN, that is, those that are one edge away in the DAG. If deeper dependency issues appear, please consult the documentation of the packages that are causing the issues, as they may require additional CMake/environment flags to be set before resolving properly.

### Example CMake invocation

A sample CMake build for LBANN might look like the following.
```bash
cmake \
  -D LBANN_WITH_CUDA:BOOL=ON \
  -D LBANN_WITH_NVPROF:BOOL=ON \
  -D LBANN_DATATYPE:STRING=float \
  -D Hydrogen_DIR:PATH=/path/to/hydrogen \
  -D HWLOC_DIR:PATH=/path/to/hwloc \
  /path/to/lbann
```

## Building an entire ecosystem with the "Superbuild"

__WARNING__: This is primarily for developer convenience and is not meant to be robust to all possible use-cases for LBANN.

LBANN includes CMake `ExternalProject` definitions for a large portion of its dependency graph. The following dependencies are supported. These are one or two edges from LBANN in the dependency DAG.

+ Aluminum
+ CNPY
+ CONDUIT
+ [CUB](https://github.com/nvlabs/cub). This is used by Hydrogen for efficiently managing GPU memory.
+ [HDF5](https://www.hdfgroup.org/solutions/hdf5). This is a dependency of CONDUIT.
+ Hydrogen
+ [JPEG-turbo](https://github.com/libjpeg-turbo/libjpeg-turbo). This is a dependency of OpenCV.
+ [OpenBLAS](https://github.com/xianyi/OpenBLAS.git). This is an optional dependency of Hydrogen. It is recommended if your system does not have a system-optimized BLAS distribution (e.g., Intel's MKL).
+ OpenCV
+ Protobuf

The following dependencies are known to exist but for some reason or another are not supported by the superbuild framework.

+ cuDNN is a freely available binary package from NVIDIA.
+ NCCL is a freely available binary package from NVIDIA. Inspired users may also build it from source from its [github repository](https://github.com/nvidia/nccl).
+ HWLOC is often installed by default, especially on large supercomputers. Certain components may require superuser access to configure, but these features are not used by LBANN. If it is not available, ask the system administrators, consult the package manager, install using Spack, or build from [source](https://www.open-mpi.org/projects/hwloc/).

The superbuild system is itself a CMake project rooted in `$LBANN_HOME/superbuild` (distinct from the LBANN CMake project rooted in `$LBANN_HOME`). Options that control the superbuild system are prefixed with `LBANN_SB_`; other options that appear in a CMake invocation for the superbuild are either interpreted on a sub-project basis or forwarded to certain sub-projects.
### Choosing packages to build in the Superbuild

The superbuild system is _constructive_ or _additive_; that is, it will only build the packages that it is asked to build. Any required package that is not requested is assumed to exist on the system by the time it is needed by whichever package requires it. For example, if HDF5 is provided by the system administrators on a system, it does not need to be built, and CONDUIT can be built by pointing its build at the system HDF5.

Packages are included in a superbuild by passing `LBANN_SB_BUILD_<PKG>` options to CMake _for each package_ that it should build, including LBANN itself. E.g.,
```bash
cmake \
  -DLBANN_SB_BUILD_ALUMINUM=ON \
  -DLBANN_SB_BUILD_HYDROGEN=ON \
  -DLBANN_SB_BUILD_LBANN=ON \
  /path/to/lbann/superbuild
```
will invoke the superbuild to build Aluminum, Hydrogen, and LBANN _only_. Acceptable values for `<PKG>` are `ALUMINUM`, `CNPY`, `CONDUIT`, `CUB`, `HDF5`, `HYDROGEN`, `JPEG_TURBO`, `OPENCV`, `PROTOBUF`, and `LBANN`.

### Forwarding options to sub-projects

The subprojects are largely pre-configured to "do the right thing" for building LBANN. However, there are some variables that users of the superbuild system may need to control. These are exposed as regular CMake options in the individual projects' CMakeLists and can be viewed by running, e.g.,

```bash
cmake -L superbuild/<PKG>/CMakeLists.txt
```

Several significant CMake flags are automatically forwarded from the superbuild CMake to subprojects. These are generally "typical" CMake flags (but not all; if something is missing, please open [an issue](https://github.com/llnl/lbann/issues)). Some examples are

+ `CMAKE_INSTALL_PREFIX`
+ `CMAKE_BUILD_TYPE`
+ `CMAKE_<LANG>_COMPILER`
+ `CMAKE_<LANG>_FLAGS`

To accommodate developers working on edge-cases with these dependencies, any flag may be forwarded to any CMake-built package using the following syntax: `LBANN_SB_FWD_<PKG>_<OPTION>`
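For example, a hypothetical invocation of this form, assuming the placeholder pattern `LBANN_SB_FWD_<PKG>_<OPTION>` reconstructed above and reusing Hydrogen's `Hydrogen_ENABLE_CUDA` option from the earlier example, might look like this sketch:
```bash
# Hypothetical: build Hydrogen via the superbuild and forward one of
# Hydrogen's own CMake options through the LBANN_SB_FWD_ prefix.
cmake \
  -D LBANN_SB_BUILD_HYDROGEN=ON \
  -D LBANN_SB_FWD_HYDROGEN_Hydrogen_ENABLE_CUDA=ON \
  /path/to/lbann/superbuild
```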