Merge pull request #1408 from Imraj-Singh/cuda_stir

Add CudaRelativeDifferencePrior (currently limited to 3x3x3 neighbourhood)
UCL · Jul 8, 2024 · b153fda · b153fda
2 parents 32befa6 + 1f0d692
commit b153fda
Show file tree

Hide file tree

Showing 27 changed files with 1,038 additions and 129 deletions.
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -3,4 +3,4 @@ repos:
     rev: '62302476'
     hooks:
     -   id: clang-format
-        files: \.(c|cc|cxx|cpp|h|hpp|hxx|inl|txx)$
+        files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|inl|txx)$
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -186,6 +186,38 @@ else()
   message(STATUS "NiftyPET projector support disabled.")
 endif()
 
+if(NOT DISABLE_STIR_CUDA)
+  include(CheckLanguage)
+  check_language(CUDA)
+  if (CMAKE_CUDA_COMPILER)
+    enable_language(CUDA)
+    find_package(CUDAToolkit QUIET)
+    if (NOT CUDAToolkit_FOUND)
+      message(WARNING "CUDAToolkit not detected. Setting DISABLE_STIR_CUDA to ON.")
+      set(DISABLE_STIR_CUDA ON)
+      set(STIR_WITH_CUDA OFF)
+    else()
+      if("${CMAKE_VERSION}" VERSION_LESS "3.23")
+        message(FATAL_ERROR "CMake 3.23 or newer is required to use CMAKE_CUDA_ARCHITECTURES set to 'all'. Upgrade your CMake or set DISABLE_STIR_CUDA to ON.")
+      else()
+        set(CMAKE_CUDA_ARCHITECTURES "all")
+      endif()
+      message(STATUS "STIR CUDA support enabled. Using CUDA version ${CUDAToolkit_VERSION}.")
+      set(STIR_WITH_CUDA ON)
+    endif()
+    # check if run-time is available for ctests
+    find_program(NVIDIA_SMI nvidia-smi)
+    if (NOT NVIDIA_SMI)
+      message(WARNING "nvidia-smi not found. Disabling the ctests using CUDA")
+      set(SKIP_CUDA_TESTS ON)
+    endif()
+  else()
+    message(WARNING "No CUDA compiler found. Setting DISABLE_STIR_CUDA to ON.")
+    set(DISABLE_STIR_CUDA ON)
+    set(STIR_WITH_CUDA OFF)
+  endif()
+endif()
+
 # Parallelproj
 if(NOT DISABLE_Parallelproj_PROJECTOR)
     find_package(parallelproj 1.3.4 CONFIG)

diff --git a/documentation/release_6.2.htm b/documentation/release_6.2.htm
@@ -22,7 +22,8 @@ <h2>Overall summary</h2>
     </p>
 
     <p>
-      This release contains mainly code written by Nicole Jurjew (UCL) and Kris Thielemans (UCL).
+      This release contains mainly code written by Nicole Jurjew (UCL) (SSRB for TOF),
+      Imraj Singh (UCL) (CUDA version of the Relative Difference Prior) and Kris Thielemans (UCL).
     </p>
 
 <h2>Patch release info</h2>
@@ -51,6 +52,13 @@ <h3>New functionality</h3>
     in <code>ProjData</code>).<br>
     <a href=https://github.com/UCL/STIR/pull/1439>PR #1439</a> and <a href=https://github.com/UCL/STIR/pull/1448>PR #1448</a>
   </li>
+  <li>
+    New prior <code>CudaRelativeDifferencePrior</code> (use <tt>Cuda Relative Difference Prior</tt> in <tt>.par</tt> files), only
+    available if the CUDA toolkit is found during building. Results are identical to <code>RelativeDifferencePrior</code>
+    up to numerical rounding issues. However, the code is currently limited to 3x3x3 weights.<br>
+    Added timings for the RDP (both non-CUDA and CUDA) tothe  <tt>stir_timings</tt> utility.<br>
+    <a href=https://github.com/UCL/STIR/pull/1408>PR #1408</a>
+  </li>
 </ul>
 
 
@@ -111,7 +119,7 @@ <h3>Known problems</h3>
     <p>See <a href=https://github.com/UCL/STIR/labels/bug>our issue tracker</a>.</p>
 
 
-<H2>What's new for developers (aside from what should be obvious
+<H2>What is new for developers (aside from what should be obvious
 from the above):</H2>
 
 <h3>Changed functionality</h3>
@@ -160,6 +168,10 @@ <h3>Build system</h3>
     Force C++ version according to CERN ROOT versions: ROOT 6.28.10 needs C++17 and 6.30.2 needs C++20.
     Also some fixes when relying on <code>root-config</code>.
   </li>
+  <li>
+    Optionally enable CUDA as a CMake language (for the CUDA RDP). <strong>You should use CMake 3.23 or later</strong>
+    if you use CUDA.
+  </li>
 </ul>
 
 <h3>Test changes</h3>

diff --git a/recon_test_pack/CUDA/OSMAPOSL_test_sim_Parallelproj_CUDARDP.par b/recon_test_pack/CUDA/OSMAPOSL_test_sim_Parallelproj_CUDARDP.par
@@ -0,0 +1,46 @@
+OSMAPOSLParameters :=
+; test file for OSMAPOSL with a quadratic prior (and ray tracing projection matrix)
+objective function type:= PoissonLogLikelihoodWithLinearModelForMeanAndProjData
+PoissonLogLikelihoodWithLinearModelForMeanAndProjData Parameters:=
+
+input file := my_prompts${suffix}.hs
+zero end planes of segment 0:= 0 ; segment 0 is has direct and indirect planes
+; if disabled, defaults to maximum segment number in the file
+;maximum absolute segment number to process := 3
+
+projector pair type := parallelproj
+  Projector Pair Using Parallelproj Parameters:=
+  End Projector Pair Using Parallelproj Parameters:=
+
+  Bin Normalisation type := from projdata
+    Bin Normalisation From ProjData :=
+    normalisation projdata filename:= my_acfs${suffix}.hs
+    End Bin Normalisation From ProjData:=
+
+additive sinogram := my_additive_sinogram${suffix}.hs
+
+xy output image size (in pixels) := 91
+zoom := .5
+
+use subset sensitivities:=1
+subset sensitivity filenames:= my_test_sim${suffix}_sens_PP_s${num_subsets}_%d.hv
+recompute_sensitivity:=1
+
+prior type := Relative Difference Prior
+  Relative Difference Prior Parameters:=
+  penalisation factor := 0.5
+  ; next defaults to 0, set to 1 for 2D inverse Euclidean weights, 0 for 3D 
+  only 2D:= 0
+  END Relative Difference Prior Parameters:=
+
+end PoissonLogLikelihoodWithLinearModelForMeanAndProjData Parameters:=
+
+output filename prefix := my_test_sim${suffix}_image_OSMAPOSL_PP_CUDARDP
+number of subsets:= ${num_subsets}
+start at subset:= 0
+number of subiterations:= 28
+save estimates at subiteration intervals:= ${num_subsets}
+
+;report objective function values interval := 1
+
+END :=
diff --git a/recon_test_pack/run_test_simulate_and_recon.sh b/recon_test_pack/run_test_simulate_and_recon.sh
@@ -3,7 +3,7 @@
 #
 #  Copyright (C) 2011 - 2011-01-14, Hammersmith Imanet Ltd
 #  Copyright (C) 2011-07-01 - 2011, Kris Thielemans
-#  Copyright (C) 2014, 2022 University College London
+#  Copyright (C) 2014, 2022, 2024 University College London
 #  This file is part of STIR.
 #
 #  SPDX-License-Identifier: Apache-2.0
@@ -13,13 +13,7 @@
 # Author Kris Thielemans
 # 
 
-# Scripts should exit with error code when a test fails:
-if [ -n "$TRAVIS" -o -n "$GITHUB_WORKSPACE" ]; then
-    # The code runs inside Travis or GHA
-    set -e
-fi
-
-echo This script should work with STIR version 6.0. If you have
+echo This script should work with STIR version 6.2. If you have
 echo a later version, you might have to update your test pack.
 echo Please check the web site.
 echo
@@ -62,6 +56,8 @@ if [ $# -eq 1 ]; then
 fi
 
 echo "Using `command -v OSMAPOSL`"
+echo "Using `command -v OSSPS`"
+echo "Using `command -v FBP2D`"
 
 # first need to set this to the C locale, as this is what the STIR utilities use
 # otherwise, awk might interpret floating point numbers incorrectly
@@ -102,7 +98,23 @@ input_ROI_mean=`awk 'NR>2 {print $2}' ${input_image}.roistats`
 # and reuses its subset sensitivities
 for recon in FBP2D FBP3DRP OSMAPOSL OSSPS; do
   echo "========== Testing `command -v ${recon}`"
-  for parfile in ${recon}_test_sim*.par; do
+  # Check if we have CUDA code and parallelproj.
+  # If so, check for test files in CUDA/*
+  if stir_list_registries |grep -i cuda > /dev/null
+  then
+      if stir_list_registries |grep -i parallelproj > /dev/null
+      then
+          extra_par_files=`ls CUDA/${recon}_test_sim*.par 2> /dev/null`
+          if [ -n "$TRAVIS" -o -n "$GITHUB_WORKSPACE" ]; then
+              # The code runs inside Travis or GHA
+              if [ -n "$extra_par_files" ]; then
+                  echo "Not running ${extra_par_files} due to no CUDA run-time"
+                  extra_par_files=""
+              fi
+          fi
+      fi
+  fi
+  for parfile in ${recon}_test_sim*.par ${extra_par_files}; do
     for dataSuffix in "" "$TOF_suffix"; do
       echo "===== data suffix: \"$dataSuffix\""
       # test first if analytic reconstruction and if so, run pre-correction
@@ -136,7 +148,7 @@ for recon in FBP2D FBP3DRP OSMAPOSL OSSPS; do
 
       # run actual reconstruction
       echo "Running ${recon} ${parfile}"
-      logfile="my_${parfile}${suffix}.log"
+      logfile="my_`basename ${parfile}`${suffix}.log"
       ${MPIRUN} ${recon} ${parfile} > "$logfile" 2>&1
       if [ $? -ne 0 ]; then
           echo "Error running reconstruction. CHECK RECONSTRUCTION LOG \"$logfile\""

diff --git a/src/buildblock/SeparableGaussianArrayFilter.cxx b/src/buildblock/SeparableGaussianArrayFilter.cxx
@@ -94,7 +94,7 @@ SeparableGaussianArrayFilter<num_dimensions, elemT>::construct_filter(bool norma
       {
         std::stringstream ss;
         ss << "Gaussian filter dim[" << i << "] =" << filter_coefficients;
-        info(ss.str(), 2);
+        info(ss.str(), 3);
       }
 
       this->all_1d_array_filters[i - 1].reset(new ArrayFilter1DUsingConvolution<float>(filter_coefficients));

diff --git a/src/cmake/STIRConfig.cmake.in b/src/cmake/STIRConfig.cmake.in
@@ -143,6 +143,12 @@ if(@STIR_WITH_NiftyPET_PROJECTOR@)
   set(STIR_WITH_NiftyPET_PROJECTOR TRUE)
 endif()
 
+if(@STIR_WITH_CUDA@)
+  find_package(CUDAToolkit REQUIRED)
+  enable_language(CUDA)
+  set(STIR_WITH_CUDA TRUE)
+endif()
+
 if(@STIR_WITH_Parallelproj_PROJECTOR@)
   find_package(parallelproj REQUIRED CONFIG)
   set(STIR_WITH_Parallelproj_PROJECTOR TRUE)

diff --git a/src/cmake/STIRConfig.h.in b/src/cmake/STIRConfig.h.in
@@ -73,6 +73,8 @@ namespace stir {
 
 #cmakedefine STIR_WITH_NiftyPET_PROJECTOR
 
+#cmakedefine STIR_WITH_CUDA
+
 #cmakedefine STIR_WITH_Parallelproj_PROJECTOR
 #cmakedefine parallelproj_built_with_CUDA
 

diff --git a/src/include/stir/NumericVectorWithOffset.h b/src/include/stir/NumericVectorWithOffset.h
@@ -46,7 +46,10 @@ START_NAMESPACE_STIR
 template <class T, class elemT>
 class NumericVectorWithOffset : public VectorWithOffset<T>
 {
-private:
+#ifdef SWIG
+public: // needs to be public for SWIG to be able to parse the "using" statement below
+#endif
+
   typedef VectorWithOffset<T> base_type;
 
 public:

diff --git a/src/include/stir/RegisteredParsingObject.h b/src/include/stir/RegisteredParsingObject.h
@@ -77,6 +77,12 @@ template <typename Derived, typename Base, typename Parent = Base>
 class RegisteredParsingObject : public Parent
 {
 public:
+  // import constructors from Parent
+  // Note: disabled for older SWIG as that generates an error before 4.2, and a warning for 4.2
+#if !defined(SWIG) || (SWIG_VERSION >= 0x040300)
+  using Parent::Parent;
+#endif
+
   //! Construct a new object (of type Derived) by parsing the istream
   /*! When the istream * is 0, questions are asked interactively.
 
@@ -93,6 +99,7 @@ class RegisteredParsingObject : public Parent
   inline std::string parameter_info() override;
 
 public:
+#ifndef SWIG
   //! A helper class to allow automatic registration.
   struct RegisterIt
   {
@@ -111,18 +118,19 @@ class RegisteredParsingObject : public Parent
     */
     ~RegisterIt()
     {
-#if 0
+#  if 0
       // does not work yet, as registry might be destructed before this
       // RegisterIt object. A solution to this problem is coming up.
       cerr << "In RegisterIt destructor for " << Derived::registered_name<<endl;
       cerr <<"Current keys: ";
       Parent::registry().list_keys(cerr);
       Parent::registry().remove_from_registry(Derived::registered_name);
-#endif
+#  endif
     }
   };
   // RegisterIt needs to be a friend to have access to registry()
   friend struct RegisterIt;
+#endif
 };
 
 END_NAMESPACE_STIR

diff --git a/src/include/stir/cuda_utilities.h b/src/include/stir/cuda_utilities.h
@@ -0,0 +1,69 @@
+/*
+    Copyright (C) 2024, University College London
+    This file is part of STIR.
+
+    SPDX-License-Identifier: Apache-2.0
+
+    See STIR/LICENSE.txt for details
+*/
+
+#ifndef __stir_cuda_utilities_H__
+#define __stir_cuda_utilities_H__
+
+/*!
+  \file
+  \ingroup CUDA
+  \brief some utilities for STIR and CUDA
+
+  \author Kris Thielemans
+*/
+#include "stir/Array.h"
+#include "stir/info.h"
+#include <vector>
+
+START_NAMESPACE_STIR
+
+template <int num_dimensions, typename elemT>
+inline void
+array_to_device(elemT* dev_data, const Array<num_dimensions, elemT>& stir_array)
+{
+  if (stir_array.is_contiguous())
+    {
+      info("array_to_device contiguous", 100);
+      cudaMemcpy(dev_data, stir_array.get_const_full_data_ptr(), stir_array.size_all() * sizeof(elemT), cudaMemcpyHostToDevice);
+      stir_array.release_const_full_data_ptr();
+    }
+  else
+    {
+      info("array_to_device non-contiguous", 100);
+      // Allocate host memory to get contiguous vector, copy array to it and copy from device to host
+      std::vector<elemT> tmp_data(stir_array.size_all());
+      std::copy(stir_array.begin_all(), stir_array.end_all(), tmp_data.begin());
+      cudaMemcpy(dev_data, tmp_data.data(), stir_array.size_all() * sizeof(elemT), cudaMemcpyHostToDevice);
+    }
+}
+
+template <int num_dimensions, typename elemT>
+inline void
+array_to_host(Array<num_dimensions, elemT>& stir_array, const elemT* dev_data)
+{
+  if (stir_array.is_contiguous())
+    {
+      info("array_to_host contiguous", 100);
+      cudaMemcpy(stir_array.get_full_data_ptr(), dev_data, stir_array.size_all() * sizeof(elemT), cudaMemcpyDeviceToHost);
+      stir_array.release_full_data_ptr();
+    }
+  else
+    {
+      info("array_to_host non-contiguous", 100);
+      // Allocate host memory for the result and copy from device to host
+      std::vector<elemT> tmp_data(stir_array.size_all());
+      cudaMemcpy(tmp_data.data(), dev_data, stir_array.size_all() * sizeof(elemT), cudaMemcpyDeviceToHost);
+      // Copy the data to the stir_array
+      std::copy(tmp_data.begin(), tmp_data.end(), stir_array.begin_all());
+    }
+}
+
+END_NAMESPACE_STIR
+
+#endif
diff --git a/src/include/stir/doxygengroups.h b/src/include/stir/doxygengroups.h
@@ -170,6 +170,11 @@ be selected at run-time.
 \ingroup buildblock
 */
 
+/*!
+\defgroup CUDA Items relating to CUDA and STIR buildblock objects.
+\ingroup buildblock
+*/
+
 /*!
 \defgroup data_buildblock Acquisition data building blocks
 \ingroup STIR_library