From b1335bd83ad11cff4e9e1fe714be168f9bf6e75d Mon Sep 17 00:00:00 2001
From: Daniel Peter <daniel.peter22@gmail.com>
Date: Wed, 26 Apr 2023 09:41:44 +0200
Subject: [PATCH 01/11] updates github workflow checkout version

---
 .github/workflows/CI.yml | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
index 6eecc1f61..80dac582c 100644
--- a/.github/workflows/CI.yml
+++ b/.github/workflows/CI.yml
@@ -11,7 +11,7 @@ jobs:
     runs-on: ubuntu-latest
 
     steps:
-    - uses: actions/checkout@v2
+    - uses: actions/checkout@v3
 
     - name: Check for changes
       id: diff
@@ -75,7 +75,7 @@ jobs:
     needs: changesCheck
 
     steps:
-    - uses: actions/checkout@v2
+    - uses: actions/checkout@v3
 
     - name: Install dependencies
       run: |
@@ -128,7 +128,7 @@ jobs:
         os: [ubuntu-latest,ubuntu-20.04]
 
     steps:
-    - uses: actions/checkout@v2
+    - uses: actions/checkout@v3
 
     - name: Install packages
       run: ./.github/scripts/run_install.sh
@@ -147,7 +147,7 @@ jobs:
     needs: changesCheck
 
     steps:
-    - uses: actions/checkout@v2
+    - uses: actions/checkout@v3
 
     - name: Cache Intel oneapi packages
       id: cache-intel-oneapi
@@ -224,7 +224,7 @@ jobs:
     needs: [linuxCheck]
 
     steps:
-    - uses: actions/checkout@v2
+    - uses: actions/checkout@v3
 
     - name: Install packages
       run: ./.github/scripts/run_install.sh
@@ -242,7 +242,7 @@ jobs:
      needs: [linuxCheck]
 
      steps:
-     - uses: actions/checkout@v2
+     - uses: actions/checkout@v3
 
      - name: Install packages
        run: ./.github/scripts/run_install.sh
@@ -267,7 +267,7 @@ jobs:
     needs: [linuxCheck]
 
     steps:
-    - uses: actions/checkout@v2
+    - uses: actions/checkout@v3
 
     - name: Install packages
       run: ./.github/scripts/run_install.sh
@@ -291,7 +291,7 @@ jobs:
     needs: [linuxCheck]
 
     steps:
-    - uses: actions/checkout@v2
+    - uses: actions/checkout@v3
 
     - name: Install packages
       run: ./.github/scripts/run_install.sh
@@ -315,7 +315,7 @@ jobs:
     needs: [linuxCheck]
 
     steps:
-    - uses: actions/checkout@v2
+    - uses: actions/checkout@v3
 
     - name: Install packages
       run: ./.github/scripts/run_install.sh
@@ -340,7 +340,7 @@ jobs:
     needs: [linuxCheck]
 
     steps:
-    - uses: actions/checkout@v2
+    - uses: actions/checkout@v3
 
     - name: Install packages
       run: ./.github/scripts/run_install.sh
@@ -364,7 +364,7 @@ jobs:
     needs: [linuxCheck]
 
     steps:
-    - uses: actions/checkout@v2
+    - uses: actions/checkout@v3
 
     - name: Install packages
       run: ./.github/scripts/run_install.sh

From 01d2ddde6a0ea88d97dea895332286f3704dd508 Mon Sep 17 00:00:00 2001
From: Daniel Peter <daniel.peter22@gmail.com>
Date: Wed, 26 Apr 2023 09:49:38 +0200
Subject: [PATCH 02/11] updates default CUDA compute code for Maxwell cards
 (Quadro M6000, GeForce 900, GTX-970, GTX-980, GTX Titan X)

---
 Makefile.in | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/Makefile.in b/Makefile.in
index 5cf182b5f..9fe5484de 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -150,6 +150,7 @@ GENCODE_30 = -gencode=arch=compute_30,code=\"sm_30,compute_30\"
 GENCODE_35 = -gencode=arch=compute_35,code=\"sm_35,compute_35\"
 GENCODE_37 = -gencode=arch=compute_37,code=\"sm_37\"
 GENCODE_50 = -gencode=arch=compute_50,code=\"sm_50,compute_50\"
+GENCODE_52 = -gencode=arch=compute_52,code=\"sm_52,compute_52\"
 GENCODE_60 = -gencode=arch=compute_60,code=\"sm_60,compute_60\"
 GENCODE_70 = -gencode=arch=compute_70,code=\"sm_70,compute_70\"
 GENCODE_75 = -gencode=arch=compute_75,code=\"sm_75,compute_75\"
@@ -165,7 +166,7 @@ GENCODE_80 = -gencode=arch=compute_80,code=\"sm_80,compute_80\"
 # CUDA version 8.0
 @COND_CUDA_TRUE@@COND_CUDA8_TRUE@GENCODE = $(GENCODE_60) $(FC_DEFINE)GPU_DEVICE_Pascal
 # CUDA version 7.x
-@COND_CUDA_TRUE@@COND_CUDA7_TRUE@GENCODE = $(GENCODE_50) $(FC_DEFINE)GPU_DEVICE_Maxwell
+@COND_CUDA_TRUE@@COND_CUDA7_TRUE@GENCODE = $(GENCODE_52) $(FC_DEFINE)GPU_DEVICE_Maxwell
 # CUDA version 6.5
 @COND_CUDA_TRUE@@COND_CUDA6_TRUE@GENCODE = $(GENCODE_37) $(FC_DEFINE)GPU_DEVICE_K80
 # CUDA version 5.x
@@ -235,7 +236,7 @@ GENCODE_AMD_MI250 = --amdgpu-target=gfx90a
 
 @COND_HIP_TRUE@@COND_HIP_CUDA5_TRUE@GENCODE_HIP = $(GENCODE_35)         # --with-hip=cuda5 ..
 @COND_HIP_TRUE@@COND_HIP_CUDA6_TRUE@GENCODE_HIP = $(GENCODE_37)         # --with-hip=cuda6 ..
-@COND_HIP_TRUE@@COND_HIP_CUDA7_TRUE@GENCODE_HIP = $(GENCODE_50)         # --with-hip=cuda7 ..
+@COND_HIP_TRUE@@COND_HIP_CUDA7_TRUE@GENCODE_HIP = $(GENCODE_52)         # --with-hip=cuda7 ..
 @COND_HIP_TRUE@@COND_HIP_CUDA8_TRUE@GENCODE_HIP = $(GENCODE_60)         # --with-hip=cuda8 ..
 @COND_HIP_TRUE@@COND_HIP_CUDA9_TRUE@GENCODE_HIP = $(GENCODE_70)         # --with-hip=cuda9 ..
 @COND_HIP_TRUE@@COND_HIP_CUDA10_TRUE@GENCODE_HIP = $(GENCODE_75)         # --with-hip=cuda10 ..

From a8e5bfee922cb4b2c42f8cf3bfa04a74ca647a0c Mon Sep 17 00:00:00 2001
From: Daniel Peter <daniel.peter22@gmail.com>
Date: Wed, 3 May 2023 11:18:31 +0200
Subject: [PATCH 03/11] updates github workflow

---
 .github/workflows/CI.yml | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
index 80dac582c..c8e5c4cc7 100644
--- a/.github/workflows/CI.yml
+++ b/.github/workflows/CI.yml
@@ -29,9 +29,20 @@ jobs:
           export DIFF=$( git diff --name-only ${{ github.event.before }} $GITHUB_SHA )
           echo "  diff between ${{ github.event.before }} and $GITHUB_SHA"
         fi
-        echo "$DIFF"
+        echo "***"; echo "$DIFF"; echo "***"
         # Escape newlines (replace \n with %0A)
-        echo "::set-output name=diff::$( echo "$DIFF" | sed ':a;N;$!ba;s/\n/%0A/g' )"
+        # deprecated:
+        #echo "::set-output name=diff::$( echo "$DIFF" | sed ':a;N;$!ba;s/\n/%0A/g' )"
+        # new:
+        # replace new line with %0A - will result in finding only one file with a very long name...
+        #echo "diff=$( echo "$DIFF" | sed ':a;N;$!ba;s/\n/%0A/g' )" >> $GITHUB_OUTPUT
+        # doesn't work...
+        #echo "diff=\"$DIFF\"" >> "$GITHUB_OUTPUT"
+        # new multi-line format:
+        # (https://docs.github.com/en/actions/using-workflows/workflow-commands-for-github-actions#multiline-strings)
+        echo "diff<<EOF" >> $GITHUB_OUTPUT
+        echo "$DIFF" >> $GITHUB_OUTPUT
+        echo "EOF" >> $GITHUB_OUTPUT
 
     - name: Output changes
       run: echo "${{ steps.diff.outputs.diff }}"
@@ -151,7 +162,7 @@ jobs:
 
     - name: Cache Intel oneapi packages
       id: cache-intel-oneapi
-      uses: actions/cache@v2
+      uses: actions/cache@v3
       with:
         path: /opt/intel/oneapi
         key: install-${{ runner.os }}-all

From 33905b3855af9b879f54b4daf0bfd84c49e1edbb Mon Sep 17 00:00:00 2001
From: Daniel Peter <daniel.peter22@gmail.com>
Date: Wed, 3 May 2023 13:05:19 +0200
Subject: [PATCH 04/11] updates azure workflow

---
 .azure-pipelines.yml                  | 73 ++++++++++++++++++++-------
 .azure-pipelines/install-template.yml | 61 ++++++++++++++++++----
 2 files changed, 107 insertions(+), 27 deletions(-)

diff --git a/.azure-pipelines.yml b/.azure-pipelines.yml
index cb93a63e7..c31dcb278 100644
--- a/.azure-pipelines.yml
+++ b/.azure-pipelines.yml
@@ -72,7 +72,7 @@ jobs:
           # tests/ directory
           RUN_CHECKS=1
         elif [ "$directory" == .azure-pipelines ]; then
-          # .travis/ directory
+          # azure directory
           RUN_CHECKS=1
         fi
       done <<< "$DIFF"
@@ -84,7 +84,8 @@ jobs:
     displayName: 'Run checks'
 
 - job: compilation_default
-  displayName: 'Compilation Default GCC 9'
+  # ubuntu-latest: ubuntu-22.04 w/ GCC 11
+  displayName: 'Compilation Default GCC'
   steps:
   - template: .azure-pipelines/install-template.yml
     parameters:
@@ -95,14 +96,14 @@ jobs:
       CUDA: false
       BUILD: true
 
-- job: compilation_default_gcc7
-  displayName: 'Compilation Default GCC 7'
+- job: compilation_default_gcc9
+  displayName: 'Compilation Default GCC 9'
   pool:
-    vmImage: 'ubuntu-18.04'
+    vmImage: 'ubuntu-20.04'
   variables:
-    CC: gcc-7
-    CXX: g++-7
-    FC: gfortran-7
+    CC: gcc-9
+    CXX: g++-9
+    FC: gfortran-9
   steps:
   - template: .azure-pipelines/install-template.yml
     parameters:
@@ -131,27 +132,33 @@ jobs:
       CUDA: false
       BUILD: true
 
-- job: compilation_CUDA10
-  displayName: 'Compilation CUDA 10'
+- job: compilation_CUDA11_gcc9
+  displayName: 'Compilation CUDA 11 GCC 9'
   pool:
-    vmImage: 'ubuntu-18.04'
+    vmImage: 'ubuntu-20.04'
   variables:
-    CC: gcc-7
-    CXX: g++-7
-    FC: gfortran-7
+    CC: gcc-9
+    CXX: g++-9
+    FC: gfortran-9
   steps:
   - template: .azure-pipelines/install-template.yml
     parameters:
       CUDA: true
-      CUDA_V: '10.2'
+      CUDA_V: '11.4'
   - template: .azure-pipelines/configure-template.yml
     parameters:
       TESTFLAGS: '--enable-vectorization --with-cuda=cuda10'
       CUDA: true
       BUILD: true
 
-- job: compilation_CUDA11
-  displayName: 'Compilation CUDA 11'
+- job: compilation_CUDA11_gcc10
+  displayName: 'Compilation CUDA 11 GCC 10'
+  pool:
+    vmImage: 'ubuntu-20.04'
+  variables:
+    CC: gcc-10
+    CXX: g++-10
+    FC: gfortran-10
   steps:
   - template: .azure-pipelines/install-template.yml
     parameters:
@@ -163,6 +170,38 @@ jobs:
       CUDA: true
       BUILD: true
 
+- job: compilation_CUDA12
+  displayName: 'Compilation CUDA 12 GCC 10'
+  pool:
+    vmImage: 'ubuntu-20.04'
+  variables:
+    CC: gcc-10
+    CXX: g++-10
+    FC: gfortran-10
+  steps:
+  - template: .azure-pipelines/install-template.yml
+    parameters:
+      CUDA: true
+      CUDA_V: '12.1'
+  - template: .azure-pipelines/configure-template.yml
+    parameters:
+      TESTFLAGS: '--enable-vectorization --with-cuda=cuda11'
+      CUDA: true
+      BUILD: true
+
+- job: compilation_CUDA12_latest
+  displayName: 'Compilation CUDA 12'
+  steps:
+  - template: .azure-pipelines/install-template.yml
+    parameters:
+      CUDA: true
+      CUDA_V: '12.1'
+  - template: .azure-pipelines/configure-template.yml
+    parameters:
+      TESTFLAGS: '--enable-vectorization --with-cuda=cuda12'
+      CUDA: true
+      BUILD: true
+
 - job: test_example_1
   displayName: 'Test example 1 - regional_Greece_small'
   dependsOn: compilation_default
diff --git a/.azure-pipelines/install-template.yml b/.azure-pipelines/install-template.yml
index dd67ea468..a64f71681 100644
--- a/.azure-pipelines/install-template.yml
+++ b/.azure-pipelines/install-template.yml
@@ -4,8 +4,10 @@
 # software setup on VM nodes
 # ubuntu-18.04:
 # https://github.com/actions/virtual-environments/blob/main/images/linux/Ubuntu1804-README.md
-# ubuntu-20.04 "ubuntu-latest":
+# ubuntu-20.04:
 # https://github.com/actions/virtual-environments/blob/main/images/linux/Ubuntu2004-README.md
+# ubuntu-22.04 "ubuntu-latest":
+# https://github.com/actions/runner-images/blob/main/images/linux/Ubuntu2204-Readme.md
 #
 parameters:
 - name: CUDA
@@ -20,10 +22,27 @@ steps:
     # fortran/openMPI compiler
     echo "CC: ${CC} CXX: ${CXX} FC: ${FC}"
     # updates repository
+    echo; echo `uname -a`; lsb_release -a; echo
     sudo apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 6B05F25D762E3157
     sudo apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 78BD65473CB3BD13
     sudo apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 762E3157
-    if [ "${FC}" == "gfortran-10" ]; then
+    if [ "${FC}" == "gfortran-9" ]; then
+      echo "gfortran: gfortran-9 update"
+      # updating gfortran version
+      sudo add-apt-repository ppa:ubuntu-toolchain-r/test
+      sudo apt-get update
+      sudo apt-get install -y --reinstall gcc-9 g++-9 gfortran-9
+      # updates alternatives
+      echo
+      update-alternatives --query gfortran
+      echo
+      sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-9 100
+      sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-9 100
+      sudo update-alternatives --install /usr/bin/gfortran gfortran /usr/bin/gfortran-9 100
+      echo
+      update-alternatives --query gfortran
+      echo
+    elif [ "${FC}" == "gfortran-10" ]; then
       echo "gfortran: gfortran-10 update"
       # updating gfortran version
       sudo add-apt-repository ppa:ubuntu-toolchain-r/test
@@ -94,9 +113,14 @@ steps:
       ## distribution xenial: from ubuntu 16.04
       #UBUNTU_VERSION=ubuntu1604
       ## distribution bionic: from ubuntu 18.04
-      UBUNTU_VERSION=ubuntu1804
+      #UBUNTU_VERSION=ubuntu1804
       ## distribution focal: from ubuntu 20.04
       #UBUNTU_VERSION=ubuntu2004
+      ## distribution jammy: from ubuntu 22.04
+      #UBUNTU_VERSION=ubuntu2204
+
+      # default
+      UBUNTU_VERSION=ubuntu2004
 
       # CUDA_VERSION - specifies CUDA toolkit version
       echo "CUDA version: $CUDA_V"
@@ -110,10 +134,20 @@ steps:
       elif [ "$CUDA_V" == "10.2" ]; then
         ## bionic
         CUDA_VERSION=10.2.89-1
+      elif [ "$CUDA_V" == "11.4" ]; then
+        ## focal
+        CUDA_VERSION=11.4.0-1
+      elif [ "$CUDA_V" == "12.1" ]; then
+        ## focal
+        CUDA_VERSION=12.1.1-1
       else
-        # note: on azure VM nodes with ubuntu-latest, default gcc version is 9.3;
+        # note: - on azure VM nodes with ubuntu 18.04, default gcc version is 9.3;
+        #       needs at least CUDA version 10.x
+        #       - on azure VM nodes with ubuntu 20.04, default gcc version is 10.3;
         #       needs at least CUDA version 11.x
-        CUDA_VERSION=11.4.0-1
+        #       - on azure VM nodes with ubuntu-latest (22.04), default gcc version is 11.3;
+        #       needs at least CUDA version 11.7
+        CUDA_VERSION=12.1.1-1
       fi
       echo
 
@@ -151,7 +185,12 @@ steps:
       echo
 
       # gets repo
-      if [ "${CUDA_VERSION}" == "11.4.0-1" ]; then
+      if [ "${CUDA_VERSION}" == "10.2.89-1" ]; then
+        # gets packages
+        INSTALLER=cuda-repo-${UBUNTU_VERSION}_${CUDA_VERSION}_${CUDA_ARCH}.deb
+        wget http://developer.download.nvidia.com/compute/cuda/repos/${UBUNTU_VERSION}/${CUDA_OS}/${INSTALLER}
+        sudo dpkg -i ${INSTALLER}
+      elif [ "${CUDA_VERSION}" == "11.4.0-1" ]; then
         # new CUDA version 11.4 has no cuda-repo-** file, following instructions from website,
         # see: https://developer.nvidia.com/cuda-downloads?target_os=Linux&target_arch=x86_64&Distribution=Ubuntu&target_version=18.04&target_type=deb_network
         wget https://developer.download.nvidia.com/compute/cuda/repos/${UBUNTU_VERSION}/${CUDA_OS}/cuda-${UBUNTU_VERSION}.pin
@@ -160,10 +199,12 @@ steps:
         # adds repo
         sudo add-apt-repository "deb https://developer.download.nvidia.com/compute/cuda/repos/${UBUNTU_VERSION}/${CUDA_OS}/ /"
       else
-        # gets packages
-        INSTALLER=cuda-repo-${UBUNTU_VERSION}_${CUDA_VERSION}_${CUDA_ARCH}.deb
-        wget http://developer.download.nvidia.com/compute/cuda/repos/${UBUNTU_VERSION}/${CUDA_OS}/${INSTALLER}
-        sudo dpkg -i ${INSTALLER}
+        # new versions
+        wget https://developer.download.nvidia.com/compute/cuda/repos/${UBUNTU_VERSION}/${CUDA_OS}/cuda-${UBUNTU_VERSION}.pin
+        sudo mv cuda-${UBUNTU_VERSION}.pin /etc/apt/preferences.d/cuda-repository-pin-600
+        echo
+        # adds repo
+        sudo add-apt-repository "deb https://developer.download.nvidia.com/compute/cuda/repos/${UBUNTU_VERSION}/${CUDA_OS}/ /"
       fi
       #echo
 

From 5120147dd71fa3ef177ed127f7984c47290235a3 Mon Sep 17 00:00:00 2001
From: Daniel Peter <daniel.peter22@gmail.com>
Date: Wed, 3 May 2023 13:28:31 +0200
Subject: [PATCH 05/11] adds CUDA Hopper support

---
 Makefile.in                            |  9 +++++++-
 configure                              | 29 ++++++++++++++++++++++++++
 configure.ac                           |  5 ++++-
 doc/USER_MANUAL/02_getting_started.tex |  1 +
 src/gpu/mesh_constants_cuda.h          |  8 +++++++
 src/gpu/mesh_constants_gpu.h           |  8 +++++++
 src/gpu/rules.mk                       |  4 +++-
 7 files changed, 61 insertions(+), 3 deletions(-)

diff --git a/Makefile.in b/Makefile.in
index 9fe5484de..a8432819b 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -109,6 +109,9 @@ GPU_ELEM_PER_THREAD := 1
 @COND_CUDA11_TRUE@CUDA11 = yes
 @COND_CUDA11_FALSE@CUDA11 = no
 
+@COND_CUDA12_TRUE@CUDA12 = yes
+@COND_CUDA12_FALSE@CUDA12 = no
+
 # CUDA compilation with linking
 @COND_CUDA_PLUS_TRUE@CUDA_PLUS = yes
 @COND_CUDA_PLUS_FALSE@CUDA_PLUS = no
@@ -144,7 +147,7 @@ CUDA_DEBUG = --cudart=shared
 # Volta   (cuda9, V100): -gencode=arch=compute_70,code=sm_70
 # Turing  (cuda10, T4, GeForce RTX 2080): -gencode=arch=compute_75,code=sm_75
 # Ampere  (cuda11, A100, GeForce RTX 3080): -gencode=arch=compute_80,code=sm_80
-
+# Hopper  (cuda12, H100): -gencode=arch=compute_90,code=sm_90
 GENCODE_20 = -gencode=arch=compute_20,code=\"sm_20,compute_20\"
 GENCODE_30 = -gencode=arch=compute_30,code=\"sm_30,compute_30\"
 GENCODE_35 = -gencode=arch=compute_35,code=\"sm_35,compute_35\"
@@ -155,8 +158,11 @@ GENCODE_60 = -gencode=arch=compute_60,code=\"sm_60,compute_60\"
 GENCODE_70 = -gencode=arch=compute_70,code=\"sm_70,compute_70\"
 GENCODE_75 = -gencode=arch=compute_75,code=\"sm_75,compute_75\"
 GENCODE_80 = -gencode=arch=compute_80,code=\"sm_80,compute_80\"
+GENCODE_90 = -gencode=arch=compute_90,code=\"sm_90,compute_90\"
 
 # cuda preprocessor flag
+# CUDA version 12.0
+@COND_CUDA_TRUE@@COND_CUDA12_TRUE@GENCODE = $(GENCODE_90) $(FC_DEFINE)GPU_DEVICE_Hopper
 # CUDA version 11.0
 @COND_CUDA_TRUE@@COND_CUDA11_TRUE@GENCODE = $(GENCODE_80) $(FC_DEFINE)GPU_DEVICE_Ampere
 # CUDA version 10.0
@@ -241,6 +247,7 @@ GENCODE_AMD_MI250 = --amdgpu-target=gfx90a
 @COND_HIP_TRUE@@COND_HIP_CUDA9_TRUE@GENCODE_HIP = $(GENCODE_70)         # --with-hip=cuda9 ..
 @COND_HIP_TRUE@@COND_HIP_CUDA10_TRUE@GENCODE_HIP = $(GENCODE_75)         # --with-hip=cuda10 ..
 @COND_HIP_TRUE@@COND_HIP_CUDA11_TRUE@GENCODE_HIP = $(GENCODE_80)         # --with-hip=cuda11 ..
+@COND_HIP_TRUE@@COND_HIP_CUDA12_TRUE@GENCODE_HIP = $(GENCODE_90)         # --with-hip=cuda12 ..
 
 HIP_FLAGS = @HIP_FLAGS@
 HIP_INC = @HIP_CPPFLAGS@ $(CUDA_MPI_FLAG) $(MPI_CPPFLAGS) $(MPI_INCLUDES)
diff --git a/configure b/configure
index ae560f695..7cf0b7320 100755
--- a/configure
+++ b/configure
@@ -764,6 +764,8 @@ COND_XSMM_FALSE
 COND_XSMM_TRUE
 COND_MIC_FALSE
 COND_MIC_TRUE
+COND_HIP_CUDA12_FALSE
+COND_HIP_CUDA12_TRUE
 COND_HIP_CUDA11_FALSE
 COND_HIP_CUDA11_TRUE
 COND_HIP_CUDA10_FALSE
@@ -796,6 +798,8 @@ COND_CUDA_AWARE_MPI_FALSE
 COND_CUDA_AWARE_MPI_TRUE
 COND_CUDA_PLUS_FALSE
 COND_CUDA_PLUS_TRUE
+COND_CUDA12_FALSE
+COND_CUDA12_TRUE
 COND_CUDA11_FALSE
 COND_CUDA11_TRUE
 COND_CUDA10_FALSE
@@ -3510,6 +3514,14 @@ else
   COND_CUDA11_FALSE=
 fi
 
+ if test x"$want_cuda" = xcuda12; then
+  COND_CUDA12_TRUE=
+  COND_CUDA12_FALSE='#'
+else
+  COND_CUDA12_TRUE='#'
+  COND_CUDA12_FALSE=
+fi
+
 
 # cuda linking for cuda 5x and 6x and 7x and 8x and ..
  if test "$want_cuda" = cuda4 \
@@ -3520,6 +3532,7 @@ fi
     -o "$want_cuda" = cuda9 \
     -o "$want_cuda" = cuda10 \
     -o "$want_cuda" = cuda11 \
+    -o "$want_cuda" = cuda12 \
 ; then
   COND_CUDA_PLUS_TRUE=
   COND_CUDA_PLUS_FALSE='#'
@@ -3696,6 +3709,14 @@ else
   COND_HIP_CUDA11_FALSE=
 fi
 
+ if test x"$want_hip" = xcuda12; then
+  COND_HIP_CUDA12_TRUE=
+  COND_HIP_CUDA12_FALSE='#'
+else
+  COND_HIP_CUDA12_TRUE='#'
+  COND_HIP_CUDA12_FALSE=
+fi
+
 
 ###
 ### MIC (Xeon PHI)
@@ -11679,6 +11700,10 @@ if test -z "${COND_CUDA11_TRUE}" && test -z "${COND_CUDA11_FALSE}"; then
   as_fn_error $? "conditional \"COND_CUDA11\" was never defined.
 Usually this means the macro was only invoked conditionally." "$LINENO" 5
 fi
+if test -z "${COND_CUDA12_TRUE}" && test -z "${COND_CUDA12_FALSE}"; then
+  as_fn_error $? "conditional \"COND_CUDA12\" was never defined.
+Usually this means the macro was only invoked conditionally." "$LINENO" 5
+fi
 if test -z "${COND_CUDA_PLUS_TRUE}" && test -z "${COND_CUDA_PLUS_FALSE}"; then
   as_fn_error $? "conditional \"COND_CUDA_PLUS\" was never defined.
 Usually this means the macro was only invoked conditionally." "$LINENO" 5
@@ -11743,6 +11768,10 @@ if test -z "${COND_HIP_CUDA11_TRUE}" && test -z "${COND_HIP_CUDA11_FALSE}"; then
   as_fn_error $? "conditional \"COND_HIP_CUDA11\" was never defined.
 Usually this means the macro was only invoked conditionally." "$LINENO" 5
 fi
+if test -z "${COND_HIP_CUDA12_TRUE}" && test -z "${COND_HIP_CUDA12_FALSE}"; then
+  as_fn_error $? "conditional \"COND_HIP_CUDA12\" was never defined.
+Usually this means the macro was only invoked conditionally." "$LINENO" 5
+fi
 if test -z "${COND_MIC_TRUE}" && test -z "${COND_MIC_FALSE}"; then
   as_fn_error $? "conditional \"COND_MIC\" was never defined.
 Usually this means the macro was only invoked conditionally." "$LINENO" 5
diff --git a/configure.ac b/configure.ac
index 724163cd6..e60898e53 100644
--- a/configure.ac
+++ b/configure.ac
@@ -112,6 +112,7 @@ AM_CONDITIONAL([COND_CUDA8], [test x"$want_cuda" = xcuda8])
 AM_CONDITIONAL([COND_CUDA9], [test x"$want_cuda" = xcuda9])
 AM_CONDITIONAL([COND_CUDA10], [test x"$want_cuda" = xcuda10])
 AM_CONDITIONAL([COND_CUDA11], [test x"$want_cuda" = xcuda11])
+AM_CONDITIONAL([COND_CUDA12], [test x"$want_cuda" = xcuda12])
 
 # cuda linking for cuda 5x and 6x and 7x and 8x and ..
 AM_CONDITIONAL([COND_CUDA_PLUS],
@@ -122,7 +123,8 @@ AM_CONDITIONAL([COND_CUDA_PLUS],
     -o "$want_cuda" = cuda8 \
     -o "$want_cuda" = cuda9 \
     -o "$want_cuda" = cuda10 \
-    -o "$want_cuda" = cuda11 \]
+    -o "$want_cuda" = cuda11 \
+    -o "$want_cuda" = cuda12 \]
 )
 
 # CUDA-aware MPI setting
@@ -166,6 +168,7 @@ AM_CONDITIONAL([COND_HIP_CUDA8], [test x"$want_hip" = xcuda8])
 AM_CONDITIONAL([COND_HIP_CUDA9], [test x"$want_hip" = xcuda9])
 AM_CONDITIONAL([COND_HIP_CUDA10], [test x"$want_hip" = xcuda10])
 AM_CONDITIONAL([COND_HIP_CUDA11], [test x"$want_hip" = xcuda11])
+AM_CONDITIONAL([COND_HIP_CUDA12], [test x"$want_hip" = xcuda12])
 
 ###
 ### MIC (Xeon PHI)
diff --git a/doc/USER_MANUAL/02_getting_started.tex b/doc/USER_MANUAL/02_getting_started.tex
index 6e48ae8a4..5c384a119 100644
--- a/doc/USER_MANUAL/02_getting_started.tex
+++ b/doc/USER_MANUAL/02_getting_started.tex
@@ -112,6 +112,7 @@ \section{Using the GPU version of the code}
   - CUDA 9 for Volta,   like V100
   - CUDA 10 for Turing, like GeForce RTX 2080
   - CUDA 11 for Ampere, like A100
+  - CUDA 12 for Hopper, like H100
 \end{verbatim}
 }
 \noindent
diff --git a/src/gpu/mesh_constants_cuda.h b/src/gpu/mesh_constants_cuda.h
index 9358a805b..cf6221bb3 100644
--- a/src/gpu/mesh_constants_cuda.h
+++ b/src/gpu/mesh_constants_cuda.h
@@ -85,6 +85,10 @@ static inline void print_CUDA_error_if_any(cudaError_t err, int num) {
 
 #if CUSTOM_REAL == 4
 // textures
+// textures
+// note: texture templates are supported only for CUDA versions <= 11.x
+//       since CUDA 12.x, these have been removed and texture objects must be used instead
+//       see: https://developer.nvidia.com/blog/cuda-pro-tip-kepler-texture-objects-improve-performance-and-flexibility/
 #if defined(USE_TEXTURES_FIELDS) || defined(USE_TEXTURES_CONSTANTS)
 typedef texture<float, cudaTextureType1D, cudaReadModeElementType> realw_texture;
 #endif
@@ -95,6 +99,10 @@ typedef float* __restrict__ realw_p; // otherwise use: //typedef float* realw_p;
 
 #elif CUSTOM_REAL == 8
 // textures
+// textures
+// note: texture templates are supported only for CUDA versions <= 11.x
+//       since CUDA 12.x, these have been removed and texture objects must be used instead
+//       see: https://developer.nvidia.com/blog/cuda-pro-tip-kepler-texture-objects-improve-performance-and-flexibility/
 #if defined(USE_TEXTURES_FIELDS) || defined(USE_TEXTURES_CONSTANTS)
 typedef texture<double, cudaTextureType1D, cudaReadModeElementType> realw_texture;
 #endif
diff --git a/src/gpu/mesh_constants_gpu.h b/src/gpu/mesh_constants_gpu.h
index b40aff45d..af126018d 100644
--- a/src/gpu/mesh_constants_gpu.h
+++ b/src/gpu/mesh_constants_gpu.h
@@ -374,6 +374,14 @@ typedef double realw;
 //#define CUDA_SHARED_ASYNC
 #endif
 
+#ifdef GPU_DEVICE_Hopper
+// specifics see: https://docs.nvidia.com/cuda/hopper-tuning-guide/index.html
+// register file size 64k 32-bit registers per SM
+// shared memory size 228KB per SM (maximum shared memory, 227KB per thread block)
+// maximum registers 255 per thread
+#undef USE_LAUNCH_BOUNDS
+#endif
+
 // CUDA Graphs
 #if defined (__CUDACC_VER_MAJOR__) && (__CUDACC_VER_MAJOR__ >= 10)
 // CUDA graphs: (experimental feature) requires compilation with CUDA toolkit versions >= 10.0
diff --git a/src/gpu/rules.mk b/src/gpu/rules.mk
index c9487c0f1..6a858f6db 100644
--- a/src/gpu/rules.mk
+++ b/src/gpu/rules.mk
@@ -153,7 +153,9 @@ ifeq ($(CUDA),yes)
   ifeq ($(CUDA11),yes)
 	  BUILD_VERSION_TXT += (v11)
   endif
-
+  ifeq ($(CUDA12),yes)
+	  BUILD_VERSION_TXT += (v12)
+  endif
 endif
 
 ifeq ($(GPU_CUDA_AND_OCL),yes)

From 61a8fcbf3ad6280ec836a690388b1beb7ee0dac5 Mon Sep 17 00:00:00 2001
From: Daniel Peter <daniel.peter22@gmail.com>
Date: Tue, 30 May 2023 08:56:00 +0200
Subject: [PATCH 06/11] updates element size in regular point gridding

---
 src/specfem3D/locate_regular_points.f90 | 25 ++++++++++++++++++-------
 1 file changed, 18 insertions(+), 7 deletions(-)

diff --git a/src/specfem3D/locate_regular_points.f90 b/src/specfem3D/locate_regular_points.f90
index f4511b0eb..b77bbcfff 100644
--- a/src/specfem3D/locate_regular_points.f90
+++ b/src/specfem3D/locate_regular_points.f90
@@ -188,7 +188,8 @@ subroutine locate_regular_points(npoints_slice_reg,points_slice_reg,GRID, &
 
   use shared_parameters, only: R_PLANET
 
-  use specfem_par, only: myrank, NEX_XI
+  use specfem_par, only: myrank, &
+    NCHUNKS_VAL,NEX_XI_VAL,NEX_ETA_VAL,ANGULAR_WIDTH_XI_IN_DEGREES_VAL,ANGULAR_WIDTH_ETA_IN_DEGREES_VAL
 
   use specfem_par_crustmantle, only: kl_reg_grid_variables
 
@@ -222,13 +223,14 @@ subroutine locate_regular_points(npoints_slice_reg,points_slice_reg,GRID, &
   integer :: ispec_in, ispec, iter_loop, ia, ipoint
   double precision :: lat, lon, radius, th, ph, x,y,z
   double precision :: x_target, y_target, z_target
-  double precision :: distmin_squared,dist_squared,typical_size_squared
+  double precision :: distmin_squared,dist_squared
+  double precision :: typical_size_squared,element_size
   double precision :: xi,eta,gamma,dx,dy,dz,dxi,deta,dgamma
   double precision :: xix,xiy,xiz
   double precision :: etax,etay,etaz
   double precision :: gammax,gammay,gammaz
-
-  logical locate_target
+  double precision :: ANGULAR_WIDTH_XI_RAD,ANGULAR_WIDTH_ETA_RAD
+  logical :: locate_target
   double precision, dimension(NGNOD) :: xelm, yelm, zelm
 
   double precision, dimension(NGLLX) :: hxir
@@ -240,10 +242,19 @@ subroutine locate_regular_points(npoints_slice_reg,points_slice_reg,GRID, &
   call hex_nodes_anchor_ijk(anchor_iax,anchor_iay,anchor_iaz)
 
   ! compute typical size of elements at the surface
-  typical_size_squared = TWO_PI * R_UNIT_SPHERE / (4.0 * NEX_XI)
+  ! (normalized)
+  if (NCHUNKS_VAL == 6) then
+    ! estimation for global meshes (assuming 90-degree chunks)
+    element_size = TWO_PI * R_UNIT_SPHERE / (4.d0 * NEX_XI_VAL)
+  else
+    ! estimation for 1-chunk meshes
+    ANGULAR_WIDTH_XI_RAD = ANGULAR_WIDTH_XI_IN_DEGREES_VAL * DEGREES_TO_RADIANS
+    ANGULAR_WIDTH_ETA_RAD = ANGULAR_WIDTH_ETA_IN_DEGREES_VAL * DEGREES_TO_RADIANS
+    element_size = max( ANGULAR_WIDTH_XI_RAD/NEX_XI_VAL,ANGULAR_WIDTH_ETA_RAD/NEX_ETA_VAL ) * R_UNIT_SPHERE
+  endif
 
-  ! use 10 times the distance as a criterion for source detection
-  typical_size_squared = (10.0 * typical_size_squared)**2
+  ! use 10 times the distance as a criterion for point detections
+  typical_size_squared = (10.d0 * element_size)**2
 
   do ipoint = 1, npoints_slice_reg
     isp = points_slice_reg(ipoint)

From 43161bf2dabee72cb8b25d0f0eb2f2ba9f5c9337 Mon Sep 17 00:00:00 2001
From: Daniel Peter <daniel.peter22@gmail.com>
Date: Tue, 30 May 2023 12:58:06 +0200
Subject: [PATCH 07/11] updates parameter usage

---
 src/create_header_file/create_header_file.f90 |  1 -
 src/meshfem3D/gravity_integrals.F90           |  7 ++++++-
 src/meshfem3D/meshfem3D_par.f90               | 13 ++++++++++++-
 src/meshfem3D/write_AVS_DX_output_adios.f90   |  2 +-
 4 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/src/create_header_file/create_header_file.f90 b/src/create_header_file/create_header_file.f90
index 75188f9f8..9a3c42a34 100644
--- a/src/create_header_file/create_header_file.f90
+++ b/src/create_header_file/create_header_file.f90
@@ -31,7 +31,6 @@
   program xcreate_header_file
 
   use shared_parameters
-  use constants
 
   implicit none
 
diff --git a/src/meshfem3D/gravity_integrals.F90 b/src/meshfem3D/gravity_integrals.F90
index 28cb2980f..ab0bdb345 100644
--- a/src/meshfem3D/gravity_integrals.F90
+++ b/src/meshfem3D/gravity_integrals.F90
@@ -35,7 +35,11 @@ subroutine gravity_initialize_integrals()
 
 ! initializes integrals
 
-  use constants
+  use constants, only: myrank,IMAIN,ZERO,CUSTOM_REAL,SIZE_DOUBLE, &
+    GRAVITY_INTEGRALS,REUSE_EXISTING_OBSERVATION_SURF, &
+    ONLY_COMPUTE_CENTER_OF_MASS,COMPUTE_CRUST_CONTRIB_ONLY,SHIFT_TO_THIS_CENTER_OF_MASS, &
+    NX_OBSERVATION,NY_OBSERVATION, &
+    x_shift,y_shift,z_shift
 
   use meshfem_par, only: g_x,g_y,g_z,G_xx,G_yy,G_zz,G_xy,G_xz,G_yz
 
@@ -518,6 +522,7 @@ end subroutine gravity_compute_integrals
 
   subroutine finalize_gravity_integrals()
 
+  use constants, only: SI_UNITS_TO_EOTVOS,IXR,IYR,ICHUNKR,ONLY_COMPUTE_CENTER_OF_MASS
   use meshfem_par
   use meshfem_models_par
 
diff --git a/src/meshfem3D/meshfem3D_par.f90 b/src/meshfem3D/meshfem3D_par.f90
index 7a6b0fd84..1ffd0a0b1 100644
--- a/src/meshfem3D/meshfem3D_par.f90
+++ b/src/meshfem3D/meshfem3D_par.f90
@@ -110,7 +110,18 @@ module meshfem_par
 
 ! main parameter module for specfem simulations
 
-  use constants
+  use constants, only: myrank,CUSTOM_REAL,IMAIN,ISTANDARD_OUTPUT,IOUT,OUTPUT_FILES_BASE, &
+    ZERO,TINYVAL,GRAV,R_UNIT_SPHERE,SIZE_REAL, &
+    NDIM,NGLLX,NGLLY,NGLLZ, &
+    NX_OBSERVATION,NY_OBSERVATION,NCHUNKS_MAX, &
+    NUMFACES_SHARED,NUMCORNERS_SHARED, &
+    MAX_NUM_REGIONS,NB_SQUARE_CORNERS,NB_SQUARE_EDGES_ONEDIR, &
+    USE_MESH_COLORING_GPU,MAX_NUMBER_OF_COLORS, &
+    GRAVITY_INTEGRALS, &
+    SUPPRESS_CRUSTAL_MESH,SUPPRESS_MOHO_STRETCHING,SUPPRESS_INTERNAL_TOPOGRAPHY, &
+    IREGION_CRUST_MANTLE,IREGION_OUTER_CORE,IREGION_INNER_CORE,IFLAG_IN_FICTITIOUS_CUBE, &
+    THREE_D_MODEL_S362ANI,THREE_D_MODEL_S362WMANI,THREE_D_MODEL_S362ANI_PREM,THREE_D_MODEL_S29EA, &
+    THREE_D_MODEL_MANTLE_SH,THREE_D_MODEL_SPIRAL
 
   use shared_parameters
 
diff --git a/src/meshfem3D/write_AVS_DX_output_adios.f90 b/src/meshfem3D/write_AVS_DX_output_adios.f90
index 6f33dcf36..aad362424 100644
--- a/src/meshfem3D/write_AVS_DX_output_adios.f90
+++ b/src/meshfem3D/write_AVS_DX_output_adios.f90
@@ -35,7 +35,7 @@ subroutine write_AVS_DX_output_adios(npointot,iregion_code, &
     myrank,NGLLX,NGLLY,NGLLZ, &
     RICB,RCMB,RTOPDDOUBLEPRIME,R670,R220,R771,R400,R120,R80,RMOHO, &
     RMIDDLE_CRUST, &
-    LOCAL_PATH,IMAIN,ADIOS_TRANSPORT_METHOD
+    LOCAL_PATH,IMAIN
 
   use meshfem_models_par, only: &
     ELLIPTICITY,MODEL_3D_MANTLE_PERTUBATIONS, &

From ab38c310fe32b82b2f45522e38da9766a614ea54 Mon Sep 17 00:00:00 2001
From: Daniel Peter <daniel.peter22@gmail.com>
Date: Wed, 31 May 2023 14:04:03 +0200
Subject: [PATCH 08/11] fixes compilation warnings

---
 src/meshfem3D/model_sh_mars.f90            | 1 +
 src/specfem3D/prepare_optimized_arrays.F90 | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/meshfem3D/model_sh_mars.f90 b/src/meshfem3D/model_sh_mars.f90
index f155b4b61..fab8b0afe 100644
--- a/src/meshfem3D/model_sh_mars.f90
+++ b/src/meshfem3D/model_sh_mars.f90
@@ -222,6 +222,7 @@ subroutine read_SH_mars_model()
 
       ! parameter type
       ! converts all string characters to lowercase (to make user input case-insensitive)
+      irange = iachar('a') - iachar('A')
       do i = 1,len_trim(substring)
         if (lge(substring(i:i),'A') .and. lle(substring(i:i),'Z')) then
           substring(i:i) = achar(iachar(substring(i:i)) + irange)
diff --git a/src/specfem3D/prepare_optimized_arrays.F90 b/src/specfem3D/prepare_optimized_arrays.F90
index 63e0e6538..de08bd23e 100644
--- a/src/specfem3D/prepare_optimized_arrays.F90
+++ b/src/specfem3D/prepare_optimized_arrays.F90
@@ -78,7 +78,7 @@ subroutine prepare_timerun_ibool_inv_tbl()
   ! local parameters
   integer :: iphase,ier
   integer :: num_elements
-  integer,dimension(1) :: idummy
+  integer,dimension(1) :: idummy = (/ 0 /)
 
   ! inverse arrays use 1D indexing for better compiler vectorization
   ! only used for Deville routines and FORCE_VECTORIZATION)

From 71f9313ad1316d357af78d3cfb7956c111bfaaa1 Mon Sep 17 00:00:00 2001
From: Daniel Peter <daniel.peter22@gmail.com>
Date: Thu, 1 Jun 2023 09:17:00 +0200
Subject: [PATCH 09/11] avoids inlining mxm5_* routines in compute forces calls
 by Cray compilers (due to problems w/ -O3 optimization)

---
 .../compute_forces_crust_mantle_Dev.F90       |  24 +-
 .../compute_forces_inner_core_Dev.F90         |  36 +-
 .../compute_forces_outer_core_Dev.F90         | 498 ++++++++++--------
 3 files changed, 322 insertions(+), 236 deletions(-)

diff --git a/src/specfem3D/compute_forces_crust_mantle_Dev.F90 b/src/specfem3D/compute_forces_crust_mantle_Dev.F90
index 7a7f1c6f3..c0403cdb7 100644
--- a/src/specfem3D/compute_forces_crust_mantle_Dev.F90
+++ b/src/specfem3D/compute_forces_crust_mantle_Dev.F90
@@ -564,7 +564,11 @@ subroutine mxm5_3comp_singleA(A,n1,B1,B2,B3,C1,C2,C3,n3)
 !DIR$ ATTRIBUTES FORCEINLINE :: mxm5_3comp_singleA
 #else
 ! cray
-!DIR$ INLINEALWAYS mxm5_3comp_singleA
+! note: with Cray Fortran versions >= 14 on Frontier, inlining this routine together with optimization -O3 leads to problems.
+!       for now, the INLINENEVER directive below prevents inlining so that the default compilation flags can be used;
+!       otherwise, the compilation flag -hipa0 would need to be added to suppress all inlining.
+!!DIR$ INLINEALWAYS mxm5_3comp_singleA
+!DIR$ INLINENEVER mxm5_3comp_singleA
 #endif
 
 ! 3 different arrays for x/y/z-components, 2-dimensional arrays (25,5)/(5,25), same B matrix for all 3 component arrays
@@ -603,7 +607,9 @@ subroutine mxm5_3comp_singleA(A,n1,B1,B2,B3,C1,C2,C3,n3)
   ! matrix-matrix multiplication
   do j = 1,n3
 !DIR$ IVDEP
+#if defined __INTEL_COMPILER
 !DIR$ SIMD
+#endif
     do i = 1,n1
       C1(i,j) =  A(i,1) * B1(1,j) &
                + A(i,2) * B1(2,j) &
@@ -637,7 +643,11 @@ subroutine mxm5_3comp_singleB(A1,A2,A3,n1,B,C1,C2,C3,n3)
 !DIR$ ATTRIBUTES FORCEINLINE :: mxm5_3comp_singleB
 #else
 ! cray
-!DIR$ INLINEALWAYS mxm5_3comp_singleB
+! note: with Cray Fortran versions >= 14 on Frontier, inlining this routine together with optimization -O3 leads to problems.
+!       for now, the INLINENEVER directive below prevents inlining so that the default compilation flags can be used;
+!       otherwise, the compilation flag -hipa0 would need to be added to suppress all inlining.
+!!DIR$ INLINEALWAYS mxm5_3comp_singleB
+!DIR$ INLINENEVER mxm5_3comp_singleB
 #endif
 
 ! 3 different arrays for x/y/z-components, 2-dimensional arrays (25,5)/(5,25), same B matrix for all 3 component arrays
@@ -676,7 +686,9 @@ subroutine mxm5_3comp_singleB(A1,A2,A3,n1,B,C1,C2,C3,n3)
   ! matrix-matrix multiplication
   do j = 1,n3
 !DIR$ IVDEP
+#if defined __INTEL_COMPILER
 !DIR$ SIMD
+#endif
     do i = 1,n1
       C1(i,j) =  A1(i,1) * B(1,j) &
                + A1(i,2) * B(2,j) &
@@ -710,7 +722,11 @@ subroutine mxm5_3comp_3dmat_singleB(A1,A2,A3,n1,B,n2,C1,C2,C3,n3)
 !DIR$ ATTRIBUTES FORCEINLINE :: mxm5_3comp_3dmat_singleB
 #else
 ! cray
-!DIR$ INLINEALWAYS mxm5_3comp_3dmat_singleB
+! note: with Cray Fortran versions >= 14 on Frontier, inlining this routine together with optimization -O3 leads to problems.
+!       for now, the INLINENEVER directive below prevents inlining so that the default compilation flags can be used;
+!       otherwise, the compilation flag -hipa0 would need to be added to suppress all inlining.
+!!DIR$ INLINEALWAYS mxm5_3comp_3dmat_singleB
+!DIR$ INLINENEVER mxm5_3comp_3dmat_singleB
 #endif
 
 ! 3 different arrays for x/y/z-components, 3-dimensional arrays (5,5,5), same B matrix for all 3 component arrays
@@ -774,7 +790,9 @@ subroutine mxm5_3comp_3dmat_singleB(A1,A2,A3,n1,B,n2,C1,C2,C3,n3)
   do k = 1,n3
     do j = 1,n2
 !DIR$ IVDEP
+#if defined __INTEL_COMPILER
 !DIR$ SIMD
+#endif
       do i = 1,n1
         C1(i,j,k) =  A1(i,1,k) * B(1,j) &
                    + A1(i,2,k) * B(2,j) &
diff --git a/src/specfem3D/compute_forces_inner_core_Dev.F90 b/src/specfem3D/compute_forces_inner_core_Dev.F90
index b96992499..833ad2d9d 100644
--- a/src/specfem3D/compute_forces_inner_core_Dev.F90
+++ b/src/specfem3D/compute_forces_inner_core_Dev.F90
@@ -431,7 +431,11 @@ subroutine mxm5_3comp_singleA(A,n1,B1,B2,B3,C1,C2,C3,n3)
 !DIR$ ATTRIBUTES FORCEINLINE :: mxm5_3comp_singleA
 #else
 ! cray
-!DIR$ INLINEALWAYS mxm5_3comp_singleA
+! note: with Cray Fortran versions >= 14 on Frontier, inlining this routine together with optimization -O3 leads to problems.
+!       for now, the INLINENEVER directive below prevents inlining so that the default compilation flags can be used;
+!       otherwise, the compilation flag -hipa0 would need to be added to suppress all inlining.
+!!DIR$ INLINEALWAYS mxm5_3comp_singleA
+!DIR$ INLINENEVER mxm5_3comp_singleA
 #endif
 
 ! 3 different arrays for x/y/z-components, 2-dimensional arrays (25,5)/(5,25), same B matrix for all 3 component arrays
@@ -469,8 +473,10 @@ subroutine mxm5_3comp_singleA(A,n1,B1,B2,B3,C1,C2,C3,n3)
 
   ! matrix-matrix multiplication
   do j = 1,n3
-!dir$ ivdep
-!dir$ SIMD
+!DIR$ IVDEP
+#if defined __INTEL_COMPILER
+!DIR$ SIMD
+#endif
     do i = 1,n1
       C1(i,j) =  A(i,1) * B1(1,j) &
                + A(i,2) * B1(2,j) &
@@ -504,7 +510,11 @@ subroutine mxm5_3comp_singleB(A1,A2,A3,n1,B,C1,C2,C3,n3)
 !DIR$ ATTRIBUTES FORCEINLINE :: mxm5_3comp_singleB
 #else
 ! cray
-!DIR$ INLINEALWAYS mxm5_3comp_singleB
+! note: with Cray Fortran versions >= 14 on Frontier, inlining this routine together with optimization -O3 leads to problems.
+!       for now, the INLINENEVER directive below prevents inlining so that the default compilation flags can be used;
+!       otherwise, the compilation flag -hipa0 would need to be added to suppress all inlining.
+!!DIR$ INLINEALWAYS mxm5_3comp_singleB
+!DIR$ INLINENEVER mxm5_3comp_singleB
 #endif
 
 ! 3 different arrays for x/y/z-components, 2-dimensional arrays (25,5)/(5,25), same B matrix for all 3 component arrays
@@ -542,8 +552,10 @@ subroutine mxm5_3comp_singleB(A1,A2,A3,n1,B,C1,C2,C3,n3)
 
   ! matrix-matrix multiplication
   do j = 1,n3
-!dir$ ivdep
-!dir$ SIMD
+!DIR$ IVDEP
+#if defined __INTEL_COMPILER
+!DIR$ SIMD
+#endif
     do i = 1,n1
       C1(i,j) =  A1(i,1) * B(1,j) &
                + A1(i,2) * B(2,j) &
@@ -577,7 +589,11 @@ subroutine mxm5_3comp_3dmat_singleB(A1,A2,A3,n1,B,n2,C1,C2,C3,n3)
 !DIR$ ATTRIBUTES FORCEINLINE :: mxm5_3comp_3dmat_singleB
 #else
 ! cray
-!DIR$ INLINEALWAYS mxm5_3comp_3dmat_singleB
+! note: with Cray Fortran versions >= 14 on Frontier, inlining this routine together with optimization -O3 leads to problems.
+!       for now, the INLINENEVER directive below prevents inlining so that the default compilation flags can be used;
+!       otherwise, the compilation flag -hipa0 would need to be added to suppress all inlining.
+!!DIR$ INLINEALWAYS mxm5_3comp_3dmat_singleB
+!DIR$ INLINENEVER mxm5_3comp_3dmat_singleB
 #endif
 
 ! 3 different arrays for x/y/z-components, 3-dimensional arrays (5,5,5), same B matrix for all 3 component arrays
@@ -640,8 +656,10 @@ subroutine mxm5_3comp_3dmat_singleB(A1,A2,A3,n1,B,n2,C1,C2,C3,n3)
   ! matrix-matrix multiplication
   do k = 1,n3
     do j = 1,n2
-!dir$ ivdep
-!dir$ SIMD
+!DIR$ IVDEP
+#if defined __INTEL_COMPILER
+!DIR$ SIMD
+#endif
       do i = 1,n1
         C1(i,j,k) =  A1(i,1,k) * B(1,j) &
                    + A1(i,2,k) * B(2,j) &
diff --git a/src/specfem3D/compute_forces_outer_core_Dev.F90 b/src/specfem3D/compute_forces_outer_core_Dev.F90
index 709b11daa..216819a23 100644
--- a/src/specfem3D/compute_forces_outer_core_Dev.F90
+++ b/src/specfem3D/compute_forces_outer_core_Dev.F90
@@ -211,26 +211,32 @@ subroutine compute_forces_outer_core_Dev(timeval,deltat,two_omega_earth, &
     ! computes 1. matrix multiplication for temp1
     ! computes 2. matrix multiplication for temp2
     ! computes 3. matrix multiplication for temp3
-    select case (NGLLX)
-    case (5)
-      call mxm5_single(hprime_xx,m1,chi_elem,temp1,m2)
-      call mxm5_3dmat_single(chi_elem,m1,hprime_xxT,m1,temp2,NGLLX)
-      call mxm5_single(chi_elem,m2,hprime_xxT,temp3,m1)
-    case (6)
-      call mxm6_single(hprime_xx,m1,chi_elem,temp1,m2)
-      call mxm6_3dmat_single(chi_elem,m1,hprime_xxT,m1,temp2,NGLLX)
-      call mxm6_single(chi_elem,m2,hprime_xxT,temp3,m1)
-    case (7)
-      call mxm7_single(hprime_xx,m1,chi_elem,temp1,m2)
-      call mxm7_3dmat_single(chi_elem,m1,hprime_xxT,m1,temp2,NGLLX)
-      call mxm7_single(chi_elem,m2,hprime_xxT,temp3,m1)
-    case (8)
-      call mxm8_single(hprime_xx,m1,chi_elem,temp1,m2)
-      call mxm8_3dmat_single(chi_elem,m1,hprime_xxT,m1,temp2,NGLLX)
-      call mxm8_single(chi_elem,m2,hprime_xxT,temp3,m1)
-    end select
-
-
+    call mxm5_single(hprime_xx,m1,chi_elem,temp1,m2)
+    call mxm5_3dmat_single(chi_elem,m1,hprime_xxT,m1,temp2,NGLLX)
+    call mxm5_single(chi_elem,m2,hprime_xxT,temp3,m1)
+
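+    ! note: this compute_forces_outer_core_Dev() routine is called for USE_DEVILLE_PRODUCTS_VAL == .true.,
+    !       which is only the case for NGLLX == NGLLY == NGLLZ == 5. thus, the mxm5_* routines are called directly.
+    !
+    ! for more general cases, one could do the following (after re-enabling the commented-out mxm6/7/8_* routines below):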
+    !select case (NGLLX)
+    !case (5)
+    !  call mxm5_single(hprime_xx,m1,chi_elem,temp1,m2)
+    !  call mxm5_3dmat_single(chi_elem,m1,hprime_xxT,m1,temp2,NGLLX)
+    !  call mxm5_single(chi_elem,m2,hprime_xxT,temp3,m1)
+    !case (6)
+    !  call mxm6_single(hprime_xx,m1,chi_elem,temp1,m2)
+    !  call mxm6_3dmat_single(chi_elem,m1,hprime_xxT,m1,temp2,NGLLX)
+    !  call mxm6_single(chi_elem,m2,hprime_xxT,temp3,m1)
+    !case (7)
+    !  call mxm7_single(hprime_xx,m1,chi_elem,temp1,m2)
+    !  call mxm7_3dmat_single(chi_elem,m1,hprime_xxT,m1,temp2,NGLLX)
+    !  call mxm7_single(chi_elem,m2,hprime_xxT,temp3,m1)
+    !case (8)
+    !  call mxm8_single(hprime_xx,m1,chi_elem,temp1,m2)
+    !  call mxm8_3dmat_single(chi_elem,m1,hprime_xxT,m1,temp2,NGLLX)
+    !  call mxm8_single(chi_elem,m2,hprime_xxT,temp3,m1)
+    !end select
 
     DO_LOOP_IJK
       ! get derivatives of potential with respect to x, y and z
@@ -391,24 +397,32 @@ subroutine compute_forces_outer_core_Dev(timeval,deltat,two_omega_earth, &
     ! computes 1. matrix multiplication for newtemp1
     ! computes 2. matrix multiplication for newtemp2
     ! computes 3. matrix multiplication for newtemp3
-    select case (NGLLX)
-    case (5)
-      call mxm5_single(hprimewgll_xxT,m1,temp1,newtemp1,m2)
-      call mxm5_3dmat_single(temp2,m1,hprimewgll_xx,m1,newtemp2,NGLLX)
-      call mxm5_single(temp3,m2,hprimewgll_xx,newtemp3,m1)
-    case (6)
-      call mxm6_single(hprimewgll_xxT,m1,temp1,newtemp1,m2)
-      call mxm6_3dmat_single(temp2,m1,hprimewgll_xx,m1,newtemp2,NGLLX)
-      call mxm6_single(temp3,m2,hprimewgll_xx,newtemp3,m1)
-    case (7)
-      call mxm7_single(hprimewgll_xxT,m1,temp1,newtemp1,m2)
-      call mxm7_3dmat_single(temp2,m1,hprimewgll_xx,m1,newtemp2,NGLLX)
-      call mxm7_single(temp3,m2,hprimewgll_xx,newtemp3,m1)
-    case (8)
-      call mxm8_single(hprimewgll_xxT,m1,temp1,newtemp1,m2)
-      call mxm8_3dmat_single(temp2,m1,hprimewgll_xx,m1,newtemp2,NGLLX)
-      call mxm8_single(temp3,m2,hprimewgll_xx,newtemp3,m1)
-    end select
+    call mxm5_single(hprimewgll_xxT,m1,temp1,newtemp1,m2)
+    call mxm5_3dmat_single(temp2,m1,hprimewgll_xx,m1,newtemp2,NGLLX)
+    call mxm5_single(temp3,m2,hprimewgll_xx,newtemp3,m1)
+
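+    ! note: this compute_forces_outer_core_Dev() routine is called for USE_DEVILLE_PRODUCTS_VAL == .true.,
+    !       which is only the case for NGLLX == NGLLY == NGLLZ == 5. thus, the mxm5_* routines are called directly.
+    !
+    ! for more general cases, one could do the following (after re-enabling the commented-out mxm6/7/8_* routines below):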
+    !select case (NGLLX)
+    !case (5)
+    !  call mxm5_single(hprimewgll_xxT,m1,temp1,newtemp1,m2)
+    !  call mxm5_3dmat_single(temp2,m1,hprimewgll_xx,m1,newtemp2,NGLLX)
+    !  call mxm5_single(temp3,m2,hprimewgll_xx,newtemp3,m1)
+    !case (6)
+    !  call mxm6_single(hprimewgll_xxT,m1,temp1,newtemp1,m2)
+    !  call mxm6_3dmat_single(temp2,m1,hprimewgll_xx,m1,newtemp2,NGLLX)
+    !  call mxm6_single(temp3,m2,hprimewgll_xx,newtemp3,m1)
+    !case (7)
+    !  call mxm7_single(hprimewgll_xxT,m1,temp1,newtemp1,m2)
+    !  call mxm7_3dmat_single(temp2,m1,hprimewgll_xx,m1,newtemp2,NGLLX)
+    !  call mxm7_single(temp3,m2,hprimewgll_xx,newtemp3,m1)
+    !case (8)
+    !  call mxm8_single(hprimewgll_xxT,m1,temp1,newtemp1,m2)
+    !  call mxm8_3dmat_single(temp2,m1,hprimewgll_xx,m1,newtemp2,NGLLX)
+    !  call mxm8_single(temp3,m2,hprimewgll_xx,newtemp3,m1)
+    !end select
 
     ! sum contributions from each element to the global mesh and add gravity term
     DO_LOOP_IJK
@@ -500,6 +514,18 @@ subroutine compute_forces_outer_core_Dev(timeval,deltat,two_omega_earth, &
 
   subroutine mxm5_single(A,n1,B,C,n3)
 
+! we can force inlining (Intel compiler)
+#if defined __INTEL_COMPILER
+!DIR$ ATTRIBUTES FORCEINLINE :: mxm5_single
+#else
+! cray
+! note: with Cray Fortran versions >= 14 on Frontier, inlining this routine together with optimization -O3 leads to problems.
+!       for now, the INLINENEVER directive below prevents inlining so that the default compilation flags can be used;
+!       otherwise, the compilation flag -hipa0 would need to be added to suppress all inlining.
+!!DIR$ INLINEALWAYS mxm5_single
+!DIR$ INLINENEVER mxm5_single
+#endif
+
 ! 2-dimensional arrays (25,5)/(5,25)
 
   use constants_solver, only: CUSTOM_REAL
@@ -554,107 +580,125 @@ end subroutine mxm5_single
 
   !-------------
 
-  subroutine mxm6_single(A,n1,B,C,n3)
-
-! two-dimensional arrays (36,6)/(6,36)
-
-  use constants, only: CUSTOM_REAL
-
-  implicit none
-
-  integer,intent(in) :: n1,n3
-  real(kind=CUSTOM_REAL),dimension(n1,6),intent(in) :: A
-  real(kind=CUSTOM_REAL),dimension(6,n3),intent(in) :: B
-  real(kind=CUSTOM_REAL),dimension(n1,n3),intent(out) :: C
-
-  ! local parameters
-  integer :: i,j
-
-  ! matrix-matrix multiplication
-  do j = 1,n3
-    do i = 1,n1
-      C(i,j) =  A(i,1) * B(1,j) &
-              + A(i,2) * B(2,j) &
-              + A(i,3) * B(3,j) &
-              + A(i,4) * B(4,j) &
-              + A(i,5) * B(5,j) &
-              + A(i,6) * B(6,j)
-    enddo
-  enddo
-
-  end subroutine mxm6_single
+! unused so far..
+!
+!  subroutine mxm6_single(A,n1,B,C,n3)
+!
+!! two-dimensional arrays (36,6)/(6,36)
+!
+!  use constants, only: CUSTOM_REAL
+!
+!  implicit none
+!
+!  integer,intent(in) :: n1,n3
+!  real(kind=CUSTOM_REAL),dimension(n1,6),intent(in) :: A
+!  real(kind=CUSTOM_REAL),dimension(6,n3),intent(in) :: B
+!  real(kind=CUSTOM_REAL),dimension(n1,n3),intent(out) :: C
+!
+!  ! local parameters
+!  integer :: i,j
+!
+!  ! matrix-matrix multiplication
+!  do j = 1,n3
+!    do i = 1,n1
+!      C(i,j) =  A(i,1) * B(1,j) &
+!              + A(i,2) * B(2,j) &
+!              + A(i,3) * B(3,j) &
+!              + A(i,4) * B(4,j) &
+!              + A(i,5) * B(5,j) &
+!              + A(i,6) * B(6,j)
+!    enddo
+!  enddo
+!
+!  end subroutine mxm6_single
 
   !-------------
 
-  subroutine mxm7_single(A,n1,B,C,n3)
-
-! two-dimensional arrays (49,7)/(7,49)
-
-  use constants, only: CUSTOM_REAL
-
-  implicit none
-
-  integer,intent(in) :: n1,n3
-  real(kind=CUSTOM_REAL),dimension(n1,7),intent(in) :: A
-  real(kind=CUSTOM_REAL),dimension(7,n3),intent(in) :: B
-  real(kind=CUSTOM_REAL),dimension(n1,n3),intent(out) :: C
-
-  ! local parameters
-  integer :: i,j
-
-  ! matrix-matrix multiplication
-  do j = 1,n3
-    do i = 1,n1
-      C(i,j) =  A(i,1) * B(1,j) &
-              + A(i,2) * B(2,j) &
-              + A(i,3) * B(3,j) &
-              + A(i,4) * B(4,j) &
-              + A(i,5) * B(5,j) &
-              + A(i,6) * B(6,j) &
-              + A(i,7) * B(7,j)
-    enddo
-  enddo
-
-  end subroutine mxm7_single
+! unused so far..
+!
+!  subroutine mxm7_single(A,n1,B,C,n3)
+!
+!! two-dimensional arrays (49,7)/(7,49)
+!
+!  use constants, only: CUSTOM_REAL
+!
+!  implicit none
+!
+!  integer,intent(in) :: n1,n3
+!  real(kind=CUSTOM_REAL),dimension(n1,7),intent(in) :: A
+!  real(kind=CUSTOM_REAL),dimension(7,n3),intent(in) :: B
+!  real(kind=CUSTOM_REAL),dimension(n1,n3),intent(out) :: C
+!
+!  ! local parameters
+!  integer :: i,j
+!
+!  ! matrix-matrix multiplication
+!  do j = 1,n3
+!    do i = 1,n1
+!      C(i,j) =  A(i,1) * B(1,j) &
+!              + A(i,2) * B(2,j) &
+!              + A(i,3) * B(3,j) &
+!              + A(i,4) * B(4,j) &
+!              + A(i,5) * B(5,j) &
+!              + A(i,6) * B(6,j) &
+!              + A(i,7) * B(7,j)
+!    enddo
+!  enddo
+!
+!  end subroutine mxm7_single
 
   !-------------
 
-  subroutine mxm8_single(A,n1,B,C,n3)
-
-! two-dimensional arrays (64,8)/(8,64)
-
-  use constants, only: CUSTOM_REAL
-
-  implicit none
-
-  integer,intent(in) :: n1,n3
-  real(kind=CUSTOM_REAL),dimension(n1,8),intent(in) :: A
-  real(kind=CUSTOM_REAL),dimension(8,n3),intent(in) :: B
-  real(kind=CUSTOM_REAL),dimension(n1,n3),intent(out) :: C
-
-  ! local parameters
-  integer :: i,j
-
-  ! matrix-matrix multiplication
-  do j = 1,n3
-    do i = 1,n1
-      C(i,j) =  A(i,1) * B(1,j) &
-              + A(i,2) * B(2,j) &
-              + A(i,3) * B(3,j) &
-              + A(i,4) * B(4,j) &
-              + A(i,5) * B(5,j) &
-              + A(i,6) * B(6,j) &
-              + A(i,7) * B(7,j) &
-              + A(i,8) * B(8,j)
-    enddo
-  enddo
-
-  end subroutine mxm8_single
+! unused so far..
+!
+!  subroutine mxm8_single(A,n1,B,C,n3)
+!
+!! two-dimensional arrays (64,8)/(8,64)
+!
+!  use constants, only: CUSTOM_REAL
+!
+!  implicit none
+!
+!  integer,intent(in) :: n1,n3
+!  real(kind=CUSTOM_REAL),dimension(n1,8),intent(in) :: A
+!  real(kind=CUSTOM_REAL),dimension(8,n3),intent(in) :: B
+!  real(kind=CUSTOM_REAL),dimension(n1,n3),intent(out) :: C
+!
+!  ! local parameters
+!  integer :: i,j
+!
+!  ! matrix-matrix multiplication
+!  do j = 1,n3
+!    do i = 1,n1
+!      C(i,j) =  A(i,1) * B(1,j) &
+!              + A(i,2) * B(2,j) &
+!              + A(i,3) * B(3,j) &
+!              + A(i,4) * B(4,j) &
+!              + A(i,5) * B(5,j) &
+!              + A(i,6) * B(6,j) &
+!              + A(i,7) * B(7,j) &
+!              + A(i,8) * B(8,j)
+!    enddo
+!  enddo
+!
+!  end subroutine mxm8_single
 
 !--------------------------------------------------------------------------------------------
 
   subroutine mxm5_3dmat_single(A,n1,B,n2,C,n3)
 
+! we can force inlining (Intel compiler)
+#if defined __INTEL_COMPILER
+!DIR$ ATTRIBUTES FORCEINLINE :: mxm5_3dmat_single
+#else
+! cray
+! note: with Cray Fortran versions >= 14 on Frontier, inlining this routine together with optimization -O3 leads to problems.
+!       for now, the INLINENEVER directive below prevents inlining so that the default compilation flags can be used;
+!       otherwise, the compilation flag -hipa0 would need to be added to suppress all inlining.
+!!DIR$ INLINEALWAYS mxm5_3dmat_single
+!DIR$ INLINENEVER mxm5_3dmat_single
+#endif
+
 ! 3-dimensional arrays (5,5,5) for A and C
 
   use constants_solver, only: CUSTOM_REAL
@@ -714,108 +758,114 @@ end subroutine mxm5_3dmat_single
 
   !-------------
 
-  subroutine mxm6_3dmat_single(A,n1,B,n2,C,n3)
-
-! three-dimensional arrays (6,6,6) for A and C
-
-  use constants, only: CUSTOM_REAL
-
-  implicit none
-
-  integer,intent(in) :: n1,n2,n3
-  real(kind=CUSTOM_REAL),dimension(n1,6,n3),intent(in) :: A
-  real(kind=CUSTOM_REAL),dimension(6,n2),intent(in) :: B
-  real(kind=CUSTOM_REAL),dimension(n1,n2,n3),intent(out) :: C
-
-  ! local parameters
-  integer :: i,j,k
-
-  ! matrix-matrix multiplication
-  do k = 1,n3
-    do j = 1,n2
-      do i = 1,n1
-        C(i,j,k) =  A(i,1,k) * B(1,j) &
-                  + A(i,2,k) * B(2,j) &
-                  + A(i,3,k) * B(3,j) &
-                  + A(i,4,k) * B(4,j) &
-                  + A(i,5,k) * B(5,j) &
-                  + A(i,6,k) * B(6,j)
-      enddo
-    enddo
-  enddo
-
-  end subroutine mxm6_3dmat_single
+! unused so far..
+!
+!  subroutine mxm6_3dmat_single(A,n1,B,n2,C,n3)
+!
+!! three-dimensional arrays (6,6,6) for A and C
+!
+!  use constants, only: CUSTOM_REAL
+!
+!  implicit none
+!
+!  integer,intent(in) :: n1,n2,n3
+!  real(kind=CUSTOM_REAL),dimension(n1,6,n3),intent(in) :: A
+!  real(kind=CUSTOM_REAL),dimension(6,n2),intent(in) :: B
+!  real(kind=CUSTOM_REAL),dimension(n1,n2,n3),intent(out) :: C
+!
+!  ! local parameters
+!  integer :: i,j,k
+!
+!  ! matrix-matrix multiplication
+!  do k = 1,n3
+!    do j = 1,n2
+!      do i = 1,n1
+!        C(i,j,k) =  A(i,1,k) * B(1,j) &
+!                  + A(i,2,k) * B(2,j) &
+!                  + A(i,3,k) * B(3,j) &
+!                  + A(i,4,k) * B(4,j) &
+!                  + A(i,5,k) * B(5,j) &
+!                  + A(i,6,k) * B(6,j)
+!      enddo
+!    enddo
+!  enddo
+!
+!  end subroutine mxm6_3dmat_single
 
   !-------------
 
-  subroutine mxm7_3dmat_single(A,n1,B,n2,C,n3)
-
-! three-dimensional arrays (7,7,7) for A and C
-
-  use constants, only: CUSTOM_REAL
-
-  implicit none
-
-  integer,intent(in) :: n1,n2,n3
-  real(kind=CUSTOM_REAL),dimension(n1,7,n3),intent(in) :: A
-  real(kind=CUSTOM_REAL),dimension(7,n2),intent(in) :: B
-  real(kind=CUSTOM_REAL),dimension(n1,n2,n3),intent(out) :: C
-
-  ! local parameters
-  integer :: i,j,k
-
-  ! matrix-matrix multiplication
-  do k = 1,n3
-    do j = 1,n2
-      do i = 1,n1
-        C(i,j,k) =  A(i,1,k) * B(1,j) &
-                  + A(i,2,k) * B(2,j) &
-                  + A(i,3,k) * B(3,j) &
-                  + A(i,4,k) * B(4,j) &
-                  + A(i,5,k) * B(5,j) &
-                  + A(i,6,k) * B(6,j) &
-                  + A(i,7,k) * B(7,j)
-      enddo
-    enddo
-  enddo
-
-  end subroutine mxm7_3dmat_single
+! unused so far..
+!
+!  subroutine mxm7_3dmat_single(A,n1,B,n2,C,n3)
+!
+!! three-dimensional arrays (7,7,7) for A and C
+!
+!  use constants, only: CUSTOM_REAL
+!
+!  implicit none
+!
+!  integer,intent(in) :: n1,n2,n3
+!  real(kind=CUSTOM_REAL),dimension(n1,7,n3),intent(in) :: A
+!  real(kind=CUSTOM_REAL),dimension(7,n2),intent(in) :: B
+!  real(kind=CUSTOM_REAL),dimension(n1,n2,n3),intent(out) :: C
+!
+!  ! local parameters
+!  integer :: i,j,k
+!
+!  ! matrix-matrix multiplication
+!  do k = 1,n3
+!    do j = 1,n2
+!      do i = 1,n1
+!        C(i,j,k) =  A(i,1,k) * B(1,j) &
+!                  + A(i,2,k) * B(2,j) &
+!                  + A(i,3,k) * B(3,j) &
+!                  + A(i,4,k) * B(4,j) &
+!                  + A(i,5,k) * B(5,j) &
+!                  + A(i,6,k) * B(6,j) &
+!                  + A(i,7,k) * B(7,j)
+!      enddo
+!    enddo
+!  enddo
+!
+!  end subroutine mxm7_3dmat_single
 
   !-------------
 
-  subroutine mxm8_3dmat_single(A,n1,B,n2,C,n3)
-
-! three-dimensional arrays (8,8,8) for A and C
-
-  use constants, only: CUSTOM_REAL
-
-  implicit none
-
-  integer,intent(in) :: n1,n2,n3
-  real(kind=CUSTOM_REAL),dimension(n1,8,n3),intent(in) :: A
-  real(kind=CUSTOM_REAL),dimension(8,n2),intent(in) :: B
-  real(kind=CUSTOM_REAL),dimension(n1,n2,n3),intent(out) :: C
-
-  ! local parameters
-  integer :: i,j,k
-
-  ! matrix-matrix multiplication
-  do k = 1,n3
-    do j = 1,n2
-      do i = 1,n1
-        C(i,j,k) =  A(i,1,k) * B(1,j) &
-                  + A(i,2,k) * B(2,j) &
-                  + A(i,3,k) * B(3,j) &
-                  + A(i,4,k) * B(4,j) &
-                  + A(i,5,k) * B(5,j) &
-                  + A(i,6,k) * B(6,j) &
-                  + A(i,7,k) * B(7,j) &
-                  + A(i,8,k) * B(8,j)
-      enddo
-    enddo
-  enddo
-
-  end subroutine mxm8_3dmat_single
+! unused so far..
+!
+!  subroutine mxm8_3dmat_single(A,n1,B,n2,C,n3)
+!
+!! three-dimensional arrays (8,8,8) for A and C
+!
+!  use constants, only: CUSTOM_REAL
+!
+!  implicit none
+!
+!  integer,intent(in) :: n1,n2,n3
+!  real(kind=CUSTOM_REAL),dimension(n1,8,n3),intent(in) :: A
+!  real(kind=CUSTOM_REAL),dimension(8,n2),intent(in) :: B
+!  real(kind=CUSTOM_REAL),dimension(n1,n2,n3),intent(out) :: C
+!
+!  ! local parameters
+!  integer :: i,j,k
+!
+!  ! matrix-matrix multiplication
+!  do k = 1,n3
+!    do j = 1,n2
+!      do i = 1,n1
+!        C(i,j,k) =  A(i,1,k) * B(1,j) &
+!                  + A(i,2,k) * B(2,j) &
+!                  + A(i,3,k) * B(3,j) &
+!                  + A(i,4,k) * B(4,j) &
+!                  + A(i,5,k) * B(5,j) &
+!                  + A(i,6,k) * B(6,j) &
+!                  + A(i,7,k) * B(7,j) &
+!                  + A(i,8,k) * B(8,j)
+!      enddo
+!    enddo
+!  enddo
+!
+!  end subroutine mxm8_3dmat_single
 
   end subroutine compute_forces_outer_core_Dev
 

From 7317e3d1a08bbc828d5f2eb7147fd0c6942575d4 Mon Sep 17 00:00:00 2001
From: Daniel Peter <daniel.peter22@gmail.com>
Date: Fri, 2 Jun 2023 16:59:57 +0200
Subject: [PATCH 10/11] updates initializations

---
 src/meshfem3D/create_MPI_interfaces.f90       | 133 +++++++++++++-----
 src/meshfem3D/create_central_cube_buffers.f90 |   2 +-
 src/meshfem3D/create_regions_mesh.F90         |   2 +-
 src/meshfem3D/fix_non_blocking_flags.f90      |  23 +--
 4 files changed, 113 insertions(+), 47 deletions(-)

diff --git a/src/meshfem3D/create_MPI_interfaces.f90 b/src/meshfem3D/create_MPI_interfaces.f90
index 6de621472..ccefcf45b 100644
--- a/src/meshfem3D/create_MPI_interfaces.f90
+++ b/src/meshfem3D/create_MPI_interfaces.f90
@@ -45,11 +45,14 @@ end subroutine create_MPI_interfaces
 
   subroutine cmi_allocate_addressing(iregion_code)
 
-  use meshfem_par, only: myrank,ibool, &
+  use constants, only: CUSTOM_REAL,NUMCORNERS_SHARED,myrank
+
+  use meshfem_par, only: ibool, &
     NGLOB2DMAX_XMIN_XMAX,NGLOB2DMAX_YMIN_YMAX, &
     NSPEC2DMAX_XMIN_XMAX,NSPEC2DMAX_YMIN_YMAX, &
-    NSPEC2D_BOTTOM,NSPEC2D_TOP,NSPEC_REGIONS,NGLOB_REGIONS, &
-    NGLOB1D_RADIAL,NUMCORNERS_SHARED
+    NSPEC2D_BOTTOM,NSPEC2D_TOP, &
+    NSPEC_REGIONS,NGLOB_REGIONS, &
+    NGLOB1D_RADIAL
 
   use MPI_interfaces_par
 
@@ -150,38 +153,81 @@ subroutine cmi_allocate_addressing(iregion_code)
   allocate(buffer_send_chunkcorn_scalar(NGLOB1D_RADIAL_CM), &
            buffer_recv_chunkcorn_scalar(NGLOB1D_RADIAL_CM),stat=ier)
   if (ier /= 0) stop 'Error allocating buffer buffer_send_chunkcorn_scalar,.. arrays'
+  buffer_send_chunkcorn_scalar(:) = 0.0_CUSTOM_REAL
+  buffer_recv_chunkcorn_scalar(:) = 0.0_CUSTOM_REAL
 
   allocate(buffer_send_chunkcorn_vector(NDIM,NGLOB1D_RADIAL_CM + NGLOB1D_RADIAL_IC), &
            buffer_recv_chunkcorn_vector(NDIM,NGLOB1D_RADIAL_CM + NGLOB1D_RADIAL_IC),stat=ier)
   if (ier /= 0) stop 'Error allocating buffer buffer_send_chunkcorn_vector,.. arrays'
+  buffer_send_chunkcorn_vector(:,:) = 0.0_CUSTOM_REAL
+  buffer_recv_chunkcorn_vector(:,:) = 0.0_CUSTOM_REAL
 
   select case (iregion_code)
   case (IREGION_CRUST_MANTLE)
     ! crust mantle
-    allocate(iboolcorner_crust_mantle(NGLOB1D_RADIAL_CM,NUMCORNERS_SHARED))
+    allocate(iboolcorner_crust_mantle(NGLOB1D_RADIAL_CM,NUMCORNERS_SHARED),stat=ier)
+    if (ier /= 0) stop 'Error allocating iboolcorner_crust_mantle array'
+    iboolcorner_crust_mantle(:,:) = 0
+
     allocate(iboolleft_xi_crust_mantle(NGLOB2DMAX_XMIN_XMAX_CM), &
-             iboolright_xi_crust_mantle(NGLOB2DMAX_XMIN_XMAX_CM))
+             iboolright_xi_crust_mantle(NGLOB2DMAX_XMIN_XMAX_CM),stat=ier)
+    if (ier /= 0) stop 'Error allocating iboolleft_xi_crust_mantle,.. arrays'
+    iboolleft_xi_crust_mantle(:) = 0
+    iboolright_xi_crust_mantle(:) = 0
+
     allocate(iboolleft_eta_crust_mantle(NGLOB2DMAX_YMIN_YMAX_CM), &
-             iboolright_eta_crust_mantle(NGLOB2DMAX_YMIN_YMAX_CM))
-    allocate(iboolfaces_crust_mantle(NGLOB2DMAX_XY,NUMFACES_SHARED))
+             iboolright_eta_crust_mantle(NGLOB2DMAX_YMIN_YMAX_CM),stat=ier)
+    if (ier /= 0) stop 'Error allocating iboolleft_eta_crust_mantle,.. arrays'
+    iboolleft_eta_crust_mantle(:) = 0
+    iboolright_eta_crust_mantle(:) = 0
+
+    allocate(iboolfaces_crust_mantle(NGLOB2DMAX_XY,NUMFACES_SHARED),stat=ier)
+    if (ier /= 0) stop 'Error allocating iboolfaces_crust_mantle array'
+    iboolfaces_crust_mantle(:,:) = 0
 
   case (IREGION_OUTER_CORE)
     ! outer core
-    allocate(iboolcorner_outer_core(NGLOB1D_RADIAL_OC,NUMCORNERS_SHARED))
+    allocate(iboolcorner_outer_core(NGLOB1D_RADIAL_OC,NUMCORNERS_SHARED),stat=ier)
+    if (ier /= 0) stop 'Error allocating iboolcorner_outer_core array'
+    iboolcorner_outer_core(:,:) = 0
+
     allocate(iboolleft_xi_outer_core(NGLOB2DMAX_XMIN_XMAX_OC), &
-             iboolright_xi_outer_core(NGLOB2DMAX_XMIN_XMAX_OC))
+             iboolright_xi_outer_core(NGLOB2DMAX_XMIN_XMAX_OC),stat=ier)
+    if (ier /= 0) stop 'Error allocating iboolleft_xi_outer_core,.. arrays'
+    iboolleft_xi_outer_core(:) = 0
+    iboolright_xi_outer_core(:) = 0
+
     allocate(iboolleft_eta_outer_core(NGLOB2DMAX_YMIN_YMAX_OC), &
-             iboolright_eta_outer_core(NGLOB2DMAX_YMIN_YMAX_OC))
-    allocate(iboolfaces_outer_core(NGLOB2DMAX_XY,NUMFACES_SHARED))
+             iboolright_eta_outer_core(NGLOB2DMAX_YMIN_YMAX_OC),stat=ier)
+    if (ier /= 0) stop 'Error allocating iboolleft_eta_outer_core,.. arrays'
+    iboolleft_eta_outer_core(:) = 0
+    iboolright_eta_outer_core(:) = 0
+
+    allocate(iboolfaces_outer_core(NGLOB2DMAX_XY,NUMFACES_SHARED),stat=ier)
+    if (ier /= 0) stop 'Error allocating iboolfaces_outer_core array'
+    iboolfaces_outer_core(:,:) = 0
 
   case (IREGION_INNER_CORE)
     ! inner core
-    allocate(iboolcorner_inner_core(NGLOB1D_RADIAL_IC,NUMCORNERS_SHARED))
+    allocate(iboolcorner_inner_core(NGLOB1D_RADIAL_IC,NUMCORNERS_SHARED),stat=ier)
+    if (ier /= 0) stop 'Error allocating iboolcorner_inner_core array'
+    iboolcorner_inner_core(:,:) = 0
+
     allocate(iboolleft_xi_inner_core(NGLOB2DMAX_XMIN_XMAX_IC), &
-             iboolright_xi_inner_core(NGLOB2DMAX_XMIN_XMAX_IC))
+             iboolright_xi_inner_core(NGLOB2DMAX_XMIN_XMAX_IC),stat=ier)
+    if (ier /= 0) stop 'Error allocating iboolleft_xi_inner_core,.. arrays'
+    iboolleft_xi_inner_core(:) = 0
+    iboolright_xi_inner_core(:) = 0
+
     allocate(iboolleft_eta_inner_core(NGLOB2DMAX_YMIN_YMAX_IC), &
-             iboolright_eta_inner_core(NGLOB2DMAX_YMIN_YMAX_IC))
-    allocate(iboolfaces_inner_core(NGLOB2DMAX_XY,NUMFACES_SHARED))
+             iboolright_eta_inner_core(NGLOB2DMAX_YMIN_YMAX_IC),stat=ier)
+    if (ier /= 0) stop 'Error allocating iboolleft_eta_inner_core,.. arrays'
+    iboolleft_eta_inner_core(:) = 0
+    iboolright_eta_inner_core(:) = 0
+
+    allocate(iboolfaces_inner_core(NGLOB2DMAX_XY,NUMFACES_SHARED),stat=ier)
+    if (ier /= 0) stop 'Error allocating iboolfaces_inner_core array'
+    iboolfaces_inner_core(:,:) = 0
 
   end select
 
@@ -343,14 +389,14 @@ subroutine cmi_get_buffers(iregion_code)
         call flush_IMAIN()
       endif
       call cmi_read_buffer_data(IREGION_INNER_CORE, &
-                              NGLOB2DMAX_XMIN_XMAX(IREGION_INNER_CORE), &
-                              NGLOB2DMAX_YMIN_YMAX(IREGION_INNER_CORE), &
-                              NGLOB1D_RADIAL(IREGION_INNER_CORE), &
-                              iboolleft_xi_inner_core,iboolright_xi_inner_core, &
-                              iboolleft_eta_inner_core,iboolright_eta_inner_core, &
-                              npoin2D_xi_inner_core,npoin2D_eta_inner_core, &
-                              iboolfaces_inner_core,npoin2D_faces_inner_core, &
-                              iboolcorner_inner_core)
+                               NGLOB2DMAX_XMIN_XMAX(IREGION_INNER_CORE), &
+                               NGLOB2DMAX_YMIN_YMAX(IREGION_INNER_CORE), &
+                               NGLOB1D_RADIAL(IREGION_INNER_CORE), &
+                               iboolleft_xi_inner_core,iboolright_xi_inner_core, &
+                               iboolleft_eta_inner_core,iboolright_eta_inner_core, &
+                               npoin2D_xi_inner_core,npoin2D_eta_inner_core, &
+                               iboolfaces_inner_core,npoin2D_faces_inner_core, &
+                               iboolcorner_inner_core)
 
       ! central cube buffers
       if (INCLUDE_CENTRAL_CUBE) then
@@ -364,12 +410,11 @@ subroutine cmi_get_buffers(iregion_code)
 
         ! allocates boundary indexing arrays for central cube
         allocate(ibelm_xmin_inner_core(NSPEC2DMAX_XMIN_XMAX_IC), &
-                ibelm_xmax_inner_core(NSPEC2DMAX_XMIN_XMAX_IC), &
-                ibelm_ymin_inner_core(NSPEC2DMAX_YMIN_YMAX_IC), &
-                ibelm_ymax_inner_core(NSPEC2DMAX_YMIN_YMAX_IC), &
-                ibelm_top_inner_core(NSPEC2D_TOP_IC), &
-                ibelm_bottom_inner_core(NSPEC2D_BOTTOM_IC), &
-                stat=ier)
+                 ibelm_xmax_inner_core(NSPEC2DMAX_XMIN_XMAX_IC), &
+                 ibelm_ymin_inner_core(NSPEC2DMAX_YMIN_YMAX_IC), &
+                 ibelm_ymax_inner_core(NSPEC2DMAX_YMIN_YMAX_IC), &
+                 ibelm_top_inner_core(NSPEC2D_TOP_IC), &
+                 ibelm_bottom_inner_core(NSPEC2D_BOTTOM_IC),stat=ier)
         if (ier /= 0 ) call exit_MPI(myrank,'Error allocating central cube index arrays')
 
         ! gets coupling arrays for inner core
@@ -387,8 +432,8 @@ subroutine cmi_get_buffers(iregion_code)
 
         ! compute number of messages to expect in cube as well as their size
         call comp_central_cube_buffer_size(iproc_xi,iproc_eta,ichunk, &
-                    NPROC_XI,NPROC_ETA,NSPEC2D_BOTTOM(IREGION_INNER_CORE), &
-                    nb_msgs_theor_in_cube,npoin2D_cube_from_slices)
+                                           NPROC_XI,NPROC_ETA,NSPEC2D_BOTTOM(IREGION_INNER_CORE), &
+                                           nb_msgs_theor_in_cube,npoin2D_cube_from_slices)
 
         ! this value is used for dynamic memory allocation, therefore make sure it is never zero
         if (nb_msgs_theor_in_cube > 0) then
@@ -396,6 +441,12 @@ subroutine cmi_get_buffers(iregion_code)
         else
           non_zero_nb_msgs_theor_in_cube = 1
         endif
+        if (myrank == 0) then
+          write(IMAIN,*) '  number of messages in cube : ',nb_msgs_theor_in_cube
+          write(IMAIN,*) '  number of 2D points in cube: ',npoin2D_cube_from_slices
+          call flush_IMAIN()
+        endif
+        call synchronize_all()
 
         ! allocate buffers for cube and slices
         allocate(sender_from_slices_to_cube(non_zero_nb_msgs_theor_in_cube), &
@@ -404,6 +455,11 @@ subroutine cmi_get_buffers(iregion_code)
                  buffer_slices2(npoin2D_cube_from_slices,NDIM), &
                  ibool_central_cube(non_zero_nb_msgs_theor_in_cube,npoin2D_cube_from_slices),stat=ier)
         if (ier /= 0 ) call exit_MPI(myrank,'Error allocating cube buffers')
+        sender_from_slices_to_cube(:) = -1
+        ibool_central_cube(:,:) = -1
+        buffer_slices(:,:) = 0.d0
+        buffer_slices2(:,:) = 0.d0
+        buffer_all_cube_from_slices(:,:,:) = 0.d0
 
         ! handles the communications with the central cube if it was included in the mesh
         ! create buffers to assemble with the central cube
@@ -422,7 +478,12 @@ subroutine cmi_get_buffers(iregion_code)
                                          receiver_cube_from_slices,sender_from_slices_to_cube,ibool_central_cube, &
                                          buffer_slices,buffer_slices2,buffer_all_cube_from_slices)
 
-        if (myrank == 0) write(IMAIN,*)
+        if (myrank == 0) then
+          write(IMAIN,*) '  creating central cube done'
+          write(IMAIN,*)
+          call flush_IMAIN()
+        endif
+        call synchronize_all()
 
         ! frees memory
         deallocate(ibelm_xmin_inner_core,ibelm_xmax_inner_core)
@@ -459,10 +520,10 @@ subroutine cmi_get_buffers(iregion_code)
       if (INCLUDE_CENTRAL_CUBE) then
         ! updates flags for elements on slice boundaries
         call fix_non_blocking_central_cube(is_on_a_slice_edge, &
-             ibool,NSPEC_INNER_CORE,NGLOB_INNER_CORE,nb_msgs_theor_in_cube,ibelm_bottom_inner_core, &
-             idoubling,npoin2D_cube_from_slices, &
-             ibool_central_cube,NSPEC2D_BOTTOM(IREGION_INNER_CORE), &
-             ichunk,NPROC_XI)
+                                           ibool,NSPEC_INNER_CORE,NGLOB_INNER_CORE,nb_msgs_theor_in_cube,ibelm_bottom_inner_core, &
+                                           idoubling,npoin2D_cube_from_slices, &
+                                           ibool_central_cube,NSPEC2D_BOTTOM(IREGION_INNER_CORE), &
+                                           ichunk,NPROC_XI)
       endif
 
       ! debug: saves element flags
diff --git a/src/meshfem3D/create_central_cube_buffers.f90 b/src/meshfem3D/create_central_cube_buffers.f90
index f9a90fbc2..29f83bf12 100644
--- a/src/meshfem3D/create_central_cube_buffers.f90
+++ b/src/meshfem3D/create_central_cube_buffers.f90
@@ -553,7 +553,7 @@ end subroutine create_central_cube_buffers
 !
 
   subroutine comp_central_cube_buffer_size(iproc_xi,iproc_eta,ichunk,NPROC_XI,NPROC_ETA,NSPEC2D_BOTTOM_INNER_CORE, &
-                nb_msgs_theor_in_cube,npoin2D_cube_from_slices)
+                                           nb_msgs_theor_in_cube,npoin2D_cube_from_slices)
 
 !--- compute number of messages to expect in cube as well as their size
 !--- take into account vertical sides and bottom side
diff --git a/src/meshfem3D/create_regions_mesh.F90 b/src/meshfem3D/create_regions_mesh.F90
index 903d1089d..6fcda0876 100644
--- a/src/meshfem3D/create_regions_mesh.F90
+++ b/src/meshfem3D/create_regions_mesh.F90
@@ -1126,7 +1126,7 @@ subroutine crm_setup_indexing(npointot)
   if (npointot > 0) then
     if (myrank == 0) then
       write(IMAIN,*) '    total number of points            : ',npointot
-      write(IMAIN,*) '    array memory required per process : ',dble(npointot) * dble(8) / 1024.d0 / 1024.d0,'MB'
+      write(IMAIN,*) '    array memory required per process : ',3.d0 * dble(npointot) * dble(8) / 1024.d0 / 1024.d0,'MB'
       call flush_IMAIN()
     endif
 
diff --git a/src/meshfem3D/fix_non_blocking_flags.f90 b/src/meshfem3D/fix_non_blocking_flags.f90
index bc63ffb27..c33e8a2f3 100644
--- a/src/meshfem3D/fix_non_blocking_flags.f90
+++ b/src/meshfem3D/fix_non_blocking_flags.f90
@@ -114,22 +114,22 @@ subroutine fix_non_blocking_central_cube(is_on_a_slice_edge, &
 
   implicit none
 
-  integer :: nspec,nglob,nb_msgs_theor_in_cube,NSPEC2D_BOTTOM_INNER_CORE
-  integer :: ichunk,npoin2D_cube_from_slices,NPROC_XI
+  integer,intent(in) :: nspec,nglob,nb_msgs_theor_in_cube,NSPEC2D_BOTTOM_INNER_CORE
+  integer,intent(in) :: ichunk,npoin2D_cube_from_slices,NPROC_XI
 
-  logical, dimension(nspec) :: is_on_a_slice_edge
+  logical, dimension(nspec),intent(inout) :: is_on_a_slice_edge
 
-  integer, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: ibool
+  integer, dimension(NGLLX,NGLLY,NGLLZ,nspec),intent(in) :: ibool
 
-  integer, dimension(nb_msgs_theor_in_cube,npoin2D_cube_from_slices) :: ibool_central_cube
+  integer, dimension(nb_msgs_theor_in_cube,npoin2D_cube_from_slices),intent(in) :: ibool_central_cube
 
-  integer, dimension(NSPEC2D_BOTTOM_INNER_CORE) :: ibelm_bottom_inner_core
+  integer, dimension(NSPEC2D_BOTTOM_INNER_CORE),intent(in) :: ibelm_bottom_inner_core
 
-  integer, dimension(nspec) :: idoubling_inner_core
+  integer, dimension(nspec),intent(in) :: idoubling_inner_core
 
   ! local parameters
-  logical, dimension(nglob) :: mask_ibool
-  integer :: ipoin,ispec,i,j,k,imsg,ispec2D
+  logical, dimension(:),allocatable :: mask_ibool
+  integer :: ipoin,ispec,i,j,k,imsg,ispec2D,ier
 
   if (ichunk /= CHUNK_AB .and. ichunk /= CHUNK_AB_ANTIPODE) then
     do ispec2D = 1,NSPEC2D_BOTTOM_INNER_CORE
@@ -149,6 +149,8 @@ subroutine fix_non_blocking_central_cube(is_on_a_slice_edge, &
   if (ichunk == CHUNK_AB .or. ichunk == CHUNK_AB_ANTIPODE) then
 
     ! clean the mask
+    allocate(mask_ibool(nglob),stat=ier)
+    if (ier /= 0) stop 'Error allocating mask_ibool array'
     mask_ibool(:) = .false.
 
     do imsg = 1,nb_msgs_theor_in_cube
@@ -182,6 +184,9 @@ subroutine fix_non_blocking_central_cube(is_on_a_slice_edge, &
     888 continue
     enddo
 
+    ! free array
+    deallocate(mask_ibool)
+
   endif
 
   end subroutine fix_non_blocking_central_cube

From 93d15600a4052fcf5381083d27485adb033e38ad Mon Sep 17 00:00:00 2001
From: Daniel Peter <daniel.peter22@gmail.com>
Date: Fri, 2 Jun 2023 18:02:20 +0200
Subject: [PATCH 11/11] updates test compilation

---
 tests/meshfem3D/test_models.f90 | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/meshfem3D/test_models.f90 b/tests/meshfem3D/test_models.f90
index 82289db90..3ab898fa7 100644
--- a/tests/meshfem3D/test_models.f90
+++ b/tests/meshfem3D/test_models.f90
@@ -1,5 +1,6 @@
 program test_models
 
+  use constants, only: DEGREES_TO_RADIANS,MAX_STRING_LEN,myrank
   use meshfem_par
   use manager_adios