Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make AlpakaTest tests to use their dependencies for controlling their execution, and extend module tests to ROCm. #43204

Merged
merged 3 commits into from
Nov 10, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions HeterogeneousCore/AlpakaCore/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -443,3 +443,30 @@ process.ProcessAcceleratorAlpaka.setBackend("serial_sync") # or "cuda_async" or
process.options.accelerators = ["cpu"] # or "gpu-nvidia" or "gpu-amd"
```


## Unit tests

Unit tests that depend on Alpaka and define `<flags ALPAKA_BACKENDS="1"/>`, declared e.g. as a binary along the lines of
```xml
<bin name="<unique test binary name>" file="<comma-separated list of files>">
<use name="alpaka"/>
<flags ALPAKA_BACKENDS="1"/>
</bin>
```
or as a command (e.g. `cmsRun` or a shell script) to run

```xml
<test name="<unique name of the test>" command="<command to run>">
<use name="alpaka"/>
<flags ALPAKA_BACKENDS="1"/>
</test>
```

will be run as part of `scram build runtests` according to the
availability of the hardware:
- `serial_sync` version is run always
- `cuda_async` version is run if NVIDIA GPU is present (i.e. `cudaIsEnabled` returns 0)
- `rocm_async` version is run if AMD GPU is present (i.e. `rocmIsEnabled` returns 0)

Tests for a specific backend (or hardware) can be explicitly requested by setting the `USER_UNIT_TESTS=cuda` or `USER_UNIT_TESTS=rocm` environment variable; tests that do not depend on that hardware are then skipped. If the corresponding hardware is not available, the tests will fail.

Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
edm::ParameterSetDescription psetSize;
psetSize.add<int32_t>("alpaka_serial_sync");
psetSize.add<int32_t>("alpaka_cuda_async");
psetSize.add<int32_t>("alpaka_rocm_async");
desc.add("size", psetSize);

descriptions.addWithDefaultLabel(desc);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
edm::ParameterSetDescription psetX;
psetX.add<double>("alpaka_serial_sync", 0.);
psetX.add<double>("alpaka_cuda_async", 0.);
psetX.add<double>("alpaka_rocm_async", 0.);
desc.add("xvalue", psetX);

descriptions.addWithDefaultLabel(desc);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
edm::ParameterSetDescription psetSize;
psetSize.add<int32_t>("alpaka_serial_sync");
psetSize.add<int32_t>("alpaka_cuda_async");
psetSize.add<int32_t>("alpaka_rocm_async");
desc.add("size", psetSize);

descriptions.addWithDefaultLabel(desc);
Expand Down
23 changes: 14 additions & 9 deletions HeterogeneousCore/AlpakaTest/test/BuildFile.xml
Original file line number Diff line number Diff line change
@@ -1,10 +1,15 @@
<!-- TODO: dependence on CUDA only to make GPU IBs to run these tests -->
<use name="cuda"/>
<test name="testHeterogeneousCoreAlpakaTestWriteRead" command="testHeterogeneousCoreAlpakaTestWriteRead.sh">
<!-- dependence and flag only to trigger the unit test for each Alpaka backend -->
<use name="alpaka"/>
<flags ALPAKA_BACKENDS="1"/>
</test>

<test name="testHeterogeneousCoreAlpakaTestWriteRead" command="testHeterogeneousCoreAlpakaTestWriteRead.sh"/>

<ifrelease name="_GPU_">
<test name="testHeterogeneousCoreAlpakaTestModules" command="testAlpakaModules.sh cuda"/>
<else/>
<test name="testHeterogeneousCoreAlpakaTestModules" command="testAlpakaModules.sh cpu"/>
</ifrelease>
<test name="testHeterogeneousCoreAlpakaTestModulesCPU" command="testAlpakaModules.sh cpu"/>
<test name="testHeterogeneousCoreAlpakaTestModulesCUDA" command="testAlpakaModules.sh cuda">
<!-- dependence only to trigger the unit test when NVIDIA GPU is (expected to be) present -->
<use name="cuda"/>
</test>
<test name="testHeterogeneousCoreAlpakaTestModulesROCm" command="testAlpakaModules.sh rocm">
<!-- dependence only to trigger the unit test when AMD GPU is (expected to be) present -->
<use name="rocm"/>
</test>
75 changes: 45 additions & 30 deletions HeterogeneousCore/AlpakaTest/test/testAlpakaModules.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,21 +5,26 @@ function die { echo Failed $1: status $2 ; exit $2 ; }
TEST_DIR=${LOCALTOP}/src/HeterogeneousCore/AlpakaTest/test

if [ "$#" != "1" ]; then
die "Need exactly 1 argument ('cpu', 'cuda'), got $#" 1
die "Need exactly 1 argument ('cpu', 'cuda', or 'rocm'), got $#" 1
fi
if [ "$1" = "cuda" ]; then
TARGET=cuda
elif [ "$1" = "cpu" ]; then
# In non-_GPU_ IBs, if CUDA is enabled, run the GPU-targeted tests
if [[ "$1" =~ ^(cpu|cuda|rocm)$ ]]; then
TARGET=$1
else
die "Argument needs to be 'cpu', 'cuda', or 'rocm'; got '$1'" 1
fi

# Some of the CPU-only tests fail if run on machine with GPU
if [ "$TARGET" == "cpu" ]; then
cudaIsEnabled
CUDA_ENABLED=$?
if [ "${CUDA_ENABLED}" = "0" ]; then
TARGET=cuda
else
TARGET=cpu
if [ "$?" == "0" ]; then
echo "Test target is 'cpu', but NVIDIA GPU is detected. Ignoring the CPU tests."
exit 0
fi
rocmIsEnabled
if [ "$?" == "0" ]; then
echo "Test target is 'cpu', but AMD GPU is detected. Ignoring the CPU tests."
exit 0
rappoccio marked this conversation as resolved.
Show resolved Hide resolved
fi
else
die "Argument needs to be 'cpu' or 'cuda', got $1" 1
fi

function runSuccess {
Expand All @@ -43,6 +48,30 @@ function runFailure {
echo
}

# Run the full matrix of backend-selection tests for one GPU flavor.
#   $1 — accelerator label passed to --accelerators (e.g. "gpu-nvidia", "gpu-amd")
#   $2 — Alpaka backend name expected for that accelerator (e.g. "cuda_async", "rocm_async")
# Relies on the runSuccess / runFailure / runSuccessHostAndDevice helpers
# defined earlier in this script.
function runForGPU {
  local accel="$1"
  local backend="$2"

  # The GPU backend must be picked automatically, and also when requested
  # explicitly via each of the three selection mechanisms.
  runSuccess "--expectBackend=$backend"
  runSuccess "--accelerators=$accel --expectBackend=$backend"
  runSuccess "--processAcceleratorBackend=$backend --expectBackend=$backend"
  runSuccess "--moduleBackend=$backend --expectBackend=$backend"

  # A module-level setting overrides the ProcessAccelerator-level one.
  runSuccess "--processAcceleratorBackend=$backend --moduleBackend=serial_sync --expectBackend=serial_sync"
  runSuccess "--processAcceleratorBackend=serial_sync --moduleBackend=$backend --expectBackend=$backend"

  # Contradictory combinations of accelerator and backend must fail.
  runFailure "--accelerators=$accel --processAcceleratorBackend=serial_sync --expectBackend=serial_sync"
  runFailure "--accelerators=$accel --moduleBackend=serial_sync --expectBackend=serial_sync"
  runFailure "--accelerators=$accel --processAcceleratorBackend=$backend --moduleBackend=serial_sync --expectBackend=serial_sync"
  runFailure "--accelerators=$accel --processAcceleratorBackend=serial_sync --moduleBackend=$backend --expectBackend=$backend"
  runFailure "--accelerators=cpu --processAcceleratorBackend=$backend --expectBackend=$backend"
  runFailure "--accelerators=cpu --moduleBackend=$backend --expectBackend=$backend"
  runFailure "--accelerators=cpu --processAcceleratorBackend=serial_sync --moduleBackend=$backend --expectBackend=$backend"
  runFailure "--accelerators=cpu --processAcceleratorBackend=$backend --moduleBackend=serial_sync --expectBackend=serial_sync"

  runSuccessHostAndDevice "--expectBackend=$backend"
}

runSuccess "--accelerators=cpu --expectBackend=serial_sync"
runSuccess "--processAcceleratorBackend=serial_sync --expectBackend=serial_sync"
runSuccess "--moduleBackend=serial_sync --expectBackend=serial_sync"
Expand All @@ -60,23 +89,9 @@ if [ "${TARGET}" == "cpu" ]; then
runSuccessHostAndDevice "--expectBackend=serial_sync"

elif [ "${TARGET}" == "cuda" ]; then
runSuccess "--expectBackend=cuda_async"
runSuccess "--accelerators=gpu-nvidia --expectBackend=cuda_async"
runSuccess "--processAcceleratorBackend=cuda_async --expectBackend=cuda_async"
runSuccess "--moduleBackend=cuda_async --expectBackend=cuda_async"

runSuccess "--processAcceleratorBackend=cuda_async --moduleBackend=serial_sync --expectBackend=serial_sync"
runSuccess "--processAcceleratorBackend=serial_sync --moduleBackend=cuda_async --expectBackend=cuda_async"

runFailure "--accelerators=gpu-nvidia --processAcceleratorBackend=serial_sync --expectBackend=serial_sync"
runFailure "--accelerators=gpu-nvidia --moduleBackend=serial_sync --expectBackend=serial_sync"
runFailure "--accelerators=gpu-nvidia --processAcceleratorBackend=cuda_async --moduleBackend=serial_sync --expectBackend=serial_sync"
runFailure "--accelerators=gpu-nvidia --processAcceleratorBackend=serial_sync --moduleBackend=cuda_async --expectBackend=cuda_async"
runFailure "--accelerators=cpu --processAcceleratorBackend=cuda_async --expectBackend=cuda_async"
runFailure "--accelerators=cpu --moduleBackend=cuda_async --expectBackend=cuda_async"
runFailure "--accelerators=cpu --processAcceleratorBackend=serial_sync --moduleBackend=cuda_async --expectBackend=cuda_async"
runFailure "--accelerators=cpu --processAcceleratorBackend=cuda_async --moduleBackend=serial_sync --expectBackend=serial_sync"

runSuccessHostAndDevice "--expectBackend=cuda_async"
runForGPU "gpu-nvidia" "cuda_async"

elif [ "${TARGET}" == "rocm" ]; then
runForGPU "gpu-amd" "rocm_async"

fi
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@
process.producer = cms.EDProducer("TestAlpakaGlobalProducerOffset@alpaka",
xvalue = cms.PSet(
alpaka_serial_sync = cms.double(1.0),
alpaka_cuda_async = cms.double(2.0)
alpaka_cuda_async = cms.double(2.0),
alpaka_rocm_async = cms.double(3.0),
)
)
process.producerHost = process.producer.clone(
Expand All @@ -55,6 +56,8 @@
)
if args.expectBackend == "cuda_async":
process.compare.expectedXdiff = -1.0
elif args.expectBackend == "rocm_async":
process.compare.expectedXdiff = -2.0

process.t = cms.Task(process.producer, process.producerHost)
process.p = cms.Path(process.compare, process.t)
18 changes: 14 additions & 4 deletions HeterogeneousCore/AlpakaTest/test/testAlpakaModules_cfg.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
process.options.accelerators = args.accelerators.split(",")

process.load('Configuration.StandardSequences.Accelerators_cff')
process.load("HeterogeneousCore.CUDACore.ProcessAcceleratorCUDA_cfi")
process.load("HeterogeneousCore.AlpakaCore.ProcessAcceleratorAlpaka_cfi")

process.alpakaESRecordASource = cms.ESSource("EmptyESSource",
Expand Down Expand Up @@ -63,15 +62,17 @@
eventSetupSource = cms.ESInputTag("alpakaESProducerA", "appendedLabel"),
size = dict(
alpaka_serial_sync = 10,
alpaka_cuda_async = 20
alpaka_cuda_async = 20,
alpaka_rocm_async = 30,
)
)
process.alpakaStreamProducer = cms.EDProducer("TestAlpakaStreamProducer@alpaka",
source = cms.InputTag("intProduct"),
eventSetupSource = cms.ESInputTag("alpakaESProducerB", "explicitLabel"),
size = cms.PSet(
alpaka_serial_sync = cms.int32(5),
alpaka_cuda_async = cms.int32(25)
alpaka_cuda_async = cms.int32(25),
alpaka_rocm_async = cms.int32(125),
)
)
process.alpakaStreamInstanceProducer = cms.EDProducer("TestAlpakaStreamProducer@alpaka",
Expand All @@ -80,7 +81,8 @@
productInstanceName = cms.string("testInstance"),
size = cms.PSet(
alpaka_serial_sync = cms.int32(6),
alpaka_cuda_async = cms.int32(36)
alpaka_cuda_async = cms.int32(36),
alpaka_rocm_async = cms.int32(216),
)
)
process.alpakaStreamSynchronizingProducer = cms.EDProducer("TestAlpakaStreamSynchronizingProducer@alpaka",
Expand Down Expand Up @@ -125,6 +127,14 @@ def setExpect(m, size):
setExpect(process.alpakaStreamConsumer, size=25)
setExpect(process.alpakaStreamInstanceConsumer, size=36)
setExpect(process.alpakaStreamSynchronizingConsumer, size=20)
elif args.expectBackend == "rocm_async":
def setExpect(m, size):
m.expectSize = size
m.expectBackend = "ROCmAsync"
setExpect(process.alpakaGlobalConsumer, size = 30)
setExpect(process.alpakaStreamConsumer, size = 125)
setExpect(process.alpakaStreamInstanceConsumer, size = 216)
setExpect(process.alpakaStreamSynchronizingConsumer, size = 30)

process.output = cms.OutputModule('PoolOutputModule',
fileName = cms.untracked.string('testAlpaka.root'),
Expand Down