Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make AlpakaTest tests to use their dependencies for controlling their execution, and extend module tests to ROCm. #43204

Merged
merged 3 commits into from
Nov 10, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions HeterogeneousCore/AlpakaCore/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -443,3 +443,30 @@ process.ProcessAcceleratorAlpaka.setBackend("serial_sync") # or "cuda_async" or
process.options.accelerators = ["cpu"] # or "gpu-nvidia" or "gpu-amd"
```


## Unit tests

Unit tests that depend on Alpaka and define `<flags ALPAKA_BACKENDS="1"/>`, declared e.g. as a binary along the lines of
```xml
<bin name="<unique test binary name>" file="<comma-separated list of files>">
<use name="alpaka"/>
<flags ALPAKA_BACKENDS="1"/>
</bin>
```
or as a command (e.g. `cmsRun` or a shell script) to run

```xml
<test name="<unique name of the test>" command="<command to run>">
<use name="alpaka"/>
<flags ALPAKA_BACKENDS="1"/>
</test>
```

will be run as part of `scram build runtests` according to the
availability of the hardware:
- `serial_sync` version is run always
- `cuda_async` version is run if NVIDIA GPU is present (i.e. `cudaIsEnabled` returns 0)
- `rocm_async` version is run if AMD GPU is present (i.e. `rocmIsEnabled` returns 0)

Tests for a specific backend (or hardware) can be explicitly requested by setting the `USER_UNIT_TESTS=cuda` or `USER_UNIT_TESTS=rocm` environment variable; tests that do not depend on that hardware are then skipped. If the corresponding hardware is not available, the tests will fail.

Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
edm::ParameterSetDescription psetSize;
psetSize.add<int32_t>("alpaka_serial_sync");
psetSize.add<int32_t>("alpaka_cuda_async");
psetSize.add<int32_t>("alpaka_rocm_async");
desc.add("size", psetSize);

descriptions.addWithDefaultLabel(desc);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
edm::ParameterSetDescription psetX;
psetX.add<double>("alpaka_serial_sync", 0.);
psetX.add<double>("alpaka_cuda_async", 0.);
psetX.add<double>("alpaka_rocm_async", 0.);
desc.add("xvalue", psetX);

descriptions.addWithDefaultLabel(desc);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
edm::ParameterSetDescription psetSize;
psetSize.add<int32_t>("alpaka_serial_sync");
psetSize.add<int32_t>("alpaka_cuda_async");
psetSize.add<int32_t>("alpaka_rocm_async");
desc.add("size", psetSize);

descriptions.addWithDefaultLabel(desc);
Expand Down
23 changes: 14 additions & 9 deletions HeterogeneousCore/AlpakaTest/test/BuildFile.xml
Original file line number Diff line number Diff line change
@@ -1,10 +1,15 @@
<!-- TODO: dependence on CUDA only to make GPU IBs to run these tests -->
<use name="cuda"/>
<test name="testHeterogeneousCoreAlpakaTestWriteRead" command="testHeterogeneousCoreAlpakaTestWriteRead.sh">
<!-- dependence and flag only to trigger the unit test for each Alpaka backend -->
<use name="alpaka"/>
<flags ALPAKA_BACKENDS="1"/>
</test>

<test name="testHeterogeneousCoreAlpakaTestWriteRead" command="testHeterogeneousCoreAlpakaTestWriteRead.sh"/>

<ifrelease name="_GPU_">
<test name="testHeterogeneousCoreAlpakaTestModules" command="testAlpakaModules.sh cuda"/>
<else/>
<test name="testHeterogeneousCoreAlpakaTestModules" command="testAlpakaModules.sh cpu"/>
</ifrelease>
<test name="testHeterogeneousCoreAlpakaTestModulesCPU" command="testAlpakaModules.sh cpu"/>
<test name="testHeterogeneousCoreAlpakaTestModulesCUDA" command="testAlpakaModules.sh cuda">
<!-- dependence only to trigger the unit test when NVIDIA GPU is (expected to be) present -->
<use name="cuda"/>
</test>
<test name="testHeterogeneousCoreAlpakaTestModulesROCm" command="testAlpakaModules.sh rocm">
<!-- dependence only to trigger the unit test when AMD GPU is (expected to be) present -->
<use name="rocm"/>
</test>
75 changes: 45 additions & 30 deletions HeterogeneousCore/AlpakaTest/test/testAlpakaModules.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,21 +5,26 @@ function die { echo Failed $1: status $2 ; exit $2 ; }
TEST_DIR=${LOCALTOP}/src/HeterogeneousCore/AlpakaTest/test

if [ "$#" != "1" ]; then
die "Need exactly 1 argument ('cpu', 'cuda'), got $#" 1
die "Need exactly 1 argument ('cpu', 'cuda', or 'rocm'), got $#" 1
fi
if [ "$1" = "cuda" ]; then
TARGET=cuda
elif [ "$1" = "cpu" ]; then
# In non-_GPU_ IBs, if CUDA is enabled, run the GPU-targeted tests
if [[ "$1" =~ ^(cpu|cuda|rocm)$ ]]; then
TARGET=$1
else
die "Argument needs to be 'cpu', 'cuda', or 'rocm'; got '$1'" 1
fi

# Some of the CPU-only tests fail if run on machine with GPU
if [ "$TARGET" == "cpu" ]; then
cudaIsEnabled
CUDA_ENABLED=$?
if [ "${CUDA_ENABLED}" = "0" ]; then
TARGET=cuda
else
TARGET=cpu
if [ "$?" == "0" ]; then
echo "Test target is 'cpu', but NVIDIA GPU is detected. Ignoring the CPU tests."
exit 0
fi
rocmIsEnabled
if [ "$?" == "0" ]; then
echo "Test target is 'cpu', but AMD GPU is detected. Ignoring the CPU tests."
exit 0
rappoccio marked this conversation as resolved.
Show resolved Hide resolved
fi
else
die "Argument needs to be 'cpu' or 'cuda', got $1" 1
fi

function runSuccess {
Expand All @@ -43,6 +48,30 @@ function runFailure {
echo
}

# Run the full matrix of backend-selection tests for one GPU flavor.
#   $1 — accelerator label passed to --accelerators (e.g. "gpu-nvidia", "gpu-amd")
#   $2 — Alpaka backend name expected for that accelerator (e.g. "cuda_async", "rocm_async")
# Relies on the runSuccess / runFailure / runSuccessHostAndDevice helpers
# defined earlier in this script.
function runForGPU {
  local accel="$1"
  local backend="$2"

  # The GPU backend must be picked automatically, and also when requested
  # explicitly via each of the three selection mechanisms.
  runSuccess "--expectBackend=$backend"
  runSuccess "--accelerators=$accel --expectBackend=$backend"
  runSuccess "--processAcceleratorBackend=$backend --expectBackend=$backend"
  runSuccess "--moduleBackend=$backend --expectBackend=$backend"

  # A module-level setting overrides the ProcessAccelerator-level one.
  runSuccess "--processAcceleratorBackend=$backend --moduleBackend=serial_sync --expectBackend=serial_sync"
  runSuccess "--processAcceleratorBackend=serial_sync --moduleBackend=$backend --expectBackend=$backend"

  # Contradictory combinations of accelerator and backend must fail.
  runFailure "--accelerators=$accel --processAcceleratorBackend=serial_sync --expectBackend=serial_sync"
  runFailure "--accelerators=$accel --moduleBackend=serial_sync --expectBackend=serial_sync"
  runFailure "--accelerators=$accel --processAcceleratorBackend=$backend --moduleBackend=serial_sync --expectBackend=serial_sync"
  runFailure "--accelerators=$accel --processAcceleratorBackend=serial_sync --moduleBackend=$backend --expectBackend=$backend"
  runFailure "--accelerators=cpu --processAcceleratorBackend=$backend --expectBackend=$backend"
  runFailure "--accelerators=cpu --moduleBackend=$backend --expectBackend=$backend"
  runFailure "--accelerators=cpu --processAcceleratorBackend=serial_sync --moduleBackend=$backend --expectBackend=$backend"
  runFailure "--accelerators=cpu --processAcceleratorBackend=$backend --moduleBackend=serial_sync --expectBackend=serial_sync"

  runSuccessHostAndDevice "--expectBackend=$backend"
}

runSuccess "--accelerators=cpu --expectBackend=serial_sync"
runSuccess "--processAcceleratorBackend=serial_sync --expectBackend=serial_sync"
runSuccess "--moduleBackend=serial_sync --expectBackend=serial_sync"
Expand All @@ -60,23 +89,9 @@ if [ "${TARGET}" == "cpu" ]; then
runSuccessHostAndDevice "--expectBackend=serial_sync"

elif [ "${TARGET}" == "cuda" ]; then
runSuccess "--expectBackend=cuda_async"
runSuccess "--accelerators=gpu-nvidia --expectBackend=cuda_async"
runSuccess "--processAcceleratorBackend=cuda_async --expectBackend=cuda_async"
runSuccess "--moduleBackend=cuda_async --expectBackend=cuda_async"

runSuccess "--processAcceleratorBackend=cuda_async --moduleBackend=serial_sync --expectBackend=serial_sync"
runSuccess "--processAcceleratorBackend=serial_sync --moduleBackend=cuda_async --expectBackend=cuda_async"

runFailure "--accelerators=gpu-nvidia --processAcceleratorBackend=serial_sync --expectBackend=serial_sync"
runFailure "--accelerators=gpu-nvidia --moduleBackend=serial_sync --expectBackend=serial_sync"
runFailure "--accelerators=gpu-nvidia --processAcceleratorBackend=cuda_async --moduleBackend=serial_sync --expectBackend=serial_sync"
runFailure "--accelerators=gpu-nvidia --processAcceleratorBackend=serial_sync --moduleBackend=cuda_async --expectBackend=cuda_async"
runFailure "--accelerators=cpu --processAcceleratorBackend=cuda_async --expectBackend=cuda_async"
runFailure "--accelerators=cpu --moduleBackend=cuda_async --expectBackend=cuda_async"
runFailure "--accelerators=cpu --processAcceleratorBackend=serial_sync --moduleBackend=cuda_async --expectBackend=cuda_async"
runFailure "--accelerators=cpu --processAcceleratorBackend=cuda_async --moduleBackend=serial_sync --expectBackend=serial_sync"

runSuccessHostAndDevice "--expectBackend=cuda_async"
runForGPU "gpu-nvidia" "cuda_async"

elif [ "${TARGET}" == "rocm" ]; then
runForGPU "gpu-amd" "rocm_async"

fi
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@
process.producer = cms.EDProducer("TestAlpakaGlobalProducerOffset@alpaka",
xvalue = cms.PSet(
alpaka_serial_sync = cms.double(1.0),
alpaka_cuda_async = cms.double(2.0)
alpaka_cuda_async = cms.double(2.0),
alpaka_rocm_async = cms.double(3.0),
)
)
process.producerHost = process.producer.clone(
Expand All @@ -55,6 +56,8 @@
)
if args.expectBackend == "cuda_async":
process.compare.expectedXdiff = -1.0
elif args.expectBackend == "rocm_async":
process.compare.expectedXdiff = -2.0

process.t = cms.Task(process.producer, process.producerHost)
process.p = cms.Path(process.compare, process.t)
18 changes: 14 additions & 4 deletions HeterogeneousCore/AlpakaTest/test/testAlpakaModules_cfg.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
process.options.accelerators = args.accelerators.split(",")

process.load('Configuration.StandardSequences.Accelerators_cff')
process.load("HeterogeneousCore.CUDACore.ProcessAcceleratorCUDA_cfi")
process.load("HeterogeneousCore.AlpakaCore.ProcessAcceleratorAlpaka_cfi")

process.alpakaESRecordASource = cms.ESSource("EmptyESSource",
Expand Down Expand Up @@ -63,15 +62,17 @@
eventSetupSource = cms.ESInputTag("alpakaESProducerA", "appendedLabel"),
size = dict(
alpaka_serial_sync = 10,
alpaka_cuda_async = 20
alpaka_cuda_async = 20,
alpaka_rocm_async = 30,
)
)
process.alpakaStreamProducer = cms.EDProducer("TestAlpakaStreamProducer@alpaka",
source = cms.InputTag("intProduct"),
eventSetupSource = cms.ESInputTag("alpakaESProducerB", "explicitLabel"),
size = cms.PSet(
alpaka_serial_sync = cms.int32(5),
alpaka_cuda_async = cms.int32(25)
alpaka_cuda_async = cms.int32(25),
alpaka_rocm_async = cms.int32(125),
)
)
process.alpakaStreamInstanceProducer = cms.EDProducer("TestAlpakaStreamProducer@alpaka",
Expand All @@ -80,7 +81,8 @@
productInstanceName = cms.string("testInstance"),
size = cms.PSet(
alpaka_serial_sync = cms.int32(6),
alpaka_cuda_async = cms.int32(36)
alpaka_cuda_async = cms.int32(36),
alpaka_rocm_async = cms.int32(216),
)
)
process.alpakaStreamSynchronizingProducer = cms.EDProducer("TestAlpakaStreamSynchronizingProducer@alpaka",
Expand Down Expand Up @@ -125,6 +127,14 @@ def setExpect(m, size):
setExpect(process.alpakaStreamConsumer, size=25)
setExpect(process.alpakaStreamInstanceConsumer, size=36)
setExpect(process.alpakaStreamSynchronizingConsumer, size=20)
elif args.expectBackend == "rocm_async":
def setExpect(m, size):
m.expectSize = size
m.expectBackend = "ROCmAsync"
setExpect(process.alpakaGlobalConsumer, size = 30)
setExpect(process.alpakaStreamConsumer, size = 125)
setExpect(process.alpakaStreamInstanceConsumer, size = 216)
setExpect(process.alpakaStreamSynchronizingConsumer, size = 30)

process.output = cms.OutputModule('PoolOutputModule',
fileName = cms.untracked.string('testAlpaka.root'),
Expand Down