
LUMI mpi4py OSU test scripts WIP.
Chroxvi committed Nov 29, 2023
1 parent 702bb34 commit 32a1a37
Showing 8 changed files with 189 additions and 10 deletions.
3 changes: 0 additions & 3 deletions examples/LUMI/conda_mpi4py_mpich/README.md
@@ -24,9 +24,6 @@ Copy everything to LUMI, update the `--account=project_<your_project_id>` SBATCH

TODO:
- Ideally, update the conda numpy package to 1.26.1, though it may be a problem: https://github.com/conda-forge/numpy-feedstock/pull/302
-- mpi4py conda packages not compatible with mpich=3.4.3 conda packages (or is it?)
-- Set a better shebang: #!/bin/bash -e
-- Rename cray_python to cray-python in sbatch file names.
- TL;DR or somehow separate recommended example from tests/benchmarks


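One of the remaining TODO items questions whether the mpi4py conda packages match the MPICH conda packages. A quick way to investigate is to inspect mpi4py's build configuration inside a container; a minimal sketch, using one of the container names from the scripts below:

singularity exec containers/cotainr-mpich4-pip-mpi4py.sif \
    python3 -c "import mpi4py; print(mpi4py.get_config())"
singularity exec containers/cotainr-mpich4-pip-mpi4py.sif \
    python3 -c "from mpi4py import MPI; print(MPI.Get_library_version())"

The first command prints the MPI compiler wrappers recorded when mpi4py was built; the second prints the version string of the MPI library actually loaded at run time.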
@@ -1,4 +1,4 @@
-#!/usr/bin/env bash
+#!/bin/bash -e
#
# A LUMI SLURM batch script for the LUMI mpi4py MPICH example from
# https://github.com/DeiC-HPC/cotainr
@@ -24,7 +24,7 @@ CONTAINERS=(\
export MPIR_CVAR_DEBUG_SUMMARY=1
export FI_LOG_LEVEL=Info

-source lumi-singularity-bindings.sh # or use the LUMI singularity-bindings module
+source $PROJECT_DIR/lumi-singularity-bindings.sh # or use the LUMI singularity-bindings module

for container in ${CONTAINERS[@]}; do
echo "=============== Run using $container ==============="
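The sourced lumi-singularity-bindings.sh is not part of this commit. A minimal sketch of what such a script typically does, with purely illustrative paths (the authoritative values come from the LUMI singularity-bindings module):

# Illustrative only: bind the host Cray MPICH stack into the container
# and point the container's loader at it. Real paths vary with the
# LUMI software stack in use.
export SINGULARITY_BIND="/opt/cray,/var/spool/slurmd,/usr/lib64/libcxi.so.1"
export SINGULARITYENV_LD_LIBRARY_PATH="/opt/cray/pe/mpich/default/ofi/gnu/9.1/lib-abi-mpich:/opt/cray/pe/lib64"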
69 changes: 69 additions & 0 deletions examples/LUMI/conda_mpi4py_mpich/run_cotainr_bind_osu.sh
@@ -0,0 +1,69 @@
#!/bin/bash -e
#
# A LUMI SLURM batch script for the LUMI mpi4py MPICH example from
# https://github.com/DeiC-HPC/cotainr
# This script runs the OSU benchmarks with NumPy buffers,
# using a cotainr container with a generic MPICH and bind-mounting the host MPI.
#
#SBATCH --job-name=mpi4py-cotainr-bind-osu
#SBATCH --nodes=2
#SBATCH --tasks-per-node=1
#SBATCH --output="output_%x_%j.txt"
#SBATCH --partition=small
#SBATCH --exclusive
#SBATCH --time=00:10:00
#SBATCH --account=project_<your_project_id>

PROJECT_DIR=
OSU_PY_BENCHMARK_DIR=$PROJECT_DIR/osu-micro-benchmarks-7.0.1/python/
RESULTS_DIR=$PROJECT_DIR/test_results
CONTAINERS=(\
"cotainr-mpich3-pip-mpi4py.sif" \
"cotainr-mpich4-pip-mpi4py.sif")

set -x
mkdir -p $RESULTS_DIR

source $PROJECT_DIR/lumi-singularity-bindings.sh # or use the LUMI singularity-bindings module

for container in ${CONTAINERS[@]}; do
# Single node runs
srun --nodes=1 --tasks-per-node=2 \
singularity exec \
--bind=$PROJECT_DIR \
$PROJECT_DIR/containers/$container \
python3 $OSU_PY_BENCHMARK_DIR/run.py --benchmark=bw --buffer=numpy \
> $RESULTS_DIR/$SLURM_JOB_NAME-bw-single-$container.txt
srun --nodes=1 --tasks-per-node=2 \
singularity exec \
--bind=$PROJECT_DIR \
$PROJECT_DIR/containers/$container \
python3 $OSU_PY_BENCHMARK_DIR/run.py --benchmark=latency --buffer=numpy \
> $RESULTS_DIR/$SLURM_JOB_NAME-latency-single-$container.txt
srun --nodes=1 --tasks-per-node=2 \
singularity exec \
--bind=$PROJECT_DIR \
$PROJECT_DIR/containers/$container \
python3 $OSU_PY_BENCHMARK_DIR/run.py --benchmark=allgather --buffer=numpy \
> $RESULTS_DIR/$SLURM_JOB_NAME-allgather-single-$container.txt

# Multi node runs
srun \
singularity exec \
--bind=$PROJECT_DIR \
$PROJECT_DIR/containers/$container \
python3 $OSU_PY_BENCHMARK_DIR/run.py --benchmark=bw --buffer=numpy \
> $RESULTS_DIR/$SLURM_JOB_NAME-bw-multi-$container.txt
srun \
singularity exec \
--bind=$PROJECT_DIR \
$PROJECT_DIR/containers/$container \
python3 $OSU_PY_BENCHMARK_DIR/run.py --benchmark=latency --buffer=numpy \
> $RESULTS_DIR/$SLURM_JOB_NAME-latency-multi-$container.txt
srun \
singularity exec \
--bind=$PROJECT_DIR \
$PROJECT_DIR/containers/$container \
python3 $OSU_PY_BENCHMARK_DIR/run.py --benchmark=allgather --buffer=numpy \
> $RESULTS_DIR/$SLURM_JOB_NAME-allgather-multi-$container.txt
done
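Both new OSU scripts assume PROJECT_DIR is filled in at the top. A hypothetical submission session (the project ID is a placeholder; the result file name follows the $SLURM_JOB_NAME-<benchmark>-<mode>-<container>.txt pattern used above):

sbatch --account=project_465000123 run_cotainr_bind_osu.sh
squeue --me    # wait until the job has finished
ls test_results/
head test_results/mpi4py-cotainr-bind-osu-bw-multi-cotainr-mpich4-pip-mpi4py.sif.txt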
@@ -1,4 +1,4 @@
-#!/usr/bin/env bash
+#!/bin/bash -e
#
# A LUMI SLURM batch script for the LUMI mpi4py MPICH example from
# https://github.com/DeiC-HPC/cotainr
67 changes: 67 additions & 0 deletions examples/LUMI/conda_mpi4py_mpich/run_cotainr_hybrid_osu.sh
@@ -0,0 +1,67 @@
#!/bin/bash -e
#
# A LUMI SLURM batch script for the LUMI mpi4py MPICH example from
# https://github.com/DeiC-HPC/cotainr
# This script runs the OSU benchmarks with NumPy buffers,
# using a cotainr container with a generic MPICH and the container's own MPI.
#
#SBATCH --job-name=mpi4py-cotainr-hybrid-osu
#SBATCH --nodes=2
#SBATCH --tasks-per-node=1
#SBATCH --output="output_%x_%j.txt"
#SBATCH --partition=small
#SBATCH --exclusive
#SBATCH --time=00:30:00
#SBATCH --account=project_<your_project_id>

PROJECT_DIR=
OSU_PY_BENCHMARK_DIR=$PROJECT_DIR/osu-micro-benchmarks-7.0.1/python/
RESULTS_DIR=$PROJECT_DIR/test_results
CONTAINERS=(\
"cotainr-mpich3-pip-mpi4py.sif" \
"cotainr-mpich4-pip-mpi4py.sif")

set -x
mkdir -p $RESULTS_DIR

for container in ${CONTAINERS[@]}; do
# Single node runs
srun --nodes=1 --tasks-per-node=2 --mpi=pmi2 \
singularity exec \
--bind=$PROJECT_DIR \
$PROJECT_DIR/containers/$container \
python3 $OSU_PY_BENCHMARK_DIR/run.py --benchmark=bw --buffer=numpy \
> $RESULTS_DIR/$SLURM_JOB_NAME-bw-single-$container.txt
srun --nodes=1 --tasks-per-node=2 --mpi=pmi2 \
singularity exec \
--bind=$PROJECT_DIR \
$PROJECT_DIR/containers/$container \
python3 $OSU_PY_BENCHMARK_DIR/run.py --benchmark=latency --buffer=numpy \
> $RESULTS_DIR/$SLURM_JOB_NAME-latency-single-$container.txt
srun --nodes=1 --tasks-per-node=2 --mpi=pmi2 \
singularity exec \
--bind=$PROJECT_DIR \
$PROJECT_DIR/containers/$container \
python3 $OSU_PY_BENCHMARK_DIR/run.py --benchmark=allgather --buffer=numpy \
> $RESULTS_DIR/$SLURM_JOB_NAME-allgather-single-$container.txt

# Multi node runs
srun --mpi=pmi2 \
singularity exec \
--bind=$PROJECT_DIR \
$PROJECT_DIR/containers/$container \
python3 $OSU_PY_BENCHMARK_DIR/run.py --benchmark=bw --buffer=numpy \
> $RESULTS_DIR/$SLURM_JOB_NAME-bw-multi-$container.txt
srun --mpi=pmi2 \
singularity exec \
--bind=$PROJECT_DIR \
$PROJECT_DIR/containers/$container \
python3 $OSU_PY_BENCHMARK_DIR/run.py --benchmark=latency --buffer=numpy \
> $RESULTS_DIR/$SLURM_JOB_NAME-latency-multi-$container.txt
srun --mpi=pmi2 \
singularity exec \
--bind=$PROJECT_DIR \
$PROJECT_DIR/containers/$container \
python3 $OSU_PY_BENCHMARK_DIR/run.py --benchmark=allgather --buffer=numpy \
> $RESULTS_DIR/$SLURM_JOB_NAME-allgather-multi-$container.txt
done
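Unlike the bind-mount variant, this script passes --mpi=pmi2 so that the container's generic MPICH wires up its ranks through Slurm's PMI2 interface rather than through the host MPI libraries. Whether PMI2 is available can be checked on the host before submitting:

srun --mpi=list

If pmi2 does not appear in the list, the hybrid approach needs a different process-management interface.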
@@ -1,4 +1,4 @@
-#!/usr/bin/env bash
+#!/bin/bash -e
#
# A LUMI SLURM batch script for the LUMI mpi4py MPICH example from
# https://github.com/DeiC-HPC/cotainr
@@ -21,4 +21,4 @@ PROJECT_DIR=
export MPIR_CVAR_DEBUG_SUMMARY=1
export FI_LOG_LEVEL=Info

-srun python $PROJECT_DIR/mpi_hello_world.py
+srun python3 $PROJECT_DIR/mpi_hello_world.py
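With MPIR_CVAR_DEBUG_SUMMARY=1 and FI_LOG_LEVEL=Info exported, the job output is verbose enough to show which libfabric provider MPICH selected. A rough way to skim for it (the exact log format is not guaranteed):

grep -i provider output_*.txt | sort -u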
46 changes: 46 additions & 0 deletions examples/LUMI/conda_mpi4py_mpich/run_cray_python_osu.sh
@@ -0,0 +1,46 @@
#!/bin/bash -e
#
# A LUMI SLURM batch script for the LUMI mpi4py MPICH example from
# https://github.com/DeiC-HPC/cotainr
# This script runs the OSU benchmarks with NumPy buffers,
# using the LUMI cray-python module.
#
#SBATCH --job-name=mpi4py-cray-python-osu
#SBATCH --nodes=2
#SBATCH --tasks-per-node=1
#SBATCH --output="output_%x_%j.txt"
#SBATCH --partition=small
#SBATCH --exclusive
#SBATCH --time=00:10:00
#SBATCH --account=project_<your_project_id>

module load cray-python

PROJECT_DIR=
OSU_PY_BENCHMARK_DIR=$PROJECT_DIR/osu-micro-benchmarks-7.0.1/python/
RESULTS_DIR=$PROJECT_DIR/test_results

set -x
mkdir -p $RESULTS_DIR

# Single node runs
srun --nodes=1 --tasks-per-node=2 \
python3 $OSU_PY_BENCHMARK_DIR/run.py --benchmark=bw --buffer=numpy \
> $RESULTS_DIR/$SLURM_JOB_NAME-bw-single.txt
srun --nodes=1 --tasks-per-node=2 \
python3 $OSU_PY_BENCHMARK_DIR/run.py --benchmark=latency --buffer=numpy \
> $RESULTS_DIR/$SLURM_JOB_NAME-latency-single.txt
srun --nodes=1 --tasks-per-node=2 \
python3 $OSU_PY_BENCHMARK_DIR/run.py --benchmark=allgather --buffer=numpy \
> $RESULTS_DIR/$SLURM_JOB_NAME-allgather-single.txt

# Multi node runs
srun \
python3 $OSU_PY_BENCHMARK_DIR/run.py --benchmark=bw --buffer=numpy \
> $RESULTS_DIR/$SLURM_JOB_NAME-bw-multi.txt
srun \
python3 $OSU_PY_BENCHMARK_DIR/run.py --benchmark=latency --buffer=numpy \
> $RESULTS_DIR/$SLURM_JOB_NAME-latency-multi.txt
srun \
python3 $OSU_PY_BENCHMARK_DIR/run.py --benchmark=allgather --buffer=numpy \
> $RESULTS_DIR/$SLURM_JOB_NAME-allgather-multi.txt
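This variant needs no container at all: the cray-python module ships an mpi4py built against Cray MPICH. A quick sanity check before submitting (a sketch; on a login node this only confirms the import resolves):

module load cray-python
python3 -c "from mpi4py import MPI; print(MPI.Get_library_version())"

The printed version string should mention Cray MPICH.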
@@ -1,4 +1,4 @@
-#!/usr/bin/env bash
+#!/bin/bash -e
#
# A LUMI SLURM batch script for the LUMI mpi4py MPICH example from
# https://github.com/DeiC-HPC/cotainr
@@ -25,7 +25,7 @@ cat > $PROJECT_DIR/run-script.sh << EOF
\$WITH_CONDA
# Run application
-python $PROJECT_DIR/mpi_hello_world.py
+python3 $PROJECT_DIR/mpi_hello_world.py
EOF
chmod +x $PROJECT_DIR/run-script.sh
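The escaping in the heredoc above controls when each variable expands: \$WITH_CONDA is written literally into run-script.sh and expands only when that script runs inside the container (where WITH_CONDA is expected to activate the bundled conda environment), while the unescaped $PROJECT_DIR expands immediately, as the batch script writes the file. A minimal illustration of the same pattern:

cat > demo.sh << EOF
\$WITH_CONDA
python3 $PROJECT_DIR/mpi_hello_world.py
EOF
# demo.sh now contains the literal text $WITH_CONDA on its first line,
# followed by the already-expanded absolute path to mpi_hello_world.py.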
