Skip to content

Commit

Permalink
feat: Add the ability to run the docker Slurm images as compute or head nodes (issue #127)
Browse files Browse the repository at this point in the history
  • Loading branch information
neilmunday committed May 1, 2024
1 parent c95b426 commit 96b1482
Show file tree
Hide file tree
Showing 12 changed files with 103 additions and 50 deletions.
5 changes: 4 additions & 1 deletion tests/integration/docker-slurm/Dockerfile.amzn2
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,10 @@ RUN yum update -y && \
useradd -r -g slurm -d /var/empty/slurm -m -s /bin/bash slurm && \
groupadd test && \
useradd -g test -d /home/test -m test && \
install -d -o slurm -g slurm /etc/slurm /var/spool/slurm /var/log/slurm
install -d -o slurm -g slurm /etc/slurm /var/spool/slurm /var/log/slurm && \
dd if=/dev/urandom bs=1 count=1024 > /etc/munge/munge.key 2>/dev/null && \
chown munge:munge /etc/munge/munge.key && \
chmod 0400 /etc/munge/munge.key

COPY supervisord.conf /etc/
COPY --chown=slurm slurm.*.conf /etc/slurm/
Expand Down
5 changes: 4 additions & 1 deletion tests/integration/docker-slurm/Dockerfile.amzn2023
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,10 @@ RUN dnf update -y && \
useradd -r -g slurm -d /var/empty/slurm -m -s /bin/bash slurm && \
groupadd test && \
useradd -g test -d /home/test -m test && \
install -d -o slurm -g slurm /etc/slurm /var/spool/slurm /var/log/slurm
install -d -o slurm -g slurm /etc/slurm /var/spool/slurm /var/log/slurm && \
dd if=/dev/urandom bs=1 count=1024 > /etc/munge/munge.key 2>/dev/null && \
chown munge:munge /etc/munge/munge.key && \
chmod 0400 /etc/munge/munge.key

COPY supervisord.conf /etc/
COPY --chown=slurm slurm.*.conf /etc/slurm/
Expand Down
5 changes: 4 additions & 1 deletion tests/integration/docker-slurm/Dockerfile.el7
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,10 @@ RUN yum update -y && \
useradd -r -g slurm -d /var/empty/slurm -m -s /bin/bash slurm && \
groupadd test && \
useradd -g test -d /home/test -m test && \
install -d -o slurm -g slurm /etc/slurm /var/spool/slurm /var/log/slurm
install -d -o slurm -g slurm /etc/slurm /var/spool/slurm /var/log/slurm && \
dd if=/dev/urandom bs=1 count=1024 > /etc/munge/munge.key 2>/dev/null && \
chown munge:munge /etc/munge/munge.key && \
chmod 0400 /etc/munge/munge.key

COPY supervisord.conf /etc/
COPY --chown=slurm slurm.*.conf /etc/slurm/
Expand Down
5 changes: 4 additions & 1 deletion tests/integration/docker-slurm/Dockerfile.el8
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,10 @@ RUN dnf install -y dnf-plugins-core && \
useradd -r -g slurm -d /var/empty/slurm -m -s /bin/bash slurm && \
groupadd test && \
useradd -g test -d /home/test -m test && \
install -d -o slurm -g slurm /etc/slurm /var/spool/slurm /var/log/slurm
install -d -o slurm -g slurm /etc/slurm /var/spool/slurm /var/log/slurm && \
dd if=/dev/urandom bs=1 count=1024 > /etc/munge/munge.key 2>/dev/null && \
chown munge:munge /etc/munge/munge.key && \
chmod 0400 /etc/munge/munge.key

COPY supervisord.conf /etc/
COPY --chown=slurm slurm.*.conf /etc/slurm/
Expand Down
5 changes: 4 additions & 1 deletion tests/integration/docker-slurm/Dockerfile.el9
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,10 @@ RUN dnf install -y dnf-plugins-core && \
useradd -r -g slurm -d /var/empty/slurm -m -s /bin/bash slurm && \
groupadd test && \
useradd -g test -d /home/test -m test && \
install -d -o slurm -g slurm /etc/slurm /var/spool/slurm /var/log/slurm
install -d -o slurm -g slurm /etc/slurm /var/spool/slurm /var/log/slurm && \
dd if=/dev/urandom bs=1 count=1024 > /etc/munge/munge.key 2>/dev/null && \
chown munge:munge /etc/munge/munge.key && \
chmod 0400 /etc/munge/munge.key

COPY supervisord.conf /etc/
COPY --chown=slurm slurm.*.conf /etc/slurm/
Expand Down
5 changes: 4 additions & 1 deletion tests/integration/docker-slurm/Dockerfile.sl15
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,10 @@ RUN zypper update -y && \
groupadd test && \
useradd -g test -d /home/test -m test && \
install -d -o slurm -g slurm /etc/slurm /var/spool/slurm /var/log/slurm && \
rm -rfv /root/slurm*
rm -rfv /root/slurm* && \
dd if=/dev/urandom bs=1 count=1024 > /etc/munge/munge.key 2>/dev/null && \
chown munge:munge /etc/munge/munge.key && \
chmod 0400 /etc/munge/munge.key

COPY supervisord.conf /etc/
COPY --chown=slurm slurm.*.conf /etc/slurm/
Expand Down
5 changes: 4 additions & 1 deletion tests/integration/docker-slurm/Dockerfile.ub20
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,10 @@ RUN DEBIAN_FRONTEND=noninteractive apt-get update -y && \
libmariadb-dev \
libmunge-dev && \
apt-get clean && \
rm -rfv /root/slurm*
rm -rfv /root/slurm* && \
dd if=/dev/urandom bs=1 count=1024 > /etc/munge/munge.key 2>/dev/null && \
chown munge:munge /etc/munge/munge.key && \
chmod 0400 /etc/munge/munge.key

COPY supervisord.conf /etc/
COPY --chown=slurm slurm.*.conf /etc/slurm/
Expand Down
5 changes: 4 additions & 1 deletion tests/integration/docker-slurm/Dockerfile.ub22
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,10 @@ RUN DEBIAN_FRONTEND=noninteractive apt-get update -y && \
libmariadb-dev \
libmunge-dev && \
apt-get clean && \
rm -rfv /root/slurm*
rm -rfv /root/slurm* && \
dd if=/dev/urandom bs=1 count=1024 > /etc/munge/munge.key 2>/dev/null && \
chown munge:munge /etc/munge/munge.key && \
chmod 0400 /etc/munge/munge.key

COPY supervisord.conf /etc/
COPY --chown=slurm slurm.*.conf /etc/slurm/
Expand Down
92 changes: 62 additions & 30 deletions tests/integration/docker-slurm/entrypoint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,26 @@ function die {
exit 1
}

# Runtime configuration, read from the environment with fallbacks:
#   ROLE        - container role, "HEAD" or "COMPUTE" (default: HEAD)
#   NODE_PREFIX - prefix used when generating node names (default: compute0)
#   NODES       - number of nodes to generate entries for (default: 1)
#
# ${VAR:-default} applies the default when VAR is unset or empty. Unlike the
# unquoted `[ -z $VAR ]` test, it cannot be broken by values that contain
# whitespace or glob characters.
ROLE="${ROLE:-HEAD}"
NODE_PREFIX="${NODE_PREFIX:-compute0}"
NODES="${NODES:-1}"

# Only the two known roles are accepted; anything else aborts via die.
if [[ "$ROLE" != "HEAD" ]] && [[ "$ROLE" != "COMPUTE" ]]; then
  die "Unsupported role: ${ROLE}"
fi

echo "using $NODES node(s) with prefix: $NODE_PREFIX"

echo "container role: $ROLE"

mkdir -p /var/run/mysqld
chown mysql. /var/run/mysqld
chown munge. /var/log/munge
Expand All @@ -17,43 +37,55 @@ chown munge. /var/run/munge

supervisord --configuration /etc/supervisord.conf

supervisorctl start mysqld
supervisorctl start munged

for i in `seq 1 60`; do
if [ -e /var/lib/mysql/mysql.sock ] || [ -e /run/mysql/mysql.sock ]; then
# echo "mysqld started"
break
fi
sleep 1
echo -e "# auto generated\n" > /etc/slurm/nodes.conf
# create nodes file
for i in `seq 1 $NODES`; do
cat << EOF >> /etc/slurm/nodes.conf
NodeName=${NODE_PREFIX}${i} CPUs=1 Boards=1 SocketsPerBoard=1 CoresPerSocket=1 ThreadsPerCore=1 RealMemory=500
EOF
done

if ! mysql -e "show databases;" > /dev/null 2>&1; then
echo "failed to query mysql - did it start?"
#exit 1
fi
# create controller file
cat << EOF >> /etc/slurm/controller.conf
# auto generated
ControlMachine=${NODE_PREFIX}1
EOF

# create Slurm database
mysql -e "CREATE DATABASE IF NOT EXISTS slurm_acct_db;" || die "failed to create database"
if [ -e /etc/redhat-release ] && grep -q 'release 7' /etc/redhat-release; then
mysql -e "GRANT ALL ON slurm_acct_db.* TO 'slurm'@'localhost' IDENTIFIED BY 'password'" || die "failed to create slurm mysql user"
elif [ -e /etc/system-release ] && grep -q 'Amazon Linux release 2' /etc/system-release; then
mysql -e "GRANT ALL ON slurm_acct_db.* TO 'slurm'@'localhost' IDENTIFIED BY 'password'" || die "failed to create slurm mysql user"
else
mysql -e "CREATE USER IF NOT EXISTS 'slurm'@'localhost' identified by 'password';" || die "failed to create slurm mysql user"
fi
mysql -e "GRANT ALL ON slurm_acct_db.* TO 'slurm'@'localhost';" || die "failed to grant privs to slurm mysql user"
if [[ "$ROLE" == "HEAD" ]]; then

supervisorctl start mysqld

for i in `seq 1 60`; do
if [ -e /var/lib/mysql/mysql.sock ] || [ -e /run/mysql/mysql.sock ]; then
# echo "mysqld started"
break
fi
sleep 1
done

# create munge key
if [ ! -e /etc/munge/munge.key ]; then
dd if=/dev/urandom bs=1 count=1024 > /etc/munge/munge.key 2>/dev/null
chown munge:munge /etc/munge/munge.key
chmod 0400 /etc/munge/munge.key
if ! mysql -e "show databases;" > /dev/null 2>&1; then
echo "failed to query mysql - did it start?"
#exit 1
fi

# create Slurm database
mysql -e "CREATE DATABASE IF NOT EXISTS slurm_acct_db;" || die "failed to create database"
if [ -e /etc/redhat-release ] && grep -q 'release 7' /etc/redhat-release; then
mysql -e "GRANT ALL ON slurm_acct_db.* TO 'slurm'@'localhost' IDENTIFIED BY 'password'" || die "failed to create slurm mysql user"
elif [ -e /etc/system-release ] && grep -q 'Amazon Linux release 2' /etc/system-release; then
mysql -e "GRANT ALL ON slurm_acct_db.* TO 'slurm'@'localhost' IDENTIFIED BY 'password'" || die "failed to create slurm mysql user"
else
mysql -e "CREATE USER IF NOT EXISTS 'slurm'@'localhost' identified by 'password';" || die "failed to create slurm mysql user"
fi
mysql -e "GRANT ALL ON slurm_acct_db.* TO 'slurm'@'localhost';" || die "failed to grant privs to slurm mysql user"

supervisorctl start slurmdbd || die "slurmdbd failed to start" /var/log/slurm/slurmdbd.log
supervisorctl start slurmctld || die "slurmctld failed to start" /var/log/slurm/slurmctld.log
fi

supervisorctl start munged
supervisorctl start slurmdbd || die "slurmdbd failed to start" /var/log/slurm/slurmdbd.log
supervisorctl start slurmctld || die "slurmctld failed to start" /var/log/slurm/slurmctld.log
supervisorctl start slurmd || die "slurmd failed to start" /var/log/slurm/slurmd.log
supervisorctl start slurmd || die "slurmd failed to start" /var/log/slurm/slurmd.$HOSTNAME.log

sinfo

Expand Down
7 changes: 3 additions & 4 deletions tests/integration/docker-slurm/slurm.21.conf
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
#
# See the slurm.conf man page for more information.
#
Include /etc/slurm/controller.conf
ClusterName=test
ControlMachine=localhost
MailProg=/usr/bin/mailx
#ControlAddr=
#BackupController=
Expand Down Expand Up @@ -90,7 +90,6 @@ AccountingStoreFlags=job_comment
#AccountingStorageUser=
#
# COMPUTE NODES
Include /etc/slurm/nodes.conf

NodeName=node01 NodeHostname=localhost NodeAddr=127.0.0.1 CPUs=1 Boards=1 SocketsPerBoard=1 CoresPerSocket=1 ThreadsPerCore=1 RealMemory=500

PartitionName=all Nodes=node01 Default=YES MaxTime=INFINITE State=UP
PartitionName=all Nodes=ALL Default=YES MaxTime=INFINITE State=UP
7 changes: 3 additions & 4 deletions tests/integration/docker-slurm/slurm.22.conf
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
#
# See the slurm.conf man page for more information.
#
Include /etc/slurm/controller.conf
ClusterName=test
ControlMachine=localhost
MailProg=/usr/bin/mailx
#ControlAddr=
#BackupController=
Expand Down Expand Up @@ -90,7 +90,6 @@ AccountingStoreFlags=job_comment
#AccountingStorageUser=
#
# COMPUTE NODES
Include /etc/slurm/nodes.conf

NodeName=node01 NodeHostname=localhost NodeAddr=127.0.0.1 CPUs=1 Boards=1 SocketsPerBoard=1 CoresPerSocket=1 ThreadsPerCore=1 RealMemory=500

PartitionName=all Nodes=node01 Default=YES MaxTime=INFINITE State=UP
PartitionName=all Nodes=ALL Default=YES MaxTime=INFINITE State=UP
7 changes: 3 additions & 4 deletions tests/integration/docker-slurm/slurm.23.conf
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
#
# See the slurm.conf man page for more information.
#
Include /etc/slurm/controller.conf
ClusterName=test
ControlMachine=localhost
MailProg=/usr/bin/mailx
#ControlAddr=
#BackupController=
Expand Down Expand Up @@ -90,7 +90,6 @@ AccountingStoreFlags=job_comment
#AccountingStorageUser=
#
# COMPUTE NODES
Include /etc/slurm/nodes.conf

NodeName=node01 NodeHostname=localhost NodeAddr=127.0.0.1 CPUs=1 Boards=1 SocketsPerBoard=1 CoresPerSocket=1 ThreadsPerCore=1 RealMemory=500

PartitionName=all Nodes=node01 Default=YES MaxTime=INFINITE State=UP
PartitionName=all Nodes=ALL Default=YES MaxTime=INFINITE State=UP

0 comments on commit 96b1482

Please sign in to comment.