Skip to content

Commit

Permalink
Add an example image that uses Intel MPI
Browse files Browse the repository at this point in the history
  • Loading branch information
alculquicondor committed Jul 28, 2021
1 parent b6113fb commit 36056ff
Show file tree
Hide file tree
Showing 4 changed files with 137 additions and 4 deletions.
36 changes: 36 additions & 0 deletions examples/pi/intel-entrypoint.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#!/usr/bin/env bash

# Load the Intel oneAPI environment into this shell when the toolkit's
# setvars.sh script is present; when the file is absent nothing is sourced.
set_intel_vars=/opt/intel/oneapi/setvars.sh
if [[ -f $set_intel_vars ]]
then
  . $set_intel_vars
fi

# Wait until a host name can be resolved, retrying with exponential backoff.
#
# Arguments:
#   $1 - host name to look up with nslookup.
# Outputs:
#   Progress messages on stdout; nslookup's own stdout is discarded.
# Returns:
#   0 once the name resolves, 1 when the first attempt and all max_retry
#   retries have failed.
function resolve_host() {
  # Keep all working variables local so they cannot clobber the caller's.
  local host="$1"
  local max_retry=5
  local counter=0
  local backoff=0.1
  until nslookup "$host" > /dev/null
  do
    if [ "$counter" -eq "$max_retry" ]; then
      echo "Couldn't resolve $host"
      # Explicit failure status: a bare `return` would pass on echo's status (0).
      return 1
    fi
    sleep "$backoff"
    echo "Couldn't resolve $host... Retrying"
    counter=$((counter + 1))
    # Double the delay. awk is used because shell arithmetic is integer-only.
    backoff=$(awk -v b="$backoff" 'BEGIN { print b + b }')
  done
  echo "Resolved $host"
}

# On the launcher, wait until this pod's own name and every host listed in
# /etc/mpi/hostfile can be resolved before starting the requested command.
if [ "$K_MPI_JOB_ROLE" == "launcher" ]; then
  resolve_host "$HOSTNAME"
  # `read -r host _` keeps only the first whitespace-separated field of each
  # line; any further fields on the line are ignored. The `|| [ -n "$host" ]`
  # clause also handles a last line that has no trailing newline.
  while read -r host _ || [ -n "$host" ]
  do
    # Drop an optional ":<slots>" suffix (Intel MPI machine-file syntax).
    host="${host%%:*}"
    # Skip blank lines: an empty name would run nslookup without arguments.
    [ -n "$host" ] || continue
    # Detach stdin so nothing inside the call can consume hostfile lines.
    resolve_host "$host" < /dev/null
  done < /etc/mpi/hostfile
fi

# Replace this shell with the command passed to the entrypoint.
exec "$@"
45 changes: 45 additions & 0 deletions examples/pi/intel.Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# Throwaway stage that fetches the Intel repository signing key; the final
# stage copies only key.PUB out of it.
# The base image tag is pinned so rebuilds do not silently follow `latest`.
FROM bash:5.1 AS downloader

RUN wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB -O key.PUB


FROM debian:buster

COPY --from=downloader key.PUB /tmp/key.PUB

# Register the Intel oneAPI apt repository and its signing key.
# gnupg2 is only needed by apt-key, so it is removed again in the same layer.
# ca-certificates stays installed: the repository is served over HTTPS, so
# later `apt-get update` runs need a CA bundle to verify it.
RUN apt-get update \
    && apt-get install -y --no-install-recommends ca-certificates gnupg2 \
    && apt-key add /tmp/key.PUB \
    && rm /tmp/key.PUB \
    && echo "deb https://apt.repos.intel.com/oneapi all main" > /etc/apt/sources.list.d/oneAPI.list \
    && apt-get remove -y gnupg2 \
    && apt-get autoremove -y \
    && rm -rf /var/lib/apt/lists/*

# SSH, DNS tooling and the Intel oneAPI compiler/MPI packages.
# - dnsutils provides nslookup, which intel-entrypoint.sh calls.
# - libcap2-bin provides setcap, which a later step applies to sshd; it is
#   listed explicitly instead of relying on another package pulling it in.
# Package lists are updated and removed in the same layer as the install.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        binutils \
        dnsutils \
        intel-oneapi-compiler-dpcpp-cpp \
        intel-oneapi-mpi \
        intel-oneapi-mpi-devel \
        libcap2-bin \
        libstdc++-8-dev \
        openssh-client \
        openssh-server \
    && rm -rf /var/lib/apt/lists/*

# Add the privilege separation directory needed to run sshd as root, and the
# CAP_NET_BIND_SERVICE file capability needed to run sshd as non-root.
RUN mkdir -p /var/run/sshd \
    && setcap CAP_NET_BIND_SERVICE=+eip /usr/sbin/sshd

# The UID is pinned because the example MPIJob manifest runs the containers
# with `runAsUser: 1000`; no USER is set here, the runtime chooses the user.
RUN useradd -m -u 1000 mpiuser
WORKDIR /home/mpiuser
COPY intel-entrypoint.sh /entrypoint.sh
ENTRYPOINT ["/entrypoint.sh"]
COPY --chown=mpiuser sshd_config .sshd_config
# Force `StrictHostKeyChecking no` in the system-wide ssh client configuration.
RUN sed -i 's/[ #]\(.*StrictHostKeyChecking \).*/ \1no/g' /etc/ssh/ssh_config

# I_MPI_CC / I_MPI_CXX name the C and C++ compilers for the Intel MPI
# compiler wrappers; they are set to clang and clang++ here.
ENV I_MPI_CC=clang \
    I_MPI_CXX=clang++
COPY pi.cc /src/pi.cc
# setvars.sh is sourced in the same bash process that runs mpicxx, because
# environment changes made by one RUN step do not carry over to the next.
RUN ["bash", "-c", "source /opt/intel/oneapi/setvars.sh && mpicxx /src/pi.cc -o /home/mpiuser/pi"]
52 changes: 52 additions & 0 deletions examples/pi/pi-intel.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# MPIJob that runs the pi example with Intel MPI: one launcher that starts
# `mpirun -n 2` and two workers that run sshd.
apiVersion: kubeflow.org/v2beta1
kind: MPIJob
metadata:
  name: pi
spec:
  slotsPerWorker: 1
  cleanPodPolicy: Running
  sshAuthMountPath: /home/mpiuser/.ssh
  mpiImplementation: Intel
  mpiReplicaSpecs:
    Launcher:
      replicas: 1
      template:
        spec:
          containers:
          - image: docker.io/kubeflow/mpi-pi:intel
            imagePullPolicy: Always
            name: mpi-launcher
            securityContext:
              runAsUser: 1000
            # No `command` is set, so the image ENTRYPOINT runs and receives
            # these args as the command to exec.
            args:
            - mpirun
            - -n
            - "2"
            - /home/mpiuser/pi
            resources:
              limits:
                cpu: 1
                memory: 1Gi
    Worker:
      replicas: 2
      template:
        spec:
          containers:
          - image: docker.io/kubeflow/mpi-pi:intel
            imagePullPolicy: Always
            name: mpi-worker
            securityContext:
              runAsUser: 1000
              capabilities:
                add:
                - NET_BIND_SERVICE
            # As for the launcher, `command` is left unset so the image
            # ENTRYPOINT runs first and then execs sshd with these args.
            args:
            - /usr/sbin/sshd
            - -De
            - -f
            - /home/mpiuser/.sshd_config
            resources:
              limits:
                cpu: 1
                memory: 1Gi
8 changes: 4 additions & 4 deletions examples/pi/pi.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,13 @@ spec:
command:
- mpirun
args:
- -np
- -n
- "2"
- /home/mpiuser/pi
resources:
limits:
cpu: 1
memory: 2Gi
memory: 1Gi
Worker:
replicas: 2
template:
Expand All @@ -46,5 +46,5 @@ spec:
- /home/mpiuser/.sshd_config
resources:
limits:
cpu: 2
memory: 4Gi
cpu: 1
memory: 1Gi

0 comments on commit 36056ff

Please sign in to comment.