-
Notifications
You must be signed in to change notification settings - Fork 220
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Allow running MPI applications as non-root #383
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
FROM debian:buster | ||
|
||
RUN apt update && apt install -y --no-install-recommends \ | ||
build-essential \ | ||
libopenmpi-dev \ | ||
openmpi-bin \ | ||
openssh-server \ | ||
openssh-client \ | ||
&& rm -rf /var/lib/apt/lists/* | ||
# Add privilege separation directory to run sshd as root. | ||
RUN mkdir -p /var/run/sshd | ||
# Add capability to run sshd as non-root. | ||
RUN setcap CAP_NET_BIND_SERVICE=+eip /usr/sbin/sshd | ||
|
||
RUN useradd -m mpiuser | ||
WORKDIR /home/mpiuser | ||
COPY --chown=mpiuser sshd_config .sshd_config | ||
RUN sed -i 's/[ #]\(.*StrictHostKeyChecking \).*/ \1no/g' /etc/ssh/ssh_config | ||
COPY pi.cc /src/pi.cc | ||
RUN mpic++ /src/pi.cc -o /home/mpiuser/pi |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
# Pure MPI example | ||
|
||
This example shows how to run a pure MPI application. | ||
|
||
The program prints some basic information about the workers. | ||
Then, it calculates an approximate value for pi. | ||
|
||
## How to build Image | ||
|
||
```bash | ||
docker build -t mpi-pi . | ||
``` | ||
|
||
## Create MPIJob | ||
|
||
Modify `pi.yaml` to set up the image name from your own registry. | ||
|
||
Then, run: | ||
|
||
``` | ||
kubectl create -f pi.yaml | ||
``` | ||
|
||
The YAML shows how to run the binaries as a non-root user. |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
// Copyright 2021 The Kubeflow Authors. | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
#include "mpi.h" | ||
#include <random> | ||
#include <cstdio> | ||
|
||
int main(int argc, char *argv[]) { | ||
int rank, workers, proc_name_size; | ||
MPI_Init(&argc, &argv); | ||
MPI_Comm_rank(MPI_COMM_WORLD, &rank); | ||
MPI_Comm_size(MPI_COMM_WORLD, &workers); | ||
if (rank == 0) { | ||
printf("Workers: %d\n", workers); | ||
} | ||
char hostname[MPI_MAX_PROCESSOR_NAME]; | ||
MPI_Get_processor_name(hostname, &proc_name_size); | ||
printf("Rank %d on host %s\n", rank, hostname); | ||
|
||
std::minstd_rand generator(rank); | ||
std::uniform_real_distribution<double> distribution(-1.0, 1.0); | ||
double x, y; | ||
long long worker_count = 0; | ||
int worker_tests = 100000000; | ||
for (int i = 0; i < worker_tests; i++) { | ||
x = distribution(generator); | ||
y = distribution(generator); | ||
if (x * x + y * y <= 1.0) { | ||
worker_count++; | ||
} | ||
} | ||
long long total_count = 0; | ||
MPI_Reduce(&worker_count, &total_count, 1, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_WORLD); | ||
if (rank == 0) { | ||
double pi = 4 * (double)total_count / (double)(worker_tests) / (double)(workers); | ||
printf("pi is approximately %.16lf\n", pi); | ||
} | ||
MPI_Finalize(); | ||
return 0; | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
apiVersion: kubeflow.org/v2beta1 | ||
kind: MPIJob | ||
metadata: | ||
name: pi | ||
spec: | ||
slotsPerWorker: 1 | ||
cleanPodPolicy: Running | ||
sshAuthMountPath: /home/mpiuser/.ssh | ||
mpiReplicaSpecs: | ||
Launcher: | ||
replicas: 1 | ||
template: | ||
spec: | ||
containers: | ||
- image: docker.io/kubeflow/mpi-pi | ||
name: mpi-launcher | ||
securityContext: | ||
runAsUser: 1000 | ||
command: | ||
- mpirun | ||
args: | ||
- -np | ||
- "2" | ||
- /home/mpiuser/pi | ||
resources: | ||
limits: | ||
cpu: 1 | ||
memory: 2Gi | ||
Worker: | ||
replicas: 2 | ||
template: | ||
spec: | ||
containers: | ||
- image: docker.io/kubeflow/mpi-pi | ||
name: mpi-worker | ||
securityContext: | ||
runAsUser: 1000 | ||
capabilities: | ||
add: | ||
- NET_BIND_SERVICE | ||
command: | ||
- /usr/sbin/sshd | ||
args: | ||
- -De | ||
- -f | ||
- /home/mpiuser/.sshd_config | ||
resources: | ||
limits: | ||
cpu: 2 | ||
memory: 4Gi |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
PidFile /home/mpiuser/sshd.pid | ||
HostKey /home/mpiuser/.ssh/id_rsa |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -46,6 +46,9 @@ func validateMPIJobSpec(spec *kubeflow.MPIJobSpec, path *field.Path) field.Error | |
} else if !validCleanPolicies.Has(string(*spec.CleanPodPolicy)) { | ||
errs = append(errs, field.NotSupported(path.Child("cleanPodPolicy"), *spec.CleanPodPolicy, validCleanPolicies.List())) | ||
} | ||
if spec.SSHAuthMountPath == "" { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. does this validation contradict with the defaulting here? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I wanted to keep validation independent from defaulting. But yes, if you put them together, this line of code would not be reachable. |
||
errs = append(errs, field.Required(path.Child("sshAuthMountPath"), "must have a mount path for SSH credentials")) | ||
} | ||
return errs | ||
} | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
could you make the default path (
"/root/.ssh"
) a constant?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think that makes sense for controller code, where a value might be used more than once. However, this file is all about defaults. Creating a constant would just make the reader have to jump from one line to another to see what the default for a field is.