Skip to content

Commit

Permalink
Allow running MPI applications as non-root (#383)
Browse files Browse the repository at this point in the history
* Allow running MPI applications as non-root

Adds the spec field sshAuthMountPath for MPIJob.
The init script sets the permissions and ownership based on the securityContext of the launcherPod

* Add pure MPI sample that run as non-root
  • Loading branch information
alculquicondor authored Jul 27, 2021
1 parent 84604c8 commit 9ce6467
Show file tree
Hide file tree
Showing 16 changed files with 781 additions and 217 deletions.
20 changes: 20 additions & 0 deletions examples/pi/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
FROM debian:buster

RUN apt update && apt install -y --no-install-recommends \
build-essential \
libopenmpi-dev \
openmpi-bin \
openssh-server \
openssh-client \
&& rm -rf /var/lib/apt/lists/*
# Add priviledge separation directoy to run sshd as root.
RUN mkdir -p /var/run/sshd
# Add capability to run sshd as non-root.
RUN setcap CAP_NET_BIND_SERVICE=+eip /usr/sbin/sshd

RUN useradd -m mpiuser
WORKDIR /home/mpiuser
COPY --chown=mpiuser sshd_config .sshd_config
RUN sed -i 's/[ #]\(.*StrictHostKeyChecking \).*/ \1no/g' /etc/ssh/ssh_config
COPY pi.cc /src/pi.cc
RUN mpic++ /src/pi.cc -o /home/mpiuser/pi
24 changes: 24 additions & 0 deletions examples/pi/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Pure MPI example

This example shows to run a pure MPI application.

The program prints some basic information about the workers.
Then, it calculates an approximate value for pi.

## How to build Image

```bash
docker build -t mpi-pi .
```

## Create MPIJob

Modify `pi.yaml` to set up the image name from your own registry.

Then, run:

```
kubectl create -f pi.yaml
```

The YAML shows how to run the binaries as a non-root user.
51 changes: 51 additions & 0 deletions examples/pi/pi.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
// Copyright 2021 The Kubeflow Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "mpi.h"
#include <random>
#include <cstdio>

int main(int argc, char *argv[]) {
int rank, workers, proc_name_size;
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &workers);
if (rank == 0) {
printf("Workers: %d\n", workers);
}
char hostname[MPI_MAX_PROCESSOR_NAME];
MPI_Get_processor_name(hostname, &proc_name_size);
printf("Rank %d on host %s\n", rank, hostname);

std::minstd_rand generator(rank);
std::uniform_real_distribution<double> distribution(-1.0, 1.0);
double x, y;
long long worker_count = 0;
int worker_tests = 100000000;
for (int i = 0; i < worker_tests; i++) {
x = distribution(generator);
y = distribution(generator);
if (x * x + y * y <= 1.0) {
worker_count++;
}
}
long long total_count = 0;
MPI_Reduce(&worker_count, &total_count, 1, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_WORLD);
if (rank == 0) {
double pi = 4 * (double)total_count / (double)(worker_tests) / (double)(workers);
printf("pi is approximately %.16lf\n", pi);
}
MPI_Finalize();
return 0;
}
50 changes: 50 additions & 0 deletions examples/pi/pi.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
apiVersion: kubeflow.org/v2beta1
kind: MPIJob
metadata:
name: pi
spec:
slotsPerWorker: 1
cleanPodPolicy: Running
sshAuthMountPath: /home/mpiuser/.ssh
mpiReplicaSpecs:
Launcher:
replicas: 1
template:
spec:
containers:
- image: docker.io/kubeflow/mpi-pi
name: mpi-launcher
securityContext:
runAsUser: 1000
command:
- mpirun
args:
- -np
- "2"
- /home/mpiuser/pi
resources:
limits:
cpu: 1
memory: 2Gi
Worker:
replicas: 2
template:
spec:
containers:
- image: docker.io/kubeflow/mpi-pi
name: mpi-worker
securityContext:
runAsUser: 1000
capabilities:
add:
- NET_BIND_SERVICE
command:
- /usr/sbin/sshd
args:
- -De
- -f
- /home/mpiuser/.sshd_config
resources:
limits:
cpu: 2
memory: 4Gi
2 changes: 2 additions & 0 deletions examples/pi/sshd_config
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
PidFile /home/mpiuser/sshd.pid
HostKey /home/mpiuser/.ssh/id_rsa
2 changes: 2 additions & 0 deletions manifests/base/crd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,8 @@ spec:
type: string
enum: ["None", "Running", "All"]
description: "Defines which Pods must be deleted after the Job completes"
sshAuthMountPath:
type: string
mpiReplicaSpecs:
type: object
properties:
Expand Down
3 changes: 3 additions & 0 deletions v2/cmd/mpi-operator/app/options/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ type ServerOption struct {
LockNamespace string
QPS int
Burst int
ScriptingImage string
}

// NewServerOption creates a new CMServer with a default config.
Expand Down Expand Up @@ -68,4 +69,6 @@ func (s *ServerOption) AddFlags(fs *flag.FlagSet) {

fs.IntVar(&s.QPS, "kube-api-qps", 5, "QPS indicates the maximum QPS to the master from this client.")
fs.IntVar(&s.Burst, "kube-api-burst", 10, "Maximum burst for throttle.")

fs.StringVar(&s.ScriptingImage, "scripting-image", "alpine:3.14", "Container image used for scripting, such as in init containers.")
}
3 changes: 2 additions & 1 deletion v2/cmd/mpi-operator/app/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,8 @@ func Run(opt *options.ServerOption) error {
kubeInformerFactory.Core().V1().Pods(),
podgroupsInformer,
kubeflowInformerFactory.Kubeflow().V2beta1().MPIJobs(),
opt.GangSchedulingName)
opt.GangSchedulingName,
opt.ScriptingImage)

go kubeInformerFactory.Start(ctx.Done())
go kubeflowInformerFactory.Start(ctx.Done())
Expand Down
3 changes: 3 additions & 0 deletions v2/pkg/apis/kubeflow/v2beta1/default.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,9 @@ func SetDefaults_MPIJob(mpiJob *MPIJob) {
if mpiJob.Spec.SlotsPerWorker == nil {
mpiJob.Spec.SlotsPerWorker = newInt32(1)
}
if mpiJob.Spec.SSHAuthMountPath == "" {
mpiJob.Spec.SSHAuthMountPath = "/root/.ssh"
}

// set default to Launcher
setDefaultsTypeLauncher(mpiJob.Spec.MPIReplicaSpecs[MPIReplicaTypeLauncher])
Expand Down
25 changes: 15 additions & 10 deletions v2/pkg/apis/kubeflow/v2beta1/default_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,22 +29,25 @@ func TestSetDefaults_MPIJob(t *testing.T) {
"base defaults": {
want: MPIJob{
Spec: MPIJobSpec{
SlotsPerWorker: newInt32(1),
CleanPodPolicy: newCleanPodPolicy(common.CleanPodPolicyNone),
SlotsPerWorker: newInt32(1),
CleanPodPolicy: newCleanPodPolicy(common.CleanPodPolicyNone),
SSHAuthMountPath: "/root/.ssh",
},
},
},
"base defaults overridden": {
job: MPIJob{
Spec: MPIJobSpec{
SlotsPerWorker: newInt32(10),
CleanPodPolicy: newCleanPodPolicy(common.CleanPodPolicyRunning),
SlotsPerWorker: newInt32(10),
CleanPodPolicy: newCleanPodPolicy(common.CleanPodPolicyRunning),
SSHAuthMountPath: "/home/mpiuser/.ssh",
},
},
want: MPIJob{
Spec: MPIJobSpec{
SlotsPerWorker: newInt32(10),
CleanPodPolicy: newCleanPodPolicy(common.CleanPodPolicyRunning),
SlotsPerWorker: newInt32(10),
CleanPodPolicy: newCleanPodPolicy(common.CleanPodPolicyRunning),
SSHAuthMountPath: "/home/mpiuser/.ssh",
},
},
},
Expand All @@ -58,8 +61,9 @@ func TestSetDefaults_MPIJob(t *testing.T) {
},
want: MPIJob{
Spec: MPIJobSpec{
SlotsPerWorker: newInt32(1),
CleanPodPolicy: newCleanPodPolicy(common.CleanPodPolicyNone),
SlotsPerWorker: newInt32(1),
CleanPodPolicy: newCleanPodPolicy(common.CleanPodPolicyNone),
SSHAuthMountPath: "/root/.ssh",
MPIReplicaSpecs: map[MPIReplicaType]*common.ReplicaSpec{
MPIReplicaTypeLauncher: {
Replicas: newInt32(1),
Expand All @@ -79,8 +83,9 @@ func TestSetDefaults_MPIJob(t *testing.T) {
},
want: MPIJob{
Spec: MPIJobSpec{
SlotsPerWorker: newInt32(1),
CleanPodPolicy: newCleanPodPolicy(common.CleanPodPolicyNone),
SlotsPerWorker: newInt32(1),
CleanPodPolicy: newCleanPodPolicy(common.CleanPodPolicyNone),
SSHAuthMountPath: "/root/.ssh",
MPIReplicaSpecs: map[MPIReplicaType]*common.ReplicaSpec{
MPIReplicaTypeWorker: {
Replicas: newInt32(0),
Expand Down
6 changes: 5 additions & 1 deletion v2/pkg/apis/kubeflow/v2beta1/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,13 @@ type MPIJobSpec struct {
// Defaults to None.
CleanPodPolicy *common.CleanPodPolicy `json:"cleanPodPolicy,omitempty"`

// `MPIReplicaSpecs` contains maps from `MPIReplicaType` to `ReplicaSpec` that
// MPIReplicaSpecs contains maps from `MPIReplicaType` to `ReplicaSpec` that
// specify the MPI replicas to run.
MPIReplicaSpecs map[MPIReplicaType]*common.ReplicaSpec `json:"mpiReplicaSpecs"`

// SSHAuthMountPath is the directory where SSH keys are mounted.
// Defaults to "/root/.ssh".
SSHAuthMountPath string `json:"sshAuthMountPath,omitempty"`
}

// MPIReplicaType is the type for MPIReplica.
Expand Down
3 changes: 3 additions & 0 deletions v2/pkg/apis/kubeflow/validation/validation.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,9 @@ func validateMPIJobSpec(spec *kubeflow.MPIJobSpec, path *field.Path) field.Error
} else if !validCleanPolicies.Has(string(*spec.CleanPodPolicy)) {
errs = append(errs, field.NotSupported(path.Child("cleanPodPolicy"), *spec.CleanPodPolicy, validCleanPolicies.List()))
}
if spec.SSHAuthMountPath == "" {
errs = append(errs, field.Required(path.Child("sshAuthMountPath"), "must have a mount path for SSH credentials"))
}
return errs
}

Expand Down
31 changes: 20 additions & 11 deletions v2/pkg/apis/kubeflow/validation/validation_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,9 @@ func TestValidateMPIJob(t *testing.T) {
Name: "foo",
},
Spec: v2beta1.MPIJobSpec{
SlotsPerWorker: newInt32(2),
CleanPodPolicy: newCleanPodPolicy(common.CleanPodPolicyRunning),
SlotsPerWorker: newInt32(2),
CleanPodPolicy: newCleanPodPolicy(common.CleanPodPolicyRunning),
SSHAuthMountPath: "/home/mpiuser/.ssh",
MPIReplicaSpecs: map[v2beta1.MPIReplicaType]*common.ReplicaSpec{
v2beta1.MPIReplicaTypeLauncher: {
Replicas: newInt32(1),
Expand All @@ -58,8 +59,9 @@ func TestValidateMPIJob(t *testing.T) {
Name: "foo",
},
Spec: v2beta1.MPIJobSpec{
SlotsPerWorker: newInt32(2),
CleanPodPolicy: newCleanPodPolicy(common.CleanPodPolicyRunning),
SlotsPerWorker: newInt32(2),
CleanPodPolicy: newCleanPodPolicy(common.CleanPodPolicyRunning),
SSHAuthMountPath: "/home/mpiuser/.ssh",
MPIReplicaSpecs: map[v2beta1.MPIReplicaType]*common.ReplicaSpec{
v2beta1.MPIReplicaTypeLauncher: {
Replicas: newInt32(1),
Expand Down Expand Up @@ -99,6 +101,10 @@ func TestValidateMPIJob(t *testing.T) {
Type: field.ErrorTypeRequired,
Field: "spec.cleanPodPolicy",
},
&field.Error{
Type: field.ErrorTypeRequired,
Field: "spec.sshAuthMountPath",
},
},
},
"invalid name": {
Expand Down Expand Up @@ -142,9 +148,10 @@ func TestValidateMPIJob(t *testing.T) {
Name: "foo",
},
Spec: v2beta1.MPIJobSpec{
SlotsPerWorker: newInt32(2),
CleanPodPolicy: newCleanPodPolicy(common.CleanPodPolicyRunning),
MPIReplicaSpecs: map[v2beta1.MPIReplicaType]*common.ReplicaSpec{},
SlotsPerWorker: newInt32(2),
CleanPodPolicy: newCleanPodPolicy(common.CleanPodPolicyRunning),
SSHAuthMountPath: "/root/.ssh",
MPIReplicaSpecs: map[v2beta1.MPIReplicaType]*common.ReplicaSpec{},
},
},
wantErrs: field.ErrorList{
Expand All @@ -160,8 +167,9 @@ func TestValidateMPIJob(t *testing.T) {
Name: "foo",
},
Spec: v2beta1.MPIJobSpec{
SlotsPerWorker: newInt32(2),
CleanPodPolicy: newCleanPodPolicy(common.CleanPodPolicyRunning),
SlotsPerWorker: newInt32(2),
CleanPodPolicy: newCleanPodPolicy(common.CleanPodPolicyRunning),
SSHAuthMountPath: "/root/.ssh",
MPIReplicaSpecs: map[v2beta1.MPIReplicaType]*common.ReplicaSpec{
v2beta1.MPIReplicaTypeLauncher: {},
v2beta1.MPIReplicaTypeWorker: {},
Expand Down Expand Up @@ -193,8 +201,9 @@ func TestValidateMPIJob(t *testing.T) {
Name: "foo",
},
Spec: v2beta1.MPIJobSpec{
SlotsPerWorker: newInt32(2),
CleanPodPolicy: newCleanPodPolicy(common.CleanPodPolicyRunning),
SlotsPerWorker: newInt32(2),
CleanPodPolicy: newCleanPodPolicy(common.CleanPodPolicyRunning),
SSHAuthMountPath: "/root/.ssh",
MPIReplicaSpecs: map[v2beta1.MPIReplicaType]*common.ReplicaSpec{
v2beta1.MPIReplicaTypeLauncher: {
Replicas: newInt32(2),
Expand Down
Loading

0 comments on commit 9ce6467

Please sign in to comment.