Skip to content

Commit

Permalink
adapt hostfile to IntelMPI
Browse files: browse the repository at this point in the history
Signed-off-by: Yuki Iwai <[email protected]>
  • Loading branch information
tenzen-y committed Feb 9, 2023
1 parent dd26fa7 commit aefd100
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 7 deletions.
2 changes: 1 addition & 1 deletion build/base/intel-entrypoint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ function resolve_host() {

if [ "$K_MPI_JOB_ROLE" == "launcher" ]; then
resolve_host "$HOSTNAME"
cut -d ' ' -f 1 /etc/mpi/hostfile | while read -r host
cut -d ':' -f 1 /etc/mpi/hostfile | while read -r host
do
resolve_host "$host"
done
Expand Down
6 changes: 5 additions & 1 deletion pkg/controller/mpi_job_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -1187,7 +1187,11 @@ func newConfigMap(mpiJob *kubeflow.MPIJob, workerReplicas int32) *corev1.ConfigM
slots = int(*mpiJob.Spec.SlotsPerWorker)
}
for i := 0; i < int(workerReplicas); i++ {
buffer.WriteString(fmt.Sprintf("%s%s-%d.%s.%s.svc slots=%d\n", mpiJob.Name, workerSuffix, i, workersService, mpiJob.Namespace, slots))
if mpiJob.Spec.MPIImplementation == kubeflow.MPIImplementationOpenMPI {
buffer.WriteString(fmt.Sprintf("%s%s-%d.%s.%s.svc slots=%d\n", mpiJob.Name, workerSuffix, i, workersService, mpiJob.Namespace, slots))
} else if mpiJob.Spec.MPIImplementation == kubeflow.MPIImplementationIntel {
buffer.WriteString(fmt.Sprintf("%s%s-%d.%s.%s.svc:%d\n", mpiJob.Name, workerSuffix, i, workersService, mpiJob.Namespace, slots))
}
}

return &corev1.ConfigMap{
Expand Down
13 changes: 8 additions & 5 deletions pkg/controller/mpi_job_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1506,13 +1506,15 @@ func TestNewConfigMap(t *testing.T) {
workerReplicas int32
wantCM *corev1.ConfigMap
}{
"without slots": {
"OpenMPI without slots": {
mpiJob: &kubeflow.MPIJob{
ObjectMeta: metav1.ObjectMeta{
Name: "without-slots",
Namespace: "tenant-a",
},
Spec: kubeflow.MPIJobSpec{},
Spec: kubeflow.MPIJobSpec{
MPIImplementation: kubeflow.MPIImplementationOpenMPI,
},
},
workerReplicas: 2,
wantCM: &corev1.ConfigMap{
Expand All @@ -1528,14 +1530,15 @@ func TestNewConfigMap(t *testing.T) {
},
},
},
"with slots": {
"IntelMPI with slots": {
mpiJob: &kubeflow.MPIJob{
ObjectMeta: metav1.ObjectMeta{
Name: "with-slots",
Namespace: "project-x",
},
Spec: kubeflow.MPIJobSpec{
SlotsPerWorker: pointer.Int32(10),
SlotsPerWorker: pointer.Int32(10),
MPIImplementation: kubeflow.MPIImplementationIntel,
},
},
workerReplicas: 1,
Expand All @@ -1548,7 +1551,7 @@ func TestNewConfigMap(t *testing.T) {
},
},
Data: map[string]string{
"hostfile": "with-slots-worker-0.with-slots-worker.project-x.svc slots=10\n",
"hostfile": "with-slots-worker-0.with-slots-worker.project-x.svc:10\n",
},
},
},
Expand Down

0 comments on commit aefd100

Please sign in to comment.