Skip to content

Commit

Permalink
Merge pull request #19 from grondo/cray-pals-prolog
Browse files Browse the repository at this point in the history
cray-libpals: allow shell plugin to detect missing port-distributor jobtap plugin
  • Loading branch information
mergify[bot] authored Feb 8, 2022
2 parents e0b88dd + 61f409d commit 76f5c9a
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 10 deletions.
16 changes: 14 additions & 2 deletions src/job-manager/plugins/cray_pals_port_distributor.c
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,12 @@ static void count_job_shells (flux_future_t *fut, void *arg)
}

cleanup:
if (flux_jobtap_prolog_finish (triple->plugin,
triple->jobid,
"cray-pals-port-distributor",
0) < 0)
flux_log_error (h,
"cray_pals_port_distributor: prolog_finish");
if (hlist)
hostlist_destroy (hlist);
flux_future_destroy (fut);
Expand Down Expand Up @@ -168,11 +174,17 @@ static int run_cb (flux_plugin_t *p,
flux_log_error (h,
"cray_pals_port_distributor: "
"Error creating shell-counting future");
free (triple);
return -1;
goto error;
}
if (flux_jobtap_prolog_start (p, "cray-pals-port-distributor") < 0) {
flux_log_error (h, "cray_pals_port_distributor: prolog_start");
goto error;
}
// 'triple' freed in callback
return 0;
error:
free (triple);
return -1;
}

/* On a job's cleanup event, get the ports and return them
Expand Down
27 changes: 19 additions & 8 deletions src/shell/plugins/cray_pals.c
Original file line number Diff line number Diff line change
Expand Up @@ -693,6 +693,13 @@ static int read_future (flux_future_t *fut, char *buf, size_t bufsize)
json_decref (o);
return -1;
}
if (!strcmp (name, "start")) {
/* 'start' event with no cray_port_distribution event.
* assume cray-pals jobtap plugin is not loaded.
*/
shell_debug ("jobtap plugin not loaded: disabling operation");
return 0;
}
if (!strcmp (name, "cray_port_distribution")) {
if (!(array = json_object_get (context, "ports"))) {
shell_log_error ("No port context in cray_port_distribution");
Expand Down Expand Up @@ -722,13 +729,15 @@ static int read_future (flux_future_t *fut, char *buf, size_t bufsize)
}
}
json_decref (o);
return 0;
/* Return 1 on success
*/
return 1;
} else {
flux_future_reset (fut);
json_decref (o);
}
}
shell_log_error ("No cray_port_distribution event posted");
shell_log_error ("Timed out waiting for start event");
return -1;
}

Expand All @@ -737,22 +746,24 @@ static int get_pals_ports (flux_shell_t *shell, json_int_t jobid)
flux_t *h;
char buf[256];
flux_future_t *fut = NULL;
int rc;

if (!(h = flux_shell_get_flux (shell))
|| !(fut = flux_job_event_watch (h, (flux_jobid_t)jobid, "eventlog", 0))) {
shell_log_error ("Error creating event_watch future");
return -1;
}
if (read_future (fut, buf, sizeof (buf)) < 0) {
if ((rc = read_future (fut, buf, sizeof (buf))) < 0)
shell_log_error ("Error reading ports from eventlog");
flux_future_destroy (fut);
return -1;
}
flux_future_destroy (fut);
if (flux_shell_setenvf (shell, 1, "PMI_CONTROL_PORT", "%s", buf) < 0) {

/* read_future() returns 1 if port distribution event was found:
*/
if (rc == 1
&& flux_shell_setenvf (shell, 1, "PMI_CONTROL_PORT", "%s", buf) < 0) {
return -1;
}
return 0;
return rc;
}

/*
Expand Down
8 changes: 8 additions & 0 deletions t/t1001-cray-pals.t
Original file line number Diff line number Diff line change
Expand Up @@ -70,4 +70,12 @@ test_expect_success 'shell: pals shell plugin creates apinfo file' '
&& test ! -z \$PALS_APINFO && test -f \$PALS_APINFO"
'

# Verify that the cray-pals shell plugin tolerates a missing jobtap plugin:
# unload cray_pals_port_distributor.so, run a 2-node/2-task job with the
# verbose shell option, capture stdout+stderr in no-jobtap.log, and require
# the "jobtap plugin not loaded" message to appear in that log.
# NOTE(review): the jobtap plugin is not reloaded afterwards; this appears to
# be the last test before test_done, so later additions that need the plugin
# must load it again -- confirm when extending this file.
test_expect_success 'shell: pals shell plugin ignores missing jobtap plugin' '
flux jobtap remove cray_pals_port_distributor.so &&
flux mini run -o verbose -o userrc=$(pwd)/$USERRC_NAME \
-N2 -n2 hostname > no-jobtap.log 2>&1 &&
test_debug "cat no-jobtap.log" &&
grep "jobtap plugin not loaded" no-jobtap.log
'

test_done

0 comments on commit 76f5c9a

Please sign in to comment.