Skip to content

Commit

Permalink
Merge pull request #4617 from chu11/job_list_duration
Browse files Browse the repository at this point in the history
job-list: support retrieval of job duration
  • Loading branch information
mergify[bot] authored Sep 27, 2022
2 parents 086372b + 3ff24d3 commit 99ecf1d
Show file tree
Hide file tree
Showing 10 changed files with 146 additions and 32 deletions.
13 changes: 9 additions & 4 deletions doc/man1/flux-jobs.rst
Original file line number Diff line number Diff line change
Expand Up @@ -186,12 +186,14 @@ the following conversion flags are supported by *flux-jobs*:
datetime of epoch if timestamp field does not exist.

**!F**
convert a duration in floating point seconds to Flux Standard Duration (FSD).
string. Defaults to empty string if duration field does not exist.
convert a time duration in floating point seconds to Flux Standard
Duration (FSD) string (e.g. *{runtime!F}*). Defaults to empty string if
field does not exist.

**!H**
convert a duration to hours:minutes:seconds form (e.g. *{runtime!H}*).
Defaults to empty string if duration field does not exist.
convert a time duration in floating point seconds to
hours:minutes:seconds form (e.g. *{runtime!H}*). Defaults to empty
string if time duration field does not exist.

**!P**
convert a floating point number into a percentage fitting in 5 characters
Expand Down Expand Up @@ -264,6 +266,9 @@ The field names that can be specified are:
**ntasks**
job task count

**duration**
job duration in seconds

**nnodes**
job node count (if job ran / is running), empty string otherwise

Expand Down
2 changes: 2 additions & 0 deletions src/bindings/python/flux/job/info.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,7 @@ class JobInfo:
"t_run": 0.0,
"t_cleanup": 0.0,
"t_inactive": 0.0,
"duration": 0.0,
"expiration": 0.0,
"name": "",
"queue": "",
Expand Down Expand Up @@ -506,6 +507,7 @@ def get_field(self, field_name, args, kwargs):
"name": "NAME",
"queue": "QUEUE",
"ntasks": "NTASKS",
"duration": "DURATION",
"nnodes": "NNODES",
"expiration": "EXPIRATION",
"t_remaining": "T_REMAINING",
Expand Down
1 change: 1 addition & 0 deletions src/cmd/flux-jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,7 @@ def fetch_jobs_flux(args, fields, flux_handle=None):
"name": ("name",),
"queue": ("queue",),
"ntasks": ("ntasks",),
"duration": ("duration",),
"nnodes": ("nnodes",),
"ranks": ("ranks",),
"nodelist": ("nodelist",),
Expand Down
2 changes: 1 addition & 1 deletion src/modules/job-list/job-list.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
static const char *attrs[] = {
"userid", "urgency", "priority", "t_submit",
"t_depend", "t_run", "t_cleanup", "t_inactive",
"state", "name", "queue", "ntasks", "nnodes",
"state", "name", "queue", "ntasks", "duration", "nnodes",
"ranks", "nodelist", "success", "exception_occurred",
"exception_type", "exception_severity",
"exception_note", "result", "expiration",
Expand Down
13 changes: 13 additions & 0 deletions src/modules/job-list/job_data.c
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ struct job *job_create (struct list_ctx *ctx, flux_jobid_t id)
job->priority = FLUX_JOB_PRIORITY_MIN;
job->state = FLUX_JOB_STATE_NEW;
job->ntasks = -1;
job->duration = -1.0;
job->nnodes = -1;
job->expiration = -1.0;
job->wait_status = -1;
Expand Down Expand Up @@ -153,6 +154,15 @@ static int parse_jobspec_nnodes (struct job *job, struct jj_counts *jj)
return 0;
}

/* Copy the duration parsed out of the jobspec onto the job record.
 *
 * Always returns 0.  The int return matches the other parse_jobspec_*()
 * helpers so the caller can check every one of them the same way.
 */
static int parse_jobspec_duration (struct job *job, struct jj_counts *jj)
{
    /* N.B. Jobspec V1 requires duration to be set, so the value handed
     * back by libjj is expected to always be >= 0.
     */
    double requested = jj->duration;

    job->duration = requested;
    return 0;
}

static int parse_per_resource (struct job *job,
const char **type,
int *count)
Expand Down Expand Up @@ -288,6 +298,9 @@ int job_parse_jobspec (struct job *job, const char *s)
if (parse_jobspec_ntasks (job, &jj) < 0)
goto nonfatal_error;

if (parse_jobspec_duration (job, &jj) < 0)
goto nonfatal_error;

/* nonfatal error - jobspec illegal, but we'll continue on. job
* listing will return whatever data is available */
nonfatal_error:
Expand Down
1 change: 1 addition & 0 deletions src/modules/job-list/job_data.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ struct job {
const char *name;
const char *queue;
int ntasks;
double duration;
int nnodes;
char *ranks;
char *nodelist;
Expand Down
6 changes: 6 additions & 0 deletions src/modules/job-list/job_util.c
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,12 @@ static int store_attr (struct job *job,
return 0;
val = json_integer (job->ntasks);
}
else if (!strcmp (attr, "duration")) {
/* job->duration potentially < 0 if jobspec invalid */
if (job->duration < 0)
return 0;
val = json_real (job->duration);
}
else if (!strcmp (attr, "nnodes")) {
/* job->nnodes < 0 if not set yet or R invalid, may be set in
* DEPEND or RUN state */
Expand Down
1 change: 1 addition & 0 deletions t/python/t0010-job.py
Original file line number Diff line number Diff line change
Expand Up @@ -475,6 +475,7 @@ def test_25_job_list_attrs(self):
"name",
"queue",
"ntasks",
"duration",
"nnodes",
"ranks",
"nodelist",
Expand Down
123 changes: 97 additions & 26 deletions t/t2260-job-list.t
Original file line number Diff line number Diff line change
Expand Up @@ -797,28 +797,75 @@ test_expect_success HAVE_JQ 'flux job list lists nnodes for pending jobs correct
flux queue start
'

# Reload job-list so the test that follows checks job data as reported
# after a module restart.
test_expect_success 'reload the job-list module' '
flux module reload job-list
'

# For each of the four nodecount jobs recorded earlier, confirm that the
# inactive listing still reports the expected nnodes and ranks, and that
# nodelist matches what is decoded from the job's R.
test_expect_success HAVE_JQ 'verify nnodes/ranks/nodelist preserved across restart' '
	jobid1=$(cat nodecount1.id) &&
	jobid2=$(cat nodecount2.id) &&
	jobid3=$(cat nodecount3.id) &&
	jobid4=$(cat nodecount4.id) &&

	obj=$(flux job list -s inactive | grep ${jobid1}) &&
	nodes=$(flux job info ${jobid1} R | flux R decode --nodelist) &&
	echo $obj | jq -e ".nnodes == 1" &&
	echo $obj | jq -e ".ranks == \"0\"" &&
	echo $obj | jq -e ".nodelist == \"${nodes}\"" &&

	obj=$(flux job list -s inactive | grep ${jobid2}) &&
	nodes=$(flux job info ${jobid2} R | flux R decode --nodelist) &&
	echo $obj | jq -e ".nnodes == 1" &&
	echo $obj | jq -e ".ranks == \"0\"" &&
	echo $obj | jq -e ".nodelist == \"${nodes}\"" &&

	obj=$(flux job list -s inactive | grep ${jobid3}) &&
	nodes=$(flux job info ${jobid3} R | flux R decode --nodelist) &&
	echo $obj | jq -e ".nnodes == 2" &&
	echo $obj | jq -e ".ranks == \"[0-1]\"" &&
	echo $obj | jq -e ".nodelist == \"${nodes}\"" &&

	obj=$(flux job list -s inactive | grep ${jobid4}) &&
	nodes=$(flux job info ${jobid4} R | flux R decode --nodelist) &&
	echo $obj | jq -e ".nnodes == 3" &&
	echo $obj | jq -e ".ranks == \"[0-2]\"" &&
	echo $obj | jq -e ".nodelist == \"${nodes}\""
'

#
# job success
#

# A job running a valid command should be listed with success == true.
# The job id is saved to success1.id for the later restart check.
test_expect_success HAVE_JQ 'flux job list outputs success correctly (true)' '
	jobid=$(flux mini submit --wait hostname | flux job id) &&
	echo $jobid > success1.id &&
	wait_jobid_state $jobid inactive &&
	obj=$(flux job list -s inactive | grep $jobid) &&
	echo $obj | jq -e ".success == true"
'

# A job running a nonexistent command should be listed with
# success == false.  The job id is saved to success2.id for the later
# restart check.
test_expect_success HAVE_JQ 'flux job list outputs success correctly (false)' '
	jobid=$(flux mini submit --wait nosuchcommand | flux job id) &&
	echo $jobid > success2.id &&
	wait_jobid_state $jobid inactive &&
	obj=$(flux job list -s inactive | grep $jobid) &&
	echo $obj | jq -e ".success == false"
'

# Reload job-list so the test that follows checks the success attribute
# as reported after a module restart.
test_expect_success 'reload the job-list module' '
flux module reload job-list
'

# Re-check the success attribute of the two jobs recorded in success1.id
# and success2.id now that job-list has been reloaded.
# (Title corrected: this test checks .success, not the task count.)
test_expect_success HAVE_JQ 'verify job success preserved across restart' '
	jobid1=$(cat success1.id) &&
	jobid2=$(cat success2.id) &&
	obj=$(flux job list -s inactive | grep ${jobid1}) &&
	echo $obj | jq -e ".success == true" &&
	obj=$(flux job list -s inactive | grep ${jobid2}) &&
	echo $obj | jq -e ".success == false"
'

# job exceptions

test_expect_success HAVE_JQ 'flux job list outputs exceptions correctly (no exception)' '
jobid=`flux mini submit --wait hostname | flux job id` &&
echo $jobid > exceptions1.id &&
wait_jobid_state $jobid inactive &&
obj=$(flux job list -s inactive | grep $jobid) &&
echo $obj | jq -e ".exception_occurred == false" &&
Expand All @@ -829,6 +876,7 @@ test_expect_success HAVE_JQ 'flux job list outputs exceptions correctly (no exce

test_expect_success HAVE_JQ 'flux job list outputs exceptions correctly (exception)' '
jobid=`flux mini submit --wait nosuchcommand | flux job id` &&
echo $jobid > exceptions2.id &&
wait_jobid_state $jobid inactive &&
obj=$(flux job list -s inactive | grep $jobid) &&
echo $obj | jq -e ".exception_occurred == true" &&
Expand All @@ -837,10 +885,31 @@ test_expect_success HAVE_JQ 'flux job list outputs exceptions correctly (excepti
echo $obj | jq .exception_note | grep "No such file or directory"
'

# Reload job-list so the test that follows checks the exception
# attributes as reported after a module restart.
test_expect_success 'reload the job-list module' '
flux module reload job-list
'

# Re-check the exception attributes of the jobs recorded in
# exceptions1.id (no exception) and exceptions2.id (exec exception) now
# that job-list has been reloaded.
# (Title corrected: this test checks exception data, not the task count.)
test_expect_success HAVE_JQ 'verify job exceptions preserved across restart' '
	jobid1=$(cat exceptions1.id) &&
	jobid2=$(cat exceptions2.id) &&
	obj=$(flux job list -s inactive | grep ${jobid1}) &&
	echo $obj | jq -e ".success == true" &&
	echo $obj | jq -e ".exception_occurred == false" &&
	echo $obj | jq -e ".exception_severity == null" &&
	echo $obj | jq -e ".exception_type == null" &&
	echo $obj | jq -e ".exception_note == null" &&
	obj=$(flux job list -s inactive | grep ${jobid2}) &&
	echo $obj | jq -e ".exception_occurred == true" &&
	echo $obj | jq -e ".exception_severity == 0" &&
	echo $obj | jq -e ".exception_type == \"exec\"" &&
	echo $obj | jq .exception_note | grep "No such file or directory"
'

# expiration time

test_expect_success HAVE_JQ 'flux job list outputs expiration time when set' '
jobid=$(flux mini submit -t 30s sleep 1000 | flux job id) &&
jobid=$(flux mini submit -t 500s sleep 1000 | flux job id) &&
echo $jobid > expiration.id &&
fj_wait_event $jobid start &&
flux job list | grep $jobid > expiration.json &&
test_debug "cat expiration.json" &&
Expand All @@ -852,31 +921,32 @@ test_expect_success 'reload the job-list module' '
flux module reload job-list
'

test_expect_success HAVE_JQ 'verify nnodes/ranks/nodelist preserved across restart' '
jobid1=`cat nodecount1.id` &&
jobid2=`cat nodecount2.id` &&
jobid3=`cat nodecount3.id` &&
jobid4=`cat nodecount4.id` &&
obj=$(flux job list -s inactive | grep ${jobid1}) &&
echo $obj | jq -e ".nnodes == 1" &&
echo $obj | jq -e ".ranks == \"0\"" &&
nodes=`flux job info ${jobid1} R | flux R decode --nodelist` &&
echo $obj | jq -e ".nodelist == \"${nodes}\"" &&
obj=$(flux job list -s inactive | grep ${jobid2}) &&
echo $obj | jq -e ".nnodes == 1" &&
echo $obj | jq -e ".ranks == \"0\"" &&
nodes=`flux job info ${jobid2} R | flux R decode --nodelist` &&
echo $obj | jq -e ".nodelist == \"${nodes}\"" &&
obj=$(flux job list -s inactive | grep ${jobid3}) &&
echo $obj | jq -e ".nnodes == 2" &&
echo $obj | jq -e ".ranks == \"[0-1]\"" &&
nodes=`flux job info ${jobid3} R | flux R decode --nodelist` &&
echo $obj | jq -e ".nodelist == \"${nodes}\"" &&
obj=$(flux job list -s inactive | grep ${jobid4}) &&
echo $obj | jq -e ".nnodes == 3" &&
echo $obj | jq -e ".ranks == \"[0-2]\"" &&
nodes=`flux job info ${jobid4} R | flux R decode --nodelist` &&
echo $obj | jq -e ".nodelist == \"${nodes}\""
# Re-check the expiration of the job recorded in expiration.id now that
# job-list has been reloaded; it should still lie in the future.
# (Title corrected: this test checks .expiration, not the task count.)
test_expect_success HAVE_JQ 'verify expiration preserved across restart' '
	jobid=$(cat expiration.id) &&
	flux job list -s inactive | grep ${jobid} > expiration2.json &&
	jq -e ".expiration > now" < expiration2.json
'

# duration time

# Submit a job with a 60 minute time limit and confirm job-list reports
# the duration as 3600 seconds while the job is running.  The job id is
# saved to duration.id for the later restart check.
test_expect_success HAVE_JQ 'flux job list outputs duration time when set' '
	jobid=$(flux mini submit -t 60m sleep 1000 | flux job id) &&
	echo $jobid > duration.id &&
	fj_wait_event $jobid start &&
	flux job list | grep $jobid > duration.json &&
	test_debug "cat duration.json" &&
	jq -e ".duration == 3600.0" < duration.json &&
	flux job cancel $jobid &&
	# The follow-up test looks this job up with "-s inactive", so do
	# not leave this test until the cancelled job has become inactive.
	wait_jobid_state $jobid inactive
'

# Reload job-list so the test that follows checks the duration attribute
# as reported after a module restart.
test_expect_success 'reload the job-list module' '
flux module reload job-list
'

# Re-check the duration of the job recorded in duration.id now that
# job-list has been reloaded.
# (Title corrected: this test checks .duration, not the task count.)
test_expect_success HAVE_JQ 'verify duration preserved across restart' '
	jobid=$(cat duration.id) &&
	flux job list -s inactive | grep ${jobid} > duration2.json &&
	jq -e ".duration == 3600.0" < duration2.json
'

# all job attributes
Expand Down Expand Up @@ -1004,6 +1074,7 @@ t_inactive \
state \
name \
ntasks \
duration \
nnodes \
ranks \
nodelist \
Expand Down
16 changes: 15 additions & 1 deletion t/t2800-jobs-cmd.t
Original file line number Diff line number Diff line change
Expand Up @@ -537,6 +537,19 @@ test_expect_success 'flux-jobs --format={ntasks},{nnodes},{nnodes:h} works' '
test_cmp nodecountI.exp nodecountI.out
'

# Check every supported rendering of the duration field: raw, ":h",
# "!F" (FSD), "!H" (h:mm:ss) and the ":h" variants of the latter two.
# Pending/running jobs are expected to show a 300 second duration and
# completed jobs a zero duration.
test_expect_success 'flux-jobs --format={duration},{duration:h},{duration!F},{duration!H},{duration!F:h},{duration!H:h} works' '
	fmt="{duration},{duration:h},{duration!F},{duration!H},{duration!F:h},{duration!H:h}" &&
	flux jobs --filter=pending,running -no "${fmt}" > durationPR.out &&
	for i in $(seq 1 $(state_count sched run))
	do
		echo "300.0,300.0,5m,0:05:00,5m,0:05:00" >> durationPR.exp
	done &&
	test_cmp durationPR.exp durationPR.out &&
	flux jobs --filter=completed -no "${fmt}" > durationCD.out &&
	for i in $(seq 1 $(state_count completed))
	do
		echo "0.0,-,0s,0:00:00,-,-" >> durationCD.exp
	done &&
	test_cmp durationCD.exp durationCD.out
'

test_expect_success 'flux-jobs --format={runtime:0.3f} works' '
flux jobs --filter=pending -no "{runtime:0.3f}" > runtime-dotP.out &&
Expand Down Expand Up @@ -657,7 +670,7 @@ test_expect_success 'flux jobs --format={t_cleanup/{in}active} works' '
test $count -eq $(state_count inactive)
'

test_expect_success 'flux-jobs --format={runtime},{runtime!F},{runtime!F:h},{runtime!H},{runtime!H:h} works' '
test_expect_success 'flux-jobs --format={runtime},{runtime!F},{runtime!H},{runtime!F:h},{runtime!H:h} works' '
fmt="{runtime},{runtime!F},{runtime!H},{runtime!F:h},{runtime!H:h}" &&
flux jobs --filter=pending -no "${fmt}" > runtimeP.out &&
for i in `seq 1 $(state_count sched)`; do
Expand Down Expand Up @@ -917,6 +930,7 @@ test_expect_success 'flux-jobs: header included with all custom formats' '
name==NAME
queue==QUEUE
ntasks==NTASKS
duration==DURATION
nnodes==NNODES
ranks==RANKS
nodelist==NODELIST
Expand Down

0 comments on commit 99ecf1d

Please sign in to comment.