From bf62d8a74591353cf2aff43e9aeb41be81da8bef Mon Sep 17 00:00:00 2001 From: "Mark A. Grondona" Date: Tue, 31 Oct 2023 19:49:07 -0700 Subject: [PATCH 1/2] traverser: set default job duration based on current time left Problem: When a job is submitted without a duration specified and the current graph has a limited duration, the jobspec duration is set to the graph duration. This is not ideal for any job submitted after the first instant of the graph's lifetime, since the job's expiration will be set after the instance has expired. Ideally, jobs without a specified duration should inherit the instance expiration, not its duration. Do the next best thing and set the duration to the instance expiration (graph_end) minus the current time, which should result in the expiration of the job matching that of the instance. Fixes #1103 --- resource/traversers/dfu_impl.hpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/resource/traversers/dfu_impl.hpp b/resource/traversers/dfu_impl.hpp index 23048c01a..098fcd41c 100644 --- a/resource/traversers/dfu_impl.hpp +++ b/resource/traversers/dfu_impl.hpp @@ -71,7 +71,8 @@ struct jobmeta_t { jobid = id; alloc_type = alloc; int64_t g_duration = std::chrono::duration_cast - (graph_duration.graph_end - graph_duration.graph_start).count (); + (graph_duration.graph_end - + std::chrono::system_clock::now()).count (); if (g_duration <= 0) { errno = EINVAL; From 48b14448c4f7f3fefbe4f30b86426d479f0b425f Mon Sep 17 00:00:00 2001 From: "Mark A. Grondona" Date: Tue, 31 Oct 2023 19:45:46 -0700 Subject: [PATCH 2/2] testsuite: fix expiration propagation test Problem: The test in t4011-match-duration.t tests that the duration of a job is inherited from the enclosing instance duration, but we should really be testing that the *expiration* of a child job is inherited from the parent when no duration is specified.
Otherwise, a job submitted when the instance has only a few minutes remaining could have its expiration set for long after the instance is terminated. Update the test to ensure that expiration of a job with no duration specified is inherited from the enclosing instance expiration. Use flux-alloc(1) to launch the test instance instead of running standalone flux-start(1) instances even though test_under_flux() is used. --- t/t4011-match-duration.t | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/t/t4011-match-duration.t b/t/t4011-match-duration.t index 7ab888a75..278efce1f 100755 --- a/t/t4011-match-duration.t +++ b/t/t4011-match-duration.t @@ -9,17 +9,27 @@ test_description='Test that parent duration is inherited according to RFC14' # test_under_flux 1 -test_expect_success HAVE_JQ 'parent duration is inherited when duration=0' ' +export FLUX_URI_RESOLVE_LOCAL=t + +test_expect_success HAVE_JQ 'parent expiration is inherited when duration=0' ' cat >get_R.sh <<-EOT && #!/bin/sh - flux job info \$FLUX_JOB_ID R EOT chmod +x get_R.sh && - out=$(flux run -t20s -n1 flux start flux run -n1 ./get_R.sh) && - echo "$out" | jq -e ".execution.expiration - .execution.starttime <= 20" && - out=$(flux run -t30s -n1 flux start flux run -n1 ./get_R.sh) && - echo "$out" | jq -e ".execution.expiration - .execution.starttime <= 30" + jobid=$(flux alloc -n1 -t5m --bg) && + expiration=$(flux job info $jobid R | jq .execution.expiration) && + test_debug "echo expiration of alloc job is $expiration" && + R1=$(flux proxy $jobid flux run -n1 ./get_R.sh) && + exp1=$(echo "$R1" | jq .execution.expiration) && + test_debug "echo expiration of job is $exp1" && + echo $exp1 | jq ". == $expiration" && + sleep 1 && + R1=$(flux proxy $jobid flux run -n1 ./get_R.sh) && + exp1=$(echo "$R1" | jq .execution.expiration) && + test_debug "echo expiration of second job is $exp1" && + echo $exp1 | jq ". == $expiration" && + flux shutdown --quiet $jobid ' test_done