From 23a611a95dd6b710d4088fcf32ce08b0b97dd211 Mon Sep 17 00:00:00 2001 From: "Dong H. Ahn" Date: Thu, 10 May 2018 11:48:10 -0700 Subject: [PATCH 1/7] sched: Add completing state support --- sched/sched.c | 3 +++ simulator/sim_execsrv.c | 22 +++++++++++++++++----- simulator/simulator.c | 7 ++++++- simulator/simulator.h | 3 ++- t/t1000-jsc.t | 6 ++++-- 5 files changed, 32 insertions(+), 9 deletions(-) diff --git a/sched/sched.c b/sched/sched.c index ba8406053..7fb201c23 100644 --- a/sched/sched.c +++ b/sched/sched.c @@ -1781,6 +1781,9 @@ static int action (ssrvctx_t *ctx, flux_lwj_t *job, job_state_t newstate, } break; case J_RUNNING: + VERIFY (trans (J_COMPLETING, newstate, &(job->state))); + break; + case J_COMPLETING: VERIFY (trans (J_COMPLETE, newstate, &(job->state)) || trans (J_CANCELLED, newstate, &(job->state))); if (!ctx->arg.schedonce) { diff --git a/simulator/sim_execsrv.c b/simulator/sim_execsrv.c index 4320b4c11..555dfbc90 100644 --- a/simulator/sim_execsrv.c +++ b/simulator/sim_execsrv.c @@ -103,11 +103,13 @@ static int update_job_state (ctx_t *ctx, int rc; double t_starting = SIM_TIME_NONE; double t_running = SIM_TIME_NONE; + double t_completing = SIM_TIME_NONE; double t_complete = SIM_TIME_NONE; switch (new_state) { case J_STARTING: t_starting = update_time; break; case J_RUNNING: t_running = update_time; break; + case J_COMPLETING: t_completing = update_time; break; case J_COMPLETE: t_complete = update_time; break; default: flux_log (ctx->h, LOG_ERR, "Unknown state %d", (int) new_state); @@ -124,6 +126,7 @@ static int update_job_state (ctx_t *ctx, rc = set_job_timestamps (kvs_dir, t_starting, t_running, + t_completing, t_complete, SIM_TIME_NONE); // io if (rc < 0) @@ -209,12 +212,14 @@ static int complete_job (ctx_t *ctx, job_t *job, double completion_time) set_event_timer (ctx, "sched", ctx->sim_state->sim_time + .00001); rc = set_job_timestamps (job->kvs_dir, - SIM_TIME_NONE, // starting - SIM_TIME_NONE, // running - completion_time, - job->io_time); + SIM_TIME_NONE, // starting + SIM_TIME_NONE, // running + SIM_TIME_NONE, // completing + completion_time, + job->io_time); if (rc < 0) - flux_log_error (h, "%s: set_job_timestamps", __FUNCTION__); + flux_log_error (h, "%s: set_job_timestamps", __FUNCTION__); + free_job (job); return rc; @@ -543,6 +548,13 @@ static int handle_queued_events (ctx_t *ctx) *jobid); return -1; } + if (update_job_state (ctx, *jobid, kvs_dir, J_COMPLETING, sim_time) < 0) { + flux_log (h, + LOG_ERR, + "failed to set job %d's state to completing", + *jobid); + return -1; + } flux_log (h, LOG_INFO, "job %d's state to starting then running", diff --git a/simulator/simulator.c b/simulator/simulator.c index fd1266059..637a9909a 100644 --- a/simulator/simulator.c +++ b/simulator/simulator.c @@ -209,7 +209,8 @@ int put_job_in_kvs (job_t *job, const char *initial_state) } int set_job_timestamps (flux_kvsdir_t *dir, double t_starting, - double t_running, double t_complete, double t_io) + double t_running, double t_completing, + double t_complete, double t_io) { flux_t *h = flux_kvsdir_handle (dir); flux_kvs_txn_t *txn; @@ -225,6 +226,10 @@ int set_job_timestamps (flux_kvsdir_t *dir, double t_starting, if (txn_dir_pack (txn, dir, "running_time", "f", t_running) < 0) goto error; } + if (t_completing != SIM_TIME_NONE) { + if (txn_dir_pack (txn, dir, "completing_time", "f", t_completing) < 0) + goto error; + } if (t_complete != SIM_TIME_NONE) { if (txn_dir_pack (txn, dir, "complete_time", "f", t_complete) < 0) goto error; diff --git a/simulator/simulator.h b/simulator/simulator.h index 844c1c497..7d8ad895d 100644 --- a/simulator/simulator.h +++ b/simulator/simulator.h @@ -62,7 +62,8 @@ job_t *pull_job_from_kvs (int id, flux_kvsdir_t *kvs_dir); #define SIM_TIME_NONE (-1.) // skip setting this timestamp int set_job_timestamps (flux_kvsdir_t *dir, double t_starting, - double t_running, double t_complete, double t_io); + double t_running, double t_completing, + double t_complete, double t_io); void free_job (job_t *job); job_t *blank_job (); diff --git a/t/t1000-jsc.t b/t/t1000-jsc.t index 03fcd3a7c..1bab062f9 100755 --- a/t/t1000-jsc.t +++ b/t/t1000-jsc.t @@ -17,13 +17,15 @@ tr2="submitted->allocated" tr3="allocated->runrequest" tr4="runrequest->starting" tr5="starting->running" -tr6="running->complete" +tr6="running->completing" +tr7="completing->complete" trans="$tr1 $tr2 $tr3 $tr4 $tr5 -$tr6" +$tr6 +$tr7" test_expect_success 'jsc: expected job-event sequence for single-job scheduling' ' adjust_session_info 1 && From 77fae7da2f42da7c967d559c48a22a3cfcddff3b Mon Sep 17 00:00:00 2001 From: "Mark A. Grondona" Date: Fri, 11 May 2018 10:07:33 -0700 Subject: [PATCH 2/7] testsuite: update sharness.sh and t0000-sharness.t Update the flux-sched sharness.sh and t0000-sharness.t scripts to the lastest version from flux-core. --- t/sharness.sh | 205 +++++++++++++++++++++++++++++++++------------ t/t0000-sharness.t | 186 +++++++++++++++++++++++++++++++++++++++- 2 files changed, 335 insertions(+), 56 deletions(-) diff --git a/t/sharness.sh b/t/sharness.sh index 43a241c3d..c9f2bcac4 100644 --- a/t/sharness.sh +++ b/t/sharness.sh @@ -18,33 +18,39 @@ # along with this program. If not, see http://www.gnu.org/licenses/ . # Public: Current version of Sharness. -SHARNESS_VERSION="0.3.0" +SHARNESS_VERSION="1.0.0" export SHARNESS_VERSION # Public: The file extension for tests. By default, it is set to "t". : ${SHARNESS_TEST_EXTENSION:=t} export SHARNESS_TEST_EXTENSION -# Keep the original TERM for say_color -ORIGINAL_TERM=$TERM +# Reset TERM to original terminal if found, otherwise save orignal TERM +[ "x" = "x$SHARNESS_ORIG_TERM" ] && + SHARNESS_ORIG_TERM="$TERM" || + TERM="$SHARNESS_ORIG_TERM" +# Public: The unsanitized TERM under which sharness is originally run +export SHARNESS_ORIG_TERM + +# Export SHELL_PATH +: ${SHELL_PATH:=$SHELL} +export SHELL_PATH # For repeatability, reset the environment to a known state. +# TERM is sanitized below, after saving color control sequences. LANG=C LC_ALL=C PAGER=cat TZ=UTC -TERM=dumb EDITOR=: -export LANG LC_ALL PAGER TZ TERM EDITOR +export LANG LC_ALL PAGER TZ EDITOR unset VISUAL CDPATH GREP_OPTIONS # Line feed LF=' ' -[ "x$ORIGINAL_TERM" != "xdumb" ] && ( - TERM=$ORIGINAL_TERM && - export TERM && +[ "x$TERM" != "xdumb" ] && ( [ -t 1 ] && tput bold >/dev/null 2>&1 && tput setaf 1 >/dev/null 2>&1 && @@ -54,14 +60,16 @@ LF=' while test "$#" -ne 0; do case "$1" in - --logfile) - logfile=t; shift;; + --logfile) + logfile=t; shift;; -d|--d|--de|--deb|--debu|--debug) debug=t; shift ;; -i|--i|--im|--imm|--imme|--immed|--immedi|--immedia|--immediat|--immediate) immediate=t; shift ;; -l|--l|--lo|--lon|--long|--long-|--long-t|--long-te|--long-tes|--long-test|--long-tests) TEST_LONG=t; export TEST_LONG; shift ;; + --in|--int|--inte|--inter|--intera|--interac|--interact|--interacti|--interactiv|--interactive|--interactive-|--interactive-t|--interactive-te|--interactive-tes|--interactive-test|--interactive-tests): + TEST_INTERACTIVE=t; export TEST_INTERACTIVE; verbose=t; shift ;; -h|--h|--he|--hel|--help) help=t; shift ;; -v|--v|--ve|--ver|--verb|--verbo|--verbos|--verbose) @@ -70,6 +78,10 @@ while test "$#" -ne 0; do # Ignore --quiet under a TAP::Harness. Saying how many tests # passed without the ok/not ok details is always an error. test -z "$HARNESS_ACTIVE" && quiet=t; shift ;; + --chain-lint) + chain_lint=t; shift ;; + --no-chain-lint) + chain_lint=; shift ;; --no-color) color=; shift ;; --root=*) @@ -81,29 +93,30 @@ while test "$#" -ne 0; do done if test -n "$color"; then + # Save the color control sequences now rather than run tput + # each time say_color() is called. This is done for two + # reasons: + # * TERM will be changed to dumb + # * HOME will be changed to a temporary directory and tput + # might need to read ~/.terminfo from the original HOME + # directory to get the control sequences + # Note: This approach assumes the control sequences don't end + # in a newline for any terminal of interest (command + # substitutions strip trailing newlines). Given that most + # (all?) terminals in common use are related to ECMA-48, this + # shouldn't be a problem. + say_color_error=$(tput bold; tput setaf 1) # bold red + say_color_skip=$(tput setaf 4) # blue + say_color_warn=$(tput setaf 3) # brown/yellow + say_color_pass=$(tput setaf 2) # green + say_color_info=$(tput setaf 6) # cyan + say_color_reset=$(tput sgr0) + say_color_="" # no formatting for normal text say_color() { - ( - TERM=$ORIGINAL_TERM - export TERM - case "$1" in - error) - tput bold; tput setaf 1;; # bold red - skip) - tput setaf 4;; # blue - warn) - tput setaf 3;; # brown/yellow - pass) - tput setaf 2;; # green - info) - tput setaf 6;; # cyan - *) - test -n "$quiet" && return;; - esac + test -z "$1" && test -n "$quiet" && return + eval "say_color_color=\$say_color_$1" shift - printf "%s" "$*" - tput sgr0 - echo - ) + printf "%s\\n" "$say_color_color$*$say_color_reset" } else say_color() { @@ -113,6 +126,9 @@ else } fi +TERM=dumb +export TERM + error() { say_color error "error: $*" EXIT_OK=t @@ -135,8 +151,8 @@ exec 6<&0 if test "$verbose" = "t"; then exec 4>&2 3>&1 elif test "$logfile" = "t"; then - logfile=$(basename $0 .t).output - exec 4>${logfile} 3>&4 + logfile=$(basename $0 .t).output + exec 4>${logfile} 3>&4 else exec 4>/dev/null 3>/dev/null fi @@ -297,10 +313,29 @@ test_debug() { test "$debug" = "" || eval "$1" } +# Public: Stop execution and start a shell. +# +# This is useful for debugging tests and only makes sense together with "-v". +# Be sure to remove all invocations of this command before submitting. +test_pause() { + if test "$verbose" = t; then + "$SHELL_PATH" <&6 >&3 2>&4 + else + error >&5 "test_pause requires --verbose" + fi +} + test_eval_() { # This is a separate function because some tests use # "return" to end a test_expect_success block early. - eval &3 2>&4 "$*" + case ",$test_prereq," in + *,INTERACTIVE,*) + eval "$*" + ;; + *) + eval &3 2>&4 "$*" + ;; + esac } test_run_() { @@ -309,6 +344,13 @@ test_run_() { test_eval_ "$1" eval_ret=$? + if test "$chain_lint" = "t"; then + test_eval_ "(exit 117) && $1" + if test "$?" != 117; then + error "bug in the test script: broken &&-chain: $1" + fi + fi + if test -z "$immediate" || test $eval_ret = 0 || test -n "$expecting_failure"; then test_eval_ "$test_cleanup" fi @@ -561,6 +603,49 @@ test_cmp() { ${TEST_CMP:-diff -u} "$@" } +# Public: portably print a sequence of numbers. +# +# seq is not in POSIX and GNU seq might not be available everywhere, +# so it is nice to have a seq implementation, even a very simple one. +# +# $1 - Starting number. +# $2 - Ending number. +# +# Examples +# +# test_expect_success 'foo works 10 times' ' +# for i in $(test_seq 1 10) +# do +# foo || return +# done +# ' +# +# Returns 0 if all the specified numbers can be displayed. +test_seq() { + i="$1" + j="$2" + while test "$i" -le "$j" + do + echo "$i" || return + i=$(expr "$i" + 1) + done +} + +# Public: Check if the file expected to be empty is indeed empty, and barfs +# otherwise. +# +# $1 - File to check for emptyness. +# +# Returns 0 if file is empty, 1 otherwise. +test_must_be_empty() { + if test -s "$1" + then + echo "'$1' is not empty, it contains:" + cat "$1" + return 1 + fi +} + # Public: Schedule cleanup commands to be run unconditionally at the end of a # test. # @@ -677,7 +762,6 @@ test_done() { test -d "$remove_trash" && cd "$(dirname "$remove_trash")" && rm -rf "$(basename "$remove_trash")" - test -n "$logfile" && test -f "$logfile" && rm -f "${logfile}" @@ -702,11 +786,11 @@ test_done() { : ${SHARNESS_TEST_DIRECTORY:=$(pwd)} export SHARNESS_TEST_DIRECTORY -# # Public: Source directory of test code and sharness library. -# This directory may be different from the directory in which tests are -# being run. -: ${SHARNESS_TEST_SRCDIR:=$(cd `dirname $0` && pwd)} +# This directory may be different from the directory in which tests are +# being run. +: ${SHARNESS_TEST_SRCDIR:=$(cd $(dirname $0) && pwd)} +export SHARNESS_TEST_SRCDIR # Public: Build directory that will be added to PATH. By default, it is set to # the parent directory of SHARNESS_TEST_DIRECTORY. @@ -722,14 +806,14 @@ SHARNESS_TEST_NAME=${SHARNESS_TEST_FILE##*/} SHARNESS_TEST_NAME=${SHARNESS_TEST_NAME%.${SHARNESS_TEST_EXTENSION}} # Prepare test area. -test_dir="trash-directory.$SHARNESS_TEST_NAME" -test -n "$root" && test_dir="$root/$test_dir" -case "$test_dir" in -/*) SHARNESS_TRASH_DIRECTORY="$test_dir" ;; - *) SHARNESS_TRASH_DIRECTORY="$SHARNESS_TEST_DIRECTORY/$test_dir" ;; +SHARNESS_TRASH_DIRECTORY="trash-directory.$SHARNESS_TEST_NAME" +test -n "$root" && SHARNESS_TRASH_DIRECTORY="$root/$SHARNESS_TRASH_DIRECTORY" +case "$SHARNESS_TRASH_DIRECTORY" in +/*) ;; # absolute path is good + *) SHARNESS_TRASH_DIRECTORY="$SHARNESS_TEST_DIRECTORY/$SHARNESS_TRASH_DIRECTORY" ;; esac test "$debug" = "t" || remove_trash="$SHARNESS_TRASH_DIRECTORY" -rm -rf "$test_dir" || { +rm -rf "$SHARNESS_TRASH_DIRECTORY" || { EXIT_OK=t echo >&5 "FATAL: Cannot prepare test area" exit 1 @@ -739,14 +823,21 @@ rm -rf "$test_dir" || { # # Load any extensions in $srcdir/sharness.d/*.sh # -if test -d "${SHARNESS_TEST_SRCDIR}/sharness.d"; then - for file in "${SHARNESS_TEST_SRCDIR}"/sharness.d/*.sh; do - if test -n "$debug"; then - echo 2>&1 "Attempting to load ${file}" +if test -d "${SHARNESS_TEST_SRCDIR}/sharness.d" +then + for file in "${SHARNESS_TEST_SRCDIR}"/sharness.d/*.sh + do + # Ensure glob was not an empty match: + test -e "${file}" || break + + if test -n "$debug" + then + echo >&5 "sharness: loading extensions from ${file}" fi - . ${file} - if test $? != 0; then - echo 2>&1 "sharness: Error loading ${file}. Aborting." + . "${file}" + if test $? != 0 + then + echo >&5 "sharness: Error loading ${file}. Aborting." exit 1 fi done @@ -759,10 +850,10 @@ export SHARNESS_TRASH_DIRECTORY HOME="$SHARNESS_TRASH_DIRECTORY" export HOME -mkdir -p "$test_dir" || exit 1 +mkdir -p "$SHARNESS_TRASH_DIRECTORY" || exit 1 # Use -P to resolve symlinks in our working directory so that the cwd # in subprocesses like git equals our $PWD (for pathname comparisons). -cd -P "$test_dir" || exit 1 +cd -P "$SHARNESS_TRASH_DIRECTORY" || exit 1 for skp in $SKIP_TESTS; do case "$SHARNESS_TEST_NAME" in @@ -773,4 +864,10 @@ for skp in $SKIP_TESTS; do esac done +test -n "$TEST_LONG" && test_set_prereq EXPENSIVE +test -n "$TEST_INTERACTIVE" && test_set_prereq INTERACTIVE + +# Make sure this script ends with code 0 +: + # vi: set ts=4 sw=4 noet : diff --git a/t/t0000-sharness.t b/t/t0000-sharness.t index 7410f8894..b476ba41d 100755 --- a/t/t0000-sharness.t +++ b/t/t0000-sharness.t @@ -21,6 +21,12 @@ test_description='Test Sharness itself' . `dirname $0`/sharness.sh +ret="$?" + +test_expect_success 'sourcing sharness succeeds' ' + test "$ret" = 0 +' + test_expect_success 'success is reported like this' ' : ' @@ -28,13 +34,23 @@ test_expect_failure 'pretend we have a known breakage' ' false ' +test_terminal () { + perl "$SHARNESS_TEST_DIRECTORY"/test-terminal.perl "$@" +} + +# If test_terminal works, then set a PERL_AND_TTY prereq for future tests: +# (PERL and TTY prereqs may later be split if needed separately) +test_terminal sh -c "test -t 1 && test -t 2" && test_set_prereq PERL_AND_TTY + run_sub_test_lib_test () { name="$1" descr="$2" # stdin is the body of the test code + prefix="$3" # optionally run sub-test under command + opt="$4" # optionally call the script with extra option(s) mkdir "$name" && ( cd "$name" && cat >".$name.t" <<-EOF && - #!$SHELL_PATH + #!/bin/sh test_description='$descr (run in sub sharness) @@ -48,7 +64,7 @@ run_sub_test_lib_test () { cat >>".$name.t" && chmod +x ".$name.t" && export SHARNESS_TEST_SRCDIR && - ./".$name.t" >out 2>err + $prefix ./".$name.t" $opt --chain-lint >out 2>err ) } @@ -286,6 +302,172 @@ test_expect_success 'cleanup functions tun at the end of the test' " EOF " +test_expect_success 'We detect broken && chains' " + test_must_fail run_sub_test_lib_test \ + broken-chain 'Broken && chain' <<-\\EOF + test_expect_success 'Cannot fail' ' + true + true + ' + test_done + EOF +" + +test_expect_success 'tests can be run from an alternate directory' ' + # Act as if we have an installation of sharness in current dir: + ln -sf $SHARNESS_TEST_SRCDIR/sharness.sh . && + export working_path="$(pwd)" && + cat >test.t <<-EOF && + test_description="test run of script from alternate dir" + . \$(dirname \$0)/sharness.sh + test_expect_success "success" " + true + " + test_expect_success "trash dir is subdir of working path" " + test \"\$(cd .. && pwd)\" = \"\$working_path/test-rundir\" + " + test_done + EOF + ( + # unset SHARNESS variables before sub-test + unset SHARNESS_TEST_DIRECTORY SHARNESS_TEST_SRCDIR && + # unset HARNESS_ACTIVE so we get a test-results dir + unset HARNESS_ACTIVE && + chmod +x test.t && + mkdir test-rundir && + cd test-rundir && + ../test.t >output 2>err && + cat >expected <<-EOF && + ok 1 - success + ok 2 - trash dir is subdir of working path + # passed all 2 test(s) + 1..2 + EOF + test_cmp expected output && + test -d test-results + ) +' + +test_expect_success 'SHARNESS_ORIG_TERM propagated to sub-sharness' " + ( + export TERM=foo && + unset SHARNESS_ORIG_TERM && + run_sub_test_lib_test orig-term 'check original term' <<-\\EOF + test_expect_success 'SHARNESS_ORIG_TERM is foo' ' + test \"x\$SHARNESS_ORIG_TERM\" = \"xfoo\" ' + test_done + EOF + ) +" + +[ -z "$color" ] || test_set_prereq COLOR +test_expect_success COLOR,PERL_AND_TTY 'sub-sharness still has color' " + run_sub_test_lib_test \ + test-color \ + 'sub-sharness color check' \ + test_terminal <<-\\EOF + test_expect_success 'color is enabled' '[ -n \"\$color\" ]' + test_done + EOF +" + +test_expect_success 'EXPENSIVE prereq not activated by default' " + run_sub_test_lib_test no-long 'long test' <<-\\EOF && + test_expect_success 'passing test' 'true' + test_expect_success EXPENSIVE 'passing suposedly long test' 'true' + test_done + EOF + check_sub_test_lib_test no-long <<-\\EOF + > ok 1 - passing test + > ok 2 # skip passing suposedly long test (missing EXPENSIVE) + > # passed all 2 test(s) + > 1..2 + EOF +" + +test_expect_success 'EXPENSIVE prereq is activated by --long' " + run_sub_test_lib_test long 'long test' '' '--long' <<-\\EOF && + test_expect_success 'passing test' 'true' + test_expect_success EXPENSIVE 'passing suposedly long test' 'true' + test_done + EOF + check_sub_test_lib_test long <<-\\EOF + > ok 1 - passing test + > ok 2 - passing suposedly long test + > # passed all 2 test(s) + > 1..2 + EOF +" + +test_expect_success 'loading sharness extensions works' ' + # Act as if we have a new installation of sharness + # under `extensions` directory. Then create + # a sharness.d/ directory with a test extension function: + mkdir extensions && + ( + cd extensions && + mkdir sharness.d && + cat >sharness.d/test.sh <<-EOF && + this_is_a_test() { + return 0 + } + EOF + ln -sf $SHARNESS_TEST_SRCDIR/sharness.sh . && + cat >test-extension.t <<-\EOF && + test_description="test sharness extensions" + . ./sharness.sh + test_expect_success "extension function is present" " + this_is_a_test + " + test_done + EOF + unset SHARNESS_TEST_DIRECTORY SHARNESS_TEST_SRCDIR && + chmod +x ./test-extension.t && + ./test-extension.t >out 2>err && + cat >expected <<-\EOF && + ok 1 - extension function is present + # passed all 1 test(s) + 1..1 + EOF + test_cmp expected out + ) +' + +test_expect_success 'empty sharness.d directory does not cause failure' ' + # Act as if we have a new installation of sharness + # under `extensions` directory. Then create + # an empty sharness.d/ directory + mkdir nil-extensions && + ( + cd nil-extensions && + mkdir sharness.d && + ln -sf $SHARNESS_TEST_SRCDIR/sharness.sh . && + cat >test.t <<-\EOF && + test_description="sharness works" + . ./sharness.sh + test_expect_success "test success" " + /bin/true + " + test_done + EOF + unset SHARNESS_TEST_DIRECTORY SHARNESS_TEST_SRCDIR && + chmod +x ./test.t && + ./test.t >out 2>err && + cat >expected <<-\EOF && + ok 1 - test success + # passed all 1 test(s) + 1..1 + EOF + test_cmp expected out + ) +' + +test_expect_success INTERACTIVE 'Interactive tests work' ' + echo -n "Please type yes and hit enter " && + read -r var && + test "$var" = "yes" +' + test_done # vi: set ft=sh : From 25cdda69ba621c62df7aa0166924af20bd414044 Mon Sep 17 00:00:00 2001 From: "Mark A. Grondona" Date: Fri, 11 May 2018 10:08:46 -0700 Subject: [PATCH 3/7] testsuite: add kvs-watch-until.lua Add the kvs-watch-until.lua helper script from flux-core to t/scripts in flux-sched. --- t/scripts/kvs-watch-until.lua | 84 +++++++++++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) create mode 100755 t/scripts/kvs-watch-until.lua diff --git a/t/scripts/kvs-watch-until.lua b/t/scripts/kvs-watch-until.lua new file mode 100755 index 000000000..218f617cf --- /dev/null +++ b/t/scripts/kvs-watch-until.lua @@ -0,0 +1,84 @@ +#!/usr/bin/env lua +-- +-- Exit only if/when all ranks have exited 'unknown' state +-- +local usage = [[ +Usage: kvs-wait-until [OPTIONS] KEY CODE + +Watch kvs KEY until Lua code CODE returns true. +(CODE is supplied key value in variable 'v') +If -t, --timeout is provided, and the timeout expires, then +exit with non-zero exit status. + + -h, --help Display this message + -v, --verbose Print value on each watch callback + -t, --timeout=T Wait at most T seconds (before exiting +]] + +local getopt = require 'flux.alt_getopt' .get_opts +local timer = require 'flux.timer'.new() +local f = require 'flux' .new() + +local function printf (...) + io.stdout:write (string.format (...)) +end +local function log_err (...) + io.stdout:write (string.format (...)) +end + +local opts, optind = getopt (arg, "hvt:", + { verbose = 'v', + timeout = 't', + help = 'h' + } + ) +if opts.h then print (usage); os.exit (0) end + +local key = arg [optind] +local callback = arg [optind+1] + +if not key or not callback then + log_err ("KVS key and callback code required\n") + print (usage) + os.exit (1) +end + +callback = "return function (v) return "..callback.." end" +local fn, err = loadstring (callback, "callback") +if not fn then + log_err ("code compile error: %s", err) + os.exit (1) +end +local cb = fn () + +local kw, err = f:kvswatcher { + key = key, + handler = function (kw, result) + if opts.v then + printf ("%4.03fs: %s = %s\n", + timer:get0(), + key, tostring (result)) + end + -- Do not pass nil result to callback: + if result == nil then return end + local ok, rv = pcall (cb, result) + if not ok then error (rv) end + if ok and rv then + os.exit (0) + end + end +} + +if opts.t then + local tw, err = f:timer { + timeout = opts.t * 1000, + handler = function (f, to) + log_err ("%4.03fs: Timeout expired!\n", timer:get0()) + os.exit (1) + end + } +end + +timer:set () +f:reactor () +-- vi: ts=4 sw=4 expandtab From 293ffdbfeb1c3b8155e0bc6d387930d4337daa72 Mon Sep 17 00:00:00 2001 From: "Mark A. Grondona" Date: Fri, 11 May 2018 10:09:46 -0700 Subject: [PATCH 4/7] testsuite: ensure last job has completed in valgrind test Synchronize to ensure last flux-wreckrun job has reached the completed state in the t5000-valgrind.t test to avoid racing with the sched module releasing memory for complete jobs. --- t/valgrind/valgrind-workload.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/t/valgrind/valgrind-workload.sh b/t/valgrind/valgrind-workload.sh index c655d4c16..29173e297 100755 --- a/t/valgrind/valgrind-workload.sh +++ b/t/valgrind/valgrind-workload.sh @@ -10,4 +10,8 @@ for i in `seq 1 $NJOBS`; do flux wreckrun --ntasks $size /bin/true done +# Wait up to 5s for last job to be fully complete before removing sched module: +KEY=$(echo $(flux wreck last-jobid -p).state) +${SHARNESS_TEST_SRCDIR}/scripts/kvs-watch-until.lua -t 5 $KEY 'v == "complete"' + flux module remove sched From a7aa2867725e7c9f8053574ac19f3e3791bb48bb Mon Sep 17 00:00:00 2001 From: "Dong H. Ahn" Date: Thu, 10 May 2018 13:58:48 -0700 Subject: [PATCH 5/7] format: Add vi's pretty whitespace support --- simulator/sim_execsrv.c | 4 ++++ simulator/simsrv.c | 4 ++++ simulator/simulator.c | 4 ++++ simulator/simulator.h | 4 ++++ simulator/submitsrv.c | 4 ++++ 5 files changed, 20 insertions(+) diff --git a/simulator/sim_execsrv.c b/simulator/sim_execsrv.c index 555dfbc90..7e2e91f9d 100644 --- a/simulator/sim_execsrv.c +++ b/simulator/sim_execsrv.c @@ -698,3 +698,7 @@ int mod_main (flux_t *h, int argc, char **argv) } MOD_NAME ("sim_exec"); + +/* + * vi: ts=4 sw=4 expandtab + */ diff --git a/simulator/simsrv.c b/simulator/simsrv.c index 35462ed60..8e53d3b2b 100644 --- a/simulator/simsrv.c +++ b/simulator/simsrv.c @@ -493,3 +493,7 @@ int mod_main (flux_t *h, int argc, char **argv) } MOD_NAME ("sim"); + +/* + * vi: ts=4 sw=4 expandtab + */ diff --git a/simulator/simulator.c b/simulator/simulator.c index 637a9909a..1b428d521 100644 --- a/simulator/simulator.c +++ b/simulator/simulator.c @@ -509,3 +509,7 @@ flux_kvsdir_t *job_kvsdir (flux_t *h, int jobid) flux_future_destroy (f); return cpy; } + +/* + * vi: ts=4 sw=4 expandtab + */ diff --git a/simulator/simulator.h b/simulator/simulator.h index 7d8ad895d..4a9e1f4f7 100644 --- a/simulator/simulator.h +++ b/simulator/simulator.h @@ -80,3 +80,7 @@ struct rdl *get_rdl (flux_t *h, char *path); void close_rdl (); */ #endif /* SIMULATOR_H */ + +/* + * vi: ts=4 sw=4 expandtab + */ diff --git a/simulator/submitsrv.c b/simulator/submitsrv.c index 8069a3bdf..e17301f00 100644 --- a/simulator/submitsrv.c +++ b/simulator/submitsrv.c @@ -437,3 +437,7 @@ int mod_main (flux_t *h, int argc, char **argv) } MOD_NAME ("submit"); + +/* + * vi: ts=4 sw=4 expandtab + */ From 2522d30e1580a9bcc2348cfe30908ec0ca21c80a Mon Sep 17 00:00:00 2001 From: "Dong H. Ahn" Date: Fri, 11 May 2018 10:06:54 -0700 Subject: [PATCH 6/7] FSM: complete->cancelled isn't a valid transition --- sched/sched.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sched/sched.c b/sched/sched.c index 7fb201c23..b9c65a138 100644 --- a/sched/sched.c +++ b/sched/sched.c @@ -1784,8 +1784,7 @@ static int action (ssrvctx_t *ctx, flux_lwj_t *job, job_state_t newstate, VERIFY (trans (J_COMPLETING, newstate, &(job->state))); break; case J_COMPLETING: - VERIFY (trans (J_COMPLETE, newstate, &(job->state)) - || trans (J_CANCELLED, newstate, &(job->state))); + VERIFY (trans (J_COMPLETE, newstate, &(job->state))); if (!ctx->arg.schedonce) { /* support testing by actually not releasing the resrc */ if (resrc_tree_release (job->resrc_tree, job->lwj_id)) { From 9c685831c9e5fff7092ae03c384ed7c286d28c47 Mon Sep 17 00:00:00 2001 From: "Dong H. Ahn" Date: Fri, 11 May 2018 10:21:17 -0700 Subject: [PATCH 7/7] cleanup: Remove NULL checks for NULL-safe deallocate calls --- sched/sched.c | 69 +++++++++++++++++---------------------------------- 1 file changed, 23 insertions(+), 46 deletions(-) diff --git a/sched/sched.c b/sched/sched.c index b9c65a138..0ffbe64c1 100644 --- a/sched/sched.c +++ b/sched/sched.c @@ -213,16 +213,11 @@ static inline void ssrvarg_init (ssrvarg_t *arg) static inline void ssrvarg_free (ssrvarg_t *arg) { - if (arg->path) - free (arg->path); - if (arg->uri) - free (arg->uri); - if (arg->userplugin) - free (arg->userplugin); - if (arg->userplugin_opts) - free (arg->userplugin_opts); - if (arg->prio_plugin) - free (arg->prio_plugin); + free (arg->path); + free (arg->uri); + free (arg->userplugin); + free (arg->userplugin_opts); + free (arg->prio_plugin); } static inline int ssrvarg_process_args (int argc, char **argv, ssrvarg_t *a) @@ -495,8 +490,7 @@ static int plugin_process_args (ssrvctx_t *ctx, char *userplugin_opts) rc = 0; done: - if (argz) - free (argz); + free (argz); return rc; } @@ -535,8 +529,7 @@ static int q_enqueue_into_pqueue (ssrvctx_t *ctx, json_t *jcb) /* please don't free the job using job_index; this is just a lookup table */ rc = 0; done: - if (key) - free (key); + free (key); return rc; } @@ -639,10 +632,8 @@ static json_t *get_string_blocking (flux_t *h, const char *key) free (json_str); return o; error: - if (json_str) - free (json_str); - if (o) - Jput (o); + free (json_str); + Jput (o); errno = saved_errno; return NULL; } @@ -1213,10 +1204,8 @@ static inline int bridge_send_runrequest (ssrvctx_t *ctx, flux_lwj_t *job) rc = 0; } } - if (msg) - flux_msg_destroy (msg); - if (topic) - free (topic); + flux_msg_destroy (msg); + free (topic); return rc; } @@ -1331,10 +1320,8 @@ static int req_tpexec_allocate (ssrvctx_t *ctx, flux_lwj_t *job) bridge_update_timer (ctx); rc = 0; done: - if (jcb) - Jput (jcb); - if (red) - Jput (red); + Jput (jcb); + Jput (red); resrc_api_map_destroy (&gmap); resrc_api_map_destroy (&rmap); return rc; @@ -1498,8 +1485,7 @@ static resrc_reqst_t *get_resrc_reqst (ssrvctx_t *ctx, flux_lwj_t *job, resrc_reqst = resrc_reqst_from_json (ctx->rsapi, req_res, NULL); done: - if (req_res) - Jput (req_res); + Jput (req_res); return resrc_reqst; } @@ -1732,8 +1718,7 @@ static int action (ssrvctx_t *ctx, flux_lwj_t *job, job_state_t newstate, q_move_to_cqueue (ctx, job); } else { q_rm_from_pqueue (ctx, job); - if (job->req) - free (job->req); + free (job->req); resrc_tree_destroy (ctx->rsapi, job->resrc_tree, false, false); key = xasprintf ("%"PRId64"", job->lwj_id); zhash_delete (ctx->job_index, key); @@ -1770,8 +1755,7 @@ static int action (ssrvctx_t *ctx, flux_lwj_t *job, job_state_t newstate, q_move_to_cqueue (ctx, job); } else { q_rm_from_pqueue (ctx, job); - if (job->req) - free (job->req); + free (job->req); resrc_tree_destroy (ctx->rsapi, job->resrc_tree, false, false); key = xasprintf ("%"PRId64"", job->lwj_id); zhash_delete (ctx->job_index, key); @@ -1809,8 +1793,7 @@ static int action (ssrvctx_t *ctx, flux_lwj_t *job, job_state_t newstate, } else { /* free resource here if reap is not enabled */ q_rm_from_rqueue (ctx, job); - if (job->req) - free (job->req); + free (job->req); resrc_tree_destroy (ctx->rsapi, job->resrc_tree, false, false); key = xasprintf ("%"PRId64"", job->lwj_id); zhash_delete (ctx->job_index, key); @@ -1821,8 +1804,7 @@ static int action (ssrvctx_t *ctx, flux_lwj_t *job, job_state_t newstate, case J_CANCELLED: VERIFY (trans (J_REAPED, newstate, &(job->state))); if (ctx->arg.reap) { - if (job->req) - free (job->req); + free (job->req); key = xasprintf ("%"PRId64"", job->lwj_id); zhash_delete (ctx->job_index, key); free (key); @@ -1837,8 +1819,7 @@ static int action (ssrvctx_t *ctx, flux_lwj_t *job, job_state_t newstate, if (priority_plugin) priority_plugin->record_job_usage (ctx->h, job); zlist_remove (ctx->c_queue, job); - if (job->req) - free (job->req); + free (job->req); resrc_tree_destroy (ctx->rsapi, job->resrc_tree, false, false); key = xasprintf ("%"PRId64"", job->lwj_id); zhash_delete (ctx->job_index, key); @@ -1952,10 +1933,8 @@ static int kill_jobs_on_node (flux_t *h, resrc_t *node) return 0; error: - if (topic) - free (topic); - if (msg) - flux_msg_destroy (msg); + free (topic); + flux_msg_destroy (msg); return -1; } @@ -2007,10 +1986,8 @@ static void exclude_request_cb (flux_t *h, flux_msg_handler_t *w, return; error: - if (topic) - free (topic); - if (msg2) - flux_msg_destroy (msg2); + free (topic); + flux_msg_destroy (msg2); if (flux_respond (h, msg, errno, NULL) < 0) flux_log_error (h, "%s", __FUNCTION__); }