From 6530c41c361baf791a32d028252807ca9c1e9eec Mon Sep 17 00:00:00 2001 From: Graham Percival Date: Mon, 15 Jan 2024 14:27:22 -0800 Subject: [PATCH 1/2] tests/valgrind: fix handling of forks We tried to support valgrind checking of fork() in: 2023-09-20 tests/valgrind: improve handling for fork() and exec() 38e07b49d7bc662fb19917f07d19176d22cf9b54 However, that method assumed that the lowest pid would be the parent, while the higher one(s) would be the children. That has two flaws: 1) If the pid rolls over, the order would be broken. For example, if the parent pid was 99999 on FreeBSD (the maximum value), then the child would be something much lower (probably in the hundreds). (I knew about that problem, considered it sufficiently unlikely.) 2) Since $_valgrind_check_logfiles are defined by `ls "blah"*`, they are listed in lexicographical order, not numerical order. For example, given two logfiles foo-9999.log and foo-10000.log, foo-10000.log would come first. (I did not anticipate this problem, but even if it had occurred to me, I probably would have deemed it sufficiently unlikely. However, by unlikely fluke, this seems to occur roughly 50% of the time with github actions: when checking 24-fork-func with the gcc binaries, the parent lands on pid 9999 exactly and the child is 10000.) The fix is not to rely on the order: instead, generate a list of all pids involved. If a valgrind logfile lists a parent pid which is in that list, it's a child; otherwise, it's the parent. --- tests/shared_valgrind_functions.sh | 104 ++++++++++++++++++++--------- 1 file changed, 73 insertions(+), 31 deletions(-) diff --git a/tests/shared_valgrind_functions.sh b/tests/shared_valgrind_functions.sh index f56a0690..1bfc98c2 100644 --- a/tests/shared_valgrind_functions.sh +++ b/tests/shared_valgrind_functions.sh @@ -344,6 +344,44 @@ _val_checkl() { fi } +## _get_pids (logfiles): +# Extract a list of pids in the format %08d from ${logfiles}. +_get_pids() { + _get_pids_logfiles=$1 + + _get_pids_pids="" + for _get_pids_logfile in ${_valgrind_check_logfiles} ; do + # Get the pid. + _get_pids_pid=$(printf "%s" "${_get_pids_logfile%%.log}" | \ + rev | cut -d "-" -f 1 | rev) + # Zero-pad it and add it to the new list. + _get_pids_pids=$(printf "%s %08d" \ + "${_get_pids_pids}" "${_get_pids_pid}") + done + + echo "${_get_pids_pids}" +} + +## _is_parent (logfile, pids): +# If the parent pid of ${logfile} is in ${pids}, return 0; otherwise, return 1. +_is_parent () { + _is_parent_logfile=$1 + _is_parent_pids=$2 + + # Get the parent pid from the valgrind logfile + ppid=$(grep "Parent PID:" "${_is_parent_logfile}" | \ + awk '{ print $4 }') + ppid=$(printf "%08d" "${ppid}") + + # If the parent is in the list of pids, this isn't the parent process. + if [ "${_is_parent_pids#*"${ppid}"}" != "${_is_parent_pids}" ] ; then + return 1 + fi + + # Yes, this is the parent process. + return 0 +} + ## valgrind_check (exitfile): # Check for any memory leaks recorded in valgrind logfiles associated with a # test exitfile. Return the filename if there's a leak; otherwise return an @@ -368,41 +406,45 @@ valgrind_check() { return fi + # Get a normalized list of pids. + _valgrind_check_pids=$(_get_pids "${_valgrind_check_logfiles}") + # If the valgrind logfiles contain "-valgrind-parent-", then we only - # want to check the parent (the lowest pid). - for _valgrind_check_logfile in ${_valgrind_check_logfiles} ; do - if [ "${_valgrind_check_logfile#*-valgrind-parent-}" != "${_valgrind_check_logfile}" ]; then - # Only check the parent - _val_checkl "${_valgrind_check_logfile}" - return "$?" - fi - done + # want to check the parent. The parent is the logfile whose "parent + # pid" is not in the list of pids. (If one logfile contains + # "-valgrind-parent-" then all of them should have it, so we can + # simply check if that string occurs in the list of logfiles.) + if [ "${_valgrind_check_logfiles#*-valgrind-parent-}" != \ + "${_valgrind_check_logfiles}" ]; then + _valgrind_check_parent=1 + else + _valgrind_check_parent=0 + fi - # If there's two files, there's a fork() -- likely within - # daemonize() -- so only pay attention to the child. - if [ "${_valgrind_check_num}" -eq "2" ]; then - # Find both pids. - _valgrind_check_val_pids="" - for _valgrind_check_logfile in ${_valgrind_check_logfiles} ; do - _valgrind_check_val_pid=$(head -n 1 "${_valgrind_check_logfile}" | cut -d "=" -f 3) - _valgrind_check_val_pids="${_valgrind_check_val_pids} ${_valgrind_check_val_pid}" - done - - # Find the logfile which has a parent in the list of pids. - for _valgrind_check_logfile in ${_valgrind_check_logfiles} ; do - _valgrind_check_val_parent_pid=$(grep "Parent PID:" "${_valgrind_check_logfile}" | \ - awk '{ print $4 }') - if [ "${_valgrind_check_val_pids#*"${_valgrind_check_val_parent_pid}"}" != \ - "${_valgrind_check_val_pids}" ]; then + # Check the logfiles depending on whether it's the parent or not, + # and whether we want to check the parent or children. + for _valgrind_check_logfile in ${_valgrind_check_logfiles} ; do + if _is_parent "${_valgrind_check_logfile}" \ + "${_valgrind_check_pids}" ; then + # This is the parent. + if [ "${_valgrind_check_parent}" -eq 1 ] ; then _val_checkl "${_valgrind_check_logfile}" - return "$?" + # Bail if there's a problem. + if [ "$?" -ne 0 ]; then + return + fi fi - done - fi - - # Programmer error; hard bail. - echo "Programmer error: wrong number of valgrind logfiles!" 1>&2 - exit 1 + else + # This is a child. + if [ "${_valgrind_check_parent}" -eq 0 ] ; then + _val_checkl "${_valgrind_check_logfile}" + # Bail if there's a problem. + if [ "$?" -ne 0 ]; then + return + fi + fi + fi + done } ## valgrind_init(): From e2d35cc2ea41bbbb68683b507360e25c9ed3cefa Mon Sep 17 00:00:00 2001 From: Graham Percival Date: Thu, 18 Jan 2024 11:04:10 -0800 Subject: [PATCH 2/2] tests: add timeout to wait_while() --- tests/shared_test_functions.sh | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/tests/shared_test_functions.sh b/tests/shared_test_functions.sh index 1c23bb07..342f560f 100644 --- a/tests/shared_test_functions.sh +++ b/tests/shared_test_functions.sh @@ -22,8 +22,8 @@ # Look for ${cmd} in the ${PATH}, and ensure that it supports ${args}. # - has_pid(cmd): # Look for a ${cmd} in $(ps). -# - wait_while(func): -# Wait until ${func} returns non-zero. +# - wait_while(timeout, func): +# Wait up to ${timeout} milliseconds, or until ${func} returns non-zero. # - setup_check(description, check_prev): # Set up the below variables. # - expected_exitcode(expected, actual): @@ -133,11 +133,14 @@ has_pid() { return 1 } -## wait_while(func): +## wait_while(timeout, func): # Wait while ${func} returns 0. If ${msleep} is defined, use that to wait -# 100ms; otherwise, wait in 1 second increments. +# 100ms; otherwise, wait in 1 second increments. If ${timeout} is non-zero, +# return 1 if ${timeout} milliseconds have passed. wait_while() { _wait_while_ms=0 + _wait_while_timeout=$1 + shift 1 # Check for the ending condition while "$@"; do @@ -147,8 +150,17 @@ wait_while() { "${_wait_while_ms}" "$*" 1>&2 fi + # Bail if we've exceeded the timeout + if [ "${_wait_while_timeout}" -gt 0 ] && \ + [ "${_wait_while_ms}" -gt "${_wait_while_timeout}" ]; then + if [ "${VERBOSE}" -ne 0 ]; then + printf "Bail; timeout exceeded\n" 1>&2 + fi + return 1 + fi + # Wait using the appropriate binary - if [ -n "${msleep:-}" ]; then + if [ -n "${msleep:-}" ]; then "${msleep}" 100 _wait_while_ms=$((_wait_while_ms + 100)) else