From 09bdb9f121f0097461dba40d1689087fc3112462 Mon Sep 17 00:00:00 2001 From: Frantisek Sumsal Date: Mon, 20 Nov 2023 23:34:09 +0100 Subject: [PATCH 1/3] test: clean up the save_journal() stuff a bit Let's save all journals from the test machine instead of calling export on each journal file separately, which makes the code less complicated (and probably faster). --- test/test-functions | 57 ++++++++++++++++++++++++--------------------- 1 file changed, 31 insertions(+), 26 deletions(-) diff --git a/test/test-functions b/test/test-functions index dfe4b8e24b5d3..674ba1ce91dbc 100644 --- a/test/test-functions +++ b/test/test-functions @@ -1765,48 +1765,53 @@ check_coverage_reports() { } save_journal() { + local source_dir="${1:?}" + local state="${2:?}" # Default to always saving journal local save="yes" + local dest_dir dest_name dest - if [ "${TEST_SAVE_JOURNAL}" = "no" ]; then + if [[ "${TEST_SAVE_JOURNAL:-}" == "no" ]]; then save="no" - elif [ "${TEST_SAVE_JOURNAL}" = "fail" ] && [ "$2" = "0" ]; then + elif [[ "${TEST_SAVE_JOURNAL:-}" == "fail" && "$state" -eq 0 ]]; then save="no" fi - if [ -n "${ARTIFACT_DIRECTORY}" ]; then - dest="${ARTIFACT_DIRECTORY}/${testname:?}.journal" + if [[ -n "${ARTIFACT_DIRECTORY:-}" ]]; then + dest_dir="$ARTIFACT_DIRECTORY" + dest_name="${testname:?}.journal" else - dest="${TESTDIR:?}/system.journal" + dest_dir="${TESTDIR:?}" + dest_name="system.journal" fi - for j in "${1:?}"/*; do - if get_bool "$save"; then - if [ "$SYSTEMD_JOURNAL_REMOTE" = "" ]; then - cp -a "$j" "$dest" - else - "$SYSTEMD_JOURNAL_REMOTE" -o "$dest" --getter="$JOURNALCTL -o export -D $j" - fi - fi + if [[ -n "$TEST_SHOW_JOURNAL" ]]; then + echo "---- $source_dir ----" + "$JOURNALCTL" --no-pager -o short-monotonic --no-hostname --priority="$TEST_SHOW_JOURNAL" -D "$source_dir" + fi - if [ -n "${TEST_SHOW_JOURNAL}" ]; then - echo "---- $j ----" - "$JOURNALCTL" --no-pager -o short-monotonic --no-hostname --priority="${TEST_SHOW_JOURNAL}" -D "$j" + if get_bool "$save"; then + # If we don't have systemd-journal-remote copy all journals from /var/log/journal/ + # to $dest_dir/journals/ as is, otherwise merge all journals into a single .journal + # file + if [[ -z "${SYSTEMD_JOURNAL_REMOTE:-}" ]]; then + dest="$dest_dir/journals" + mkdir -p "$dest" + cp -a "$source_dir/*" "$dest/" + else + dest="$dest_dir/$dest_name" + "$SYSTEMD_JOURNAL_REMOTE" -o "$dest" --getter="$JOURNALCTL -o export -D $source_dir" fi - rm -r "$j" - done - - if ! get_bool "$save"; then - return 0 - fi + if [[ -n "${SUDO_USER:-}" ]]; then + setfacl -R -m "user:$SUDO_USER:r-X" "$dest" + fi - if [ -n "${SUDO_USER}" ]; then - setfacl -m "user:${SUDO_USER:?}:r-X" "$dest"* + # we want to print this sometime later, so save this in a variable + JOURNAL_LIST="$(ls -lR "$dest")" fi - # we want to print this sometime later, so save this in a variable - JOURNAL_LIST="$(ls -l "$dest"*)" + rm -rf "${source_dir:?}"/* } check_result_common() { From c5035a30e290ca602c01e41056225b9475fab399 Mon Sep 17 00:00:00 2001 From: Frantisek Sumsal Date: Tue, 21 Nov 2023 11:31:36 +0100 Subject: [PATCH 2/3] test: don't make the final vacuum multiple of 8M As this would usually mean we'd nuke everything, since there are two open journals - system and user - both being 8M in size. --- test/units/testsuite-04.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/units/testsuite-04.sh b/test/units/testsuite-04.sh index adcbf8be19fa7..2a8f7972e407c 100755 --- a/test/units/testsuite-04.sh +++ b/test/units/testsuite-04.sh @@ -4,7 +4,7 @@ set -eux set -o pipefail # Limit the maximum journal size -trap "journalctl --rotate --vacuum-size=16M" EXIT +trap "journalctl --rotate --vacuum-size=20M" EXIT # shellcheck source=test/units/test-control.sh . "$(dirname "$0")"/test-control.sh From fa6f37c04391d0103c95e24813ad345c2d5c4b67 Mon Sep 17 00:00:00 2001 From: Frantisek Sumsal Date: Tue, 21 Nov 2023 09:37:21 +0100 Subject: [PATCH 3/3] test: don't redirect stdout/stderr to console Turns out that redirecting a lot of output to the console can have some funny effects, like random kernel soft lockups. I spotted this in various CIs, but it remained almost entirely hidden thanks to `softlockup_panic=1`, until 1a36d2672f which introduced a couple of tests that log quite a lot in a short amount of time. This, in combination with newer kernel version, which, for some reason, seem to be more susceptible to such soft lockups, made the Arch Linux jobs soft lockup quite a lot, see [0]. While debugging this I also noticed that runs which don't redirect stdout/stderr to the console are noticeably faster, e.g.: # TEST-71 nspawn + QEMU (KVM), StandardOutput=journal+console Elapsed (wall clock) time (h:mm:ss or m:ss): 0:24.64 # TEST-71 nspawn + QEMU (KVM), StandardOutput=journal Elapsed (wall clock) time (h:mm:ss or m:ss): 0:17.95 # TEST-71 nspawn + QEMU, StandardOutput=journal+console Elapsed (wall clock) time (h:mm:ss or m:ss): 2:04.70 # TEST-71 nspawn + QEMU, StandardOutput=journal Elapsed (wall clock) time (h:mm:ss or m:ss): 1:44.48 # TEST-04 QEMU, StandardOutput=journal+console Elapsed (wall clock) time (h:mm:ss or m:ss): 4:22.70 # TEST-04 QEMU, StandardOutput=console Elapsed (wall clock) time (h:mm:ss or m:ss): 5:04.67 Given all this, let's effectively revert ba7abf79a5, and dump the testsuite-related journal messages only after the test finishes, so they don't go through the slow console. Resolves: systemd/systemd-centos-ci#660 [0] https://github.com/systemd/systemd-centos-ci/issues/660 --- test/test-functions | 21 ++++----------------- 1 file changed, 4 insertions(+), 17 deletions(-) diff --git a/test/test-functions b/test/test-functions index 674ba1ce91dbc..e2d3f109a21b2 100644 --- a/test/test-functions +++ b/test/test-functions @@ -1032,7 +1032,6 @@ EOF DefaultEnvironment=${default_environment[*]} ManagerEnvironment=${manager_environment[*]} DefaultTimeoutStartSec=180s -DefaultStandardOutput=journal+console EOF # ASAN and syscall filters aren't compatible with each other. @@ -1785,10 +1784,10 @@ save_journal() { dest_name="system.journal" fi - if [[ -n "$TEST_SHOW_JOURNAL" ]]; then - echo "---- $source_dir ----" - "$JOURNALCTL" --no-pager -o short-monotonic --no-hostname --priority="$TEST_SHOW_JOURNAL" -D "$source_dir" - fi + # Show messages from the testsuite-XX.service or messages with priority "warning" and higher + echo " --- $source_dir ---" + "$JOURNALCTL" --no-pager --no-hostname -o short-monotonic -D "$source_dir" \ + _SYSTEMD_UNIT="testsuite-${TESTID:?}.service" + PRIORITY=4 + PRIORITY=3 + PRIORITY=2 + PRIORITY=1 + PRIORITY=0 if get_bool "$save"; then # If we don't have systemd-journal-remote copy all journals from /var/log/journal/ @@ -3339,18 +3338,6 @@ test_setup() { mask_supporting_services fi - # Send stdout/stderr of testsuite-*.service units to both journal and - # console to make debugging in CIs easier - # Note: we can't use a dropin for `testsuite-.service`, since that also - # overrides 'sub-units' of some tests that already use a specific - # value for Standard(Output|Error)= - # (e.g. test/units/testsuite-66-deviceisolation.service) - if ! get_bool "$INTERACTIVE_DEBUG"; then - local dropin_dir="${initdir:?}/etc/systemd/system/testsuite-${TESTID:?}.service.d" - mkdir -p "$dropin_dir" - printf '[Service]\nStandardOutput=journal+console\nStandardError=journal+console' >"$dropin_dir/99-stdout.conf" - fi - if get_bool "$IS_BUILT_WITH_COVERAGE"; then # Do an initial coverage capture, to make sure the final report includes # files that the tests didn't touch at all