From 045912cd6abbfd411e29db000b4f945a73d3753a Mon Sep 17 00:00:00 2001 From: Marc Herbert Date: Tue, 24 May 2022 16:21:40 -0700 Subject: [PATCH] hijack: add a quick write test to monitor storage perf. after each test We've experienced a fair number of test failures that seem to point to storage performance issues: https://github.com/thesofproject/linux/issues/3387 https://github.com/thesofproject/linux/issues/3669 This (temporary?) addition runs a quick write test after each audio test to monitor storage sanity. As a bonus feature the "sync" could help us collect more logs. On our (slowest) BYT devices the test adds 3s per test; much less on newer devices. Sample output: ``` 2022-05-24 23:20:41 UTC [INFO] pkill -TERM sof-logger 2022-05-24 23:20:42 UTC [INFO] nlines=1132 /home/mherber2/SOF/sof-test/logs/BOGUS-check-playback/2022-05-24-16:20:35-3288/slogger.txt + timeout -s CONT 5 sudo sync real 0m0.062s user 0m0.005s sys 0m0.019s + timeout -s CONT 10 dd if=/dev/zero of=/home/mherber2/HD_TEST_DELETE_ME bs=1M count=200 conv=fsync 200+0 records in 200+0 records out 209715200 bytes (210 MB, 200 MiB) copied, 2.0893 s, 100 MB/s + timeout -s CONT 5 sudo sync real 0m0.037s user 0m0.004s sys 0m0.018s 2022-05-24 23:20:44 UTC [INFO] Test Result: PASS! ``` Signed-off-by: Marc Herbert --- case-lib/hijack.sh | 2 ++ case-lib/lib.sh | 19 +++++++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/case-lib/hijack.sh b/case-lib/hijack.sh index 65a6e7ff3..6b0f3f1bd 100644 --- a/case-lib/hijack.sh +++ b/case-lib/hijack.sh @@ -100,6 +100,8 @@ function func_exit_handler() fi + storage_checks || exit_status=1 + if [[ "$KERNEL_CHECKPOINT" =~ ^[0-9]{10} ]]; then # Do not collect the entire duration of the test but only the # last iteration. 
diff --git a/case-lib/lib.sh b/case-lib/lib.sh
index c9ea3e355..eda3cf85d 100644
--- a/case-lib/lib.sh
+++ b/case-lib/lib.sh
@@ -75,6 +75,39 @@ poll_wait_for()
     $pass
 }
 
+
+# Quick storage sanity check: flush pending writes, write a 200 MiB file
+# with dd to show the current write speed, flush again, then delete the
+# file.
+#
+# Returns 0 when every step completed within its time budget, otherwise
+# the non-zero exit status of the first step that failed or was too slow.
+# The test file is removed in both cases.
+storage_checks()
+{
+    local max_sync_duration=5
+    local test_file=~/HD_TEST_DELETE_ME
+    local ret=0
+
+    ( set -x
+      # Thanks to CONT this does not actually timeout; it only returns a
+      # non-zero exit status when taking too long.
+      time timeout -s CONT "$max_sync_duration" sudo sync || return $?
+      # Spend a few seconds to test and show the current write speed
+      timeout -s CONT 6 dd if=/dev/zero of="$test_file" bs=1M count=200 conv=fsync ||
+          return $?
+      time timeout -s CONT "$max_sync_duration" sudo sync
+    ) || ret=$?
+
+    # Clean up even after a failure so a slow or failed run does not leave
+    # the test file behind. A failing rm is reported unless an earlier
+    # error already is.
+    rm -f "$test_file" || test "$ret" -ne 0 || ret=1
+
+    return "$ret"
+}
+
+
 setup_kernel_check_point()
 {
     # Make the check point $SOF_TEST_INTERVAL second(s) earlier to avoid