Skip to content

Commit

Permalink
Unify run_mrt.sh scripts
Browse files Browse the repository at this point in the history
  • Loading branch information
snukky committed Feb 25, 2022
1 parent d1b224e commit c0497dd
Show file tree
Hide file tree
Showing 2 changed files with 118 additions and 51 deletions.
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@ pure C++ with minimal dependencies.
This repository contains the regression test framework for the main development
repository: https://github.com/marian-nmt/marian-dev.

Tests have been developed for Linux for Marian compiled using GCC 7+.
Tests have been developed for Linux for Marian compiled using GCC 8+ and Nvidia
Maxwell/Pascal GPUs.


## Structure
Expand Down
166 changes: 116 additions & 50 deletions run_mrt.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,20 +20,47 @@
SHELL=/bin/bash

# Locale and the main locations used by the runner. MRT_MARIAN defaults to
# ../build next to this script unless MARIAN is set by the caller.
export LC_ALL=C.UTF-8
export MRT_ROOT="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
export MRT_TOOLS="$MRT_ROOT/tools"
export MRT_MARIAN="$( realpath "${MARIAN:-$MRT_ROOT/../build}" )"

RUN_LOGS="$MRT_ROOT/previous.log.tmp" # Logging file for log and logn commands
# Quoted and terminated with -- so that a checkout path containing spaces or a
# leading dash cannot make rm act on unintended files.
rm -f -- "$RUN_LOGS"
# Needed so that previous.log is not overwritten when it is provided as an argument
# Exit handler: promote the temporary run log to previous.log.
# Globals: RUN_LOGS (read), MRT_ROOT (read)
# Returns: non-zero without touching anything when the temporary log is
#          missing or empty; otherwise the status of mv.
cleanup() {
    [[ -s "$RUN_LOGS" ]] || return 1
    mv "$RUN_LOGS" "$MRT_ROOT/previous.log"
}
trap cleanup EXIT

# Print a timestamped message line to stdout and append it to the run log.
# Globals:   RUN_LOGS (read) - file the message is appended to
# Arguments: the words of the message
# Outputs:   "[MM/DD/YYYY HH:MM:SS] message" followed by a newline
function log {
    # "$*" joins the arguments with single spaces; the message is emitted once
    # and tee duplicates it into the log file.
    echo "[$(date '+%m/%d/%Y %T')] $*" | tee -a "$RUN_LOGS"
}

# Same as log, but without the trailing newline so that the caller can finish
# the line later (for example with a test status).
# Globals:   RUN_LOGS (read) - file the message is appended to
# Arguments: the words of the message
# Outputs:   "[MM/DD/YYYY HH:MM:SS] message" with no newline
function logn {
    echo -n "[$(date '+%m/%d/%Y %T')] $*" | tee -a "$RUN_LOGS"
}

# Echo a message without a timestamp to stdout and append it to the run log.
# Globals:   RUN_LOGS (read) - file the message is appended to
# Arguments: passed to echo unchanged, so a leading -n suppresses the newline
function loge {
    # "$@" is quoted so that messages are printed literally: no glob expansion
    # of characters such as ? or *, and leading/repeated spaces are preserved.
    echo "$@" | tee -a "$RUN_LOGS"
}

# MRT_ROOT, MRT_TOOLS and MRT_MARIAN are exported once near the top of the
# script; they are not re-derived here, so the quoted values stay in effect.
log "Running on $(hostname) as process $$"

# Pick the executable suffix (MRT_BIN) and platform tag (MRT_OS) from uname.
# Windows shells (Cygwin/MinGW) need ".exe"; anything else but Linux aborts.
UNAME=$(uname)
case "$UNAME" in
    Linux)
        log "Running on Linux machine"
        export MRT_BIN=
        export MRT_OS=linux
        ;;
    CYGWIN*|MINGW*)
        log "Running on Windows machine"
        export MRT_BIN=.exe
        export MRT_OS=windows
        ;;
    *)
        log "Unsupported or unrecognized machine with uname= $UNAME"
        exit 1
        ;;
esac

# Print folders which contain models and data for regression tests
export MRT_MODELS="$( realpath ${MODELS:-$MRT_ROOT/models} )"
Expand All @@ -43,41 +70,51 @@ log "Using models from: $MRT_MODELS"
log "Using data from: $MRT_DATA"

# Try adding build/ to MARIAN for backward compatibility: when the decoder is
# not found directly in MRT_MARIAN, assume the binaries live in its build/
# subdirectory. MRT_BIN carries the platform's executable suffix.
if [[ ! -e "$MRT_MARIAN/marian-decoder$MRT_BIN" ]]; then
    MRT_MARIAN="$MRT_MARIAN/build"
fi

# Check if required tools are present in marian directory; stop at the first
# missing executable, reporting the full path that was looked up.
for cmd in marian marian-decoder marian-scorer marian-vocab; do
    if [ ! -e "$MRT_MARIAN/$cmd$MRT_BIN" ]; then
        loge "Error: '$MRT_MARIAN/$cmd$MRT_BIN' not found. Do you need to compile the toolkit first?"
        exit 1
    fi
done

# Common Marian executables (full paths including the platform suffix)
export MRT_MARIAN_TRAINER="$MRT_MARIAN/marian$MRT_BIN"
export MRT_MARIAN_DECODER="$MRT_MARIAN/marian-decoder$MRT_BIN"
export MRT_MARIAN_SCORER="$MRT_MARIAN/marian-scorer$MRT_BIN"
export MRT_MARIAN_VOCAB="$MRT_MARIAN/marian-vocab$MRT_BIN"

log "Using Marian binary: $MRT_MARIAN_DECODER"

# Log Marian version. The path is quoted so that directories containing spaces
# work; stderr is folded into the captured text.
export MRT_MARIAN_VERSION=$("$MRT_MARIAN_TRAINER" --version 2>&1)
log "Version: $MRT_MARIAN_VERSION"

# Get CMake settings from the --build-info option.
# First make sure the option is advertised in --help at all.
if ! grep -q "build-info" < <( "$MRT_MARIAN_TRAINER" --help ); then
    loge "Error: Marian does not have the required --build-info option. Use newer version of Marian"
    exit 1
fi

# The build information is read from stderr and stored in cmake.log
"$MRT_MARIAN_TRAINER" --build-info all 2> "$MRT_ROOT/cmake.log"

# An empty file or the explicit "not available" message means the settings
# below cannot be detected; this is only a warning, the run continues.
if test ! -s "$MRT_ROOT/cmake.log" || grep -q "Error: build-info is not available" "$MRT_ROOT/cmake.log"; then
    loge "Warning: Marian does not set the required --build-info option. Tests may not work properly"
fi

# Check Marian compilation settings.
# Each variable is extracted from cmake.log exactly once, case-insensitively.
# BUILD_TYPE and COMPILER hold the value after '='; the USE_* variables hold
# the matching line(s) when the option is enabled (true/on/1) and are empty
# otherwise.
export MRT_MARIAN_BUILD_TYPE=$(grep -i "CMAKE_BUILD_TYPE=" "$MRT_ROOT/cmake.log" | cut -f2 -d=)
export MRT_MARIAN_COMPILER=$(grep -i "CMAKE_CXX_COMPILER=" "$MRT_ROOT/cmake.log" | cut -f2 -d=)
# NOTE(review): USE_MKL is derived from COMPILE_CPU, as in the original code.
export MRT_MARIAN_USE_MKL=$(grep -Ei "COMPILE_CPU=(true|on|1)" "$MRT_ROOT/cmake.log")
export MRT_MARIAN_USE_CUDA=$(grep -Ei "COMPILE_CUDA=(true|on|1)" "$MRT_ROOT/cmake.log")
export MRT_MARIAN_USE_CUDNN=$(grep -Ei "USE_CUDNN=(true|on|1)" "$MRT_ROOT/cmake.log")
export MRT_MARIAN_USE_SENTENCEPIECE=$(grep -Ei "USE_SENTENCEPIECE=(true|on|1)" "$MRT_ROOT/cmake.log")
export MRT_MARIAN_USE_FBGEMM=$(grep -Ei "USE_FBGEMM=(true|on|1)" "$MRT_ROOT/cmake.log")
export MRT_MARIAN_USE_UNITTESTS=$(grep -Ei "COMPILE_TESTS=(true|on|1)" "$MRT_ROOT/cmake.log")

log "Build type: $MRT_MARIAN_BUILD_TYPE"
log "Using compiler: $MRT_MARIAN_COMPILER"
Expand All @@ -87,13 +124,28 @@ log "Using SentencePiece: $MRT_MARIAN_USE_SENTENCEPIECE"
log "Using FBGEMM: $MRT_MARIAN_USE_FBGEMM"
log "Unit tests: $MRT_MARIAN_USE_UNITTESTS"


# Number of available devices

# Count the entries of a comma-separated device list.
# Arguments: $1 - list such as "0,1,2" (may be empty)
# Outputs:   number of commas plus one; an empty list yields 1
function mrt_count_devices {
    # Keep only the commas and measure what is left. Counting characters is
    # required here: grep -c would count matching lines, i.e. at most 1.
    local commas=${1//[!,]/}
    echo $(( ${#commas} + 1 ))
}

cuda_num_devices=$(mrt_count_devices "$CUDA_VISIBLE_DEVICES")
# An explicit NUM_DEVICES from the caller takes precedence
export MRT_NUM_DEVICES=${NUM_DEVICES:-$cuda_num_devices}

log "Using CUDA visible devices: $CUDA_VISIBLE_DEVICES"
log "Using number of GPU devices: $MRT_NUM_DEVICES"


# CPU architecture details
# /proc/cpuinfo is copied to cpuinfo.log only when that file does not exist
# yet; an existing cpuinfo.log is reused as is.
if [[ ! -e "$MRT_ROOT/cpuinfo.log" ]]; then
    cat /proc/cpuinfo > "$MRT_ROOT/cpuinfo.log"
fi

# Each flag is set to "true" on a case-insensitive substring match and is left
# untouched otherwise (so "avx512" also matches any longer avx512* name).
if grep -qi "avx2" "$MRT_ROOT/cpuinfo.log"; then MRT_CPU_AVX2=true; fi
if grep -qi "avx512" "$MRT_ROOT/cpuinfo.log"; then MRT_CPU_AVX512=true; fi
if grep -qi "avx512_vnni" "$MRT_ROOT/cpuinfo.log"; then MRT_CPU_AVX512VNNI=true; fi
export MRT_CPU_AVX2 MRT_CPU_AVX512 MRT_CPU_AVX512VNNI

log "CPU intrinsics: avx2=$MRT_CPU_AVX2 avx512=$MRT_CPU_AVX512 avx512vnni=$MRT_CPU_AVX512VNNI"


# Time out
export MRT_TIMEOUT=${TIMEOUT:-5m} # the default time out is 5 minutes, see `man timeout`
cmd_timeout=""
if [ $MRT_TIMEOUT != "0" ]; then
Expand All @@ -105,14 +157,19 @@ log "Using time out: $MRT_TIMEOUT"
# Exit codes
# Exported so that they are visible to child processes. In the visible part of
# this script only SUCCESS, SKIP and TIMEOUT are compared against a test's exit
# status; the more specific SKIP_* values are presumably returned by individual
# test scripts to state why they skipped -- TODO confirm how they are handled.
export EXIT_CODE_SUCCESS=0
export EXIT_CODE_SKIP=100
export EXIT_CODE_SKIP_MISSING_FILE=101
export EXIT_CODE_SKIP_NO_FBGEMM=105
export EXIT_CODE_SKIP_NO_SENTENCEPIECE=106
export EXIT_CODE_SKIP_NO_AVX2=110
export EXIT_CODE_SKIP_NO_AVX512=111
export EXIT_CODE_TIMEOUT=124 # Exit code returned by the timeout command if timed out

# Format the time elapsed between two timestamps as HH:MM:SS.mmms.
# Arguments: $1 - start time, $2 - end time; both in seconds since the epoch
#            with an optional fractional part, as printed by `date +%s.%N`
# Outputs:   e.g. "01:02:05.500s" (no trailing newline), rounded to the
#            nearest millisecond; a negative interval is reported as zero
# Implemented with shell integer arithmetic only, so it needs neither bc nor a
# python interpreter and cannot silently produce empty fields.
function format_time {
    local stamp sec frac
    local ns=()
    for stamp in "$1" "$2"; do
        sec=${stamp%%.*}
        frac=
        if [[ "$stamp" == *.* ]]; then
            frac=${stamp#*.}
        fi
        # Normalize the fraction to exactly nine digits (nanoseconds)
        frac=${frac//[!0-9]/}
        frac="${frac}000000000"
        frac=${frac:0:9}
        # 10# forces base 10, otherwise leading zeros would mean octal
        ns+=( $(( 10#${sec:-0} * 1000000000 + 10#$frac )) )
    done

    local total_ms=$(( (ns[1] - ns[0] + 500000) / 1000000 ))
    if (( total_ms < 0 )); then
        total_ms=0
    fi

    # Splitting one integer guarantees correct carries (never "60.000" seconds)
    printf '%02d:%02d:%02d.%03ds' \
        "$(( total_ms / 3600000 ))" \
        "$(( total_ms / 60000 % 60 ))" \
        "$(( total_ms / 1000 % 60 ))" \
        "$(( total_ms % 1000 ))"
}

Expand All @@ -126,7 +183,7 @@ if [ $# -ge 1 ]; then
# A log file with paths to test files
if [[ "$arg" = *.log ]]; then
# Extract tests from .log file
args=$(cat $arg | grep '/test_.*\.sh' | grep -v '/_' | sed 's/^ *- *//' | tr '\n' ' ' | sed 's/ *$//')
args=$(cat $arg | grep -vP '^\[' | grep '/test_.*\.sh' | grep -v '/_' | sed 's/^ *- *//' | tr '\n' ' ' | sed 's/ *$//')
test_prefixes="$test_prefixes $args"
# A hash tag
elif [[ "$arg" = '#'* ]]; then
Expand All @@ -141,8 +198,14 @@ if [ $# -ge 1 ]; then
done
fi

# Bail out unless test_prefixes holds something other than space characters
[[ -n "${test_prefixes// }" ]] || {
    log "Error: no tests found in the specified input(s): $@"
    exit 1
}

# Extract all subdirectories, which will be traversed to look for regression tests.
# Directories whose path contains a component starting with "_" are left out.
# $test_prefixes is intentionally unquoted: it is a space-separated list of paths.
test_dirs=$(find $test_prefixes -type d | grep -v "/_" | cat)

if grep -q "/test_.*\.sh" <<< "$test_prefixes"; then
test_files=$(printf '%s\n' $test_prefixes | sed 's!*/!!')
Expand Down Expand Up @@ -208,7 +271,7 @@ do
# Skip the test when the nosetup flag is set: count it, remember its path,
# report the status once, and return to the root directory before moving on
# to the next test.
# NOTE(review): nosetup is set outside the visible code -- presumably when the
# directory's setup step did not succeed; confirm.
if [ "$nosetup" = true ]; then
    ((++count_skipped))
    tests_skipped+=("$test_path")
    loge " skipped"
    cd "$MRT_ROOT"
    continue
fi
Expand All @@ -221,24 +284,24 @@ do
# Check exit code: classify the finished test, update the counters and the
# per-category lists, and print the status once via loge.
if [ "$exit_code" -eq "$EXIT_CODE_SUCCESS" ]; then
    ((++count_passed))
    loge " OK"
elif [ "$exit_code" -eq "$EXIT_CODE_SKIP" ]; then
    ((++count_skipped))
    tests_skipped+=("$test_path")
    loge " skipped"
elif [ "$exit_code" -eq "$EXIT_CODE_TIMEOUT" ]; then
    ((++count_timedout))
    tests_timedout+=("$test_path")
    # Add a comment to the test log file that it timed out. MRT_TIMEOUT is the
    # limit actually in effect; TIMEOUT is only the optional user override and
    # is empty when the default is used.
    echo "The test timed out after $MRT_TIMEOUT" >> "$test_file.log"
    # A timed out test is a failed test
    ((++count_failed))
    loge " timed out"
    success=false
else
    ((++count_failed))
    tests_failed+=("$test_path")
    loge " failed"
    success=false
fi

Expand Down Expand Up @@ -270,39 +333,42 @@ done
time_end=$(date +%s.%N)
time_total=$(format_time $time_start $time_end)

###############################################################################
# Print skipped and failed tests
# Everything printed with loge is also appended to the run log; the "Logs:"
# section is printed to the terminal only.
if [ -n "$tests_skipped" ] || [ -n "$tests_failed" ] || [ -n "$tests_timedout" ]; then
    loge "---------------------"
fi
[[ -z "$tests_skipped" ]] || loge "Skipped:"
for test_name in "${tests_skipped[@]}"; do
    loge "- $test_name"
done
[[ -z "$tests_failed" ]] || loge "Failed:"
for test_name in "${tests_failed[@]}"; do
    loge "- $test_name"
done
[[ -z "$tests_timedout" ]] || loge "Timed out:"
for test_name in "${tests_timedout[@]}"; do
    loge "- $test_name"
done
[[ -z "$tests_failed" ]] || echo "Logs:"
for test_name in "${tests_failed[@]}"; do
    # Anchored at the end so that only the script's own .sh suffix is extended
    echo "- $(realpath "$test_name" | sed 's/\.sh$/.sh.log/')"
done


###############################################################################
# Print summary
# The summary is assembled on one line: the pieces are printed without a
# newline (loge -n) and the line is terminated by the final empty loge.
loge "---------------------"
loge -n "Ran $count_all tests in $time_total, $count_passed passed, $count_skipped skipped, $count_failed failed"
if [ -n "$tests_timedout" ]; then
    loge -n " (incl. $count_timedout timed out)"
fi
loge ""

# Return exit code: success requires that nothing failed and that at least one
# test was actually run.
if $success && [ "$count_all" -gt 0 ]; then
    loge "OK"
    exit 0
else
    loge "FAILED"
    exit 1
fi

0 comments on commit c0497dd

Please sign in to comment.