From 5f2a2f5ecaf857c246056ceeb69c9d58814561ff Mon Sep 17 00:00:00 2001
From: Alexey Rivkin
Date: Sun, 7 Apr 2024 22:09:55 +0300
Subject: [PATCH] AZP/TEST: Implement remarks by Yossi

---
Reviewer note: the line breaks of this patch were restored from a copy whose
newlines had been flattened to spaces. Hunk line counts and the diffstat agree
with the headers below. Leading indentation, and the trailing whitespace on the
removed 'echo "##vso[...variable=HCA..."' line, are inferred - confirm against
the target tree before applying.

 buildlib/pr/mad_tests.yml  |  8 ++-----
 buildlib/tools/test_mad.sh | 48 +++++++++++++++++---------------------
 2 files changed, 24 insertions(+), 32 deletions(-)

diff --git a/buildlib/pr/mad_tests.yml b/buildlib/pr/mad_tests.yml
index 8580f2d0d22..c2f6c850628 100644
--- a/buildlib/pr/mad_tests.yml
+++ b/buildlib/pr/mad_tests.yml
@@ -21,7 +21,6 @@ jobs:
         displayName: Set Vars
       - bash: |
           source ./buildlib/tools/test_mad.sh
-          node_setup
           build_ucx_in_docker
           docker_run_srv
         displayName: Setup Server
@@ -40,7 +39,6 @@ jobs:
         retryCountOnTaskFailure: 5
       - bash: |
           source ./buildlib/tools/test_mad.sh
-          node_setup
           build_ucx
         displayName: Setup Client
 
@@ -60,9 +58,8 @@ jobs:
       - checkout: none
       - bash: |
           source ./buildlib/tools/test_mad.sh
-          run_mad_test_lid
+          run_mad_test lid $(LID)
         env:
-          LID: $(LID)
           HCA: $(HCA)
         displayName: Test LID
 
@@ -95,9 +92,8 @@ jobs:
       - checkout: none
       - bash: |
           source ./buildlib/tools/test_mad.sh
-          run_mad_test_guid
+          run_mad_test guid $(GUID)
         env:
-          GUID: $(GUID)
           HCA: $(HCA)
         displayName: Test GUID
 
diff --git a/buildlib/tools/test_mad.sh b/buildlib/tools/test_mad.sh
index cb69b78d04f..a647d81f3cf 100755
--- a/buildlib/tools/test_mad.sh
+++ b/buildlib/tools/test_mad.sh
@@ -2,12 +2,12 @@
 set -exE -o pipefail
 
 IMAGE="rdmz-harbor.rdmz.labs.mlnx/ucx/x86_64/rhel8.2/builder:mofed-5.0-1.0.0.0"
-cd "$BUILD_SOURCESDIRECTORY"
 
-node_setup() {
-    funcname
-    sudo chmod 777 /dev/infiniband/umad*
-}
+if [ -z "$BUILD_SOURCESDIRECTORY" ]; then
+    echo "Not running in Azure"
+    exit 1
+fi
+cd "$BUILD_SOURCESDIRECTORY"
 
 build_ucx() {
     funcname
@@ -25,6 +25,7 @@ build_ucx() {
 build_ucx_in_docker() {
     docker run --rm \
         --name ucx_build_"$BUILD_BUILDID" \
+        -e BUILD_SOURCESDIRECTORY="$BUILD_SOURCESDIRECTORY" \
         -v "$PWD":"$PWD" -w "$PWD" \
         -v /hpc/local:/hpc/local \
         $IMAGE \
@@ -34,19 +35,19 @@ build_ucx_in_docker() {
 }
 
 docker_run_srv() {
-    detect_hca
+    HCA=$(detect_hca)
     docker_stop_srv
     docker run --rm \
         --detach \
         --net=host \
-        -e HCA="$HCA" \
         --name ucx_perftest_"$BUILD_BUILDID" \
+        -e HCA="$HCA" \
+        -e BUILD_SOURCESDIRECTORY="$BUILD_SOURCESDIRECTORY" \
         -v "$PWD":"$PWD" -w "$PWD" \
         -v /hpc/local:/hpc/local \
-        --gpus all --ulimit memlock=-1:-1 --device=/dev/infiniband/ \
+        --ulimit memlock=-1:-1 --device=/dev/infiniband/ \
         $IMAGE \
-        bash -c "source ./buildlib/tools/test_mad.sh && \
-        ${PWD}/install/bin/ucx_perftest -K ${HCA}"
+        bash -c "${PWD}/install/bin/ucx_perftest -K ${HCA}"
 }
 
 docker_stop_srv() {
@@ -55,29 +56,25 @@ docker_stop_srv() {
 
 set_vars() {
     set +x
-    detect_hca
+    HCA=$(detect_hca)
     # Replace ':' with space for 'ibstat' format
-    HCA_IBSTAT=${HCA/:/ }
+    HCA_DEV=${HCA/:/ }
     # shellcheck disable=SC2086
-    LID=$(ibstat $HCA_IBSTAT | grep Base | awk '{print $NF}')
+    LID=$(ibstat $HCA_DEV | grep Base | awk '{print $NF}')
     # shellcheck disable=SC2086
-    GUID=$(ibstat $HCA_IBSTAT | grep GUID | awk '{print $NF}')
+    GUID=$(ibstat $HCA_DEV | grep GUID | awk '{print $NF}')
     echo "##vso[task.setvariable variable=LID;isOutput=true]$LID"
     echo "##vso[task.setvariable variable=GUID;isOutput=true]$GUID"
-    echo "##vso[task.setvariable variable=HCA;isOutput=true]$HCA" 
+    echo "##vso[task.setvariable variable=HCA;isOutput=true]$HCA"
     echo "LID: $LID"
     echo "GUID: $GUID"
     echo "HCA: $HCA"
 }
 
-run_mad_test_lid() {
-    funcname
-    "$PWD"/install/bin/ucx_perftest -t tag_bw -e -K "$HCA" -e lid:"$LID"
-}
-
-run_mad_test_guid() {
-    funcname
-    "$PWD"/install/bin/ucx_perftest -t tag_bw -e -K "$HCA" guid:"$GUID"
+run_mad_test() {
+    test_type="$1"
+    ib_add="$2"
+    "$PWD"/install/bin/ucx_perftest -t tag_bw -e -K "$HCA" -e "$test_type":"$ib_add"
 }
 
 funcname() {
@@ -87,7 +84,6 @@ funcname() {
 }
 
 detect_hca() {
-    echo "Detect first active HCA port"
-    HCA="$(ibv_devinfo | awk '/hca_id:/ {hca=$2} /port:/ {port=$2} /PORT_ACTIVE/ {print hca ":" port; exit}')"
-    export HCA
+    # Detect first active HCA port
+    ibv_devinfo | awk '/hca_id:/ {hca=$2} /port:/ {port=$2} /PORT_ACTIVE/ {print hca ":" port; exit}'
 }