Skip to content

Commit

Permalink
AZP/TEST: Implement remarks by Yossi
Browse files Browse the repository at this point in the history
  • Loading branch information
Alexey-Rivkin committed Apr 8, 2024
1 parent 20ced8f commit 917b2c8
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 32 deletions.
8 changes: 2 additions & 6 deletions buildlib/pr/mad_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ jobs:
displayName: Set Vars
- bash: |
source ./buildlib/tools/test_mad.sh
node_setup
build_ucx_in_docker
docker_run_srv
displayName: Setup Server
Expand All @@ -40,7 +39,6 @@ jobs:
retryCountOnTaskFailure: 5
- bash: |
source ./buildlib/tools/test_mad.sh
node_setup
build_ucx
displayName: Setup Client
Expand All @@ -60,9 +58,8 @@ jobs:
- checkout: none
- bash: |
source ./buildlib/tools/test_mad.sh
run_mad_test_lid
run_mad_test lid $(LID)
env:
LID: $(LID)
HCA: $(HCA)
displayName: Test LID
Expand Down Expand Up @@ -95,9 +92,8 @@ jobs:
- checkout: none
- bash: |
source ./buildlib/tools/test_mad.sh
run_mad_test_guid
run_mad_test guid $(GUID)
env:
GUID: $(GUID)
HCA: $(HCA)
displayName: Test GUID
Expand Down
47 changes: 21 additions & 26 deletions buildlib/tools/test_mad.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@
set -exE -o pipefail

IMAGE="rdmz-harbor.rdmz.labs.mlnx/ucx/x86_64/rhel8.2/builder:mofed-5.0-1.0.0.0"
cd "$BUILD_SOURCESDIRECTORY"

node_setup() {
funcname
sudo chmod 777 /dev/infiniband/umad*
}
# Everything below addresses the checkout through "$PWD" (./install,
# ./buildlib), so refuse to continue when the sources directory variable is
# missing. NOTE(review): BUILD_SOURCESDIRECTORY is presumably the Azure
# Pipelines predefined variable Build.SourcesDirectory - confirm.
if [ -z "${BUILD_SOURCESDIRECTORY:-}" ]; then
    # Diagnostics belong on stderr so they never mix with captured output.
    echo "Not running in Azure" >&2
    exit 1
fi
cd "$BUILD_SOURCESDIRECTORY"

build_ucx() {
funcname
Expand All @@ -25,6 +25,7 @@ build_ucx() {
build_ucx_in_docker() {
docker run --rm \
--name ucx_build_"$BUILD_BUILDID" \
-e BUILD_SOURCESDIRECTORY="$BUILD_SOURCESDIRECTORY" \
-v "$PWD":"$PWD" -w "$PWD" \
-v /hpc/local:/hpc/local \
$IMAGE \
Expand All @@ -34,19 +35,18 @@ build_ucx_in_docker() {
}

#######################################
# Start ucx_perftest in a detached container that shares the host network.
# Globals:
#   HCA (written)  - "<device>:<port>" as printed by detect_hca
#   BUILD_BUILDID, BUILD_SOURCESDIRECTORY, IMAGE, PWD (read)
# Returns:
#   Exit status of 'docker run'.
#######################################
docker_run_srv() {
    # Detect once and capture stdout; the value is baked into the command line
    # below, so the container does not need HCA in its environment.
    HCA=$(detect_hca)
    # NOTE(review): presumably removes a server container left over from a
    # previous attempt with the same name - confirm against docker_stop_srv.
    docker_stop_srv
    docker run --rm \
        --detach \
        --net=host \
        --name ucx_perftest_"$BUILD_BUILDID" \
        -e BUILD_SOURCESDIRECTORY="$BUILD_SOURCESDIRECTORY" \
        -v "$PWD":"$PWD" -w "$PWD" \
        -v /hpc/local:/hpc/local \
        --ulimit memlock=-1:-1 --device=/dev/infiniband/ \
        "$IMAGE" \
        bash -c "${PWD}/install/bin/ucx_perftest -K ${HCA}"
}

docker_stop_srv() {
Expand All @@ -55,29 +55,25 @@ docker_stop_srv() {

#######################################
# Detect the HCA port and publish its LID, GUID and name as pipeline
# variables.
# Globals:
#   HCA, HCA_DEV, LID, GUID (written)
# Outputs:
#   Three "##vso[task.setvariable ...]" lines followed by a readable summary,
#   all on stdout.
# Returns:
#   Non-zero when detection, ibstat, or either field lookup fails.
#######################################
set_vars() {
    # NOTE(review): tracing is presumably disabled so the "##vso" commands are
    # not repeated in the xtrace output - confirm.
    set +x
    HCA=$(detect_hca) || return
    # Replace ':' with space for 'ibstat' format
    HCA_DEV=${HCA/:/ }
    # Query the port once so LID and GUID come from the same snapshot.
    local port_info
    # shellcheck disable=SC2086 # splitting into "<device> <port>" is intended
    port_info=$(ibstat $HCA_DEV) || return
    # awk exits non-zero when nothing matched, so a missing field is an error
    # rather than an empty variable.
    LID=$(awk '/Base/ {print $NF; found = 1} END {exit !found}' <<<"$port_info") || {
        echo "ibstat output has no 'Base' line for $HCA" >&2
        return 1
    }
    GUID=$(awk '/GUID/ {print $NF; found = 1} END {exit !found}' <<<"$port_info") || {
        echo "ibstat output has no 'GUID' line for $HCA" >&2
        return 1
    }
    echo "##vso[task.setvariable variable=LID;isOutput=true]$LID"
    echo "##vso[task.setvariable variable=GUID;isOutput=true]$GUID"
    echo "##vso[task.setvariable variable=HCA;isOutput=true]$HCA"
    echo "LID: $LID"
    echo "GUID: $GUID"
    echo "HCA: $HCA"
}

run_mad_test_lid() {
funcname
"$PWD"/install/bin/ucx_perftest -t tag_bw -e -K "$HCA" -e lid:"$LID"
}

run_mad_test_guid() {
funcname
"$PWD"/install/bin/ucx_perftest -t tag_bw -e -K "$HCA" guid:"$GUID"
#######################################
# Run the ucx_perftest tag_bw test with -K set to the HCA and the peer given
# as "<type>:<address>".
# Globals:
#   HCA, PWD (read)
# Arguments:
#   $1 - address type (the pipeline passes "lid" or "guid")
#   $2 - address value
# Returns:
#   Exit status of ucx_perftest, or 1 when an argument is missing.
#######################################
run_mad_test() {
    # 'local' keeps these out of the shell that sourced this file.
    local test_type="${1:-}"
    local ib_add="${2:-}"
    if [[ -z "$test_type" || -z "$ib_add" ]]; then
        echo "Usage: run_mad_test <lid|guid> <address>" >&2
        return 1
    fi
    "$PWD"/install/bin/ucx_perftest -t tag_bw -e -K "$HCA" -e "$test_type":"$ib_add"
}

funcname() {
Expand All @@ -87,7 +83,6 @@ funcname() {
}

#######################################
# Detect first active HCA port.
# Globals:
#   HCA (written, exported) - kept for callers that invoke the function
#                             without capturing its output
# Outputs:
#   "<device>:<port>" on stdout and nothing else, so the result is safe to
#   capture with $(detect_hca).
# Returns:
#   Non-zero when ibv_devinfo fails or reports no active port.
#######################################
detect_hca() {
    local first_active
    # awk consumes all of ibv_devinfo's output instead of exiting at the first
    # match: an early exit can end the producer with SIGPIPE, which 'pipefail'
    # would report as a failure of the whole pipeline.
    first_active=$(ibv_devinfo | awk '
        /hca_id:/ {hca = $2}
        /port:/ {port = $2}
        /PORT_ACTIVE/ && !found {print hca ":" port; found = 1}
    ') || return
    if [[ -z "$first_active" ]]; then
        echo "No active HCA port found" >&2
        return 1
    fi
    HCA=$first_active
    export HCA
    printf '%s\n' "$HCA"
}

0 comments on commit 917b2c8

Please sign in to comment.