Skip to content

Commit

Permalink
AZP/TEST: Add MAD tests
Browse files Browse the repository at this point in the history
  • Loading branch information
Alexey-Rivkin committed Mar 11, 2024
1 parent 1c25669 commit 2fa0151
Show file tree
Hide file tree
Showing 4 changed files with 200 additions and 0 deletions.
87 changes: 87 additions & 0 deletions buildlib/pr/mad_tests.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
parameters:
HCA:

jobs:
# Server_Up: on an agent from the MLNX pool that satisfies the mad_server
# demand, check out the sources, start the perftest server, and publish
# output variables for the client jobs.
- job: Server_Up
pool:
name: MLNX
demands: mad_server
workspace:
clean: outputs
steps:
- checkout: self
clean: true
fetchDepth: 100
retryCountOnTaskFailure: 5
# run_mad_server (contrib/test_mad.sh) builds UCX and starts the
# ucx_perftest systemd service.
- bash: ./contrib/test_mad.sh run_mad_server
env:
HCA: ${{ parameters.HCA }}
displayName: Server Up

# This step is named (Set_Vars) because later jobs reference its output
# variables as Set_Vars.LID and Set_Vars.GUID.
- task: Bash@3
name: Set_Vars
inputs:
targetType: "inline"
script: ./contrib/test_mad.sh set_vars
env:
HCA: ${{ parameters.HCA }}
displayName: Set Vars

# Test_LID: on a mad_client agent, run the perftest client against the
# server, addressing it by the LID published by Server_Up.
- job: Test_LID
dependsOn: Server_Up
timeoutInMinutes: 10
pool:
name: MLNX
demands: mad_client
variables:
# Output variable set by the Set_Vars step of the Server_Up job.
LID: $[ dependencies.Server_Up.outputs['Set_Vars.LID'] ]
steps:
- checkout: self
clean: true
fetchDepth: 100
retryCountOnTaskFailure: 5
# run_mad_test_lid (contrib/test_mad.sh) builds UCX before running the test.
- bash: ./contrib/test_mad.sh run_mad_test_lid
env:
HCA: ${{ parameters.HCA }}
LID: $(LID)
displayName: Test LID

# Server_Restart: after the LID test, stop and start the ucx_perftest
# service on the mad_server agent.
- job: Server_Restart
dependsOn: Test_LID
pool:
name: MLNX
demands: mad_server
steps:
# NOTE(review): nothing is checked out here, yet the script is invoked by a
# relative path - presumably this relies on the sources left in the agent
# workspace by Server_Up; confirm.
- checkout: none
- bash: ./contrib/test_mad.sh srv_restart
displayName: Server Restart

# Test_GUID: on a mad_client agent, run the perftest client against the
# restarted server, addressing it by the GUID published by Server_Up.
- job: Test_GUID
# Server_Up is listed because this job reads its output variable below;
# Server_Restart is listed so this job runs after the restart.
dependsOn:
- Server_Up
- Server_Restart
timeoutInMinutes: 10
pool:
name: MLNX
demands: mad_client
variables:
# Output variable set by the Set_Vars step of the Server_Up job.
GUID: $[ dependencies.Server_Up.outputs['Set_Vars.GUID'] ]
steps:
# NOTE(review): no checkout and run_mad_test_guid does not build -
# presumably this relies on the sources and install tree left on the
# mad_client agent by Test_LID; confirm.
- checkout: none
- bash: |
./contrib/test_mad.sh run_mad_test_guid
env:
HCA: ${{ parameters.HCA }}
GUID: $(GUID)
displayName: Test GUID
# Server_Stop: stop the ucx_perftest service on the mad_server agent.
- job: Server_Stop
dependsOn: Test_GUID
# always(): run this job even when an earlier job failed.
condition: always()
pool:
name: MLNX
demands: mad_server
steps:
- checkout: none
- bash: ./contrib/test_mad.sh srv_stop
displayName: Server Stop
7 changes: 7 additions & 0 deletions buildlib/pr/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,13 @@ stages:
jobs:
- template: static_checks.yml

# MAD_Tests: runs the jobs from mad_tests.yml once Static_check is done.
- stage: MAD_Tests
dependsOn: [Static_check]
jobs:
- template: mad_tests.yml
parameters:
# Presumably <device>:<port>; contrib/test_mad.sh (set_vars) replaces the
# ':' with a space before passing the value to ibstat.
HCA: "mlx5_0:1"

- stage: Build
dependsOn: [Static_check]
jobs:
Expand Down
23 changes: 23 additions & 0 deletions buildlib/tools/ucx_perftest.template
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# systemd unit template for the ucx_perftest server used by the MAD tests.
# The PWD and HCA placeholders below are filled in with envsubst by
# contrib/test_mad.sh (run_mad_server) before the unit is installed.
[Unit]
Description=UCX Perftest running in agent mode
Requires=multi-user.target
After=multi-user.target

[Service]
Type=simple
Environment=UCX_KEEPALIVE_INTERVAL=2
Environment=UCX_DC_MLX5_TIMEOUT=5
Environment=UCX_RC_VERBS_TIMEOUT=5
Environment=UCX_RC_MLX5_TIMEOUT=5
Environment=UCX_UD_VERBS_TIMEOUT=5
Environment=UCX_UD_MLX5_TIMEOUT=5
Environment=UCX_RDMA_CM_TIMEOUT=5
# Restart the server whenever it exits, allowing up to 100 restarts per
# 10 seconds before systemd gives up.
Restart=always
RestartSec=500ms
User=root
Group=root
StartLimitInterval=10s
StartLimitBurst=100
# A bare path is not a valid output specifier, so the log file was never
# written. "append:" keeps earlier output across service restarts; it needs
# systemd 240 or newer.
StandardOutput=append:${PWD}/ucx_perftest.log
StandardError=append:${PWD}/ucx_perftest.log
ExecStart=${PWD}/install/bin/ucx_perftest -e -K ${HCA}
83 changes: 83 additions & 0 deletions contrib/test_mad.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
#!/bin/bash
#
# Helper for the MAD test pipeline jobs. The first argument names the
# function to run, e.g. "test_mad.sh run_mad_server".

# Abort on errors (also inside pipelines), trace every command, and let
# functions inherit ERR traps.
set -o errexit -o xtrace -o errtrace -o pipefail

# All relative paths below are resolved from the pipeline's sources directory.
cd "$BUILD_SOURCESDIRECTORY"

#######################################
# Build UCX and (re)start ucx_perftest as a systemd service.
# Globals:   HCA (read, exported) - substituted into the unit template
# Outputs:   writes /etc/systemd/system/ucx_perftest.service
# Returns:   non-zero if HCA is unset/empty or any step fails
#######################################
run_mad_server() {
    # Fail before the long build instead of installing a unit whose
    # ExecStart line has an empty device argument.
    : "${HCA:?HCA must be set, e.g. mlx5_0:1}"

    build_ucx
    setup
    # Stop any instance left over from a previous run.
    srv_stop
    funcname

    # envsubst only sees exported variables; sudo -E passes them through.
    export HCA
    sudo -E bash -c 'envsubst < "buildlib/tools/ucx_perftest.template" \
        > /etc/systemd/system/ucx_perftest.service'
    sudo systemctl daemon-reload
    sudo systemctl start ucx_perftest
    # Non-zero status here means the service did not come up.
    sudo systemctl status ucx_perftest
}

#######################################
# Run autogen.sh, configure a release build with MAD support that installs
# into ./install, then compile and install it.
#######################################
build_ucx() {
    funcname
    ./autogen.sh

    local -a configure_opts=(
        --prefix="$PWD"/install
        --with-mad
        --without-valgrind
        --without-go
        --without-java
    )
    ./contrib/configure-release "${configure_opts[@]}"

    # Quiet parallel build, one job per available CPU.
    make -s -j"$(nproc)"
    make install
}

#######################################
# Open up the permissions of every /dev/infiniband/umad* device node.
#######################################
setup() {
    funcname
    # If nothing matches, the literal pattern is passed on and chmod fails.
    local -a umad_nodes=(/dev/infiniband/umad*)
    sudo chmod 777 "${umad_nodes[@]}"
}

#######################################
# Query the LID and GUID of the configured port with ibstat and publish
# them as Azure Pipelines output variables.
# Globals:   HCA (read, rewritten with ':' replaced by a space)
#            LID, GUID (written)
# Outputs:   the values and two ##vso[task.setvariable] commands on stdout;
#            diagnostics on stderr
# Returns:   1 if ibstat fails or a value is missing or ambiguous
#######################################
set_vars() {
    # Tracing is switched off for this function; presumably so that the
    # ##vso commands below do not also appear in the xtrace output.
    set +x
    HCA=${HCA/:/ } # Replace ':' with space

    # Query once and parse the captured text. With tracing off, a failure
    # here would otherwise end the script without any message.
    local ib_info
    # shellcheck disable=SC2086
    if ! ib_info=$(ibstat $HCA); then
        echo "set_vars: 'ibstat $HCA' failed" >&2
        return 1
    fi
    LID=$(awk '/Base/ {print $NF}' <<<"$ib_info")
    GUID=$(awk '/GUID/ {print $NF}' <<<"$ib_info")

    # Each value must be exactly one token: empty means the field was not
    # found, several lines mean the query matched more than one entry.
    local name
    for name in LID GUID; do
        if [[ ! "${!name}" =~ ^[^[:space:]]+$ ]]; then
            echo "set_vars: expected exactly one $name from 'ibstat $HCA'," \
                "got: '${!name}'" >&2
            return 1
        fi
    done

    echo "LID: $LID"
    echo "GUID: $GUID"
    echo "##vso[task.setvariable variable=LID;isOutput=true]$LID"
    echo "##vso[task.setvariable variable=GUID;isOutput=true]$GUID"
}

#######################################
# Build UCX and run the tag_bw perftest against the server addressed by LID.
# Globals:   HCA (read), LID (read)
# Returns:   non-zero if HCA or LID is unset/empty, or any step fails
#######################################
run_mad_test_lid() {
    # Check the inputs before the long build rather than after it.
    : "${HCA:?HCA must be set, e.g. mlx5_0:1}"
    : "${LID:?LID must be set to the LID of the server port}"

    build_ucx
    setup
    funcname
    # NOTE(review): '-e' is passed twice here but only once in
    # run_mad_test_guid - confirm whether that is intentional.
    "$PWD"/install/bin/ucx_perftest -t tag_bw -e -K "$HCA" -e lid:"$LID"
}

#######################################
# Run the tag_bw perftest against the server addressed by GUID, using the
# ucx_perftest binary already present under ./install.
# Globals:   HCA (read), GUID (read)
#######################################
run_mad_test_guid() {
    funcname
    local -a perftest_cmd=(
        "$PWD"/install/bin/ucx_perftest
        -t tag_bw
        -e
        -K "$HCA"
        guid:"$GUID"
    )
    "${perftest_cmd[@]}"
}

#######################################
# Stop the ucx_perftest service, start it again, and report its status.
#######################################
srv_restart() {
    funcname
    local action
    for action in stop start status; do
        sudo systemctl "$action" ucx_perftest
    done
}

#######################################
# Best-effort stop of the ucx_perftest service; it may not be running or
# may not be installed at all.
# Returns:   always 0
#######################################
srv_stop() {
    funcname
    # Ignore failures per command instead of toggling errexit, so the
    # caller's errexit setting is left exactly as it was.
    sudo systemctl status ucx_perftest || true
    sudo systemctl stop ucx_perftest || true
}

#######################################
# Print a banner naming the function that called this one.
# Tracing is paused while printing and is switched on afterwards.
# Outputs:   "==== Running: <caller> ====" on stdout
#######################################
funcname() {
    set +x
    local caller=${FUNCNAME[1]}
    echo "==== Running: ${caller} ===="
    set -x
}

# Dispatch: the first argument names the function to run and the remaining
# arguments are passed to it.
"$@"

0 comments on commit 2fa0151

Please sign in to comment.