-
Notifications
You must be signed in to change notification settings - Fork 71
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Expand GPU testing coverage to more distros
- Loading branch information
Showing
10 changed files
with
261 additions
and
65 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
38 changes: 38 additions & 0 deletions
38
integration_test/third_party_apps_data/applications/dcgm/centos_rhel/install
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
# Installs the NVIDIA driver (runfile) and DCGM on CentOS / RHEL. | ||
set -e | ||
# Provides VERSION_ID, used below to pick the matching CUDA repo. | ||
source /etc/os-release | ||
|
||
sudo yum install -y kernel-devel pciutils gcc make wget yum-utils python3 | ||
|
||
# Install the driver the same way as the nvml app. | ||
# This script always installs the driver from NVIDIA's runfile; only the | ||
# driver version depends on the GPU model detected below. | ||
# DEVICE_CODE holds the "10de:<device>" ID pair reported by `lspci -n`. | ||
DEVICE_CODE=$(lspci -n | grep -Po '10de:[\w\d]{4}') | ||
case $DEVICE_CODE in | ||
10de:102d) | ||
# Install a specific version for NVIDIA Tesla K80, R470 is the last supported version | ||
DRIVER_VERSION=470.82.01 | ||
;; | ||
*) | ||
# Install the latest version of the NVIDIA driver | ||
DRIVER_VERSION=535.104.05 | ||
;; | ||
esac | ||
|
||
# Only the driver is installed here; CUDA_VERSION is not defined in this script, | ||
# so the message reports the driver version only. | ||
echo "Installing NVIDIA driver version $DRIVER_VERSION" | ||
curl -fSsl -O https://us.download.nvidia.com/tesla/$DRIVER_VERSION/NVIDIA-Linux-x86_64-$DRIVER_VERSION.run | ||
sudo bash ./NVIDIA-Linux-x86_64-$DRIVER_VERSION.run --silent | ||
|
||
# check NVIDIA driver installation succeeded | ||
nvidia-smi | ||
|
||
# Install DCGM | ||
# Keep only the major version (text before the first dot) for the repo path. | ||
VERSION_ID=${VERSION_ID%%.*} | ||
# Fetch the repo definition over HTTPS; packages from it are installed as root. | ||
sudo yum-config-manager \ | ||
--add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel$VERSION_ID/x86_64/cuda-rhel$VERSION_ID.repo | ||
sudo yum clean expire-cache | ||
sudo yum install -y datacenter-gpu-manager | ||
# Enable the service at boot and start it immediately. | ||
sudo systemctl --now enable nvidia-dcgm | ||
|
||
# check DCGM service running and load profiling module | ||
dcgmi discovery --list |
40 changes: 25 additions & 15 deletions
40
integration_test/third_party_apps_data/applications/dcgm/debian_ubuntu/install
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,37 +1,47 @@ | ||
set -e | ||
source /etc/os-release | ||
|
||
sudo apt update | ||
kernel_version=`uname -r` | ||
sudo apt install -y linux-headers-${kernel_version} software-properties-common pciutils gcc make dkms | ||
sudo apt install -y linux-headers-${kernel_version} software-properties-common pciutils gcc make dkms wget | ||
|
||
BASE_URL=https://us.download.nvidia.com/tesla | ||
# Install CUDA and driver the same way as the nvml app | ||
# Prefer to install from the package manager since it is normally faster and has | ||
# fewer errors on installation; fall back to the runfile method if the package | ||
# manager's package is not working or not compatible with the GPU model | ||
DEVICE_CODE=$(lspci -n | grep -Po '10de:[\w\d]{4}') | ||
DISTRIBUTION=$(echo $ID$VERSION_ID | sed -e 's/\.//g') | ||
# Need to add the keyring for installing CUDA and DCGM | ||
wget --no-verbose https://developer.download.nvidia.com/compute/cuda/repos/${DISTRIBUTION}/x86_64/cuda-keyring_1.0-1_all.deb | ||
sudo dpkg -i cuda-keyring_1.0-1_all.deb | ||
case $DEVICE_CODE in | ||
10de:102d) | ||
# Install a specific version for NVIDIA Tesla K80 | ||
# Install a specific version for NVIDIA Tesla K80, R470 is the last supported version | ||
DRIVER_VERSION=470.82.01 | ||
CUDA_VERSION=11.4.4 | ||
echo "Installing NVIDIA CUDA $CUDA_VERSION with driver $DRIVER_VERSION" | ||
curl -fSsl -O https://us.download.nvidia.com/tesla/$DRIVER_VERSION/NVIDIA-Linux-x86_64-$DRIVER_VERSION.run | ||
sudo bash ./NVIDIA-Linux-x86_64-$DRIVER_VERSION.run --silent | ||
wget --no-verbose https://developer.download.nvidia.com/compute/cuda/$CUDA_VERSION/local_installers/cuda_${CUDA_VERSION}_${DRIVER_VERSION}_linux.run | ||
sudo sh cuda_${CUDA_VERSION}_${DRIVER_VERSION}_linux.run --toolkit --silent | ||
;; | ||
*) | ||
DRIVER_VERSION=525.60.13 | ||
echo "Installing latest version of NVIDIA CUDA and driver" | ||
if [[ $ID == debian ]]; then | ||
sudo add-apt-repository contrib | ||
fi | ||
sudo apt update | ||
sudo apt -y install cuda | ||
;; | ||
esac | ||
echo "Installing NVIDIA driver version $DRIVER_VERSION" | ||
curl -fSsl -O $BASE_URL/$DRIVER_VERSION/NVIDIA-Linux-x86_64-$DRIVER_VERSION.run | ||
|
||
sudo bash ./NVIDIA-Linux-x86_64-$DRIVER_VERSION.run --silent | ||
|
||
# check NVIDIA driver installation succeeded | ||
nvidia-smi | ||
|
||
sudo apt-get -y install wget | ||
|
||
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-keyring_1.0-1_all.deb | ||
sudo dpkg -i cuda-keyring_1.0-1_all.deb | ||
|
||
# Install DCGM | ||
sudo apt-get update | ||
sudo apt-get install -y datacenter-gpu-manager | ||
sudo service nvidia-dcgm start | ||
sudo systemctl --now enable nvidia-dcgm | ||
|
||
# check DCGM service running and load profiling module | ||
dcgmi discovery --list | ||
dcgmi profile --resume |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
39 changes: 39 additions & 0 deletions
39
integration_test/third_party_apps_data/applications/dcgm/sles/install
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
# Installs the NVIDIA driver, CUDA and DCGM on SLES. | ||
set -e | ||
|
||
sudo zypper --non-interactive install -y kernel-source pciutils gcc make wget | ||
|
||
# Install CUDA and driver the same way as the nvml app | ||
# Prefer to install from the package manager since it is normally faster and has | ||
# fewer errors on installation; fall back to the runfile method if the package | ||
# manager's package is not working or not compatible with the GPU model | ||
DEVICE_CODE=$(/sbin/lspci -n | grep -Po '10de:[\w\d]{4}') | ||
# ID + VERSION_ID from /etc/os-release with the first ".<digit>" removed, | ||
# e.g. "sles15.4" becomes "sles15". | ||
DISTRIBUTION=$(. /etc/os-release;echo $ID$VERSION_ID | sed -e 's/\.[0-9]//') | ||
# Need to add the repo for installing CUDA and DCGM | ||
# Fetch the repo definition over HTTPS; packages from it are installed as root. | ||
sudo zypper --non-interactive ar https://developer.download.nvidia.com/compute/cuda/repos/${DISTRIBUTION}/x86_64/cuda-${DISTRIBUTION}.repo | ||
sudo zypper --gpg-auto-import-keys --non-interactive refresh | ||
case $DEVICE_CODE in | ||
10de:102d) | ||
# Install a specific version for NVIDIA Tesla K80, R470 is the last supported version | ||
DRIVER_VERSION=470.82.01 | ||
CUDA_VERSION=11.4.4 | ||
echo "Installing NVIDIA CUDA $CUDA_VERSION with driver $DRIVER_VERSION" | ||
# Runfile path: driver first, then only the CUDA toolkit from the CUDA runfile. | ||
curl -fSsl -O https://us.download.nvidia.com/tesla/$DRIVER_VERSION/NVIDIA-Linux-x86_64-$DRIVER_VERSION.run | ||
sudo bash ./NVIDIA-Linux-x86_64-$DRIVER_VERSION.run --silent | ||
wget --no-verbose https://developer.download.nvidia.com/compute/cuda/$CUDA_VERSION/local_installers/cuda_${CUDA_VERSION}_${DRIVER_VERSION}_linux.run | ||
sudo sh cuda_${CUDA_VERSION}_${DRIVER_VERSION}_linux.run --toolkit --silent | ||
;; | ||
*) | ||
# Package-manager path: install the `cuda` package from the repo added above. | ||
echo "Installing latest version of NVIDIA CUDA and driver" | ||
sudo zypper --non-interactive install -y cuda | ||
;; | ||
esac | ||
|
||
# check NVIDIA driver installation succeeded | ||
nvidia-smi | ||
|
||
# Install DCGM | ||
sudo zypper --non-interactive install datacenter-gpu-manager | ||
# Enable the service at boot and start it immediately. | ||
sudo systemctl --now enable nvidia-dcgm | ||
|
||
# check DCGM service running and load profiling module | ||
dcgmi discovery --list |
31 changes: 31 additions & 0 deletions
31
integration_test/third_party_apps_data/applications/nvml/centos_rhel/install
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
# Installs the NVIDIA driver and the CUDA toolkit on CentOS / RHEL from runfiles. | ||
set -e | ||
|
||
sudo yum install -y kernel-devel pciutils gcc make wget yum-utils | ||
|
||
# Install CUDA and driver together, since the `exercise` script needs to run a | ||
# CUDA sample app to generate GPU process metrics | ||
# This script always installs from NVIDIA's runfiles; only the driver and CUDA | ||
# versions depend on the GPU model detected below. | ||
# DEVICE_CODE holds the "10de:<device>" ID pair reported by `lspci -n`. | ||
DEVICE_CODE=$(lspci -n | grep -Po '10de:[\w\d]{4}') | ||
case $DEVICE_CODE in | ||
10de:102d) | ||
# Install a specific version for NVIDIA Tesla K80, R470 is the last supported version | ||
DRIVER_VERSION=470.82.01 | ||
CUDA_VERSION=11.4.4 | ||
;; | ||
*) | ||
# Install the latest version of NVIDIA CUDA and driver | ||
DRIVER_VERSION=535.104.05 | ||
CUDA_VERSION=12.2.2 | ||
;; | ||
esac | ||
|
||
echo "Installing NVIDIA CUDA $CUDA_VERSION with driver $DRIVER_VERSION" | ||
# Driver runfile first, then only the toolkit from the CUDA runfile (--toolkit). | ||
curl -fSsl -O https://us.download.nvidia.com/tesla/$DRIVER_VERSION/NVIDIA-Linux-x86_64-$DRIVER_VERSION.run | ||
sudo bash ./NVIDIA-Linux-x86_64-$DRIVER_VERSION.run --silent | ||
wget --no-verbose https://developer.download.nvidia.com/compute/cuda/$CUDA_VERSION/local_installers/cuda_${CUDA_VERSION}_${DRIVER_VERSION}_linux.run | ||
sudo sh cuda_${CUDA_VERSION}_${DRIVER_VERSION}_linux.run --toolkit --silent | ||
|
||
# check NVIDIA driver installation succeeded | ||
nvidia-smi |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
32 changes: 32 additions & 0 deletions
32
integration_test/third_party_apps_data/applications/nvml/sles/install
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
# Installs the NVIDIA driver and CUDA on SLES. | ||
set -e | ||
|
||
sudo zypper --non-interactive install -y kernel-source pciutils gcc make wget | ||
|
||
# Install CUDA and driver together, since the `exercise` script needs to run a | ||
# CUDA sample app to generate GPU process metrics | ||
# Prefer to install from the package manager since it is normally faster and has | ||
# fewer errors on installation; fall back to the runfile method if the package | ||
# manager's package is not working or not compatible with the GPU model | ||
DEVICE_CODE=$(/sbin/lspci -n | grep -Po '10de:[\w\d]{4}') | ||
# ID + VERSION_ID from /etc/os-release with the first ".<digit>" removed, | ||
# e.g. "sles15.4" becomes "sles15". | ||
DISTRIBUTION=$(. /etc/os-release;echo $ID$VERSION_ID | sed -e 's/\.[0-9]//') | ||
case $DEVICE_CODE in | ||
10de:102d) | ||
# Install a specific version for NVIDIA Tesla K80, R470 is the last supported version | ||
DRIVER_VERSION=470.82.01 | ||
CUDA_VERSION=11.4.4 | ||
echo "Installing NVIDIA CUDA $CUDA_VERSION with driver $DRIVER_VERSION" | ||
# Runfile path: driver first, then only the CUDA toolkit from the CUDA runfile. | ||
curl -fSsl -O https://us.download.nvidia.com/tesla/$DRIVER_VERSION/NVIDIA-Linux-x86_64-$DRIVER_VERSION.run | ||
sudo bash ./NVIDIA-Linux-x86_64-$DRIVER_VERSION.run --silent | ||
wget --no-verbose https://developer.download.nvidia.com/compute/cuda/$CUDA_VERSION/local_installers/cuda_${CUDA_VERSION}_${DRIVER_VERSION}_linux.run | ||
sudo sh cuda_${CUDA_VERSION}_${DRIVER_VERSION}_linux.run --toolkit --silent | ||
;; | ||
*) | ||
echo "Installing latest version of NVIDIA CUDA and driver" | ||
# Fetch the repo definition over HTTPS; packages from it are installed as root. | ||
sudo zypper --non-interactive ar https://developer.download.nvidia.com/compute/cuda/repos/${DISTRIBUTION}/x86_64/cuda-${DISTRIBUTION}.repo | ||
sudo zypper --gpg-auto-import-keys --non-interactive refresh | ||
sudo zypper --non-interactive install -y cuda | ||
;; | ||
esac | ||
|
||
# check NVIDIA driver installation succeeded | ||
nvidia-smi |
Oops, something went wrong.