-
Notifications
You must be signed in to change notification settings - Fork 154
/
Copy pathentrypoint.sh
executable file
·180 lines (159 loc) · 6.35 KB
/
entrypoint.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
#!/bin/bash
# Copyright 2021 Google Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -o errexit
set -o pipefail
set -u
set -x
NVIDIA_DRIVER_BRANCH="${NVIDIA_DRIVER_BRANCH:-tesla}"
NVIDIA_DRIVER_VERSION="${NVIDIA_DRIVER_VERSION:-384.111}"
NVIDIA_DRIVER_DOWNLOAD_URL_DEFAULT="https://us.download.nvidia.com/${NVIDIA_DRIVER_BRANCH}/${NVIDIA_DRIVER_VERSION}/NVIDIA-Linux-x86_64-${NVIDIA_DRIVER_VERSION}.run"
NVIDIA_DRIVER_DOWNLOAD_URL="${NVIDIA_DRIVER_DOWNLOAD_URL:-$NVIDIA_DRIVER_DOWNLOAD_URL_DEFAULT}"
NVIDIA_INSTALL_DIR_HOST="${NVIDIA_INSTALL_DIR_HOST:-/var/lib/nvidia}"
NVIDIA_INSTALL_DIR_CONTAINER="${NVIDIA_INSTALL_DIR_CONTAINER:-/usr/local/nvidia}"
NVIDIA_INSTALLER_RUNFILE="$(basename "${NVIDIA_DRIVER_DOWNLOAD_URL}")"
ROOT_MOUNT_DIR="${ROOT_MOUNT_DIR:-/root}"
CACHE_FILE="${NVIDIA_INSTALL_DIR_CONTAINER}/.cache"
KERNEL_VERSION="$(uname -r)"
set +x
check_cached_version() {
echo "Checking cached version"
if [[ ! -f "${CACHE_FILE}" ]]; then
echo "Cache file ${CACHE_FILE} not found."
return 1
fi
# Source the cache file and check if the cached driver matches
# currently running kernel version and requested driver versions.
. "${CACHE_FILE}"
if [[ "${KERNEL_VERSION}" == "${CACHE_KERNEL_VERSION}" ]]; then
if [[ "${NVIDIA_DRIVER_VERSION}" == "${CACHE_NVIDIA_DRIVER_VERSION}" ]]; then
echo "Found existing driver installation for kernel version ${KERNEL_VERSION} and driver version ${NVIDIA_DRIVER_VERSION}."
return 0
fi
fi
echo "Cache file ${CACHE_FILE} found but existing versions didn't match."
return 1
}
update_cached_version() {
cat >"${CACHE_FILE}"<<__EOF__
CACHE_KERNEL_VERSION=${KERNEL_VERSION}
CACHE_NVIDIA_DRIVER_VERSION=${NVIDIA_DRIVER_VERSION}
__EOF__
echo "Updated cached version as:"
cat "${CACHE_FILE}"
}
update_container_ld_cache() {
echo "Updating container's ld cache..."
echo "${NVIDIA_INSTALL_DIR_CONTAINER}/lib64" > /etc/ld.so.conf.d/nvidia.conf
ldconfig
echo "Updating container's ld cache... DONE."
}
download_kernel_src() {
echo "Downloading kernel sources..."
apt-get update && apt-get install -y linux-headers-${KERNEL_VERSION}
echo "Downloading kernel sources... DONE."
}
configure_nvidia_installation_dirs() {
echo "Configuring installation directories..."
mkdir -p "${NVIDIA_INSTALL_DIR_CONTAINER}"
pushd "${NVIDIA_INSTALL_DIR_CONTAINER}"
# nvidia-installer does not provide an option to configure the
# installation path of `nvidia-modprobe` utility and always installs it
# under /usr/bin. The following workaround ensures that
# `nvidia-modprobe` is accessible outside the installer container
# filesystem.
mkdir -p bin bin-workdir
mount -t overlay -o lowerdir=/usr/bin,upperdir=bin,workdir=bin-workdir none /usr/bin
# nvidia-installer does not provide an option to configure the
# installation path of libraries such as libnvidia-ml.so. The following
# workaround ensures that the libs are accessible from outside the
# installer container filesystem.
mkdir -p lib64 lib64-workdir
mkdir -p /usr/lib/x86_64-linux-gnu
mount -t overlay -o lowerdir=/usr/lib/x86_64-linux-gnu,upperdir=lib64,workdir=lib64-workdir none /usr/lib/x86_64-linux-gnu
# nvidia-installer does not provide an option to configure the
# installation path of driver kernel modules such as nvidia.ko. The following
# workaround ensures that the modules are accessible from outside the
# installer container filesystem.
mkdir -p drivers drivers-workdir
mkdir -p /lib/modules/${KERNEL_VERSION}/video
mount -t overlay -o lowerdir=/lib/modules/${KERNEL_VERSION}/video,upperdir=drivers,workdir=drivers-workdir none /lib/modules/${KERNEL_VERSION}/video
# Populate ld.so.conf to avoid warning messages in nvidia-installer logs.
update_container_ld_cache
# Install an exit handler to cleanup the overlayfs mount points.
trap "{ umount /lib/modules/${KERNEL_VERSION}/video; umount /usr/lib/x86_64-linux-gnu ; umount /usr/bin; }" EXIT
popd
echo "Configuring installation directories... DONE."
}
download_nvidia_installer() {
echo "Downloading Nvidia installer..."
pushd "${NVIDIA_INSTALL_DIR_CONTAINER}"
curl -L -S -f "${NVIDIA_DRIVER_DOWNLOAD_URL}" -o "${NVIDIA_INSTALLER_RUNFILE}"
popd
echo "Downloading Nvidia installer... DONE."
}
run_nvidia_installer() {
echo "Running Nvidia installer..."
pushd "${NVIDIA_INSTALL_DIR_CONTAINER}"
sh "${NVIDIA_INSTALLER_RUNFILE}" \
--utility-prefix="${NVIDIA_INSTALL_DIR_CONTAINER}" \
--opengl-prefix="${NVIDIA_INSTALL_DIR_CONTAINER}" \
--no-install-compat32-libs \
--log-file-name="${NVIDIA_INSTALL_DIR_CONTAINER}/nvidia-installer.log" \
--no-drm \
--silent \
--accept-license
popd
echo "Running Nvidia installer... DONE."
}
configure_cached_installation() {
echo "Configuring cached driver installation..."
update_container_ld_cache
if ! lsmod | grep -w 'nvidia' > /dev/null; then
insmod "${NVIDIA_INSTALL_DIR_CONTAINER}/drivers/nvidia.ko"
fi
if ! lsmod | grep -w 'nvidia_uvm' > /dev/null; then
insmod "${NVIDIA_INSTALL_DIR_CONTAINER}/drivers/nvidia-uvm.ko"
fi
echo "Configuring cached driver installation... DONE"
}
verify_nvidia_installation() {
echo "Verifying Nvidia installation..."
export PATH="${NVIDIA_INSTALL_DIR_CONTAINER}/bin:${PATH}"
nvidia-smi
# Create unified memory device file.
nvidia-modprobe -c0 -u
echo "Verifying Nvidia installation... DONE."
}
update_host_ld_cache() {
echo "Updating host's ld cache..."
echo "${NVIDIA_INSTALL_DIR_HOST}/lib64" >> "${ROOT_MOUNT_DIR}/etc/ld.so.conf"
ldconfig -r "${ROOT_MOUNT_DIR}"
echo "Updating host's ld cache... DONE."
}
main() {
if check_cached_version; then
configure_cached_installation
verify_nvidia_installation
else
download_kernel_src
configure_nvidia_installation_dirs
download_nvidia_installer
run_nvidia_installer
update_cached_version
verify_nvidia_installation
fi
update_host_ld_cache
}
main "$@"