Skip to content

Commit

Permalink
Merge branch '2023.06-software.eessi.io' of github-trz:EESSI/software…
Browse files Browse the repository at this point in the history
…-layer into 2023.06-software.eessi.io-cuDNN-8.9.2.26-part-2
  • Loading branch information
truib committed Nov 22, 2024
2 parents 408b985 + db16c37 commit f87857a
Show file tree
Hide file tree
Showing 17 changed files with 437 additions and 145 deletions.
58 changes: 58 additions & 0 deletions .github/workflows/scripts/only_latest_easystacks.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
#!/bin/bash
#
# This script figures out the latest version of EasyBuild being used for the installation of easystack
# files.
#
# This file is part of the EESSI software layer, see
# https://github.com/EESSI/software-layer.git
#
# author: Alan O'Cais (CECAM)
#
# license: GPLv2
#

EESSI_VERSION=${EESSI_VERSION:-"2023.06"}

directory="easystacks/software.eessi.io/${EESSI_VERSION}"
# List of example filenames
files=($(find "$directory" -name "*.yml" | grep -e '-eb-'))
[ -n "$DEBUG" ] && echo "${files[@]}"

versions=()
# Loop over each filename
for filename in "${files[@]}"; do
# Extract the semantic version using grep
version=$(echo "$filename" | grep -oP '(?<=eb-)\d+\.\d+\.\d+?(?=-)')

# Output the result
[ -n "$DEBUG" ] && echo "Filename: $filename"
[ -n "$DEBUG" ] && echo "Extracted version: $version"
[ -n "$DEBUG" ] && echo
versions+=("$version")
done
highest_version=$(printf "%s\n" "${versions[@]}" | sort -V | tail -n 1)

[ -n "$DEBUG" ] && echo "Highest version: $highest_version"
[ -n "$DEBUG" ] && echo
[ -n "$DEBUG" ] && echo "Matching files:"
all_latest_easystacks=($(find $directory -type f -name "*eb-$highest_version*.yml"))

accel_latest_easystacks=()
cpu_latest_easystacks=()

# Loop through the array and split based on partial matching of string
accel="/accel/"
for item in "${all_latest_easystacks[@]}"; do
if [[ "$item" == *"$accel"* ]]; then
accel_latest_easystacks+=("$item")
else
cpu_latest_easystacks+=("$item")
fi
done

# Output the results
if [ -n "$ACCEL_EASYSTACKS" ]; then
echo "${accel_latest_easystacks[@]}"
else
echo "${cpu_latest_easystacks[@]}"
fi
21 changes: 13 additions & 8 deletions .github/workflows/test-software.eessi.io.yml
Original file line number Diff line number Diff line change
Expand Up @@ -57,15 +57,20 @@ jobs:
env | grep ^EESSI | sort
# first check the CPU-only builds for this CPU target
echo "just run check_missing_installations.sh (should use easystacks/software.eessi.io/${{matrix.EESSI_VERSION}}/eessi-${{matrix.EESSI_VERSION}}-*.yml)"
for easystack_file in $(ls easystacks/software.eessi.io/${{matrix.EESSI_VERSION}}/eessi-${{matrix.EESSI_VERSION}}-eb-*.yml); do
echo "just run check_missing_installations.sh (should use easystacks/software.eessi.io/${{matrix.EESSI_VERSION}}/eessi-${{matrix.EESSI_VERSION}}-*.yml with latest EasyBuild release)"
for easystack_file in $(EESSI_VERSION=${{matrix.EESSI_VERSION}} .github/workflows/scripts/only_latest_easystacks.sh); do
if [ ${{matrix.EESSI_SOFTWARE_SUBDIR_OVERRIDE}} = "x86_64/amd/zen4" ]; then
if grep -q 2022b <<<"${easystack_file}"; then
# skip the check of installed software on zen4 for foss/2022b builds
continue
elif grep -q CUDA <<<"${easystack_file}"; then
# skip the check of install CUDA software in the CPU path for zen4
continue
# skip the check of installed software on zen4 for foss/2022b builds
continue
fi
if [[ $easystack_file == *"rebuilds"* ]]; then
# Also handle rebuilds, make a temporary EasyStack file where we clean out all 2022b stuff and use that
new_easystack=$(mktemp pruned_easystackXXX --suffix=.yml)
# first clean out the options then clean out the .eb name
sed '/2022b\|12\.2\.0/,/\.eb/{/\.eb/!d}' "${easystack_file}" | sed '/2022b\|12\.2\.0/d' > $new_easystack
diff --unified=0 "$easystack_file" "$new_easystack" || :
easystack_file="$new_easystack"
fi
fi
echo "check missing installations for ${easystack_file}..."
Expand All @@ -82,7 +87,7 @@ jobs:
for accel in ${accelerators}; do
module use ${EESSI_SOFTWARE_PATH}/accel/${accel}/modules/all
echo "checking missing installations for accelerator ${accel} using modulepath: ${MODULEPATH}"
for easystack_file in $(ls easystacks/software.eessi.io/${{matrix.EESSI_VERSION}}/accel/$(dirname ${accel})/eessi-${{matrix.EESSI_VERSION}}-eb-*.yml); do
for easystack_file in $(EESSI_VERSION=${{matrix.EESSI_VERSION}} ACCEL_EASYSTACKS=1 .github/workflows/scripts/only_latest_easystacks.sh); do
echo "check missing installations for ${easystack_file}..."
./check_missing_installations.sh ${easystack_file}
ec=$?
Expand Down
15 changes: 15 additions & 0 deletions EESSI-extend-2023.06-easybuild.eb
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,16 @@ if (os.getenv("EESSI_CVMFS_INSTALL") ~= nil) then
end
eessi_cvmfs_install = true
easybuild_installpath = os.getenv("EESSI_SOFTWARE_PATH")
eessi_accelerator_target = os.getenv("EESSI_ACCELERATOR_TARGET")
if (eessi_accelerator_target ~= nil) then
cuda_compute_capability = string.match(eessi_accelerator_target, "^nvidia/cc([0-9][0-9])$")
if (cuda_compute_capability ~= nil) then
easybuild_installpath = pathJoin(easybuild_installpath, 'accel', eessi_accelerator_target)
easybuild_cuda_compute_capabilities = cuda_compute_capability:sub(1, 1) .. "." .. cuda_compute_capability:sub(2, 2)
else
LmodError("Incorrect value for $EESSI_ACCELERATOR_TARGET: " .. eessi_accelerator_target)
end
end
elseif (os.getenv("EESSI_SITE_INSTALL") ~= nil) then
-- Make sure no other EESSI install environment variables are set
if ((os.getenv("EESSI_PROJECT_INSTALL") ~= nil) or (os.getenv("EESSI_USER_INSTALL") ~= nil)) then
Expand Down Expand Up @@ -146,6 +156,11 @@ setenv ("EASYBUILD_UMASK", "022")
-- Allow this module to be loaded when running EasyBuild
setenv ("EASYBUILD_ALLOW_LOADED_MODULES", "EasyBuild,EESSI-extend")
-- Set environment variables if building for CUDA compute capabilities
if (easybuild_cuda_compute_capabilities ~= nil) then
setenv ("EASYBUILD_CUDA_COMPUTE_CAPABILITIES", easybuild_cuda_compute_capabilities)
end
-- Set all related environment variables if we have project or user installations (including extending MODULEPATH)
if (user_modulepath ~= nil) then
-- Use a more restrictive umask for this case
Expand Down
206 changes: 118 additions & 88 deletions EESSI-install-software.sh
Original file line number Diff line number Diff line change
Expand Up @@ -112,26 +112,8 @@ fi

TMPDIR=$(mktemp -d)

echo ">> Setting up environment..."

source $TOPDIR/init/minimal_eessi_env

if [ -d $EESSI_CVMFS_REPO ]; then
echo_green "$EESSI_CVMFS_REPO available, OK!"
else
fatal_error "$EESSI_CVMFS_REPO is not available!"
fi

# make sure we're in Prefix environment by checking $SHELL
if [[ ${SHELL} = ${EPREFIX}/bin/bash ]]; then
echo_green ">> It looks like we're in a Gentoo Prefix environment, good!"
else
fatal_error "Not running in Gentoo Prefix environment, run '${EPREFIX}/startprefix' first!"
fi

# avoid that pyc files for EasyBuild are stored in EasyBuild installation directory
export PYTHONPYCACHEPREFIX=$TMPDIR/pycache

# Get override subdir
DETECTION_PARAMETERS=''
GENERIC=0
EB='eb'
Expand All @@ -148,10 +130,76 @@ if [ -z $EESSI_SOFTWARE_SUBDIR_OVERRIDE ]; then
echo ">> Determined \$EESSI_SOFTWARE_SUBDIR_OVERRIDE via 'eessi_software_subdir.py $DETECTION_PARAMETERS' script"
else
echo ">> Picking up pre-defined \$EESSI_SOFTWARE_SUBDIR_OVERRIDE: ${EESSI_SOFTWARE_SUBDIR_OVERRIDE}"
# make sure directory exists (since it's expected by init/eessi_environment_variables when using archdetect)
mkdir -p ${EESSI_PREFIX}/software/${EESSI_OS_TYPE}/${EESSI_SOFTWARE_SUBDIR_OVERRIDE}
# Run in a subshell, so that minimal_eessi_env doesn't change the shell environment for the rest of this script
(
# Make sure EESSI_PREFIX and EESSI_OS_TYPE are set
source $TOPDIR/init/minimal_eessi_env

# make sure directory exists (since it's expected by init/eessi_environment_variables when using archdetect)
mkdir -p ${EESSI_PREFIX}/software/${EESSI_OS_TYPE}/${EESSI_SOFTWARE_SUBDIR_OVERRIDE}
)
fi

echo ">> Setting up environment..."

# If EESSI_VERSION is not set, source the defaults script to set it
if [ -z ${EESSI_VERSION} ]; then
source $TOPDIR/init/eessi_defaults
fi

# If module command does not exist, use the one from the compat layer
command -v module
module_cmd_exists=$?
if [[ "$module_cmd_exists" -ne 0 ]]; then
echo_green "No module command found, initializing lmod from the compatibility layer"
# Minimal initalization of the lmod from the compat layer
source $TOPDIR/init/lmod/bash
else
echo_green "Module command found"
fi
ml_version_out=$TMPDIR/ml.out
ml --version &> $ml_version_out
if [[ $? -eq 0 ]]; then
echo_green ">> Found Lmod ${LMOD_VERSION}"
else
fatal_error "Failed to initialize Lmod?! (see output in ${ml_version_out}"
fi

# Make sure we start with no modules and clean $MODULEPATH
echo ">> Setting up \$MODULEPATH..."
module --force purge
module unuse $MODULEPATH

# Initialize the EESSI environment
module use $TOPDIR/init/modules
module load EESSI/$EESSI_VERSION

# make sure we're in Prefix environment by checking $SHELL
# We can only do this after loading the EESSI module, as we need ${EPREFIX}
if [[ ${SHELL} = ${EPREFIX}/bin/bash ]]; then
echo_green ">> It looks like we're in a Gentoo Prefix environment, good!"
else
fatal_error "Not running in Gentoo Prefix environment, run '${EPREFIX}/startprefix' first!"
fi

if [ -d $EESSI_CVMFS_REPO ]; then
echo_green "$EESSI_CVMFS_REPO available, OK!"
else
fatal_error "$EESSI_CVMFS_REPO is not available!"
fi

# Check that EESSI_SOFTWARE_SUBDIR now matches EESSI_SOFTWARE_SUBDIR_OVERRIDE
if [[ -z ${EESSI_SOFTWARE_SUBDIR} ]]; then
fatal_error "Failed to determine software subdirectory?!"
elif [[ "${EESSI_SOFTWARE_SUBDIR}" != "${EESSI_SOFTWARE_SUBDIR_OVERRIDE}" ]]; then
fatal_error "Values for EESSI_SOFTWARE_SUBDIR_OVERRIDE (${EESSI_SOFTWARE_SUBDIR_OVERRIDE}) and EESSI_SOFTWARE_SUBDIR (${EESSI_SOFTWARE_SUBDIR}) differ!"
else
echo_green ">> Using ${EESSI_SOFTWARE_SUBDIR} as software subdirectory!"
fi

# avoid that pyc files for EasyBuild are stored in EasyBuild installation directory
export PYTHONPYCACHEPREFIX=$TMPDIR/pycache

# if we run the script for the first time, e.g., to start building for a new
# stack, we need to ensure certain files are present in
# ${EESSI_PREFIX}/software/${EESSI_OS_TYPE}/${EESSI_SOFTWARE_SUBDIR_OVERRIDE}
Expand All @@ -172,44 +220,67 @@ if [ ! -f ${_lmod_sitepackage_file} ]; then
python3 ${TOPDIR}/create_lmodsitepackage.py ${_eessi_software_path}
fi

# Set all the EESSI environment variables (respecting $EESSI_SOFTWARE_SUBDIR_OVERRIDE)
# $EESSI_SILENT - don't print any messages if set (use 'unset EESSI_SILENT' to let script show messages)
# $EESSI_BASIC_ENV - give a basic set of environment variables if set (use 'EESSI_BASIC_ENV=' to let script initialise a full environment)
EESSI_SILENT=1 EESSI_BASIC_ENV= source $TOPDIR/init/eessi_environment_variables
# install any additional required scripts
# order is important: these are needed to install a full CUDA SDK in host_injections
# for now, this just reinstalls all scripts. Note the most elegant, but works

if [[ -z ${EESSI_SOFTWARE_SUBDIR} ]]; then
fatal_error "Failed to determine software subdirectory?!"
elif [[ "${EESSI_SOFTWARE_SUBDIR}" != "${EESSI_SOFTWARE_SUBDIR_OVERRIDE}" ]]; then
fatal_error "Values for EESSI_SOFTWARE_SUBDIR_OVERRIDE (${EESSI_SOFTWARE_SUBDIR_OVERRIDE}) and EESSI_SOFTWARE_SUBDIR (${EESSI_SOFTWARE_SUBDIR}) differ!"
else
echo_green ">> Using ${EESSI_SOFTWARE_SUBDIR} as software subdirectory!"
# Only run install_scripts.sh if not dev.eessi.io for security
if [[ "${EESSI_CVMFS_REPO}" != /cvmfs/dev.eessi.io ]]; then
${TOPDIR}/install_scripts.sh --prefix ${EESSI_PREFIX}
fi

echo ">> Initializing Lmod..."
source $EPREFIX/usr/share/Lmod/init/bash
ml_version_out=$TMPDIR/ml.out
ml --version &> $ml_version_out
if [[ $? -eq 0 ]]; then
echo_green ">> Found Lmod ${LMOD_VERSION}"
echo ">> Configuring EasyBuild..."

# Make sure EESSI-extend is not loaded, and configure location variables for a
# CVMFS installation
module unload EESSI-extend
unset EESSI_USER_INSTALL
unset EESSI_PROJECT_INSTALL
unset EESSI_SITE_INSTALL
export EESSI_CVMFS_INSTALL=1

# We now run 'source load_eessi_extend_module.sh' to load or install and load the
# EESSI-extend module which sets up all build environment settings.
# The script requires the EESSI_VERSION given as argument, a couple of
# environment variables set (TMPDIR, EB and EASYBUILD_INSTALLPATH) and the
# function check_exit_code defined.
# NOTE 1, the script exits if those variables/functions are undefined.
# NOTE 2, loading the EESSI-extend module may adjust the value of EASYBUILD_INSTALLPATH,
# e.g., to point to the installation directory for accelerators.
# NOTE 3, we have to set a default for EASYBUILD_INSTALLPATH here in cases the
# EESSI-extend module itself needs to be installed.
export EASYBUILD_INSTALLPATH=${EESSI_PREFIX}/software/${EESSI_OS_TYPE}/${EESSI_SOFTWARE_SUBDIR_OVERRIDE}
source load_eessi_extend_module.sh ${EESSI_VERSION}

# Install full CUDA SDK and cu* libraries in host_injections
# Hardcode this for now, see if it works
# TODO: We should make a nice yaml and loop over all CUDA versions in that yaml to figure out what to install
# Allow skipping CUDA SDK install in e.g. CI environments
echo "Going to install full CUDA SDK and cu* libraries under host_injections if necessary"
temp_install_storage=${TMPDIR}/temp_install_storage
mkdir -p ${temp_install_storage}
if [ -z "${skip_cuda_install}" ] || [ ! "${skip_cuda_install}" ]; then
${EESSI_PREFIX}/scripts/gpu_support/nvidia/install_cuda_and_libraries.sh \
-t ${temp_install_storage} \
--accept-cuda-eula \
--accept-cudnn-eula
else
fatal_error "Failed to initialize Lmod?! (see output in ${ml_version_out}"
echo "Skipping installation of CUDA SDK and cu* libraries in host_injections, since the --skip-cuda-install flag was passed"
fi

# Install NVIDIA drivers in host_injections (if they exist)
if command_exists "nvidia-smi"; then
echo "Command 'nvidia-smi' found. Installing NVIDIA drivers for use in prefix shell..."
${EESSI_PREFIX}/scripts/gpu_support/nvidia/link_nvidia_host_libraries.sh
fi

echo ">> Configuring EasyBuild..."
source $TOPDIR/configure_easybuild

if [ ! -z "${shared_fs_path}" ]; then
shared_eb_sourcepath=${shared_fs_path}/easybuild/sources
echo ">> Using ${shared_eb_sourcepath} as shared EasyBuild source path"
export EASYBUILD_SOURCEPATH=${shared_eb_sourcepath}:${EASYBUILD_SOURCEPATH}
fi

echo ">> Setting up \$MODULEPATH..."
# make sure no modules are loaded
module --force purge
# ignore current $MODULEPATH entirely
module unuse $MODULEPATH

# if an accelerator target is specified, we need to make sure that the CPU-only modules are also still available
if [ ! -z ${EESSI_ACCELERATOR_TARGET} ]; then
CPU_ONLY_MODULES_PATH=$(echo $EASYBUILD_INSTALLPATH | sed "s@/accel/${EESSI_ACCELERATOR_TARGET}@@g")/modules/all
Expand All @@ -220,7 +291,7 @@ if [ ! -z ${EESSI_ACCELERATOR_TARGET} ]; then
fi
fi

# If in dev.eessi.io, allow building on top of softw
# If in dev.eessi.io, allow building on top of software.eessi.io
if [[ "${EESSI_CVMFS_REPO}" == /cvmfs/dev.eessi.io ]]; then
module use /cvmfs/software.eessi.io/versions/$EESSI_VERSION/software/${EESSI_OS_TYPE}/${EESSI_SOFTWARE_SUBDIR_OVERRIDE}/modules/all
fi
Expand All @@ -236,47 +307,6 @@ fi
# assume there's only one diff file that corresponds to the PR patch file
pr_diff=$(ls [0-9]*.diff | head -1)

# install any additional required scripts
# order is important: these are needed to install a full CUDA SDK in host_injections
# for now, this just reinstalls all scripts. Note the most elegant, but works

# Only run install_scripts.sh if not dev.eessi.io for security
if [[ "${EESSI_CVMFS_REPO}" != /cvmfs/dev.eessi.io ]]; then
${TOPDIR}/install_scripts.sh --prefix ${EESSI_PREFIX}
fi

# Install full CUDA SDK and cu* libraries in host_injections
# Hardcode this for now, see if it works
# TODO: We should make a nice yaml and loop over all CUDA versions in that yaml to figure out what to install
# Allow skipping CUDA SDK install in e.g. CI environments
# The install_cuda... script uses EasyBuild. So, we need to check if we have EB
# or skip this step.
echo "Going to install full CUDA SDK and cu* libraries under host_injections if necessary"
module_avail_out=$TMPDIR/ml.out
module avail 2>&1 | grep EasyBuild &> ${module_avail_out}
if [[ $? -eq 0 ]]; then
echo_green ">> Found an EasyBuild module"
else
echo_yellow ">> No EasyBuild module found: skipping step to install CUDA (see output in ${module_avail_out})"
export skip_cuda_install=True
fi

temp_install_storage=${TMPDIR}/temp_install_storage
mkdir -p ${temp_install_storage}
if [ -z "${skip_cuda_install}" ] || [ ! "${skip_cuda_install}" ]; then
${EESSI_PREFIX}/scripts/gpu_support/nvidia/install_cuda_and_libraries.sh \
-t ${temp_install_storage} \
--accept-cuda-eula \
--accept-cudnn-eula
else
echo "Skipping installation of CUDA SDK and cu* libraries in host_injections, since the --skip-cuda-install flag was passed OR no EasyBuild module was found"
fi

# Install NVIDIA drivers in host_injections (if they exist)
if command_exists "nvidia-smi"; then
echo "Command 'nvidia-smi' found. Installing NVIDIA drivers for use in prefix shell..."
${EESSI_PREFIX}/scripts/gpu_support/nvidia/link_nvidia_host_libraries.sh
fi

# use PR patch file to determine in which easystack files stuff was added
changed_easystacks=$(cat ${pr_diff} | grep '^+++' | cut -f2 -d' ' | sed 's@^[a-z]/@@g' | grep 'easystacks/.*yml$' | egrep -v 'known-issues|missing')
Expand Down
3 changes: 3 additions & 0 deletions EESSI-remove-software.sh
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,9 @@ if [ $EUID -eq 0 ]; then
echo_yellow "Removing ${app_dir} and ${app_module}..."
rm -rf ${app_dir}
rm -rf ${app_module}
# recreate some directory to work around permission denied
# issues when rebuilding the package
mkdir -p ${app_dir}/easybuild
done
else
fatal_error "Easystack file ${easystack_file} not found!"
Expand Down
1 change: 1 addition & 0 deletions bot/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,7 @@ else
declare -a REMOVAL_STEP_ARGS=()
REMOVAL_STEP_ARGS+=("--save" "${TARBALL_TMP_REMOVAL_STEP_DIR}")
REMOVAL_STEP_ARGS+=("--storage" "${STORAGE}")

# add fakeroot option in order to be able to remove software, see:
# https://github.com/EESSI/software-layer/issues/312
REMOVAL_STEP_ARGS+=("--fakeroot")
Expand Down
Loading

0 comments on commit f87857a

Please sign in to comment.