This repository has been archived by the owner on Oct 24, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
/
mnc-demo-batch.sh
executable file
·68 lines (54 loc) · 1.98 KB
/
mnc-demo-batch.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
#!/usr/bin/env bash
# Utility for running the MNC demo on ETHZ's Euryale mini-cluster as a Slurm
# batch job.
#
#
#######################################
# Abort the script after a failed setup step.
# Arguments: $1 - short description of what went wrong
# Outputs:   writes "Failed to set up Caffe: <description>" to stderr
# Exits:     with the status of the command that ran immediately before the
#            call (e.g. the left-hand side of `cmd || fail ...`), or with 1
#            if that status was 0, so a failure is never reported as success.
#######################################
function fail {
  # Must be the very first statement: any other command would overwrite $?.
  local last_err="$?"
  echo >&2 "Failed to set up Caffe: $1"
  # Guard against being called when the previous command succeeded.
  if [ "${last_err}" -eq 0 ]; then
    last_err=1
  fi
  exit "${last_err}"
}
# Record when and where the job starts, refuse to run outside of a Slurm job,
# and print a banner describing the job environment.
tstamp="$(date '+%D %T')"
hn="$(hostname -f)"
jobid="${SLURM_JOB_ID}"
jobname="${SLURM_JOB_NAME}"

# An empty SLURM_JOB_ID is taken to mean the script was started directly
# instead of being submitted as a batch job. The message goes to stderr and
# names the variable that is actually checked.
if [[ -z "${jobid}" ]]; then
  echo >&2 "ERROR: SLURM_JOB_ID undefined, are you running this script directly ?"
  exit 1
fi

printf "%s: starting %s(%s) on host %s\n" "${tstamp}" "${jobname}" "${jobid}" "${hn}"
echo "**"
echo "** SLURM_CLUSTER_NAME=${SLURM_CLUSTER_NAME}"
echo "** SLURM_JOB_NAME=${jobname}"
# Print the same variable that was validated above, so label and value agree.
echo "** SLURM_JOB_ID=${jobid}"
echo "** SLURM_JOB_NODELIST=${SLURM_JOB_NODELIST}"
echo "** SLURM_NUM_NODES=${SLURM_NUM_NODES}"
# NOTE(review): SLURMTMPDIR is presumably a site-specific variable; it is
# printed as-is and will be empty if the cluster does not define it.
echo "** SLURMTMPDIR=${SLURMTMPDIR}"
echo "** working directory = ${SLURM_SUBMIT_DIR}"
echo
# Prepare the runtime environment for the demo: load the required environment
# modules, make sure conda is reachable, and activate the "mnc" environment.
# Any failing step aborts the job through `fail`.
echo "Setting up modules and miniconda..."

# TODO(andrei): Common config with CUDA/cuDNN/openCV versions.
CUDA_VERSION="8.0.27"
WORKDIR=~/work

module load cuda/"${CUDA_VERSION}" || fail 'Could not load CUDA module.'
module load cudnn/v5 || fail 'Could not load CUDNN module (v5).'
module load opencv/3.1.0 || fail 'Could not load OpenCV module (v3.1.0)'
module load boost/1.62.0 || fail 'Could not load boost module (v1.62.0).'
module load mpich || fail 'Could not load mpi module.'

# `command -v` is the shell builtin way to probe for a command; `which` is an
# external tool whose behaviour differs between systems.
if ! command -v conda >/dev/null 2>&1; then
  # Ensure conda is on the PATH.
  export PATH="${HOME}/miniconda/bin:${PATH}"
fi

# Without the right Python environment the demo cannot run, so stop here
# instead of continuing with whatever interpreter happens to be on the PATH.
source activate mnc || fail 'Could not activate the "mnc" conda environment.'

# Mini hack to get OpenCV work even though it expects CUDA 7.5. Caffe itself
# will use CUDA 8, but OpenCV won't complain either.
# The ${VAR:+...} form avoids a leading ':' when LD_LIBRARY_PATH is empty; an
# empty entry would make the loader search the current directory as well.
export LD_LIBRARY_PATH="${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}/site/opt/cuda/7.5.18/x64/lib64"

echo "Setup OK. srun-ing MNC demo..."
# Run the demo from the MNC checkout, then report its status and duration.
# tools/demo.py is a relative path, so a failed cd must stop the job rather
# than let srun run from the wrong directory.
cd "${WORKDIR}/MNC" || {
  echo >&2 "ERROR: could not change directory to ${WORKDIR}/MNC"
  exit 1
}

start_ts="$(date '+%s')"

# This is where the useful stuff actually happens. All script arguments are
# forwarded unchanged to the demo; stderr is merged into stdout.
srun tools/demo.py "$@" 2>&1
# Capture the status immediately, before any other command overwrites $?.
stat="$?"

# We're done. Report some misc info and exit.
elapsed=$(( $(date '+%s') - start_ts ))
echo "Job finished. Status=$stat, duration=$elapsed second(s)."

# Propagate the demo's exit status; otherwise the script would always end
# with the status of the echo above (0) and a failed run would look successful.
exit "${stat}"