forked from dotnet/spark
-
Notifications
You must be signed in to change notification settings - Fork 0
/
install-worker.sh
84 lines (67 loc) · 2.84 KB
/
install-worker.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
#!/bin/bash
##############################################################################
# Description:
# This is a helper script to install the worker binaries on your Apache Spark cluster
#
# Usage:
# ./install-worker.sh <release-provider> <path-to-worker-release> <local-worker-installation-path>
#
# Sample usage:
# ./install-worker.sh
# github
# https://github.com/dotnet/spark/releases/download/v0.1.0/Microsoft.Spark.Worker.netcoreapp2.1.linux-x64-0.1.0.tar.gz
# /usr/local/bin
#
# Or, if your Worker release is stored on a filesystem like ABFS, here is what the
# invocation would look like:
# ./install-worker.sh
# azure
# abfs://<blobcontainer>@<gen2storageaccount>.dfs.core.windows.net/<path>/Microsoft.Spark.Worker.netcoreapp2.1.linux-x64-0.1.0.tar.gz
# /usr/local/bin
#
##############################################################################
# Treat references to unset variables as errors. Failures of the critical
# steps below are checked explicitly (see die), so the script never reports
# success after a failed download or extraction.
set -u
# Uncomment if you want full tracing (for debugging purposes)
#set -o xtrace

# Print an error message to stderr and exit with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Cloud Provider ("github", "azure" or "aws", case-insensitive; any other
# value means the source is a path that can be read with a plain cp).
CLOUD_PROVIDER=${1:-}
# Path where packaged worker file (tgz) exists.
SRC_WORKER_PATH_OR_URI=${2:-}
# The path on the executor nodes where Microsoft.Spark.Worker executable is installed.
WORKER_INSTALLATION_PATH=${3:-}

# Refuse to run without a source and a destination: with an empty destination
# the clean-up and install steps below would operate directly on "/".
if [[ -z "$SRC_WORKER_PATH_OR_URI" || -z "$WORKER_INSTALLATION_PATH" ]]; then
  die "Usage: $0 <release-provider> <path-to-worker-release> <local-worker-installation-path>"
fi

# The path where all the dependent libraries are installed so that it doesn't
# pollute the $WORKER_INSTALLATION_PATH.
SPARKDOTNET_ROOT=$WORKER_INSTALLATION_PATH/spark-dotnet

# Extract version: the third '-'-separated field of the archive's base name,
# e.g. "0.1.0" for Microsoft.Spark.Worker.netcoreapp2.1.linux-x64-0.1.0.tar.gz.
IFS='-' read -ra BASE_FILENAME <<< "$(basename -- "$SRC_WORKER_PATH_OR_URI" .tar.gz)"
VERSION=${BASE_FILENAME[2]:-}
IFS='.' read -ra VERSION_CHECK <<< "$VERSION"
[[ ${#VERSION_CHECK[@]} -eq 3 ]] \
  || die "Version check does not satisfy. Raise an issue here: https://github.com/dotnet/spark"

# Path of the final destination for the worker binaries
# (the one we are about to download and extract)
DEST_WORKER_PATH=$SPARKDOTNET_ROOT/Microsoft.Spark.Worker-$VERSION
DEST_WORKER_BINARY=$DEST_WORKER_PATH/Microsoft.Spark.Worker

# Temporary worker file. It lives inside a private, freshly created directory
# so that its name is not predictable and so that the target file does not
# exist yet (hdfs dfs -get refuses to overwrite an existing file).
TEMP_WORKER_DIR=$(mktemp -d) || die "Unable to create a temporary directory."
# Remove the temporary directory on every exit path, including failures.
trap 'rm -rf -- "$TEMP_WORKER_DIR"' EXIT
TEMP_WORKER_FILENAME=$TEMP_WORKER_DIR/temp_worker.tgz

# Copy the worker file to the local temporary file using the tool that matches
# the provider. Returns the exit status of the copy command.
fetch_worker_package() {
  case "${CLOUD_PROVIDER,,}" in
    github)
      wget "$SRC_WORKER_PATH_OR_URI" -O "$TEMP_WORKER_FILENAME"
      ;;
    azure)
      hdfs dfs -get "$SRC_WORKER_PATH_OR_URI" "$TEMP_WORKER_FILENAME"
      ;;
    aws)
      aws s3 cp "$SRC_WORKER_PATH_OR_URI" "$TEMP_WORKER_FILENAME"
      ;;
    *)
      cp -f -- "$SRC_WORKER_PATH_OR_URI" "$TEMP_WORKER_FILENAME"
      ;;
  esac
}

# Fetch first: an existing installation is only removed once the new package
# has actually been obtained.
fetch_worker_package \
  || die "Failed to fetch the worker package from '$SRC_WORKER_PATH_OR_URI'."

# Clean up any existing files.
sudo rm -f -- "$WORKER_INSTALLATION_PATH/Microsoft.Spark.Worker"
sudo rm -rf -- "$SPARKDOTNET_ROOT"

# Untar the file.
sudo mkdir -p -- "$SPARKDOTNET_ROOT" \
  || die "Failed to create '$SPARKDOTNET_ROOT'."
sudo tar xzf "$TEMP_WORKER_FILENAME" -C "$SPARKDOTNET_ROOT" \
  || die "Failed to extract the worker package into '$SPARKDOTNET_ROOT'."

# Make the file executable since dotnet doesn't set this correctly.
sudo chmod 755 "$DEST_WORKER_BINARY" \
  || die "Worker binary not found at '$DEST_WORKER_BINARY' after extraction."

# Create a symlink.
sudo ln -sf -- "$DEST_WORKER_BINARY" "$WORKER_INSTALLATION_PATH/Microsoft.Spark.Worker" \
  || die "Failed to create the symlink in '$WORKER_INSTALLATION_PATH'."

# The temporary worker file is removed by the EXIT trap installed above.