Commit 695816e

Merge pull request #46 from douglaz/configurable_workers

Configurable workers

pwendell committed May 3, 2014
2 parents: df2d208 + 65f0cfe

Showing 3 changed files with 22 additions and 0 deletions.
README.md (3 additions, 0 deletions)
@@ -34,6 +34,9 @@ and can be used to install any pre-requisites.
 {{mapred_local_dirs}}
 {{spark_local_dirs}}
 {{default_spark_mem}}
+{{spark_worker_instances}}
+{{spark_worker_cores}}
+{{spark_master_opts}}
 You can add new variables by modifying `deploy_templates.py`

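For context, a minimal sketch of how such {{...}} placeholders get filled in. The toy renderer and the hard-coded values below are illustrative only; the real substitution logic and the computed values live in deploy_templates.py:

    # Illustrative only: a toy renderer for {{name}} placeholders.
    # deploy_templates.py computes the real values for these keys.
    template_vars = {
        "spark_worker_instances": "2",
        "spark_worker_cores": "4",
    }

    def render(text, variables):
        # Replace each {{name}} placeholder with its configured value.
        for name, value in variables.items():
            text = text.replace("{{" + name + "}}", value)
        return text

    print(render("export SPARK_WORKER_CORES={{spark_worker_cores}}", template_vars))
    # -> export SPARK_WORKER_CORES=4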
deploy_templates.py (14 additions, 0 deletions)

@@ -12,6 +12,7 @@

 # Find system memory in KB and compute Spark's default limit from that
 mem_command = "cat /proc/meminfo | grep MemTotal | awk '{print $2}'"
+cpu_command = "nproc"
 
 master_ram_kb = int(
     os.popen(mem_command).read().strip())
@@ -20,8 +21,14 @@

 slave_mem_command = "ssh -t -o StrictHostKeyChecking=no %s %s" %\
     (first_slave, mem_command)
+
+slave_cpu_command = "ssh -t -o StrictHostKeyChecking=no %s %s" %\
+    (first_slave, cpu_command)
+
 slave_ram_kb = int(os.popen(slave_mem_command).read().strip())
 
+slave_cpus = int(os.popen(slave_cpu_command).read().strip())
+
 system_ram_kb = min(slave_ram_kb, master_ram_kb)
 
 system_ram_mb = system_ram_kb / 1024
@@ -42,6 +49,10 @@
 # Make tachyon_mb as spark_mb for now.
 tachyon_mb = spark_mb
 
+worker_instances = int(os.getenv("SPARK_WORKER_INSTANCES", 1))
+# Distribute CPU cores equally among the worker instances
+worker_cores = max(slave_cpus / worker_instances, 1)
+
 template_vars = {
     "master_list": os.getenv("MASTERS"),
     "active_master": os.getenv("MASTERS").split("\n")[0],
@@ -50,6 +61,9 @@
"mapred_local_dirs": os.getenv("MAPRED_LOCAL_DIRS"),
"spark_local_dirs": os.getenv("SPARK_LOCAL_DIRS"),
"default_spark_mem": "%dm" % spark_mb,
"spark_worker_instances": "%d" % worker_instances,
"spark_worker_cores": "%d" % worker_cores,
"spark_master_opts": os.getenv("SPARK_MASTER_OPTS"),
"spark_version": os.getenv("SPARK_VERSION"),
"shark_version": os.getenv("SHARK_VERSION"),
"hadoop_major_version": os.getenv("HADOOP_MAJOR_VERSION"),
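A quick check of the core-splitting arithmetic above. The script runs under Python 2, where / on two ints already floors; the // below gives the same behavior on Python 3:

    # Mirrors worker_cores = max(slave_cpus / worker_instances, 1)
    def split_cores(slave_cpus, worker_instances):
        return max(slave_cpus // worker_instances, 1)

    print(split_cores(8, 1))  # 8: a single worker gets every core
    print(split_cores(8, 3))  # 2: cores are floor-divided, leftovers unused
    print(split_cores(2, 4))  # 1: each worker still gets at least one core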
templates/root/spark/conf/spark-env.sh (5 additions, 0 deletions)

@@ -20,12 +20,17 @@ export SPARK_MEM={{default_spark_mem}}
 SPARK_JAVA_OPTS+=" -Dspark.local.dir={{spark_local_dirs}}"
 export SPARK_JAVA_OPTS
 
+export SPARK_MASTER_OPTS="{{spark_master_opts}}"
+
 export HADOOP_HOME="/root/ephemeral-hdfs"
 export SPARK_LIBRARY_PATH="/root/ephemeral-hdfs/lib/native/"
 export SPARK_MASTER_IP={{active_master}}
 export MASTER=`cat /root/spark-ec2/cluster-url`
 export SPARK_CLASSPATH=$SPARK_CLASSPATH":/root/ephemeral-hdfs/conf"
 
+export SPARK_WORKER_INSTANCES={{spark_worker_instances}}
+export SPARK_WORKER_CORES={{spark_worker_cores}}
+
 # Bind Spark's web UIs to this machine's public EC2 hostname:
 export SPARK_PUBLIC_DNS=`wget -q -O - http://169.254.169.254/latest/meta-data/public-hostname`

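To make the end-to-end effect concrete, a sketch under assumed inputs (an 8-core slave where nproc reports 8, and SPARK_WORKER_INSTANCES=2 set in the launch environment; the numbers are illustrative):

    # Assumed: nproc on the first slave reports 8; user exported SPARK_WORKER_INSTANCES=2.
    slave_cpus = 8
    worker_instances = 2
    worker_cores = max(slave_cpus // worker_instances, 1)  # -> 4

    # The rendered spark-env.sh on each node would then export:
    print("export SPARK_WORKER_INSTANCES=%d" % worker_instances)
    print("export SPARK_WORKER_CORES=%d" % worker_cores)

Spark's standalone scripts read these exports and start two Worker daemons per slave, each offering four cores to the master.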
