From 8e1f6d83162abe57a9abec7fb92dc9c5f6ed9435 Mon Sep 17 00:00:00 2001
From: "Allan Douglas R. de Oliveira"
Date: Sun, 13 Apr 2014 21:10:09 -0300
Subject: [PATCH 1/4] Added SPARK_WORKER_INSTANCES and SPARK_WORKER_CORES support

---
 README.md                              |  2 ++
 deploy_templates.py                    | 13 +++++++++++++
 templates/root/spark/conf/spark-env.sh |  3 +++
 3 files changed, 18 insertions(+)

diff --git a/README.md b/README.md
index 11d2fb2..df277b4 100644
--- a/README.md
+++ b/README.md
@@ -34,6 +34,8 @@ and can be used to install any pre-requisites.
     {{mapred_local_dirs}}
     {{spark_local_dirs}}
     {{default_spark_mem}}
+    {{spark_worker_instances}}
+    {{spark_worker_cores}}
 
 You can add new variables by modifying `deploy_templates.py`
 
diff --git a/deploy_templates.py b/deploy_templates.py
index a2840f7..379a984 100755
--- a/deploy_templates.py
+++ b/deploy_templates.py
@@ -12,6 +12,7 @@
 
 # Find system memory in KB and compute Spark's default limit from that
 mem_command = "cat /proc/meminfo | grep MemTotal | awk '{print $2}'"
+cpu_command = "nproc"
 
 master_ram_kb = int(
     os.popen(mem_command).read().strip())
@@ -20,8 +21,14 @@
 
 slave_mem_command = "ssh -t -o StrictHostKeyChecking=no %s %s" %\
     (first_slave, mem_command)
+
+slave_cpu_command = "ssh -t -o StrictHostKeyChecking=no %s %s" %\
+    (first_slave, cpu_command)
+
 slave_ram_kb = int(os.popen(slave_mem_command).read().strip())
 
+slave_cpus = int(os.popen(slave_cpu_command).read().strip())
+
 system_ram_kb = min(slave_ram_kb, master_ram_kb)
 
 system_ram_mb = system_ram_kb / 1024
@@ -42,6 +49,10 @@
 # Make tachyon_mb as spark_mb for now.
 tachyon_mb = spark_mb
 
+worker_instances = int(os.getenv("SPARK_WORKER_INSTANCES", 1))
+# Distribute equally cpu cores among worker instances
+worker_cores = slave_cpus / worker_instances
+
 template_vars = {
   "master_list": os.getenv("MASTERS"),
   "active_master": os.getenv("MASTERS").split("\n")[0],
@@ -50,6 +61,8 @@
   "mapred_local_dirs": os.getenv("MAPRED_LOCAL_DIRS"),
   "spark_local_dirs": os.getenv("SPARK_LOCAL_DIRS"),
   "default_spark_mem": "%dm" % spark_mb,
+  "spark_worker_instances": "%d" % worker_instances,
+  "spark_worker_cores": "%d" % worker_cores,
   "spark_version": os.getenv("SPARK_VERSION"),
   "shark_version": os.getenv("SHARK_VERSION"),
   "hadoop_major_version": os.getenv("HADOOP_MAJOR_VERSION"),
diff --git a/templates/root/spark/conf/spark-env.sh b/templates/root/spark/conf/spark-env.sh
index d5077ea..f2a82ac 100755
--- a/templates/root/spark/conf/spark-env.sh
+++ b/templates/root/spark/conf/spark-env.sh
@@ -26,6 +26,9 @@ export SPARK_MASTER_IP={{active_master}}
 export MASTER=`cat /root/spark-ec2/cluster-url`
 export SPARK_CLASSPATH=$SPARK_CLASSPATH":/root/ephemeral-hdfs/conf"
 
+export SPARK_WORKER_INSTANCES={{spark_worker_instances}}
+export SPARK_WORKER_CORES={{spark_worker_cores}}
+
 # Bind Spark's web UIs to this machine's public EC2 hostname:
 export SPARK_PUBLIC_DNS=`wget -q -O - http://169.254.169.254/latest/meta-data/public-hostname`
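Note on PATCH 1/4: worker_cores is derived by splitting the slave's detected core count evenly across the configured worker instances. A minimal standalone sketch of that calculation, using hypothetical hard-coded inputs instead of the values deploy_templates.py actually probes over SSH:

    # Hypothetical inputs: deploy_templates.py gets slave_cpus by running `nproc`
    # on the first slave over SSH, and worker_instances from SPARK_WORKER_INSTANCES.
    slave_cpus = 8
    worker_instances = 2

    # The patch writes "slave_cpus / worker_instances"; under Python 2 that is
    # integer division, which "//" makes explicit here.
    worker_cores = slave_cpus // worker_instances

    print("SPARK_WORKER_CORES=%d" % worker_cores)  # SPARK_WORKER_CORES=4

With these example numbers, each of the two workers on an 8-core slave is given 4 cores; the two template variables then flow into spark-env.sh as SPARK_WORKER_INSTANCES and SPARK_WORKER_CORES.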
From c8e3f22ad268c17bbd53e636cc826c6059ed44cb Mon Sep 17 00:00:00 2001
From: "Allan Douglas R. de Oliveira"
Date: Thu, 1 May 2014 12:20:20 -0300
Subject: [PATCH 2/4] Added SPARK_MASTER_OPTS configuration

---
 deploy_templates.py                    | 1 +
 templates/root/spark/conf/spark-env.sh | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/deploy_templates.py b/deploy_templates.py
index 379a984..6b1ec29 100755
--- a/deploy_templates.py
+++ b/deploy_templates.py
@@ -63,6 +63,7 @@
   "default_spark_mem": "%dm" % spark_mb,
   "spark_worker_instances": "%d" % worker_instances,
   "spark_worker_cores": "%d" % worker_cores,
+  "spark_master_opts": os.getenv("SPARK_MASTER_OPTS"),
   "spark_version": os.getenv("SPARK_VERSION"),
   "shark_version": os.getenv("SHARK_VERSION"),
   "hadoop_major_version": os.getenv("HADOOP_MAJOR_VERSION"),
diff --git a/templates/root/spark/conf/spark-env.sh b/templates/root/spark/conf/spark-env.sh
index f2a82ac..d5ecc89 100755
--- a/templates/root/spark/conf/spark-env.sh
+++ b/templates/root/spark/conf/spark-env.sh
@@ -20,6 +20,8 @@ export SPARK_MEM={{default_spark_mem}}
 SPARK_JAVA_OPTS+=" -Dspark.local.dir={{spark_local_dirs}}"
 export SPARK_JAVA_OPTS
 
+export SPARK_MASTER_OPTS="{{spark_master_opts}}"
+
 export HADOOP_HOME="/root/ephemeral-hdfs"
 export SPARK_LIBRARY_PATH="/root/ephemeral-hdfs/lib/native/"
 export SPARK_MASTER_IP={{active_master}}

From 149c7671d13aee354c1cd790aacb0256a4909980 Mon Sep 17 00:00:00 2001
From: "Allan Douglas R. de Oliveira"
Date: Thu, 1 May 2014 13:04:48 -0300
Subject: [PATCH 3/4] Added spark_master_opts to README

---
 README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.md b/README.md
index df277b4..2c707af 100644
--- a/README.md
+++ b/README.md
@@ -36,6 +36,7 @@ and can be used to install any pre-requisites.
     {{default_spark_mem}}
     {{spark_worker_instances}}
     {{spark_worker_cores}}
+    {{spark_master_opts}}
 
 You can add new variables by modifying `deploy_templates.py`
 

From 65f0cfe7a72539ba60d986364a4cdc0484309e79 Mon Sep 17 00:00:00 2001
From: "Allan Douglas R. de Oliveira"
Date: Sat, 3 May 2014 18:26:48 -0300
Subject: [PATCH 4/4] Avoid workers with no cores when over-subscribing workers

---
 deploy_templates.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/deploy_templates.py b/deploy_templates.py
index 6b1ec29..99b5568 100755
--- a/deploy_templates.py
+++ b/deploy_templates.py
@@ -51,7 +51,7 @@
 
 worker_instances = int(os.getenv("SPARK_WORKER_INSTANCES", 1))
 # Distribute equally cpu cores among worker instances
-worker_cores = slave_cpus / worker_instances
+worker_cores = max(slave_cpus / worker_instances, 1)
 
 template_vars = {
   "master_list": os.getenv("MASTERS"),
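Note on PATCH 4/4: with plain division, setting SPARK_WORKER_INSTANCES higher than the slave's core count would yield zero cores per worker, leaving workers that cannot run any tasks; the max(..., 1) guard keeps at least one core per worker. A small sketch with hypothetical numbers (a 4-core slave, 8 worker instances) comparing the two expressions:

    slave_cpus = 4
    worker_instances = 8

    unguarded = slave_cpus // worker_instances        # 0 cores per worker
    guarded = max(slave_cpus // worker_instances, 1)  # 1 core per worker

    print(unguarded, guarded)  # prints: 0 1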