Skip to content

Commit

Permalink
Merge remote-tracking branch 'apache-github/master' into streaming-we…
Browse files Browse the repository at this point in the history
…b-ui
  • Loading branch information
tdas committed Apr 3, 2014
2 parents 53be2c5 + 92a86b2 commit 61358e3
Show file tree
Hide file tree
Showing 199 changed files with 6,796 additions and 1,477 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -47,3 +47,4 @@ spark-*-bin.tar.gz
unit-tests.log
/lib/
rat-results.txt
scalastyle.txt
226 changes: 168 additions & 58 deletions bin/spark-shell
Original file line number Diff line number Diff line change
Expand Up @@ -30,67 +30,189 @@ esac
# Enter posix mode for bash
set -o posix

CORE_PATTERN="^[0-9]+$"
MEM_PATTERN="^[0-9]+[m|g|M|G]$"

## Global script variables
FWDIR="$(cd `dirname $0`/..; pwd)"

if [ "$1" = "--help" ] || [ "$1" = "-h" ]; then
echo "Usage: spark-shell [OPTIONS]"
echo "OPTIONS:"
echo "-c --cores num, the maximum number of cores to be used by the spark shell"
echo "-em --execmem num[m|g], the memory used by each executor of spark shell"
echo "-dm --drivermem num[m|g], the memory used by the spark shell and driver"
echo "-h --help, print this help information"
exit
fi
SPARK_REPL_OPTS="${SPARK_REPL_OPTS:-""}"
DEFAULT_MASTER="local"
MASTER=${MASTER:-""}

info_log=0

#CLI Color Templates
txtund=$(tput sgr 0 1) # Underline
txtbld=$(tput bold) # Bold
bldred=${txtbld}$(tput setaf 1) # red
bldyel=${txtbld}$(tput setaf 3) # yellow
bldblu=${txtbld}$(tput setaf 4) # blue
bldwht=${txtbld}$(tput setaf 7) # white
txtrst=$(tput sgr0) # Reset
info=${bldwht}*${txtrst} # Feedback
pass=${bldblu}*${txtrst}
warn=${bldred}*${txtrst}
ques=${bldblu}?${txtrst}

# Helper function to describe the script usage
function usage() {
cat << EOF
${txtbld}Usage${txtrst}: spark-shell [OPTIONS]
${txtbld}OPTIONS${txtrst}:
-h --help : Print this help information.
-c --cores : The maximum number of cores to be used by the Spark Shell.
-em --executor-memory : The memory used by each executor of the Spark Shell, the number
is followed by m for megabytes or g for gigabytes, e.g. "1g".
-dm --driver-memory : The memory used by the Spark Shell, the number is followed
by m for megabytes or g for gigabytes, e.g. "1g".
-m --master : A full string that describes the Spark Master, defaults to "local"
e.g. "spark://localhost:7077".
--log-conf : Enables logging of the supplied SparkConf as INFO at start of the
Spark Context.
e.g.
spark-shell -m spark://localhost:7077 -c 4 -dm 512m -em 2g
EOF
}

function out_error(){
echo -e "${txtund}${bldred}ERROR${txtrst}: $1"
usage
exit 1
}

function log_info(){
[ $info_log -eq 1 ] && echo -e "${bldyel}INFO${txtrst}: $1"
}

function log_warn(){
echo -e "${txtund}${bldyel}WARN${txtrst}: $1"
}

for o in "$@"; do
if [ "$1" = "-c" -o "$1" = "--cores" ]; then
shift
# PATTERNS used to validate more than one optional arg.
ARG_FLAG_PATTERN="^-"
MEM_PATTERN="^[0-9]+[m|g|M|G]$"
NUM_PATTERN="^[0-9]+$"
PORT_PATTERN="^[0-9]+$"

# Setters for optional args.
function set_cores(){
CORE_PATTERN="^[0-9]+$"
if [[ "$1" =~ $CORE_PATTERN ]]; then
SPARK_REPL_OPTS="$SPARK_REPL_OPTS -Dspark.cores.max=$1"
shift
SPARK_REPL_OPTS="$SPARK_REPL_OPTS -Dspark.cores.max=$1"
else
echo "ERROR: wrong format for -c/--cores"
exit 1
out_error "wrong format for $2"
fi
fi
if [ "$1" = "-em" -o "$1" = "--execmem" ]; then
shift
}

function set_em(){
if [[ $1 =~ $MEM_PATTERN ]]; then
SPARK_REPL_OPTS="$SPARK_REPL_OPTS -Dspark.executor.memory=$1"
shift
else
echo "ERROR: wrong format for --execmem/-em"
exit 1
out_error "wrong format for $2"
fi
fi
if [ "$1" = "-dm" -o "$1" = "--drivermem" ]; then
shift
}

function set_dm(){
if [[ $1 =~ $MEM_PATTERN ]]; then
export SPARK_DRIVER_MEMORY=$1
shift
else
echo "ERROR: wrong format for --drivermem/-dm"
exit 1
out_error "wrong format for $2"
fi
fi
done
}

function set_spark_log_conf(){
SPARK_REPL_OPTS="$SPARK_REPL_OPTS -Dspark.logConf=$1"
}

# Set MASTER from spark-env if possible
DEFAULT_SPARK_MASTER_PORT=7077
if [ -z "$MASTER" ]; then
. $FWDIR/bin/load-spark-env.sh
if [ "x" != "x$SPARK_MASTER_IP" ]; then
if [ "y" != "y$SPARK_MASTER_PORT" ]; then
SPARK_MASTER_PORT="${SPARK_MASTER_PORT}"
function set_spark_master(){
if ! [[ "$1" =~ $ARG_FLAG_PATTERN ]]; then
MASTER="$1"
else
SPARK_MASTER_PORT=$DEFAULT_SPARK_MASTER_PORT
out_error "wrong format for $2"
fi
}

function resolve_spark_master(){
# Set MASTER from spark-env if possible
DEFAULT_SPARK_MASTER_PORT=7077
if [ -z "$MASTER" ]; then
. $FWDIR/bin/load-spark-env.sh
if [ -n "$SPARK_MASTER_IP" ]; then
SPARK_MASTER_PORT="${SPARK_MASTER_PORT:-"$DEFAULT_SPARK_MASTER_PORT"}"
export MASTER="spark://${SPARK_MASTER_IP}:${SPARK_MASTER_PORT}"
fi
fi

if [ -z "$MASTER" ]; then
MASTER="$DEFAULT_MASTER"
fi
export MASTER="spark://${SPARK_MASTER_IP}:${SPARK_MASTER_PORT}"
fi
fi

}

function main(){
log_info "Base Directory set to $FWDIR"

resolve_spark_master
log_info "Spark Master is $MASTER"

log_info "Spark REPL options $SPARK_REPL_OPTS"
if $cygwin; then
# Workaround for issue involving JLine and Cygwin
# (see http://sourceforge.net/p/jline/bugs/40/).
# If you're using the Mintty terminal emulator in Cygwin, may need to set the
# "Backspace sends ^H" setting in "Keys" section of the Mintty options
# (see https://github.com/sbt/sbt/issues/562).
stty -icanon min 1 -echo > /dev/null 2>&1
export SPARK_REPL_OPTS="$SPARK_REPL_OPTS -Djline.terminal=unix"
$FWDIR/bin/spark-class org.apache.spark.repl.Main "$@"
stty icanon echo > /dev/null 2>&1
else
export SPARK_REPL_OPTS
$FWDIR/bin/spark-class org.apache.spark.repl.Main "$@"
fi
}

for option in "$@"
do
case $option in
-h | --help )
usage
exit 1
;;
-c | --cores)
shift
_1=$1
shift
set_cores $_1 "-c/--cores"
;;
-em | --executor-memory)
shift
_1=$1
shift
set_em $_1 "-em/--executor-memory"
;;
-dm | --driver-memory)
shift
_1=$1
shift
set_dm $_1 "-dm/--driver-memory"
;;
-m | --master)
shift
_1=$1
shift
set_spark_master $_1 "-m/--master"
;;
--log-conf)
shift
set_spark_log_conf "true"
info_log=1
;;
?)
;;
esac
done

# Copy restore-TTY-on-exit functions from Scala script so spark-shell exits properly even in
# binary distribution of Spark where Scala is not installed
Expand Down Expand Up @@ -120,22 +242,10 @@ if [[ ! $? ]]; then
saved_stty=""
fi

if $cygwin; then
# Workaround for issue involving JLine and Cygwin
# (see http://sourceforge.net/p/jline/bugs/40/).
# If you're using the Mintty terminal emulator in Cygwin, may need to set the
# "Backspace sends ^H" setting in "Keys" section of the Mintty options
# (see https://github.com/sbt/sbt/issues/562).
stty -icanon min 1 -echo > /dev/null 2>&1
export SPARK_REPL_OPTS="$SPARK_REPL_OPTS -Djline.terminal=unix"
$FWDIR/bin/spark-class org.apache.spark.repl.Main "$@"
stty icanon echo > /dev/null 2>&1
else
export SPARK_REPL_OPTS
$FWDIR/bin/spark-class org.apache.spark.repl.Main "$@"
fi
main

# record the exit status lest it be overwritten:
# then reenable echo and propagate the code.
exit_status=$?
onExit

38 changes: 38 additions & 0 deletions bin/spark-submit
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#!/usr/bin/env bash

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

export SPARK_HOME="$(cd `dirname $0`/..; pwd)"
ORIG_ARGS=$@

while (($#)); do
if [ $1 = "--deploy-mode" ]; then
DEPLOY_MODE=$2
elif [ $1 = "--driver-memory" ]; then
DRIVER_MEMORY=$2
fi

shift
done

if [ ! -z $DRIVER_MEMORY ] && [ ! -z $DEPLOY_MODE ] && [ $DEPLOY_MODE = "client" ]; then
export SPARK_MEM=$DRIVER_MEMORY
fi

$SPARK_HOME/bin/spark-class org.apache.spark.deploy.SparkSubmit $ORIG_ARGS

6 changes: 1 addition & 5 deletions core/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -82,10 +82,6 @@
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
</dependency>
<dependency>
<groupId>com.google.code.findbugs</groupId>
<artifactId>jsr305</artifactId>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
Expand Down Expand Up @@ -150,7 +146,7 @@
<artifactId>json4s-jackson_${scala.binary.version}</artifactId>
<version>3.2.6</version>
<!-- see also exclusion for lift-json; this is necessary since it depends on
scala-library and scalap 2.10.0, but we use 2.10.3, and only override
scala-library and scalap 2.10.0, but we use 2.10.4, and only override
scala-library -->
<exclusions>
<exclusion>
Expand Down
2 changes: 1 addition & 1 deletion core/src/main/scala/org/apache/spark/SparkEnv.scala
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ class SparkEnv private[spark] (
// Unfortunately Akka's awaitTermination doesn't actually wait for the Netty server to shut
// down, but let's call it anyway in case it gets fixed in a later release
// UPDATE: In Akka 2.1.x, this hangs if there are remote actors, so we can't call it.
//actorSystem.awaitTermination()
// actorSystem.awaitTermination()
}

private[spark]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ extends Logging {
private var initialized = false
private var conf: SparkConf = null
def initialize(_isDriver: Boolean, conf: SparkConf) {
TorrentBroadcast.conf = conf //TODO: we might have to fix it in tests
TorrentBroadcast.conf = conf // TODO: we might have to fix it in tests
synchronized {
if (!initialized) {
initialized = true
Expand Down
3 changes: 3 additions & 0 deletions core/src/main/scala/org/apache/spark/deploy/Client.scala
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,9 @@ private class ClientActor(driverArgs: ClientArguments, conf: SparkConf) extends
*/
object Client {
def main(args: Array[String]) {
println("WARNING: This client is deprecated and will be removed in a future version of Spark.")
println("Use ./bin/spark-submit with \"--master spark://host:port\"")

val conf = new SparkConf()
val driverArgs = new ClientArguments(args)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,9 +66,9 @@ class LocalSparkCluster(numWorkers: Int, coresPerWorker: Int, memoryPerWorker: I
// TODO: In Akka 2.1.x, ActorSystem.awaitTermination hangs when you have remote actors!
// This is unfortunate, but for now we just comment it out.
workerActorSystems.foreach(_.shutdown())
//workerActorSystems.foreach(_.awaitTermination())
// workerActorSystems.foreach(_.awaitTermination())
masterActorSystems.foreach(_.shutdown())
//masterActorSystems.foreach(_.awaitTermination())
// masterActorSystems.foreach(_.awaitTermination())
masterActorSystems.clear()
workerActorSystems.clear()
}
Expand Down
Loading

0 comments on commit 61358e3

Please sign in to comment.