
Commit

merge master
witgo committed Jun 5, 2014
2 parents 3771474 + abea2d4 commit 51fb3d6
Showing 244 changed files with 11,066 additions and 4,354 deletions.
2 changes: 1 addition & 1 deletion .gitignore
@@ -7,7 +7,7 @@
sbt/*.jar
.settings
.cache
.mima-excludes
.generated-mima-excludes
/build/
work/
out/
1 change: 1 addition & 0 deletions .rat-excludes
@@ -3,6 +3,7 @@ target
.project
.classpath
.mima-excludes
.generated-mima-excludes
.rat-excludes
.*md
derby.log
7 changes: 4 additions & 3 deletions README.md
@@ -9,13 +9,14 @@ You can find the latest Spark documentation, including a programming
guide, on the project webpage at <http://spark.apache.org/documentation.html>.
This README file only contains basic setup instructions.


## Building Spark

Spark is built on Scala 2.10. To build Spark and its example programs, run:

./sbt/sbt assembly

(You do not need to do this if you downloaded a pre-built package.)

## Interactive Scala Shell

The easiest way to start using Spark is through the Scala shell:
@@ -41,9 +42,9 @@ And run the following command, which should also return 1000:
Spark also comes with several sample programs in the `examples` directory.
To run one of them, use `./bin/run-example <class> [params]`. For example:

./bin/run-example org.apache.spark.examples.SparkLR
./bin/run-example SparkPi

will run the Logistic Regression example locally.
will run the Pi example locally.

You can set the MASTER environment variable when running examples to submit
examples to a cluster. This can be a mesos:// or spark:// URL,
2 changes: 1 addition & 1 deletion assembly/pom.xml
@@ -96,7 +96,7 @@
<filter>
<artifact>*:*</artifact>
<excludes>
<exclude>org.datanucleus:*</exclude>
<exclude>org/datanucleus/**</exclude>
<exclude>META-INF/*.SF</exclude>
<exclude>META-INF/*.DSA</exclude>
<exclude>META-INF/*.RSA</exclude>
2 changes: 0 additions & 2 deletions bagel/src/test/scala/org/apache/spark/bagel/BagelSuite.scala
@@ -38,8 +38,6 @@ class BagelSuite extends FunSuite with Assertions with BeforeAndAfter with Timeo
sc.stop()
sc = null
}
// To avoid Akka rebinding to the same port, since it doesn't unbind immediately on shutdown
System.clearProperty("spark.driver.port")
}

test("halting by voting") {
24 changes: 23 additions & 1 deletion bin/compute-classpath.cmd
@@ -20,6 +20,13 @@ rem
rem This script computes Spark's classpath and prints it to stdout; it's used by both the "run"
rem script and the ExecutorRunner in standalone cluster mode.

rem If we're called from spark-class2.cmd, it already set enabledelayedexpansion and setting
rem it here would stop us from affecting its copy of the CLASSPATH variable; otherwise we
rem need to set it here because we use !datanucleus_jars! below.
if "%DONT_PRINT_CLASSPATH%"=="1" goto skip_delayed_expansion
setlocal enabledelayedexpansion
:skip_delayed_expansion

set SCALA_VERSION=2.10

rem Figure out where the Spark framework is installed
@@ -31,7 +38,7 @@ if exist "%FWDIR%conf\spark-env.cmd" call "%FWDIR%conf\spark-env.cmd"
rem Build up classpath
set CLASSPATH=%FWDIR%conf
if exist "%FWDIR%RELEASE" (
for %%d in ("%FWDIR%jars\spark-assembly*.jar") do (
for %%d in ("%FWDIR%lib\spark-assembly*.jar") do (
set ASSEMBLY_JAR=%%d
)
) else (
@@ -42,6 +49,21 @@ if exist "%FWDIR%RELEASE" (

set CLASSPATH=%CLASSPATH%;%ASSEMBLY_JAR%

rem When Hive support is needed, Datanucleus jars must be included on the classpath.
rem Datanucleus jars do not work if only included in the uber jar as plugin.xml metadata is lost.
rem Both sbt and maven will populate "lib_managed/jars/" with the datanucleus jars when Spark is
rem built with Hive, so look for them there.
if exist "%FWDIR%RELEASE" (
set datanucleus_dir=%FWDIR%lib
) else (
set datanucleus_dir=%FWDIR%lib_managed\jars
)
set "datanucleus_jars="
for %%d in ("%datanucleus_dir%\datanucleus-*.jar") do (
set datanucleus_jars=!datanucleus_jars!;%%d
)
set CLASSPATH=%CLASSPATH%;%datanucleus_jars%

set SPARK_CLASSES=%FWDIR%core\target\scala-%SCALA_VERSION%\classes
set SPARK_CLASSES=%SPARK_CLASSES%;%FWDIR%repl\target\scala-%SCALA_VERSION%\classes
set SPARK_CLASSES=%SPARK_CLASSES%;%FWDIR%mllib\target\scala-%SCALA_VERSION%\classes
37 changes: 32 additions & 5 deletions bin/pyspark
@@ -17,14 +17,20 @@
# limitations under the License.
#

# Figure out where the Scala framework is installed
# Figure out where Spark is installed
FWDIR="$(cd `dirname $0`/..; pwd)"

# Export this as SPARK_HOME
export SPARK_HOME="$FWDIR"

SCALA_VERSION=2.10

if [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then
echo "Usage: ./bin/pyspark [options]"
$FWDIR/bin/spark-submit --help 2>&1 | grep -v Usage 1>&2
exit 0
fi

# Exit if the user hasn't compiled Spark
if [ ! -f "$FWDIR/RELEASE" ]; then
# Exit if the user hasn't compiled Spark
@@ -52,13 +58,34 @@ export PYTHONPATH=$SPARK_HOME/python/lib/py4j-0.8.1-src.zip:$PYTHONPATH
export OLD_PYTHONSTARTUP=$PYTHONSTARTUP
export PYTHONSTARTUP=$FWDIR/python/pyspark/shell.py

# If IPython options are specified, assume user wants to run IPython
if [ -n "$IPYTHON_OPTS" ]; then
IPYTHON=1
fi

# Only use ipython if no command line arguments were provided [SPARK-1134]
if [[ "$IPYTHON" = "1" && $# = 0 ]] ; then
exec ipython $IPYTHON_OPTS
# Build up arguments list manually to preserve quotes and backslashes.
# We export Spark submit arguments as an environment variable because shell.py must run as a
# PYTHONSTARTUP script, which does not take in arguments. This is required for IPython notebooks.

PYSPARK_SUBMIT_ARGS=""
whitespace="[[:space:]]"
for i in "$@"; do
if [[ $i =~ \" ]]; then i=$(echo $i | sed 's/\"/\\\"/g'); fi
if [[ $i =~ $whitespace ]]; then i=\"$i\"; fi
PYSPARK_SUBMIT_ARGS="$PYSPARK_SUBMIT_ARGS $i"
done
export PYSPARK_SUBMIT_ARGS

# If a python file is provided, directly run spark-submit.
if [[ "$1" =~ \.py$ ]]; then
echo -e "\nWARNING: Running python applications through ./bin/pyspark is deprecated as of Spark 1.0." 1>&2
echo -e "Use ./bin/spark-submit <python file>\n" 1>&2
exec $FWDIR/bin/spark-submit "$@"
else
exec "$PYSPARK_PYTHON" "$@"
# Only use ipython if no command line arguments were provided [SPARK-1134]
if [[ "$IPYTHON" = "1" ]]; then
exec ipython $IPYTHON_OPTS
else
exec "$PYSPARK_PYTHON"
fi
fi
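
As an aside, here is a minimal standalone sketch of the quoting loop introduced above, showing how arguments are rewritten before being exported as PYSPARK_SUBMIT_ARGS; the sample arguments (local[2], "my app", path/to/app.py) are hypothetical and not part of this commit:

    #!/usr/bin/env bash
    # Hypothetical sample arguments; anything containing whitespace or embedded
    # double quotes must survive the round trip through the environment variable.
    set -- --master local[2] --name "my app" path/to/app.py

    PYSPARK_SUBMIT_ARGS=""
    whitespace="[[:space:]]"
    for i in "$@"; do
      # Escape embedded double quotes, then re-quote arguments containing whitespace.
      if [[ $i =~ \" ]]; then i=$(echo $i | sed 's/\"/\\\"/g'); fi
      if [[ $i =~ $whitespace ]]; then i=\"$i\"; fi
      PYSPARK_SUBMIT_ARGS="$PYSPARK_SUBMIT_ARGS $i"
    done

    echo "$PYSPARK_SUBMIT_ARGS"
    # prints:  --master local[2] --name "my app" path/to/app.py

With the same change, passing a .py file as the first argument now routes through ./bin/spark-submit after printing the deprecation warning shown in the diff.
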
21 changes: 18 additions & 3 deletions bin/pyspark2.cmd
@@ -31,7 +31,7 @@ set FOUND_JAR=0
for %%d in ("%FWDIR%assembly\target\scala-%SCALA_VERSION%\spark-assembly*hadoop*.jar") do (
set FOUND_JAR=1
)
if "%FOUND_JAR%"=="0" (
if [%FOUND_JAR%] == [0] (
echo Failed to find Spark assembly JAR.
echo You need to build Spark with sbt\sbt assembly before running this program.
goto exit
@@ -42,15 +42,30 @@ rem Load environment variables from conf\spark-env.cmd, if it exists
if exist "%FWDIR%conf\spark-env.cmd" call "%FWDIR%conf\spark-env.cmd"

rem Figure out which Python to use.
if "x%PYSPARK_PYTHON%"=="x" set PYSPARK_PYTHON=python
if [%PYSPARK_PYTHON%] == [] set PYSPARK_PYTHON=python

set PYTHONPATH=%FWDIR%python;%PYTHONPATH%
set PYTHONPATH=%FWDIR%python\lib\py4j-0.8.1-src.zip;%PYTHONPATH%

set OLD_PYTHONSTARTUP=%PYTHONSTARTUP%
set PYTHONSTARTUP=%FWDIR%python\pyspark\shell.py
set PYSPARK_SUBMIT_ARGS=%*

echo Running %PYSPARK_PYTHON% with PYTHONPATH=%PYTHONPATH%

"%PYSPARK_PYTHON%" %*
rem Check whether the argument is a file
for /f %%i in ('echo %1^| findstr /R "\.py"') do (
set PYTHON_FILE=%%i
)

if [%PYTHON_FILE%] == [] (
%PYSPARK_PYTHON%
) else (
echo.
echo WARNING: Running python applications through ./bin/pyspark.cmd is deprecated as of Spark 1.0.
echo Use ./bin/spark-submit ^<python file^>
echo.
"%FWDIR%\bin\spark-submit.cmd" %PYSPARK_SUBMIT_ARGS%
)

:exit
23 changes: 11 additions & 12 deletions bin/run-example
@@ -23,6 +23,16 @@ FWDIR="$(cd `dirname $0`/..; pwd)"
export SPARK_HOME="$FWDIR"
EXAMPLES_DIR="$FWDIR"/examples

if [ -n "$1" ]; then
EXAMPLE_CLASS="$1"
shift
else
echo "Usage: ./bin/run-example <example-class> [example-args]"
echo " - set MASTER=XX to use a specific master"
echo " - can use abbreviated example class name (e.g. SparkPi, mllib.LinearRegression)"
exit 1
fi

if [ -f "$FWDIR/RELEASE" ]; then
export SPARK_EXAMPLES_JAR=`ls "$FWDIR"/lib/spark-examples-*hadoop*.jar`
elif [ -e "$EXAMPLES_DIR"/target/scala-$SCALA_VERSION/spark-examples-*hadoop*.jar ]; then
@@ -37,23 +47,12 @@ fi

EXAMPLE_MASTER=${MASTER:-"local[*]"}

if [ -n "$1" ]; then
EXAMPLE_CLASS="$1"
shift
else
echo "usage: ./bin/run-example <example-class> [example-args]"
echo " - set MASTER=XX to use a specific master"
echo " - can use abbreviated example class name (e.g. SparkPi, mllib.MovieLensALS)"
echo
exit -1
fi

if [[ ! $EXAMPLE_CLASS == org.apache.spark.examples* ]]; then
EXAMPLE_CLASS="org.apache.spark.examples.$EXAMPLE_CLASS"
fi

./bin/spark-submit \
--master $EXAMPLE_MASTER \
--class $EXAMPLE_CLASS \
$SPARK_EXAMPLES_JAR \
"$SPARK_EXAMPLES_JAR" \
"$@"
51 changes: 39 additions & 12 deletions bin/run-example2.cmd
@@ -30,32 +30,59 @@ if exist "%FWDIR%conf\spark-env.cmd" call "%FWDIR%conf\spark-env.cmd"

rem Test that an argument was given
if not "x%1"=="x" goto arg_given
echo Usage: run-example ^<example-class^> [^<args^>]
echo Usage: run-example ^<example-class^> [example-args]
echo - set MASTER=XX to use a specific master
echo - can use abbreviated example class name (e.g. SparkPi, mllib.LinearRegression)
goto exit
:arg_given

set EXAMPLES_DIR=%FWDIR%examples

rem Figure out the JAR file that our examples were packaged into.
set SPARK_EXAMPLES_JAR=
for %%d in ("%EXAMPLES_DIR%\target\scala-%SCALA_VERSION%\spark-examples*assembly*.jar") do (
set SPARK_EXAMPLES_JAR=%%d
if exist "%FWDIR%RELEASE" (
for %%d in ("%FWDIR%lib\spark-examples*.jar") do (
set SPARK_EXAMPLES_JAR=%%d
)
) else (
for %%d in ("%EXAMPLES_DIR%\target\scala-%SCALA_VERSION%\spark-examples*.jar") do (
set SPARK_EXAMPLES_JAR=%%d
)
)
if "x%SPARK_EXAMPLES_JAR%"=="x" (
echo Failed to find Spark examples assembly JAR.
echo You need to build Spark with sbt\sbt assembly before running this program.
goto exit
)

rem Compute Spark classpath using external script
set DONT_PRINT_CLASSPATH=1
call "%FWDIR%bin\compute-classpath.cmd"
set DONT_PRINT_CLASSPATH=0
set CLASSPATH=%SPARK_EXAMPLES_JAR%;%CLASSPATH%
rem Set master from MASTER environment variable if given
if "x%MASTER%"=="x" (
set EXAMPLE_MASTER=local[*]
) else (
set EXAMPLE_MASTER=%MASTER%
)

rem If the EXAMPLE_CLASS does not start with org.apache.spark.examples, add that
set EXAMPLE_CLASS=%1
set PREFIX=%EXAMPLE_CLASS:~0,25%
if not %PREFIX%==org.apache.spark.examples (
set EXAMPLE_CLASS=org.apache.spark.examples.%EXAMPLE_CLASS%
)

rem Get the tail of the argument list, to skip the first one. This is surprisingly
rem complicated on Windows.
set "ARGS="
:top
shift
if "%~1" neq "" (
set ARGS=%ARGS% "%~1"
goto :top
)
if defined ARGS set ARGS=%ARGS:~1%

rem Figure out where java is.
set RUNNER=java
if not "x%JAVA_HOME%"=="x" set RUNNER=%JAVA_HOME%\bin\java
call "%FWDIR%bin\spark-submit.cmd" ^
--master %EXAMPLE_MASTER% ^
--class %EXAMPLE_CLASS% ^
"%SPARK_EXAMPLES_JAR%" %ARGS%

"%RUNNER%" -cp "%CLASSPATH%" %JAVA_OPTS% %*
:exit
6 changes: 3 additions & 3 deletions bin/spark-class
@@ -24,7 +24,7 @@ esac

SCALA_VERSION=2.10

# Figure out where the Scala framework is installed
# Figure out where Spark is installed
FWDIR="$(cd `dirname $0`/..; pwd)"

# Export this as SPARK_HOME
@@ -99,14 +99,14 @@ else
fi

# Set JAVA_OPTS to be able to load native libraries and to set heap size
JAVA_OPTS="$OUR_JAVA_OPTS"
JAVA_OPTS="-XX:MaxPermSize=128m $OUR_JAVA_OPTS"
JAVA_OPTS="$JAVA_OPTS -Xms$OUR_JAVA_MEM -Xmx$OUR_JAVA_MEM"
# Load extra JAVA_OPTS from conf/java-opts, if it exists
if [ -e "$FWDIR/conf/java-opts" ] ; then
JAVA_OPTS="$JAVA_OPTS `cat $FWDIR/conf/java-opts`"
fi
export JAVA_OPTS
# Attention: when changing the way the JAVA_OPTS are assembled, the change must be reflected in ExecutorRunner.scala!
# Attention: when changing the way the JAVA_OPTS are assembled, the change must be reflected in CommandUtils.scala!

if [ ! -f "$FWDIR/RELEASE" ]; then
# Exit if the user hasn't compiled Spark
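
As a side note, the conf/java-opts hook kept in this script can be exercised with a file such as the following; the JVM flags are illustrative only and not part of this change:

    # Hypothetical conf/java-opts file; bin/spark-class appends its contents to
    # JAVA_OPTS after the -XX:MaxPermSize=128m and heap-size flags set above.
    echo "-verbose:gc -XX:+PrintGCDetails" > "$SPARK_HOME/conf/java-opts"
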
6 changes: 4 additions & 2 deletions bin/spark-class2.cmd
@@ -17,6 +17,8 @@ rem See the License for the specific language governing permissions and
rem limitations under the License.
rem

setlocal enabledelayedexpansion

set SCALA_VERSION=2.10

rem Figure out where the Spark framework is installed
@@ -75,8 +77,8 @@ rem All drivers use SPARK_JAVA_OPTS + SPARK_DRIVER_MEMORY. The repl also uses SP
)

rem Set JAVA_OPTS to be able to load native libraries and to set heap size
set JAVA_OPTS=%OUR_JAVA_OPTS% -Djava.library.path=%SPARK_LIBRARY_PATH% -Xms%OUR_JAVA_MEM% -Xmx%OUR_JAVA_MEM%
rem Attention: when changing the way the JAVA_OPTS are assembled, the change must be reflected in ExecutorRunner.scala!
set JAVA_OPTS=-XX:MaxPermSize=128m %OUR_JAVA_OPTS% -Djava.library.path=%SPARK_LIBRARY_PATH% -Xms%OUR_JAVA_MEM% -Xmx%OUR_JAVA_MEM%
rem Attention: when changing the way the JAVA_OPTS are assembled, the change must be reflected in CommandUtils.scala!

rem Test whether the user has built Spark
if exist "%FWDIR%RELEASE" goto skip_build_test
