Skip to content

Commit

Permalink
when possible, run SQL tests only
Browse files Browse the repository at this point in the history
Includes some cosmetic/maintainability refactoring.
  • Loading branch information
nchammas committed Sep 17, 2014
1 parent da33acb commit f9e23f6
Showing 1 changed file with 106 additions and 50 deletions.
156 changes: 106 additions & 50 deletions dev/run-tests
Original file line number Diff line number Diff line change
Expand Up @@ -21,44 +21,73 @@
FWDIR="$(cd "`dirname $0`"/..; pwd)"
cd "$FWDIR"

if [ -n "$AMPLAB_JENKINS_BUILD_PROFILE" ]; then
if [ "$AMPLAB_JENKINS_BUILD_PROFILE" = "hadoop1.0" ]; then
export SBT_MAVEN_PROFILES_ARGS="-Dhadoop.version=1.0.4"
elif [ "$AMPLAB_JENKINS_BUILD_PROFILE" = "hadoop2.0" ]; then
export SBT_MAVEN_PROFILES_ARGS="-Dhadoop.version=2.0.0-mr1-cdh4.1.1"
elif [ "$AMPLAB_JENKINS_BUILD_PROFILE" = "hadoop2.2" ]; then
export SBT_MAVEN_PROFILES_ARGS="-Pyarn -Dhadoop.version=2.2.0"
elif [ "$AMPLAB_JENKINS_BUILD_PROFILE" = "hadoop2.3" ]; then
export SBT_MAVEN_PROFILES_ARGS="-Pyarn -Phadoop-2.3 -Dhadoop.version=2.3.0"
# Remove work directory
rm -rf ./work

# Build against the right verison of Hadoop.
{
if [ -n "$AMPLAB_JENKINS_BUILD_PROFILE" ]; then
if [ "$AMPLAB_JENKINS_BUILD_PROFILE" = "hadoop1.0" ]; then
export SBT_MAVEN_PROFILES_ARGS="-Dhadoop.version=1.0.4"
elif [ "$AMPLAB_JENKINS_BUILD_PROFILE" = "hadoop2.0" ]; then
export SBT_MAVEN_PROFILES_ARGS="-Dhadoop.version=2.0.0-mr1-cdh4.1.1"
elif [ "$AMPLAB_JENKINS_BUILD_PROFILE" = "hadoop2.2" ]; then
export SBT_MAVEN_PROFILES_ARGS="-Pyarn -Dhadoop.version=2.2.0"
elif [ "$AMPLAB_JENKINS_BUILD_PROFILE" = "hadoop2.3" ]; then
export SBT_MAVEN_PROFILES_ARGS="-Pyarn -Phadoop-2.3 -Dhadoop.version=2.3.0"
fi
fi
fi

if [ -z "$SBT_MAVEN_PROFILES_ARGS" ]; then
export SBT_MAVEN_PROFILES_ARGS="-Pyarn -Phadoop-2.3 -Dhadoop.version=2.3.0"
fi
if [ -z "$SBT_MAVEN_PROFILES_ARGS" ]; then
export SBT_MAVEN_PROFILES_ARGS="-Pyarn -Phadoop-2.3 -Dhadoop.version=2.3.0"
fi
}

export SBT_MAVEN_PROFILES_ARGS="$SBT_MAVEN_PROFILES_ARGS -Pkinesis-asl"

echo "SBT_MAVEN_PROFILES_ARGS=\"$SBT_MAVEN_PROFILES_ARGS\""

# Remove work directory
rm -rf ./work

if test -x "$JAVA_HOME/bin/java"; then
declare java_cmd="$JAVA_HOME/bin/java"
else
declare java_cmd=java
fi
JAVA_VERSION=$($java_cmd -version 2>&1 | sed 's/java version "\(.*\)\.\(.*\)\..*"/\1\2/; 1q')
[ "$JAVA_VERSION" -ge 18 ] && echo "" || echo "[Warn] Java 8 tests will not run because JDK version is < 1.8."
# Determine Java path and version.
{
if test -x "$JAVA_HOME/bin/java"; then
declare java_cmd="$JAVA_HOME/bin/java"
else
declare java_cmd=java
fi

# We can't use sed -r -e due to OS X / BSD compatibility; hence, all the parentheses.
JAVA_VERSION=$(
$java_cmd -version 2>&1 \
| grep -e "^java version" --max-count=1 \
| sed "s/java version \"\(.*\)\.\(.*\)\.\(.*\)\"/\1\2/"
)

if [ "$JAVA_VERSION" -lt 18 ]; then
echo "[warn] Java 8 tests will not run because JDK version is < 1.8."
fi
}

# Partial solution for SPARK-1455. Only run Hive tests if there are sql changes.
# Only run Hive tests if there are sql changes.
# Partial solution for SPARK-1455.
if [ -n "$AMPLAB_JENKINS" ]; then
git fetch origin master:master
diffs=`git diff --name-only master | grep "^\(sql/\)\|\(bin/spark-sql\)\|\(sbin/start-thriftserver.sh\)"`
if [ -n "$diffs" ]; then
echo "Detected changes in SQL. Will run Hive test suite."

sql_diffs=$(
git diff --name-only selective-testing \
| grep -e "^sql/" -e "^bin/spark-sql" -e "^sbin/start-thriftserver.sh"
)

non_sql_diffs=$(
git diff --name-only selective-testing \
| grep -v -e "^sql/" -e "^bin/spark-sql" -e "^sbin/start-thriftserver.sh"
)

if [ -n "$sql_diffs" ]; then
echo "[info] Detected changes in SQL. Will run Hive test suite."
_RUN_SQL_TESTS=true

if [ -z "$non_sql_diffs" ]; then
echo "[info] Detected no changes except in SQL. Will only run SQL tests."
_SQL_TESTS_ONLY=true
fi
fi
fi

Expand All @@ -70,42 +99,69 @@ echo ""
echo "========================================================================="
echo "Running Apache RAT checks"
echo "========================================================================="
dev/check-license
./dev/check-license

echo ""
echo "========================================================================="
echo "Running Scala style checks"
echo "========================================================================="
dev/lint-scala
./dev/lint-scala

echo ""
echo "========================================================================="
echo "Running Python style checks"
echo "========================================================================="
dev/lint-python
./dev/lint-python

echo ""
echo "========================================================================="
echo "Building Spark"
echo "========================================================================="

{
# We always build with Hive because the PySpark Spark SQL tests need it.
BUILD_MVN_PROFILE_ARGS="$SBT_MAVEN_PROFILES_ARGS -Phive"

echo "[info] Building Spark with these arguments: $BUILD_MVN_PROFILE_ARGS"

# NOTE: echo "q" is needed because sbt on encountering a build file with failure
#+ (either resolution or compilation) prompts the user for input either q, r, etc
#+ to quit or retry. This echo is there to make it not block.
# QUESTION: Why doesn't 'yes "q"' work?
# QUESTION: Why doesn't 'grep -v -e "^\[info\] Resolving"' work?
echo -e "q\n" \
| sbt/sbt $BUILD_MVN_PROFILE_ARGS clean package assembly/assembly \
| grep -v -e "info.*Resolving" -e "warn.*Merging" -e "info.*Including"
}

echo ""
echo "========================================================================="
echo "Running Spark unit tests"
echo "========================================================================="

# Build Spark; we always build with Hive because the PySpark Spark SQL tests need it.
# echo "q" is needed because sbt on encountering a build file with failure
# (either resolution or compilation) prompts the user for input either q, r,
# etc to quit or retry. This echo is there to make it not block.
BUILD_MVN_PROFILE_ARGS="$SBT_MAVEN_PROFILES_ARGS -Phive "
echo -e "q\n" | sbt/sbt $BUILD_MVN_PROFILE_ARGS clean package assembly/assembly | \
grep -v -e "info.*Resolving" -e "warn.*Merging" -e "info.*Including"

# If the Spark SQL tests are enabled, run the tests with the Hive profiles enabled:
if [ -n "$_RUN_SQL_TESTS" ]; then
SBT_MAVEN_PROFILES_ARGS="$SBT_MAVEN_PROFILES_ARGS -Phive"
fi
# echo "q" is needed because sbt on encountering a build file with failure
# (either resolution or compilation) prompts the user for input either q, r,
# etc to quit or retry. This echo is there to make it not block.
echo -e "q\n" | sbt/sbt $SBT_MAVEN_PROFILES_ARGS test | \
grep -v -e "info.*Resolving" -e "warn.*Merging" -e "info.*Including"
{
# If the Spark SQL tests are enabled, run the tests with the Hive profiles enabled.
if [ -n "$_RUN_SQL_TESTS" ]; then
SBT_MAVEN_PROFILES_ARGS="$SBT_MAVEN_PROFILES_ARGS -Phive"
fi

if [ -n "$_SQL_TESTS_ONLY" ]; then
SBT_MAVEN_TEST_ARGS="catalyst/test sql/test hive/test"
else
SBT_MAVEN_TEST_ARGS="test"
fi

echo "[info] Running Spark tests with these arguments: $SBT_MAVEN_PROFILES_ARGS $SBT_MAVEN_TEST_ARGS"

# NOTE: echo "q" is needed because sbt on encountering a build file with failure
#+ (either resolution or compilation) prompts the user for input either q, r, etc
#+ to quit or retry. This echo is there to make it not block.
# QUESTION: Why doesn't 'yes "q"' work?
# QUESTION: Why doesn't 'grep -v -e "^\[info\] Resolving"' work?
echo -e "q\n" \
| sbt/sbt "$SBT_MAVEN_PROFILES_ARGS" "$SBT_MAVEN_TEST_ARGS" \
| grep -v -e "info.*Resolving" -e "warn.*Merging" -e "info.*Including"
}

echo ""
echo "========================================================================="
Expand All @@ -117,4 +173,4 @@ echo ""
echo "========================================================================="
echo "Detecting binary incompatibilites with MiMa"
echo "========================================================================="
dev/mima
./dev/mima

0 comments on commit f9e23f6

Please sign in to comment.