SPARK-1167: Remove metrics-ganglia from default build due to LGPL issues.

This patch removes Ganglia integration from the default build. It
allows users willing to link against LGPL code to use Ganglia
by adding build flags or linking against a new Spark artifact called
spark-ganglia-lgpl.

This brings Spark in line with the Apache policy on LGPL code
enumerated here:

https://www.apache.org/legal/3party.html#options-optional
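
For reference, a minimal sketch of the two opt-in paths this patch adds; the `-Pspark-ganglia-lgpl` profile and the `SPARK_GANGLIA_LGPL` variable come from the patch itself, while the exact goals and targets shown are illustrative assumptions:

```
# Maven: activate the optional profile added to pom.xml by this patch
# (the "clean package" goals are illustrative)
mvn -Pspark-ganglia-lgpl -DskipTests clean package

# sbt: SparkBuild.scala only checks that SPARK_GANGLIA_LGPL is defined,
# so any value opts in; the "assembly" target is an assumption
SPARK_GANGLIA_LGPL=true sbt/sbt assembly
```
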
pwendell committed Mar 9, 2014
1 parent e59a3b6 commit 5f28ee4
Showing 12 changed files with 190 additions and 16 deletions.
10 changes: 10 additions & 0 deletions assembly/pom.xml
@@ -158,6 +158,16 @@
</dependency>
</dependencies>
</profile>
<profile>
<id>spark-ganglia-lgpl</id>
<dependencies>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-ganglia-lgpl_${scala.binary.version}</artifactId>
<version>${project.version}</version>
</dependency>
</dependencies>
</profile>
<profile>
<id>bigtop-dist</id>
<!-- This profile uses the assembly plugin to create a special "dist" package for BigTop
4 changes: 0 additions & 4 deletions core/pom.xml
@@ -183,10 +183,6 @@
<groupId>com.codahale.metrics</groupId>
<artifactId>metrics-json</artifactId>
</dependency>
<dependency>
<groupId>com.codahale.metrics</groupId>
<artifactId>metrics-ganglia</artifactId>
</dependency>
<dependency>
<groupId>com.codahale.metrics</groupId>
<artifactId>metrics-graphite</artifactId>
11 changes: 11 additions & 0 deletions dev/audit-release/README.md
@@ -0,0 +1,11 @@
# Test Application Builds
This directory includes test applications which are built when auditing releases. You can
run them locally by setting appropriate environment variables.

```
$ cd sbt_app_core
$ SCALA_VERSION=2.10.3 \
SPARK_VERSION=1.0.0-SNAPSHOT \
SPARK_RELEASE_REPOSITORY=file:///home/patrick/.ivy2/local \
sbt run
```
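
This patch also adds a new `sbt_app_ganglia` test application (shown below); a sketch of exercising it the same way, reusing the environment variables from the example above:

```
$ cd sbt_app_ganglia
$ SCALA_VERSION=2.10.3 \
SPARK_VERSION=1.0.0-SNAPSHOT \
SPARK_RELEASE_REPOSITORY=file:///home/patrick/.ivy2/local \
sbt run
```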
15 changes: 14 additions & 1 deletion dev/audit-release/sbt_app_core/src/main/scala/SparkApp.scala
@@ -17,6 +17,8 @@

package main.scala

import scala.util.Try

import org.apache.spark.SparkContext
import org.apache.spark.SparkContext._

@@ -31,6 +33,17 @@ object SimpleApp {
println("Failed to parse log files with Spark")
System.exit(-1)
}
println("Test succeeded")

// Regression test for SPARK-1167: Remove metrics-ganglia from default build due to LGPL issue
val foundConsole = Try(Class.forName("org.apache.spark.metrics.sink.ConsoleSink")).isSuccess
val foundGanglia = Try(Class.forName("org.apache.spark.metrics.sink.GangliaSink")).isSuccess
if (!foundConsole) {
println("Console sink not loaded via spark-core")
System.exit(-1)
}
if (foundGanglia) {
println("Ganglia sink was loaded via spark-core")
System.exit(-1)
}
}
}
31 changes: 31 additions & 0 deletions dev/audit-release/sbt_app_ganglia/build.sbt
@@ -0,0 +1,31 @@
//
// Licensed to the Apache Software Foundation (ASF) under one or more
// contributor license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright ownership.
// The ASF licenses this file to You under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

name := "Ganglia Test"

version := "1.0"

scalaVersion := System.getenv.get("SCALA_VERSION")

libraryDependencies += "org.apache.spark" %% "spark-core" % System.getenv.get("SPARK_VERSION")

libraryDependencies += "org.apache.spark" %% "spark-ganglia-lgpl" % System.getenv.get("SPARK_VERSION")

resolvers ++= Seq(
"Spark Release Repository" at System.getenv.get("SPARK_RELEASE_REPOSITORY"),
"Akka Repository" at "http://repo.akka.io/releases/",
"Spray Repository" at "http://repo.spray.cc/")
39 changes: 39 additions & 0 deletions dev/audit-release/sbt_app_ganglia/src/main/scala/SparkApp.scala
@@ -0,0 +1,39 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package main.scala

import scala.util.Try

import org.apache.spark.SparkContext
import org.apache.spark.SparkContext._

object SimpleApp {
def main(args: Array[String]) {
// Regression test for SPARK-1167: Remove metrics-ganglia from default build due to LGPL issue
val foundConsole = Try(Class.forName("org.apache.spark.metrics.sink.ConsoleSink")).isSuccess
val foundGanglia = Try(Class.forName("org.apache.spark.metrics.sink.GangliaSink")).isSuccess
if (!foundConsole) {
println("Console sink not loaded via spark-core")
System.exit(-1)
}
if (!foundGanglia) {
println("Ganglia sink not loaded via spark-ganglia-lgpl")
System.exit(-1)
}
}
}
4 changes: 2 additions & 2 deletions dev/create-release/create-release.sh
@@ -49,14 +49,14 @@ mvn -DskipTests \
-Darguments="-DskipTests=true -Dhadoop.version=2.2.0 -Dyarn.version=2.2.0 -Dgpg.passphrase=${GPG_PASSPHRASE}" \
-Dusername=$GIT_USERNAME -Dpassword=$GIT_PASSWORD \
-Dhadoop.version=2.2.0 -Dyarn.version=2.2.0 \
-Pyarn \
-Pyarn -Pspark-ganglia-lgpl \
-Dtag=$GIT_TAG -DautoVersionSubmodules=true \
--batch-mode release:prepare

mvn -DskipTests \
-Darguments="-DskipTests=true -Dhadoop.version=2.2.0 -Dyarn.version=2.2.0 -Dgpg.passphrase=${GPG_PASSPHRASE}" \
-Dhadoop.version=2.2.0 -Dyarn.version=2.2.0 \
-Pyarn \
-Pyarn -Pspark-ganglia-lgpl \
release:perform

rm -rf spark
13 changes: 12 additions & 1 deletion docs/monitoring.md
@@ -48,11 +48,22 @@ Each instance can report to zero or more _sinks_. Sinks are contained in the

* `ConsoleSink`: Logs metrics information to the console.
* `CSVSink`: Exports metrics data to CSV files at regular intervals.
* `GangliaSink`: Sends metrics to a Ganglia node or multicast group.
* `JmxSink`: Registers metrics for viewing in a JMX console.
* `MetricsServlet`: Adds a servlet within the existing Spark UI to serve metrics data as JSON data.
* `GraphiteSink`: Sends metrics to a Graphite node.

Spark also supports a Ganglia sink which is not included in the default build due to
licensing restrictions:

* `GangliaSink`: Sends metrics to a Ganglia node or multicast group.

To install the `GangliaSink` you'll need to perform a custom build of Spark. _**Note that
by embedding this library you will include [LGPL](http://www.gnu.org/copyleft/lesser.html)-licensed
code in your Spark package**_. For sbt users, set the
`SPARK_GANGLIA_LGPL` environment variable before building. For Maven users, enable
the `-Pspark-ganglia-lgpl` profile. For users linking applications against Spark,
include the `spark-ganglia-lgpl` artifact as a dependency.

The syntax of the metrics configuration file is defined in an example configuration file,
`$SPARK_HOME/conf/metrics.properties.template`.
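
As an illustration, an entry along the following lines in `conf/metrics.properties` attaches the sink once `spark-ganglia-lgpl` is on the classpath; the sink class name appears elsewhere in this patch, while the option names and the multicast address are assumptions based on the template:

```
# Append a Ganglia sink entry for all metrics instances ("*"). The option
# names (host, port, period) are assumed from metrics.properties.template.
cat >> $SPARK_HOME/conf/metrics.properties <<'EOF'
*.sink.ganglia.class=org.apache.spark.metrics.sink.GangliaSink
*.sink.ganglia.host=239.2.11.71
*.sink.ganglia.port=8649
*.sink.ganglia.period=10
EOF
```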

45 changes: 45 additions & 0 deletions extras/spark-ganglia-lgpl/pom.xml
@@ -0,0 +1,45 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
~ Licensed to the Apache Software Foundation (ASF) under one or more
~ contributor license agreements. See the NOTICE file distributed with
~ this work for additional information regarding copyright ownership.
~ The ASF licenses this file to You under the Apache License, Version 2.0
~ (the "License"); you may not use this file except in compliance with
~ the License. You may obtain a copy of the License at
~
~ http://www.apache.org/licenses/LICENSE-2.0
~
~ Unless required by applicable law or agreed to in writing, software
~ distributed under the License is distributed on an "AS IS" BASIS,
~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
~ See the License for the specific language governing permissions and
~ limitations under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.apache.spark</groupId>
<artifactId>spark-parent</artifactId>
<version>1.0.0-SNAPSHOT</version>
<relativePath>../../pom.xml</relativePath>
</parent>

<!-- Ganglia integration is not included by default due to LGPL-licensed code -->
<groupId>org.apache.spark</groupId>
<artifactId>spark-ganglia-lgpl_2.10</artifactId>
<packaging>jar</packaging>
<name>Spark Ganglia Integration</name>

<dependencies>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_${scala.binary.version}</artifactId>
<version>${project.version}</version>
</dependency>

<dependency>
<groupId>com.codahale.metrics</groupId>
<artifactId>metrics-ganglia</artifactId>
</dependency>
</dependencies>
</project>
9 changes: 8 additions & 1 deletion pom.xml
@@ -725,12 +725,19 @@
<hadoop.version>0.23.7</hadoop.version>
<!--<hadoop.version>2.0.5-alpha</hadoop.version> -->
</properties>

<modules>
<module>yarn</module>
</modules>
</profile>

<!-- Ganglia integration is not included by default due to LGPL-licensed code -->
<profile>
<id>spark-ganglia-lgpl</id>
<modules>
<module>extras/spark-ganglia-lgpl</module>
</modules>
</profile>

<profile>
<id>java8-tests</id>
<build>
25 changes: 18 additions & 7 deletions project/SparkBuild.scala
@@ -65,7 +65,7 @@ object SparkBuild extends Build {
lazy val mllib = Project("mllib", file("mllib"), settings = mllibSettings) dependsOn(core)

lazy val assemblyProj = Project("assembly", file("assembly"), settings = assemblyProjSettings)
.dependsOn(core, graphx, bagel, mllib, repl, streaming) dependsOn(maybeYarn: _*)
.dependsOn(core, graphx, bagel, mllib, repl, streaming) dependsOn(maybeYarn: _*) dependsOn(maybeGanglia: _*)

lazy val assembleDeps = TaskKey[Unit]("assemble-deps", "Build assembly of dependencies and packages Spark projects")

@@ -91,19 +91,26 @@
lazy val hadoopClient = if (hadoopVersion.startsWith("0.20.") || hadoopVersion == "1.0.0") "hadoop-core" else "hadoop-client"
val maybeAvro = if (hadoopVersion.startsWith("0.23.") && isYarnEnabled) Seq("org.apache.avro" % "avro" % "1.7.4") else Seq()

// Conditionally include the java 8 sub-project
// Include Ganglia integration if the user has enabled Ganglia
// This is isolated from the normal build due to LGPL-licensed code in the library
lazy val isGangliaEnabled = Properties.envOrNone("SPARK_GANGLIA_LGPL").isDefined
lazy val gangliaProj = Project("spark-ganglia-lgpl", file("extras/spark-ganglia-lgpl"), settings = gangliaSettings).dependsOn(core)
val maybeGanglia: Seq[ClasspathDependency] = if (isGangliaEnabled) Seq(gangliaProj) else Seq()
val maybeGangliaRef: Seq[ProjectReference] = if (isGangliaEnabled) Seq(gangliaProj) else Seq()

// Include the Java 8 project if the JVM version is 8+
lazy val javaVersion = System.getProperty("java.specification.version")
lazy val isJava8Enabled = javaVersion.toDouble >= "1.8".toDouble
val maybeJava8Tests = if (isJava8Enabled) Seq[ProjectReference](java8Tests) else Seq[ProjectReference]()
lazy val java8Tests = Project("java8-tests", file("extras/java8-tests"), settings = java8TestsSettings).
dependsOn(core) dependsOn(streaming % "compile->compile;test->test")

// Conditionally include the yarn sub-project
// Include the YARN project if the user has enabled YARN
lazy val yarnAlpha = Project("yarn-alpha", file("yarn/alpha"), settings = yarnAlphaSettings) dependsOn(core)
lazy val yarn = Project("yarn", file("yarn/stable"), settings = yarnSettings) dependsOn(core)

lazy val maybeYarn = if (isYarnEnabled) Seq[ClasspathDependency](if (isNewHadoop) yarn else yarnAlpha) else Seq[ClasspathDependency]()
lazy val maybeYarnRef = if (isYarnEnabled) Seq[ProjectReference](if (isNewHadoop) yarn else yarnAlpha) else Seq[ProjectReference]()
lazy val maybeYarn: Seq[ClasspathDependency] = if (isYarnEnabled) Seq(if (isNewHadoop) yarn else yarnAlpha) else Seq()
lazy val maybeYarnRef: Seq[ProjectReference] = if (isYarnEnabled) Seq(if (isNewHadoop) yarn else yarnAlpha) else Seq()

lazy val externalTwitter = Project("external-twitter", file("external/twitter"), settings = twitterSettings)
.dependsOn(streaming % "compile->compile;test->test")
@@ -127,7 +134,7 @@
.dependsOn(core, mllib, graphx, bagel, streaming, externalTwitter) dependsOn(allExternal: _*)

// Everything except assembly, tools, java8Tests and examples belong to packageProjects
lazy val packageProjects = Seq[ProjectReference](core, repl, bagel, streaming, mllib, graphx) ++ maybeYarnRef
lazy val packageProjects = Seq[ProjectReference](core, repl, bagel, streaming, mllib, graphx) ++ maybeYarnRef ++ maybeGangliaRef

lazy val allProjects = packageProjects ++ allExternalRefs ++
Seq[ProjectReference](examples, tools, assemblyProj) ++ maybeJava8Tests
@@ -296,7 +303,6 @@ object SparkBuild extends Build {
"com.codahale.metrics" % "metrics-core" % "3.0.0",
"com.codahale.metrics" % "metrics-jvm" % "3.0.0",
"com.codahale.metrics" % "metrics-json" % "3.0.0",
"com.codahale.metrics" % "metrics-ganglia" % "3.0.0",
"com.codahale.metrics" % "metrics-graphite" % "3.0.0",
"com.twitter" %% "chill" % "0.3.1",
"com.twitter" % "chill-java" % "0.3.1",
@@ -384,6 +390,11 @@
name := "spark-yarn"
)

def gangliaSettings = sharedSettings ++ Seq(
name := "spark-ganglia-lgpl",
libraryDependencies += "com.codahale.metrics" % "metrics-ganglia" % "3.0.0"
)

def java8TestsSettings = sharedSettings ++ Seq(
name := "java8-tests",
javacOptions := Seq("-target", "1.8", "-source", "1.8"),
