Merge pull request #259 from djfreels/develop

Adding SparkConfigurationSteps

Showing 13 changed files with 375 additions and 51 deletions.
@@ -0,0 +1,39 @@
[Documentation Home](../../docs/readme.md) | [Common Home](../readme.md)

# SparkConfigurationSteps
This object exposes some basic functions to set configurations on the Spark context at run time.
## Set Spark Local Property
Set a local property on the current thread.

* **key** - The key of the property to set.
* **value** - The value to set. Use None to remove the property.

## Set Spark Local Properties
Set a local property on the current thread for each entry in the properties map, as shown in the sketch below.

* **properties** - A Map where each entry will be set as a key/value pair.
* **keySeparator** - Replaces all occurrences of this string with periods in the keys. Default is `__`.
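
A minimal sketch of calling these steps directly from Scala, assuming a `pipelineContext` with an active SparkSession is in scope; the property names shown are only illustrative:

```scala
// Set one local property, then several at once; "__" in a key becomes ".".
SparkConfigurationSteps.setLocalProperty("spark.scheduler.pool", "etl", pipelineContext)
SparkConfigurationSteps.setLocalProperties(
  Map("spark__scheduler__pool" -> "etl", "spark__job__description" -> "nightly load"),
  None, pipelineContext)
// Passing None as the value removes the property from the current thread.
SparkConfigurationSteps.setLocalProperty("spark.job.description", None, pipelineContext)
```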
## Set Hadoop Configuration Property
Set a property on the Hadoop configuration.

* **key** - The key of the property to set.
* **value** - The value to set. Use None to remove the property.

## Set Hadoop Configuration Properties
Set a property on the Hadoop configuration for each entry in the properties map; a short sketch follows the list below.

* **properties** - A Map where each entry will be set as a key/value pair.
* **keySeparator** - Replaces all occurrences of this string with periods in the keys. Default is `__`.
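
The Hadoop variants follow the same pattern; in this sketch the `fs.s3a` settings are placeholders for whatever configuration a job actually needs:

```scala
// Set a single Hadoop configuration entry, then several using the key separator.
SparkConfigurationSteps.setHadoopConfigurationProperty("fs.s3a.connection.maximum", 100, pipelineContext)
SparkConfigurationSteps.setHadoopConfigurationProperties(
  Map("fs__s3a__fast__upload" -> true, "fs__s3a__attempts__maximum" -> 20),
  None, pipelineContext)
// A None value removes the key from the Hadoop configuration.
SparkConfigurationSteps.setHadoopConfigurationProperty("fs.s3a.fast.upload", None, pipelineContext)
```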
## Set Job Group
Set a job group id and description to group all upcoming jobs on the current thread.

* **groupId** - The name of the group.
* **description** - Description of the group.
* **interruptOnCancel** - When true, job cancellation will result in Thread.interrupt() being called on the job's executor threads.
## Clear Job Group
Clears the current job group. The sketch below shows setting a group, running a few actions, and then clearing it.
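
A minimal sketch of grouping a couple of Spark actions under a named job group and then clearing it; the group id, description, and Dataset are illustrative:

```scala
// Jobs triggered while the group is set show up under "nightly-load" in the Spark UI
// and can be cancelled together via SparkContext.cancelJobGroup("nightly-load").
SparkConfigurationSteps.setJobGroup("nightly-load", "Nightly load jobs", Some(true), pipelineContext)
val df = pipelineContext.sparkSession.get.range(100)
df.count()
df.head()
// Subsequent jobs on this thread are no longer associated with "nightly-load".
SparkConfigurationSteps.clearJobGroup(pipelineContext)
```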
104 changes: 104 additions & 0 deletions
metalus-common/src/main/scala/com/acxiom/pipeline/steps/SparkConfigurationSteps.scala
@@ -0,0 +1,104 @@
package com.acxiom.pipeline.steps

import com.acxiom.pipeline.PipelineContext
import com.acxiom.pipeline.annotations.{StepFunction, StepObject, StepParameter, StepParameters}

import scala.annotation.tailrec

@StepObject
object SparkConfigurationSteps {

  @StepFunction("5c4d2d01-da85-4e2e-a551-f5a65f83653a",
    "Set Spark Local Property",
    "Set a property on the spark context.",
    "Pipeline", "Spark")
  @StepParameters(Map("key" -> StepParameter(None, Some(true), None, None, None, None, Some("The name of the property to set")),
    "value" -> StepParameter(None, Some(true), None, None, None, None, Some("The value to set"))))
  def setLocalProperty(key: String, value: Any, pipelineContext: PipelineContext): Unit = {
    setLocalProperties(Map(key -> value), None, pipelineContext)
  }

  @StepFunction("0b86b314-2657-4392-927c-e555af56b415",
    "Set Spark Local Properties",
    "Set each property on the spark context.",
    "Pipeline", "Spark")
  @StepParameters(Map("properties" -> StepParameter(None, Some(true), None, None, None, None,
    Some("Map representing local properties to set")),
    "keySeparator" -> StepParameter(None, Some(false), Some("__"), None, None, None,
      Some("String that will be replaced with a period character"))))
  def setLocalProperties(properties: Map[String, Any], keySeparator: Option[String] = None, pipelineContext: PipelineContext): Unit = {
    val sc = pipelineContext.sparkSession.get.sparkContext
    cleanseMap(properties, keySeparator).foreach {
      case (key, Some(value)) => sc.setLocalProperty(key, value.toString)
      case (key, None) => sc.setLocalProperty(key, None.orNull)
      case (key, value) => sc.setLocalProperty(key, value.toString)
    }
  }

  @StepFunction("c8c82365-e078-4a2a-99b8-0c0e20d8102d",
    "Set Hadoop Configuration Properties",
    "Set each property on the hadoop configuration.",
    "Pipeline", "Spark")
  @StepParameters(Map("properties" -> StepParameter(None, Some(true), None, None, None, None,
    Some("Map representing local properties to set")),
    "keySeparator" -> StepParameter(None, Some(false), Some("__"), None, None, None,
      Some("String that will be replaced with a period character"))))
  def setHadoopConfigurationProperties(properties: Map[String, Any], keySeparator: Option[String] = None,
                                       pipelineContext: PipelineContext): Unit = {
    val hc = pipelineContext.sparkSession.get.sparkContext.hadoopConfiguration
    cleanseMap(properties, keySeparator).foreach {
      case (key, Some(value)) => hc.set(key, value.toString)
      case (key, None) => hc.unset(key)
      case (key, value) => hc.set(key, value.toString)
    }
  }

  @StepFunction("ea7ea3e0-d1c2-40a2-b2b7-3488489509ca",
    "Set Hadoop Configuration Property",
    "Set a property on the hadoop configuration.",
    "Pipeline", "Spark")
  @StepParameters(Map("key" -> StepParameter(None, Some(true), None, None, None, None, Some("The name of the property to set")),
    "value" -> StepParameter(None, Some(true), None, None, None, None, Some("The value to set"))))
  def setHadoopConfigurationProperty(key: String, value: Any,
                                     pipelineContext: PipelineContext): Unit = {
    setHadoopConfigurationProperties(Map(key -> value), None, pipelineContext)
  }

  @StepFunction("b7373f02-4d1e-44cf-a9c9-315a5c1ccecc",
    "Set Job Group",
    "Set the current thread's group id and description that will be associated with any jobs.",
    "Pipeline", "Spark")
  @StepParameters(Map("groupId" -> StepParameter(None, Some(true), None, None, None, None, Some("The name of the group")),
    "description" -> StepParameter(None, Some(true), None, None, None, None, Some("Description of the job group")),
    "interruptOnCancel" -> StepParameter(None, Some(false), Some("false"), None, None, None,
      Some("When true, will trigger Thread.interrupt getting called on executor threads"))))
  def setJobGroup(groupId: String, description: String, interruptOnCancel: Option[Boolean] = None,
                  pipelineContext: PipelineContext): Unit = {
    pipelineContext.sparkSession.get.sparkContext.setJobGroup(groupId, description, interruptOnCancel.getOrElse(false))
  }

  @StepFunction("7394ff4d-f74d-4c9f-a55c-e0fd398fa264",
    "Clear Job Group",
    "Clear the current thread's job group",
    "Pipeline", "Spark")
  def clearJobGroup(pipelineContext: PipelineContext): Unit = {
    pipelineContext.sparkSession.get.sparkContext.clearJobGroup()
  }

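  // Collapses nested Option values (e.g. Some(Some(x)) becomes Some(x)) so the setter
  // pattern matches above only ever see a single level of Option.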
  @tailrec
  private def unwrapOptions(value: Any): Any = {
    value match {
      case Some(v: Option[_]) => unwrapOptions(v)
      case v => v
    }
  }

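  // Replaces every occurrence of the key separator (default "__") with a period in the keys
  // and unwraps nested Option values before they are applied to the configuration.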
  private def cleanseMap(map: Map[String, Any], keySeparator: Option[String] = None): Map[String, Any] = {
    val sep = keySeparator.getOrElse("__")
    map.map { case (key, value) =>
      key.replaceAllLiterally(sep, ".") -> unwrapOptions(value)
    }
  }
}
118 changes: 118 additions & 0 deletions
metalus-common/src/test/scala/com/acxiom/pipeline/steps/SparkConfigurationStepsTests.scala
@@ -0,0 +1,118 @@
package com.acxiom.pipeline.steps

import java.nio.file.{Files, Path}

import com.acxiom.pipeline._
import org.apache.commons.io.FileUtils
import org.apache.log4j.{Level, Logger}
import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession
import org.scalatest.{BeforeAndAfterAll, FunSpec, GivenWhenThen}

class SparkConfigurationStepsTests extends FunSpec with BeforeAndAfterAll with GivenWhenThen {
  private val MASTER = "local[2]"
  private val APPNAME = "spark-config-steps-spark"
  private var sparkConf: SparkConf = _
  private var sparkSession: SparkSession = _
  private val sparkLocalDir: Path = Files.createTempDirectory("sparkLocal")
  private var pipelineContext: PipelineContext = _

  override def beforeAll(): Unit = {
    Logger.getLogger("org.apache.spark").setLevel(Level.WARN)
    Logger.getLogger("org.apache.hadoop").setLevel(Level.WARN)
    Logger.getLogger("com.acxiom.pipeline").setLevel(Level.DEBUG)

    sparkConf = new SparkConf()
      .setMaster(MASTER)
      .setAppName(APPNAME)
      .set("spark.local.dir", sparkLocalDir.toFile.getAbsolutePath)
    sparkSession = SparkSession.builder().config(sparkConf).getOrCreate()

    pipelineContext = PipelineContext(Some(sparkConf), Some(sparkSession), Some(Map[String, Any]()),
      PipelineSecurityManager(),
      PipelineParameters(List(PipelineParameter("0", Map[String, Any]()), PipelineParameter("1", Map[String, Any]()))),
      Some(List("com.acxiom.pipeline.steps")),
      PipelineStepMapper(),
      Some(DefaultPipelineListener()),
      Some(sparkSession.sparkContext.collectionAccumulator[PipelineStepMessage]("stepMessages")))
  }

  override def afterAll(): Unit = {
    sparkSession.sparkContext.cancelAllJobs()
    sparkSession.sparkContext.stop()
    sparkSession.stop()

    Logger.getRootLogger.setLevel(Level.INFO)
    // cleanup spark directories
    FileUtils.deleteDirectory(sparkLocalDir.toFile)
  }

  describe("SparkConfigurationSteps - Basic") {
    it("should set a local property") {
      try {
        SparkConfigurationSteps.setLocalProperty("moo", "moo2", pipelineContext)
        assert(sparkSession.sparkContext.getLocalProperty("moo") == "moo2")
      } finally {
        sparkSession.sparkContext.setLocalProperty("moo", None.orNull)
      }
    }

    it("should unset a local property") {
      sparkSession.sparkContext.setLocalProperty("unset", "moo")
      SparkConfigurationSteps.setLocalProperty("unset", None, pipelineContext)
      assert(Option(sparkSession.sparkContext.getLocalProperty("unset")).isEmpty)
    }

it ("should set a local properties") { | ||
try { | ||
SparkConfigurationSteps.setLocalProperties(Map("moo_m1" -> "m1", "moo_m2" -> "m2"), Some("_"), pipelineContext) | ||
assert(sparkSession.sparkContext.getLocalProperty("moo.m1") == "m1") | ||
assert(sparkSession.sparkContext.getLocalProperty("moo.m2") == "m2") | ||
} finally { | ||
sparkSession.sparkContext.setLocalProperty("moo.m1", None.orNull) | ||
sparkSession.sparkContext.setLocalProperty("moo.m2", None.orNull) | ||
} | ||
} | ||
|
||
it ("should unset a local properties") { | ||
try { | ||
sparkSession.sparkContext.setLocalProperty("moo.m1", "m1") | ||
sparkSession.sparkContext.setLocalProperty("moo.m2", "m2") | ||
SparkConfigurationSteps.setLocalProperties(Map("moo_m1" -> None, "moo_m2" -> None), Some("_"), pipelineContext) | ||
assert(Option(sparkSession.sparkContext.getLocalProperty("moo.m1")).isEmpty) | ||
assert(Option(sparkSession.sparkContext.getLocalProperty("moo.m2")).isEmpty) | ||
} finally { | ||
sparkSession.sparkContext.setLocalProperty("moo.m1", None.orNull) | ||
sparkSession.sparkContext.setLocalProperty("moo.m2", None.orNull) | ||
} | ||
} | ||
} | ||

  describe("SparkConfigurationSteps - Job Group") {
    it("should set a job group") {
      SparkConfigurationSteps.setJobGroup("group1", "test1", None, pipelineContext)
      val df = sparkSession.range(2)
      df.count()
      df.head()
      val group1Ids = sparkSession.sparkContext.statusTracker.getJobIdsForGroup("group1")
      assert(group1Ids.length == 2)
      SparkConfigurationSteps.setJobGroup("group2", "test2", None, pipelineContext)
      df.count()
      val group2Ids = sparkSession.sparkContext.statusTracker.getJobIdsForGroup("group2")
      assert(group2Ids.length == 1)
    }

    it("should clear a job group") {
      SparkConfigurationSteps.setJobGroup("clear1", "test1", None, pipelineContext)
      val df = sparkSession.range(2)
      df.count()
      df.head()
      val group1Ids = sparkSession.sparkContext.statusTracker.getJobIdsForGroup("clear1")
      assert(group1Ids.length == 2)
      SparkConfigurationSteps.clearJobGroup(pipelineContext)
      df.count()
      assert(sparkSession.sparkContext.statusTracker.getJobIdsForGroup("clear1").length == 2)
    }
  }
}