Acxiom · dafreels · Oct 27, 2021 · Oct 25, 2021 · Oct 25, 2021
diff --git a/docs/dataconnectors.md b/docs/dataconnectors.md
@@ -115,6 +115,29 @@ val connector = MongoDataConnector("mongodb://127.0.0.1/test", "myCollectionName
   }
 }
 ```
+### JDBCDataConnector
+This connector provides access to databases using JDBC. Security is handled using the url or a _UserNameCredential_. In addition to
+the standard parameters, the following parameters are available:
+
+* **url** - The connection URL
+
+#### Scala
+```scala
+val connector = JDBCDataConnector("jdbc:derby:memory:test", None, "my-connector", Some("my-credential-name-for-secrets-manager"), None)
+```
+#### Globals JSON
+```json
+{
+  "customJDBCConnector": {
+    "className": "com.acxiom.pipeline.connectors.JDBCDataConnector",
+    "object": {
+      "name": "my-jdbc-connector",
+      "credentialName": "my-credential-name-for-secrets-manager",
+      "url": "jdbc:derby:memory:test"
+    }
+  }
+}
+```
 ## Streaming
 Streaming connectors offer a way to use pipelines with [Spark Structured Streaming](https://spark.apache.org/docs/latest/structured-streaming-programming-guide.html) without 
 the need to write new [drivers](pipeline-drivers.md). When designing pipelines for streaming, care must be taken to not

diff --git a/metalus-application/pom.xml b/metalus-application/pom.xml
@@ -9,7 +9,7 @@
     <parent>
         <groupId>com.acxiom</groupId>
         <artifactId>metalus</artifactId>
-        <version>1.8.3-SNAPSHOT</version>
+        <version>1.8.4-SNAPSHOT</version>
     </parent>
     <dependencies>
         <dependency>

diff --git a/metalus-aws/pom.xml b/metalus-aws/pom.xml
@@ -9,7 +9,7 @@
     <parent>
         <groupId>com.acxiom</groupId>
         <artifactId>metalus</artifactId>
-        <version>1.8.3-SNAPSHOT</version>
+        <version>1.8.4-SNAPSHOT</version>
     </parent>
 
     <properties>

diff --git a/metalus-common/pom.xml b/metalus-common/pom.xml
@@ -9,7 +9,7 @@
     <parent>
         <groupId>com.acxiom</groupId>
         <artifactId>metalus</artifactId>
-        <version>1.8.3-SNAPSHOT</version>
+        <version>1.8.4-SNAPSHOT</version>
     </parent>
 
     <dependencies>

diff --git a/metalus-common/src/main/scala/com/acxiom/pipeline/connectors/JDBCDataConnector.scala b/metalus-common/src/main/scala/com/acxiom/pipeline/connectors/JDBCDataConnector.scala
@@ -0,0 +1,84 @@
+package com.acxiom.pipeline.connectors
+
+import com.acxiom.pipeline.steps.{DataFrameReaderOptions, DataFrameWriterOptions}
+import com.acxiom.pipeline.{Credential, PipelineContext}
+import org.apache.spark.sql.DataFrame
+import org.apache.spark.sql.streaming.StreamingQuery
+
+import java.util.Properties
+import scala.collection.JavaConversions._
+
+/**
+  * Batch data connector that reads from and writes to tables through the Spark JDBC data source.
+  *
+  * NOTE(review): load and write never read credentialName or credential, so authentication details must be
+  * supplied through the url or the reader/writer options - confirm this is intended.
+  *
+  * @param url            The JDBC connection url.
+  * @param predicates     Optional conditions passed to the predicate based Spark jdbc reader. None and an empty
+  *                       list both result in a read without predicates.
+  * @param name           The name of this connector.
+  * @param credentialName Optional name of the credential associated with this connector.
+  * @param credential     Optional credential associated with this connector.
+  */
+case class JDBCDataConnector(url: String,
+                             predicates: Option[List[String]],
+                             override val name: String,
+                             override val credentialName: Option[String],
+                             override val credential: Option[Credential]) extends BatchDataConnector {
+  /**
+    * Reads the table identified by source into a DataFrame.
+    *
+    * @param source          The table to read. An empty string is handed to Spark when not provided.
+    * @param pipelineContext The pipeline context providing the spark session.
+    * @param readOptions     The reader options. The format is replaced with "jdbc" and the options are used as
+    *                        the connection properties.
+    * @return A DataFrame for the table.
+    */
+  override def load(source: Option[String], pipelineContext: PipelineContext, readOptions: DataFrameReaderOptions): DataFrame = {
+    val properties = toProperties(readOptions.options)
+    val table = source.getOrElse("")
+    val reader = DataConnectorUtilities.buildDataFrameReader(pipelineContext.sparkSession.get, readOptions.copy("jdbc"))
+    predicates.filter(_.nonEmpty)
+      .map(conditions => reader.jdbc(url, table, conditions.toArray, properties))
+      .getOrElse(reader.jdbc(url, table, properties))
+  }
+
+  /**
+    * Writes the DataFrame to the table identified by destination. A streaming DataFrame is written one micro
+    * batch at a time.
+    *
+    * @param dataFrame       The DataFrame to write.
+    * @param destination     The table to write. An empty string is handed to Spark when not provided.
+    * @param pipelineContext The pipeline context. Not used by this connector.
+    * @param writeOptions    The writer options. The options are also used as the connection properties.
+    * @return The started StreamingQuery for a streaming DataFrame, otherwise None.
+    */
+  override def write(dataFrame: DataFrame, destination: Option[String],
+                     pipelineContext: PipelineContext,
+                     writeOptions: DataFrameWriterOptions): Option[StreamingQuery] = {
+    val properties = toProperties(writeOptions.options)
+    val table = destination.getOrElse("")
+    if (dataFrame.isStreaming) {
+      Some(dataFrame.writeStream.foreachBatch { (batchDF: DataFrame, batchId: Long) =>
+        DataConnectorUtilities.buildDataFrameWriter(batchDF, writeOptions).jdbc(url, table, properties)
+      }.start())
+    } else {
+      DataConnectorUtilities.buildDataFrameWriter(dataFrame, writeOptions).jdbc(url, table, properties)
+      None
+    }
+  }
+
+  /**
+    * Copies the options into a java.util.Properties using explicit setProperty calls rather than relying on the
+    * deprecated implicit JavaConversions.
+    *
+    * @param options The optional reader or writer options.
+    * @return The connection properties. Empty when no options were provided.
+    */
+  private def toProperties(options: Option[Map[String, String]]): Properties = {
+    val properties = new Properties()
+    options.getOrElse(Map.empty[String, String]).foreach { case (key, value) => properties.setProperty(key, value) }
+    properties
+  }
+}
diff --git a/metalus-common/src/main/scala/com/acxiom/pipeline/steps/JDBCSteps.scala b/metalus-common/src/main/scala/com/acxiom/pipeline/steps/JDBCSteps.scala
@@ -1,6 +1,7 @@
 package com.acxiom.pipeline.steps
 
 import com.acxiom.pipeline.annotations._
+import com.acxiom.pipeline.connectors.JDBCDataConnector
 import com.acxiom.pipeline.{PipelineContext, PipelineStepResponse}
 import org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions
 import org.apache.spark.sql.{DataFrame, Dataset}
@@ -28,7 +29,8 @@ object JDBCSteps {
   def readWithJDBCOptions(jdbcOptions: JDBCOptions,
                           pipelineContext: PipelineContext): DataFrame = {
     val options = DataFrameReaderOptions("jdbc", Some(jdbcOptions.asProperties.toMap))
-    DataFrameSteps.getDataFrameReader(options, pipelineContext).load()
+    val jdbc = JDBCDataConnector(jdbcOptions.url, None, "JDBCSteps_readWithJDBCOptions", None, None) // Spark rejects dbtable together with query
+    jdbc.load(Some(jdbcOptions.tableOrQuery), pipelineContext, options.copy(options = options.options.map(_.filterNot(_._1.equalsIgnoreCase("query")))))
   }
 
   /**
@@ -50,21 +52,8 @@ object JDBCSteps {
                           pipelineContext: PipelineContext): DataFrame = {
     val properties = new Properties()
     properties.putAll(jDBCStepsOptions.readerOptions.options.getOrElse(Map[String, String]()))
-    val reader = DataFrameSteps.getDataFrameReader(jDBCStepsOptions.readerOptions, pipelineContext)
-    if (jDBCStepsOptions.predicates.isDefined) {
-      reader.jdbc(
-        url = jDBCStepsOptions.url,
-        table = jDBCStepsOptions.table,
-        predicates = jDBCStepsOptions.predicates.get.toArray,
-        connectionProperties = properties
-      )
-    } else {
-      reader.jdbc(
-        url = jDBCStepsOptions.url,
-        table = jDBCStepsOptions.table,
-        properties = properties
-      )
-    }
+    JDBCDataConnector(jDBCStepsOptions.url, jDBCStepsOptions.predicates, "JDBCSteps_readWithStepOptions", None, None)
+      .load(Some(jDBCStepsOptions.table), pipelineContext, jDBCStepsOptions.readerOptions) // delegate the read to the connector
   }
 
   /**
@@ -92,15 +81,8 @@ object JDBCSteps {
                          predicates: Option[List[String]] = None,
                          connectionProperties: Option[Map[String, String]] = None,
                          pipelineContext: PipelineContext): DataFrame = {
-    val spark = pipelineContext.sparkSession.get
-    val properties = new Properties()
-    properties.putAll(connectionProperties.getOrElse(Map[String, String]()))
-
-    if (predicates.isDefined) {
-      spark.read.jdbc(url, table, predicates.get.toArray, properties)
-    } else {
-      spark.read.jdbc(url, table, properties)
-    }
+    JDBCDataConnector(url, predicates, "JDBCSteps_readWithProperties", None, None)
+      .load(Some(table), pipelineContext, DataFrameReaderOptions(options = connectionProperties)) // properties travel as reader options
   }
 
   /**
@@ -119,9 +101,11 @@ object JDBCSteps {
     "saveMode" -> StepParameter(None, Some(false), None, None, None, None, Some("The value for the mode option. Defaulted to Overwrite"))))
   def writeWithJDBCOptions(dataFrame: Dataset[_],
                            jdbcOptions: JDBCOptions,
-                           saveMode: String = "Overwrite"): Unit = {
+                           saveMode: String = "Overwrite",
+                           pipelineContext: PipelineContext): Unit = {
     val options = DataFrameWriterOptions("jdbc", saveMode, Some(jdbcOptions.asProperties.toMap))
-    DataFrameSteps.getDataFrameWriter(dataFrame, options).save()
+    val jdbc = JDBCDataConnector(jdbcOptions.url, None, "JDBCSteps_writeWithJDBCOptions", None, None)
+    jdbc.write(dataFrame.toDF(), Some(jdbcOptions.tableOrQuery), pipelineContext, options) // toDF() instead of an unchecked cast
   }
 
   /**
@@ -147,12 +131,10 @@ object JDBCSteps {
                           url: String,
                           table: String,
                           connectionProperties: Option[Map[String, String]] = None,
-                          saveMode: String = "Overwrite"): Unit = {
-    val properties = new Properties()
-    properties.putAll(connectionProperties.getOrElse(Map[String, String]()))
-    dataFrame.write
-      .mode(saveMode)
-      .jdbc(url, table, properties)
+                          saveMode: String = "Overwrite",
+                          pipelineContext: PipelineContext): Unit = {
+    val jdbc = JDBCDataConnector(url, None, "JDBCSteps_writeWithProperties", None, None) // saveMode must reach the writer options
+    jdbc.write(dataFrame.toDF(), Some(table), pipelineContext, DataFrameWriterOptions("jdbc", saveMode, connectionProperties))
   }
 
   /**
@@ -169,11 +151,10 @@ object JDBCSteps {
   @StepParameters(Map("dataFrame" -> StepParameter(None, Some(true), None, None, None, None, Some("The DataFrame to be written")),
     "jDBCStepsOptions" -> StepParameter(None, Some(true), None, None, None, None, Some("Options for the JDBC connect and spark DataFrameWriter"))))
   def writeWithStepOptions(dataFrame: Dataset[_],
-                           jDBCStepsOptions: JDBCDataFrameWriterOptions): Unit = {
-    val properties = new Properties()
-    properties.putAll(jDBCStepsOptions.writerOptions.options.getOrElse(Map[String, String]()))
-    DataFrameSteps.getDataFrameWriter(dataFrame, jDBCStepsOptions.writerOptions)
-      .jdbc(jDBCStepsOptions.url, jDBCStepsOptions.table, properties)
+                           jDBCStepsOptions: JDBCDataFrameWriterOptions,
+                           pipelineContext: PipelineContext): Unit = {
+    val jdbc = JDBCDataConnector(jDBCStepsOptions.url, None, "JDBCSteps_writeWithStepOptions", None, None)
+    jdbc.write(dataFrame.toDF(), Some(jDBCStepsOptions.table), pipelineContext, jDBCStepsOptions.writerOptions) // toDF() instead of an unchecked cast
   }
 
   @StepFunction("713fff3d-d407-4970-89ae-7844e6fc60e3",

diff --git a/metalus-common/src/test/scala/com/acxiom/pipeline/steps/JDBCStepsTests.scala b/metalus-common/src/test/scala/com/acxiom/pipeline/steps/JDBCStepsTests.scala
@@ -1,17 +1,15 @@
 package com.acxiom.pipeline.steps
 
-import java.nio.file.{Files, Path}
-
 import com.acxiom.pipeline._
 import org.apache.commons.io.FileUtils
 import org.apache.log4j.{Level, Logger}
 import org.apache.spark.SparkConf
 import org.apache.spark.sql.SparkSession
-import org.scalatest.{BeforeAndAfterAll, FunSpec, GivenWhenThen}
-import java.sql.DriverManager
-
 import org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions
+import org.scalatest.{BeforeAndAfterAll, FunSpec, GivenWhenThen}
 
+import java.nio.file.{Files, Path}
+import java.sql.DriverManager
 import scala.collection.mutable
 
 class JDBCStepsTests extends FunSpec with BeforeAndAfterAll with GivenWhenThen {
@@ -145,7 +143,9 @@ class JDBCStepsTests extends FunSpec with BeforeAndAfterAll with GivenWhenThen {
 
       JDBCSteps.writeWithJDBCOptions(
         dataFrame = chickens.toDF("ID", "NAME", "COLOR"),
-        jdbcOptions = new JDBCOptions(jDBCOptions.toMap)
+        jdbcOptions = new JDBCOptions(jDBCOptions.toMap),
+        "Overwrite",
+        pipelineContext
       )
       verifyCount(count = 2)
     }
@@ -165,7 +165,8 @@ class JDBCStepsTests extends FunSpec with BeforeAndAfterAll with GivenWhenThen {
           table = "CHICKENS",
           writerOptions = DataFrameWriterOptions("jdbc").setOptions(
             Map[String, String]("driver" -> "org.apache.derby.jdbc.EmbeddedDriver", "user" -> "test_fixture"))
-        )
+        ),
+        pipelineContext
       )
       verifyCount(count = 1)
     }
@@ -184,7 +185,9 @@ class JDBCStepsTests extends FunSpec with BeforeAndAfterAll with GivenWhenThen {
         dataFrame = chickens.toDF("ID", "NAME", "COLOR"),
         url = "jdbc:derby:memory:test",
         table = "CHICKENS",
-        connectionProperties = Some(Map("driver" -> "org.apache.derby.jdbc.EmbeddedDriver", "user" -> "test_fixture"))
+        connectionProperties = Some(Map("driver" -> "org.apache.derby.jdbc.EmbeddedDriver", "user" -> "test_fixture")),
+        "Overwrite",
+        pipelineContext
       )
       verifyCount(FOUR)
     }

diff --git a/metalus-core/pom.xml b/metalus-core/pom.xml
@@ -9,7 +9,7 @@
     <parent>
         <groupId>com.acxiom</groupId>
         <artifactId>metalus</artifactId>
-        <version>1.8.3-SNAPSHOT</version>
+        <version>1.8.4-SNAPSHOT</version>
     </parent>
 
     <properties>

diff --git a/metalus-delta/pom.xml b/metalus-delta/pom.xml
@@ -12,7 +12,7 @@
     <parent>
         <groupId>com.acxiom</groupId>
         <artifactId>metalus</artifactId>
-        <version>1.8.3-SNAPSHOT</version>
+        <version>1.8.4-SNAPSHOT</version>
     </parent>
 
     <dependencies>

diff --git a/metalus-examples/pom.xml b/metalus-examples/pom.xml
@@ -9,7 +9,7 @@
     <parent>
         <groupId>com.acxiom</groupId>
         <artifactId>metalus</artifactId>
-        <version>1.8.3-SNAPSHOT</version>
+        <version>1.8.4-SNAPSHOT</version>
     </parent>
 
     <properties>

diff --git a/metalus-gcp/pom.xml b/metalus-gcp/pom.xml
@@ -9,7 +9,7 @@
     <parent>
         <groupId>com.acxiom</groupId>
         <artifactId>metalus</artifactId>
-        <version>1.8.3-SNAPSHOT</version>
+        <version>1.8.4-SNAPSHOT</version>
     </parent>
 
     <dependencyManagement>

diff --git a/metalus-kafka/pom.xml b/metalus-kafka/pom.xml
@@ -9,7 +9,7 @@
     <parent>
         <groupId>com.acxiom</groupId>
         <artifactId>metalus</artifactId>
-        <version>1.8.3-SNAPSHOT</version>
+        <version>1.8.4-SNAPSHOT</version>
     </parent>
 
     <properties>

diff --git a/metalus-mongo/pom.xml b/metalus-mongo/pom.xml
@@ -9,7 +9,7 @@
     <parent>
         <groupId>com.acxiom</groupId>
         <artifactId>metalus</artifactId>
-        <version>1.8.3-SNAPSHOT</version>
+        <version>1.8.4-SNAPSHOT</version>
     </parent>
 
     <dependencies>

diff --git a/metalus-utils/pom.xml b/metalus-utils/pom.xml
@@ -9,7 +9,7 @@
     <parent>
         <groupId>com.acxiom</groupId>
         <artifactId>metalus</artifactId>
-        <version>1.8.3-SNAPSHOT</version>
+        <version>1.8.4-SNAPSHOT</version>
     </parent>
 
     <properties>

diff --git a/pom.xml b/pom.xml
@@ -3,7 +3,7 @@
     <modelVersion>4.0.0</modelVersion>
     <groupId>com.acxiom</groupId>
     <artifactId>metalus</artifactId>
-    <version>1.8.3-SNAPSHOT</version>
+    <version>1.8.4-SNAPSHOT</version>
     <name>${project.artifactId}</name>
     <packaging>pom</packaging>
     <description>Metalus Pipeline Library</description>