From eeab70fc6f3b583524047dfb16727d211213e537 Mon Sep 17 00:00:00 2001 From: djfreels Date: Fri, 15 Oct 2021 13:10:42 -0400 Subject: [PATCH] Added support to map from rootGlobals to application globals --- .../testData/metalus-common/steps.json | 2 +- .../acxiom/pipeline/PipelineStepMapper.scala | 7 +-- .../applications/ApplicationUtils.scala | 60 ++++++++++++------- .../applications/ApplicationTests.scala | 8 +-- 4 files changed, 44 insertions(+), 33 deletions(-) diff --git a/manual_tests/testData/metalus-common/steps.json b/manual_tests/testData/metalus-common/steps.json index f400fd4f..589a9b2e 100644 --- a/manual_tests/testData/metalus-common/steps.json +++ b/manual_tests/testData/metalus-common/steps.json @@ -1 +1 @@ -{"pkgs":["com.acxiom.pipeline.steps"],"steps":[{"id":"3806f23b-478c-4054-b6c1-37f11db58d38","displayName":"Read a DataFrame from Table","description":"This step will read a dataFrame in a given format from the meta store","type":"Pipeline","category":"InputOutput","params":[{"type":"text","name":"table","required":true,"parameterType":"String","description":"The name of the table to read"},{"type":"object","name":"options","required":false,"className":"com.acxiom.pipeline.steps.DataFrameReaderOptions","parameterType":"com.acxiom.pipeline.steps.DataFrameReaderOptions","description":"The DataFrameReaderOptions to use"}],"engineMeta":{"spark":"CatalogSteps.readDataFrame","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.DataFrame"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"e2b4c011-e71b-46f9-a8be-cf937abc2ec4","displayName":"Write DataFrame to Table","description":"This step will write a dataFrame in a given format to the meta store","type":"Pipeline","category":"InputOutput","params":[{"type":"text","name":"dataFrame","required":true,"parameterType":"org.apache.spark.sql.Dataset[_]","description":"The DataFrame to write"},{"type":"text","name":"table","required":true,"parameterType":"String","description":"The name of the table to write to"},{"type":"object","name":"options","required":false,"className":"com.acxiom.pipeline.steps.DataFrameWriterOptions","parameterType":"com.acxiom.pipeline.steps.DataFrameWriterOptions","description":"The DataFrameWriterOptions to use"}],"engineMeta":{"spark":"CatalogSteps.writeDataFrame","pkg":"com.acxiom.pipeline.steps"},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"5874ab64-13c7-404c-8a4f-67ff3b0bc7cf","displayName":"Drop Catalog Object","description":"This step will drop an object from the meta store","type":"Pipeline","category":"InputOutput","params":[{"type":"text","name":"name","required":true,"parameterType":"String","description":"Name of the object to drop"},{"type":"text","name":"objectType","required":false,"defaultValue":"TABLE","parameterType":"String","description":"Type of object to drop"},{"type":"boolean","name":"ifExists","required":false,"defaultValue":"false","parameterType":"Boolean","description":"Flag to control whether existence is checked"},{"type":"boolean","name":"cascade","required":false,"defaultValue":"false","parameterType":"Boolean","description":"Flag to control whether this deletion should cascade"}],"engineMeta":{"spark":"CatalogSteps.drop","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.DataFrame"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"17be71f9-1492-4404-a355-1cc973694cad","displayName":"Database Exists","description":"Check spark catalog for a database with the given name.","type":"branch","category":"Decision","params":[{"type":"text","name":"name","required":true,"parameterType":"String","description":"Name of the database"},{"type":"result","name":"true","required":false},{"type":"result","name":"false","required":false}],"engineMeta":{"spark":"CatalogSteps.databaseExists","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"Boolean"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"95181811-d83e-4136-bedb-2cba1de90301","displayName":"Table Exists","description":"Check spark catalog for a table with the given name.","type":"branch","category":"Decision","params":[{"type":"text","name":"name","required":true,"parameterType":"String","description":"Name of the table"},{"type":"text","name":"database","required":false,"parameterType":"String","description":"Name of the database"},{"type":"result","name":"true","required":false},{"type":"result","name":"false","required":false}],"engineMeta":{"spark":"CatalogSteps.tableExists","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"Boolean"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"f4adfe70-2ae3-4b8d-85d1-f53e91c8dfad","displayName":"Set Current Database","description":"Set the current default database for the spark session.","type":"Pipeline","category":"InputOutput","params":[{"type":"text","name":"name","required":true,"parameterType":"String","description":"Name of the database"}],"engineMeta":{"spark":"CatalogSteps.setCurrentDatabase","pkg":"com.acxiom.pipeline.steps"},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"663f8c93-0a42-4c43-8263-33f89c498760","displayName":"Create Table","description":"Create a table in the meta store.","type":"Pipeline","category":"InputOutput","params":[{"type":"text","name":"name","required":true,"parameterType":"String","description":"Name of the table"},{"type":"text","name":"externalPath","required":false,"parameterType":"String","description":"Path of the external table"},{"type":"object","name":"options","required":false,"className":"com.acxiom.pipeline.steps.DataFrameReaderOptions","parameterType":"com.acxiom.pipeline.steps.DataFrameReaderOptions","description":"Options containing the format, schema, and settings"}],"engineMeta":{"spark":"CatalogSteps.createTable","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.DataFrame"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"836aab38-1140-4606-ab73-5b6744f0e7e7","displayName":"Load","description":"This step will create a DataFrame using the given DataConnector","type":"Pipeline","category":"Connectors","params":[{"type":"text","name":"connector","required":true,"parameterType":"com.acxiom.pipeline.connectors.DataConnector","description":"The data connector to use when writing"},{"type":"text","name":"source","required":false,"parameterType":"String","description":"The source path to load data"},{"type":"object","name":"readOptions","required":false,"className":"com.acxiom.pipeline.steps.DataFrameReaderOptions","parameterType":"com.acxiom.pipeline.steps.DataFrameReaderOptions","description":"The optional options to use while reading the data"}],"engineMeta":{"spark":"DataConnectorSteps.loadDataFrame","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.DataFrame"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"5608eba7-e9ff-48e6-af77-b5e810b99d89","displayName":"Write","description":"This step will write a DataFrame using the given DataConnector","type":"Pipeline","category":"Connectors","params":[{"type":"text","name":"dataFrame","required":true,"parameterType":"org.apache.spark.sql.DataFrame","description":"The DataFrame to write"},{"type":"text","name":"connector","required":true,"parameterType":"com.acxiom.pipeline.connectors.DataConnector","description":"The data connector to use when writing"},{"type":"text","name":"destination","required":false,"parameterType":"String","description":"The destination path to write data"},{"type":"object","name":"writeOptions","required":false,"className":"com.acxiom.pipeline.steps.DataFrameWriterOptions","parameterType":"com.acxiom.pipeline.steps.DataFrameWriterOptions","description":"The optional DataFrame options to use while writing"}],"engineMeta":{"spark":"DataConnectorSteps.writeDataFrame","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.streaming.StreamingQuery"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"87db259d-606e-46eb-b723-82923349640f","displayName":"Load DataFrame from HDFS path","description":"This step will read a dataFrame from the given HDFS path","type":"Pipeline","category":"InputOutput","params":[{"type":"text","name":"path","required":true,"parameterType":"String","description":"The HDFS path to load data into the DataFrame"},{"type":"object","name":"options","required":false,"className":"com.acxiom.pipeline.steps.DataFrameReaderOptions","parameterType":"com.acxiom.pipeline.steps.DataFrameReaderOptions","description":"The options to use when loading the DataFrameReader"}],"engineMeta":{"spark":"HDFSSteps.readFromPath","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.DataFrame"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"8daea683-ecde-44ce-988e-41630d251cb8","displayName":"Load DataFrame from HDFS paths","description":"This step will read a dataFrame from the given HDFS paths","type":"Pipeline","category":"InputOutput","params":[{"type":"text","name":"paths","required":true,"parameterType":"List[String]","description":"The HDFS paths to load data into the DataFrame"},{"type":"object","name":"options","required":false,"className":"com.acxiom.pipeline.steps.DataFrameReaderOptions","parameterType":"com.acxiom.pipeline.steps.DataFrameReaderOptions","description":"The options to use when loading the DataFrameReader"}],"engineMeta":{"spark":"HDFSSteps.readFromPaths","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.DataFrame"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"0a296858-e8b7-43dd-9f55-88d00a7cd8fa","displayName":"Write DataFrame to HDFS","description":"This step will write a dataFrame in a given format to HDFS","type":"Pipeline","category":"InputOutput","params":[{"type":"text","name":"dataFrame","required":true,"parameterType":"org.apache.spark.sql.Dataset[_]","description":"The DataFrame to write"},{"type":"text","name":"path","required":true,"parameterType":"String","description":"The GCS path to write data"},{"type":"object","name":"options","required":false,"className":"com.acxiom.pipeline.steps.DataFrameWriterOptions","parameterType":"com.acxiom.pipeline.steps.DataFrameWriterOptions","description":"The optional DataFrame Options"}],"engineMeta":{"spark":"HDFSSteps.writeToPath","pkg":"com.acxiom.pipeline.steps"},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"e4dad367-a506-5afd-86c0-82c2cf5cd15c","displayName":"Create HDFS FileManager","description":"Simple function to generate the HDFSFileManager for the local HDFS file system","type":"Pipeline","category":"InputOutput","params":[],"engineMeta":{"spark":"HDFSSteps.createFileManager","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"com.acxiom.pipeline.fs.HDFSFileManager"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"a7e17c9d-6956-4be0-a602-5b5db4d1c08b","displayName":"Scala script Step","description":"Executes a script and returns the result","type":"Pipeline","category":"Scripting","params":[{"type":"script","name":"script","required":true,"language":"scala","parameterType":"String","description":"A scala script to execute"}],"engineMeta":{"spark":"ScalaSteps.processScript","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"com.acxiom.pipeline.PipelineStepResponse"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"8bf8cef6-cf32-4d85-99f4-e4687a142f84","displayName":"Scala script Step with additional object provided","description":"Executes a script with the provided object and returns the result","type":"Pipeline","category":"Scripting","params":[{"type":"script","name":"script","required":true,"language":"scala","parameterType":"String","description":"A scala script to execute"},{"type":"text","name":"value","required":true,"parameterType":"Any","description":"A value to pass to the script"},{"type":"text","name":"type","required":false,"parameterType":"String","description":"The type of the value to pass to the script"}],"engineMeta":{"spark":"ScalaSteps.processScriptWithValue","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"com.acxiom.pipeline.PipelineStepResponse"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"3ab721e8-0075-4418-aef1-26abdf3041be","displayName":"Scala script Step with additional objects provided","description":"Executes a script with the provided object and returns the result","type":"Pipeline","category":"Scripting","params":[{"type":"script","name":"script","required":true,"language":"scala","parameterType":"String","description":"A scala script to execute"},{"type":"object","name":"values","required":true,"parameterType":"Map[String,Any]","description":"Map of name/value pairs that will be bound to the script"},{"type":"object","name":"types","required":false,"parameterType":"Map[String,String]","description":"Map of type overrides for the values provided"},{"type":"boolean","name":"unwrapOptions","required":false,"parameterType":"Boolean","description":"Flag to toggle option unwrapping behavior"}],"engineMeta":{"spark":"ScalaSteps.processScriptWithValues","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"com.acxiom.pipeline.PipelineStepResponse"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"6e42b0c3-340e-4848-864c-e1b5c57faa4f","displayName":"Join DataFrames","description":"Join two dataFrames together.","type":"Pipeline","category":"Data","params":[{"type":"text","name":"left","required":true,"parameterType":"org.apache.spark.sql.Dataset[_]","description":"Left side of the join"},{"type":"text","name":"right","required":true,"parameterType":"org.apache.spark.sql.Dataset[_]","description":"Right side of the join"},{"type":"text","name":"expression","required":false,"parameterType":"String","description":"Join expression. Optional for cross joins"},{"type":"text","name":"leftAlias","required":false,"defaultValue":"left","parameterType":"String","description":"Left side alias"},{"type":"text","name":"rightAlias","required":false,"defaultValue":"right","parameterType":"String","description":"Right side alias"},{"type":"text","name":"joinType","required":false,"defaultValue":"inner","parameterType":"String","description":"Type of join to perform"}],"engineMeta":{"spark":"DataSteps.join","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.DataFrame"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"823eeb28-ec81-4da6-83f2-24a1e580b0e5","displayName":"Group By","description":"Group by a list of grouping expressions and a list of aggregates.","type":"Pipeline","category":"Data","params":[{"type":"text","name":"dataFrame","required":true,"parameterType":"org.apache.spark.sql.Dataset[_]","description":"The DataFrame to group"},{"type":"text","name":"groupings","required":true,"parameterType":"List[String]","description":"List of expressions to group by"},{"type":"text","name":"aggregations","required":true,"parameterType":"List[String]","description":"List of aggregations to apply"}],"engineMeta":{"spark":"DataSteps.groupBy","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.DataFrame"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"d322769c-18a0-49c2-9875-41446892e733","displayName":"Union","description":"Union two DataFrames together.","type":"Pipeline","category":"Data","params":[{"type":"text","name":"dataFrame","required":true,"parameterType":"org.apache.spark.sql.Dataset[T]","description":"The initial DataFrame"},{"type":"text","name":"append","required":true,"parameterType":"org.apache.spark.sql.Dataset[T]","description":"The dataFrame to append"},{"type":"boolean","name":"distinct","required":false,"defaultValue":"true","parameterType":"Boolean","description":"Flag to control distinct behavior"}],"engineMeta":{"spark":"DataSteps.union","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.Dataset[T]"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"80583aa9-41b7-4906-8357-cc2d3670d970","displayName":"Add a Column with a Static Value to All Rows in a DataFrame (metalus-common)","description":"This step will add a column with a static value to all rows in the provided data frame","type":"Pipeline","category":"Data","params":[{"type":"text","name":"dataFrame","required":true,"parameterType":"org.apache.spark.sql.Dataset[_]","description":"The data frame to add the column"},{"type":"text","name":"columnName","required":true,"parameterType":"String","description":"The name to provide the id column"},{"type":"text","name":"columnValue","required":true,"parameterType":"Any","description":"The name of the new column"},{"type":"boolean","name":"standardizeColumnName","required":false,"defaultValue":"true","parameterType":"Boolean","description":"The value to add"}],"engineMeta":{"spark":"DataSteps.addStaticColumnToDataFrame","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.DataFrame"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"e625eed6-51f0-44e7-870b-91c960cdc93d","displayName":"Adds a Unique Identifier to a DataFrame (metalus-common)","description":"This step will add a new unique identifier to an existing data frame using the monotonically_increasing_id method","type":"Pipeline","category":"Data","params":[{"type":"text","name":"idColumnName","required":true,"parameterType":"String","description":"The name to provide the id column"},{"type":"text","name":"dataFrame","required":true,"parameterType":"org.apache.spark.sql.Dataset[_]","description":"The data frame to add the column"}],"engineMeta":{"spark":"DataSteps.addUniqueIdToDataFrame","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.DataFrame"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"fa0fcabb-d000-4a5e-9144-692bca618ddb","displayName":"Filter a DataFrame","description":"This step will filter a DataFrame based on the where expression provided","type":"Pipeline","category":"Data","params":[{"type":"text","name":"dataFrame","required":true,"parameterType":"org.apache.spark.sql.Dataset[T]","description":"The DataFrame to filter"},{"type":"text","name":"expression","required":true,"parameterType":"String","description":"The expression to apply to the DataFrame to filter rows"}],"engineMeta":{"spark":"DataSteps.applyFilter","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.Dataset[T]"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"5d0d7c5c-c287-4565-80b2-2b1a847b18c6","displayName":"Get DataFrame Count","description":"Get a count of records in a DataFrame.","type":"Pipeline","category":"InputOutput","params":[{"type":"text","name":"dataFrame","required":true,"parameterType":"org.apache.spark.sql.Dataset[_]","description":"The DataFrame to count"}],"engineMeta":{"spark":"DataSteps.getDataFrameCount","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"Long"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"252b6086-da45-4042-a9a8-31ebf57948af","displayName":"Drop Duplicate Records","description":"Drop duplicate records from a DataFrame","type":"Pipeline","category":"Data","params":[{"type":"text","name":"dataFrame","required":true,"parameterType":"org.apache.spark.sql.Dataset[T]","description":"The DataFrame to drop duplicate records from"},{"type":"text","name":"columnNames","required":true,"parameterType":"List[String]","description":"Columns to use for determining distinct values to drop"}],"engineMeta":{"spark":"DataSteps.dropDuplicateRecords","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.Dataset[T]"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"d5ac88a2-caa2-473c-a9f7-ffb0269880b2","displayName":"Rename Column","description":"Rename a column on a DataFrame","type":"Pipeline","category":"Transforms","params":[{"type":"text","name":"dataFrame","required":true,"parameterType":"org.apache.spark.sql.Dataset[_]","description":"The DataFrame to change"},{"type":"text","name":"oldColumnName","required":true,"parameterType":"String","description":"The name of the column you want to change"},{"type":"text","name":"newColumnName","required":true,"parameterType":"String","description":"The new name to give the column"}],"engineMeta":{"spark":"DataSteps.renameColumn","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.DataFrame"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"6ed36f89-35d1-4280-a555-fbcd8dd76bf2","displayName":"Retry (simple)","description":"Makes a decision to retry or stop based on a named counter","type":"branch","category":"RetryLogic","params":[{"type":"text","name":"counterName","required":true,"parameterType":"String","description":"The name of the counter to use for tracking"},{"type":"text","name":"maxRetries","required":true,"parameterType":"Int","description":"The maximum number of retries allowed"},{"type":"result","name":"retry","required":false},{"type":"result","name":"stop","required":false}],"engineMeta":{"spark":"FlowUtilsSteps.simpleRetry","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"com.acxiom.pipeline.PipelineStepResponse"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"a2f3e151-cb81-4c69-8475-c1a287bbb4cb","displayName":"Convert CSV String Dataset to DataFrame","description":"This step will convert the provided CSV string Dataset into a DataFrame that can be passed to other steps","type":"Pipeline","category":"CSV","params":[{"type":"text","name":"dataset","required":true,"parameterType":"org.apache.spark.sql.Dataset[String]","description":"The dataset containing CSV strings"},{"type":"object","name":"dataFrameReaderOptions","required":false,"className":"com.acxiom.pipeline.steps.DataFrameReaderOptions","parameterType":"com.acxiom.pipeline.steps.DataFrameReaderOptions","description":"The CSV parsing options"}],"engineMeta":{"spark":"CSVSteps.csvDatasetToDataFrame","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.DataFrame"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"d25209c1-53f6-49ad-a402-257ae756ac2a","displayName":"Convert CSV String to DataFrame","description":"This step will convert the provided CSV string into a DataFrame that can be passed to other steps","type":"Pipeline","category":"CSV","params":[{"type":"text","name":"csvString","required":true,"parameterType":"String","description":"The csv string to convert to a DataFrame"},{"type":"text","name":"delimiter","required":false,"defaultValue":",","parameterType":"String","description":"The field delimiter"},{"type":"text","name":"recordDelimiter","required":false,"defaultValue":"\\n","parameterType":"String","description":"The record delimiter"},{"type":"boolean","name":"header","required":false,"defaultValue":"false","parameterType":"Boolean","description":"Build header from the first row"}],"engineMeta":{"spark":"CSVSteps.csvStringToDataFrame","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.DataFrame"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"15889487-fd1c-4c44-b8eb-973c12f91fae","displayName":"Creates an HttpRestClient","description":"This step will build an HttpRestClient using a host url and optional authorization object","type":"Pipeline","category":"API","params":[{"type":"text","name":"hostUrl","required":true,"parameterType":"String","description":"The URL to connect including port"},{"type":"text","name":"authorization","required":false,"parameterType":"com.acxiom.pipeline.api.Authorization","description":"The optional authorization class to use when making connections"},{"type":"boolean","name":"allowSelfSignedCertificates","required":false,"parameterType":"Boolean","description":"Flag to allow using self signed certificates for http calls"}],"engineMeta":{"spark":"ApiSteps.createHttpRestClient","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"com.acxiom.pipeline.api.HttpRestClient"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"fcfd4b91-9a9c-438c-8afa-9f14c1e52a82","displayName":"Creates an HttpRestClient from protocol, host and port","description":"This step will build an HttpRestClient using url parts and optional authorization object","type":"Pipeline","category":"API","params":[{"type":"text","name":"protocol","required":true,"parameterType":"String","description":"The protocol to use when constructing the URL"},{"type":"text","name":"host","required":true,"parameterType":"String","description":"The host name to use when constructing the URL"},{"type":"text","name":"port","required":true,"parameterType":"Int","description":"The port to use when constructing the URL"},{"type":"text","name":"authorization","required":false,"parameterType":"com.acxiom.pipeline.api.Authorization","description":"The optional authorization class to use when making connections"}],"engineMeta":{"spark":"ApiSteps.createHttpRestClientFromParameters","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"com.acxiom.pipeline.api.HttpRestClient"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"b59f0486-78aa-4bd4-baf5-5c7d7c648ff0","displayName":"Check Path Exists","description":"Checks the path to determine whether it exists or not.","type":"Pipeline","category":"API","params":[{"type":"text","name":"httpRestClient","required":true,"parameterType":"com.acxiom.pipeline.api.HttpRestClient","description":"The HttpRestClient to use when accessing the provided path"},{"type":"text","name":"path","required":true,"parameterType":"String","description":"The path to verify"}],"engineMeta":{"spark":"ApiSteps.exists","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"Boolean"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"7521ac47-84ec-4e50-b087-b9de4bf6d514","displayName":"Get the last modified date","description":"Gets the last modified date for the provided path","type":"Pipeline","category":"API","params":[{"type":"text","name":"httpRestClient","required":true,"parameterType":"com.acxiom.pipeline.api.HttpRestClient","description":"The HttpRestClient to use when accessing the provided path"},{"type":"text","name":"path","required":true,"parameterType":"String","description":"The path to the resource to get the last modified date"}],"engineMeta":{"spark":"ApiSteps.getLastModifiedDate","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"java.util.Date"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"fff7f7b6-5d9a-40b3-8add-6432552920a8","displayName":"Get Path Content Length","description":"Get the size of the content at the given path.","type":"Pipeline","category":"API","params":[{"type":"text","name":"httpRestClient","required":true,"parameterType":"com.acxiom.pipeline.api.HttpRestClient","description":"The HttpRestClient to use when accessing the provided path"},{"type":"text","name":"path","required":true,"parameterType":"String","description":"The path to the resource to get the content length"}],"engineMeta":{"spark":"ApiSteps.getContentLength","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"Long"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"dd351d47-125d-47fa-bafd-203bebad82eb","displayName":"Get Path Headers","description":"Get the headers for the content at the given path.","type":"Pipeline","category":"API","params":[{"type":"text","name":"httpRestClient","required":true,"parameterType":"com.acxiom.pipeline.api.HttpRestClient","description":"The HttpRestClient to use when accessing the provided path"},{"type":"text","name":"path","required":true,"parameterType":"String","description":"The path to get the headers"}],"engineMeta":{"spark":"ApiSteps.getHeaders","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"Map[String,List[String]]"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"532f72dd-8443-481d-8406-b74cdc08e342","displayName":"Delete Content","description":"Attempts to delete the provided path..","type":"Pipeline","category":"API","params":[{"type":"text","name":"httpRestClient","required":true,"parameterType":"com.acxiom.pipeline.api.HttpRestClient","description":"The HttpRestClient to use when accessing the provided path"},{"type":"text","name":"path","required":true,"parameterType":"String","description":"The path to delete"}],"engineMeta":{"spark":"ApiSteps.delete","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"Boolean"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"3b91e6e8-ec18-4468-9089-8474f4b4ba48","displayName":"GET String Content","description":"Retrieves the value at the provided path as a string.","type":"Pipeline","category":"API","params":[{"type":"text","name":"httpRestClient","required":true,"parameterType":"com.acxiom.pipeline.api.HttpRestClient","description":"The HttpRestClient to use when accessing the provided path"},{"type":"text","name":"path","required":true,"parameterType":"String","description":"The path to resource"}],"engineMeta":{"spark":"ApiSteps.getStringContent","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"String"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"34c2fc9a-2502-4c79-a0cb-3f866a0a0d6e","displayName":"POST String Content","description":"POSTs the provided string to the provided path using the content type and returns the response as a string.","type":"Pipeline","category":"API","params":[{"type":"text","name":"httpRestClient","required":true,"parameterType":"com.acxiom.pipeline.api.HttpRestClient","description":"The HttpRestClient to use when accessing the provided path"},{"type":"text","name":"path","required":true,"parameterType":"String","description":"The path to post the content"},{"type":"text","name":"content","required":true,"parameterType":"String","description":"The content to post"},{"type":"text","name":"contentType","required":false,"parameterType":"String","description":"The content type being sent to the path"}],"engineMeta":{"spark":"ApiSteps.postStringContent","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"String"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"49ae38b3-cb41-4153-9111-aa6aacf6721d","displayName":"PUT String Content","description":"PUTs the provided string to the provided path using the content type and returns the response as a string.","type":"Pipeline","category":"API","params":[{"type":"text","name":"httpRestClient","required":true,"parameterType":"com.acxiom.pipeline.api.HttpRestClient","description":"The HttpRestClient to use when accessing the provided path"},{"type":"text","name":"path","required":true,"parameterType":"String","description":"The path to post the content"},{"type":"text","name":"content","required":true,"parameterType":"String","description":"The content to put"},{"type":"text","name":"contentType","required":false,"parameterType":"String","description":"The content type being sent to the path"}],"engineMeta":{"spark":"ApiSteps.putStringContent","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"String"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"99b20c23-722f-4862-9f47-bc9f72440ae6","displayName":"GET Input Stream","description":"Creates a buffered input stream for the provided path","type":"Pipeline","category":"API","params":[{"type":"text","name":"httpRestClient","required":true,"parameterType":"com.acxiom.pipeline.api.HttpRestClient","description":"The HttpRestClient to use when accessing the provided path"},{"type":"text","name":"path","required":true,"parameterType":"String","description":"The path to the resource"},{"type":"text","name":"bufferSize","required":false,"parameterType":"Int","description":"The size of buffer to use with the stream"}],"engineMeta":{"spark":"ApiSteps.getInputStream","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"java.io.InputStream"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"f4120b1c-91df-452f-9589-b77f8555ba44","displayName":"GET Output Stream","description":"Creates a buffered output stream for the provided path.","type":"Pipeline","category":"API","params":[{"type":"text","name":"httpRestClient","required":true,"parameterType":"com.acxiom.pipeline.api.HttpRestClient","description":"The HttpRestClient to use when accessing the provided path"},{"type":"text","name":"path","required":true,"parameterType":"String","description":"The path to the resource"},{"type":"text","name":"bufferSize","required":false,"parameterType":"Int","description":"The size of buffer to use with the stream"}],"engineMeta":{"spark":"ApiSteps.getOutputStream","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"java.io.OutputStream"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"cdb332e3-9ea4-4c96-8b29-c1d74287656c","displayName":"Load table as DataFrame using JDBCOptions","description":"This step will load a table from the provided JDBCOptions","type":"Pipeline","category":"InputOutput","params":[{"type":"text","name":"jdbcOptions","required":true,"parameterType":"org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions","description":"The options to use when loading the DataFrame"}],"engineMeta":{"spark":"JDBCSteps.readWithJDBCOptions","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.DataFrame"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"72dbbfc8-bd1d-4ce4-ab35-28fa8385ea54","displayName":"Load table as DataFrame using StepOptions","description":"This step will load a table from the provided JDBCDataFrameReaderOptions","type":"Pipeline","category":"InputOutput","params":[{"type":"object","name":"jDBCStepsOptions","required":true,"className":"com.acxiom.pipeline.steps.JDBCDataFrameReaderOptions","parameterType":"com.acxiom.pipeline.steps.JDBCDataFrameReaderOptions","description":"The options to use when loading the DataFrameReader"}],"engineMeta":{"spark":"JDBCSteps.readWithStepOptions","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.DataFrame"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"dcc57409-eb91-48c0-975b-ca109ba30195","displayName":"Load table as DataFrame","description":"This step will load a table from the provided jdbc information","type":"Pipeline","category":"InputOutput","params":[{"type":"text","name":"url","required":true,"parameterType":"String","description":"A valid jdbc url"},{"type":"text","name":"table","required":true,"parameterType":"String","description":"A table name or subquery"},{"type":"text","name":"predicates","required":false,"parameterType":"List[String]","description":"Optional predicates used for partitioning"},{"type":"text","name":"connectionProperties","required":false,"parameterType":"Map[String,String]","description":"Optional properties for the jdbc connection"}],"engineMeta":{"spark":"JDBCSteps.readWithProperties","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.DataFrame"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"c9fddf52-34b1-4216-a049-10c33ccd24ab","displayName":"Write DataFrame to table using JDBCOptions","description":"This step will write a DataFrame as a table using JDBCOptions","type":"Pipeline","category":"InputOutput","params":[{"type":"text","name":"dataFrame","required":true,"parameterType":"org.apache.spark.sql.Dataset[_]","description":"The DataFrame to be written"},{"type":"text","name":"jdbcOptions","required":true,"parameterType":"org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions","description":"Options for configuring the JDBC connection"},{"type":"text","name":"saveMode","required":false,"parameterType":"String","description":"The value for the mode option. Defaulted to Overwrite"}],"engineMeta":{"spark":"JDBCSteps.writeWithJDBCOptions","pkg":"com.acxiom.pipeline.steps"},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"77ffcd02-fbd0-4f79-9b35-ac9dc5fb7190","displayName":"Write DataFrame to table","description":"This step will write a DataFrame to a table using the provided properties","type":"Pipeline","category":"InputOutput","params":[{"type":"text","name":"dataFrame","required":true,"parameterType":"org.apache.spark.sql.Dataset[_]","description":"The DataFrame to be written"},{"type":"text","name":"url","required":true,"parameterType":"String","description":"A valid jdbc url"},{"type":"text","name":"table","required":true,"parameterType":"String","description":"A table name or subquery"},{"type":"text","name":"connectionProperties","required":false,"parameterType":"Map[String,String]","description":"Optional properties for the jdbc connection"},{"type":"text","name":"saveMode","required":false,"parameterType":"String","description":"The value for the mode option. Defaulted to Overwrite"}],"engineMeta":{"spark":"JDBCSteps.writeWithProperties","pkg":"com.acxiom.pipeline.steps"},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"3d6b77a1-52c2-49ba-99a0-7ec773dac696","displayName":"Write DataFrame to JDBC table","description":"This step will write a DataFrame to a table using the provided JDBCDataFrameWriterOptions","type":"Pipeline","category":"InputOutput","params":[{"type":"text","name":"dataFrame","required":true,"parameterType":"org.apache.spark.sql.Dataset[_]","description":"The DataFrame to be written"},{"type":"object","name":"jDBCStepsOptions","required":true,"className":"com.acxiom.pipeline.steps.JDBCDataFrameWriterOptions","parameterType":"com.acxiom.pipeline.steps.JDBCDataFrameWriterOptions","description":"Options for the JDBC connect and spark DataFrameWriter"}],"engineMeta":{"spark":"JDBCSteps.writeWithStepOptions","pkg":"com.acxiom.pipeline.steps"},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"713fff3d-d407-4970-89ae-7844e6fc60e3","displayName":"Get JDBC Connection","description":"Get a jdbc connection.","type":"Pipeline","category":"InputOutput","params":[{"type":"text","name":"url","required":true,"parameterType":"String","description":"A valid jdbc url"},{"type":"text","name":"properties","required":false,"parameterType":"Map[String,String]","description":"Optional properties for the jdbc connection"}],"engineMeta":{"spark":"JDBCSteps.getConnection","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"java.sql.Connection"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"549828be-3d96-4561-bf94-7ad420f9d203","displayName":"Execute Sql","description":"Execute a sql command using jdbc.","type":"Pipeline","category":"InputOutput","params":[{"type":"text","name":"sql","required":true,"parameterType":"String","description":"Sql command to execute"},{"type":"text","name":"connection","required":true,"parameterType":"java.sql.Connection","description":"An open jdbc connection"},{"type":"text","name":"parameters","required":false,"parameterType":"List[Any]","description":"Optional list of bind variables"}],"engineMeta":{"spark":"JDBCSteps.executeSql","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"com.acxiom.pipeline.PipelineStepResponse"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"9c8957a3-899e-4f32-830e-d120b1917aa1","displayName":"Close JDBC Connection","description":"Close a JDBC Connection.","type":"Pipeline","category":"InputOutput","params":[{"type":"text","name":"connection","required":true,"parameterType":"java.sql.Connection","description":"An open jdbc connection"}],"engineMeta":{"spark":"JDBCSteps.closeConnection","pkg":"com.acxiom.pipeline.steps"},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"3464dc85-5111-40fc-9bfb-1fd6fc8a2c17","displayName":"Convert JSON String to Map","description":"This step will convert the provided JSON string into a Map that can be passed to other steps","type":"Pipeline","category":"JSON","params":[{"type":"text","name":"jsonString","required":true,"parameterType":"String","description":"The JSON string to convert to a map"},{"type":"text","name":"formats","required":false,"parameterType":"org.json4s.Formats","description":"Json4s Formats object that will override the pipeline context formats"}],"engineMeta":{"spark":"JSONSteps.jsonStringToMap","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"Map[String,Any]"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"f4d19691-779b-4962-a52b-ee5d9a99068e","displayName":"Convert JSON Map to JSON String","description":"This step will convert the provided JSON map into a JSON string that can be passed to other steps","type":"Pipeline","category":"JSON","params":[{"type":"text","name":"jsonMap","required":true,"parameterType":"Map[String,Any]","description":"The JSON map to convert to a JSON string"},{"type":"text","name":"formats","required":false,"parameterType":"org.json4s.Formats","description":"Json4s Formats object that will override the pipeline context formats"}],"engineMeta":{"spark":"JSONSteps.jsonMapToString","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"String"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"1f23eb37-98ee-43c2-ac78-17b04db3cc8d","displayName":"Convert object to JSON String","description":"This step will convert the provided object into a JSON string that can be passed to other steps","type":"Pipeline","category":"JSON","params":[{"type":"text","name":"obj","required":true,"parameterType":"AnyRef","description":"The object to convert to a JSON string"},{"type":"text","name":"formats","required":false,"parameterType":"org.json4s.Formats","description":"Json4s Formats object that will override the pipeline context formats"}],"engineMeta":{"spark":"JSONSteps.objectToJsonString","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"String"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"880c5151-f7cd-40bb-99f2-06dbb20a6523","displayName":"Convert JSON String to object","description":"This step will convert the provided JSON string into an object that can be passed to other steps","type":"Pipeline","category":"JSON","params":[{"type":"text","name":"jsonString","required":true,"parameterType":"String","description":"The JSON string to convert to an object"},{"type":"text","name":"objectName","required":true,"parameterType":"String","description":"The fully qualified class name of the object"},{"type":"text","name":"formats","required":false,"parameterType":"org.json4s.Formats","description":"Json4s Formats object that will override the pipeline context formats"}],"engineMeta":{"spark":"JSONSteps.jsonStringToObject","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"Any"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"68958a29-aab5-4f7e-9ffd-af99c33c512b","displayName":"Convert JSON String to Schema","description":"This step will convert the provided JSON string into a Schema that can be passed to other steps","type":"Pipeline","category":"JSON","params":[{"type":"text","name":"schema","required":true,"parameterType":"String","description":"The JSON string to convert to a Schema"},{"type":"text","name":"formats","required":false,"parameterType":"org.json4s.Formats","description":"Json4s Formats object that will override the pipeline context formats"}],"engineMeta":{"spark":"JSONSteps.jsonStringToSchema","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"com.acxiom.pipeline.steps.Schema"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"cf4e9e6c-98d6-4a14-ae74-52322782c504","displayName":"Convert JSON String to DataFrame","description":"This step will convert the provided JSON string into a DataFrame that can be passed to other steps","type":"Pipeline","category":"JSON","params":[{"type":"text","name":"jsonString","required":true,"parameterType":"String","description":"The JSON string to convert to a DataFrame"}],"engineMeta":{"spark":"JSONSteps.jsonStringToDataFrame","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.DataFrame"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"d5cd835e-5e8f-49c0-9706-746d5a4d7b3a","displayName":"Convert JSON String Dataset to DataFrame","description":"This step will convert the provided JSON string Dataset into a DataFrame that can be passed to other steps","type":"Pipeline","category":"JSON","params":[{"type":"text","name":"dataset","required":true,"parameterType":"org.apache.spark.sql.Dataset[String]","description":"The dataset containing JSON strings"},{"type":"object","name":"dataFrameReaderOptions","required":false,"className":"com.acxiom.pipeline.steps.DataFrameReaderOptions","parameterType":"com.acxiom.pipeline.steps.DataFrameReaderOptions","description":"The JSON parsing options"}],"engineMeta":{"spark":"JSONSteps.jsonDatasetToDataFrame","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.DataFrame"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"f3891201-5138-4cab-aebc-bcc319228543","displayName":"Build JSON4S Formats","description":"This step will build a json4s Formats object that can be used to override the default","type":"Pipeline","category":"JSON","params":[{"type":"text","name":"customSerializers","required":false,"parameterType":"List[com.acxiom.pipeline.applications.ClassInfo]","description":"List of custom serializer classes"},{"type":"text","name":"enumIdSerializers","required":false,"parameterType":"List[com.acxiom.pipeline.applications.ClassInfo]","description":"List of Enumeration classes to serialize by id"},{"type":"text","name":"enumNameSerializers","required":false,"parameterType":"List[com.acxiom.pipeline.applications.ClassInfo]","description":"List of Enumeration classes to serialize by name"}],"engineMeta":{"spark":"JSONSteps.buildJsonFormats","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.json4s.Formats"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"b5485d97-d4e8-41a6-8af7-9ce79a435140","displayName":"To String","description":"Returns the result of the toString method, can unwrap options","type":"Pipeline","category":"String","params":[{"type":"text","name":"value","required":true,"parameterType":"Any","description":"The value to convert"},{"type":"boolean","name":"unwrapOption","required":false,"parameterType":"Boolean","description":"Boolean indicating whether to unwrap the value from an Option prior to calling toString"}],"engineMeta":{"spark":"StringSteps.toString","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"String"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"78e817ec-2bf2-4cbe-acba-e5bc9bdcffc5","displayName":"List To String","description":"Returns the result of the mkString method","type":"Pipeline","category":"String","params":[{"type":"text","name":"list","required":true,"parameterType":"List[Any]","description":"The list to convert"},{"type":"text","name":"separator","required":false,"parameterType":"String","description":"Separator character to use when making the string"},{"type":"boolean","name":"unwrapOptions","required":false,"parameterType":"Boolean","description":"Boolean indicating whether to unwrap each value from an Option"}],"engineMeta":{"spark":"StringSteps.listToString","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"String"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"fcd6b5fe-08ed-4cfd-acfe-eb676d7f4ecd","displayName":"To Lowercase","description":"Returns a lowercase string","type":"Pipeline","category":"String","params":[{"type":"text","name":"value","required":true,"parameterType":"String","description":"The value to lowercase"}],"engineMeta":{"spark":"StringSteps.toLowerCase","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"String"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"2f31ebf1-4ae2-4e04-9b29-4802cac8a198","displayName":"To Uppercase","description":"Returns an uppercase string","type":"Pipeline","category":"String","params":[{"type":"text","name":"value","required":true,"parameterType":"String","description":"The value to uppercase"}],"engineMeta":{"spark":"StringSteps.toUpperCase","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"String"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"96b7b521-5304-4e63-8435-63d84a358368","displayName":"String Split","description":"Returns a list of strings split off of the given string","type":"Pipeline","category":"String","params":[{"type":"text","name":"string","required":true,"parameterType":"String","description":"The string to split"},{"type":"text","name":"regex","required":true,"parameterType":"String","description":"Regex to use when splitting the string"},{"type":"integer","name":"limit","required":false,"parameterType":"Int","description":"Max number elements to return in the list"}],"engineMeta":{"spark":"StringSteps.stringSplit","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"List[String]"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"f75abedd-4aee-4979-8d56-ea7b0c1a86e1","displayName":"Substring","description":"Returns a substring","type":"Pipeline","category":"String","params":[{"type":"text","name":"string","required":true,"parameterType":"String","description":"The string to parse"},{"type":"text","name":"begin","required":true,"parameterType":"Int","description":"The beginning index"},{"type":"integer","name":"end","required":false,"parameterType":"Int","description":"The end index"}],"engineMeta":{"spark":"StringSteps.substring","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"String"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"3fabf9ec-5383-4eb3-81af-6092ab7c370d","displayName":"String Equals","description":"Return whether string1 equals string2","type":"branch","category":"Decision","params":[{"type":"text","name":"string","required":true,"parameterType":"String","description":"The string to compare"},{"type":"text","name":"anotherString","required":true,"parameterType":"String","description":"The other string to compare"},{"type":"boolean","name":"caseInsensitive","required":false,"parameterType":"Boolean","description":"Boolean flag to indicate case sensitive compare"},{"type":"result","name":"true","required":false},{"type":"result","name":"false","required":false}],"engineMeta":{"spark":"StringSteps.stringEquals","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"Boolean"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"ff0562f5-2917-406d-aa78-c5d49ba6b99f","displayName":"String Matches","description":"Return whether string matches a given regex","type":"branch","category":"Decision","params":[{"type":"text","name":"string","required":true,"parameterType":"String","description":"The string to match"},{"type":"text","name":"regex","required":true,"parameterType":"String","description":"Regex to use for the match"},{"type":"result","name":"true","required":false},{"type":"result","name":"false","required":false}],"engineMeta":{"spark":"StringSteps.stringMatches","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"Boolean"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"416baf4e-a1dd-49fc-83a9-0f41b77e57b7","displayName":"String Replace All","description":"Perform a literal or regex replacement on a string","type":"pipeline","category":"String","params":[{"type":"text","name":"string","required":true,"parameterType":"String","description":"The string to modify"},{"type":"text","name":"matchString","required":true,"parameterType":"String","description":"The string to match"},{"type":"text","name":"replacement","required":false,"parameterType":"String","description":"The replacement string"},{"type":"boolean","name":"literal","required":false,"parameterType":"Boolean","description":"Perform \\'literal\\' match replacement"}],"engineMeta":{"spark":"StringSteps.stringReplaceAll","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"String"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"95438b82-8d50-41da-8094-c92449b9e7df","displayName":"String Replace First","description":"Perform a literal or regex replacement on the first occurrence in a string","type":"pipeline","category":"String","params":[{"type":"text","name":"string","required":true,"parameterType":"String","description":"The string to modify"},{"type":"text","name":"matchString","required":true,"parameterType":"String","description":"The string to match"},{"type":"text","name":"replacement","required":false,"parameterType":"String","description":"The replacement string"},{"type":"boolean","name":"literal","required":false,"parameterType":"Boolean","description":"Perform \\'literal\\' match replacement"}],"engineMeta":{"spark":"StringSteps.stringReplaceFirst","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"String"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"86c84fa3-ad45-4a49-ac05-92385b8e9572","displayName":"Get Credential","description":"This step provides access to credentials through the CredentialProvider","type":"Pipeline","category":"Credentials","params":[{"type":"text","name":"credentialName","required":true,"parameterType":"String","description":"The dataset containing CSV strings"}],"engineMeta":{"spark":"CredentialSteps.getCredential","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"com.acxiom.pipeline.Credential"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"219c787a-f502-4efc-b15d-5beeff661fc0","displayName":"Map a DataFrame to an existing DataFrame","description":"This step maps a new DataFrame to an existing DataFrame to make them compatible","type":"Pipeline","category":"Transforms","params":[{"type":"text","name":"inputDataFrame","required":true,"parameterType":"org.apache.spark.sql.Dataset[_]","description":"The DataFrame that needs to be modified"},{"type":"text","name":"destinationDataFrame","required":true,"parameterType":"org.apache.spark.sql.Dataset[_]","description":"The DataFrame that the new data needs to map to"},{"type":"object","name":"transforms","required":true,"className":"com.acxiom.pipeline.steps.Transformations","parameterType":"com.acxiom.pipeline.steps.Transformations","description":"The object with transform, alias, and filter logic details"},{"type":"boolean","name":"addNewColumns","required":false,"parameterType":"Boolean"}],"engineMeta":{"spark":"TransformationSteps.mapToDestinationDataFrame","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.DataFrame"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"8f9c08ea-4882-4265-bac7-2da3e942758f","displayName":"Map a DataFrame to a pre-defined Schema","description":"This step maps a new DataFrame to a pre-defined spark schema","type":"Pipeline","category":"Transforms","params":[{"type":"text","name":"inputDataFrame","required":true,"parameterType":"org.apache.spark.sql.Dataset[_]","description":"The DataFrame that needs to be modified"},{"type":"object","name":"destinationSchema","required":true,"className":"com.acxiom.pipeline.steps.Schema","parameterType":"com.acxiom.pipeline.steps.Schema","description":"The schema that the new data should map to"},{"type":"object","name":"transforms","required":true,"className":"com.acxiom.pipeline.steps.Transformations","parameterType":"com.acxiom.pipeline.steps.Transformations","description":"The object with transform, alias, and filter logic details"},{"type":"boolean","name":"addNewColumns","required":false,"parameterType":"Boolean"}],"engineMeta":{"spark":"TransformationSteps.mapDataFrameToSchema","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.DataFrame"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"3ee74590-9131-43e1-8ee8-ad320482a592","displayName":"Merge a DataFrame to an existing DataFrame","description":"This step merges two DataFrames to create a single DataFrame","type":"Pipeline","category":"Transforms","params":[{"type":"text","name":"inputDataFrame","required":true,"parameterType":"org.apache.spark.sql.Dataset[_]","description":"The first DataFrame"},{"type":"text","name":"destinationDataFrame","required":true,"parameterType":"org.apache.spark.sql.Dataset[_]","description":"The second DataFrame used as the driver"},{"type":"object","name":"transforms","required":true,"className":"com.acxiom.pipeline.steps.Transformations","parameterType":"com.acxiom.pipeline.steps.Transformations","description":"The object with transform, alias, and filter logic details"},{"type":"boolean","name":"addNewColumns","required":false,"parameterType":"Boolean"},{"type":"boolean","name":"distinct","required":false,"defaultValue":"true","parameterType":"Boolean","description":"Flag to determine whether a distinct union should be performed"}],"engineMeta":{"spark":"TransformationSteps.mergeDataFrames","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.DataFrame"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"ac3dafe4-e6ee-45c9-8fc6-fa7f918cf4f2","displayName":"Modify or Create Columns using Transforms Provided","description":"This step transforms existing columns and/or adds new columns to an existing dataframe using expressions provided","type":"Pipeline","category":"Transforms","params":[{"type":"text","name":"dataFrame","required":true,"parameterType":"org.apache.spark.sql.Dataset[_]","description":"The input DataFrame"},{"type":"object","name":"transforms","required":true,"className":"com.acxiom.pipeline.steps.Transformations","parameterType":"com.acxiom.pipeline.steps.Transformations"}],"engineMeta":{"spark":"TransformationSteps.applyTransforms","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.DataFrame"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"3e2da5a8-387d-49b1-be22-c03764fb0fde","displayName":"Select Expressions","description":"Select each provided expresion from a DataFrame","type":"Pipeline","category":"Transforms","params":[{"type":"text","name":"dataFrame","required":true,"parameterType":"org.apache.spark.sql.Dataset[_]","description":"The DataFrame to select from"},{"type":"text","name":"expressions","required":true,"parameterType":"List[String]","description":"List of expressions to select"}],"engineMeta":{"spark":"TransformationSteps.selectExpressions","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.DataFrame"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"1e0a234a-8ae5-4627-be6d-3052b33d9014","displayName":"Add Column","description":"Add a new column to a DataFrame","type":"Pipeline","category":"Transforms","params":[{"type":"text","name":"dataFrame","required":true,"parameterType":"org.apache.spark.sql.Dataset[_]","description":"The DataFrame to add to"},{"type":"text","name":"columnName","required":true,"parameterType":"String","description":"The name of the new column"},{"type":"text","name":"expression","required":true,"parameterType":"String","description":"The expression used for the column"},{"type":"boolean","name":"standardizeColumnName","required":false,"parameterType":"Boolean"}],"engineMeta":{"spark":"TransformationSteps.addColumn","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.DataFrame"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"08c9c5a9-a10d-477e-a702-19bd24889d1e","displayName":"Add Columns","description":"Add multiple new columns to a DataFrame","type":"Pipeline","category":"Transforms","params":[{"type":"text","name":"dataFrame","required":true,"parameterType":"org.apache.spark.sql.Dataset[_]","description":"The DataFrame to add to"},{"type":"text","name":"columns","required":true,"parameterType":"Map[String,String]","description":"A map of column names and expressions"},{"type":"boolean","name":"standardizeColumnNames","required":false,"parameterType":"Boolean"}],"engineMeta":{"spark":"TransformationSteps.addColumns","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.DataFrame"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"42c328ac-a6bd-49ca-b597-b706956d294c","displayName":"Flatten a DataFrame","description":"This step will flatten all nested fields contained in a DataFrame","type":"Pipeline","category":"Transforms","params":[{"type":"text","name":"dataFrame","required":true,"parameterType":"org.apache.spark.sql.Dataset[_]","description":"The DataFrame to flatten"},{"type":"text","name":"separator","required":false,"defaultValue":"_","parameterType":"String","description":"Separator to place between nested field names"},{"type":"text","name":"fieldList","required":false,"parameterType":"List[String]","description":"List of fields to flatten. Will flatten all fields if left empty"},{"type":"integer","name":"depth","required":false,"parameterType":"Int","description":"How deep should we traverse when flattening."}],"engineMeta":{"spark":"TransformationSteps.flattenDataFrame","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.Dataset[_]"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"a981080d-714c-4d36-8b09-d95842ec5655","displayName":"Standardize Column Names on a DataFrame","description":"This step will standardize columns names on existing DataFrame","type":"Pipeline","category":"Transforms","params":[{"type":"text","name":"dataFrame","required":true,"parameterType":"org.apache.spark.sql.DataFrame"}],"engineMeta":{"spark":"TransformationSteps.standardizeColumnNames","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.DataFrame"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"541c4f7d-3524-4d53-bbd9-9f2cfd9d1bd1","displayName":"Save a Dataframe to a TempView","description":"This step stores an existing dataframe to a TempView to be used in future queries in the session","type":"Pipeline","category":"Query","params":[{"type":"text","name":"dataFrame","required":true,"parameterType":"org.apache.spark.sql.Dataset[_]","description":"The dataframe to store"},{"type":"text","name":"viewName","required":false,"parameterType":"String","description":"The name of the view to create (optional, random name will be created if not provided)"}],"engineMeta":{"spark":"QuerySteps.dataFrameToTempView","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"String"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"71b71ef3-eaa7-4a1f-b3f3-603a1a54846d","displayName":"Create a TempView from a Query","description":"This step runs a SQL statement against existing TempViews from this session and returns a new TempView","type":"Pipeline","category":"Query","params":[{"type":"script","name":"query","required":true,"language":"sql","parameterType":"String","description":"The query to run (all tables referenced must exist as TempViews created in this session)"},{"type":"text","name":"variableMap","required":false,"parameterType":"Map[String,String]","description":"The key/value pairs to be used in variable replacement in the query"},{"type":"text","name":"viewName","required":false,"parameterType":"String","description":"The name of the view to create (optional, random name will be created if not provided)"}],"engineMeta":{"spark":"QuerySteps.queryToTempView","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"String"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"61378ed6-8a4f-4e6d-9c92-6863c9503a54","displayName":"Create a DataFrame from a Query","description":"This step runs a SQL statement against existing TempViews from this session and returns a new DataFrame","type":"Pipeline","category":"Query","params":[{"type":"script","name":"query","required":true,"language":"sql","parameterType":"String","description":"The query to run (all tables referenced must exist as TempViews created in this session)"},{"type":"text","name":"variableMap","required":false,"parameterType":"Map[String,String]","description":"The key/value pairs to be used in variable replacement in the query"}],"engineMeta":{"spark":"QuerySteps.queryToDataFrame","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.DataFrame"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"57b0e491-e09b-4428-aab2-cebe1f217eda","displayName":"Create a DataFrame from an Existing TempView","description":"This step pulls an existing TempView from this session into a new DataFrame","type":"Pipeline","category":"Query","params":[{"type":"text","name":"viewName","required":false,"parameterType":"String","description":"The name of the view to use"}],"engineMeta":{"spark":"QuerySteps.tempViewToDataFrame","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.DataFrame"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"648f27aa-6e3b-44ed-a093-bc284783731b","displayName":"Create a TempView from a DataFrame Query","description":"This step runs a SQL statement against an existing DataFrame from this session and returns a new TempView","type":"Pipeline","category":"Query","params":[{"type":"text","name":"dataFrame","required":true,"parameterType":"org.apache.spark.sql.Dataset[_]","description":"The dataframe to query"},{"type":"script","name":"query","required":true,"language":"sql","parameterType":"String","description":"The query to run (all tables referenced must exist as TempViews created in this session)"},{"type":"text","name":"variableMap","required":false,"parameterType":"Map[String,String]","description":"The key/value pairs to be used in variable replacement in the query"},{"type":"text","name":"inputViewName","required":true,"parameterType":"String","description":"The name to use when creating the view representing the input dataframe (same name used in query)"},{"type":"text","name":"outputViewName","required":false,"parameterType":"String","description":"The name of the view to create (optional, random name will be created if not provided)"}],"engineMeta":{"spark":"QuerySteps.dataFrameQueryToTempView","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"String"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"dfb8a387-6245-4b1c-ae6c-94067eb83962","displayName":"Create a DataFrame from a DataFrame Query","description":"This step runs a SQL statement against an existing DataFrame from this session and returns a new DataFrame","type":"Pipeline","category":"Query","params":[{"type":"text","name":"dataFrame","required":true,"parameterType":"org.apache.spark.sql.Dataset[_]","description":"The dataframe to query"},{"type":"script","name":"query","required":true,"language":"sql","parameterType":"String","description":"The query to run (all tables referenced must exist as TempViews created in this session)"},{"type":"text","name":"variableMap","required":false,"parameterType":"Map[String,String]","description":"The key/value pairs to be used in variable replacement in the query"},{"type":"text","name":"inputViewName","required":true,"parameterType":"String","description":"The name to use when creating the view representing the input dataframe (same name used in query)"}],"engineMeta":{"spark":"QuerySteps.dataFrameQueryToDataFrame","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.DataFrame"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"c88de095-14e0-4c67-8537-0325127e2bd2","displayName":"Cache an exising TempView","description":"This step will cache an existing TempView","type":"Pipeline","category":"Query","params":[{"type":"text","name":"viewName","required":false,"parameterType":"String","description":"The name of the view to cache"}],"engineMeta":{"spark":"QuerySteps.cacheTempView","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.DataFrame"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"0342654c-2722-56fe-ba22-e342169545af","displayName":"Copy (auto buffering)","description":"Copy the contents of the source path to the destination path. This function will call connect on both FileManagers.","type":"Pipeline","category":"FileManager","params":[{"type":"text","name":"srcFS","required":true,"parameterType":"com.acxiom.pipeline.fs.FileManager","description":"The source FileManager"},{"type":"text","name":"srcPath","required":true,"parameterType":"String","description":"The path to copy from"},{"type":"text","name":"destFS","required":true,"parameterType":"com.acxiom.pipeline.fs.FileManager","description":"The destination FileManager"},{"type":"text","name":"destPath","required":true,"parameterType":"String","description":"The path to copy to"}],"engineMeta":{"spark":"FileManagerSteps.copy","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"com.acxiom.pipeline.steps.CopyResults"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"c40169a3-1e77-51ab-9e0a-3f24fb98beef","displayName":"Copy (basic buffering)","description":"Copy the contents of the source path to the destination path using buffer sizes. This function will call connect on both FileManagers.","type":"Pipeline","category":"FileManager","params":[{"type":"text","name":"srcFS","required":true,"parameterType":"com.acxiom.pipeline.fs.FileManager","description":"The source FileManager"},{"type":"text","name":"srcPath","required":true,"parameterType":"String","description":"The path to copy from"},{"type":"text","name":"destFS","required":true,"parameterType":"com.acxiom.pipeline.fs.FileManager","description":"The destination FileManager"},{"type":"text","name":"destPath","required":true,"parameterType":"String","description":"The path to copy to"},{"type":"text","name":"inputBufferSize","required":true,"parameterType":"Int","description":"The size of the buffer to use for reading data during copy"},{"type":"text","name":"outputBufferSize","required":true,"parameterType":"Int","description":"The size of the buffer to use for writing data during copy"}],"engineMeta":{"spark":"FileManagerSteps.copy","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"com.acxiom.pipeline.steps.CopyResults"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"f5a24db0-e91b-5c88-8e67-ab5cff09c883","displayName":"Copy (advanced buffering)","description":"Copy the contents of the source path to the destination path using full buffer sizes. This function will call connect on both FileManagers.","type":"Pipeline","category":"FileManager","params":[{"type":"text","name":"srcFS","required":true,"parameterType":"com.acxiom.pipeline.fs.FileManager","description":"The source FileManager"},{"type":"text","name":"srcPath","required":true,"parameterType":"String","description":"The path to copy from"},{"type":"text","name":"destFS","required":true,"parameterType":"com.acxiom.pipeline.fs.FileManager","description":"The destination FileManager"},{"type":"text","name":"destPath","required":true,"parameterType":"String","description":"The path to copy to"},{"type":"text","name":"inputBufferSize","required":true,"parameterType":"Int","description":"The size of the buffer to use for reading data during copy"},{"type":"text","name":"outputBufferSize","required":true,"parameterType":"Int","description":"The size of the buffer to use for writing data during copy"},{"type":"text","name":"copyBufferSize","required":true,"parameterType":"Int","description":"The intermediate buffer size to use during copy"}],"engineMeta":{"spark":"FileManagerSteps.copy","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"com.acxiom.pipeline.steps.CopyResults"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"1af68ab5-a3fe-4afb-b5fa-34e52f7c77f5","displayName":"Compare File Sizes","description":"Compare the file sizes of the source and destination paths","type":"Pipeline","category":"FileManager","params":[{"type":"text","name":"srcFS","required":true,"parameterType":"com.acxiom.pipeline.fs.FileManager","description":"The source FileManager"},{"type":"text","name":"srcPath","required":true,"parameterType":"String","description":"The path to the source"},{"type":"text","name":"destFS","required":true,"parameterType":"com.acxiom.pipeline.fs.FileManager","description":"The destination FileManager"},{"type":"text","name":"destPath","required":true,"parameterType":"String","description":"The path to th destination"}],"engineMeta":{"spark":"FileManagerSteps.compareFileSizes","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"Int"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"bf2c4df8-a215-480b-87d8-586984e04189","displayName":"Delete (file)","description":"Delete a file","type":"Pipeline","category":"FileManager","params":[{"type":"text","name":"fileManager","required":true,"parameterType":"com.acxiom.pipeline.fs.FileManager","description":"The FileManager"},{"type":"text","name":"path","required":true,"parameterType":"String","description":"The path to the file being deleted"}],"engineMeta":{"spark":"FileManagerSteps.deleteFile","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"Boolean"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"3d1e8519-690c-55f0-bd05-1e7b97fb6633","displayName":"Disconnect a FileManager","description":"Disconnects a FileManager from the underlying file system","type":"Pipeline","category":"FileManager","params":[{"type":"text","name":"fileManager","required":true,"parameterType":"com.acxiom.pipeline.fs.FileManager","description":"The file manager to disconnect"}],"engineMeta":{"spark":"FileManagerSteps.disconnectFileManager","pkg":"com.acxiom.pipeline.steps"},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"259a880a-3e12-4843-9f02-2cfc2a05f576","displayName":"Create a FileManager","description":"Creates a FileManager using the provided FileConnector","type":"Pipeline","category":"Connectors","params":[{"type":"text","name":"fileConnector","required":true,"parameterType":"com.acxiom.pipeline.connectors.FileConnector","description":"The FileConnector to use to create the FileManager implementation"}],"engineMeta":{"spark":"FileManagerSteps.getFileManager","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"com.acxiom.pipeline.fs.FileManager"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"9d467cb0-8b3d-40a0-9ccd-9cf8c5b6cb38","displayName":"Create SFTP FileManager","description":"Simple function to generate the SFTPFileManager for the remote SFTP file system","type":"Pipeline","category":"InputOutput","params":[{"type":"text","name":"hostName","required":true,"parameterType":"String","description":"The name of the host to connect"},{"type":"text","name":"username","required":false,"parameterType":"String","description":"The username used for connection"},{"type":"text","name":"password","required":false,"parameterType":"String","description":"The password used for connection"},{"type":"integer","name":"port","required":false,"parameterType":"Int","description":"The optional port if other than 22"},{"type":"boolean","name":"strictHostChecking","required":false,"parameterType":"Boolean","description":"Option to automatically add keys to the known_hosts file. Default is false."}],"engineMeta":{"spark":"SFTPSteps.createFileManager","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"com.acxiom.pipeline.fs.SFTPFileManager"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"22fcc0e7-0190-461c-a999-9116b77d5919","displayName":"Build a DataFrameReader Object","description":"This step will build a DataFrameReader object that can be used to read a file into a dataframe","type":"Pipeline","category":"InputOutput","params":[{"type":"object","name":"dataFrameReaderOptions","required":true,"className":"com.acxiom.pipeline.steps.DataFrameReaderOptions","parameterType":"com.acxiom.pipeline.steps.DataFrameReaderOptions","description":"The options to use when loading the DataFrameReader"}],"engineMeta":{"spark":"DataFrameSteps.getDataFrameReader","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.DataFrameReader"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"66a451c8-ffbd-4481-9c37-71777c3a240f","displayName":"Load Using DataFrameReader","description":"This step will load a DataFrame given a dataFrameReader.","type":"Pipeline","category":"InputOutput","params":[{"type":"text","name":"dataFrameReader","required":true,"parameterType":"org.apache.spark.sql.DataFrameReader","description":"The DataFrameReader to use when creating the DataFrame"}],"engineMeta":{"spark":"DataFrameSteps.load","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.DataFrame"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"d7cf27e6-9ca5-4a73-a1b3-d007499f235f","displayName":"Load DataFrame","description":"This step will load a DataFrame given a DataFrameReaderOptions object.","type":"Pipeline","category":"InputOutput","params":[{"type":"object","name":"dataFrameReaderOptions","required":true,"className":"com.acxiom.pipeline.steps.DataFrameReaderOptions","parameterType":"com.acxiom.pipeline.steps.DataFrameReaderOptions","description":"The DataFrameReaderOptions to use when creating the DataFrame"}],"engineMeta":{"spark":"DataFrameSteps.loadDataFrame","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.DataFrame"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"8a00dcf8-e6a9-4833-871e-c1f3397ab378","displayName":"Build a DataFrameWriter Object","description":"This step will build a DataFrameWriter object that can be used to write a file into a dataframe","type":"Pipeline","category":"InputOutput","params":[{"type":"text","name":"dataFrame","required":true,"parameterType":"org.apache.spark.sql.Dataset[T]","description":"The DataFrame to use when creating the DataFrameWriter"},{"type":"object","name":"options","required":true,"className":"com.acxiom.pipeline.steps.DataFrameWriterOptions","parameterType":"com.acxiom.pipeline.steps.DataFrameWriterOptions","description":"The DataFrameWriterOptions to use when writing the DataFrame"}],"engineMeta":{"spark":"DataFrameSteps.getDataFrameWriter","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.DataFrameWriter[T]"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"9aa6ae9f-cbeb-4b36-ba6a-02eee0a46558","displayName":"Save Using DataFrameWriter","description":"This step will save a DataFrame given a dataFrameWriter[Row].","type":"Pipeline","category":"InputOutput","params":[{"type":"text","name":"dataFrameWriter","required":true,"parameterType":"org.apache.spark.sql.DataFrameWriter[_]","description":"The DataFrameWriter to use when saving"}],"engineMeta":{"spark":"DataFrameSteps.save","pkg":"com.acxiom.pipeline.steps"},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"e5ac3671-ee10-4d4e-8206-fec7effdf7b9","displayName":"Save DataFrame","description":"This step will save a DataFrame given a DataFrameWriterOptions object.","type":"Pipeline","category":"InputOutput","params":[{"type":"text","name":"dataFrame","required":true,"parameterType":"org.apache.spark.sql.Dataset[_]","description":"The DataFrame to save"},{"type":"object","name":"dataFrameWriterOptions","required":true,"className":"com.acxiom.pipeline.steps.DataFrameWriterOptions","parameterType":"com.acxiom.pipeline.steps.DataFrameWriterOptions","description":"The DataFrameWriterOptions to use for saving"}],"engineMeta":{"spark":"DataFrameSteps.saveDataFrame","pkg":"com.acxiom.pipeline.steps"},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"fa05a970-476d-4617-be4d-950cfa65f2f8","displayName":"Persist DataFrame","description":"Persist a DataFrame to provided storage level.","type":"Pipeline","category":"InputOutput","params":[{"type":"text","name":"dataFrame","required":true,"parameterType":"org.apache.spark.sql.Dataset[T]","description":"The DataFrame to persist"},{"type":"text","name":"storageLevel","required":false,"parameterType":"String","description":"The optional storage mechanism to use when persisting the DataFrame"}],"engineMeta":{"spark":"DataFrameSteps.persistDataFrame","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.Dataset[T]"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"e6fe074e-a1fa-476f-9569-d37295062186","displayName":"Unpersist DataFrame","description":"Unpersist a DataFrame.","type":"Pipeline","category":"InputOutput","params":[{"type":"text","name":"dataFrame","required":true,"parameterType":"org.apache.spark.sql.Dataset[T]","description":"The DataFrame to unpersist"},{"type":"boolean","name":"blocking","required":false,"parameterType":"Boolean","description":"Optional flag to indicate whether to block while unpersisting"}],"engineMeta":{"spark":"DataFrameSteps.unpersistDataFrame","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.Dataset[T]"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"71323226-bcfd-4fa1-bf9e-24e455e41144","displayName":"RepartitionDataFrame","description":"Repartition a DataFrame","type":"Pipeline","category":"Transforms","params":[{"type":"text","name":"dataFrame","required":true,"parameterType":"org.apache.spark.sql.Dataset[T]","description":"The DataFrame to repartition"},{"type":"text","name":"partitions","required":true,"parameterType":"Int","description":"The number of partitions to use"},{"type":"boolean","name":"rangePartition","required":false,"parameterType":"Boolean","description":"Flag indicating whether to repartition by range. This takes precedent over the shuffle flag"},{"type":"boolean","name":"shuffle","required":false,"parameterType":"Boolean","description":"Flag indicating whether to perform a normal partition"},{"type":"text","name":"partitionExpressions","required":false,"parameterType":"List[String]","description":"The partition expressions to use"}],"engineMeta":{"spark":"DataFrameSteps.repartitionDataFrame","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.Dataset[T]"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"5e0358a0-d567-5508-af61-c35a69286e4e","displayName":"Javascript Step","description":"Executes a script and returns the result","type":"Pipeline","category":"Scripting","params":[{"type":"script","name":"script","required":true,"language":"javascript","parameterType":"String","description":"Javascript to execute"}],"engineMeta":{"spark":"JavascriptSteps.processScript","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"com.acxiom.pipeline.PipelineStepResponse"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"570c9a80-8bd1-5f0c-9ae0-605921fe51e2","displayName":"Javascript Step with single object provided","description":"Executes a script with single object provided and returns the result","type":"Pipeline","category":"Scripting","params":[{"type":"script","name":"script","required":true,"language":"javascript","parameterType":"String","description":"Javascript script to execute"},{"type":"text","name":"value","required":true,"parameterType":"Any","description":"Value to bind to the script"}],"engineMeta":{"spark":"JavascriptSteps.processScriptWithValue","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"com.acxiom.pipeline.PipelineStepResponse"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]},{"id":"f92d4816-3c62-4c29-b420-f00994bfcd86","displayName":"Javascript Step with map of objects provided","description":"Executes a script with map of objects provided and returns the result","type":"Pipeline","category":"Scripting","params":[{"type":"script","name":"script","required":true,"language":"javascript","parameterType":"String"},{"type":"text","name":"values","required":true,"parameterType":"Map[String,Any]","description":"Map of name/value pairs to bind to the script"},{"type":"boolean","name":"unwrapOptions","required":false,"parameterType":"Boolean","description":"Flag to control option unwrapping behavior"}],"engineMeta":{"spark":"JavascriptSteps.processScriptWithValues","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"com.acxiom.pipeline.PipelineStepResponse"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3-SNAPSHOT.jar"]}],"pkgObjs":[{"id":"com.acxiom.pipeline.steps.JDBCDataFrameReaderOptions","schema":"{\"$schema\":\"http://json-schema.org/draft-07/schema#\",\"title\":\"JDBC Data Frame Reader Options\",\"type\":\"object\",\"additionalProperties\":false,\"properties\":{\"url\":{\"type\":\"string\"},\"table\":{\"type\":\"string\"},\"predicates\":{\"type\":\"array\",\"items\":{\"type\":\"string\"}},\"readerOptions\":{\"$ref\":\"#/definitions/DataFrameReaderOptions\"}},\"definitions\":{\"DataFrameReaderOptions\":{\"type\":\"object\",\"additionalProperties\":false,\"properties\":{\"format\":{\"type\":\"string\"},\"options\":{\"type\":\"object\",\"additionalProperties\":{\"type\":\"string\"}},\"schema\":{\"$ref\":\"#/definitions/Schema\"}}},\"Schema\":{\"type\":\"object\",\"additionalProperties\":false,\"properties\":{\"attributes\":{\"type\":\"array\",\"items\":{\"$ref\":\"#/definitions/Attribute\"}}}},\"Attribute\":{\"type\":\"object\",\"additionalProperties\":false,\"properties\":{\"name\":{\"type\":\"string\"},\"dataType\":{\"$ref\":\"#/definitions/AttributeType\"}}},\"AttributeType\":{\"type\":\"object\",\"additionalProperties\":false,\"properties\":{\"baseType\":{\"type\":\"string\"},\"valueType\":{\"$ref\":\"#/definitions/AttributeType\"},\"nameType\":{\"$ref\":\"#/definitions/AttributeType\"},\"schema\":{\"$ref\":\"#/definitions/Schema\"}}}}}"},{"id":"com.acxiom.pipeline.steps.DataFrameWriterOptions","schema":"{\"$schema\":\"http://json-schema.org/draft-07/schema#\",\"title\":\"Data Frame Writer Options\",\"type\":\"object\",\"additionalProperties\":false,\"properties\":{\"format\":{\"type\":\"string\"},\"saveMode\":{\"type\":\"string\"},\"options\":{\"type\":\"object\",\"additionalProperties\":{\"type\":\"string\"}},\"bucketingOptions\":{\"$ref\":\"#/definitions/BucketingOptions\"},\"partitionBy\":{\"type\":\"array\",\"items\":{\"type\":\"string\"}},\"sortBy\":{\"type\":\"array\",\"items\":{\"type\":\"string\"}}},\"definitions\":{\"BucketingOptions\":{\"type\":\"object\",\"additionalProperties\":false,\"properties\":{\"numBuckets\":{\"type\":\"integer\"},\"columns\":{\"type\":\"array\",\"items\":{\"type\":\"string\"}}},\"required\":[\"numBuckets\"]}}}","template":"{\"form\":[{\"type\":\"select\",\"key\":\"format\",\"templateOptions\":{\"label\":\"Format\",\"placeholder\":\"\",\"valueProp\":\"value\",\"options\":[{\"value\":\"csv\",\"name\":\"CSV\"},{\"value\":\"json\",\"name\":\"JSON\"},{\"value\":\"parquet\",\"name\":\"Parquet\"},{\"value\":\"orc\",\"name\":\"Orc\"},{\"value\":\"text\",\"name\":\"Text\"}],\"labelProp\":\"name\",\"focus\":false,\"_flatOptions\":true,\"disabled\":false}},{\"key\":\"options.encoding\",\"type\":\"input\",\"templateOptions\":{\"label\":\"Encoding\",\"placeholder\":\"\",\"focus\":false},\"expressionProperties\":{\"templateOptions.disabled\":\"!model.format\"}},{\"key\":\"saveMode\",\"type\":\"select\",\"defaultValue\":false,\"templateOptions\":{\"label\":\"Save Mode\",\"placeholder\":\"\",\"valueProp\":\"value\",\"options\":[{\"value\":\"OVERWRITE\",\"name\":\"Overwrite\"},{\"value\":\"append\",\"name\":\"Append\"},{\"value\":\"ignore\",\"name\":\"Ignore\"},{\"value\":\"error\",\"name\":\"Error\"},{\"value\":\"errorifexists\",\"name\":\"Error If Exists\"}],\"labelProp\":\"name\",\"focus\":false,\"_flatOptions\":true,\"disabled\":false}},{\"key\":\"partitionBy\",\"type\":\"stringArray\",\"templateOptions\":{\"label\":\"Partition By Columns\",\"placeholder\":\"Add column names to use during partitioning\"}},{\"key\":\"sortBy\",\"type\":\"stringArray\",\"templateOptions\":{\"label\":\"Sort By Columns\",\"placeholder\":\"Add column names to use during sorting\"}},{\"key\":\"bucketingOptions\",\"wrappers\":[\"panel\"],\"templateOptions\":{\"label\":\"Bucketing Options\"},\"fieldGroup\":[{\"key\":\"numBuckets\",\"type\":\"input\",\"templateOptions\":{\"label\":\"Number of Buckets\",\"type\":\"number\"}},{\"key\":\"columns\",\"type\":\"stringArray\",\"templateOptions\":{\"label\":\"Bucket Columns\",\"placeholder\":\"Add column names to use during bucketing\"}}]},{\"key\":\"options.sep\",\"type\":\"input\",\"templateOptions\":{\"label\":\"Field Separator\",\"placeholder\":\"\",\"focus\":false},\"hideExpression\":\"model.format === 'csv' ? false : true\"},{\"key\":\"options.lineSep\",\"type\":\"input\",\"templateOptions\":{\"label\":\"Line Separator\",\"placeholder\":\"\",\"focus\":false},\"hideExpression\":\"model.format === 'text' || model.format === 'json' ? false : true\"},{\"key\":\"options.escapeQuotes\",\"hideExpression\":\"model.format === 'csv' ? false : true\",\"type\":\"select\",\"templateOptions\":{\"label\":\"Escape Quotes?\",\"options\":[{\"value\":\"true\",\"name\":\"True\"},{\"value\":\"false\",\"name\":\"False\"}],\"valueProp\":\"value\",\"labelProp\":\"name\"},\"defaultValue\":\"false\"},{\"key\":\"options.quote\",\"type\":\"input\",\"templateOptions\":{\"label\":\"Field Quote\",\"placeholder\":\"\",\"focus\":false},\"hideExpression\":\"model.format === 'csv' ? false : true\"},{\"key\":\"options.escape\",\"type\":\"input\",\"templateOptions\":{\"label\":\"Field Escape Character\",\"placeholder\":\"\",\"focus\":false},\"hideExpression\":\"model.format === 'csv' ? false : true\"}]}"},{"id":"com.acxiom.pipeline.steps.JDBCDataFrameWriterOptions","schema":"{\"$schema\":\"http://json-schema.org/draft-07/schema#\",\"title\":\"JDBC Data Frame Writer Options\",\"type\":\"object\",\"additionalProperties\":false,\"properties\":{\"url\":{\"type\":\"string\"},\"table\":{\"type\":\"string\"},\"writerOptions\":{\"$ref\":\"#/definitions/DataFrameWriterOptions\"}},\"definitions\":{\"DataFrameWriterOptions\":{\"type\":\"object\",\"additionalProperties\":false,\"properties\":{\"format\":{\"type\":\"string\"},\"saveMode\":{\"type\":\"string\"},\"options\":{\"type\":\"object\",\"additionalProperties\":{\"type\":\"string\"}},\"bucketingOptions\":{\"$ref\":\"#/definitions/BucketingOptions\"},\"partitionBy\":{\"type\":\"array\",\"items\":{\"type\":\"string\"}},\"sortBy\":{\"type\":\"array\",\"items\":{\"type\":\"string\"}}}},\"BucketingOptions\":{\"type\":\"object\",\"additionalProperties\":false,\"properties\":{\"numBuckets\":{\"type\":\"integer\"},\"columns\":{\"type\":\"array\",\"items\":{\"type\":\"string\"}}},\"required\":[\"numBuckets\"]}}}"},{"id":"com.acxiom.pipeline.steps.Transformations","schema":"{\"$schema\":\"http://json-schema.org/draft-07/schema#\",\"title\":\"Transformations\",\"type\":\"object\",\"additionalProperties\":false,\"properties\":{\"columnDetails\":{\"type\":\"array\",\"items\":{\"$ref\":\"#/definitions/ColumnDetails\"}},\"filter\":{\"type\":\"string\"},\"standardizeColumnNames\":{}},\"definitions\":{\"ColumnDetails\":{\"type\":\"object\",\"additionalProperties\":false,\"properties\":{\"outputField\":{\"type\":\"string\"},\"inputAliases\":{\"type\":\"array\",\"items\":{\"type\":\"string\"}},\"expression\":{\"type\":\"string\"}}}}}","template":"{\"form\":[{\"key\":\"filter\",\"type\":\"input\",\"templateOptions\":{\"label\":\"Filter\"}},{\"key\":\"standardizeColumnNames\",\"type\":\"checkbox\",\"defaultValue\":false,\"templateOptions\":{\"floatLabel\":\"always\",\"align\":\"start\",\"label\":\"Standardize Column Names?\",\"hideFieldUnderline\":true,\"color\":\"accent\",\"placeholder\":\"\",\"focus\":false,\"hideLabel\":true,\"disabled\":false,\"indeterminate\":true}},{\"fieldArray\":{\"fieldGroup\":[{\"key\":\"outputField\",\"type\":\"input\",\"templateOptions\":{\"label\":\"Output Field\"}},{\"key\":\"inputAliases\",\"type\":\"stringArray\",\"templateOptions\":{\"label\":\"Input Alias\"}},{\"key\":\"expression\",\"type\":\"input\",\"templateOptions\":{\"label\":\"Expression (Optional)\"}}]},\"key\":\"columnDetails\",\"wrappers\":[\"panel\"],\"type\":\"repeat\",\"templateOptions\":{\"label\":\"Column Details\"}}]}"},{"id":"com.acxiom.pipeline.steps.DataFrameReaderOptions","schema":"{\"$schema\":\"http://json-schema.org/draft-07/schema#\",\"title\":\"Data Frame Reader Options\",\"type\":\"object\",\"additionalProperties\":false,\"properties\":{\"format\":{\"type\":\"string\"},\"options\":{\"type\":\"object\",\"additionalProperties\":{\"type\":\"string\"}},\"schema\":{\"$ref\":\"#/definitions/Schema\"}},\"definitions\":{\"Schema\":{\"type\":\"object\",\"additionalProperties\":false,\"properties\":{\"attributes\":{\"type\":\"array\",\"items\":{\"$ref\":\"#/definitions/Attribute\"}}}},\"Attribute\":{\"type\":\"object\",\"additionalProperties\":false,\"properties\":{\"name\":{\"type\":\"string\"},\"dataType\":{\"$ref\":\"#/definitions/AttributeType\"}}},\"AttributeType\":{\"type\":\"object\",\"additionalProperties\":false,\"properties\":{\"baseType\":{\"type\":\"string\"},\"valueType\":{\"$ref\":\"#/definitions/AttributeType\"},\"nameType\":{\"$ref\":\"#/definitions/AttributeType\"},\"schema\":{\"$ref\":\"#/definitions/Schema\"}}}}}","template":"{\"form\":[{\"type\":\"select\",\"key\":\"format\",\"templateOptions\":{\"label\":\"Format\",\"placeholder\":\"\",\"valueProp\":\"value\",\"options\":[{\"value\":\"csv\",\"name\":\"CSV\"},{\"value\":\"json\",\"name\":\"JSON\"},{\"value\":\"parquet\",\"name\":\"Parquet\"},{\"value\":\"orc\",\"name\":\"Orc\"},{\"value\":\"text\",\"name\":\"Text\"}],\"labelProp\":\"name\",\"focus\":false,\"_flatOptions\":true,\"disabled\":false}},{\"key\":\"options.encoding\",\"type\":\"input\",\"templateOptions\":{\"label\":\"Encoding\",\"placeholder\":\"\",\"focus\":false},\"expressionProperties\":{\"templateOptions.disabled\":\"!model.format\"}},{\"key\":\"options.multiLine\",\"hideExpression\":\"model.format === 'csv' || model.format === 'json' ? false : true\",\"type\":\"select\",\"templateOptions\":{\"label\":\"Multiline?\",\"options\":[{\"value\":\"true\",\"name\":\"True\"},{\"value\":\"false\",\"name\":\"False\"}],\"valueProp\":\"value\",\"labelProp\":\"name\"},\"defaultValue\":\"false\"},{\"key\":\"options.header\",\"hideExpression\":\"model.format === 'csv' ? false : true\",\"type\":\"select\",\"templateOptions\":{\"label\":\"Skip Header?\",\"options\":[{\"value\":\"true\",\"name\":\"True\"},{\"value\":\"false\",\"name\":\"False\"}],\"valueProp\":\"value\",\"labelProp\":\"name\"},\"defaultValue\":\"false\"},{\"key\":\"options.sep\",\"type\":\"input\",\"templateOptions\":{\"label\":\"Field Separator\",\"placeholder\":\"\",\"focus\":false},\"hideExpression\":\"model.format === 'csv' ? false : true\"},{\"key\":\"options.lineSep\",\"type\":\"input\",\"templateOptions\":{\"label\":\"Line Separator\",\"placeholder\":\"\",\"focus\":false},\"hideExpression\":\"model.format === 'text' || model.format === 'json' ? false : true\"},{\"key\":\"options.quote\",\"type\":\"input\",\"templateOptions\":{\"label\":\"Field Quote\",\"placeholder\":\"\",\"focus\":false},\"hideExpression\":\"model.format === 'csv' ? false : true\"},{\"key\":\"options.escape\",\"type\":\"input\",\"templateOptions\":{\"label\":\"Field Escape Character\",\"placeholder\":\"\",\"focus\":false},\"hideExpression\":\"model.format === 'csv' ? false : true\"},{\"key\":\"options.primitivesAsString\",\"hideExpression\":\"model.format === 'json' ? false : true\",\"type\":\"select\",\"templateOptions\":{\"label\":\"Primitive As String?\",\"options\":[{\"value\":\"true\",\"name\":\"True\"},{\"value\":\"false\",\"name\":\"False\"}],\"valueProp\":\"value\",\"labelProp\":\"name\"},\"defaultValue\":\"false\"},{\"key\":\"options.inferSchema\",\"hideExpression\":\"model.format === 'csv' ? false : true\",\"type\":\"select\",\"templateOptions\":{\"label\":\"Infer Schema?\",\"options\":[{\"value\":\"true\",\"name\":\"True\"},{\"value\":\"false\",\"name\":\"False\"}],\"valueProp\":\"value\",\"labelProp\":\"name\"},\"defaultValue\":\"false\"},{\"expressionProperties\":{\"templateOptions.disabled\":\"model.options.inferSchema || model.format === 'json' ? false : true\"},\"key\":\"options.samplingRatio\",\"hideExpression\":\"model.format === 'csv' || model.format === 'json' ? false : true\",\"type\":\"input\",\"templateOptions\":{\"label\":\"Sampling Ration\",\"type\":\"number\"}}]}"},{"id":"com.acxiom.pipeline.steps.Schema","schema":"{\"$schema\":\"http://json-schema.org/draft-07/schema#\",\"title\":\"Schema\",\"type\":\"object\",\"additionalProperties\":false,\"properties\":{\"attributes\":{\"type\":\"array\",\"items\":{\"$ref\":\"#/definitions/Attribute\"}}},\"definitions\":{\"Attribute\":{\"type\":\"object\",\"additionalProperties\":false,\"properties\":{\"name\":{\"type\":\"string\"},\"dataType\":{\"$ref\":\"#/definitions/AttributeType\"}}},\"AttributeType\":{\"type\":\"object\",\"additionalProperties\":false,\"properties\":{\"baseType\":{\"type\":\"string\"},\"valueType\":{\"$ref\":\"#/definitions/AttributeType\"},\"nameType\":{\"$ref\":\"#/definitions/AttributeType\"},\"schema\":{\"$ref\":\"#/definitions/Schema\"}}},\"Schema\":{\"type\":\"object\",\"additionalProperties\":false,\"properties\":{\"attributes\":{\"type\":\"array\",\"items\":{\"$ref\":\"#/definitions/Attribute\"}}}}}}"}]} +{"pkgs":["com.acxiom.pipeline.steps"],"steps":[{"id":"3806f23b-478c-4054-b6c1-37f11db58d38","displayName":"Read a DataFrame from Table","description":"This step will read a dataFrame in a given format from the meta store","type":"Pipeline","category":"InputOutput","params":[{"type":"text","name":"table","required":true,"parameterType":"String","description":"The name of the table to read"},{"type":"object","name":"options","required":false,"className":"com.acxiom.pipeline.steps.DataFrameReaderOptions","parameterType":"com.acxiom.pipeline.steps.DataFrameReaderOptions","description":"The DataFrameReaderOptions to use"}],"engineMeta":{"spark":"CatalogSteps.readDataFrame","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.DataFrame"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"e2b4c011-e71b-46f9-a8be-cf937abc2ec4","displayName":"Write DataFrame to Table","description":"This step will write a dataFrame in a given format to the meta store","type":"Pipeline","category":"InputOutput","params":[{"type":"text","name":"dataFrame","required":true,"parameterType":"org.apache.spark.sql.Dataset[_]","description":"The DataFrame to write"},{"type":"text","name":"table","required":true,"parameterType":"String","description":"The name of the table to write to"},{"type":"object","name":"options","required":false,"className":"com.acxiom.pipeline.steps.DataFrameWriterOptions","parameterType":"com.acxiom.pipeline.steps.DataFrameWriterOptions","description":"The DataFrameWriterOptions to use"}],"engineMeta":{"spark":"CatalogSteps.writeDataFrame","pkg":"com.acxiom.pipeline.steps"},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"5874ab64-13c7-404c-8a4f-67ff3b0bc7cf","displayName":"Drop Catalog Object","description":"This step will drop an object from the meta store","type":"Pipeline","category":"InputOutput","params":[{"type":"text","name":"name","required":true,"parameterType":"String","description":"Name of the object to drop"},{"type":"text","name":"objectType","required":false,"defaultValue":"TABLE","parameterType":"String","description":"Type of object to drop"},{"type":"boolean","name":"ifExists","required":false,"defaultValue":"false","parameterType":"Boolean","description":"Flag to control whether existence is checked"},{"type":"boolean","name":"cascade","required":false,"defaultValue":"false","parameterType":"Boolean","description":"Flag to control whether this deletion should cascade"}],"engineMeta":{"spark":"CatalogSteps.drop","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.DataFrame"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"17be71f9-1492-4404-a355-1cc973694cad","displayName":"Database Exists","description":"Check spark catalog for a database with the given name.","type":"branch","category":"Decision","params":[{"type":"text","name":"name","required":true,"parameterType":"String","description":"Name of the database"},{"type":"result","name":"true","required":false},{"type":"result","name":"false","required":false}],"engineMeta":{"spark":"CatalogSteps.databaseExists","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"Boolean"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"95181811-d83e-4136-bedb-2cba1de90301","displayName":"Table Exists","description":"Check spark catalog for a table with the given name.","type":"branch","category":"Decision","params":[{"type":"text","name":"name","required":true,"parameterType":"String","description":"Name of the table"},{"type":"text","name":"database","required":false,"parameterType":"String","description":"Name of the database"},{"type":"result","name":"true","required":false},{"type":"result","name":"false","required":false}],"engineMeta":{"spark":"CatalogSteps.tableExists","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"Boolean"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"f4adfe70-2ae3-4b8d-85d1-f53e91c8dfad","displayName":"Set Current Database","description":"Set the current default database for the spark session.","type":"Pipeline","category":"InputOutput","params":[{"type":"text","name":"name","required":true,"parameterType":"String","description":"Name of the database"}],"engineMeta":{"spark":"CatalogSteps.setCurrentDatabase","pkg":"com.acxiom.pipeline.steps"},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"663f8c93-0a42-4c43-8263-33f89c498760","displayName":"Create Table","description":"Create a table in the meta store.","type":"Pipeline","category":"InputOutput","params":[{"type":"text","name":"name","required":true,"parameterType":"String","description":"Name of the table"},{"type":"text","name":"externalPath","required":false,"parameterType":"String","description":"Path of the external table"},{"type":"object","name":"options","required":false,"className":"com.acxiom.pipeline.steps.DataFrameReaderOptions","parameterType":"com.acxiom.pipeline.steps.DataFrameReaderOptions","description":"Options containing the format, schema, and settings"}],"engineMeta":{"spark":"CatalogSteps.createTable","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.DataFrame"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"836aab38-1140-4606-ab73-5b6744f0e7e7","displayName":"Load","description":"This step will create a DataFrame using the given DataConnector","type":"Pipeline","category":"Connectors","params":[{"type":"text","name":"connector","required":true,"parameterType":"com.acxiom.pipeline.connectors.DataConnector","description":"The data connector to use when writing"},{"type":"text","name":"source","required":false,"parameterType":"String","description":"The source path to load data"},{"type":"object","name":"readOptions","required":false,"className":"com.acxiom.pipeline.steps.DataFrameReaderOptions","parameterType":"com.acxiom.pipeline.steps.DataFrameReaderOptions","description":"The optional options to use while reading the data"}],"engineMeta":{"spark":"DataConnectorSteps.loadDataFrame","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.DataFrame"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"5608eba7-e9ff-48e6-af77-b5e810b99d89","displayName":"Write","description":"This step will write a DataFrame using the given DataConnector","type":"Pipeline","category":"Connectors","params":[{"type":"text","name":"dataFrame","required":true,"parameterType":"org.apache.spark.sql.DataFrame","description":"The DataFrame to write"},{"type":"text","name":"connector","required":true,"parameterType":"com.acxiom.pipeline.connectors.DataConnector","description":"The data connector to use when writing"},{"type":"text","name":"destination","required":false,"parameterType":"String","description":"The destination path to write data"},{"type":"object","name":"writeOptions","required":false,"className":"com.acxiom.pipeline.steps.DataFrameWriterOptions","parameterType":"com.acxiom.pipeline.steps.DataFrameWriterOptions","description":"The optional DataFrame options to use while writing"}],"engineMeta":{"spark":"DataConnectorSteps.writeDataFrame","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.streaming.StreamingQuery"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"87db259d-606e-46eb-b723-82923349640f","displayName":"Load DataFrame from HDFS path","description":"This step will read a dataFrame from the given HDFS path","type":"Pipeline","category":"InputOutput","params":[{"type":"text","name":"path","required":true,"parameterType":"String","description":"The HDFS path to load data into the DataFrame"},{"type":"object","name":"options","required":false,"className":"com.acxiom.pipeline.steps.DataFrameReaderOptions","parameterType":"com.acxiom.pipeline.steps.DataFrameReaderOptions","description":"The options to use when loading the DataFrameReader"}],"engineMeta":{"spark":"HDFSSteps.readFromPath","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.DataFrame"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"8daea683-ecde-44ce-988e-41630d251cb8","displayName":"Load DataFrame from HDFS paths","description":"This step will read a dataFrame from the given HDFS paths","type":"Pipeline","category":"InputOutput","params":[{"type":"text","name":"paths","required":true,"parameterType":"List[String]","description":"The HDFS paths to load data into the DataFrame"},{"type":"object","name":"options","required":false,"className":"com.acxiom.pipeline.steps.DataFrameReaderOptions","parameterType":"com.acxiom.pipeline.steps.DataFrameReaderOptions","description":"The options to use when loading the DataFrameReader"}],"engineMeta":{"spark":"HDFSSteps.readFromPaths","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.DataFrame"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"0a296858-e8b7-43dd-9f55-88d00a7cd8fa","displayName":"Write DataFrame to HDFS","description":"This step will write a dataFrame in a given format to HDFS","type":"Pipeline","category":"InputOutput","params":[{"type":"text","name":"dataFrame","required":true,"parameterType":"org.apache.spark.sql.Dataset[_]","description":"The DataFrame to write"},{"type":"text","name":"path","required":true,"parameterType":"String","description":"The GCS path to write data"},{"type":"object","name":"options","required":false,"className":"com.acxiom.pipeline.steps.DataFrameWriterOptions","parameterType":"com.acxiom.pipeline.steps.DataFrameWriterOptions","description":"The optional DataFrame Options"}],"engineMeta":{"spark":"HDFSSteps.writeToPath","pkg":"com.acxiom.pipeline.steps"},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"e4dad367-a506-5afd-86c0-82c2cf5cd15c","displayName":"Create HDFS FileManager","description":"Simple function to generate the HDFSFileManager for the local HDFS file system","type":"Pipeline","category":"InputOutput","params":[],"engineMeta":{"spark":"HDFSSteps.createFileManager","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"com.acxiom.pipeline.fs.HDFSFileManager"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"a7e17c9d-6956-4be0-a602-5b5db4d1c08b","displayName":"Scala script Step","description":"Executes a script and returns the result","type":"Pipeline","category":"Scripting","params":[{"type":"script","name":"script","required":true,"language":"scala","parameterType":"String","description":"A scala script to execute"}],"engineMeta":{"spark":"ScalaSteps.processScript","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"com.acxiom.pipeline.PipelineStepResponse"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"8bf8cef6-cf32-4d85-99f4-e4687a142f84","displayName":"Scala script Step with additional object provided","description":"Executes a script with the provided object and returns the result","type":"Pipeline","category":"Scripting","params":[{"type":"script","name":"script","required":true,"language":"scala","parameterType":"String","description":"A scala script to execute"},{"type":"text","name":"value","required":true,"parameterType":"Any","description":"A value to pass to the script"},{"type":"text","name":"type","required":false,"parameterType":"String","description":"The type of the value to pass to the script"}],"engineMeta":{"spark":"ScalaSteps.processScriptWithValue","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"com.acxiom.pipeline.PipelineStepResponse"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"3ab721e8-0075-4418-aef1-26abdf3041be","displayName":"Scala script Step with additional objects provided","description":"Executes a script with the provided object and returns the result","type":"Pipeline","category":"Scripting","params":[{"type":"script","name":"script","required":true,"language":"scala","parameterType":"String","description":"A scala script to execute"},{"type":"object","name":"values","required":true,"parameterType":"Map[String,Any]","description":"Map of name/value pairs that will be bound to the script"},{"type":"object","name":"types","required":false,"parameterType":"Map[String,String]","description":"Map of type overrides for the values provided"},{"type":"boolean","name":"unwrapOptions","required":false,"parameterType":"Boolean","description":"Flag to toggle option unwrapping behavior"}],"engineMeta":{"spark":"ScalaSteps.processScriptWithValues","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"com.acxiom.pipeline.PipelineStepResponse"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"6e42b0c3-340e-4848-864c-e1b5c57faa4f","displayName":"Join DataFrames","description":"Join two dataFrames together.","type":"Pipeline","category":"Data","params":[{"type":"text","name":"left","required":true,"parameterType":"org.apache.spark.sql.Dataset[_]","description":"Left side of the join"},{"type":"text","name":"right","required":true,"parameterType":"org.apache.spark.sql.Dataset[_]","description":"Right side of the join"},{"type":"text","name":"expression","required":false,"parameterType":"String","description":"Join expression. Optional for cross joins"},{"type":"text","name":"leftAlias","required":false,"defaultValue":"left","parameterType":"String","description":"Left side alias"},{"type":"text","name":"rightAlias","required":false,"defaultValue":"right","parameterType":"String","description":"Right side alias"},{"type":"text","name":"joinType","required":false,"defaultValue":"inner","parameterType":"String","description":"Type of join to perform"}],"engineMeta":{"spark":"DataSteps.join","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.DataFrame"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"823eeb28-ec81-4da6-83f2-24a1e580b0e5","displayName":"Group By","description":"Group by a list of grouping expressions and a list of aggregates.","type":"Pipeline","category":"Data","params":[{"type":"text","name":"dataFrame","required":true,"parameterType":"org.apache.spark.sql.Dataset[_]","description":"The DataFrame to group"},{"type":"text","name":"groupings","required":true,"parameterType":"List[String]","description":"List of expressions to group by"},{"type":"text","name":"aggregations","required":true,"parameterType":"List[String]","description":"List of aggregations to apply"}],"engineMeta":{"spark":"DataSteps.groupBy","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.DataFrame"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"d322769c-18a0-49c2-9875-41446892e733","displayName":"Union","description":"Union two DataFrames together.","type":"Pipeline","category":"Data","params":[{"type":"text","name":"dataFrame","required":true,"parameterType":"org.apache.spark.sql.Dataset[T]","description":"The initial DataFrame"},{"type":"text","name":"append","required":true,"parameterType":"org.apache.spark.sql.Dataset[T]","description":"The dataFrame to append"},{"type":"boolean","name":"distinct","required":false,"defaultValue":"true","parameterType":"Boolean","description":"Flag to control distinct behavior"}],"engineMeta":{"spark":"DataSteps.union","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.Dataset[T]"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"80583aa9-41b7-4906-8357-cc2d3670d970","displayName":"Add a Column with a Static Value to All Rows in a DataFrame (metalus-common)","description":"This step will add a column with a static value to all rows in the provided data frame","type":"Pipeline","category":"Data","params":[{"type":"text","name":"dataFrame","required":true,"parameterType":"org.apache.spark.sql.Dataset[_]","description":"The data frame to add the column"},{"type":"text","name":"columnName","required":true,"parameterType":"String","description":"The name to provide the id column"},{"type":"text","name":"columnValue","required":true,"parameterType":"Any","description":"The name of the new column"},{"type":"boolean","name":"standardizeColumnName","required":false,"defaultValue":"true","parameterType":"Boolean","description":"The value to add"}],"engineMeta":{"spark":"DataSteps.addStaticColumnToDataFrame","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.DataFrame"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"e625eed6-51f0-44e7-870b-91c960cdc93d","displayName":"Adds a Unique Identifier to a DataFrame (metalus-common)","description":"This step will add a new unique identifier to an existing data frame using the monotonically_increasing_id method","type":"Pipeline","category":"Data","params":[{"type":"text","name":"idColumnName","required":true,"parameterType":"String","description":"The name to provide the id column"},{"type":"text","name":"dataFrame","required":true,"parameterType":"org.apache.spark.sql.Dataset[_]","description":"The data frame to add the column"}],"engineMeta":{"spark":"DataSteps.addUniqueIdToDataFrame","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.DataFrame"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"fa0fcabb-d000-4a5e-9144-692bca618ddb","displayName":"Filter a DataFrame","description":"This step will filter a DataFrame based on the where expression provided","type":"Pipeline","category":"Data","params":[{"type":"text","name":"dataFrame","required":true,"parameterType":"org.apache.spark.sql.Dataset[T]","description":"The DataFrame to filter"},{"type":"text","name":"expression","required":true,"parameterType":"String","description":"The expression to apply to the DataFrame to filter rows"}],"engineMeta":{"spark":"DataSteps.applyFilter","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.Dataset[T]"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"5d0d7c5c-c287-4565-80b2-2b1a847b18c6","displayName":"Get DataFrame Count","description":"Get a count of records in a DataFrame.","type":"Pipeline","category":"InputOutput","params":[{"type":"text","name":"dataFrame","required":true,"parameterType":"org.apache.spark.sql.Dataset[_]","description":"The DataFrame to count"}],"engineMeta":{"spark":"DataSteps.getDataFrameCount","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"Long"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"252b6086-da45-4042-a9a8-31ebf57948af","displayName":"Drop Duplicate Records","description":"Drop duplicate records from a DataFrame","type":"Pipeline","category":"Data","params":[{"type":"text","name":"dataFrame","required":true,"parameterType":"org.apache.spark.sql.Dataset[T]","description":"The DataFrame to drop duplicate records from"},{"type":"text","name":"columnNames","required":true,"parameterType":"List[String]","description":"Columns to use for determining distinct values to drop"}],"engineMeta":{"spark":"DataSteps.dropDuplicateRecords","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.Dataset[T]"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"d5ac88a2-caa2-473c-a9f7-ffb0269880b2","displayName":"Rename Column","description":"Rename a column on a DataFrame","type":"Pipeline","category":"Transforms","params":[{"type":"text","name":"dataFrame","required":true,"parameterType":"org.apache.spark.sql.Dataset[_]","description":"The DataFrame to change"},{"type":"text","name":"oldColumnName","required":true,"parameterType":"String","description":"The name of the column you want to change"},{"type":"text","name":"newColumnName","required":true,"parameterType":"String","description":"The new name to give the column"}],"engineMeta":{"spark":"DataSteps.renameColumn","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.DataFrame"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"6ed36f89-35d1-4280-a555-fbcd8dd76bf2","displayName":"Retry (simple)","description":"Makes a decision to retry or stop based on a named counter","type":"branch","category":"RetryLogic","params":[{"type":"text","name":"counterName","required":true,"parameterType":"String","description":"The name of the counter to use for tracking"},{"type":"text","name":"maxRetries","required":true,"parameterType":"Int","description":"The maximum number of retries allowed"},{"type":"result","name":"retry","required":false},{"type":"result","name":"stop","required":false}],"engineMeta":{"spark":"FlowUtilsSteps.simpleRetry","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"com.acxiom.pipeline.PipelineStepResponse"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"a2f3e151-cb81-4c69-8475-c1a287bbb4cb","displayName":"Convert CSV String Dataset to DataFrame","description":"This step will convert the provided CSV string Dataset into a DataFrame that can be passed to other steps","type":"Pipeline","category":"CSV","params":[{"type":"text","name":"dataset","required":true,"parameterType":"org.apache.spark.sql.Dataset[String]","description":"The dataset containing CSV strings"},{"type":"object","name":"dataFrameReaderOptions","required":false,"className":"com.acxiom.pipeline.steps.DataFrameReaderOptions","parameterType":"com.acxiom.pipeline.steps.DataFrameReaderOptions","description":"The CSV parsing options"}],"engineMeta":{"spark":"CSVSteps.csvDatasetToDataFrame","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.DataFrame"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"d25209c1-53f6-49ad-a402-257ae756ac2a","displayName":"Convert CSV String to DataFrame","description":"This step will convert the provided CSV string into a DataFrame that can be passed to other steps","type":"Pipeline","category":"CSV","params":[{"type":"text","name":"csvString","required":true,"parameterType":"String","description":"The csv string to convert to a DataFrame"},{"type":"text","name":"delimiter","required":false,"defaultValue":",","parameterType":"String","description":"The field delimiter"},{"type":"text","name":"recordDelimiter","required":false,"defaultValue":"\\n","parameterType":"String","description":"The record delimiter"},{"type":"boolean","name":"header","required":false,"defaultValue":"false","parameterType":"Boolean","description":"Build header from the first row"}],"engineMeta":{"spark":"CSVSteps.csvStringToDataFrame","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.DataFrame"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"15889487-fd1c-4c44-b8eb-973c12f91fae","displayName":"Creates an HttpRestClient","description":"This step will build an HttpRestClient using a host url and optional authorization object","type":"Pipeline","category":"API","params":[{"type":"text","name":"hostUrl","required":true,"parameterType":"String","description":"The URL to connect including port"},{"type":"text","name":"authorization","required":false,"parameterType":"com.acxiom.pipeline.api.Authorization","description":"The optional authorization class to use when making connections"},{"type":"boolean","name":"allowSelfSignedCertificates","required":false,"parameterType":"Boolean","description":"Flag to allow using self signed certificates for http calls"}],"engineMeta":{"spark":"ApiSteps.createHttpRestClient","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"com.acxiom.pipeline.api.HttpRestClient"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"fcfd4b91-9a9c-438c-8afa-9f14c1e52a82","displayName":"Creates an HttpRestClient from protocol, host and port","description":"This step will build an HttpRestClient using url parts and optional authorization object","type":"Pipeline","category":"API","params":[{"type":"text","name":"protocol","required":true,"parameterType":"String","description":"The protocol to use when constructing the URL"},{"type":"text","name":"host","required":true,"parameterType":"String","description":"The host name to use when constructing the URL"},{"type":"text","name":"port","required":true,"parameterType":"Int","description":"The port to use when constructing the URL"},{"type":"text","name":"authorization","required":false,"parameterType":"com.acxiom.pipeline.api.Authorization","description":"The optional authorization class to use when making connections"}],"engineMeta":{"spark":"ApiSteps.createHttpRestClientFromParameters","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"com.acxiom.pipeline.api.HttpRestClient"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"b59f0486-78aa-4bd4-baf5-5c7d7c648ff0","displayName":"Check Path Exists","description":"Checks the path to determine whether it exists or not.","type":"Pipeline","category":"API","params":[{"type":"text","name":"httpRestClient","required":true,"parameterType":"com.acxiom.pipeline.api.HttpRestClient","description":"The HttpRestClient to use when accessing the provided path"},{"type":"text","name":"path","required":true,"parameterType":"String","description":"The path to verify"}],"engineMeta":{"spark":"ApiSteps.exists","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"Boolean"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"7521ac47-84ec-4e50-b087-b9de4bf6d514","displayName":"Get the last modified date","description":"Gets the last modified date for the provided path","type":"Pipeline","category":"API","params":[{"type":"text","name":"httpRestClient","required":true,"parameterType":"com.acxiom.pipeline.api.HttpRestClient","description":"The HttpRestClient to use when accessing the provided path"},{"type":"text","name":"path","required":true,"parameterType":"String","description":"The path to the resource to get the last modified date"}],"engineMeta":{"spark":"ApiSteps.getLastModifiedDate","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"java.util.Date"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"fff7f7b6-5d9a-40b3-8add-6432552920a8","displayName":"Get Path Content Length","description":"Get the size of the content at the given path.","type":"Pipeline","category":"API","params":[{"type":"text","name":"httpRestClient","required":true,"parameterType":"com.acxiom.pipeline.api.HttpRestClient","description":"The HttpRestClient to use when accessing the provided path"},{"type":"text","name":"path","required":true,"parameterType":"String","description":"The path to the resource to get the content length"}],"engineMeta":{"spark":"ApiSteps.getContentLength","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"Long"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"dd351d47-125d-47fa-bafd-203bebad82eb","displayName":"Get Path Headers","description":"Get the headers for the content at the given path.","type":"Pipeline","category":"API","params":[{"type":"text","name":"httpRestClient","required":true,"parameterType":"com.acxiom.pipeline.api.HttpRestClient","description":"The HttpRestClient to use when accessing the provided path"},{"type":"text","name":"path","required":true,"parameterType":"String","description":"The path to get the headers"}],"engineMeta":{"spark":"ApiSteps.getHeaders","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"Map[String,List[String]]"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"532f72dd-8443-481d-8406-b74cdc08e342","displayName":"Delete Content","description":"Attempts to delete the provided path..","type":"Pipeline","category":"API","params":[{"type":"text","name":"httpRestClient","required":true,"parameterType":"com.acxiom.pipeline.api.HttpRestClient","description":"The HttpRestClient to use when accessing the provided path"},{"type":"text","name":"path","required":true,"parameterType":"String","description":"The path to delete"}],"engineMeta":{"spark":"ApiSteps.delete","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"Boolean"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"3b91e6e8-ec18-4468-9089-8474f4b4ba48","displayName":"GET String Content","description":"Retrieves the value at the provided path as a string.","type":"Pipeline","category":"API","params":[{"type":"text","name":"httpRestClient","required":true,"parameterType":"com.acxiom.pipeline.api.HttpRestClient","description":"The HttpRestClient to use when accessing the provided path"},{"type":"text","name":"path","required":true,"parameterType":"String","description":"The path to resource"}],"engineMeta":{"spark":"ApiSteps.getStringContent","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"String"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"34c2fc9a-2502-4c79-a0cb-3f866a0a0d6e","displayName":"POST String Content","description":"POSTs the provided string to the provided path using the content type and returns the response as a string.","type":"Pipeline","category":"API","params":[{"type":"text","name":"httpRestClient","required":true,"parameterType":"com.acxiom.pipeline.api.HttpRestClient","description":"The HttpRestClient to use when accessing the provided path"},{"type":"text","name":"path","required":true,"parameterType":"String","description":"The path to post the content"},{"type":"text","name":"content","required":true,"parameterType":"String","description":"The content to post"},{"type":"text","name":"contentType","required":false,"parameterType":"String","description":"The content type being sent to the path"}],"engineMeta":{"spark":"ApiSteps.postStringContent","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"String"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"49ae38b3-cb41-4153-9111-aa6aacf6721d","displayName":"PUT String Content","description":"PUTs the provided string to the provided path using the content type and returns the response as a string.","type":"Pipeline","category":"API","params":[{"type":"text","name":"httpRestClient","required":true,"parameterType":"com.acxiom.pipeline.api.HttpRestClient","description":"The HttpRestClient to use when accessing the provided path"},{"type":"text","name":"path","required":true,"parameterType":"String","description":"The path to post the content"},{"type":"text","name":"content","required":true,"parameterType":"String","description":"The content to put"},{"type":"text","name":"contentType","required":false,"parameterType":"String","description":"The content type being sent to the path"}],"engineMeta":{"spark":"ApiSteps.putStringContent","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"String"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"99b20c23-722f-4862-9f47-bc9f72440ae6","displayName":"GET Input Stream","description":"Creates a buffered input stream for the provided path","type":"Pipeline","category":"API","params":[{"type":"text","name":"httpRestClient","required":true,"parameterType":"com.acxiom.pipeline.api.HttpRestClient","description":"The HttpRestClient to use when accessing the provided path"},{"type":"text","name":"path","required":true,"parameterType":"String","description":"The path to the resource"},{"type":"text","name":"bufferSize","required":false,"parameterType":"Int","description":"The size of buffer to use with the stream"}],"engineMeta":{"spark":"ApiSteps.getInputStream","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"java.io.InputStream"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"f4120b1c-91df-452f-9589-b77f8555ba44","displayName":"GET Output Stream","description":"Creates a buffered output stream for the provided path.","type":"Pipeline","category":"API","params":[{"type":"text","name":"httpRestClient","required":true,"parameterType":"com.acxiom.pipeline.api.HttpRestClient","description":"The HttpRestClient to use when accessing the provided path"},{"type":"text","name":"path","required":true,"parameterType":"String","description":"The path to the resource"},{"type":"text","name":"bufferSize","required":false,"parameterType":"Int","description":"The size of buffer to use with the stream"}],"engineMeta":{"spark":"ApiSteps.getOutputStream","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"java.io.OutputStream"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"cdb332e3-9ea4-4c96-8b29-c1d74287656c","displayName":"Load table as DataFrame using JDBCOptions","description":"This step will load a table from the provided JDBCOptions","type":"Pipeline","category":"InputOutput","params":[{"type":"text","name":"jdbcOptions","required":true,"parameterType":"org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions","description":"The options to use when loading the DataFrame"}],"engineMeta":{"spark":"JDBCSteps.readWithJDBCOptions","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.DataFrame"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"72dbbfc8-bd1d-4ce4-ab35-28fa8385ea54","displayName":"Load table as DataFrame using StepOptions","description":"This step will load a table from the provided JDBCDataFrameReaderOptions","type":"Pipeline","category":"InputOutput","params":[{"type":"object","name":"jDBCStepsOptions","required":true,"className":"com.acxiom.pipeline.steps.JDBCDataFrameReaderOptions","parameterType":"com.acxiom.pipeline.steps.JDBCDataFrameReaderOptions","description":"The options to use when loading the DataFrameReader"}],"engineMeta":{"spark":"JDBCSteps.readWithStepOptions","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.DataFrame"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"dcc57409-eb91-48c0-975b-ca109ba30195","displayName":"Load table as DataFrame","description":"This step will load a table from the provided jdbc information","type":"Pipeline","category":"InputOutput","params":[{"type":"text","name":"url","required":true,"parameterType":"String","description":"A valid jdbc url"},{"type":"text","name":"table","required":true,"parameterType":"String","description":"A table name or subquery"},{"type":"text","name":"predicates","required":false,"parameterType":"List[String]","description":"Optional predicates used for partitioning"},{"type":"text","name":"connectionProperties","required":false,"parameterType":"Map[String,String]","description":"Optional properties for the jdbc connection"}],"engineMeta":{"spark":"JDBCSteps.readWithProperties","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.DataFrame"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"c9fddf52-34b1-4216-a049-10c33ccd24ab","displayName":"Write DataFrame to table using JDBCOptions","description":"This step will write a DataFrame as a table using JDBCOptions","type":"Pipeline","category":"InputOutput","params":[{"type":"text","name":"dataFrame","required":true,"parameterType":"org.apache.spark.sql.Dataset[_]","description":"The DataFrame to be written"},{"type":"text","name":"jdbcOptions","required":true,"parameterType":"org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions","description":"Options for configuring the JDBC connection"},{"type":"text","name":"saveMode","required":false,"parameterType":"String","description":"The value for the mode option. Defaulted to Overwrite"}],"engineMeta":{"spark":"JDBCSteps.writeWithJDBCOptions","pkg":"com.acxiom.pipeline.steps"},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"77ffcd02-fbd0-4f79-9b35-ac9dc5fb7190","displayName":"Write DataFrame to table","description":"This step will write a DataFrame to a table using the provided properties","type":"Pipeline","category":"InputOutput","params":[{"type":"text","name":"dataFrame","required":true,"parameterType":"org.apache.spark.sql.Dataset[_]","description":"The DataFrame to be written"},{"type":"text","name":"url","required":true,"parameterType":"String","description":"A valid jdbc url"},{"type":"text","name":"table","required":true,"parameterType":"String","description":"A table name or subquery"},{"type":"text","name":"connectionProperties","required":false,"parameterType":"Map[String,String]","description":"Optional properties for the jdbc connection"},{"type":"text","name":"saveMode","required":false,"parameterType":"String","description":"The value for the mode option. Defaulted to Overwrite"}],"engineMeta":{"spark":"JDBCSteps.writeWithProperties","pkg":"com.acxiom.pipeline.steps"},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"3d6b77a1-52c2-49ba-99a0-7ec773dac696","displayName":"Write DataFrame to JDBC table","description":"This step will write a DataFrame to a table using the provided JDBCDataFrameWriterOptions","type":"Pipeline","category":"InputOutput","params":[{"type":"text","name":"dataFrame","required":true,"parameterType":"org.apache.spark.sql.Dataset[_]","description":"The DataFrame to be written"},{"type":"object","name":"jDBCStepsOptions","required":true,"className":"com.acxiom.pipeline.steps.JDBCDataFrameWriterOptions","parameterType":"com.acxiom.pipeline.steps.JDBCDataFrameWriterOptions","description":"Options for the JDBC connect and spark DataFrameWriter"}],"engineMeta":{"spark":"JDBCSteps.writeWithStepOptions","pkg":"com.acxiom.pipeline.steps"},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"713fff3d-d407-4970-89ae-7844e6fc60e3","displayName":"Get JDBC Connection","description":"Get a jdbc connection.","type":"Pipeline","category":"InputOutput","params":[{"type":"text","name":"url","required":true,"parameterType":"String","description":"A valid jdbc url"},{"type":"text","name":"properties","required":false,"parameterType":"Map[String,String]","description":"Optional properties for the jdbc connection"}],"engineMeta":{"spark":"JDBCSteps.getConnection","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"java.sql.Connection"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"549828be-3d96-4561-bf94-7ad420f9d203","displayName":"Execute Sql","description":"Execute a sql command using jdbc.","type":"Pipeline","category":"InputOutput","params":[{"type":"text","name":"sql","required":true,"parameterType":"String","description":"Sql command to execute"},{"type":"text","name":"connection","required":true,"parameterType":"java.sql.Connection","description":"An open jdbc connection"},{"type":"text","name":"parameters","required":false,"parameterType":"List[Any]","description":"Optional list of bind variables"}],"engineMeta":{"spark":"JDBCSteps.executeSql","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"com.acxiom.pipeline.PipelineStepResponse"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"9c8957a3-899e-4f32-830e-d120b1917aa1","displayName":"Close JDBC Connection","description":"Close a JDBC Connection.","type":"Pipeline","category":"InputOutput","params":[{"type":"text","name":"connection","required":true,"parameterType":"java.sql.Connection","description":"An open jdbc connection"}],"engineMeta":{"spark":"JDBCSteps.closeConnection","pkg":"com.acxiom.pipeline.steps"},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"3464dc85-5111-40fc-9bfb-1fd6fc8a2c17","displayName":"Convert JSON String to Map","description":"This step will convert the provided JSON string into a Map that can be passed to other steps","type":"Pipeline","category":"JSON","params":[{"type":"text","name":"jsonString","required":true,"parameterType":"String","description":"The JSON string to convert to a map"},{"type":"text","name":"formats","required":false,"parameterType":"org.json4s.Formats","description":"Json4s Formats object that will override the pipeline context formats"}],"engineMeta":{"spark":"JSONSteps.jsonStringToMap","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"Map[String,Any]"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"f4d19691-779b-4962-a52b-ee5d9a99068e","displayName":"Convert JSON Map to JSON String","description":"This step will convert the provided JSON map into a JSON string that can be passed to other steps","type":"Pipeline","category":"JSON","params":[{"type":"text","name":"jsonMap","required":true,"parameterType":"Map[String,Any]","description":"The JSON map to convert to a JSON string"},{"type":"text","name":"formats","required":false,"parameterType":"org.json4s.Formats","description":"Json4s Formats object that will override the pipeline context formats"}],"engineMeta":{"spark":"JSONSteps.jsonMapToString","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"String"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"1f23eb37-98ee-43c2-ac78-17b04db3cc8d","displayName":"Convert object to JSON String","description":"This step will convert the provided object into a JSON string that can be passed to other steps","type":"Pipeline","category":"JSON","params":[{"type":"text","name":"obj","required":true,"parameterType":"AnyRef","description":"The object to convert to a JSON string"},{"type":"text","name":"formats","required":false,"parameterType":"org.json4s.Formats","description":"Json4s Formats object that will override the pipeline context formats"}],"engineMeta":{"spark":"JSONSteps.objectToJsonString","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"String"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"880c5151-f7cd-40bb-99f2-06dbb20a6523","displayName":"Convert JSON String to object","description":"This step will convert the provided JSON string into an object that can be passed to other steps","type":"Pipeline","category":"JSON","params":[{"type":"text","name":"jsonString","required":true,"parameterType":"String","description":"The JSON string to convert to an object"},{"type":"text","name":"objectName","required":true,"parameterType":"String","description":"The fully qualified class name of the object"},{"type":"text","name":"formats","required":false,"parameterType":"org.json4s.Formats","description":"Json4s Formats object that will override the pipeline context formats"}],"engineMeta":{"spark":"JSONSteps.jsonStringToObject","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"Any"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"68958a29-aab5-4f7e-9ffd-af99c33c512b","displayName":"Convert JSON String to Schema","description":"This step will convert the provided JSON string into a Schema that can be passed to other steps","type":"Pipeline","category":"JSON","params":[{"type":"text","name":"schema","required":true,"parameterType":"String","description":"The JSON string to convert to a Schema"},{"type":"text","name":"formats","required":false,"parameterType":"org.json4s.Formats","description":"Json4s Formats object that will override the pipeline context formats"}],"engineMeta":{"spark":"JSONSteps.jsonStringToSchema","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"com.acxiom.pipeline.steps.Schema"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"cf4e9e6c-98d6-4a14-ae74-52322782c504","displayName":"Convert JSON String to DataFrame","description":"This step will convert the provided JSON string into a DataFrame that can be passed to other steps","type":"Pipeline","category":"JSON","params":[{"type":"text","name":"jsonString","required":true,"parameterType":"String","description":"The JSON string to convert to a DataFrame"}],"engineMeta":{"spark":"JSONSteps.jsonStringToDataFrame","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.DataFrame"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"d5cd835e-5e8f-49c0-9706-746d5a4d7b3a","displayName":"Convert JSON String Dataset to DataFrame","description":"This step will convert the provided JSON string Dataset into a DataFrame that can be passed to other steps","type":"Pipeline","category":"JSON","params":[{"type":"text","name":"dataset","required":true,"parameterType":"org.apache.spark.sql.Dataset[String]","description":"The dataset containing JSON strings"},{"type":"object","name":"dataFrameReaderOptions","required":false,"className":"com.acxiom.pipeline.steps.DataFrameReaderOptions","parameterType":"com.acxiom.pipeline.steps.DataFrameReaderOptions","description":"The JSON parsing options"}],"engineMeta":{"spark":"JSONSteps.jsonDatasetToDataFrame","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.DataFrame"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"f3891201-5138-4cab-aebc-bcc319228543","displayName":"Build JSON4S Formats","description":"This step will build a json4s Formats object that can be used to override the default","type":"Pipeline","category":"JSON","params":[{"type":"text","name":"customSerializers","required":false,"parameterType":"List[com.acxiom.pipeline.applications.ClassInfo]","description":"List of custom serializer classes"},{"type":"text","name":"enumIdSerializers","required":false,"parameterType":"List[com.acxiom.pipeline.applications.ClassInfo]","description":"List of Enumeration classes to serialize by id"},{"type":"text","name":"enumNameSerializers","required":false,"parameterType":"List[com.acxiom.pipeline.applications.ClassInfo]","description":"List of Enumeration classes to serialize by name"}],"engineMeta":{"spark":"JSONSteps.buildJsonFormats","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.json4s.Formats"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"b5485d97-d4e8-41a6-8af7-9ce79a435140","displayName":"To String","description":"Returns the result of the toString method, can unwrap options","type":"Pipeline","category":"String","params":[{"type":"text","name":"value","required":true,"parameterType":"Any","description":"The value to convert"},{"type":"boolean","name":"unwrapOption","required":false,"parameterType":"Boolean","description":"Boolean indicating whether to unwrap the value from an Option prior to calling toString"}],"engineMeta":{"spark":"StringSteps.toString","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"String"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"78e817ec-2bf2-4cbe-acba-e5bc9bdcffc5","displayName":"List To String","description":"Returns the result of the mkString method","type":"Pipeline","category":"String","params":[{"type":"text","name":"list","required":true,"parameterType":"List[Any]","description":"The list to convert"},{"type":"text","name":"separator","required":false,"parameterType":"String","description":"Separator character to use when making the string"},{"type":"boolean","name":"unwrapOptions","required":false,"parameterType":"Boolean","description":"Boolean indicating whether to unwrap each value from an Option"}],"engineMeta":{"spark":"StringSteps.listToString","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"String"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"fcd6b5fe-08ed-4cfd-acfe-eb676d7f4ecd","displayName":"To Lowercase","description":"Returns a lowercase string","type":"Pipeline","category":"String","params":[{"type":"text","name":"value","required":true,"parameterType":"String","description":"The value to lowercase"}],"engineMeta":{"spark":"StringSteps.toLowerCase","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"String"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"2f31ebf1-4ae2-4e04-9b29-4802cac8a198","displayName":"To Uppercase","description":"Returns an uppercase string","type":"Pipeline","category":"String","params":[{"type":"text","name":"value","required":true,"parameterType":"String","description":"The value to uppercase"}],"engineMeta":{"spark":"StringSteps.toUpperCase","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"String"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"96b7b521-5304-4e63-8435-63d84a358368","displayName":"String Split","description":"Returns a list of strings split off of the given string","type":"Pipeline","category":"String","params":[{"type":"text","name":"string","required":true,"parameterType":"String","description":"The string to split"},{"type":"text","name":"regex","required":true,"parameterType":"String","description":"Regex to use when splitting the string"},{"type":"integer","name":"limit","required":false,"parameterType":"Int","description":"Max number elements to return in the list"}],"engineMeta":{"spark":"StringSteps.stringSplit","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"List[String]"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"f75abedd-4aee-4979-8d56-ea7b0c1a86e1","displayName":"Substring","description":"Returns a substring","type":"Pipeline","category":"String","params":[{"type":"text","name":"string","required":true,"parameterType":"String","description":"The string to parse"},{"type":"text","name":"begin","required":true,"parameterType":"Int","description":"The beginning index"},{"type":"integer","name":"end","required":false,"parameterType":"Int","description":"The end index"}],"engineMeta":{"spark":"StringSteps.substring","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"String"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"3fabf9ec-5383-4eb3-81af-6092ab7c370d","displayName":"String Equals","description":"Return whether string1 equals string2","type":"branch","category":"Decision","params":[{"type":"text","name":"string","required":true,"parameterType":"String","description":"The string to compare"},{"type":"text","name":"anotherString","required":true,"parameterType":"String","description":"The other string to compare"},{"type":"boolean","name":"caseInsensitive","required":false,"parameterType":"Boolean","description":"Boolean flag to indicate case sensitive compare"},{"type":"result","name":"true","required":false},{"type":"result","name":"false","required":false}],"engineMeta":{"spark":"StringSteps.stringEquals","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"Boolean"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"ff0562f5-2917-406d-aa78-c5d49ba6b99f","displayName":"String Matches","description":"Return whether string matches a given regex","type":"branch","category":"Decision","params":[{"type":"text","name":"string","required":true,"parameterType":"String","description":"The string to match"},{"type":"text","name":"regex","required":true,"parameterType":"String","description":"Regex to use for the match"},{"type":"result","name":"true","required":false},{"type":"result","name":"false","required":false}],"engineMeta":{"spark":"StringSteps.stringMatches","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"Boolean"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"416baf4e-a1dd-49fc-83a9-0f41b77e57b7","displayName":"String Replace All","description":"Perform a literal or regex replacement on a string","type":"pipeline","category":"String","params":[{"type":"text","name":"string","required":true,"parameterType":"String","description":"The string to modify"},{"type":"text","name":"matchString","required":true,"parameterType":"String","description":"The string to match"},{"type":"text","name":"replacement","required":false,"parameterType":"String","description":"The replacement string"},{"type":"boolean","name":"literal","required":false,"parameterType":"Boolean","description":"Perform \\'literal\\' match replacement"}],"engineMeta":{"spark":"StringSteps.stringReplaceAll","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"String"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"95438b82-8d50-41da-8094-c92449b9e7df","displayName":"String Replace First","description":"Perform a literal or regex replacement on the first occurrence in a string","type":"pipeline","category":"String","params":[{"type":"text","name":"string","required":true,"parameterType":"String","description":"The string to modify"},{"type":"text","name":"matchString","required":true,"parameterType":"String","description":"The string to match"},{"type":"text","name":"replacement","required":false,"parameterType":"String","description":"The replacement string"},{"type":"boolean","name":"literal","required":false,"parameterType":"Boolean","description":"Perform \\'literal\\' match replacement"}],"engineMeta":{"spark":"StringSteps.stringReplaceFirst","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"String"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"86c84fa3-ad45-4a49-ac05-92385b8e9572","displayName":"Get Credential","description":"This step provides access to credentials through the CredentialProvider","type":"Pipeline","category":"Credentials","params":[{"type":"text","name":"credentialName","required":true,"parameterType":"String","description":"The dataset containing CSV strings"}],"engineMeta":{"spark":"CredentialSteps.getCredential","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"com.acxiom.pipeline.Credential"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"219c787a-f502-4efc-b15d-5beeff661fc0","displayName":"Map a DataFrame to an existing DataFrame","description":"This step maps a new DataFrame to an existing DataFrame to make them compatible","type":"Pipeline","category":"Transforms","params":[{"type":"text","name":"inputDataFrame","required":true,"parameterType":"org.apache.spark.sql.Dataset[_]","description":"The DataFrame that needs to be modified"},{"type":"text","name":"destinationDataFrame","required":true,"parameterType":"org.apache.spark.sql.Dataset[_]","description":"The DataFrame that the new data needs to map to"},{"type":"object","name":"transforms","required":true,"className":"com.acxiom.pipeline.steps.Transformations","parameterType":"com.acxiom.pipeline.steps.Transformations","description":"The object with transform, alias, and filter logic details"},{"type":"boolean","name":"addNewColumns","required":false,"parameterType":"Boolean"}],"engineMeta":{"spark":"TransformationSteps.mapToDestinationDataFrame","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.DataFrame"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"8f9c08ea-4882-4265-bac7-2da3e942758f","displayName":"Map a DataFrame to a pre-defined Schema","description":"This step maps a new DataFrame to a pre-defined spark schema","type":"Pipeline","category":"Transforms","params":[{"type":"text","name":"inputDataFrame","required":true,"parameterType":"org.apache.spark.sql.Dataset[_]","description":"The DataFrame that needs to be modified"},{"type":"object","name":"destinationSchema","required":true,"className":"com.acxiom.pipeline.steps.Schema","parameterType":"com.acxiom.pipeline.steps.Schema","description":"The schema that the new data should map to"},{"type":"object","name":"transforms","required":true,"className":"com.acxiom.pipeline.steps.Transformations","parameterType":"com.acxiom.pipeline.steps.Transformations","description":"The object with transform, alias, and filter logic details"},{"type":"boolean","name":"addNewColumns","required":false,"parameterType":"Boolean"}],"engineMeta":{"spark":"TransformationSteps.mapDataFrameToSchema","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.DataFrame"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"3ee74590-9131-43e1-8ee8-ad320482a592","displayName":"Merge a DataFrame to an existing DataFrame","description":"This step merges two DataFrames to create a single DataFrame","type":"Pipeline","category":"Transforms","params":[{"type":"text","name":"inputDataFrame","required":true,"parameterType":"org.apache.spark.sql.Dataset[_]","description":"The first DataFrame"},{"type":"text","name":"destinationDataFrame","required":true,"parameterType":"org.apache.spark.sql.Dataset[_]","description":"The second DataFrame used as the driver"},{"type":"object","name":"transforms","required":true,"className":"com.acxiom.pipeline.steps.Transformations","parameterType":"com.acxiom.pipeline.steps.Transformations","description":"The object with transform, alias, and filter logic details"},{"type":"boolean","name":"addNewColumns","required":false,"parameterType":"Boolean"},{"type":"boolean","name":"distinct","required":false,"defaultValue":"true","parameterType":"Boolean","description":"Flag to determine whether a distinct union should be performed"}],"engineMeta":{"spark":"TransformationSteps.mergeDataFrames","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.DataFrame"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"ac3dafe4-e6ee-45c9-8fc6-fa7f918cf4f2","displayName":"Modify or Create Columns using Transforms Provided","description":"This step transforms existing columns and/or adds new columns to an existing dataframe using expressions provided","type":"Pipeline","category":"Transforms","params":[{"type":"text","name":"dataFrame","required":true,"parameterType":"org.apache.spark.sql.Dataset[_]","description":"The input DataFrame"},{"type":"object","name":"transforms","required":true,"className":"com.acxiom.pipeline.steps.Transformations","parameterType":"com.acxiom.pipeline.steps.Transformations"}],"engineMeta":{"spark":"TransformationSteps.applyTransforms","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.DataFrame"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"3e2da5a8-387d-49b1-be22-c03764fb0fde","displayName":"Select Expressions","description":"Select each provided expresion from a DataFrame","type":"Pipeline","category":"Transforms","params":[{"type":"text","name":"dataFrame","required":true,"parameterType":"org.apache.spark.sql.Dataset[_]","description":"The DataFrame to select from"},{"type":"text","name":"expressions","required":true,"parameterType":"List[String]","description":"List of expressions to select"}],"engineMeta":{"spark":"TransformationSteps.selectExpressions","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.DataFrame"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"1e0a234a-8ae5-4627-be6d-3052b33d9014","displayName":"Add Column","description":"Add a new column to a DataFrame","type":"Pipeline","category":"Transforms","params":[{"type":"text","name":"dataFrame","required":true,"parameterType":"org.apache.spark.sql.Dataset[_]","description":"The DataFrame to add to"},{"type":"text","name":"columnName","required":true,"parameterType":"String","description":"The name of the new column"},{"type":"text","name":"expression","required":true,"parameterType":"String","description":"The expression used for the column"},{"type":"boolean","name":"standardizeColumnName","required":false,"parameterType":"Boolean"}],"engineMeta":{"spark":"TransformationSteps.addColumn","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.DataFrame"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"08c9c5a9-a10d-477e-a702-19bd24889d1e","displayName":"Add Columns","description":"Add multiple new columns to a DataFrame","type":"Pipeline","category":"Transforms","params":[{"type":"text","name":"dataFrame","required":true,"parameterType":"org.apache.spark.sql.Dataset[_]","description":"The DataFrame to add to"},{"type":"text","name":"columns","required":true,"parameterType":"Map[String,String]","description":"A map of column names and expressions"},{"type":"boolean","name":"standardizeColumnNames","required":false,"parameterType":"Boolean"}],"engineMeta":{"spark":"TransformationSteps.addColumns","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.DataFrame"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"42c328ac-a6bd-49ca-b597-b706956d294c","displayName":"Flatten a DataFrame","description":"This step will flatten all nested fields contained in a DataFrame","type":"Pipeline","category":"Transforms","params":[{"type":"text","name":"dataFrame","required":true,"parameterType":"org.apache.spark.sql.Dataset[_]","description":"The DataFrame to flatten"},{"type":"text","name":"separator","required":false,"defaultValue":"_","parameterType":"String","description":"Separator to place between nested field names"},{"type":"text","name":"fieldList","required":false,"parameterType":"List[String]","description":"List of fields to flatten. Will flatten all fields if left empty"},{"type":"integer","name":"depth","required":false,"parameterType":"Int","description":"How deep should we traverse when flattening."}],"engineMeta":{"spark":"TransformationSteps.flattenDataFrame","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.Dataset[_]"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"a981080d-714c-4d36-8b09-d95842ec5655","displayName":"Standardize Column Names on a DataFrame","description":"This step will standardize columns names on existing DataFrame","type":"Pipeline","category":"Transforms","params":[{"type":"text","name":"dataFrame","required":true,"parameterType":"org.apache.spark.sql.DataFrame"}],"engineMeta":{"spark":"TransformationSteps.standardizeColumnNames","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.DataFrame"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"541c4f7d-3524-4d53-bbd9-9f2cfd9d1bd1","displayName":"Save a Dataframe to a TempView","description":"This step stores an existing dataframe to a TempView to be used in future queries in the session","type":"Pipeline","category":"Query","params":[{"type":"text","name":"dataFrame","required":true,"parameterType":"org.apache.spark.sql.Dataset[_]","description":"The dataframe to store"},{"type":"text","name":"viewName","required":false,"parameterType":"String","description":"The name of the view to create (optional, random name will be created if not provided)"}],"engineMeta":{"spark":"QuerySteps.dataFrameToTempView","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"String"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"71b71ef3-eaa7-4a1f-b3f3-603a1a54846d","displayName":"Create a TempView from a Query","description":"This step runs a SQL statement against existing TempViews from this session and returns a new TempView","type":"Pipeline","category":"Query","params":[{"type":"script","name":"query","required":true,"language":"sql","parameterType":"String","description":"The query to run (all tables referenced must exist as TempViews created in this session)"},{"type":"text","name":"variableMap","required":false,"parameterType":"Map[String,String]","description":"The key/value pairs to be used in variable replacement in the query"},{"type":"text","name":"viewName","required":false,"parameterType":"String","description":"The name of the view to create (optional, random name will be created if not provided)"}],"engineMeta":{"spark":"QuerySteps.queryToTempView","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"String"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"61378ed6-8a4f-4e6d-9c92-6863c9503a54","displayName":"Create a DataFrame from a Query","description":"This step runs a SQL statement against existing TempViews from this session and returns a new DataFrame","type":"Pipeline","category":"Query","params":[{"type":"script","name":"query","required":true,"language":"sql","parameterType":"String","description":"The query to run (all tables referenced must exist as TempViews created in this session)"},{"type":"text","name":"variableMap","required":false,"parameterType":"Map[String,String]","description":"The key/value pairs to be used in variable replacement in the query"}],"engineMeta":{"spark":"QuerySteps.queryToDataFrame","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.DataFrame"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"57b0e491-e09b-4428-aab2-cebe1f217eda","displayName":"Create a DataFrame from an Existing TempView","description":"This step pulls an existing TempView from this session into a new DataFrame","type":"Pipeline","category":"Query","params":[{"type":"text","name":"viewName","required":false,"parameterType":"String","description":"The name of the view to use"}],"engineMeta":{"spark":"QuerySteps.tempViewToDataFrame","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.DataFrame"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"648f27aa-6e3b-44ed-a093-bc284783731b","displayName":"Create a TempView from a DataFrame Query","description":"This step runs a SQL statement against an existing DataFrame from this session and returns a new TempView","type":"Pipeline","category":"Query","params":[{"type":"text","name":"dataFrame","required":true,"parameterType":"org.apache.spark.sql.Dataset[_]","description":"The dataframe to query"},{"type":"script","name":"query","required":true,"language":"sql","parameterType":"String","description":"The query to run (all tables referenced must exist as TempViews created in this session)"},{"type":"text","name":"variableMap","required":false,"parameterType":"Map[String,String]","description":"The key/value pairs to be used in variable replacement in the query"},{"type":"text","name":"inputViewName","required":true,"parameterType":"String","description":"The name to use when creating the view representing the input dataframe (same name used in query)"},{"type":"text","name":"outputViewName","required":false,"parameterType":"String","description":"The name of the view to create (optional, random name will be created if not provided)"}],"engineMeta":{"spark":"QuerySteps.dataFrameQueryToTempView","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"String"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"dfb8a387-6245-4b1c-ae6c-94067eb83962","displayName":"Create a DataFrame from a DataFrame Query","description":"This step runs a SQL statement against an existing DataFrame from this session and returns a new DataFrame","type":"Pipeline","category":"Query","params":[{"type":"text","name":"dataFrame","required":true,"parameterType":"org.apache.spark.sql.Dataset[_]","description":"The dataframe to query"},{"type":"script","name":"query","required":true,"language":"sql","parameterType":"String","description":"The query to run (all tables referenced must exist as TempViews created in this session)"},{"type":"text","name":"variableMap","required":false,"parameterType":"Map[String,String]","description":"The key/value pairs to be used in variable replacement in the query"},{"type":"text","name":"inputViewName","required":true,"parameterType":"String","description":"The name to use when creating the view representing the input dataframe (same name used in query)"}],"engineMeta":{"spark":"QuerySteps.dataFrameQueryToDataFrame","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.DataFrame"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"c88de095-14e0-4c67-8537-0325127e2bd2","displayName":"Cache an exising TempView","description":"This step will cache an existing TempView","type":"Pipeline","category":"Query","params":[{"type":"text","name":"viewName","required":false,"parameterType":"String","description":"The name of the view to cache"}],"engineMeta":{"spark":"QuerySteps.cacheTempView","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.DataFrame"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"0342654c-2722-56fe-ba22-e342169545af","displayName":"Copy (auto buffering)","description":"Copy the contents of the source path to the destination path. This function will call connect on both FileManagers.","type":"Pipeline","category":"FileManager","params":[{"type":"text","name":"srcFS","required":true,"parameterType":"com.acxiom.pipeline.fs.FileManager","description":"The source FileManager"},{"type":"text","name":"srcPath","required":true,"parameterType":"String","description":"The path to copy from"},{"type":"text","name":"destFS","required":true,"parameterType":"com.acxiom.pipeline.fs.FileManager","description":"The destination FileManager"},{"type":"text","name":"destPath","required":true,"parameterType":"String","description":"The path to copy to"}],"engineMeta":{"spark":"FileManagerSteps.copy","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"com.acxiom.pipeline.steps.CopyResults"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"c40169a3-1e77-51ab-9e0a-3f24fb98beef","displayName":"Copy (basic buffering)","description":"Copy the contents of the source path to the destination path using buffer sizes. This function will call connect on both FileManagers.","type":"Pipeline","category":"FileManager","params":[{"type":"text","name":"srcFS","required":true,"parameterType":"com.acxiom.pipeline.fs.FileManager","description":"The source FileManager"},{"type":"text","name":"srcPath","required":true,"parameterType":"String","description":"The path to copy from"},{"type":"text","name":"destFS","required":true,"parameterType":"com.acxiom.pipeline.fs.FileManager","description":"The destination FileManager"},{"type":"text","name":"destPath","required":true,"parameterType":"String","description":"The path to copy to"},{"type":"text","name":"inputBufferSize","required":true,"parameterType":"Int","description":"The size of the buffer to use for reading data during copy"},{"type":"text","name":"outputBufferSize","required":true,"parameterType":"Int","description":"The size of the buffer to use for writing data during copy"}],"engineMeta":{"spark":"FileManagerSteps.copy","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"com.acxiom.pipeline.steps.CopyResults"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"f5a24db0-e91b-5c88-8e67-ab5cff09c883","displayName":"Copy (advanced buffering)","description":"Copy the contents of the source path to the destination path using full buffer sizes. This function will call connect on both FileManagers.","type":"Pipeline","category":"FileManager","params":[{"type":"text","name":"srcFS","required":true,"parameterType":"com.acxiom.pipeline.fs.FileManager","description":"The source FileManager"},{"type":"text","name":"srcPath","required":true,"parameterType":"String","description":"The path to copy from"},{"type":"text","name":"destFS","required":true,"parameterType":"com.acxiom.pipeline.fs.FileManager","description":"The destination FileManager"},{"type":"text","name":"destPath","required":true,"parameterType":"String","description":"The path to copy to"},{"type":"text","name":"inputBufferSize","required":true,"parameterType":"Int","description":"The size of the buffer to use for reading data during copy"},{"type":"text","name":"outputBufferSize","required":true,"parameterType":"Int","description":"The size of the buffer to use for writing data during copy"},{"type":"text","name":"copyBufferSize","required":true,"parameterType":"Int","description":"The intermediate buffer size to use during copy"}],"engineMeta":{"spark":"FileManagerSteps.copy","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"com.acxiom.pipeline.steps.CopyResults"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"1af68ab5-a3fe-4afb-b5fa-34e52f7c77f5","displayName":"Compare File Sizes","description":"Compare the file sizes of the source and destination paths","type":"Pipeline","category":"FileManager","params":[{"type":"text","name":"srcFS","required":true,"parameterType":"com.acxiom.pipeline.fs.FileManager","description":"The source FileManager"},{"type":"text","name":"srcPath","required":true,"parameterType":"String","description":"The path to the source"},{"type":"text","name":"destFS","required":true,"parameterType":"com.acxiom.pipeline.fs.FileManager","description":"The destination FileManager"},{"type":"text","name":"destPath","required":true,"parameterType":"String","description":"The path to th destination"}],"engineMeta":{"spark":"FileManagerSteps.compareFileSizes","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"Int"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"bf2c4df8-a215-480b-87d8-586984e04189","displayName":"Delete (file)","description":"Delete a file","type":"Pipeline","category":"FileManager","params":[{"type":"text","name":"fileManager","required":true,"parameterType":"com.acxiom.pipeline.fs.FileManager","description":"The FileManager"},{"type":"text","name":"path","required":true,"parameterType":"String","description":"The path to the file being deleted"}],"engineMeta":{"spark":"FileManagerSteps.deleteFile","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"Boolean"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"3d1e8519-690c-55f0-bd05-1e7b97fb6633","displayName":"Disconnect a FileManager","description":"Disconnects a FileManager from the underlying file system","type":"Pipeline","category":"FileManager","params":[{"type":"text","name":"fileManager","required":true,"parameterType":"com.acxiom.pipeline.fs.FileManager","description":"The file manager to disconnect"}],"engineMeta":{"spark":"FileManagerSteps.disconnectFileManager","pkg":"com.acxiom.pipeline.steps"},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"259a880a-3e12-4843-9f02-2cfc2a05f576","displayName":"Create a FileManager","description":"Creates a FileManager using the provided FileConnector","type":"Pipeline","category":"Connectors","params":[{"type":"text","name":"fileConnector","required":true,"parameterType":"com.acxiom.pipeline.connectors.FileConnector","description":"The FileConnector to use to create the FileManager implementation"}],"engineMeta":{"spark":"FileManagerSteps.getFileManager","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"com.acxiom.pipeline.fs.FileManager"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"9d467cb0-8b3d-40a0-9ccd-9cf8c5b6cb38","displayName":"Create SFTP FileManager","description":"Simple function to generate the SFTPFileManager for the remote SFTP file system","type":"Pipeline","category":"InputOutput","params":[{"type":"text","name":"hostName","required":true,"parameterType":"String","description":"The name of the host to connect"},{"type":"text","name":"username","required":false,"parameterType":"String","description":"The username used for connection"},{"type":"text","name":"password","required":false,"parameterType":"String","description":"The password used for connection"},{"type":"integer","name":"port","required":false,"parameterType":"Int","description":"The optional port if other than 22"},{"type":"boolean","name":"strictHostChecking","required":false,"parameterType":"Boolean","description":"Option to automatically add keys to the known_hosts file. Default is false."}],"engineMeta":{"spark":"SFTPSteps.createFileManager","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"com.acxiom.pipeline.fs.SFTPFileManager"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"5c4d2d01-da85-4e2e-a551-f5a65f83653a","displayName":"Set Spark Local Property","description":"Set a property on the spark context.","type":"Pipeline","category":"Spark","params":[{"type":"text","name":"key","required":true,"parameterType":"String","description":"The name of the property to set"},{"type":"text","name":"value","required":true,"parameterType":"Any","description":"The value to set"}],"engineMeta":{"spark":"SparkConfigurationSteps.setLocalProperty","pkg":"com.acxiom.pipeline.steps"},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"0b86b314-2657-4392-927c-e555af56b415","displayName":"Set Spark Local Properties","description":"Set each property on the spark context.","type":"Pipeline","category":"Spark","params":[{"type":"text","name":"properties","required":true,"parameterType":"Map[String,Any]","description":"Map representing local properties to set"},{"type":"text","name":"keySeparator","required":false,"defaultValue":"__","parameterType":"String","description":"String that will be replaced with a period character"}],"engineMeta":{"spark":"SparkConfigurationSteps.setLocalProperties","pkg":"com.acxiom.pipeline.steps"},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"c8c82365-e078-4a2a-99b8-0c0e20d8102d","displayName":"Set Hadoop Configuration Properties","description":"Set each property on the hadoop configuration.","type":"Pipeline","category":"Spark","params":[{"type":"text","name":"properties","required":true,"parameterType":"Map[String,Any]","description":"Map representing local properties to set"},{"type":"text","name":"keySeparator","required":false,"defaultValue":"__","parameterType":"String","description":"String that will be replaced with a period character"}],"engineMeta":{"spark":"SparkConfigurationSteps.setHadoopConfigurationProperties","pkg":"com.acxiom.pipeline.steps"},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"ea7ea3e0-d1c2-40a2-b2b7-3488489509ca","displayName":"Set Hadoop Configuration Property","description":"Set a property on the hadoop configuration.","type":"Pipeline","category":"Spark","params":[{"type":"text","name":"key","required":true,"parameterType":"String","description":"The name of the property to set"},{"type":"text","name":"value","required":true,"parameterType":"Any","description":"The value to set"}],"engineMeta":{"spark":"SparkConfigurationSteps.setHadoopConfigurationProperty","pkg":"com.acxiom.pipeline.steps"},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"b7373f02-4d1e-44cf-a9c9-315a5c1ccecc","displayName":"Set Job Group","description":"Set the current thread\\'s group id and description that will be associated with any jobs.","type":"Pipeline","category":"Spark","params":[{"type":"text","name":"groupId","required":true,"parameterType":"String","description":"The name of the group"},{"type":"text","name":"description","required":true,"parameterType":"String","description":"Description of the job group"},{"type":"boolean","name":"interruptOnCancel","required":false,"defaultValue":"false","parameterType":"Boolean","description":"When true, will trigger Thread.interrupt getting called on executor threads"}],"engineMeta":{"spark":"SparkConfigurationSteps.setJobGroup","pkg":"com.acxiom.pipeline.steps"},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"7394ff4d-f74d-4c9f-a55c-e0fd398fa264","displayName":"Clear Job Group","description":"Clear the current thread\\'s job group","type":"Pipeline","category":"Spark","params":[],"engineMeta":{"spark":"SparkConfigurationSteps.clearJobGroup","pkg":"com.acxiom.pipeline.steps"},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"22fcc0e7-0190-461c-a999-9116b77d5919","displayName":"Build a DataFrameReader Object","description":"This step will build a DataFrameReader object that can be used to read a file into a dataframe","type":"Pipeline","category":"InputOutput","params":[{"type":"object","name":"dataFrameReaderOptions","required":true,"className":"com.acxiom.pipeline.steps.DataFrameReaderOptions","parameterType":"com.acxiom.pipeline.steps.DataFrameReaderOptions","description":"The options to use when loading the DataFrameReader"}],"engineMeta":{"spark":"DataFrameSteps.getDataFrameReader","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.DataFrameReader"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"66a451c8-ffbd-4481-9c37-71777c3a240f","displayName":"Load Using DataFrameReader","description":"This step will load a DataFrame given a dataFrameReader.","type":"Pipeline","category":"InputOutput","params":[{"type":"text","name":"dataFrameReader","required":true,"parameterType":"org.apache.spark.sql.DataFrameReader","description":"The DataFrameReader to use when creating the DataFrame"}],"engineMeta":{"spark":"DataFrameSteps.load","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.DataFrame"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"d7cf27e6-9ca5-4a73-a1b3-d007499f235f","displayName":"Load DataFrame","description":"This step will load a DataFrame given a DataFrameReaderOptions object.","type":"Pipeline","category":"InputOutput","params":[{"type":"object","name":"dataFrameReaderOptions","required":true,"className":"com.acxiom.pipeline.steps.DataFrameReaderOptions","parameterType":"com.acxiom.pipeline.steps.DataFrameReaderOptions","description":"The DataFrameReaderOptions to use when creating the DataFrame"}],"engineMeta":{"spark":"DataFrameSteps.loadDataFrame","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.DataFrame"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"8a00dcf8-e6a9-4833-871e-c1f3397ab378","displayName":"Build a DataFrameWriter Object","description":"This step will build a DataFrameWriter object that can be used to write a file into a dataframe","type":"Pipeline","category":"InputOutput","params":[{"type":"text","name":"dataFrame","required":true,"parameterType":"org.apache.spark.sql.Dataset[T]","description":"The DataFrame to use when creating the DataFrameWriter"},{"type":"object","name":"options","required":true,"className":"com.acxiom.pipeline.steps.DataFrameWriterOptions","parameterType":"com.acxiom.pipeline.steps.DataFrameWriterOptions","description":"The DataFrameWriterOptions to use when writing the DataFrame"}],"engineMeta":{"spark":"DataFrameSteps.getDataFrameWriter","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.DataFrameWriter[T]"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"9aa6ae9f-cbeb-4b36-ba6a-02eee0a46558","displayName":"Save Using DataFrameWriter","description":"This step will save a DataFrame given a dataFrameWriter[Row].","type":"Pipeline","category":"InputOutput","params":[{"type":"text","name":"dataFrameWriter","required":true,"parameterType":"org.apache.spark.sql.DataFrameWriter[_]","description":"The DataFrameWriter to use when saving"}],"engineMeta":{"spark":"DataFrameSteps.save","pkg":"com.acxiom.pipeline.steps"},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"e5ac3671-ee10-4d4e-8206-fec7effdf7b9","displayName":"Save DataFrame","description":"This step will save a DataFrame given a DataFrameWriterOptions object.","type":"Pipeline","category":"InputOutput","params":[{"type":"text","name":"dataFrame","required":true,"parameterType":"org.apache.spark.sql.Dataset[_]","description":"The DataFrame to save"},{"type":"object","name":"dataFrameWriterOptions","required":true,"className":"com.acxiom.pipeline.steps.DataFrameWriterOptions","parameterType":"com.acxiom.pipeline.steps.DataFrameWriterOptions","description":"The DataFrameWriterOptions to use for saving"}],"engineMeta":{"spark":"DataFrameSteps.saveDataFrame","pkg":"com.acxiom.pipeline.steps"},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"fa05a970-476d-4617-be4d-950cfa65f2f8","displayName":"Persist DataFrame","description":"Persist a DataFrame to provided storage level.","type":"Pipeline","category":"InputOutput","params":[{"type":"text","name":"dataFrame","required":true,"parameterType":"org.apache.spark.sql.Dataset[T]","description":"The DataFrame to persist"},{"type":"text","name":"storageLevel","required":false,"parameterType":"String","description":"The optional storage mechanism to use when persisting the DataFrame"}],"engineMeta":{"spark":"DataFrameSteps.persistDataFrame","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.Dataset[T]"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"e6fe074e-a1fa-476f-9569-d37295062186","displayName":"Unpersist DataFrame","description":"Unpersist a DataFrame.","type":"Pipeline","category":"InputOutput","params":[{"type":"text","name":"dataFrame","required":true,"parameterType":"org.apache.spark.sql.Dataset[T]","description":"The DataFrame to unpersist"},{"type":"boolean","name":"blocking","required":false,"parameterType":"Boolean","description":"Optional flag to indicate whether to block while unpersisting"}],"engineMeta":{"spark":"DataFrameSteps.unpersistDataFrame","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.Dataset[T]"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"71323226-bcfd-4fa1-bf9e-24e455e41144","displayName":"RepartitionDataFrame","description":"Repartition a DataFrame","type":"Pipeline","category":"Transforms","params":[{"type":"text","name":"dataFrame","required":true,"parameterType":"org.apache.spark.sql.Dataset[T]","description":"The DataFrame to repartition"},{"type":"text","name":"partitions","required":true,"parameterType":"Int","description":"The number of partitions to use"},{"type":"boolean","name":"rangePartition","required":false,"parameterType":"Boolean","description":"Flag indicating whether to repartition by range. This takes precedent over the shuffle flag"},{"type":"boolean","name":"shuffle","required":false,"parameterType":"Boolean","description":"Flag indicating whether to perform a normal partition"},{"type":"text","name":"partitionExpressions","required":false,"parameterType":"List[String]","description":"The partition expressions to use"}],"engineMeta":{"spark":"DataFrameSteps.repartitionDataFrame","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.Dataset[T]"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"5e0358a0-d567-5508-af61-c35a69286e4e","displayName":"Javascript Step","description":"Executes a script and returns the result","type":"Pipeline","category":"Scripting","params":[{"type":"script","name":"script","required":true,"language":"javascript","parameterType":"String","description":"Javascript to execute"}],"engineMeta":{"spark":"JavascriptSteps.processScript","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"com.acxiom.pipeline.PipelineStepResponse"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"570c9a80-8bd1-5f0c-9ae0-605921fe51e2","displayName":"Javascript Step with single object provided","description":"Executes a script with single object provided and returns the result","type":"Pipeline","category":"Scripting","params":[{"type":"script","name":"script","required":true,"language":"javascript","parameterType":"String","description":"Javascript script to execute"},{"type":"text","name":"value","required":true,"parameterType":"Any","description":"Value to bind to the script"}],"engineMeta":{"spark":"JavascriptSteps.processScriptWithValue","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"com.acxiom.pipeline.PipelineStepResponse"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]},{"id":"f92d4816-3c62-4c29-b420-f00994bfcd86","displayName":"Javascript Step with map of objects provided","description":"Executes a script with map of objects provided and returns the result","type":"Pipeline","category":"Scripting","params":[{"type":"script","name":"script","required":true,"language":"javascript","parameterType":"String"},{"type":"text","name":"values","required":true,"parameterType":"Map[String,Any]","description":"Map of name/value pairs to bind to the script"},{"type":"boolean","name":"unwrapOptions","required":false,"parameterType":"Boolean","description":"Flag to control option unwrapping behavior"}],"engineMeta":{"spark":"JavascriptSteps.processScriptWithValues","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"com.acxiom.pipeline.PipelineStepResponse"}},"tags":["metalus-common_2.11-spark_2.4-1.8.3.jar"]}],"pkgObjs":[{"id":"com.acxiom.pipeline.steps.JDBCDataFrameReaderOptions","schema":"{\"$schema\":\"http://json-schema.org/draft-07/schema#\",\"title\":\"JDBC Data Frame Reader Options\",\"type\":\"object\",\"additionalProperties\":false,\"properties\":{\"url\":{\"type\":\"string\"},\"table\":{\"type\":\"string\"},\"predicates\":{\"type\":\"array\",\"items\":{\"type\":\"string\"}},\"readerOptions\":{\"$ref\":\"#/definitions/DataFrameReaderOptions\"}},\"definitions\":{\"DataFrameReaderOptions\":{\"type\":\"object\",\"additionalProperties\":false,\"properties\":{\"format\":{\"type\":\"string\"},\"options\":{\"type\":\"object\",\"additionalProperties\":{\"type\":\"string\"}},\"schema\":{\"$ref\":\"#/definitions/Schema\"}}},\"Schema\":{\"type\":\"object\",\"additionalProperties\":false,\"properties\":{\"attributes\":{\"type\":\"array\",\"items\":{\"$ref\":\"#/definitions/Attribute\"}}}},\"Attribute\":{\"type\":\"object\",\"additionalProperties\":false,\"properties\":{\"name\":{\"type\":\"string\"},\"dataType\":{\"$ref\":\"#/definitions/AttributeType\"}}},\"AttributeType\":{\"type\":\"object\",\"additionalProperties\":false,\"properties\":{\"baseType\":{\"type\":\"string\"},\"valueType\":{\"$ref\":\"#/definitions/AttributeType\"},\"nameType\":{\"$ref\":\"#/definitions/AttributeType\"},\"schema\":{\"$ref\":\"#/definitions/Schema\"}}}}}"},{"id":"com.acxiom.pipeline.steps.DataFrameWriterOptions","schema":"{\"$schema\":\"http://json-schema.org/draft-07/schema#\",\"title\":\"Data Frame Writer Options\",\"type\":\"object\",\"additionalProperties\":false,\"properties\":{\"format\":{\"type\":\"string\"},\"saveMode\":{\"type\":\"string\"},\"options\":{\"type\":\"object\",\"additionalProperties\":{\"type\":\"string\"}},\"bucketingOptions\":{\"$ref\":\"#/definitions/BucketingOptions\"},\"partitionBy\":{\"type\":\"array\",\"items\":{\"type\":\"string\"}},\"sortBy\":{\"type\":\"array\",\"items\":{\"type\":\"string\"}}},\"definitions\":{\"BucketingOptions\":{\"type\":\"object\",\"additionalProperties\":false,\"properties\":{\"numBuckets\":{\"type\":\"integer\"},\"columns\":{\"type\":\"array\",\"items\":{\"type\":\"string\"}}},\"required\":[\"numBuckets\"]}}}","template":"{\"form\":[{\"type\":\"select\",\"key\":\"format\",\"templateOptions\":{\"label\":\"Format\",\"placeholder\":\"\",\"valueProp\":\"value\",\"options\":[{\"value\":\"csv\",\"name\":\"CSV\"},{\"value\":\"json\",\"name\":\"JSON\"},{\"value\":\"parquet\",\"name\":\"Parquet\"},{\"value\":\"orc\",\"name\":\"Orc\"},{\"value\":\"text\",\"name\":\"Text\"}],\"labelProp\":\"name\",\"focus\":false,\"_flatOptions\":true,\"disabled\":false}},{\"key\":\"options.encoding\",\"type\":\"input\",\"templateOptions\":{\"label\":\"Encoding\",\"placeholder\":\"\",\"focus\":false},\"expressionProperties\":{\"templateOptions.disabled\":\"!model.format\"}},{\"key\":\"saveMode\",\"type\":\"select\",\"defaultValue\":false,\"templateOptions\":{\"label\":\"Save Mode\",\"placeholder\":\"\",\"valueProp\":\"value\",\"options\":[{\"value\":\"OVERWRITE\",\"name\":\"Overwrite\"},{\"value\":\"append\",\"name\":\"Append\"},{\"value\":\"ignore\",\"name\":\"Ignore\"},{\"value\":\"error\",\"name\":\"Error\"},{\"value\":\"errorifexists\",\"name\":\"Error If Exists\"}],\"labelProp\":\"name\",\"focus\":false,\"_flatOptions\":true,\"disabled\":false}},{\"key\":\"partitionBy\",\"type\":\"stringArray\",\"templateOptions\":{\"label\":\"Partition By Columns\",\"placeholder\":\"Add column names to use during partitioning\"}},{\"key\":\"sortBy\",\"type\":\"stringArray\",\"templateOptions\":{\"label\":\"Sort By Columns\",\"placeholder\":\"Add column names to use during sorting\"}},{\"key\":\"bucketingOptions\",\"wrappers\":[\"panel\"],\"templateOptions\":{\"label\":\"Bucketing Options\"},\"fieldGroup\":[{\"key\":\"numBuckets\",\"type\":\"input\",\"templateOptions\":{\"label\":\"Number of Buckets\",\"type\":\"number\"}},{\"key\":\"columns\",\"type\":\"stringArray\",\"templateOptions\":{\"label\":\"Bucket Columns\",\"placeholder\":\"Add column names to use during bucketing\"}}]},{\"key\":\"options.sep\",\"type\":\"input\",\"templateOptions\":{\"label\":\"Field Separator\",\"placeholder\":\"\",\"focus\":false},\"hideExpression\":\"model.format === 'csv' ? false : true\"},{\"key\":\"options.lineSep\",\"type\":\"input\",\"templateOptions\":{\"label\":\"Line Separator\",\"placeholder\":\"\",\"focus\":false},\"hideExpression\":\"model.format === 'text' || model.format === 'json' ? false : true\"},{\"key\":\"options.escapeQuotes\",\"hideExpression\":\"model.format === 'csv' ? false : true\",\"type\":\"select\",\"templateOptions\":{\"label\":\"Escape Quotes?\",\"options\":[{\"value\":\"true\",\"name\":\"True\"},{\"value\":\"false\",\"name\":\"False\"}],\"valueProp\":\"value\",\"labelProp\":\"name\"},\"defaultValue\":\"false\"},{\"key\":\"options.quote\",\"type\":\"input\",\"templateOptions\":{\"label\":\"Field Quote\",\"placeholder\":\"\",\"focus\":false},\"hideExpression\":\"model.format === 'csv' ? false : true\"},{\"key\":\"options.escape\",\"type\":\"input\",\"templateOptions\":{\"label\":\"Field Escape Character\",\"placeholder\":\"\",\"focus\":false},\"hideExpression\":\"model.format === 'csv' ? false : true\"}]}"},{"id":"com.acxiom.pipeline.steps.JDBCDataFrameWriterOptions","schema":"{\"$schema\":\"http://json-schema.org/draft-07/schema#\",\"title\":\"JDBC Data Frame Writer Options\",\"type\":\"object\",\"additionalProperties\":false,\"properties\":{\"url\":{\"type\":\"string\"},\"table\":{\"type\":\"string\"},\"writerOptions\":{\"$ref\":\"#/definitions/DataFrameWriterOptions\"}},\"definitions\":{\"DataFrameWriterOptions\":{\"type\":\"object\",\"additionalProperties\":false,\"properties\":{\"format\":{\"type\":\"string\"},\"saveMode\":{\"type\":\"string\"},\"options\":{\"type\":\"object\",\"additionalProperties\":{\"type\":\"string\"}},\"bucketingOptions\":{\"$ref\":\"#/definitions/BucketingOptions\"},\"partitionBy\":{\"type\":\"array\",\"items\":{\"type\":\"string\"}},\"sortBy\":{\"type\":\"array\",\"items\":{\"type\":\"string\"}}}},\"BucketingOptions\":{\"type\":\"object\",\"additionalProperties\":false,\"properties\":{\"numBuckets\":{\"type\":\"integer\"},\"columns\":{\"type\":\"array\",\"items\":{\"type\":\"string\"}}},\"required\":[\"numBuckets\"]}}}"},{"id":"com.acxiom.pipeline.steps.Transformations","schema":"{\"$schema\":\"http://json-schema.org/draft-07/schema#\",\"title\":\"Transformations\",\"type\":\"object\",\"additionalProperties\":false,\"properties\":{\"columnDetails\":{\"type\":\"array\",\"items\":{\"$ref\":\"#/definitions/ColumnDetails\"}},\"filter\":{\"type\":\"string\"},\"standardizeColumnNames\":{}},\"definitions\":{\"ColumnDetails\":{\"type\":\"object\",\"additionalProperties\":false,\"properties\":{\"outputField\":{\"type\":\"string\"},\"inputAliases\":{\"type\":\"array\",\"items\":{\"type\":\"string\"}},\"expression\":{\"type\":\"string\"}}}}}","template":"{\"form\":[{\"key\":\"filter\",\"type\":\"input\",\"templateOptions\":{\"label\":\"Filter\"}},{\"key\":\"standardizeColumnNames\",\"type\":\"checkbox\",\"defaultValue\":false,\"templateOptions\":{\"floatLabel\":\"always\",\"align\":\"start\",\"label\":\"Standardize Column Names?\",\"hideFieldUnderline\":true,\"color\":\"accent\",\"placeholder\":\"\",\"focus\":false,\"hideLabel\":true,\"disabled\":false,\"indeterminate\":true}},{\"fieldArray\":{\"fieldGroup\":[{\"key\":\"outputField\",\"type\":\"input\",\"templateOptions\":{\"label\":\"Output Field\"}},{\"key\":\"inputAliases\",\"type\":\"stringArray\",\"templateOptions\":{\"label\":\"Input Alias\"}},{\"key\":\"expression\",\"type\":\"input\",\"templateOptions\":{\"label\":\"Expression (Optional)\"}}]},\"key\":\"columnDetails\",\"wrappers\":[\"panel\"],\"type\":\"repeat\",\"templateOptions\":{\"label\":\"Column Details\"}}]}"},{"id":"com.acxiom.pipeline.steps.DataFrameReaderOptions","schema":"{\"$schema\":\"http://json-schema.org/draft-07/schema#\",\"title\":\"Data Frame Reader Options\",\"type\":\"object\",\"additionalProperties\":false,\"properties\":{\"format\":{\"type\":\"string\"},\"options\":{\"type\":\"object\",\"additionalProperties\":{\"type\":\"string\"}},\"schema\":{\"$ref\":\"#/definitions/Schema\"}},\"definitions\":{\"Schema\":{\"type\":\"object\",\"additionalProperties\":false,\"properties\":{\"attributes\":{\"type\":\"array\",\"items\":{\"$ref\":\"#/definitions/Attribute\"}}}},\"Attribute\":{\"type\":\"object\",\"additionalProperties\":false,\"properties\":{\"name\":{\"type\":\"string\"},\"dataType\":{\"$ref\":\"#/definitions/AttributeType\"}}},\"AttributeType\":{\"type\":\"object\",\"additionalProperties\":false,\"properties\":{\"baseType\":{\"type\":\"string\"},\"valueType\":{\"$ref\":\"#/definitions/AttributeType\"},\"nameType\":{\"$ref\":\"#/definitions/AttributeType\"},\"schema\":{\"$ref\":\"#/definitions/Schema\"}}}}}","template":"{\"form\":[{\"type\":\"select\",\"key\":\"format\",\"templateOptions\":{\"label\":\"Format\",\"placeholder\":\"\",\"valueProp\":\"value\",\"options\":[{\"value\":\"csv\",\"name\":\"CSV\"},{\"value\":\"json\",\"name\":\"JSON\"},{\"value\":\"parquet\",\"name\":\"Parquet\"},{\"value\":\"orc\",\"name\":\"Orc\"},{\"value\":\"text\",\"name\":\"Text\"}],\"labelProp\":\"name\",\"focus\":false,\"_flatOptions\":true,\"disabled\":false}},{\"key\":\"options.encoding\",\"type\":\"input\",\"templateOptions\":{\"label\":\"Encoding\",\"placeholder\":\"\",\"focus\":false},\"expressionProperties\":{\"templateOptions.disabled\":\"!model.format\"}},{\"key\":\"options.multiLine\",\"hideExpression\":\"model.format === 'csv' || model.format === 'json' ? false : true\",\"type\":\"select\",\"templateOptions\":{\"label\":\"Multiline?\",\"options\":[{\"value\":\"true\",\"name\":\"True\"},{\"value\":\"false\",\"name\":\"False\"}],\"valueProp\":\"value\",\"labelProp\":\"name\"},\"defaultValue\":\"false\"},{\"key\":\"options.header\",\"hideExpression\":\"model.format === 'csv' ? false : true\",\"type\":\"select\",\"templateOptions\":{\"label\":\"Skip Header?\",\"options\":[{\"value\":\"true\",\"name\":\"True\"},{\"value\":\"false\",\"name\":\"False\"}],\"valueProp\":\"value\",\"labelProp\":\"name\"},\"defaultValue\":\"false\"},{\"key\":\"options.sep\",\"type\":\"input\",\"templateOptions\":{\"label\":\"Field Separator\",\"placeholder\":\"\",\"focus\":false},\"hideExpression\":\"model.format === 'csv' ? false : true\"},{\"key\":\"options.lineSep\",\"type\":\"input\",\"templateOptions\":{\"label\":\"Line Separator\",\"placeholder\":\"\",\"focus\":false},\"hideExpression\":\"model.format === 'text' || model.format === 'json' ? false : true\"},{\"key\":\"options.quote\",\"type\":\"input\",\"templateOptions\":{\"label\":\"Field Quote\",\"placeholder\":\"\",\"focus\":false},\"hideExpression\":\"model.format === 'csv' ? false : true\"},{\"key\":\"options.escape\",\"type\":\"input\",\"templateOptions\":{\"label\":\"Field Escape Character\",\"placeholder\":\"\",\"focus\":false},\"hideExpression\":\"model.format === 'csv' ? false : true\"},{\"key\":\"options.primitivesAsString\",\"hideExpression\":\"model.format === 'json' ? false : true\",\"type\":\"select\",\"templateOptions\":{\"label\":\"Primitive As String?\",\"options\":[{\"value\":\"true\",\"name\":\"True\"},{\"value\":\"false\",\"name\":\"False\"}],\"valueProp\":\"value\",\"labelProp\":\"name\"},\"defaultValue\":\"false\"},{\"key\":\"options.inferSchema\",\"hideExpression\":\"model.format === 'csv' ? false : true\",\"type\":\"select\",\"templateOptions\":{\"label\":\"Infer Schema?\",\"options\":[{\"value\":\"true\",\"name\":\"True\"},{\"value\":\"false\",\"name\":\"False\"}],\"valueProp\":\"value\",\"labelProp\":\"name\"},\"defaultValue\":\"false\"},{\"expressionProperties\":{\"templateOptions.disabled\":\"model.options.inferSchema || model.format === 'json' ? false : true\"},\"key\":\"options.samplingRatio\",\"hideExpression\":\"model.format === 'csv' || model.format === 'json' ? false : true\",\"type\":\"input\",\"templateOptions\":{\"label\":\"Sampling Ration\",\"type\":\"number\"}}]}"},{"id":"com.acxiom.pipeline.steps.Schema","schema":"{\"$schema\":\"http://json-schema.org/draft-07/schema#\",\"title\":\"Schema\",\"type\":\"object\",\"additionalProperties\":false,\"properties\":{\"attributes\":{\"type\":\"array\",\"items\":{\"$ref\":\"#/definitions/Attribute\"}}},\"definitions\":{\"Attribute\":{\"type\":\"object\",\"additionalProperties\":false,\"properties\":{\"name\":{\"type\":\"string\"},\"dataType\":{\"$ref\":\"#/definitions/AttributeType\"}}},\"AttributeType\":{\"type\":\"object\",\"additionalProperties\":false,\"properties\":{\"baseType\":{\"type\":\"string\"},\"valueType\":{\"$ref\":\"#/definitions/AttributeType\"},\"nameType\":{\"$ref\":\"#/definitions/AttributeType\"},\"schema\":{\"$ref\":\"#/definitions/Schema\"}}},\"Schema\":{\"type\":\"object\",\"additionalProperties\":false,\"properties\":{\"attributes\":{\"type\":\"array\",\"items\":{\"$ref\":\"#/definitions/Attribute\"}}}}}}"}]} diff --git a/metalus-core/src/main/scala/com/acxiom/pipeline/PipelineStepMapper.scala b/metalus-core/src/main/scala/com/acxiom/pipeline/PipelineStepMapper.scala index 5a968b9d..a3d5b733 100644 --- a/metalus-core/src/main/scala/com/acxiom/pipeline/PipelineStepMapper.scala +++ b/metalus-core/src/main/scala/com/acxiom/pipeline/PipelineStepMapper.scala @@ -218,8 +218,7 @@ trait PipelineStepMapper { def mapParameter(parameter: Parameter, pipelineContext: PipelineContext): Any = { // Get the value/defaultValue for this parameter val value = getParamValue(parameter) - val returnValue = if (value.isDefined) { - removeOptions(value) match { + val returnValue = value.map(removeOptions).flatMap { case s: String => parameter.`type`.getOrElse("none").toLowerCase match { case "script" => @@ -239,13 +238,11 @@ trait PipelineStepMapper { case b: Boolean => Some(b) case i: Int => Some(i) case i: BigInt => Some(i.toInt) + case d: Double => Some(d) case l: List[_] => handleListParameter(l, parameter, pipelineContext) case m: Map[_, _] => handleMapParameter(m, parameter, pipelineContext) case t => // Handle other types - This function may need to be reworked to support this so that it can be overridden throw new RuntimeException(s"Unsupported value type ${t.getClass} for ${parameter.name.getOrElse("unknown")}!") - } - } else { - None } // use the first valid (non-empty) value found diff --git a/metalus-core/src/main/scala/com/acxiom/pipeline/applications/ApplicationUtils.scala b/metalus-core/src/main/scala/com/acxiom/pipeline/applications/ApplicationUtils.scala index 97f61789..0ce62b70 100644 --- a/metalus-core/src/main/scala/com/acxiom/pipeline/applications/ApplicationUtils.scala +++ b/metalus-core/src/main/scala/com/acxiom/pipeline/applications/ApplicationUtils.scala @@ -65,6 +65,7 @@ object ApplicationUtils { * @param pipelineListener An optional PipelineListener. This may be overridden by the application. * @return An execution plan. */ + //noinspection ScalaStyle def createExecutionPlan(application: Application, globals: Option[Map[String, Any]], sparkConf: SparkConf, pipelineListener: PipelineListener = PipelineListener(), applicationTriggers: ApplicationTriggers = ApplicationTriggers(), @@ -77,18 +78,23 @@ object ApplicationUtils { logger.info(s"setting parquet dictionary enabled to ${applicationTriggers.parquetDictionaryEnabled.toString}") sparkSession.sparkContext.hadoopConfiguration.set("parquet.enable.dictionary", applicationTriggers.parquetDictionaryEnabled.toString) implicit val formats: Formats = getJson4sFormats(application.json4sSerializers) + val globalStepMapper = generateStepMapper(application.stepMapper, Some(PipelineStepMapper()), + applicationTriggers.validateArgumentTypes, credentialProvider) val rootGlobals = globals.getOrElse(Map[String, Any]()) // Create the default globals - val defaultGlobals = generateGlobals(application.globals, rootGlobals, Some(rootGlobals)) val globalListener = generatePipelineListener(application.pipelineListener, Some(pipelineListener), applicationTriggers.validateArgumentTypes, credentialProvider) val globalSecurityManager = generateSecurityManager(application.securityManager, Some(PipelineSecurityManager()), applicationTriggers.validateArgumentTypes, credentialProvider) - val globalStepMapper = generateStepMapper(application.stepMapper, Some(PipelineStepMapper()), - applicationTriggers.validateArgumentTypes, credentialProvider) val globalPipelineParameters = generatePipelineParameters(application.pipelineParameters, Some(PipelineParameters())) val pipelineManager = generatePipelineManager(application.pipelineManager, Some(PipelineManager(application.pipelines.getOrElse(List[DefaultPipeline]()))), applicationTriggers.validateArgumentTypes, credentialProvider).get + val initialContext = PipelineContext(Some(sparkConf), Some(sparkSession), Some(rootGlobals), globalSecurityManager.get, + globalPipelineParameters.get, application.stepPackages, globalStepMapper.get, globalListener, + Some(sparkSession.sparkContext.collectionAccumulator[PipelineStepMessage]("stepMessages")), + ExecutionAudit("root", AuditType.EXECUTION, Map[String, Any](), System.currentTimeMillis()), pipelineManager, + credentialProvider, Some(formats)) + val defaultGlobals = generateGlobals(application.globals, rootGlobals , Some(rootGlobals), initialContext) generateSparkListeners(application.sparkListeners, applicationTriggers.validateArgumentTypes, credentialProvider).getOrElse(List()).foreach(sparkSession.sparkContext.addSparkListener) addSparkListener(globalListener, sparkSession) @@ -101,18 +107,17 @@ object ApplicationUtils { } generateSparkListeners(execution.sparkListeners, applicationTriggers.validateArgumentTypes, credentialProvider).getOrElse(List()).foreach(sparkSession.sparkContext.addSparkListener) + val stepMapper = generateStepMapper(execution.stepMapper, globalStepMapper, applicationTriggers.validateArgumentTypes, + credentialProvider).get // Extracting pipelines - val ctx = PipelineContext(Some(sparkConf), - Some(sparkSession), - generateGlobals(execution.globals, rootGlobals, defaultGlobals, execution.mergeGlobals.getOrElse(false)), - generateSecurityManager(execution.securityManager, globalSecurityManager, + val ctx = initialContext.copy( + globals = generateGlobals(execution.globals, rootGlobals, defaultGlobals, initialContext, execution.mergeGlobals.getOrElse(false)), + security = generateSecurityManager(execution.securityManager, globalSecurityManager, applicationTriggers.validateArgumentTypes, credentialProvider).get, - generatePipelineParameters(execution.pipelineParameters, globalPipelineParameters).get, application.stepPackages, - generateStepMapper(execution.stepMapper, globalStepMapper, applicationTriggers.validateArgumentTypes, - credentialProvider).get, pipelineListener, - Some(sparkSession.sparkContext.collectionAccumulator[PipelineStepMessage]("stepMessages")), - ExecutionAudit("root", AuditType.EXECUTION, Map[String, Any](), System.currentTimeMillis()), - pipelineManager, credentialProvider, Some(formats)) + parameters = generatePipelineParameters(execution.pipelineParameters, globalPipelineParameters).get, + parameterMapper = stepMapper, + pipelineListener = pipelineListener + ) PipelineExecution(execution.id.getOrElse(""), generatePipelines(execution, application, pipelineManager), execution.initialPipelineId, ctx, execution.parents) }) @@ -132,10 +137,12 @@ object ApplicationUtils { execution: Execution, pipelineExecution: PipelineExecution): PipelineExecution = { implicit val formats: Formats = getJson4sFormats(application.json4sSerializers) - val defaultGlobals = generateGlobals(application.globals, rootGlobals.get, rootGlobals) + val initialContext = pipelineExecution.pipelineContext.copy(globals = rootGlobals) + val defaultGlobals = generateGlobals(application.globals, rootGlobals.get, rootGlobals, initialContext) val globalPipelineParameters = generatePipelineParameters(application.pipelineParameters, Some(PipelineParameters())) val ctx = pipelineExecution.pipelineContext - .copy(globals = generateGlobals(execution.globals, rootGlobals.get, defaultGlobals, execution.mergeGlobals.getOrElse(false))) + .copy(globals = generateGlobals(execution.globals, rootGlobals.get, defaultGlobals, + initialContext, execution.mergeGlobals.getOrElse(false))) .copy(parameters = generatePipelineParameters(execution.pipelineParameters, globalPipelineParameters).get) pipelineExecution.asInstanceOf[DefaultPipelineExecution].copy(pipelineContext = ctx) } @@ -266,18 +273,25 @@ object ApplicationUtils { private def generateGlobals(globals: Option[Map[String, Any]], rootGlobals: Map[String, Any], defaultGlobals: Option[Map[String, Any]], + pipelineContext: PipelineContext, merge: Boolean = false)(implicit formats: Formats): Option[Map[String, Any]] = { - if (globals.isEmpty) { - defaultGlobals - } else { - val baseGlobals = globals.get - val result = baseGlobals.foldLeft(rootGlobals)((rootMap, entry) => parseValue(rootMap, entry._1, entry._2)) - Some(if (merge) { + globals.map { baseGlobals => + val result = rootGlobals ++ baseGlobals.map{ + case (key, m: Map[String, Any]) if m.contains("className") => + key -> Parameter(Some("object"), Some(key), value = m.get("object"), className = m.get("className").map(_.toString)) + case (key, l: List[Any]) => key -> Parameter(Some("list"), Some(key), value = Some(l)) + case (key, value) => key -> Parameter(Some("text"), Some(key), value = Some(value)) + }.map{ + case ("GlobalLinks", p) => "GlobalLinks" -> p.value.get // skip global links + case (key, p) => key -> pipelineContext.parameterMapper.mapParameter(p, pipelineContext) + } + // val result = baseGlobals.foldLeft(rootGlobals)((rootMap, entry) => parseValue(rootMap, entry._1, entry._2)) + if (merge) { defaultGlobals.getOrElse(Map[String, Any]()) ++ result } else { result - }) - } + } + }.orElse(defaultGlobals) } private def parseParameters(classInfo: ClassInfo, credentialProvider: Option[CredentialProvider])(implicit formats: Formats): Map[String, Any] = { diff --git a/metalus-core/src/test/scala/com/acxiom/pipeline/applications/ApplicationTests.scala b/metalus-core/src/test/scala/com/acxiom/pipeline/applications/ApplicationTests.scala index ffbc0226..4b3716f7 100644 --- a/metalus-core/src/test/scala/com/acxiom/pipeline/applications/ApplicationTests.scala +++ b/metalus-core/src/test/scala/com/acxiom/pipeline/applications/ApplicationTests.scala @@ -399,7 +399,7 @@ class ApplicationTests extends FunSpec with BeforeAndAfterAll with Suite { assert(globals.contains("rootLogLevel")) assert(globals.contains("rootLogLevel")) assert(globals.contains("number")) - assert(globals("number").asInstanceOf[BigInt] == 5) + assert(globals("number").asInstanceOf[Int] == 5) assert(globals.contains("float")) assert(globals("float").asInstanceOf[Double] == 1.5) assert(globals.contains("string")) @@ -452,7 +452,7 @@ class ApplicationTests extends FunSpec with BeforeAndAfterAll with Suite { assert(globals.contains("rootLogLevel")) assert(globals.contains("rootLogLevel")) assert(globals.contains("number")) - assert(globals("number").asInstanceOf[BigInt] == 2) + assert(globals("number").asInstanceOf[Int] == 2) assert(globals.contains("float")) assert(globals("float").asInstanceOf[Double] == 3.5) assert(globals.contains("string")) @@ -492,7 +492,7 @@ class ApplicationTests extends FunSpec with BeforeAndAfterAll with Suite { assert(globals1.contains("rootLogLevel")) assert(globals1.contains("rootLogLevel")) assert(globals1.contains("number")) - assert(globals1("number").asInstanceOf[BigInt] == 1) + assert(globals1("number").asInstanceOf[Int] == 1) assert(globals1.contains("float")) assert(globals1("float").asInstanceOf[Double] == 1.5) assert(globals1.contains("string")) @@ -565,7 +565,7 @@ class ApplicationTests extends FunSpec with BeforeAndAfterAll with Suite { assert(globals.contains("rootLogLevel")) assert(globals.contains("rootLogLevel")) assert(globals.contains("number")) - assert(globals("number").asInstanceOf[BigInt] == 2) + assert(globals("number").asInstanceOf[Int] == 2) assert(globals.contains("float")) assert(globals("float").asInstanceOf[Double] == 3.5) assert(globals.contains("string"))