From 1692ee7f20dc628756237c5c8c7b9e72c6ea6ae3 Mon Sep 17 00:00:00 2001 From: dafreels Date: Thu, 21 Jan 2021 09:32:09 -0500 Subject: [PATCH] #206 Fixed issue causing the Metadata Extractor to pull in older version of libraries which caused a conflict. #207 Removed support for Spark 2.3/Scala 2.11 and Spark 2.4/Scala 2.12 --- .travis.yml | 94 ---------------- docs/application-example.md | 32 +----- docs/contributions.md | 19 ++-- docs/dependency-manager.md | 10 +- docs/getting-started.md | 4 +- docs/introduction.md | 9 +- docs/json-pipelines.md | 1 - docs/metadata-extractor.md | 36 ------- docs/metalus-application.md | 11 +- docs/pipeline-steps.md | 1 - docs/step-templates.md | 1 - manual_tests/manual-tests.sh | 33 ++++++ manual_tests/metadata-extractor-test.sh | 78 ++++++++++++++ manual_tests/readme.txt | 33 ++++++ manual_tests/spark-test.sh | 101 ++++++++++++++++++ .../testData/metalus-aws/pipelines.json | 1 + manual_tests/testData/metalus-aws/steps.json | 1 + .../testData/metalus-common/pipelines.json | 1 + .../testData/metalus-common/steps.json | 1 + .../testData/metalus-gcp/pipelines.json | 1 + manual_tests/testData/metalus-gcp/steps.json | 1 + .../testData/metalus-kafka/pipelines.json | 1 + .../testData/metalus-kafka/steps.json | 1 + .../testData/metalus-mongo/pipelines.json | 1 + .../testData/metalus-mongo/steps.json | 1 + manual_tests/testData/stop_server.js | 2 + manual_tests/testData/validate_mongo_data.js | 21 ++++ metalus-utils/assembly.xml | 1 + pom.xml | 37 ++----- readme.md | 28 ----- 30 files changed, 315 insertions(+), 247 deletions(-) create mode 100755 manual_tests/manual-tests.sh create mode 100755 manual_tests/metadata-extractor-test.sh create mode 100644 manual_tests/readme.txt create mode 100755 manual_tests/spark-test.sh create mode 100644 manual_tests/testData/metalus-aws/pipelines.json create mode 100644 manual_tests/testData/metalus-aws/steps.json create mode 100644 manual_tests/testData/metalus-common/pipelines.json create mode 100644 manual_tests/testData/metalus-common/steps.json create mode 100644 manual_tests/testData/metalus-gcp/pipelines.json create mode 100644 manual_tests/testData/metalus-gcp/steps.json create mode 100644 manual_tests/testData/metalus-kafka/pipelines.json create mode 100644 manual_tests/testData/metalus-kafka/steps.json create mode 100644 manual_tests/testData/metalus-mongo/pipelines.json create mode 100644 manual_tests/testData/metalus-mongo/steps.json create mode 100644 manual_tests/testData/stop_server.js create mode 100644 manual_tests/testData/validate_mongo_data.js diff --git a/.travis.yml b/.travis.yml index f2d0d1ad..3cfc94da 100644 --- a/.travis.yml +++ b/.travis.yml @@ -10,24 +10,6 @@ jobs: allow_failures: - os: windows include: - - stage: Build - name: Spark 2.3 Scala 2.11 Linux Build - os: linux - dist: trusty - scala: - - 2.11.12 - jdk: - - oraclejdk8 - install: true - script: mvn -B -P spark_2.3 clean install - - - name: Spark 2.3 Scala 2.11 OSX Build - os: osx - osx_image: xcode9.3 - scala: - - 2.11.12 - install: true - script: mvn -B -P spark_2.3 clean install - name: Spark 2.4 Scala 2.11 Linux Build os: linux @@ -44,24 +26,6 @@ jobs: - 2.11.12 install: true script: mvn -B -P spark_2.4 clean install - - - name: Spark 2.4 Scala 2.12 Linux Build - os: linux - dist: trusty - scala: - - 2.12.10 - jdk: - - oraclejdk8 - install: true - script: mvn -B -P spark_2.4,scala_2.12 clean install - - - name: Spark 2.4 Scala 2.12 OSX Build - os: osx - osx_image: xcode9.3 - scala: - - 2.12.10 - install: true - script: mvn -B -P 
spark_2.4,scala_2.12 clean install - name: Spark 3.0 Scala 2.12 Linux Build os: linux @@ -103,36 +67,6 @@ jobs: script: mvn -B -Dsonar.organization=$SONAR_ORGANIZATION -Dsonar.login=$SONAR_TOKEN -Dsonar.projectKey=Acxiom_spark-pipeline-driver-develop -Dsonar.host.url=https://sonarcloud.io scoverage:report scalastyle:check org.jacoco:jacoco-maven-plugin:prepare-agent sonar:sonar - stage: Release - if: branch = master AND type != pull_request AND fork = false AND repo = Acxiom/metalus - name: Spark 2.3 Scala 2.11 Build - os: linux - dist: trusty - scala: - - 2.11.12 - jdk: - - oraclejdk8 - before_install: - - openssl aes-256-cbc -K $encrypted_60fb54548dfb_key -iv $encrypted_60fb54548dfb_iv -in deployment/acxsigningkey.asc.enc -out deployment/acxsigningkey.asc -d - - mvn -P spark_2.3 -B versions:set -DremoveSnapshot - install: - - mvn -B -DskipTests=true -P spark_2.3 clean install - - mvn -B -P spark_2.3 -DrepoToken=$coverallsToken -Dsonar.organization=$SONAR_ORGANIZATION -Dsonar.login=$SONAR_TOKEN -Dsonar.projectKey=Acxiom_spark-pipeline-driver-release -Dsonar.host.url=https://sonarcloud.io scoverage:report scala:doc scalastyle:check coveralls:report org.jacoco:jacoco-maven-plugin:prepare-agent sonar:sonar - before_script: - - gpg --keyring=$TRAVIS_BUILD_DIR/pubring.gpg --no-default-keyring --import deployment/acxsigningkey.asc - - gpg --secret-keyring=$TRAVIS_BUILD_DIR/secring.gpg --no-default-keyring --import deployment/acxsigningkey.asc - script: mvn --settings deployment/release-settings.xml -pl .,metalus-core,metalus-aws,metalus-common,metalus-gcp,metalus-kafka,metalus-mongo,metalus-utils -B -P release,spark_2.3 -DskipTests=true -Dgpg.executable=gpg -Dgpg.keyname=F40721C83ED4BD317EA73B225B996D862829DE42 -Dgpg.passphrase=$PASSPHRASE -Dgpg.publicKeyring=$TRAVIS_BUILD_DIR/pubring.gpg -Dgpg.secretKeyring=$TRAVIS_BUILD_DIR/secring.gpg deploy - before_deploy: - - export project_version=$(mvn -q -Dexec.executable="echo" -Dexec.args='${project.version}' --non-recursive exec:exec) - deploy: - provider: releases - api_key: $GITHUB_OAUTH_TOKEN - name: PipelineDriver-Spark_2.3-Scala_2.11-${project_version} - skip_cleanup: true - file_glob: true - file: - - metalus-utils/target/metalus-utils_2.11-spark_2.3-${project_version}.tar.gz - - metalus-application/target/metalus-application_2.11-spark_2.3-${project_version}.jar - - if: branch = master AND type != pull_request AND fork = false AND repo = Acxiom/metalus name: Spark 2.4 Scala 2.11 Build os: linux @@ -161,34 +95,6 @@ jobs: file: - metalus-utils/target/metalus-utils_2.11-spark_2.4-${project_version}.tar.gz - metalus-application/target/metalus-application_2.11-spark_2.4-${project_version}.jar - - if: branch = master AND type != pull_request AND fork = false AND repo = Acxiom/metalus - name: Spark 2.4 Scala 2.12 Build - os: linux - dist: trusty - scala: - - 2.12.10 - jdk: - - oraclejdk8 - before_install: - - openssl aes-256-cbc -K $encrypted_60fb54548dfb_key -iv $encrypted_60fb54548dfb_iv -in deployment/acxsigningkey.asc.enc -out deployment/acxsigningkey.asc -d - - mvn -B versions:set -DremoveSnapshot - install: - - mvn -B -DskipTests=true -P spark_2.4,scala_2.12 clean install - before_script: - - gpg --keyring=$TRAVIS_BUILD_DIR/pubring.gpg --no-default-keyring --import deployment/acxsigningkey.asc - - gpg --secret-keyring=$TRAVIS_BUILD_DIR/secring.gpg --no-default-keyring --import deployment/acxsigningkey.asc - script: mvn --settings deployment/release-settings.xml -pl 
metalus-core,metalus-aws,metalus-common,metalus-gcp,metalus-kafka,metalus-mongo,metalus-utils -B -P release,spark_2.4,scala_2.12 -DskipTests=true -Dgpg.executable=gpg -Dgpg.keyname=F40721C83ED4BD317EA73B225B996D862829DE42 -Dgpg.passphrase=$PASSPHRASE -Dgpg.publicKeyring=$TRAVIS_BUILD_DIR/pubring.gpg -Dgpg.secretKeyring=$TRAVIS_BUILD_DIR/secring.gpg deploy - before_deploy: - - export project_version=$(mvn -q -Dexec.executable="echo" -Dexec.args='${project.version}' --non-recursive exec:exec) - deploy: - provider: releases - api_key: $GITHUB_OAUTH_TOKEN - name: PipelineDriver-Spark_2.4-Scala_2.12-${project_version} - skip_cleanup: true - file_glob: true - file: - - metalus-utils/target/metalus-utils_2.12-spark_2.4-${project_version}.tar.gz - - metalus-application/target/metalus-application_2.12-spark_2.4-${project_version}.jar - if: branch = master AND type != pull_request AND fork = false AND repo = Acxiom/metalus name: Spark 3.0 Scala 2.12 Build os: linux diff --git a/docs/application-example.md b/docs/application-example.md index 1d69f342..d2beefa5 100644 --- a/docs/application-example.md +++ b/docs/application-example.md @@ -1062,21 +1062,6 @@ The application commands below provide the proper templates to run the example: * __ - The fully qualified path to the built jars * __ - The fully qualified path to the example data -### Spark 2.3/Scala 2.11 -```bash -spark-submit --class com.acxiom.pipeline.drivers.DefaultPipelineDriver \ ---master spark://localhost:7077 \ ---deploy-mode client \ ---jars metalus-common_2.11-spark_2.3-.jar,metalus-examples_2.11-spark_2.3-.jar,metalus-mongo_2.11-spark_2.3-.jar,mongo-spark-connector_2.11-2.3.2.jar,mongo-java-driver-3.11.2.jar \ -/metalus-application_2.11-spark_2.3-.jar \ ---driverSetupClass com.acxiom.pipeline.applications.DefaultApplicationDriverSetup \ ---applicationConfigPath /application-example.json \ ---input_url /orders.csv \ ---input_format csv \ ---input_separator , \ ---mongoURI mongodb://localhost:27017/application_examples \ ---logLevel DEBUG -``` ### Spark 2.4/Scala 2.11 ```bash spark-submit --class com.acxiom.pipeline.drivers.DefaultPipelineDriver \ @@ -1092,27 +1077,12 @@ spark-submit --class com.acxiom.pipeline.drivers.DefaultPipelineDriver \ --mongoURI mongodb://localhost:27017/application_examples \ --logLevel DEBUG ``` -### Spark 2.4/Scala 2.12 -```bash -spark-submit --class com.acxiom.pipeline.drivers.DefaultPipelineDriver \ ---master spark://localhost:7077 \ ---deploy-mode client \ ---jars metalus-common_2.12-spark_2.4-.jar,metalus-examples_2.12-spark_2.4-.jar,metalus-mongo_2.12-spark_2.4-.jar,mongo-spark-connector_2.12-2.4.1.jar,mongo-java-driver-3.11.2.jar \ -/metalus-application_2.12-spark_2.4-.jar \ ---driverSetupClass com.acxiom.pipeline.applications.DefaultApplicationDriverSetup \ ---applicationConfigPath /application-example.json \ ---input_url /orders.csv \ ---input_format csv \ ---input_separator , \ ---mongoURI mongodb://localhost:27017/application_examples \ ---logLevel DEBUG -``` ### Spark 3.0/Scala 2.12 ```bash spark-submit --class com.acxiom.pipeline.drivers.DefaultPipelineDriver \ --master spark://localhost:7077 \ --deploy-mode client \ ---jars metalus-common_2.12-spark_3.0-.jar,metalus-examples_2.12-spark_3.0-.jar,metalus-mongo_2.12-spark_3.0-.jar,mongo-spark-connector_2.12-2.4.1.jar,mongo-java-driver-3.11.2.jar \ +--jars 
metalus-common_2.12-spark_3.0-.jar,metalus-examples_2.12-spark_3.0-.jar,metalus-mongo_2.12-spark_3.0-.jar,mongo-spark-connector_2.12-3.0.0.jar,mongodb-driver-sync-4.0.5.jar,mongodb-driver-core-4.0.5.jar,bson-4.0.5.jar \
 /metalus-application_2.12-spark_3.0-.jar \
 --driverSetupClass com.acxiom.pipeline.applications.DefaultApplicationDriverSetup \
 --applicationConfigPath /application-example.json \
diff --git a/docs/contributions.md b/docs/contributions.md
index 3c356a63..e7f0a0b5 100644
--- a/docs/contributions.md
+++ b/docs/contributions.md
@@ -24,20 +24,23 @@ get familiar with the coding style and learn if the functionality already exists
 ## Building
 The project is built using [Apache Maven](http://maven.apache.org/).
-To build the project using Scala 2.11 and Spark 2.3 run:
-
-    mvn
-
 To build the project using Scala 2.11 and Spark 2.4 run:
 
-    mvn -P spark_2.4
+    mvn
 
-To build the project using Scala 2.12 and Spark 2.4 run:
+To build the project using Scala 2.12 and Spark 3.0 run:
 
-    mvn -P spark_2.4,scala_2.12
+    mvn -P spark_3.0
 
 (This will clean, build, test and package the jars and generate documentation)
 
 ## Running tests
-Tests are part of the main build.
+In addition to the unit tests that are part of the main build, a set of manual tests is available
+that will build the project for each version, verify the Metadata Extractor and run the example
+application against the appropriate Spark version. Mongo must be installed and on the path.
+
+Command to execute the manual tests:
+```shell
+manual_tests/manual-tests.sh
+```
diff --git a/docs/dependency-manager.md b/docs/dependency-manager.md
index 443d4a82..337cc8f7 100644
--- a/docs/dependency-manager.md
+++ b/docs/dependency-manager.md
@@ -22,12 +22,12 @@ Here is a working example:
     {
       "groupId": "org.mongodb.spark",
       "artifactId": "mongo-spark-connector_2.11",
-      "version": "2.3.2"
+      "version": "2.4.2"
     },
     {
       "groupId": "org.mongodb",
       "artifactId": "mongo-java-driver",
-      "version": "3.11.2",
+      "version": "3.12.7",
       "scope": "extraction"
     }
   ]
@@ -81,7 +81,7 @@ bin/dependency-resolver.sh --jar-files /tmp/steps.jar,/tmp/common-steps.jar --ou
 
 Example Output with a _path-prefix_ of _hdfs://acxiom/jars/udl_:
 ```shell script
-hdfs://acxiom/jars/udl/metalus-mongo_2.11-spark_2.3-1.6.0-SNAPSHOT.jar:hdfs://acxiom/jars/udl/mongo-spark-connector_2.11-2.3.2.jar:hdfs://acxiom/jars/udl/mongo-java-driver-3.11.2.jar
+hdfs://acxiom/jars/udl/metalus-mongo_2.11-spark_2.4-1.6.0-SNAPSHOT.jar:hdfs://acxiom/jars/udl/mongo-spark-connector_2.11-2.4.2.jar:hdfs://acxiom/jars/udl/mongo-java-driver-3.12.7.jar
 ```
 
 ## Maven Dependency Resolver
@@ -96,12 +96,12 @@ Developers may choose to override the repo within the _dependencies.json_ file b
     {
       "groupId": "org.mongodb.spark",
       "artifactId": "mongo-spark-connector_2.11",
-      "version": "2.3.2"
+      "version": "2.4.2"
     },
     {
       "groupId": "org.mongodb",
       "artifactId": "mongo-java-driver",
-      "version": "3.11.2",
+      "version": "3.12.7",
       "scope": "extraction"
     }
   ]
diff --git a/docs/getting-started.md b/docs/getting-started.md
index 2f872c28..8b6311a0 100644
--- a/docs/getting-started.md
+++ b/docs/getting-started.md
@@ -13,9 +13,7 @@ Below are the basic build commands required to build the Metalus libraries:
 
 |Spark Version|Scala Version|Command|
 |-------------|-------------|-------|
-|2.3          |2.11         |mvn -P spark_2.3 clean install|
 |2.4          |2.11         |mvn clean install|
-|2.4          |2.12         |mvn -P spark_2.4,scala_2.12 clean install|
 |3.0          |2.12         |mvn -P spark_3.0 clean install|
 
 
@@ -25,7 +23,7 @@ This example requires a local Mongo instance to be running. 
A free version can b ## Spark Setup Download the desired version of [Spark](http://spark.apache.org/downloads.html) and unpack. Metalus supports versions -2.3, 2.4 or 3.0. +2.4 or 3.0. Start the spark server from the unpacked directory with the following commands: diff --git a/docs/introduction.md b/docs/introduction.md index 5d94ae25..976cafea 100644 --- a/docs/introduction.md +++ b/docs/introduction.md @@ -40,24 +40,21 @@ Metalus core provides the base library required to run metalus applications and |Spark Version|Scala Version|Library| |-------------|-------------|-------| -|2.3 |2.11 |[Maven 2.11 Spark 2.3 library](https://search.maven.org/search?q=a:metalus-core_2.11-spark_2.3)| |2.4 |2.11 |[Maven 2.11 Spark 2.4 library](https://search.maven.org/search?q=a:metalus-core_2.11-spark_2.4)| -|2.4 |2.12 |[Maven 2.12 Spark 2.4 library](https://search.maven.org/search?q=a:metalus-core_2.12-spark_2.4)| +|3.0 |2.12 |[Maven 2.12 Spark 3.0 library](https://search.maven.org/search?q=a:metalus-core_2.12-spark_3.0)| ## Metalus Common Metalus common provides a step library for building basic applications. |Spark Version|Scala Version|Library| |-------------|-------------|-------| -|2.3 |2.11 |[Maven 2.11 Spark 2.3 library](https://search.maven.org/search?q=a:metalus-common_2.11-spark_2.3)| |2.4 |2.11 |[Maven 2.11 Spark 2.4 library](https://search.maven.org/search?q=a:metalus-common_2.11-spark_2.4)| -|2.4 |2.12 |[Maven 2.12 Spark 2.4 library](https://search.maven.org/search?q=a:metalus-common_2.12-spark_2.4)| +|3.0 |2.12 |[Maven 2.12 Spark 3.0 library](https://search.maven.org/search?q=a:metalus-common_2.12-spark_3.0)| ## Metalus AWS Metalus AWS provides a step library for working with AWS technologies. |Spark Version|Scala Version|Library| |-------------|-------------|-------| -|2.3 |2.11 |[Maven 2.11 Spark 2.3 library](https://search.maven.org/search?q=a:metalus-aws_2.11-spark_2.3)| |2.4 |2.11 |[Maven 2.11 Spark 2.4 library](https://search.maven.org/search?q=a:metalus-aws_2.11-spark_2.4)| -|2.4 |2.12 |[Maven 2.12 Spark 2.4 library](https://search.maven.org/search?q=a:metalus-aws_2.12-spark_2.4)| +|3.0 |2.12 |[Maven 2.12 Spark 3.0 library](https://search.maven.org/search?q=a:metalus-aws_2.12-spark_3.0)| diff --git a/docs/json-pipelines.md b/docs/json-pipelines.md index 1dbeabcc..4fb37b75 100644 --- a/docs/json-pipelines.md +++ b/docs/json-pipelines.md @@ -75,7 +75,6 @@ Below is an example of how a basic two step pipeline may look once complete: "pkg": "com.acxiom.pipeline.steps" }, "tags": [ - "metalus-common_2.11-spark_2.3-1.5.0-SNAPSHOT.jar", "metalus-common_2.11-spark_2.4-1.5.0-SNAPSHOT.jar" ], "stepId": "87db259d-606e-46eb-b723-82923349640f" diff --git a/docs/metadata-extractor.md b/docs/metadata-extractor.md index 56ef8c3a..f95f513e 100644 --- a/docs/metadata-extractor.md +++ b/docs/metadata-extractor.md @@ -188,7 +188,6 @@ bin/metadata-extractor.sh --jar-files /tmp/steps.jar,/tmp/common-steps.jar --out } }, "tags": [ - "metalus-common_2.11-spark_2.3-1.5.0-SNAPSHOT.jar", "metalus-common_2.11-spark_2.4-1.5.0-SNAPSHOT.jar" ] }, @@ -227,7 +226,6 @@ bin/metadata-extractor.sh --jar-files /tmp/steps.jar,/tmp/common-steps.jar --out } }, "tags": [ - "metalus-common_2.11-spark_2.3-1.5.0-SNAPSHOT.jar", "metalus-common_2.11-spark_2.4-1.5.0-SNAPSHOT.jar" ] }, @@ -260,7 +258,6 @@ bin/metadata-extractor.sh --jar-files /tmp/steps.jar,/tmp/common-steps.jar --out } }, "tags": [ - "metalus-common_2.11-spark_2.3-1.5.0-SNAPSHOT.jar", "metalus-common_2.11-spark_2.4-1.5.0-SNAPSHOT.jar" ] }, @@ -293,7 +290,6 @@ 
bin/metadata-extractor.sh --jar-files /tmp/steps.jar,/tmp/common-steps.jar --out } }, "tags": [ - "metalus-common_2.11-spark_2.3-1.5.0-SNAPSHOT.jar", "metalus-common_2.11-spark_2.4-1.5.0-SNAPSHOT.jar" ] }, @@ -332,7 +328,6 @@ bin/metadata-extractor.sh --jar-files /tmp/steps.jar,/tmp/common-steps.jar --out } }, "tags": [ - "metalus-common_2.11-spark_2.3-1.5.0-SNAPSHOT.jar", "metalus-common_2.11-spark_2.4-1.5.0-SNAPSHOT.jar" ] }, @@ -351,7 +346,6 @@ bin/metadata-extractor.sh --jar-files /tmp/steps.jar,/tmp/common-steps.jar --out } }, "tags": [ - "metalus-common_2.11-spark_2.3-1.5.0-SNAPSHOT.jar", "metalus-common_2.11-spark_2.4-1.5.0-SNAPSHOT.jar" ] }, @@ -378,7 +372,6 @@ bin/metadata-extractor.sh --jar-files /tmp/steps.jar,/tmp/common-steps.jar --out } }, "tags": [ - "metalus-common_2.11-spark_2.3-1.5.0-SNAPSHOT.jar", "metalus-common_2.11-spark_2.4-1.5.0-SNAPSHOT.jar" ] }, @@ -417,7 +410,6 @@ bin/metadata-extractor.sh --jar-files /tmp/steps.jar,/tmp/common-steps.jar --out } }, "tags": [ - "metalus-common_2.11-spark_2.3-1.5.0-SNAPSHOT.jar", "metalus-common_2.11-spark_2.4-1.5.0-SNAPSHOT.jar" ] }, @@ -443,7 +435,6 @@ bin/metadata-extractor.sh --jar-files /tmp/steps.jar,/tmp/common-steps.jar --out } }, "tags": [ - "metalus-common_2.11-spark_2.3-1.5.0-SNAPSHOT.jar", "metalus-common_2.11-spark_2.4-1.5.0-SNAPSHOT.jar" ] }, @@ -470,7 +461,6 @@ bin/metadata-extractor.sh --jar-files /tmp/steps.jar,/tmp/common-steps.jar --out } }, "tags": [ - "metalus-common_2.11-spark_2.3-1.5.0-SNAPSHOT.jar", "metalus-common_2.11-spark_2.4-1.5.0-SNAPSHOT.jar" ] }, @@ -514,7 +504,6 @@ bin/metadata-extractor.sh --jar-files /tmp/steps.jar,/tmp/common-steps.jar --out } }, "tags": [ - "metalus-common_2.11-spark_2.3-1.5.0-SNAPSHOT.jar", "metalus-common_2.11-spark_2.4-1.5.0-SNAPSHOT.jar" ] }, @@ -552,7 +541,6 @@ bin/metadata-extractor.sh --jar-files /tmp/steps.jar,/tmp/common-steps.jar --out } }, "tags": [ - "metalus-common_2.11-spark_2.3-1.5.0-SNAPSHOT.jar", "metalus-common_2.11-spark_2.4-1.5.0-SNAPSHOT.jar" ] }, @@ -602,7 +590,6 @@ bin/metadata-extractor.sh --jar-files /tmp/steps.jar,/tmp/common-steps.jar --out } }, "tags": [ - "metalus-common_2.11-spark_2.3-1.5.0-SNAPSHOT.jar", "metalus-common_2.11-spark_2.4-1.5.0-SNAPSHOT.jar" ] }, @@ -635,7 +622,6 @@ bin/metadata-extractor.sh --jar-files /tmp/steps.jar,/tmp/common-steps.jar --out } }, "tags": [ - "metalus-common_2.11-spark_2.3-1.5.0-SNAPSHOT.jar", "metalus-common_2.11-spark_2.4-1.5.0-SNAPSHOT.jar" ] }, @@ -680,7 +666,6 @@ bin/metadata-extractor.sh --jar-files /tmp/steps.jar,/tmp/common-steps.jar --out } }, "tags": [ - "metalus-common_2.11-spark_2.3-1.5.0-SNAPSHOT.jar", "metalus-common_2.11-spark_2.4-1.5.0-SNAPSHOT.jar" ] }, @@ -726,7 +711,6 @@ bin/metadata-extractor.sh --jar-files /tmp/steps.jar,/tmp/common-steps.jar --out } }, "tags": [ - "metalus-common_2.11-spark_2.3-1.5.0-SNAPSHOT.jar", "metalus-common_2.11-spark_2.4-1.5.0-SNAPSHOT.jar" ] }, @@ -771,7 +755,6 @@ bin/metadata-extractor.sh --jar-files /tmp/steps.jar,/tmp/common-steps.jar --out } }, "tags": [ - "metalus-common_2.11-spark_2.3-1.5.0-SNAPSHOT.jar", "metalus-common_2.11-spark_2.4-1.5.0-SNAPSHOT.jar" ] }, @@ -804,7 +787,6 @@ bin/metadata-extractor.sh --jar-files /tmp/steps.jar,/tmp/common-steps.jar --out } }, "tags": [ - "metalus-common_2.11-spark_2.3-1.5.0-SNAPSHOT.jar", "metalus-common_2.11-spark_2.4-1.5.0-SNAPSHOT.jar" ] }, @@ -836,7 +818,6 @@ bin/metadata-extractor.sh --jar-files /tmp/steps.jar,/tmp/common-steps.jar --out } }, "tags": [ - "metalus-common_2.11-spark_2.3-1.5.0-SNAPSHOT.jar", 
"metalus-common_2.11-spark_2.4-1.5.0-SNAPSHOT.jar" ] }, @@ -862,7 +843,6 @@ bin/metadata-extractor.sh --jar-files /tmp/steps.jar,/tmp/common-steps.jar --out } }, "tags": [ - "metalus-common_2.11-spark_2.3-1.5.0-SNAPSHOT.jar", "metalus-common_2.11-spark_2.4-1.5.0-SNAPSHOT.jar" ] }, @@ -894,7 +874,6 @@ bin/metadata-extractor.sh --jar-files /tmp/steps.jar,/tmp/common-steps.jar --out } }, "tags": [ - "metalus-common_2.11-spark_2.3-1.5.0-SNAPSHOT.jar", "metalus-common_2.11-spark_2.4-1.5.0-SNAPSHOT.jar" ] }, @@ -933,7 +912,6 @@ bin/metadata-extractor.sh --jar-files /tmp/steps.jar,/tmp/common-steps.jar --out } }, "tags": [ - "metalus-common_2.11-spark_2.3-1.5.0-SNAPSHOT.jar", "metalus-common_2.11-spark_2.4-1.5.0-SNAPSHOT.jar" ] }, @@ -966,7 +944,6 @@ bin/metadata-extractor.sh --jar-files /tmp/steps.jar,/tmp/common-steps.jar --out } }, "tags": [ - "metalus-common_2.11-spark_2.3-1.5.0-SNAPSHOT.jar", "metalus-common_2.11-spark_2.4-1.5.0-SNAPSHOT.jar" ] }, @@ -992,7 +969,6 @@ bin/metadata-extractor.sh --jar-files /tmp/steps.jar,/tmp/common-steps.jar --out } }, "tags": [ - "metalus-common_2.11-spark_2.3-1.5.0-SNAPSHOT.jar", "metalus-common_2.11-spark_2.4-1.5.0-SNAPSHOT.jar" ] }, @@ -1043,7 +1019,6 @@ bin/metadata-extractor.sh --jar-files /tmp/steps.jar,/tmp/common-steps.jar --out } }, "tags": [ - "metalus-common_2.11-spark_2.3-1.5.0-SNAPSHOT.jar", "metalus-common_2.11-spark_2.4-1.5.0-SNAPSHOT.jar" ] }, @@ -1088,7 +1063,6 @@ bin/metadata-extractor.sh --jar-files /tmp/steps.jar,/tmp/common-steps.jar --out } }, "tags": [ - "metalus-common_2.11-spark_2.3-1.5.0-SNAPSHOT.jar", "metalus-common_2.11-spark_2.4-1.5.0-SNAPSHOT.jar" ] }, @@ -1114,7 +1088,6 @@ bin/metadata-extractor.sh --jar-files /tmp/steps.jar,/tmp/common-steps.jar --out } }, "tags": [ - "metalus-common_2.11-spark_2.3-1.5.0-SNAPSHOT.jar", "metalus-common_2.11-spark_2.4-1.5.0-SNAPSHOT.jar" ] }, @@ -1158,7 +1131,6 @@ bin/metadata-extractor.sh --jar-files /tmp/steps.jar,/tmp/common-steps.jar --out } }, "tags": [ - "metalus-common_2.11-spark_2.3-1.5.0-SNAPSHOT.jar", "metalus-common_2.11-spark_2.4-1.5.0-SNAPSHOT.jar" ] }, @@ -1214,7 +1186,6 @@ bin/metadata-extractor.sh --jar-files /tmp/steps.jar,/tmp/common-steps.jar --out } }, "tags": [ - "metalus-common_2.11-spark_2.3-1.5.0-SNAPSHOT.jar", "metalus-common_2.11-spark_2.4-1.5.0-SNAPSHOT.jar" ] }, @@ -1276,7 +1247,6 @@ bin/metadata-extractor.sh --jar-files /tmp/steps.jar,/tmp/common-steps.jar --out } }, "tags": [ - "metalus-common_2.11-spark_2.3-1.5.0-SNAPSHOT.jar", "metalus-common_2.11-spark_2.4-1.5.0-SNAPSHOT.jar" ] }, @@ -1302,7 +1272,6 @@ bin/metadata-extractor.sh --jar-files /tmp/steps.jar,/tmp/common-steps.jar --out } }, "tags": [ - "metalus-common_2.11-spark_2.3-1.5.0-SNAPSHOT.jar", "metalus-common_2.11-spark_2.4-1.5.0-SNAPSHOT.jar" ] }, @@ -1352,7 +1321,6 @@ bin/metadata-extractor.sh --jar-files /tmp/steps.jar,/tmp/common-steps.jar --out } }, "tags": [ - "metalus-common_2.11-spark_2.3-1.5.0-SNAPSHOT.jar", "metalus-common_2.11-spark_2.4-1.5.0-SNAPSHOT.jar" ] }, @@ -1379,7 +1347,6 @@ bin/metadata-extractor.sh --jar-files /tmp/steps.jar,/tmp/common-steps.jar --out } }, "tags": [ - "metalus-common_2.11-spark_2.3-1.5.0-SNAPSHOT.jar", "metalus-common_2.11-spark_2.4-1.5.0-SNAPSHOT.jar" ] }, @@ -1412,7 +1379,6 @@ bin/metadata-extractor.sh --jar-files /tmp/steps.jar,/tmp/common-steps.jar --out } }, "tags": [ - "metalus-common_2.11-spark_2.3-1.5.0-SNAPSHOT.jar", "metalus-common_2.11-spark_2.4-1.5.0-SNAPSHOT.jar" ] }, @@ -1439,7 +1405,6 @@ bin/metadata-extractor.sh --jar-files 
/tmp/steps.jar,/tmp/common-steps.jar --out
 }
 },
 "tags": [
-"metalus-common_2.11-spark_2.3-1.5.0-SNAPSHOT.jar",
 "metalus-common_2.11-spark_2.4-1.5.0-SNAPSHOT.jar"
 ]
 },
@@ -1472,7 +1437,6 @@ bin/metadata-extractor.sh --jar-files /tmp/steps.jar,/tmp/common-steps.jar --out
 }
 },
 "tags": [
-"metalus-common_2.11-spark_2.3-1.5.0-SNAPSHOT.jar",
 "metalus-common_2.11-spark_2.4-1.5.0-SNAPSHOT.jar"
 ]
 }
diff --git a/docs/metalus-application.md b/docs/metalus-application.md
index 33a8631a..bd7faf8f 100644
--- a/docs/metalus-application.md
+++ b/docs/metalus-application.md
@@ -10,9 +10,8 @@ run a basic application.
 Open a terminal window and change to the local Spark directory. **Note**: the version of Spark dictates the version
 of the jars that need to be used.
-**Scala 2.11 Spark 2.3**: metalus-application_2.11-spark_2.3-.jar
 **Scala 2.11 Spark 2.4**: metalus-application_2.11-spark_2.4-.jar
-**Scala 2.12 Spark 2.4**: metalus-application_2.12-spark_2.4-.jar
+**Scala 2.12 Spark 3.0**: metalus-application_2.12-spark_3.0-.jar
 
 Once the master and worker has been started, the UI may be accessed using these URLs:
@@ -44,18 +43,18 @@ sbin/stop-slave.sh
 sbin/stop-master.sh
 ```
 
-### Run the spark-submit command for Spark 2.3:
+### Run the spark-submit command for Spark 2.4:
 
 ```bash
 spark-submit --class com.acxiom.pipeline.drivers.DefaultPipelineDriver \
 --master spark://localhost:7077 \
 --deploy-mode client \
 --jars \
-/metalus-application_2.11-spark_2.3-.jar \
+/metalus-application_2.11-spark_2.4-.jar \
 --driverSetupClass com.acxiom.pipeline.applications.DefaultApplicationDriverSetup \
 --applicationConfigPath \
 --logLevel DEBUG
 ```
 
-### Run the spark-submit command for Spark 2.4:
+### Run the spark-submit command for Spark 3.0:
 
@@ -64,7 +63,7 @@ spark-submit --class com.acxiom.pipeline.drivers.DefaultPipelineDriver \
 --master spark://localhost:7077 \
 --deploy-mode client \
 --jars \
-/metalus-application_2.11-spark_2.4-.jar \
+/metalus-application_2.12-spark_3.0-.jar \
 --driverSetupClass com.acxiom.pipeline.applications.DefaultApplicationDriverSetup \
 --applicationConfigPath \
 --logLevel DEBUG
diff --git a/docs/pipeline-steps.md b/docs/pipeline-steps.md
index 847790a4..71febfc5 100644
--- a/docs/pipeline-steps.md
+++ b/docs/pipeline-steps.md
@@ -31,7 +31,6 @@ A pipeline step begins with a step template, but makes several crucial changes.
 "pkg": "com.acxiom.pipeline.steps"
 },
 "tags": [
-"metalus-common_2.11-spark_2.3-1.5.0-SNAPSHOT.jar",
 "metalus-common_2.11-spark_2.4-1.5.0-SNAPSHOT.jar"
 ]
 }
diff --git a/docs/step-templates.md b/docs/step-templates.md
index 83136351..6d71045f 100644
--- a/docs/step-templates.md
+++ b/docs/step-templates.md
@@ -80,7 +80,6 @@ function. Either an annotation must be provided or the step function developer w
 }
 },
 "tags": [
-"metalus-common_2.11-spark_2.3-1.5.0-SNAPSHOT.jar",
 "metalus-common_2.11-spark_2.4-1.5.0-SNAPSHOT.jar"
 ]
 }
diff --git a/manual_tests/manual-tests.sh b/manual_tests/manual-tests.sh
new file mode 100755
index 00000000..cada9f43
--- /dev/null
+++ b/manual_tests/manual-tests.sh
@@ -0,0 +1,33 @@
+#!/usr/bin/env bash
+
+validateResult() {
+  ret=$1
+  msg=$2
+  if [[ $ret -ne 0 ]]; then
+    echo $msg
+    exit $ret
+  fi
+}
+
+# Setup the temporary location for testing
+bindir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+dir=$(dirname "${bindir}")
+cd "$dir"
+
+# 2.4
+echo "Testing Spark 2.4"
+mvn clean install
+validateResult ${?} "Failed to build project"
+manual_tests/metadata-extractor-test.sh
+validateResult ${?} "Failed Metadata Extractor Test"
+manual_tests/spark-test.sh
+validateResult ${?} "Failed Spark Test"
+
+# 3.0
+echo "Testing Spark 3.0"
+mvn -P spark_3.0 clean install
+validateResult ${?} "Failed to build project"
+manual_tests/metadata-extractor-test.sh
+validateResult ${?} "Failed Metadata Extractor Test"
+manual_tests/spark-test.sh
+validateResult ${?} "Failed Spark Test"
diff --git a/manual_tests/metadata-extractor-test.sh b/manual_tests/metadata-extractor-test.sh
new file mode 100755
index 00000000..c6795e33
--- /dev/null
+++ b/manual_tests/metadata-extractor-test.sh
@@ -0,0 +1,78 @@
+#!/usr/bin/env bash
+
+# Compares the test data against the generated metadata. The tags arrays are stripped first since jar names vary by version.
+checkResults() {
+  project=$1
+  file=$2
+  stagingDir=`echo $tmpDir/staging/${project}*`
+  baseFile=$dir/manual_tests/testData/$project/${file}.json
+  compareFile=$stagingDir/${file}.json
+  $dir/manual_tests/jq 'del(.. | .tags?)' $baseFile > $stagingDir/${file}_base.json
+  $dir/manual_tests/jq 'del(.. | .tags?)' $compareFile > $stagingDir/${file}_updated.json
+  results=`diff --brief $stagingDir/${file}_base.json $stagingDir/${file}_updated.json`
+  if [ ${#results} -gt 0 ]
+  then
+    echo "There was a difference found in the ${project} project for file ${file}!"
+    exit 1
+  fi
+}
+
+# Setup the temporary location for testing
+bindir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+dir=$(dirname "${bindir}")
+tmpDir="${dir}/manual_tests/tmp"
+rm -rf $tmpDir
+mkdir $tmpDir
+
+# Copy the built Metalus Utils to a temporary directory
+cp ${dir}/metalus-utils/target/*.gz $tmpDir
+cd $tmpDir
+tar xf $tmpDir/*.gz
+
+# Get a list of jar files
+jarFiles=`ls $dir/metalus-aws/target/metalus-aws*.jar | grep -v javadoc`
+jarFiles+=","
+jarFiles+=`ls $dir/metalus-gcp/target/metalus-gcp*.jar | grep -v javadoc`
+jarFiles+=","
+jarFiles+=`ls $dir/metalus-common/target/metalus-common*.jar | grep -v javadoc`
+jarFiles+=","
+jarFiles+=`ls $dir/metalus-kafka/target/metalus-kafka*.jar | grep -v javadoc`
+jarFiles+=","
+jarFiles+=`ls $dir/metalus-mongo/target/metalus-mongo*.jar | grep -v javadoc`
+
+# Run the command to get the data
+mkdir $tmpDir/staging
+$tmpDir/metalus-utils/bin/metadata-extractor.sh \
+--output-path $tmpDir/staging \
+--jar-files $jarFiles \
+--no-auth-download true \
+--repo ~/.m2 \
+--clean-staging true
+
+# Get the jq command
+if [[ ! -f $dir/manual_tests/jq ]]; then
+  if [[ "$OSTYPE" == "linux-gnu"* ]]
+  then
+    curl -L https://github.com/stedolan/jq/releases/download/jq-1.6/jq-linux64 > $dir/manual_tests/jq
+  fi
+  if [[ "$OSTYPE" == "darwin"* ]]
+  then
+    curl -L https://github.com/stedolan/jq/releases/download/jq-1.6/jq-osx-amd64 > $dir/manual_tests/jq
+  fi
+fi
+chmod +x $dir/manual_tests/jq
+
+# Validate the results
+checkResults "metalus-aws" "pipelines"
+checkResults "metalus-aws" "steps"
+checkResults "metalus-gcp" "pipelines"
+checkResults "metalus-gcp" "steps"
+checkResults "metalus-common" "pipelines"
+checkResults "metalus-common" "steps"
+checkResults "metalus-kafka" "pipelines"
+checkResults "metalus-kafka" "steps"
+checkResults "metalus-mongo" "pipelines"
+checkResults "metalus-mongo" "steps"
+
+# Cleanup the temp directory
+rm -rf $tmpDir
diff --git a/manual_tests/readme.txt b/manual_tests/readme.txt
new file mode 100644
index 00000000..e4c6eceb
--- /dev/null
+++ b/manual_tests/readme.txt
@@ -0,0 +1,33 @@
+This directory contains manual tests that should be performed for each version prior to release.
+
+Metalus Spark Tests
+This test requires that the maven build is executed for the specific version prior to running the test. It is a
+smoke test that runs against different versions of Spark to ensure that the compiled jars will run as expected.
+
+The test:
+ * Download the specific versions of Spark if they don't already exist
+ * Use the application example
+ * Start the master node
+ * Start the slave node
+ * Run the Spark submit command against the jars in the target directories
+ * Connect to mongo and verify the results
+ * Drop database
+Is there a way to automatically run these tests?
+
+Metalus Extraction Tests
+This test requires that the maven build is executed for the specific version prior to running the test. The
+local maven repo "~/.m2" will be used to pull dependent artifacts. Each build needs to be run prior to
+running the manual tests.
+
+
+Scala 2.11 Spark 2.4
+Build: mvn clean install
+Tests:
+manual_tests/metadata-extractor-test.sh
+manual_tests/spark-test.sh
+
+Scala 2.12 Spark 3.0
+Build: mvn -P spark_3.0 clean install
+Tests:
+manual_tests/metadata-extractor-test.sh
+manual_tests/spark-test.sh
diff --git a/manual_tests/spark-test.sh b/manual_tests/spark-test.sh
new file mode 100755
index 00000000..02d23cff
--- /dev/null
+++ b/manual_tests/spark-test.sh
@@ -0,0 +1,101 @@
+#!/usr/bin/env bash
+
+# Setup the temporary location for testing
+bindir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+dir=$(dirname "${bindir}")
+tmpDir="${dir}/manual_tests/tmp"
+serversDir="${dir}/manual_tests/servers"
+rm -rf $tmpDir
+mkdir $tmpDir
+mkdir -p $serversDir
+
+# Determine the Scala/Spark versions from the built metalus-aws jar name
+regex='(metalus-aws_)([^-]*)'
+sparkRegex='(spark_)([^-]*)'
+jarFiles=`ls $dir/metalus-aws/target/metalus-aws*.jar | grep -v javadoc`
+[[ $jarFiles =~ $regex ]]
+scalaCompat=${BASH_REMATCH[2]}
+[[ $jarFiles =~ $sparkRegex ]]
+sparkCompat=${BASH_REMATCH[2]}
+# Download/Unpack Spark
+cd $serversDir
+jarFiles=""
+if [[ "$sparkCompat" == "2.4" ]]
+then
+  if [[ ! 
-f $serversDir/spark-2.4.7-bin-hadoop2.7.tgz ]] + then + echo "Downloading 2.4 Spark" + curl -L https://downloads.apache.org/spark/spark-2.4.7/spark-2.4.7-bin-hadoop2.7.tgz > $serversDir/spark-2.4.7-bin-hadoop2.7.tgz + curl -L https://repo1.maven.org/maven2/org/mongodb/spark/mongo-spark-connector_2.11/2.4.2/mongo-spark-connector_2.11-2.4.2.jar > $serversDir/mongo-spark-connector_2.11-2.4.2.jar + curl -L https://repo1.maven.org/maven2/org/mongodb/mongo-java-driver/3.12.7/mongo-java-driver-3.12.7.jar > $serversDir/mongo-java-driver-3.12.7.jar + tar xf $serversDir/spark-2.4.7-bin-hadoop2.7.tgz + fi + sparkDir="${serversDir}/spark-2.4.7-bin-hadoop2.7" + jarFiles="${serversDir}/mongo-spark-connector_2.11-2.4.2.jar,${serversDir}/mongo-java-driver-3.12.7.jar," +fi +if [[ "$sparkCompat" == "3.0" ]] +then + if [[ ! -f $serversDir/spark-3.0.1-bin-hadoop2.7.tgz ]] + then + echo "Downloading 3.0 Spark" + curl -L https://downloads.apache.org/spark/spark-3.0.1/spark-3.0.1-bin-hadoop2.7.tgz > $serversDir/spark-3.0.1-bin-hadoop2.7.tgz + curl -L https://repo1.maven.org/maven2/org/mongodb/spark/mongo-spark-connector_2.12/3.0.0/mongo-spark-connector_2.12-3.0.0.jar > $serversDir/mongo-spark-connector_2.12-3.0.0.jar + curl -L https://repo1.maven.org/maven2/org/mongodb/mongodb-driver-core/4.0.5/mongodb-driver-core-4.0.5.jar > $serversDir/mongodb-driver-core-4.0.5.jar + curl -L https://repo1.maven.org/maven2/org/mongodb/mongodb-driver-sync/4.0.5/mongodb-driver-sync-4.0.5.jar > $serversDir/mongodb-driver-sync-4.0.5.jar + curl -L https://repo1.maven.org/maven2/org/mongodb/bson/4.0.5/bson-4.0.5.jar > $serversDir/bson-4.0.5.jar + tar xf $serversDir/spark-3.0.1-bin-hadoop2.7.tgz + fi + sparkDir="${serversDir}/spark-3.0.1-bin-hadoop2.7" + jarFiles="${serversDir}/mongo-spark-connector_2.12-3.0.0.jar,${serversDir}/mongodb-driver-sync-4.0.5.jar,${serversDir}/mongodb-driver-core-4.0.5.jar,${serversDir}/bson-4.0.5.jar," +fi +cd .. 
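+# Optional guard (a sketch; the test assumes Mongo is already installed and on
+# the path, per docs/contributions.md): fail fast if mongod cannot be found
+# rather than failing later when the server is started.
+if ! command -v mongod > /dev/null 2>&1
+then
+  echo "mongod must be installed and on the path to run this test"
+  exit 1
+fi
+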
+# Startup Mongo
+mkdir -p $tmpDir/data
+mkdir -p $tmpDir/mongodb
+mongod --fork --logpath $tmpDir/mongodb/mongod.log --dbpath $tmpDir/data
+# Start Spark
+export SPARK_LOCAL_IP=127.0.0.1
+$sparkDir/sbin/start-master.sh -h localhost -p 7077
+$sparkDir/sbin/start-slave.sh localhost:7077 -h localhost
+# Build classpath
+jarFiles+=`ls $dir/metalus-common/target/metalus-common*.jar | grep -v javadoc`
+jarFiles+=","
+jarFiles+=`ls $dir/metalus-examples/target/metalus-examples*.jar | grep -v javadoc`
+jarFiles+=","
+jarFiles+=`ls $dir/metalus-mongo/target/metalus-mongo*.jar | grep -v javadoc`
+
+# Run Spark Submit
+applicationJar=`ls $dir/metalus-application/target/metalus-application*.jar | grep -v javadoc`
+echo $applicationJar
+$sparkDir/bin/spark-submit --class com.acxiom.pipeline.drivers.DefaultPipelineDriver \
+--master spark://localhost:7077 \
+--deploy-mode client \
+--jars $jarFiles \
+$applicationJar \
+--driverSetupClass com.acxiom.pipeline.applications.DefaultApplicationDriverSetup \
+--applicationConfigPath $dir/metalus-examples/mock_data/application-example.json \
+--input_url $dir/metalus-examples/mock_data/orders.csv \
+--input_format csv \
+--mongoURI mongodb://localhost:27017/application_examples \
+--validateStepParameterTypes true \
+--logLevel DEBUG \
+--input_separator ,
+
+# Validate Results (use mongo shell)
+mongoResults=`mongo < $dir/manual_tests/testData/validate_mongo_data.js | grep "count is not correct!"`
+
+# Shutdown Mongo (the stop script must run through the mongo shell, not mongod)
+mongo < $dir/manual_tests/testData/stop_server.js
+
+# Shutdown Spark
+$sparkDir/sbin/stop-slave.sh
+$sparkDir/sbin/stop-master.sh
+
+# Cleanup the temp directory
+rm -rf $tmpDir
+
+if [ ${#mongoResults} -gt 0 ]
+then
+  echo "Mongo data is not correct: ${mongoResults}"
+  exit 1
+fi
diff --git a/manual_tests/testData/metalus-aws/pipelines.json b/manual_tests/testData/metalus-aws/pipelines.json
new file mode 100644
index 00000000..0637a088
--- /dev/null
+++ b/manual_tests/testData/metalus-aws/pipelines.json
@@ -0,0 +1 @@
+[]
\ No newline at end of file
diff --git a/manual_tests/testData/metalus-aws/steps.json b/manual_tests/testData/metalus-aws/steps.json
new file mode 100644
index 00000000..eb102bc2
--- /dev/null
+++ b/manual_tests/testData/metalus-aws/steps.json
@@ -0,0 +1 @@
+{"pkgs":["com.acxiom.aws.steps"],"steps":[{"id":"207aa871-4f83-4e24-bab3-4e47bb3b667a","displayName":"Write DataFrame to a Kinesis Stream","description":"This step will write a DataFrame to a Kinesis Stream","type":"Pipeline","category":"AWS","params":[{"type":"text","name":"dataFrame","required":false,"className":"org.apache.spark.sql.DataFrame","description":"The DataFrame to post to the Kinesis stream"},{"type":"text","name":"region","required":false,"className":"String"},{"type":"text","name":"streamName","required":false,"className":"String"},{"type":"text","name":"partitionKey","required":false,"className":"String"},{"type":"text","name":"separator","required":false,"className":"String","description":"The separator character to use when combining the column data"},{"type":"text","name":"accessKeyId","required":false,"className":"String","description":"The optional API key to use for the Kinesis stream"},{"type":"text","name":"secretAccessKey","required":false,"className":"String","description":"The optional API secret to use for the Kinesis stream"}],"engineMeta":{"spark":"KinesisSteps.writeToStream","pkg":"com.acxiom.aws.steps"},"tags":["metalus-aws_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]},{"id":"5c9c7056-5c7a-4463-93c8-7e99bad66d4f","displayName":"Write DataFrame to 
a Kinesis Stream Using Global Credentials","description":"This step will write a DataFrame to a Kinesis Stream using the CredentialProvider to get Credentials","type":"Pipeline","category":"AWS","params":[{"type":"text","name":"dataFrame","required":false,"className":"org.apache.spark.sql.DataFrame","description":"The DataFrame to post to the Kinesis stream"},{"type":"text","name":"region","required":false,"className":"String"},{"type":"text","name":"streamName","required":false,"className":"String"},{"type":"text","name":"partitionKey","required":false,"className":"String"},{"type":"text","name":"separator","required":false,"className":"String","description":"The separator character to use when combining the column data"}],"engineMeta":{"spark":"KinesisSteps.writeStream","pkg":"com.acxiom.aws.steps"},"tags":["metalus-aws_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]},{"id":"52f161a5-3025-4e40-a10b-f201940b5cbf","displayName":"Write a single message to a Kinesis Stream Using Global Credentials","description":"This step will write a single message to a Kinesis Stream using the CredentialProvider to get Credentials","type":"Pipeline","category":"AWS","params":[{"type":"text","name":"message","required":false,"className":"String","description":"The message to post to the Kinesis stream"},{"type":"text","name":"region","required":false,"className":"String"},{"type":"text","name":"streamName","required":false,"className":"String"},{"type":"text","name":"partitionKey","required":false,"className":"String"}],"engineMeta":{"spark":"KinesisSteps.postMessage","pkg":"com.acxiom.aws.steps"},"tags":["metalus-aws_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]},{"id":"3079d815-9105-4194-a8f1-6546531b3373","displayName":"Write a single message to a Kinesis Stream","description":"This step will write a single message to a Kinesis Stream","type":"Pipeline","category":"AWS","params":[{"type":"text","name":"message","required":false,"className":"String","description":"The message to post to the Kinesis stream"},{"type":"text","name":"region","required":false,"className":"String"},{"type":"text","name":"streamName","required":false,"className":"String"},{"type":"text","name":"partitionKey","required":false,"className":"String"},{"type":"text","name":"accessKeyId","required":false,"className":"String","description":"The optional API key to use for the Kinesis stream"},{"type":"text","name":"secretAccessKey","required":false,"className":"String","description":"The optional API secret to use for the Kinesis stream"}],"engineMeta":{"spark":"KinesisSteps.postMessage","pkg":"com.acxiom.aws.steps"},"tags":["metalus-aws_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]},{"id":"bd4a944f-39ad-4b9c-8bf7-6d3c1f356510","displayName":"Load DataFrame from S3 path","description":"This step will read a DataFrame from the given S3 path","type":"Pipeline","category":"AWS","params":[{"type":"text","name":"path","required":false,"className":"String","description":"The S3 path to load data"},{"type":"text","name":"accessKeyId","required":false,"className":"String"},{"type":"text","name":"secretAccessKey","required":false,"className":"String"},{"type":"object","name":"options","required":false,"className":"com.acxiom.pipeline.steps.DataFrameReaderOptions"}],"engineMeta":{"spark":"S3Steps.readFromPath","pkg":"com.acxiom.aws.steps","results":{"primaryType":"org.apache.spark.sql.DataFrame"}},"tags":["metalus-aws_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]},{"id":"8714aa73-fdb5-4e9f-a8d3-5a813fe14a9e","displayName":"Load DataFrame from S3 paths","description":"This step will 
read a dataFrame from the given S3 paths","type":"Pipeline","category":"AWS","params":[{"type":"text","name":"paths","required":false,"className":"List[String]","description":"The S3 paths to load data"},{"type":"text","name":"accessKeyId","required":false,"className":"String"},{"type":"text","name":"secretAccessKey","required":false,"className":"String"},{"type":"object","name":"options","required":false,"className":"com.acxiom.pipeline.steps.DataFrameReaderOptions"}],"engineMeta":{"spark":"S3Steps.readFromPaths","pkg":"com.acxiom.aws.steps","results":{"primaryType":"org.apache.spark.sql.DataFrame"}},"tags":["metalus-aws_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]},{"id":"7dc79901-795f-4610-973c-f46da63f669c","displayName":"Write DataFrame to S3","description":"This step will write a DataFrame in a given format to S3","type":"Pipeline","category":"AWS","params":[{"type":"text","name":"dataFrame","required":false,"className":"org.apache.spark.sql.DataFrame","description":"The DataFrame to post to the Kinesis stream"},{"type":"text","name":"path","required":false,"className":"String","description":"The S3 path to write data"},{"type":"text","name":"accessKeyId","required":false,"className":"String"},{"type":"text","name":"secretAccessKey","required":false,"className":"String"},{"type":"object","name":"options","required":false,"className":"com.acxiom.pipeline.steps.DataFrameWriterOptions"}],"engineMeta":{"spark":"S3Steps.writeToPath","pkg":"com.acxiom.aws.steps"},"tags":["metalus-aws_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]},{"id":"cc4694b9-5e54-4b12-8088-ed4ced056efd","displayName":"Create S3 FileManager","description":"Simple function to generate the S3FileManager for a S3 file system","type":"Pipeline","category":"AWS","params":[{"type":"text","name":"region","required":false,"className":"String","description":"The region of the S3 bucket"},{"type":"text","name":"bucket","required":false,"className":"String","description":"The S3 bucket"},{"type":"text","name":"accessKeyId","required":false,"className":"String"},{"type":"text","name":"secretAccessKey","required":false,"className":"String"}],"engineMeta":{"spark":"S3Steps.createFileManager","pkg":"com.acxiom.aws.steps","results":{"primaryType":"com.acxiom.aws.fs.S3FileManager"}},"tags":["metalus-aws_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]},{"id":"0e3bcadd-2d14-408f-982f-32ffd879d795d","displayName":"Create S3 FileManager with Client","description":"Simple function to generate the S3FileManager for a S3 file system using an existing client","type":"Pipeline","category":"AWS","params":[{"type":"text","name":"s3Client","required":false,"className":"com.amazonaws.services.s3.AmazonS3","description":"An existing S3 client use to access the bucket"},{"type":"text","name":"bucket","required":false,"className":"String","description":"The S3 bucket"}],"engineMeta":{"spark":"S3Steps.createFileManagerWithClient","pkg":"com.acxiom.aws.steps","results":{"primaryType":"com.acxiom.aws.fs.S3FileManager"}},"tags":["metalus-aws_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]}],"pkgObjs":[{"id":"com.acxiom.pipeline.steps.DataFrameReaderOptions","schema":"{\"$schema\":\"http://json-schema.org/draft-07/schema#\",\"title\":\"Data Frame Reader 
Options\",\"type\":\"object\",\"additionalProperties\":false,\"properties\":{\"format\":{\"type\":\"string\"},\"options\":{\"type\":\"object\",\"additionalProperties\":{\"type\":\"string\"}},\"schema\":{\"$ref\":\"#/definitions/Schema\"}},\"definitions\":{\"Schema\":{\"type\":\"object\",\"additionalProperties\":false,\"properties\":{\"attributes\":{\"type\":\"array\",\"items\":{\"$ref\":\"#/definitions/Attribute\"}}}},\"Attribute\":{\"type\":\"object\",\"additionalProperties\":false,\"properties\":{\"name\":{\"type\":\"string\"},\"dataType\":{\"$ref\":\"#/definitions/AttributeType\"}}},\"AttributeType\":{\"type\":\"object\",\"additionalProperties\":false,\"properties\":{\"baseType\":{\"type\":\"string\"},\"valueType\":{\"$ref\":\"#/definitions/AttributeType\"},\"nameType\":{\"$ref\":\"#/definitions/AttributeType\"},\"schema\":{\"$ref\":\"#/definitions/Schema\"}}}}}"},{"id":"com.acxiom.pipeline.steps.DataFrameWriterOptions","schema":"{\"$schema\":\"http://json-schema.org/draft-07/schema#\",\"title\":\"Data Frame Writer Options\",\"type\":\"object\",\"additionalProperties\":false,\"properties\":{\"format\":{\"type\":\"string\"},\"saveMode\":{\"type\":\"string\"},\"options\":{\"type\":\"object\",\"additionalProperties\":{\"type\":\"string\"}},\"bucketingOptions\":{\"$ref\":\"#/definitions/BucketingOptions\"},\"partitionBy\":{\"type\":\"array\",\"items\":{\"type\":\"string\"}},\"sortBy\":{\"type\":\"array\",\"items\":{\"type\":\"string\"}}},\"definitions\":{\"BucketingOptions\":{\"type\":\"object\",\"additionalProperties\":false,\"properties\":{\"numBuckets\":{\"type\":\"integer\"},\"columns\":{\"type\":\"array\",\"items\":{\"type\":\"string\"}}},\"required\":[\"numBuckets\"]}}}"}]} diff --git a/manual_tests/testData/metalus-common/pipelines.json b/manual_tests/testData/metalus-common/pipelines.json new file mode 100644 index 00000000..cf59c4c4 --- /dev/null +++ b/manual_tests/testData/metalus-common/pipelines.json @@ -0,0 +1 @@ +[{"id":"f4835500-4c4a-11ea-9c79-f31d60741e3b","name":"DownloadToBronzeHdfs","steps":[{"id":"DownloadToHdfs","displayName":"Step Group","description":"Allows pipelines to be executed as a single step within a parent pipeline.","type":"step-group","params":[{"type":"text","name":"pipelineId","required":false,"value":"46f5e310-4c47-11ea-a0a7-a749c3ebbd62","description":""},{"type":"text","name":"pipeline","required":false,"value":"&46f5e310-4c47-11ea-a0a7-a749c3ebbd62","description":""},{"type":"object","name":"pipelineMappings","required":false,"value":{"fileId":"!fileId","output_buffer_size":"!outputBufferSize || 65536","input_buffer_size":"!inputBufferSize || 65536","sftp_port":"!sftpPort || 22","sftp_input_path":"!sftpInputPath","sftp_username":"!sftpUsername","landing_path":"!landingPath","sftp_password":"!sftpPassword","read_buffer_size":"!readBufferSize || 32768","sftp_host":"!sftpHost"},"description":""}],"nextStepId":"LandingFileToDataFrame","stepId":"f09b3b9c-82ac-56de-8dc8-f57c063dd4aa"},{"id":"LandingFileToDataFrame","displayName":"Load DataFrame from HDFS path","description":"This step will read a dataFrame from the given HDFS 
path","type":"Pipeline","params":[{"type":"text","name":"path","required":false,"value":"!{landingPath}/!{fileId}","description":""},{"type":"object","name":"options","required":false,"value":"!inputReaderOptions","className":"com.acxiom.pipeline.steps.DataFrameReaderOptions","description":""}],"engineMeta":{"spark":"HDFSSteps.readFromPath","pkg":"com.acxiom.pipeline.steps"},"nextStepId":"StandardizeColumnNames","stepId":"87db259d-606e-46eb-b723-82923349640f"},{"id":"StandardizeColumnNames","displayName":"Standardize Column Names on a DataFrame","description":"This step will standardize columns names on existing dataframe","type":"Pipeline","params":[{"type":"text","name":"dataFrame","required":false,"value":"@LandingFileToDataFrame","description":""}],"engineMeta":{"spark":"TransformationSteps.standardizeColumnNames","pkg":"com.acxiom.pipeline.steps"},"nextStepId":"AddRecordId","stepId":"a981080d-714c-4d36-8b09-d95842ec5655"},{"id":"AddRecordId","displayName":"Adds a Unique Identifier to a DataFrame","description":"This step will add a new unique identifier to an existing data frame","type":"Pipeline","params":[{"type":"text","name":"idColumnName","required":false,"value":"metalus_record_id","description":""},{"type":"text","name":"dataFrame","required":false,"value":"@StandardizeColumnNames","description":""}],"engineMeta":{"spark":"TransformationSteps.addUniqueIdToDataFrame","pkg":"com.acxiom.pipeline.steps"},"nextStepId":"AddFileId","stepId":"9f7d84b0-ebab-57da-8b39-be4c47028242"},{"id":"AddFileId","displayName":"Add a Column with a Static Value to All Rows in a DataFrame","description":"This step will add a column with a static value to all rows in the provided data frame","type":"Pipeline","params":[{"type":"text","name":"dataFrame","required":false,"value":"@AddRecordId","description":""},{"type":"text","name":"columnName","required":false,"value":"metalus_file_id","description":""},{"type":"text","name":"columnValue","required":false,"value":"!fileId","description":""}],"engineMeta":{"spark":"TransformationSteps.addStaticColumnToDataFrame","pkg":"com.acxiom.pipeline.steps"},"nextStepId":"WriteToParquetHdfs","stepId":"37e10488-02c1-5c85-b47a-efecf681fdd4"},{"id":"WriteToParquetHdfs","displayName":"Write DataFrame to HDFS","description":"This step will write a dataFrame in a given format to HDFS","type":"Pipeline","params":[{"type":"text","name":"dataFrame","required":false,"value":"@AddFileId","description":""},{"type":"text","name":"path","required":false,"value":"!{bronzeZonePath}/!{fileId}","description":""},{"type":"object","name":"options","required":false,"value":{"format":"parquet","saveMode":"Overwrite","options":{},"schema":{"attributes":[]}},"className":"com.acxiom.pipeline.steps.DataFrameWriterOptions","description":""}],"engineMeta":{"spark":"HDFSSteps.writeToPath","pkg":"com.acxiom.pipeline.steps"},"stepId":"0a296858-e8b7-43dd-9f55-88d00a7cd8fa"}],"category":"pipeline","tags":["metalus-common_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]},{"id":"46f5e310-4c47-11ea-a0a7-a749c3ebbd62","name":"SG_SftpToHdfs","steps":[{"id":"CreateSFTPFileManager","displayName":"Create SFTP FileManager","description":"Simple function to generate the SFTPFileManager for the remote SFTP file 
system","type":"Pipeline","params":[{"type":"text","name":"hostName","required":false,"value":"!sftp_host","description":""},{"type":"text","name":"username","required":false,"value":"!sftp_username","description":""},{"type":"text","name":"password","required":false,"value":"!sftp_password","description":""},{"type":"integer","name":"port","required":false,"value":"!sftp_port || 22","description":""},{"type":"text","name":"strictHostChecking","required":false,"value":false,"description":""}],"engineMeta":{"spark":"SFTPSteps.createFileManager","pkg":"com.acxiom.pipeline.steps"},"nextStepId":"CreateHDFSFileManager","stepId":"9d467cb0-8b3d-40a0-9ccd-9cf8c5b6cb38"},{"id":"CreateHDFSFileManager","displayName":"Create HDFS FileManager","description":"Simple function to generate the HDFSFileManager for the local HDFS file system","type":"Pipeline","params":[],"engineMeta":{"spark":"HDFSSteps.createFileManager","pkg":"com.acxiom.pipeline.steps"},"nextStepId":"DownloadFile","stepId":"e4dad367-a506-5afd-86c0-82c2cf5cd15c"},{"id":"DownloadFile","displayName":"Buffered file copy","description":"Copy the contents of the source path to the destination path using full buffer sizes. This function will call connect on both FileManagers.","type":"Pipeline","params":[{"type":"text","name":"srcFS","required":false,"value":"@CreateSFTPFileManager","description":""},{"type":"text","name":"srcPath","required":false,"value":"!sftp_input_path","description":""},{"type":"text","name":"destFS","required":false,"value":"@CreateHDFSFileManager","description":""},{"type":"text","name":"destPath","required":false,"value":"!{landing_path}/!{fileId}","description":""},{"type":"text","name":"inputBufferSize","required":false,"value":"!input_buffer_size || 65536","description":""},{"type":"text","name":"outputBufferSize","required":false,"value":"!output_buffer_size || 65536","description":""},{"type":"text","name":"copyBufferSize","required":false,"value":"!read_buffer_size || 32768","description":""}],"engineMeta":{"spark":"FileManagerSteps.copy","pkg":"com.acxiom.pipeline.steps"},"nextStepId":"DisconnectSFTPFileManager","stepId":"f5a24db0-e91b-5c88-8e67-ab5cff09c883"},{"id":"DisconnectSFTPFileManager","displayName":"Disconnect a FileManager","description":"Disconnects a FileManager from the underlying file system","type":"Pipeline","params":[{"type":"text","name":"fileManager","required":false,"value":"@CreateSFTPFileManager","description":""}],"engineMeta":{"spark":"FileManagerSteps.disconnectFileManager","pkg":"com.acxiom.pipeline.steps"},"stepId":"3d1e8519-690c-55f0-bd05-1e7b97fb6633"}],"category":"step-group","tags":["metalus-common_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]}] \ No newline at end of file diff --git a/manual_tests/testData/metalus-common/steps.json b/manual_tests/testData/metalus-common/steps.json new file mode 100644 index 00000000..72ecbae8 --- /dev/null +++ b/manual_tests/testData/metalus-common/steps.json @@ -0,0 +1 @@ +{"pkgs":["com.acxiom.pipeline.steps"],"steps":[{"id":"3806f23b-478c-4054-b6c1-37f11db58d38","displayName":"Read a DataFrame from Hive","description":"This step will read a dataFrame in a given format from 
Hive","type":"Pipeline","category":"InputOutput","params":[{"type":"text","name":"table","required":true,"className":"String"},{"type":"object","name":"options","required":false,"className":"com.acxiom.pipeline.steps.DataFrameReaderOptions"}],"engineMeta":{"spark":"HiveSteps.readDataFrame","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.DataFrame"}},"tags":["metalus-common_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]},{"id":"e2b4c011-e71b-46f9-a8be-cf937abc2ec4","displayName":"Write DataFrame to Hive","description":"This step will write a dataFrame in a given format to Hive","type":"Pipeline","category":"InputOutput","params":[{"type":"text","name":"dataFrame","required":true,"className":"org.apache.spark.sql.Dataset[_]"},{"type":"text","name":"table","required":true,"className":"String"},{"type":"object","name":"options","required":false,"className":"com.acxiom.pipeline.steps.DataFrameWriterOptions"}],"engineMeta":{"spark":"HiveSteps.writeDataFrame","pkg":"com.acxiom.pipeline.steps"},"tags":["metalus-common_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]},{"id":"5874ab64-13c7-404c-8a4f-67ff3b0bc7cf","displayName":"Drop Hive Object","description":"This step will drop an object from the hive meta store","type":"Pipeline","category":"InputOutput","params":[{"type":"text","name":"name","required":true,"className":"String"},{"type":"text","name":"objectType","required":false,"className":"String"},{"type":"boolean","name":"ifExists","required":false,"className":"Boolean"},{"type":"boolean","name":"cascade","required":false,"className":"Boolean"}],"engineMeta":{"spark":"HiveSteps.drop","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.DataFrame"}},"tags":["metalus-common_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]},{"id":"87db259d-606e-46eb-b723-82923349640f","displayName":"Load DataFrame from HDFS path","description":"This step will read a dataFrame from the given HDFS path","type":"Pipeline","category":"InputOutput","params":[{"type":"text","name":"path","required":false,"className":"String","description":"The HDFS path to load data into the DataFrame"},{"type":"object","name":"options","required":false,"className":"com.acxiom.pipeline.steps.DataFrameReaderOptions","description":"The options to use when loading the DataFrameReader"}],"engineMeta":{"spark":"HDFSSteps.readFromPath","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.DataFrame"}},"tags":["metalus-common_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]},{"id":"8daea683-ecde-44ce-988e-41630d251cb8","displayName":"Load DataFrame from HDFS paths","description":"This step will read a dataFrame from the given HDFS paths","type":"Pipeline","category":"InputOutput","params":[{"type":"text","name":"paths","required":false,"className":"List[String]","description":"The HDFS paths to load data into the DataFrame"},{"type":"object","name":"options","required":false,"className":"com.acxiom.pipeline.steps.DataFrameReaderOptions","description":"The options to use when loading the DataFrameReader"}],"engineMeta":{"spark":"HDFSSteps.readFromPaths","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.DataFrame"}},"tags":["metalus-common_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]},{"id":"0a296858-e8b7-43dd-9f55-88d00a7cd8fa","displayName":"Write DataFrame to HDFS","description":"This step will write a dataFrame in a given format to 
HDFS","type":"Pipeline","category":"InputOutput","params":[{"type":"text","name":"dataFrame","required":false,"className":"org.apache.spark.sql.Dataset[_]","description":"The DataFrame to write"},{"type":"text","name":"path","required":false,"className":"String","description":"The GCS path to write data"},{"type":"object","name":"options","required":false,"className":"com.acxiom.pipeline.steps.DataFrameWriterOptions","description":"The optional DataFrame Options"}],"engineMeta":{"spark":"HDFSSteps.writeToPath","pkg":"com.acxiom.pipeline.steps"},"tags":["metalus-common_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]},{"id":"e4dad367-a506-5afd-86c0-82c2cf5cd15c","displayName":"Create HDFS FileManager","description":"Simple function to generate the HDFSFileManager for the local HDFS file system","type":"Pipeline","category":"InputOutput","params":[],"engineMeta":{"spark":"HDFSSteps.createFileManager","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"com.acxiom.pipeline.fs.HDFSFileManager"}},"tags":["metalus-common_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]},{"id":"a7e17c9d-6956-4be0-a602-5b5db4d1c08b","displayName":"Scala script Step","description":"Executes a script and returns the result","type":"Pipeline","category":"Scripting","params":[{"type":"text","name":"script","required":true,"className":"String"}],"engineMeta":{"spark":"ScalaSteps.processScript","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"com.acxiom.pipeline.PipelineStepResponse"}},"tags":["metalus-common_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]},{"id":"8bf8cef6-cf32-4d85-99f4-e4687a142f84","displayName":"Scala script Step with additional object provided","description":"Executes a script with the provided object and returns the result","type":"Pipeline","category":"Scripting","params":[{"type":"text","name":"script","required":true,"className":"String"},{"type":"text","name":"value","required":true,"className":"Any"},{"type":"text","name":"type","required":false,"className":"String"}],"engineMeta":{"spark":"ScalaSteps.processScriptWithValue","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"com.acxiom.pipeline.PipelineStepResponse"}},"tags":["metalus-common_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]},{"id":"3ab721e8-0075-4418-aef1-26abdf3041be","displayName":"Scala script Step with additional objects provided","description":"Executes a script with the provided object and returns the result","type":"Pipeline","category":"Scripting","params":[{"type":"text","name":"script","required":true,"className":"String"},{"type":"text","name":"values","required":true,"className":"Map[String,Any]"},{"type":"text","name":"types","required":false,"parameterType":"Map[String,String]"},{"type":"boolean","name":"unwrapOptions","required":false,"className":"Boolean"}],"engineMeta":{"spark":"ScalaSteps.processScriptWithValues","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"com.acxiom.pipeline.PipelineStepResponse"}},"tags":["metalus-common_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]},{"id":"15889487-fd1c-4c44-b8eb-973c12f91fae","displayName":"Creates an HttpRestClient","description":"This step will build an HttpRestClient using a host url and optional authorization object","type":"Pipeline","category":"API","params":[{"type":"text","name":"hostUrl","required":false,"className":"String","description":"The URL to connect including port"},{"type":"text","name":"authorization","required":false,"className":"com.acxiom.pipeline.api.Authorization","description":"The optional authorization class to use when making 
connections"},{"type":"boolean","name":"allowSelfSignedCertificates","required":false,"className":"Boolean","description":"Flag to allow using self signed certificates for http calls"}],"engineMeta":{"spark":"ApiSteps.createHttpRestClient","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"com.acxiom.pipeline.api.HttpRestClient"}},"tags":["metalus-common_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]},{"id":"fcfd4b91-9a9c-438c-8afa-9f14c1e52a82","displayName":"Creates an HttpRestClient from protocol, host and port","description":"This step will build an HttpRestClient using url parts and optional authorization object","type":"Pipeline","category":"API","params":[{"type":"text","name":"protocol","required":false,"className":"String","description":"The protocol to use when constructing the URL"},{"type":"text","name":"host","required":false,"className":"String","description":"The host name to use when constructing the URL"},{"type":"text","name":"port","required":false,"className":"Int","description":"The port to use when constructing the URL"},{"type":"text","name":"authorization","required":false,"className":"com.acxiom.pipeline.api.Authorization","description":"The optional authorization class to use when making connections"}],"engineMeta":{"spark":"ApiSteps.createHttpRestClientFromParameters","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"com.acxiom.pipeline.api.HttpRestClient"}},"tags":["metalus-common_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]},{"id":"b59f0486-78aa-4bd4-baf5-5c7d7c648ff0","displayName":"Check Path Exists","description":"Checks the path to determine whether it exists or not.","type":"Pipeline","category":"API","params":[{"type":"text","name":"httpRestClient","required":false,"className":"com.acxiom.pipeline.api.HttpRestClient"},{"type":"text","name":"path","required":false,"className":"String","description":"The path to verify"}],"engineMeta":{"spark":"ApiSteps.exists","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"Boolean"}},"tags":["metalus-common_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]},{"id":"7521ac47-84ec-4e50-b087-b9de4bf6d514","displayName":"Get the last modified date","description":"Gets the last modified date for the provided path","type":"Pipeline","category":"API","params":[{"type":"text","name":"httpRestClient","required":false,"className":"com.acxiom.pipeline.api.HttpRestClient"},{"type":"text","name":"path","required":false,"className":"String","description":"The path to the resource to get the last modified date"}],"engineMeta":{"spark":"ApiSteps.getLastModifiedDate","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"java.util.Date"}},"tags":["metalus-common_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]},{"id":"fff7f7b6-5d9a-40b3-8add-6432552920a8","displayName":"Get Path Content Length","description":"Get the size of the content at the given path.","type":"Pipeline","category":"API","params":[{"type":"text","name":"httpRestClient","required":false,"className":"com.acxiom.pipeline.api.HttpRestClient"},{"type":"text","name":"path","required":false,"className":"String","description":"The path to the resource to get the content length"}],"engineMeta":{"spark":"ApiSteps.getContentLength","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"Long"}},"tags":["metalus-common_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]},{"id":"dd351d47-125d-47fa-bafd-203bebad82eb","displayName":"Get Path Headers","description":"Get the headers for the content at the given 
path.","type":"Pipeline","category":"API","params":[{"type":"text","name":"httpRestClient","required":false,"className":"com.acxiom.pipeline.api.HttpRestClient"},{"type":"text","name":"path","required":false,"className":"String","description":"The path to get the headers"}],"engineMeta":{"spark":"ApiSteps.getHeaders","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"Map[String,List[String]]"}},"tags":["metalus-common_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]},{"id":"532f72dd-8443-481d-8406-b74cdc08e342","displayName":"Delete Content","description":"Attempts to delete the provided path..","type":"Pipeline","category":"API","params":[{"type":"text","name":"httpRestClient","required":false,"className":"com.acxiom.pipeline.api.HttpRestClient"},{"type":"text","name":"path","required":false,"className":"String","description":"The path to delete"}],"engineMeta":{"spark":"ApiSteps.delete","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"Boolean"}},"tags":["metalus-common_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]},{"id":"3b91e6e8-ec18-4468-9089-8474f4b4ba48","displayName":"GET String Content","description":"Retrieves the value at the provided path as a string.","type":"Pipeline","category":"API","params":[{"type":"text","name":"httpRestClient","required":false,"className":"com.acxiom.pipeline.api.HttpRestClient"},{"type":"text","name":"path","required":false,"className":"String","description":"The path to resource"}],"engineMeta":{"spark":"ApiSteps.getStringContent","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"String"}},"tags":["metalus-common_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]},{"id":"34c2fc9a-2502-4c79-a0cb-3f866a0a0d6e","displayName":"POST String Content","description":"POSTs the provided string to the provided path using the content type and returns the response as a string.","type":"Pipeline","category":"API","params":[{"type":"text","name":"httpRestClient","required":false,"className":"com.acxiom.pipeline.api.HttpRestClient"},{"type":"text","name":"path","required":false,"className":"String","description":"The path to post the content"},{"type":"text","name":"content","required":false,"className":"String","description":"The content to post"},{"type":"text","name":"contentType","required":false,"className":"String","description":"The content type being sent to the path"}],"engineMeta":{"spark":"ApiSteps.postStringContent","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"String"}},"tags":["metalus-common_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]},{"id":"49ae38b3-cb41-4153-9111-aa6aacf6721d","displayName":"PUT String Content","description":"PUTs the provided string to the provided path using the content type and returns the response as a string.","type":"Pipeline","category":"API","params":[{"type":"text","name":"httpRestClient","required":false,"className":"com.acxiom.pipeline.api.HttpRestClient"},{"type":"text","name":"path","required":false,"className":"String","description":"The path to post the content"},{"type":"text","name":"content","required":false,"className":"String","description":"The content to put"},{"type":"text","name":"contentType","required":false,"className":"String","description":"The content type being sent to the path"}],"engineMeta":{"spark":"ApiSteps.putStringContent","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"String"}},"tags":["metalus-common_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]},{"id":"99b20c23-722f-4862-9f47-bc9f72440ae6","displayName":"GET Input Stream","description":"Creates a buffered input stream for the provided 
path","type":"Pipeline","category":"API","params":[{"type":"text","name":"httpRestClient","required":false,"parameterType":"com.acxiom.pipeline.api.HttpRestClient"},{"type":"text","name":"path","required":false,"parameterType":"String"},{"type":"text","name":"bufferSize","required":false,"parameterType":"Int"}],"engineMeta":{"spark":"ApiSteps.getInputStream","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"java.io.InputStream"}},"tags":["metalus-common_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]},{"id":"f4120b1c-91df-452f-9589-b77f8555ba44","displayName":"GET Output Stream","description":"Creates a buffered output stream for the provided path.","type":"Pipeline","category":"API","params":[{"type":"text","name":"httpRestClient","required":false,"parameterType":"com.acxiom.pipeline.api.HttpRestClient"},{"type":"text","name":"path","required":false,"parameterType":"String"},{"type":"text","name":"bufferSize","required":false,"parameterType":"Int"}],"engineMeta":{"spark":"ApiSteps.getOutputStream","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"java.io.OutputStream"}},"tags":["metalus-common_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]},{"id":"cdb332e3-9ea4-4c96-8b29-c1d74287656c","displayName":"Load table as DataFrame using JDBCOptions","description":"This step will load a table from the provided JDBCOptions","type":"Pipeline","category":"InputOutput","params":[{"type":"text","name":"jdbcOptions","required":false,"className":"org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions","description":"The options to use when loading the DataFrame"}],"engineMeta":{"spark":"JDBCSteps.readWithJDBCOptions","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.DataFrame"}},"tags":["metalus-common_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]},{"id":"72dbbfc8-bd1d-4ce4-ab35-28fa8385ea54","displayName":"Load table as DataFrame using StepOptions","description":"This step will load a table from the provided JDBCDataFrameReaderOptions","type":"Pipeline","category":"InputOutput","params":[{"type":"object","name":"jDBCStepsOptions","required":false,"className":"com.acxiom.pipeline.steps.JDBCDataFrameReaderOptions"}],"engineMeta":{"spark":"JDBCSteps.readWithStepOptions","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.DataFrame"}},"tags":["metalus-common_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]},{"id":"dcc57409-eb91-48c0-975b-ca109ba30195","displayName":"Load table as DataFrame","description":"This step will load a table from the provided jdbc information","type":"Pipeline","category":"InputOutput","params":[{"type":"text","name":"url","required":false,"className":"String"},{"type":"text","name":"table","required":false,"className":"String"},{"type":"text","name":"predicates","required":false,"className":"List[String]","description":"Optional predicates used for partitioning"},{"type":"text","name":"connectionProperties","required":false,"className":"Map[String,String]"}],"engineMeta":{"spark":"JDBCSteps.readWithProperties","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.DataFrame"}},"tags":["metalus-common_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]},{"id":"c9fddf52-34b1-4216-a049-10c33ccd24ab","displayName":"Write DataFrame to table using JDBCOptions","description":"This step will write a DataFrame as a table using JDBCOptions","type":"Pipeline","category":"InputOutput","params":[{"type":"text","name":"dataFrame","required":false,"className":"org.apache.spark.sql.Dataset[_]","description":"The DataFrame to be 
written"},{"type":"text","name":"jdbcOptions","required":false,"className":"org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions","description":"Options for configuring the JDBC connection"},{"type":"text","name":"saveMode","required":false,"className":"String"}],"engineMeta":{"spark":"JDBCSteps.writeWithJDBCOptions","pkg":"com.acxiom.pipeline.steps"},"tags":["metalus-common_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]},{"id":"77ffcd02-fbd0-4f79-9b35-ac9dc5fb7190","displayName":"Write DataFrame to table","description":"This step will write a DataFrame to a table using the provided properties","type":"Pipeline","category":"InputOutput","params":[{"type":"text","name":"dataFrame","required":false,"className":"org.apache.spark.sql.Dataset[_]"},{"type":"text","name":"url","required":false,"className":"String"},{"type":"text","name":"table","required":false,"className":"String"},{"type":"text","name":"connectionProperties","required":false,"className":"Map[String,String]"},{"type":"text","name":"saveMode","required":false,"className":"String"}],"engineMeta":{"spark":"JDBCSteps.writeWithProperties","pkg":"com.acxiom.pipeline.steps"},"tags":["metalus-common_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]},{"id":"3d6b77a1-52c2-49ba-99a0-7ec773dac696","displayName":"Write DataFrame to JDBC table","description":"This step will write a DataFrame to a table using the provided JDBCDataFrameWriterOptions","type":"Pipeline","category":"InputOutput","params":[{"type":"text","name":"dataFrame","required":false,"className":"org.apache.spark.sql.Dataset[_]"},{"type":"object","name":"jDBCStepsOptions","required":false,"className":"com.acxiom.pipeline.steps.JDBCDataFrameWriterOptions","description":"Options for the JDBC connect and spark DataFrameWriter"}],"engineMeta":{"spark":"JDBCSteps.writeWithStepOptions","pkg":"com.acxiom.pipeline.steps"},"tags":["metalus-common_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]},{"id":"713fff3d-d407-4970-89ae-7844e6fc60e3","displayName":"Get JDBC Connection","description":"Get a jdbc connection.","type":"Pipeline","category":"InputOutput","params":[{"type":"text","name":"url","required":false,"className":"String"},{"type":"text","name":"properties","required":false,"className":"Map[String,String]"}],"engineMeta":{"spark":"JDBCSteps.getConnection","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"java.sql.Connection"}},"tags":["metalus-common_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]},{"id":"549828be-3d96-4561-bf94-7ad420f9d203","displayName":"Execute Sql","description":"Execute a sql command using jdbc.","type":"Pipeline","category":"InputOutput","params":[{"type":"text","name":"sql","required":false,"className":"String","description":"Sql command to execute"},{"type":"text","name":"connection","required":false,"className":"java.sql.Connection","description":"An open jdbc connection"},{"type":"text","name":"parameters","required":false,"className":"List[Any]","description":"Optional list of bind variables"}],"engineMeta":{"spark":"JDBCSteps.executeSql","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"com.acxiom.pipeline.PipelineStepResponse"}},"tags":["metalus-common_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]},{"id":"9c8957a3-899e-4f32-830e-d120b1917aa1","displayName":"Close JDBC Connection","description":"Close a JDBC Connection.","type":"Pipeline","category":"InputOutput","params":[{"type":"text","name":"connection","required":false,"className":"java.sql.Connection","description":"An open jdbc 
connection"}],"engineMeta":{"spark":"JDBCSteps.closeConnection","pkg":"com.acxiom.pipeline.steps"},"tags":["metalus-common_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]},{"id":"3464dc85-5111-40fc-9bfb-1fd6fc8a2c17","displayName":"Convert JSON String to Map","description":"This step will convert the provided JSON string into a Map that can be passed to other steps","type":"Pipeline","category":"JSON","params":[{"type":"text","name":"jsonString","required":false,"className":"String","description":"The JSON string to convert to a map"},{"type":"text","name":"formats","required":false,"className":"org.json4s.Formats"}],"engineMeta":{"spark":"JSONSteps.jsonStringToMap","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"Map[String,Any]"}},"tags":["metalus-common_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]},{"id":"f4d19691-779b-4962-a52b-ee5d9a99068e","displayName":"Convert JSON Map to JSON String","description":"This step will convert the provided JSON map into a JSON string that can be passed to other steps","type":"Pipeline","category":"JSON","params":[{"type":"text","name":"jsonMap","required":false,"className":"Map[String,Any]","description":"The JSON map to convert to a JSON string"},{"type":"text","name":"formats","required":false,"className":"org.json4s.Formats"}],"engineMeta":{"spark":"JSONSteps.jsonMapToString","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"String"}},"tags":["metalus-common_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]},{"id":"1f23eb37-98ee-43c2-ac78-17b04db3cc8d","displayName":"Convert object to JSON String","description":"This step will convert the provided object into a JSON string that can be passed to other steps","type":"Pipeline","category":"JSON","params":[{"type":"text","name":"obj","required":false,"className":"AnyRef","description":"The object to convert to a JSON string"},{"type":"text","name":"formats","required":false,"className":"org.json4s.Formats"}],"engineMeta":{"spark":"JSONSteps.objectToJsonString","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"String"}},"tags":["metalus-common_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]},{"id":"880c5151-f7cd-40bb-99f2-06dbb20a6523","displayName":"Convert JSON String to object","description":"This step will convert the provided JSON string into an object that can be passed to other steps","type":"Pipeline","category":"JSON","params":[{"type":"text","name":"jsonString","required":false,"className":"String","description":"The JSON string to convert to an object"},{"type":"text","name":"objectName","required":false,"className":"String","description":"The fully qualified class name of the object"},{"type":"text","name":"formats","required":false,"className":"org.json4s.Formats"}],"engineMeta":{"spark":"JSONSteps.jsonStringToObject","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"Any"}},"tags":["metalus-common_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]},{"id":"68958a29-aab5-4f7e-9ffd-af99c33c512b","displayName":"Convert JSON String to Schema","description":"This step will convert the provided JSON string into a Schema that can be passed to other steps","type":"Pipeline","category":"JSON","params":[{"type":"text","name":"schema","required":false,"className":"String","description":"The JSON string to convert to a 
Schema"},{"type":"text","name":"formats","required":false,"className":"org.json4s.Formats"}],"engineMeta":{"spark":"JSONSteps.jsonStringToSchema","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"com.acxiom.pipeline.steps.Schema"}},"tags":["metalus-common_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]},{"id":"cf4e9e6c-98d6-4a14-ae74-52322782c504","displayName":"Convert JSON String to DataFrame","description":"This step will convert the provided JSON string into a DataFrame that can be passed to other steps","type":"Pipeline","category":"JSON","params":[{"type":"text","name":"jsonString","required":false,"className":"String","description":"The JSON string to convert to a DataFrame"}],"engineMeta":{"spark":"JSONSteps.jsonStringToDataFrame","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.DataFrame"}},"tags":["metalus-common_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]},{"id":"d5cd835e-5e8f-49c0-9706-746d5a4d7b3a","displayName":"Convert JSON String Dataset to DataFrame","description":"This step will convert the provided JSON string Dataset into a DataFrame that can be passed to other steps","type":"Pipeline","category":"JSON","params":[{"type":"text","name":"dataset","required":false,"className":"org.apache.spark.sql.Dataset[String]","description":"The dataset containing JSON strings"},{"type":"object","name":"dataFrameReaderOptions","required":false,"className":"com.acxiom.pipeline.steps.DataFrameReaderOptions","description":"The JSON parsing options"}],"engineMeta":{"spark":"JSONSteps.jsonDatasetToDataFrame","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.DataFrame"}},"tags":["metalus-common_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]},{"id":"f3891201-5138-4cab-aebc-bcc319228543","displayName":"Build JSON4S Formats","description":"This step will build a json4s Formats object that can be used to override the default","type":"Pipeline","category":"JSON","params":[{"type":"text","name":"customSerializers","required":false,"className":"List[com.acxiom.pipeline.applications.ClassInfo]"},{"type":"text","name":"enumIdSerializers","required":false,"className":"List[com.acxiom.pipeline.applications.ClassInfo]"},{"type":"text","name":"enumNameSerializers","required":false,"className":"List[com.acxiom.pipeline.applications.ClassInfo]"}],"engineMeta":{"spark":"JSONSteps.buildJsonFormats","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.json4s.Formats"}},"tags":["metalus-common_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]},{"id":"b5485d97-d4e8-41a6-8af7-9ce79a435140","displayName":"To String","description":"Returns the result of the toString method, can unwrap options","type":"Pipeline","category":"String","params":[{"type":"text","name":"value","required":false,"className":"Any","description":"The value to convert"},{"type":"boolean","name":"unwrapOption","required":false,"className":"Boolean","description":"Boolean indicating whether to unwrap the value from an Option prior to calling toString"}],"engineMeta":{"spark":"StringSteps.toString","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"String"}},"tags":["metalus-common_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]},{"id":"78e817ec-2bf2-4cbe-acba-e5bc9bdcffc5","displayName":"List To String","description":"Returns the result of the mkString method","type":"Pipeline","category":"String","params":[{"type":"text","name":"list","required":false,"className":"List[Any]","description":"The list to convert"},{"type":"text","name":"separator","required":false,"className":"String","description":"Separator 
character to use when making the string"},{"type":"boolean","name":"unwrapOptions","required":false,"className":"Boolean","description":"Boolean indicating whether to unwrap each value from an Option"}],"engineMeta":{"spark":"StringSteps.listToString","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"String"}},"tags":["metalus-common_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]},{"id":"fcd6b5fe-08ed-4cfd-acfe-eb676d7f4ecd","displayName":"To Lowercase","description":"Returns a lowercase string","type":"Pipeline","category":"String","params":[{"type":"text","name":"value","required":false,"className":"String","description":"The value to lowercase"}],"engineMeta":{"spark":"StringSteps.toLowerCase","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"String"}},"tags":["metalus-common_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]},{"id":"2f31ebf1-4ae2-4e04-9b29-4802cac8a198","displayName":"To Uppercase","description":"Returns an uppercase string","type":"Pipeline","category":"String","params":[{"type":"text","name":"value","required":false,"className":"String","description":"The value to uppercase"}],"engineMeta":{"spark":"StringSteps.toUpperCase","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"String"}},"tags":["metalus-common_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]},{"id":"96b7b521-5304-4e63-8435-63d84a358368","displayName":"String Split","description":"Returns a list of strings split off of the given string","type":"Pipeline","category":"String","params":[{"type":"text","name":"string","required":false,"className":"String","description":"The string to split"},{"type":"text","name":"regex","required":false,"className":"String","description":"Regex to use when splitting the string"},{"type":"integer","name":"limit","required":false,"className":"Int","description":"Max number of elements to return in the list"}],"engineMeta":{"spark":"StringSteps.stringSplit","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"List[String]"}},"tags":["metalus-common_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]},{"id":"f75abedd-4aee-4979-8d56-ea7b0c1a86e1","displayName":"Substring","description":"Returns a substring","type":"Pipeline","category":"String","params":[{"type":"text","name":"string","required":false,"className":"String","description":"The string to parse"},{"type":"text","name":"begin","required":false,"className":"Int","description":"The beginning index"},{"type":"integer","name":"end","required":false,"className":"Int","description":"The end index"}],"engineMeta":{"spark":"StringSteps.substring","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"String"}},"tags":["metalus-common_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]},{"id":"3fabf9ec-5383-4eb3-81af-6092ab7c370d","displayName":"String Equals","description":"Return whether string1 equals string2","type":"branch","category":"Decision","params":[{"type":"text","name":"string","required":false,"className":"String","description":"The string to compare"},{"type":"text","name":"anotherString","required":false,"className":"String","description":"The other string to compare"},{"type":"boolean","name":"caseInsensitive","required":false,"className":"Boolean","description":"Boolean flag to indicate case sensitive
compare"},{"type":"result","name":"true","required":false},{"type":"result","name":"false","required":false}],"engineMeta":{"spark":"StringSteps.stringEquals","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"Boolean"}},"tags":["metalus-common_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]},{"id":"ff0562f5-2917-406d-aa78-c5d49ba6b99f","displayName":"String Matches","description":"Return whether string matches a given regex","type":"branch","category":"Decision","params":[{"type":"text","name":"string","required":false,"className":"String","description":"The string to match"},{"type":"text","name":"regex","required":false,"className":"String","description":"Regex to use for the match"},{"type":"result","name":"true","required":false},{"type":"result","name":"false","required":false}],"engineMeta":{"spark":"StringSteps.stringMatches","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"Boolean"}},"tags":["metalus-common_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]},{"id":"416baf4e-a1dd-49fc-83a9-0f41b77e57b7","displayName":"String Replace All","description":"Perform a literal or regex replacement on a string","type":"pipeline","category":"String","params":[{"type":"text","name":"string","required":false,"parameterType":"String"},{"type":"text","name":"matchString","required":false,"parameterType":"String"},{"type":"text","name":"replacement","required":false,"parameterType":"String"},{"type":"boolean","name":"literal","required":false,"parameterType":"Boolean"}],"engineMeta":{"spark":"StringSteps.stringReplaceAll","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"String"}},"tags":["metalus-common_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]},{"id":"95438b82-8d50-41da-8094-c92449b9e7df","displayName":"String Replace First","description":"Perform a literal or regex replacement on the first occurrence in a string","type":"pipeline","category":"String","params":[{"type":"text","name":"string","required":false,"parameterType":"String"},{"type":"text","name":"matchString","required":false,"parameterType":"String"},{"type":"text","name":"replacement","required":false,"parameterType":"String"},{"type":"boolean","name":"literal","required":false,"parameterType":"Boolean"}],"engineMeta":{"spark":"StringSteps.stringReplaceFirst","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"String"}},"tags":["metalus-common_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]},{"id":"541c4f7d-3524-4d53-bbd9-9f2cfd9d1bd1","displayName":"Save a Dataframe to a TempView","description":"This step stores an existing dataframe to a TempView to be used in future queries in the session","type":"Pipeline","category":"Query","params":[{"type":"text","name":"dataFrame","required":false,"className":"org.apache.spark.sql.Dataset[_]","description":"The dataframe to store"},{"type":"text","name":"viewName","required":false,"className":"String"}],"engineMeta":{"spark":"QuerySteps.dataFrameToTempView","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"String"}},"tags":["metalus-common_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]},{"id":"71b71ef3-eaa7-4a1f-b3f3-603a1a54846d","displayName":"Create a TempView from a Query","description":"This step runs a SQL statement against existing TempViews from this session and returns a new 
TempView","type":"Pipeline","category":"Query","params":[{"type":"script","name":"query","required":false,"language":"sql","className":"String"},{"type":"text","name":"variableMap","required":false,"className":"Map[String,String]"},{"type":"text","name":"viewName","required":false,"className":"String"}],"engineMeta":{"spark":"QuerySteps.queryToTempView","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"String"}},"tags":["metalus-common_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]},{"id":"61378ed6-8a4f-4e6d-9c92-6863c9503a54","displayName":"Create a DataFrame from a Query","description":"This step runs a SQL statement against existing TempViews from this session and returns a new DataFrame","type":"Pipeline","category":"Query","params":[{"type":"script","name":"query","required":false,"language":"sql","className":"String"},{"type":"text","name":"variableMap","required":false,"className":"Map[String,String]"}],"engineMeta":{"spark":"QuerySteps.queryToDataFrame","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.DataFrame"}},"tags":["metalus-common_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]},{"id":"57b0e491-e09b-4428-aab2-cebe1f217eda","displayName":"Create a DataFrame from an Existing TempView","description":"This step pulls an existing TempView from this session into a new DataFrame","type":"Pipeline","category":"Query","params":[{"type":"text","name":"viewName","required":false,"className":"String","description":"The name of the view to use"}],"engineMeta":{"spark":"QuerySteps.tempViewToDataFrame","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.DataFrame"}},"tags":["metalus-common_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]},{"id":"648f27aa-6e3b-44ed-a093-bc284783731b","displayName":"Create a TempView from a DataFrame Query","description":"This step runs a SQL statement against an existing DataFrame from this session and returns a new TempView","type":"Pipeline","category":"Query","params":[{"type":"text","name":"dataFrame","required":false,"className":"org.apache.spark.sql.Dataset[_]","description":"The dataframe to query"},{"type":"script","name":"query","required":false,"language":"sql","className":"String"},{"type":"text","name":"variableMap","required":false,"className":"Map[String,String]"},{"type":"text","name":"inputViewName","required":false,"className":"String"},{"type":"text","name":"outputViewName","required":false,"className":"String"}],"engineMeta":{"spark":"QuerySteps.dataFrameQueryToTempView","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"String"}},"tags":["metalus-common_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]},{"id":"dfb8a387-6245-4b1c-ae6c-94067eb83962","displayName":"Create a DataFrame from a DataFrame Query","description":"This step runs a SQL statement against an existing DataFrame from this session and returns a new DataFrame","type":"Pipeline","category":"Query","params":[{"type":"text","name":"dataFrame","required":false,"className":"org.apache.spark.sql.Dataset[_]","description":"The dataframe to 
query"},{"type":"script","name":"query","required":false,"language":"sql","className":"String"},{"type":"text","name":"variableMap","required":false,"className":"Map[String,String]"},{"type":"text","name":"inputViewName","required":false,"className":"String"}],"engineMeta":{"spark":"QuerySteps.dataFrameQueryToDataFrame","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.DataFrame"}},"tags":["metalus-common_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]},{"id":"c88de095-14e0-4c67-8537-0325127e2bd2","displayName":"Cache an exising TempView","description":"This step will cache an existing TempView","type":"Pipeline","category":"Query","params":[{"type":"text","name":"viewName","required":false,"className":"String","description":"The name of the view to cache"}],"engineMeta":{"spark":"QuerySteps.cacheTempView","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.DataFrame"}},"tags":["metalus-common_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]},{"id":"0342654c-2722-56fe-ba22-e342169545af","displayName":"Copy source contents to destination","description":"Copy the contents of the source path to the destination path. This function will call connect on both FileManagers.","type":"Pipeline","category":"InputOutput","params":[{"type":"text","name":"srcFS","required":false,"className":"com.acxiom.pipeline.fs.FileManager"},{"type":"text","name":"srcPath","required":false,"className":"String"},{"type":"text","name":"destFS","required":false,"className":"com.acxiom.pipeline.fs.FileManager"},{"type":"text","name":"destPath","required":false,"className":"String"}],"engineMeta":{"spark":"FileManagerSteps.copy","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"com.acxiom.pipeline.steps.CopyResults"}},"tags":["metalus-common_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]},{"id":"c40169a3-1e77-51ab-9e0a-3f24fb98beef","displayName":"Copy source contents to destination with buffering","description":"Copy the contents of the source path to the destination path using buffer sizes. This function will call connect on both FileManagers.","type":"Pipeline","category":"InputOutput","params":[{"type":"text","name":"srcFS","required":false,"className":"com.acxiom.pipeline.fs.FileManager"},{"type":"text","name":"srcPath","required":false,"className":"String"},{"type":"text","name":"destFS","required":false,"className":"com.acxiom.pipeline.fs.FileManager"},{"type":"text","name":"destPath","required":false,"className":"String"},{"type":"text","name":"inputBufferSize","required":false,"className":"Int"},{"type":"text","name":"outputBufferSize","required":false,"className":"Int"}],"engineMeta":{"spark":"FileManagerSteps.copy","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"com.acxiom.pipeline.steps.CopyResults"}},"tags":["metalus-common_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]},{"id":"f5a24db0-e91b-5c88-8e67-ab5cff09c883","displayName":"Buffered file copy","description":"Copy the contents of the source path to the destination path using full buffer sizes. 
This function will call connect on both FileManagers.","type":"Pipeline","category":"InputOutput","params":[{"type":"text","name":"srcFS","required":false,"className":"com.acxiom.pipeline.fs.FileManager"},{"type":"text","name":"srcPath","required":false,"className":"String"},{"type":"text","name":"destFS","required":false,"className":"com.acxiom.pipeline.fs.FileManager"},{"type":"text","name":"destPath","required":false,"className":"String"},{"type":"text","name":"inputBufferSize","required":false,"className":"Int"},{"type":"text","name":"outputBufferSize","required":false,"className":"Int"},{"type":"text","name":"copyBufferSize","required":false,"className":"Int","description":"The intermediate buffer size to use during copy"}],"engineMeta":{"spark":"FileManagerSteps.copy","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"com.acxiom.pipeline.steps.CopyResults"}},"tags":["metalus-common_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]},{"id":"3d1e8519-690c-55f0-bd05-1e7b97fb6633","displayName":"Disconnect a FileManager","description":"Disconnects a FileManager from the underlying file system","type":"Pipeline","category":"InputOutput","params":[{"type":"text","name":"fileManager","required":false,"className":"com.acxiom.pipeline.fs.FileManager","description":"The file manager to disconnect"}],"engineMeta":{"spark":"FileManagerSteps.disconnectFileManager","pkg":"com.acxiom.pipeline.steps"},"tags":["metalus-common_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]},{"id":"9d467cb0-8b3d-40a0-9ccd-9cf8c5b6cb38","displayName":"Create SFTP FileManager","description":"Simple function to generate the SFTPFileManager for the remote SFTP file system","type":"Pipeline","category":"InputOutput","params":[{"type":"text","name":"hostName","required":false,"className":"String","description":"The name of the host to connect"},{"type":"text","name":"username","required":false,"className":"String","description":"The username used for connection"},{"type":"text","name":"password","required":false,"className":"String","description":"The password used for connection"},{"type":"integer","name":"port","required":false,"className":"Int","description":"The optional port if other than 22"},{"type":"boolean","name":"strictHostChecking","required":false,"className":"Boolean","description":"Option to automatically add keys to the known_hosts file. 
Default is false."}],"engineMeta":{"spark":"SFTPSteps.createFileManager","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"com.acxiom.pipeline.fs.SFTPFileManager"}},"tags":["metalus-common_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]},{"id":"22fcc0e7-0190-461c-a999-9116b77d5919","displayName":"Build a DataFrameReader Object","description":"This step will build a DataFrameReader object that can be used to read a file into a dataframe","type":"Pipeline","category":"InputOutput","params":[{"type":"object","name":"dataFrameReaderOptions","required":false,"className":"com.acxiom.pipeline.steps.DataFrameReaderOptions","description":"The options to use when loading the DataFrameReader"}],"engineMeta":{"spark":"DataFrameSteps.getDataFrameReader","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.DataFrameReader"}},"tags":["metalus-common_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]},{"id":"66a451c8-ffbd-4481-9c37-71777c3a240f","displayName":"Load Using DataFrameReader","description":"This step will load a DataFrame given a dataFrameReader.","type":"Pipeline","category":"InputOutput","params":[{"type":"text","name":"dataFrameReader","required":false,"className":"org.apache.spark.sql.DataFrameReader","description":"The DataFrameReader to use when creating the DataFrame"}],"engineMeta":{"spark":"DataFrameSteps.load","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.DataFrame"}},"tags":["metalus-common_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]},{"id":"d7cf27e6-9ca5-4a73-a1b3-d007499f235f","displayName":"Load DataFrame","description":"This step will load a DataFrame given a DataFrameReaderOptions object.","type":"Pipeline","category":"InputOutput","params":[{"type":"object","name":"dataFrameReaderOptions","required":false,"className":"com.acxiom.pipeline.steps.DataFrameReaderOptions","description":"The DataFrameReaderOptions to use when creating the DataFrame"}],"engineMeta":{"spark":"DataFrameSteps.loadDataFrame","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.DataFrame"}},"tags":["metalus-common_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]},{"id":"8a00dcf8-e6a9-4833-871e-c1f3397ab378","displayName":"Build a DataFrameWriter Object","description":"This step will build a DataFrameWriter object that can be used to write a file into a dataframe","type":"Pipeline","category":"InputOutput","params":[{"type":"text","name":"dataFrame","required":false,"className":"org.apache.spark.sql.Dataset[T]","description":"The DataFrame to use when creating the DataFrameWriter"},{"type":"object","name":"options","required":false,"className":"com.acxiom.pipeline.steps.DataFrameWriterOptions","description":"The DataFrameWriterOptions to use when writing the DataFrame"}],"engineMeta":{"spark":"DataFrameSteps.getDataFrameWriter","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.DataFrameWriter[T]"}},"tags":["metalus-common_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]},{"id":"9aa6ae9f-cbeb-4b36-ba6a-02eee0a46558","displayName":"Save Using DataFrameWriter","description":"This step will save a DataFrame given a dataFrameWriter[Row].","type":"Pipeline","category":"InputOutput","params":[{"type":"text","name":"dataFrameWriter","required":false,"className":"org.apache.spark.sql.DataFrameWriter[_]","description":"The DataFrameWriter to use when 
saving"}],"engineMeta":{"spark":"DataFrameSteps.save","pkg":"com.acxiom.pipeline.steps"},"tags":["metalus-common_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]},{"id":"e5ac3671-ee10-4d4e-8206-fec7effdf7b9","displayName":"Save DataFrame","description":"This step will save a DataFrame given a DataFrameWriterOptions object.","type":"Pipeline","category":"InputOutput","params":[{"type":"text","name":"dataFrame","required":false,"className":"org.apache.spark.sql.Dataset[_]","description":"The DataFrame to save"},{"type":"object","name":"dataFrameWriterOptions","required":false,"className":"com.acxiom.pipeline.steps.DataFrameWriterOptions","description":"The DataFrameWriterOptions to use for saving"}],"engineMeta":{"spark":"DataFrameSteps.saveDataFrame","pkg":"com.acxiom.pipeline.steps"},"tags":["metalus-common_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]},{"id":"fa05a970-476d-4617-be4d-950cfa65f2f8","displayName":"Persist DataFrame","description":"Persist a DataFrame to provided storage level.","type":"Pipeline","category":"InputOutput","params":[{"type":"text","name":"dataFrame","required":false,"className":"org.apache.spark.sql.Dataset[T]","description":"The DataFrame to persist"},{"type":"text","name":"storageLevel","required":false,"className":"String","description":"The optional storage mechanism to use when persisting the DataFrame"}],"engineMeta":{"spark":"DataFrameSteps.persistDataFrame","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.Dataset[T]"}},"tags":["metalus-common_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]},{"id":"e6fe074e-a1fa-476f-9569-d37295062186","displayName":"Unpersist DataFrame","description":"Unpersist a DataFrame.","type":"Pipeline","category":"InputOutput","params":[{"type":"text","name":"dataFrame","required":false,"className":"org.apache.spark.sql.Dataset[T]","description":"The DataFrame to unpersist"},{"type":"boolean","name":"blocking","required":false,"className":"Boolean","description":"Optional flag to indicate whether to block while unpersisting"}],"engineMeta":{"spark":"DataFrameSteps.unpersistDataFrame","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.Dataset[T]"}},"tags":["metalus-common_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]},{"id":"71323226-bcfd-4fa1-bf9e-24e455e41144","displayName":"RepartitionDataFrame","description":"Repartition a DataFrame","type":"Pipeline","category":"Transformation","params":[{"type":"text","name":"dataFrame","required":false,"className":"org.apache.spark.sql.Dataset[T]","description":"The DataFrame to repartition"},{"type":"text","name":"partitions","required":false,"className":"Int","description":"The number of partitions to use"},{"type":"boolean","name":"rangePartition","required":false,"className":"Boolean","description":"Flag indicating whether to repartition by range. 
This takes precedence over the shuffle flag"},{"type":"boolean","name":"shuffle","required":false,"className":"Boolean","description":"Flag indicating whether to perform a normal partition"},{"type":"text","name":"partitionExpressions","required":false,"className":"List[String]","description":"The partition expressions to use"}],"engineMeta":{"spark":"DataFrameSteps.repartitionDataFrame","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"org.apache.spark.sql.Dataset[T]"}},"tags":["metalus-common_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]},{"id":"5e0358a0-d567-5508-af61-c35a69286e4e","displayName":"Javascript Step","description":"Executes a script and returns the result","type":"Pipeline","category":"Scripting","params":[{"type":"text","name":"script","required":true,"className":"String"}],"engineMeta":{"spark":"JavascriptSteps.processScript","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"com.acxiom.pipeline.PipelineStepResponse"}},"tags":["metalus-common_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]},{"id":"570c9a80-8bd1-5f0c-9ae0-605921fe51e2","displayName":"Javascript Step with single object provided","description":"Executes a script with single object provided and returns the result","type":"Pipeline","category":"Scripting","params":[{"type":"text","name":"script","required":true,"className":"String"},{"type":"text","name":"value","required":true,"className":"Any"}],"engineMeta":{"spark":"JavascriptSteps.processScriptWithValue","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"com.acxiom.pipeline.PipelineStepResponse"}},"tags":["metalus-common_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]},{"id":"f92d4816-3c62-4c29-b420-f00994bfcd86","displayName":"Javascript Step with map of objects provided","description":"Executes a script with map of objects provided and returns the result","type":"Pipeline","category":"Scripting","params":[{"type":"script","name":"script","required":false,"language":"javascript","className":"String"},{"type":"text","name":"values","required":true,"className":"Map[String,Any]"},{"type":"boolean","name":"unwrapOptions","required":false,"className":"Boolean"}],"engineMeta":{"spark":"JavascriptSteps.processScriptWithValues","pkg":"com.acxiom.pipeline.steps","results":{"primaryType":"com.acxiom.pipeline.PipelineStepResponse"}},"tags":["metalus-common_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]}],"pkgObjs":[{"id":"com.acxiom.pipeline.steps.DataFrameReaderOptions","schema":"{\"$schema\":\"http://json-schema.org/draft-07/schema#\",\"title\":\"Data Frame Reader Options\",\"type\":\"object\",\"additionalProperties\":false,\"properties\":{\"format\":{\"type\":\"string\"},\"options\":{\"type\":\"object\",\"additionalProperties\":{\"type\":\"string\"}},\"schema\":{\"$ref\":\"#/definitions/Schema\"}},\"definitions\":{\"Schema\":{\"type\":\"object\",\"additionalProperties\":false,\"properties\":{\"attributes\":{\"type\":\"array\",\"items\":{\"$ref\":\"#/definitions/Attribute\"}}}},\"Attribute\":{\"type\":\"object\",\"additionalProperties\":false,\"properties\":{\"name\":{\"type\":\"string\"},\"dataType\":{\"$ref\":\"#/definitions/AttributeType\"}}},\"AttributeType\":{\"type\":\"object\",\"additionalProperties\":false,\"properties\":{\"baseType\":{\"type\":\"string\"},\"valueType\":{\"$ref\":\"#/definitions/AttributeType\"},\"nameType\":{\"$ref\":\"#/definitions/AttributeType\"},\"schema\":{\"$ref\":\"#/definitions/Schema\"}}}}}"},{"id":"com.acxiom.pipeline.steps.DataFrameWriterOptions","schema":"{\"$schema\":\"http://json-schema.org/draft-07/schema#\",\"title\":\"Data Frame Writer
Options\",\"type\":\"object\",\"additionalProperties\":false,\"properties\":{\"format\":{\"type\":\"string\"},\"saveMode\":{\"type\":\"string\"},\"options\":{\"type\":\"object\",\"additionalProperties\":{\"type\":\"string\"}},\"bucketingOptions\":{\"$ref\":\"#/definitions/BucketingOptions\"},\"partitionBy\":{\"type\":\"array\",\"items\":{\"type\":\"string\"}},\"sortBy\":{\"type\":\"array\",\"items\":{\"type\":\"string\"}}},\"definitions\":{\"BucketingOptions\":{\"type\":\"object\",\"additionalProperties\":false,\"properties\":{\"numBuckets\":{\"type\":\"integer\"},\"columns\":{\"type\":\"array\",\"items\":{\"type\":\"string\"}}},\"required\":[\"numBuckets\"]}}}"},{"id":"com.acxiom.pipeline.steps.JDBCDataFrameReaderOptions","schema":"{\"$schema\":\"http://json-schema.org/draft-07/schema#\",\"title\":\"JDBC Data Frame Reader Options\",\"type\":\"object\",\"additionalProperties\":false,\"properties\":{\"url\":{\"type\":\"string\"},\"table\":{\"type\":\"string\"},\"predicates\":{\"type\":\"array\",\"items\":{\"type\":\"string\"}},\"readerOptions\":{\"$ref\":\"#/definitions/DataFrameReaderOptions\"}},\"definitions\":{\"DataFrameReaderOptions\":{\"type\":\"object\",\"additionalProperties\":false,\"properties\":{\"format\":{\"type\":\"string\"},\"options\":{\"type\":\"object\",\"additionalProperties\":{\"type\":\"string\"}},\"schema\":{\"$ref\":\"#/definitions/Schema\"}}},\"Schema\":{\"type\":\"object\",\"additionalProperties\":false,\"properties\":{\"attributes\":{\"type\":\"array\",\"items\":{\"$ref\":\"#/definitions/Attribute\"}}}},\"Attribute\":{\"type\":\"object\",\"additionalProperties\":false,\"properties\":{\"name\":{\"type\":\"string\"},\"dataType\":{\"$ref\":\"#/definitions/AttributeType\"}}},\"AttributeType\":{\"type\":\"object\",\"additionalProperties\":false,\"properties\":{\"baseType\":{\"type\":\"string\"},\"valueType\":{\"$ref\":\"#/definitions/AttributeType\"},\"nameType\":{\"$ref\":\"#/definitions/AttributeType\"},\"schema\":{\"$ref\":\"#/definitions/Schema\"}}}}}"},{"id":"com.acxiom.pipeline.steps.JDBCDataFrameWriterOptions","schema":"{\"$schema\":\"http://json-schema.org/draft-07/schema#\",\"title\":\"JDBC Data Frame Writer Options\",\"type\":\"object\",\"additionalProperties\":false,\"properties\":{\"url\":{\"type\":\"string\"},\"table\":{\"type\":\"string\"},\"writerOptions\":{\"$ref\":\"#/definitions/DataFrameWriterOptions\"}},\"definitions\":{\"DataFrameWriterOptions\":{\"type\":\"object\",\"additionalProperties\":false,\"properties\":{\"format\":{\"type\":\"string\"},\"saveMode\":{\"type\":\"string\"},\"options\":{\"type\":\"object\",\"additionalProperties\":{\"type\":\"string\"}},\"bucketingOptions\":{\"$ref\":\"#/definitions/BucketingOptions\"},\"partitionBy\":{\"type\":\"array\",\"items\":{\"type\":\"string\"}},\"sortBy\":{\"type\":\"array\",\"items\":{\"type\":\"string\"}}}},\"BucketingOptions\":{\"type\":\"object\",\"additionalProperties\":false,\"properties\":{\"numBuckets\":{\"type\":\"integer\"},\"columns\":{\"type\":\"array\",\"items\":{\"type\":\"string\"}}},\"required\":[\"numBuckets\"]}}}"}]} \ No newline at end of file diff --git a/manual_tests/testData/metalus-gcp/pipelines.json b/manual_tests/testData/metalus-gcp/pipelines.json new file mode 100644 index 00000000..0637a088 --- /dev/null +++ b/manual_tests/testData/metalus-gcp/pipelines.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/manual_tests/testData/metalus-gcp/steps.json b/manual_tests/testData/metalus-gcp/steps.json new file mode 100644 index 00000000..40d6df5b --- /dev/null +++ 
b/manual_tests/testData/metalus-gcp/steps.json @@ -0,0 +1 @@ +{"pkgs":["com.acxiom.gcp.steps"],"steps":[{"id":"451d4dc8-9bce-4cb4-a91d-1a09e0efd9b8","displayName":"Write DataFrame to a PubSub Topic","description":"This step will write a DataFrame to a PubSub Topic","type":"Pipeline","category":"GCP","params":[{"type":"text","name":"dataFrame","required":false,"className":"org.apache.spark.sql.DataFrame","description":"The DataFrame to post to the Pub/Sub topic"},{"type":"text","name":"topicName","required":false,"className":"String"},{"type":"text","name":"separator","required":false,"className":"String","description":"The separator character to use when combining the column data"},{"type":"text","name":"credentials","required":false,"className":"Map[String,String]","description":"The optional credentials to use for Pub/Sub access"}],"engineMeta":{"spark":"PubSubSteps.writeToStreamWithCredentials","pkg":"com.acxiom.gcp.steps"},"tags":["metalus-gcp_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]},{"id":"aaa880e1-4190-4ffe-9fda-4150680f17c9","displayName":"Write DataFrame to a PubSub Topic Using Global Credentials","description":"This step will write a DataFrame to a PubSub Topic using the CredentialProvider to get Credentials","type":"Pipeline","category":"GCP","params":[{"type":"text","name":"dataFrame","required":false,"className":"org.apache.spark.sql.DataFrame","description":"The DataFrame to post to the Pub/Sub topic"},{"type":"text","name":"topicName","required":false,"className":"String"},{"type":"text","name":"separator","required":false,"className":"String","description":"The separator character to use when combining the column data"}],"engineMeta":{"spark":"PubSubSteps.writeToStream","pkg":"com.acxiom.gcp.steps"},"tags":["metalus-gcp_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]},{"id":"2c937e74-8735-46d6-abfe-0c040ae8f435","displayName":"Write a single message to a PubSub Topic Using Provided Credentials","description":"This step will write a message to a PubSub Topic using the provided Credentials","type":"Pipeline","category":"GCP","params":[{"type":"text","name":"message","required":false,"className":"String","description":"The message to post to the Pub/Sub topic"},{"type":"text","name":"topicName","required":false,"className":"String"},{"type":"text","name":"credentials","required":false,"className":"Map[String,String]","description":"The optional credentials to use when posting"}],"engineMeta":{"spark":"PubSubSteps.postMessage","pkg":"com.acxiom.gcp.steps"},"tags":["metalus-gcp_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]},{"id":"b359130d-8e11-44e4-b552-9cef6150bc2b","displayName":"Write a single message to a PubSub Topic Using Global Credentials","description":"This step will write a message to a PubSub Topic using the CredentialProvider to get Credentials","type":"Pipeline","category":"GCP","params":[{"type":"text","name":"message","required":false,"className":"String","description":"The message to post to the Pub/Sub topic"},{"type":"text","name":"topicName","required":false,"className":"String"}],"engineMeta":{"spark":"PubSubSteps.postMessage","pkg":"com.acxiom.gcp.steps"},"tags":["metalus-gcp_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]},{"id":"1bc6e2b3-6513-4763-b441-4c136a51daa8","displayName":"Load DataFrame from GCS path","description":"This step will read a DataFrame from the given GCS path","type":"Pipeline","category":"GCP","params":[{"type":"text","name":"path","required":false,"className":"String","description":"The GCS path to load
data"},{"type":"text","name":"credentials","required":false,"className":"Map[String,String]"},{"type":"object","name":"options","required":false,"className":"com.acxiom.pipeline.steps.DataFrameReaderOptions"}],"engineMeta":{"spark":"GCSSteps.readFromPath","pkg":"com.acxiom.gcp.steps","results":{"primaryType":"org.apache.spark.sql.DataFrame"}},"tags":["metalus-gcp_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]},{"id":"bee8b059-9be5-45b9-8fa5-dd58bb5114ee","displayName":"Load DataFrame from GCS paths","description":"This step will read a DataFrame from the given GCS paths","type":"Pipeline","category":"GCP","params":[{"type":"text","name":"paths","required":false,"className":"List[String]","description":"The GCS paths to load data"},{"type":"text","name":"credentials","required":false,"className":"Map[String,String]"},{"type":"object","name":"options","required":false,"className":"com.acxiom.pipeline.steps.DataFrameReaderOptions"}],"engineMeta":{"spark":"GCSSteps.readFromPaths","pkg":"com.acxiom.gcp.steps","results":{"primaryType":"org.apache.spark.sql.DataFrame"}},"tags":["metalus-gcp_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]},{"id":"1d1ff5ad-379f-4dfa-9403-019a0eb0032c","displayName":"Write DataFrame to GCS","description":"This step will write a DataFrame in a given format to GCS","type":"Pipeline","category":"GCP","params":[{"type":"text","name":"dataFrame","required":false,"className":"org.apache.spark.sql.DataFrame","description":"The DataFrame to write"},{"type":"text","name":"path","required":false,"className":"String","description":"The GCS path to write data"},{"type":"text","name":"credentials","required":false,"className":"Map[String,String]"},{"type":"object","name":"options","required":false,"className":"com.acxiom.pipeline.steps.DataFrameWriterOptions"}],"engineMeta":{"spark":"GCSSteps.writeToPath","pkg":"com.acxiom.gcp.steps"},"tags":["metalus-gcp_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]},{"id":"2827de67-26c0-4719-be57-6fc5f7af17c7","displayName":"Create GCS FileManager","description":"Simple function to generate the GCSFileManager for a GCS file system","type":"Pipeline","category":"GCP","params":[{"type":"text","name":"projectId","required":false,"className":"String","description":"The projectId for the GCS bucket"},{"type":"text","name":"bucket","required":false,"className":"String","description":"The GCS bucket"},{"type":"text","name":"credentials","required":false,"className":"Map[String,String]"}],"engineMeta":{"spark":"GCSSteps.createFileManager","pkg":"com.acxiom.gcp.steps","results":{"primaryType":"com.acxiom.gcp.fs.GCSFileManager"}},"tags":["metalus-gcp_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]}],"pkgObjs":[{"id":"com.acxiom.pipeline.steps.DataFrameReaderOptions","schema":"{\"$schema\":\"http://json-schema.org/draft-07/schema#\",\"title\":\"Data Frame Reader 
Options\",\"type\":\"object\",\"additionalProperties\":false,\"properties\":{\"format\":{\"type\":\"string\"},\"options\":{\"type\":\"object\",\"additionalProperties\":{\"type\":\"string\"}},\"schema\":{\"$ref\":\"#/definitions/Schema\"}},\"definitions\":{\"Schema\":{\"type\":\"object\",\"additionalProperties\":false,\"properties\":{\"attributes\":{\"type\":\"array\",\"items\":{\"$ref\":\"#/definitions/Attribute\"}}}},\"Attribute\":{\"type\":\"object\",\"additionalProperties\":false,\"properties\":{\"name\":{\"type\":\"string\"},\"dataType\":{\"$ref\":\"#/definitions/AttributeType\"}}},\"AttributeType\":{\"type\":\"object\",\"additionalProperties\":false,\"properties\":{\"baseType\":{\"type\":\"string\"},\"valueType\":{\"$ref\":\"#/definitions/AttributeType\"},\"nameType\":{\"$ref\":\"#/definitions/AttributeType\"},\"schema\":{\"$ref\":\"#/definitions/Schema\"}}}}}"},{"id":"com.acxiom.pipeline.steps.DataFrameWriterOptions","schema":"{\"$schema\":\"http://json-schema.org/draft-07/schema#\",\"title\":\"Data Frame Writer Options\",\"type\":\"object\",\"additionalProperties\":false,\"properties\":{\"format\":{\"type\":\"string\"},\"saveMode\":{\"type\":\"string\"},\"options\":{\"type\":\"object\",\"additionalProperties\":{\"type\":\"string\"}},\"bucketingOptions\":{\"$ref\":\"#/definitions/BucketingOptions\"},\"partitionBy\":{\"type\":\"array\",\"items\":{\"type\":\"string\"}},\"sortBy\":{\"type\":\"array\",\"items\":{\"type\":\"string\"}}},\"definitions\":{\"BucketingOptions\":{\"type\":\"object\",\"additionalProperties\":false,\"properties\":{\"numBuckets\":{\"type\":\"integer\"},\"columns\":{\"type\":\"array\",\"items\":{\"type\":\"string\"}}},\"required\":[\"numBuckets\"]}}}"}]} \ No newline at end of file diff --git a/manual_tests/testData/metalus-kafka/pipelines.json b/manual_tests/testData/metalus-kafka/pipelines.json new file mode 100644 index 00000000..0637a088 --- /dev/null +++ b/manual_tests/testData/metalus-kafka/pipelines.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/manual_tests/testData/metalus-kafka/steps.json b/manual_tests/testData/metalus-kafka/steps.json new file mode 100644 index 00000000..d69f9e14 --- /dev/null +++ b/manual_tests/testData/metalus-kafka/steps.json @@ -0,0 +1 @@ +{"pkgs":["com.acxiom.kafka.steps"],"steps":[{"id":"abd6cf0f-f328-41a2-a84b-044e76928017","displayName":"Write DataFrame to a Kafka Topic Using Key Field","description":"This step will write a DataFrame to a Kafka Topic using the value in the keyField for each row as the key","type":"Pipeline","category":"Kafka","params":[{"type":"text","name":"dataFrame","required":false,"className":"org.apache.spark.sql.DataFrame","description":"The DataFrame to post to the Kafka topic"},{"type":"text","name":"topic","required":false,"className":"String"},{"type":"text","name":"kafkaNodes","required":false,"className":"String"},{"type":"text","name":"keyField","required":false,"className":"String","description":"The column name to use to get the key value"},{"type":"text","name":"separator","required":false,"className":"String","description":"The separator character to use when combining the column data"},{"type":"text","name":"clientId","required":false,"className":"String"}],"engineMeta":{"spark":"KafkaSteps.writeToStreamByKeyField","pkg":"com.acxiom.kafka.steps"},"tags":["metalus-kafka_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]},{"id":"eaf68ea6-1c37-4427-85be-165ee9777c4d","displayName":"Write DataFrame to a Kafka Topic Using static key","description":"This step will write a DataFrame to a Kafka
Topic using the provided key","type":"Pipeline","category":"Kafka","params":[{"type":"text","name":"dataFrame","required":false,"className":"org.apache.spark.sql.DataFrame","description":"The DataFrame to post to the Kafka topic"},{"type":"text","name":"topic","required":false,"className":"String"},{"type":"text","name":"kafkaNodes","required":false,"className":"String"},{"type":"text","name":"key","required":false,"className":"String","description":"The key value"},{"type":"text","name":"separator","required":false,"className":"String","description":"The separator character to use when combining the column data"},{"type":"text","name":"clientId","required":false,"className":"String"}],"engineMeta":{"spark":"KafkaSteps.writeToStreamByKey","pkg":"com.acxiom.kafka.steps"},"tags":["metalus-kafka_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]},{"id":"74efe1e1-edd1-4c38-8e2b-bb693e3e3f4c","displayName":"Write a single message to a Kafka Topic Using static key","description":"This step will write a single message to a Kafka Topic using the provided key","type":"Pipeline","category":"Kafka","params":[{"type":"text","name":"message","required":false,"className":"String","description":"The message to post to the Kafka topic"},{"type":"text","name":"topic","required":false,"className":"String"},{"type":"text","name":"kafkaNodes","required":false,"className":"String"},{"type":"text","name":"key","required":false,"className":"String","description":"The key value"},{"type":"text","name":"clientId","required":false,"className":"String"}],"engineMeta":{"spark":"KafkaSteps.postMessage","pkg":"com.acxiom.kafka.steps"},"tags":["metalus-kafka_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]}],"pkgObjs":[]} \ No newline at end of file diff --git a/manual_tests/testData/metalus-mongo/pipelines.json b/manual_tests/testData/metalus-mongo/pipelines.json new file mode 100644 index 00000000..0637a088 --- /dev/null +++ b/manual_tests/testData/metalus-mongo/pipelines.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/manual_tests/testData/metalus-mongo/steps.json b/manual_tests/testData/metalus-mongo/steps.json new file mode 100644 index 00000000..c1c8d8ed --- /dev/null +++ b/manual_tests/testData/metalus-mongo/steps.json @@ -0,0 +1 @@ +{"pkgs":["com.acxiom.metalus.steps.mongo"],"steps":[{"id":"bb6fe036-a981-41ad-afeb-b9c79e44e11d","displayName":"Writes a DataFrame to a Mongo database","description":"This step will write the contents of a DataFrame to the Mongo database and collection specified","type":"Pipeline","category":"Mongo","params":[{"type":"text","name":"dataFrame","required":false,"className":"org.apache.spark.sql.DataFrame","description":"The DataFrame to write"},{"type":"text","name":"uri","required":false,"className":"String"},{"type":"text","name":"collectionName","required":false,"className":"String"}],"engineMeta":{"spark":"MongoSteps.writeDataFrameToMongo","pkg":"com.acxiom.metalus.steps.mongo"},"tags":["metalus-mongo_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]},{"id":"c4baa4a2-1c37-47e7-bea7-85aeb4477a03","displayName":"Creates a DataFrame from a Mongo database","description":"This step will read the contents of a Mongo database and collection into a
DataFrame","type":"Pipeline","category":"Mongo","params":[{"type":"text","name":"uri","required":false,"className":"String"},{"type":"text","name":"collectionName","required":false,"className":"String"}],"engineMeta":{"spark":"MongoSteps.loadDataFrameFromMongo","pkg":"com.acxiom.metalus.steps.mongo","results":{"primaryType":"org.apache.spark.sql.DataFrame"}},"tags":["metalus-mongo_2.11-spark_2.4-1.8.0-SNAPSHOT.jar"]}],"pkgObjs":[]} \ No newline at end of file diff --git a/manual_tests/testData/stop_server.js b/manual_tests/testData/stop_server.js new file mode 100644 index 00000000..18f0dbfa --- /dev/null +++ b/manual_tests/testData/stop_server.js @@ -0,0 +1,2 @@ +use admin +db.shutdownServer() diff --git a/manual_tests/testData/validate_mongo_data.js b/manual_tests/testData/validate_mongo_data.js new file mode 100644 index 00000000..c96eb31e --- /dev/null +++ b/manual_tests/testData/validate_mongo_data.js @@ -0,0 +1,21 @@ +use application_examples; + +var ccCount = db.getCollection('creditCards').count(); +if (ccCount !== 1001) { + print('Credit Card count is not correct!'); +} + +var customerCount = db.getCollection('customers').count(); +if (customerCount !== 1001) { + print('Customer count is not correct!'); +} + +var orderCount = db.getCollection('orders').count(); +if (orderCount !== 2001) { + print('Order count is not correct!'); +} + +var productCount = db.getCollection('products').count(); +if (productCount !== 868) { + print('Product count is not correct!'); +} diff --git a/metalus-utils/assembly.xml b/metalus-utils/assembly.xml index e1372444..c263485b 100644 --- a/metalus-utils/assembly.xml +++ b/metalus-utils/assembly.xml @@ -24,6 +24,7 @@ true + false metalus-utils/libraries diff --git a/pom.xml b/pom.xml index 54fb0bf9..8a54c837 100644 --- a/pom.xml +++ b/pom.xml @@ -49,19 +49,12 @@ 1.8 1.8 UTF-8 - 2.11.12 - 2.11 - 2.3 - 2.3.3 - 3.2.11 - 2.3.2 ${basedir}/scalastyle_config.xml UTF-8 scoverage ${project.build.directory}/scoverage.xml ${basedir}/target/checkstyle-result.xml src/test/scala/** - 2.9.7 @@ -175,6 +168,10 @@ net.alchim31.maven scala-maven-plugin 3.2.0 + + ${scala.compat.version} + ${scala.version} + @@ -341,49 +338,35 @@ - - spark_2.3 - - 2.3 - 2.3.3 - 3.2.11 - 2.3.2 - 1.7.3 - - spark_2.4 true + 2.11.12 + 2.11 2.4 2.4.6 3.5.3 - 2.4.1 + 2.4.2 1.12.0 + 2.9.7 spark_3.0 3.0 - 3.0.0 + 3.0.1 3.6.6 - 2.4.1 + 3.0.0 2.12 2.12.11 2.11.1 1.12.0 - - scala_2.12 - - 2.12 - 2.12.11 - - diff --git a/readme.md b/readme.md index fa8c41c6..e184dfba 100644 --- a/readme.md +++ b/readme.md @@ -20,70 +20,46 @@ There are several sub-projects: ### [Metalus Pipeline Core](metalus-core/readme.md) This project contains the core library and is the minimum requirement for any application. -[Maven 2.11 Spark 2.3 library](https://search.maven.org/search?q=a:metalus-core_2.11-spark_2.3) - [Maven 2.11 Spark 2.4 library](https://search.maven.org/search?q=a:metalus-core_2.11-spark_2.4) -[Maven 2.12 Spark 2.4 library](https://search.maven.org/search?q=a:metalus-core_2.12-spark_2.4) - [Maven 2.12 Spark 3.0 library](https://search.maven.org/search?q=a:metalus-core_2.12-spark_3.0) ### [Metalus Common Step Library](metalus-common/readme.md) This step library contains steps that are considered generic enough to be used in any project. 
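The `com.acxiom.pipeline.steps.DataFrameReaderOptions` and `DataFrameWriterOptions` objects referenced throughout the extracted step metadata above are provided by this library. As a rough sketch only, the case classes below are reconstructed from the `pkgObjs` JSON schemas embedded in the metalus-gcp test data: the field names and types come from those schemas, while the case-class layout, `Option` wrappers, and default values are assumptions.

```scala
// Sketch reconstructed from the pkgObjs schemas above; Option wrappers and defaults are assumptions.
case class Schema(attributes: Seq[Attribute])

case class Attribute(name: String, dataType: AttributeType)

// valueType/nameType allow nested container types; schema allows nested structs.
case class AttributeType(baseType: String,
                         valueType: Option[AttributeType] = None,
                         nameType: Option[AttributeType] = None,
                         schema: Option[Schema] = None)

case class DataFrameReaderOptions(format: String = "parquet",
                                  options: Option[Map[String, String]] = None,
                                  schema: Option[Schema] = None)

// numBuckets is the only property the BucketingOptions schema marks as required.
case class BucketingOptions(numBuckets: Int, columns: List[String] = List())

case class DataFrameWriterOptions(format: String = "parquet",
                                  saveMode: String = "Overwrite",
                                  options: Option[Map[String, String]] = None,
                                  bucketingOptions: Option[BucketingOptions] = None,
                                  partitionBy: Option[List[String]] = None,
                                  sortBy: Option[List[String]] = None)
```

The free-form `options` map mirrors the string-to-string options accepted by Spark's `DataFrameReader` and `DataFrameWriter`, which is why the schemas allow arbitrary additional string properties there.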
-[Maven 2.11 Spark 2.3 library](https://search.maven.org/search?q=a:metalus-common_2.11-spark_2.3) - [Maven 2.11 Spark 2.4 library](https://search.maven.org/search?q=a:metalus-common_2.11-spark_2.4) -[Maven 2.12 Spark 2.4 library](https://search.maven.org/search?q=a:metalus-common_2.12-spark_2.4) - [Maven 2.12 Spark 3.0 library](https://search.maven.org/search?q=a:metalus-common_2.12-spark_3.0) ### [Metalus AWS Step Library](metalus-aws/readme.md) This step library contains AWS specific components. The [Kinesis](https://aws.amazon.com/kinesis/) driver provides a basic implementation that gathers data and then initiates the Metalus Pipeline Core for processing of the incoming data. -[Maven 2.11 Spark 2.3 library](https://search.maven.org/search?q=a:metalus-aws_2.11-spark_2.3) - [Maven 2.11 Spark 2.4 library](https://search.maven.org/search?q=a:metalus-aws_2.11-spark_2.4) -[Maven 2.12 Spark 2.4 library](https://search.maven.org/search?q=a:metalus-aws_2.12-spark_2.4) - [Maven 2.12 Spark 3.0 library](https://search.maven.org/search?q=a:metalus-aws_2.12-spark_3.0) ### [Metalus GCP Step Library](metalus-gcp/readme.md) This step library contains GCP specific components. The [Pub/Sub](https://cloud.google.com/pubsub/docs/overview) driver provides a basic implementation that gathers data and then initiates the Metalus Pipeline Core for processing of the incoming data. -[Maven 2.11 Spark 2.3 library](https://search.maven.org/search?q=a:metalus-gcp_2.11-spark_2.3) - [Maven 2.11 Spark 2.4 library](https://search.maven.org/search?q=a:metalus-gcp_2.11-spark_2.4) -[Maven 2.12 Spark 2.4 library](https://search.maven.org/search?q=a:metalus-gcp_2.12-spark_2.4) - [Maven 2.12 Spark 3.0 library](https://search.maven.org/search?q=a:metalus-gcp_2.12-spark_3.0) ### [Metalus Kafka Step Library](metalus-kafka/readme.md) This step library contains Kafka specific components. The [Kafka](https://kafka.apache.org/) driver provides a basic implementation that gathers data and then initiates the Metalus Pipeline Core for processing of the incoming data. -[Maven 2.11 Spark 2.3 library](https://search.maven.org/search?q=a:metalus-kafka_2.11-spark_2.3) - [Maven 2.11 Spark 2.4 library](https://search.maven.org/search?q=a:metalus-kafka_2.11-spark_2.4) -[Maven 2.12 Spark 2.4 library](https://search.maven.org/search?q=a:metalus-kafka_2.12-spark_2.4) - [Maven 2.12 Spark 3.0 library](https://search.maven.org/search?q=a:metalus-kafka_2.12-spark_3.0) ### [Metalus Mongo Step Library](metalus-mongo/readme.md) This step library adds support for working with Mongo. -[Maven 2.11 Spark 2.3 library](https://search.maven.org/search?q=a:metalus-mongo_2.11-spark_2.3) - [Maven 2.11 Spark 2.4 library](https://search.maven.org/search?q=a:metalus-mongo_2.11-spark_2.4) -[Maven 2.12 Spark 2.4 library](https://search.maven.org/search?q=a:metalus-mongo_2.12-spark_2.4) - [Maven 2.12 Spark 3.0 library](https://search.maven.org/search?q=a:metalus-mongo_2.12-spark_3.0) ### [Metalus Pipeline Examples](metalus-examples/readme.md) @@ -92,12 +68,8 @@ This project provides several examples to help demonstrate how to use the librar ### [Metalus Utilities](metalus-utils/readme.md) This project provides utilities that help work with the project.
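Those utilities include the Metadata Extractor that generated the steps.json fixtures above by scanning step jars for annotated Scala objects. The sketch below, modeled on the first metalus-mongo entry, shows the general shape of such an annotated step; the `@StepFunction` annotation follows the `com.acxiom.pipeline.annotations` convention used in this repository, but the exact signature and the Mongo write body should be treated as illustrative assumptions rather than the library's actual implementation.

```scala
import com.acxiom.pipeline.annotations.StepFunction
import org.apache.spark.sql.DataFrame

object MongoSteps {
  // Maps to the "bb6fe036-..." entry in metalus-mongo/steps.json above:
  // the annotation carries the id, displayName, description, type, and category,
  // and the params array is derived from the method signature.
  @StepFunction("bb6fe036-a981-41ad-afeb-b9c79e44e11d",
    "Writes a DataFrame to a Mongo database",
    "This step will write the contents of a DataFrame to the Mongo database and collection specified",
    "Pipeline",
    "Mongo")
  def writeDataFrameToMongo(dataFrame: DataFrame, uri: String, collectionName: String): Unit = {
    // Illustrative body only; the short format name varies by mongo-spark-connector version.
    dataFrame.write.format("mongo")
      .option("uri", uri)
      .option("collection", collectionName)
      .mode("append")
      .save()
  }
}
```

Note how the parameter names in the signature (dataFrame, uri, collectionName) line up with the params array in the fixture; this is why the manual tests can diff freshly extracted metadata against these checked-in JSON files.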
-[Maven 2.11 Spark 2.3 library](https://search.maven.org/search?q=a:metalus-utils_2.11-spark_2.3) - [Maven 2.11 Spark 2.4 library](https://search.maven.org/search?q=a:metalus-utils_2.11-spark_2.4) -[Maven 2.12 Spark 2.4 library](https://search.maven.org/search?q=a:metalus-utils_2.12-spark_2.4) - [Maven 2.12 Spark 3.0 library](https://search.maven.org/search?q=a:metalus-utils_2.12-spark_3.0) ### [Metalus Application](metalus-application/readme.md)