From 0158e96c5a50298b91232208946b4e26d794408c Mon Sep 17 00:00:00 2001
From: Xiaochang Wu
Date: Thu, 15 Apr 2021 11:42:49 +0800
Subject: [PATCH] [ML-50] Merge #47 and prepare for OAP 1.1 (#51)

* [ML-35] Restrict printNumericTable to first 10 eigenvalues with first 20 dimensions (#36)
* restrict PCA printNumericTable to first 10 eigenvalues with first 20 dimensions
* fix ALS printNumericTable
* [ML-44] [PIP] Update to oneAPI 2021.2 and Rework examples for validation (#47)
* remove hibench examples
* Fix tbb linking
* Add data
* Add env.sh.template
* Revise examples
* Add ALS scala and modify als-pyspark.py
* nit
* Add build-all & run-all
* remove test-cluster/workloads and use examples for validation
* add setup-python3 and setup-cluster, fix paths
* fix java home
* add config ssh
* add config ssh
* add config ssh
* add config ssh
* fix config-ssh
* fix config-ssh
* fix config-ssh
* fix config-ssh
* fix config-ssh
* fix config-ssh
* set strict modes no
* clear out comments
* Update oneCCL and oneDAL to oneAPI 2021.2.0, don't build oneCCL from source
* nit
* Fix install oneapi and source setvars
* nit
* Add spark.driver.host
* Add ci-build
* nit
* Update
* Update
* Add --ccl-configuration=cpu_icc
* Update
* Update
* revert to build oneCCL from source and package related so
* nit
* nit
* Add ci-test-cluster
* update
* update
* update
* update
* update
* Add check: OneCCL doesn't support loopback IP
* update
* update
* update
* update
* update
* update
* update
* update
* update
* update
* update
* update README
* update
* update
* update
* update
* Update README and nit changes
---
 .github/workflows/oap-mllib-ci.yml            |  27 +--
 README.md                                     |  55 ++++--
 conf/env.sh.template                          |  46 +++++
 dev/ci-build.sh                               |  43 +++++
 dev/ci-test.sh                                |  61 +++++++
 dev/install-build-deps-centos.sh              |  21 +--
 dev/install-build-deps-ubuntu.sh              |  19 +-
 dev/setup-all.sh                              |  16 ++
 dev/test-cluster/ci-test-cluster.sh           |  19 ++
 dev/test-cluster/config-ssh.sh                |  11 +-
 dev/test-cluster/env.sh                       |  46 +++++
 dev/test-cluster/envs.sh                      |  22 ---
 dev/test-cluster/hadoop-env.sh                |   3 +-
 dev/test-cluster/setup-cluster.sh             |  29 +--
 ...{setup-python3-env.sh => setup-python3.sh} |   0
 dev/test-cluster/setup-spark-envs.sh          |  11 ++
 dev/test-cluster/spark-defaults.conf          |   4 +-
 dev/test-cluster/workloads/kmeans-pyspark.py  |  70 --------
 .../workloads/run-kmeans-pyspark.sh           |  48 -----
 examples/als-hibench/pom.xml                  | 100 -----------
 examples/als-hibench/run-hibench-oap-mllib.sh |  73 --------
 examples/als-hibench/run-hibench-vanilla.sh   |  61 -------
 .../hibench/sparkbench/ml/ALSExample.scala    | 111 ------------
 examples/als-pyspark/als-pyspark.py           |  18 +-
 examples/als-pyspark/run.sh                   |  57 +-----
 examples/{als-hibench => als}/build.sh        |   3 +-
 examples/{kmeans-hibench => als}/pom.xml      |  11 +-
 examples/als/run.sh                           |  28 +++
 .../apache/spark/examples/ml/ALSExample.scala |  91 ++++++++++
 examples/build-all.sh                         |   8 +
 examples/data/onedal_als_csr_ratings.txt      | 167 ++++++++++++++++++
 examples/{pca-pyspark => }/data/pca_data.csv  |   0
 examples/data/sample_kmeans_data.txt          |   6 +
 .../kmeans-hibench/run-hibench-oap-mllib.sh   |  86 ---------
 .../kmeans-hibench/run-hibench-vanilla.sh     |  58 ------
 .../hibench/sparkbench/ml/DenseKMeansDS.scala | 107 ----------
 examples/kmeans-pyspark/run.sh                |  57 +-----
 examples/kmeans/pom.xml                       |   3 +-
 examples/kmeans/run.sh                        |  60 +------
 examples/pca-pyspark/run.sh                   |  50 +-----
 examples/{kmeans-hibench => pca}/build.sh     |   0
 examples/pca/pom.xml                          |   3 +-
 examples/pca/run.sh                           |  23 ++-
 examples/run-all-pyspark.sh                   |   8 +
 examples/run-all-scala.sh                     |   8 +
 mllib-dal/build.sh                            |  10 +-
mllib-dal/pom.xml | 42 ++--- mllib-dal/src/assembly/assembly.xml | 12 +- mllib-dal/src/main/native/ALSDALImpl.cpp | 4 +- mllib-dal/src/main/native/Makefile | 8 +- mllib-dal/src/main/native/PCADALImpl.cpp | 4 +- .../org/apache/spark/ml/util/Utils.scala | 6 + mllib-dal/test-cluster.sh | 5 - mllib-dal/test.sh | 14 +- 54 files changed, 761 insertions(+), 1092 deletions(-) create mode 100644 conf/env.sh.template create mode 100755 dev/ci-build.sh create mode 100755 dev/ci-test.sh create mode 100755 dev/setup-all.sh create mode 100755 dev/test-cluster/ci-test-cluster.sh create mode 100644 dev/test-cluster/env.sh delete mode 100644 dev/test-cluster/envs.sh rename dev/test-cluster/{setup-python3-env.sh => setup-python3.sh} (100%) create mode 100755 dev/test-cluster/setup-spark-envs.sh delete mode 100644 dev/test-cluster/workloads/kmeans-pyspark.py delete mode 100755 dev/test-cluster/workloads/run-kmeans-pyspark.sh delete mode 100644 examples/als-hibench/pom.xml delete mode 100755 examples/als-hibench/run-hibench-oap-mllib.sh delete mode 100755 examples/als-hibench/run-hibench-vanilla.sh delete mode 100644 examples/als-hibench/src/main/scala/com/intel/hibench/sparkbench/ml/ALSExample.scala rename examples/{als-hibench => als}/build.sh (52%) rename examples/{kmeans-hibench => als}/pom.xml (91%) create mode 100755 examples/als/run.sh create mode 100644 examples/als/src/main/scala/org/apache/spark/examples/ml/ALSExample.scala create mode 100755 examples/build-all.sh create mode 100644 examples/data/onedal_als_csr_ratings.txt rename examples/{pca-pyspark => }/data/pca_data.csv (100%) create mode 100644 examples/data/sample_kmeans_data.txt delete mode 100755 examples/kmeans-hibench/run-hibench-oap-mllib.sh delete mode 100755 examples/kmeans-hibench/run-hibench-vanilla.sh delete mode 100644 examples/kmeans-hibench/src/main/scala/com/intel/hibench/sparkbench/ml/DenseKMeansDS.scala rename examples/{kmeans-hibench => pca}/build.sh (100%) create mode 100755 examples/run-all-pyspark.sh create mode 100755 examples/run-all-scala.sh delete mode 100755 mllib-dal/test-cluster.sh diff --git a/.github/workflows/oap-mllib-ci.yml b/.github/workflows/oap-mllib-ci.yml index 2c6973321..4f567086f 100644 --- a/.github/workflows/oap-mllib-ci.yml +++ b/.github/workflows/oap-mllib-ci.yml @@ -11,32 +11,21 @@ jobs: - name: Set up JDK 1.8 uses: actions/setup-java@v1 with: - java-version: 1.8 + java-version: 1.8 - name: Restore cached dependencies uses: actions/cache@v2 with: path: | + # /var/cache/apt/archives/*.deb ~/.m2/repository - ~/downloads - /opt/intel/inteloneapi /opt/intel/oneapi + ~/opt key: ${{ runner.os }}-${{ hashFiles('**/pom.xml', '{{github.workspace}}/dev/install-build-deps-ubuntu.sh') }} restore-keys: | ${{ runner.os }}- - - name: Set up dependencies - run: | - [ -d ~/downloads ] || mkdir ~/downloads - cd ~/downloads - [ -f spark-3.0.0-bin-hadoop2.7.tgz ] || wget http://archive.apache.org/dist/spark/spark-3.0.0/spark-3.0.0-bin-hadoop2.7.tgz - [ -d spark-3.0.0-bin-hadoop2.7 ] || cd ~ && tar -zxf downloads/spark-3.0.0-bin-hadoop2.7.tgz - export SPARK_HOME=~/spark-3.0.0-bin-hadoop2.7 - ${{github.workspace}}/dev/install-build-deps-ubuntu.sh + - name: Set up environments + run: | + source ${{github.workspace}}/dev/setup-all.sh - name: Build and Test - run: | - cd ${{github.workspace}}/mllib-dal - export ONEAPI_ROOT=/opt/intel/oneapi - source /opt/intel/oneapi/dal/latest/env/vars.sh - source /opt/intel/oneapi/tbb/latest/env/vars.sh - source /tmp/oneCCL/build/_install/env/setvars.sh - # temp disable and will enable for 
new release of oneCCL - #./build.sh + run: | + ${{github.workspace}}/dev/ci-test.sh diff --git a/README.md b/README.md index 477c74b1f..c9e003ba5 100644 --- a/README.md +++ b/README.md @@ -11,13 +11,13 @@ For those algorithms that are not accelerated by OAP MLlib, the original Spark M ## Online Documentation -You can find the all the OAP MLlib documents on the [project web page](https://oap-project.github.io/oap-mllib/). +You can find the all the OAP MLlib documents on the [project web page](https://oap-project.github.io/oap-mllib). ## Getting Started ### Java/Scala Users Preferred -Use a pre-built OAP MLlib JAR to get started. You can firstly download OAP package from [OAP-JARs-Tarball](https://github.com/Intel-bigdata/OAP/releases/download/v1.0.0-spark-3.0.0/oap-1.0.0-bin-spark-3.0.0.tar.gz) and extract this Tarball to get `oap-mllib-x.x.x-with-spark-x.x.x.jar` under `oap-1.0.0-bin-spark-3.0.0/jars`. +Use a pre-built OAP MLlib JAR to get started. You can firstly download OAP package from [OAP-JARs-Tarball](https://github.com/Intel-bigdata/OAP/releases/download/v1.1.0-spark-3.0.0/oap-1.1.0-bin-spark-3.0.0.tar.gz) and extract this Tarball to get `oap-mllib-x.x.x-with-spark-x.x.x.jar` under `oap-1.1.0-bin-spark-3.0.0/jars`. Then you can refer to the following [Running](#running) section to try out. @@ -58,24 +58,31 @@ spark.executor.extraClassPath ./oap-mllib-x.x.x-with-spark-x.x.x.jar ### Sanity Check -To use K-means example for sanity check, you need to upload a data file to your HDFS and change related variables in `run.sh` of kmeans example. Then run the following commands: +#### Setup `env.sh` ``` - $ cd oap-mllib/examples/kmeans - $ ./build.sh - $ ./run.sh + $ cd conf + $ cp env.sh.template env.sh ``` +Edit related variables in "`Minimun Settings`" of `env.sh` -### Benchmark with HiBench -Use [Hibench](https://github.com/Intel-bigdata/HiBench) to generate dataset with various profiles, and change related variables in `run-XXX.sh` script when applicable. Then run the following commands: +#### Upload example data files to HDFS ``` - $ cd oap-mllib/examples/kmeans-hibench + $ cd examples + $ hadoop fs -mkdir -p /user/$USER + $ hadoop fs -copyFromLocal data + $ hadoop fs -ls data +``` +#### Run K-means + +``` + $ cd examples/kmeans $ ./build.sh - $ ./run-hibench-oap-mllib.sh + $ ./run.sh ``` ### PySpark Support -As PySpark-based applications call their Scala couterparts, they shall be supported out-of-box. An example can be found in the [Examples](#examples) section. +As PySpark-based applications call their Scala couterparts, they shall be supported out-of-box. Examples can be found in the [Examples](#examples) section. ## Building @@ -86,7 +93,8 @@ We use [Apache Maven](https://maven.apache.org/) to manage and build source code * JDK 8.0+ * Apache Maven 3.6.2+ * GNU GCC 4.8.5+ -* Intel® oneAPI Toolkits 2021.1.1 Components: +* Intel® oneAPI Toolkits 2021.2+ Components: + - DPC++/C++ Compiler (dpcpp/clang++) - Data Analytics Library (oneDAL) - Threading Building Blocks (oneTBB) * [Open Source Intel® oneAPI Collective Communications Library (oneCCL)](https://github.com/oneapi-src/oneCCL) @@ -95,7 +103,7 @@ Intel® oneAPI Toolkits and its components can be downloaded and install from [h More details about oneAPI can be found [here](https://software.intel.com/content/www/us/en/develop/tools/oneapi.html). 
-You can also refer to [this script and comments in it](https://github.com/Intel-bigdata/OAP/blob/branch-1.0-spark-3.x/oap-mllib/dev/install-build-deps-centos.sh) to install correct oneAPI version and manually setup the environments. +You can refer to [this script](dev/install-build-deps-centos.sh) to install correct dependencies. Scala and Java dependency descriptions are already included in Maven POM file. @@ -107,7 +115,7 @@ To clone and build from open source oneCCL, run the following commands: ``` $ git clone https://github.com/oneapi-src/oneCCL $ cd oneCCL - $ git checkout beta08 + $ git checkout 2021.2 $ mkdir build && cd build $ cmake .. $ make -j install @@ -138,17 +146,19 @@ CCL_ROOT | Path to oneCCL home directory We suggest you to source `setvars.sh` script into current shell to setup building environments as following: ``` - $ source /opt/intel/inteloneapi/setvars.sh + $ source /opt/intel/oneapi/setvars.sh $ source /your/oneCCL_source_code/build/_install/env/setvars.sh ``` __Be noticed we are using our own built oneCCL instead, we should source oneCCL's `setvars.sh` to overwrite oneAPI one.__ +You can also refer to [this CI script](dev/ci-build.sh) to setup the building environments. + If you prefer to buid your own open source [oneDAL](https://github.com/oneapi-src/oneDAL), [oneTBB](https://github.com/oneapi-src/oneTBB) versions rather than use the ones included in oneAPI TookKits, you can refer to the related build instructions and manually source `setvars.sh` accordingly. To build, run the following commands: ``` - $ cd oap-mllib/mllib-dal + $ cd mllib-dal $ ./build.sh ``` @@ -156,12 +166,19 @@ The built JAR package will be placed in `target` directory with the name `oap-ml ## Examples -Example | Description +Example | Description ----------------|--------------------------- kmeans | K-means example for Scala kmeans-pyspark | K-means example for PySpark -kmeans-hibench | Use HiBench-generated input dataset to benchmark K-means performance +pca | PCA example for Scala +pca-pyspark | PCA example for PySpark +als | ALS example for Scala +als-pyspark | ALS example for PySpark ## List of Accelerated Algorithms -* K-Means (CPU, Experimental) +Algorithm | Category | Maturity +----------|----------|------------- +K-Means | CPU | Experimental +PCA | CPU | Experimental +ALS | CPU | Experimental diff --git a/conf/env.sh.template b/conf/env.sh.template new file mode 100644 index 000000000..ddcc84665 --- /dev/null +++ b/conf/env.sh.template @@ -0,0 +1,46 @@ +# == OAP MLlib users to customize the following environments for running examples ======= # + +# ============== Minimum Settings ============= # + +# Set OAP MLlib version (e.g. 
1.1.0) +OAP_MLLIB_VERSION=x.x.x +# Set Spark master +SPARK_MASTER=yarn +# Set Hadoop home path +export HADOOP_HOME=/path/to/your/hadoop/home +# Set Spark home path +export SPARK_HOME=/path/to/your/spark/home +# Set HDFS Root, should be hdfs://xxx or file://xxx +export HDFS_ROOT=hdfs://localhost:8020 +# Set OAP MLlib source code root directory +export OAP_MLLIB_ROOT=/path/to/oap-mllib/home + +# ============================================= # + +# Set HADOOP_CONF_DIR for Spark +export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop + +# Set JAR name & path +OAP_MLLIB_JAR_NAME=oap-mllib-$OAP_MLLIB_VERSION.jar +OAP_MLLIB_JAR=$OAP_MLLIB_ROOT/mllib-dal/target/$OAP_MLLIB_JAR_NAME +# Set Spark driver & executor classpaths, +# absolute path for driver, relative path for executor +SPARK_DRIVER_CLASSPATH=$OAP_MLLIB_JAR +SPARK_EXECUTOR_CLASSPATH=./$OAP_MLLIB_JAR_NAME + +# Set Spark resources, can be overwritten in example +SPARK_DRIVER_MEMORY=1G +SPARK_NUM_EXECUTORS=2 +SPARK_EXECUTOR_CORES=1 +SPARK_EXECUTOR_MEMORY=1G +SPARK_DEFAULT_PARALLELISM=$(expr $SPARK_NUM_EXECUTORS '*' $SPARK_EXECUTOR_CORES '*' 2) + +# Checks + +for dir in $SPARK_HOME $HADOOP_HOME $OAP_MLLIB_JAR +do + if [[ ! -e $dir ]]; then + echo $dir does not exist! + exit 1 + fi +done diff --git a/dev/ci-build.sh b/dev/ci-build.sh new file mode 100755 index 000000000..6f70ea372 --- /dev/null +++ b/dev/ci-build.sh @@ -0,0 +1,43 @@ +#!/usr/bin/env bash + +# Setup building envs +source /opt/intel/oneapi/setvars.sh +source /tmp/oneCCL/build/_install/env/setvars.sh + +# Check envs for building +if [[ -z $JAVA_HOME ]]; then + echo JAVA_HOME not defined! + exit 1 +fi + +if [[ -z $(which mvn) ]]; then + echo Maven not found! + exit 1 +fi + +if [[ -z $DAALROOT ]]; then + echo DAALROOT not defined! + exit 1 +fi + +if [[ -z $TBBROOT ]]; then + echo TBBROOT not defined! + exit 1 +fi + +if [[ -z $CCL_ROOT ]]; then + echo CCL_ROOT not defined! + exit 1 +fi + +echo === Building Environments === +echo JAVA_HOME=$JAVA_HOME +echo DAALROOT=$DAALROOT +echo TBBROOT=$TBBROOT +echo CCL_ROOT=$CCL_ROOT +echo Maven Version: $(mvn -v | head -n 1 | cut -f3 -d" ") +echo Clang Version: $(clang -dumpversion) +echo ============================= + +cd $GITHUB_WORKSPACE/mllib-dal +mvn --no-transfer-progress -DskipTests clean package diff --git a/dev/ci-test.sh b/dev/ci-test.sh new file mode 100755 index 000000000..7ce665089 --- /dev/null +++ b/dev/ci-test.sh @@ -0,0 +1,61 @@ +#!/usr/bin/env bash + +# Setup building envs +source /opt/intel/oneapi/setvars.sh +source /tmp/oneCCL/build/_install/env/setvars.sh + +# Check envs for building +if [[ -z $JAVA_HOME ]]; then + echo JAVA_HOME not defined! + exit 1 +fi + +if [[ -z $(which mvn) ]]; then + echo Maven not found! + exit 1 +fi + +if [[ -z $DAALROOT ]]; then + echo DAALROOT not defined! + exit 1 +fi + +if [[ -z $TBBROOT ]]; then + echo TBBROOT not defined! + exit 1 +fi + +if [[ -z $CCL_ROOT ]]; then + echo CCL_ROOT not defined! 
+ exit 1 +fi + +echo === Testing Environments === +echo JAVA_HOME=$JAVA_HOME +echo DAALROOT=$DAALROOT +echo TBBROOT=$TBBROOT +echo CCL_ROOT=$CCL_ROOT +echo Maven Version: $(mvn -v | head -n 1 | cut -f3 -d" ") +echo Clang Version: $(clang -dumpversion) +echo ============================= + +cd $GITHUB_WORKSPACE/mllib-dal + +# Build test +$GITHUB_WORKSPACE/dev/ci-build.sh + +# Enable signal chaining support for JNI +# export LD_PRELOAD=$JAVA_HOME/jre/lib/amd64/libjsig.so + +# -Dtest=none to turn off the Java tests + +# Test all +# mvn -Dtest=none -Dmaven.test.skip=false test + +# Individual test +mvn --no-transfer-progress -Dtest=none -DwildcardSuites=org.apache.spark.ml.clustering.IntelKMeansSuite test +mvn --no-transfer-progress -Dtest=none -DwildcardSuites=org.apache.spark.ml.feature.IntelPCASuite test +# mvn -Dtest=none -DwildcardSuites=org.apache.spark.ml.recommendation.IntelALSSuite test + +# Yarn cluster test +$GITHUB_WORKSPACE/dev/test-cluster/ci-test-cluster.sh \ No newline at end of file diff --git a/dev/install-build-deps-centos.sh b/dev/install-build-deps-centos.sh index 8a347fdef..5f8a6e5c8 100755 --- a/dev/install-build-deps-centos.sh +++ b/dev/install-build-deps-centos.sh @@ -12,8 +12,10 @@ gpgcheck=1 repo_gpgcheck=1 gpgkey=https://yum.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2023.PUB EOF - sudo mv /tmp/oneAPI.repo /etc/yum.repos.d - sudo yum install -y intel-oneapi-dal-devel-2021.1.1 intel-oneapi-tbb-devel-2021.1.1 + sudo mv /tmp/oneAPI.repo /etc/yum.repos.d + # sudo yum groupinstall -y "Development Tools" + # sudo yum install -y cmake + sudo yum install -y intel-oneapi-dpcpp-cpp-2021.2.0 intel-oneapi-dal-devel-2021.2.0 intel-oneapi-tbb-devel-2021.2.0 else echo "oneAPI components already installed!" fi @@ -23,16 +25,7 @@ cd /tmp rm -rf oneCCL git clone https://github.com/oneapi-src/oneCCL cd oneCCL -git checkout 2021.1 -mkdir -p build && cd build +git checkout 2021.2 +mkdir build && cd build cmake .. -make -j 2 install - -# -# Setup building environments manually: -# -# export ONEAPI_ROOT=/opt/intel/oneapi -# source /opt/intel/oneapi/dal/latest/env/vars.sh -# source /opt/intel/oneapi/tbb/latest/env/vars.sh -# source /tmp/oneCCL/build/_install/env/setvars.sh -# +make -j 2 install \ No newline at end of file diff --git a/dev/install-build-deps-ubuntu.sh b/dev/install-build-deps-ubuntu.sh index d43e35b89..36fafc1f6 100755 --- a/dev/install-build-deps-ubuntu.sh +++ b/dev/install-build-deps-ubuntu.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash -if [ ! -f /opt/intel/oneapi ]; then +if [ ! -d /opt/intel/oneapi ]; then echo "Installing oneAPI components ..." cd /tmp wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2023.PUB @@ -8,25 +8,18 @@ if [ ! -f /opt/intel/oneapi ]; then rm GPG-PUB-KEY-INTEL-SW-PRODUCTS-2023.PUB echo "deb https://apt.repos.intel.com/oneapi all main" | sudo tee /etc/apt/sources.list.d/oneAPI.list sudo apt-get update - sudo apt-get install intel-oneapi-dal-devel-2021.1.1 intel-oneapi-tbb-devel-2021.1.1 + # sudo apt-get install -y build-essential cmake + sudo apt-get install -y intel-oneapi-dpcpp-cpp-2021.2.0 intel-oneapi-dal-devel-2021.2.0 intel-oneapi-tbb-devel-2021.2.0 else echo "oneAPI components already installed!" -fi +fi echo "Building oneCCL ..." cd /tmp +rm -rf oneCCL git clone https://github.com/oneapi-src/oneCCL cd oneCCL -git checkout 2021.1 +git checkout 2021.2 mkdir build && cd build cmake .. 
make -j 2 install - -# -# Setup building environments manually: -# -# export ONEAPI_ROOT=/opt/intel/oneapi -# source /opt/intel/oneapi/dal/latest/env/vars.sh -# source /opt/intel/oneapi/tbb/latest/env/vars.sh -# source /tmp/oneCCL/build/_install/env/setvars.sh -# diff --git a/dev/setup-all.sh b/dev/setup-all.sh new file mode 100755 index 000000000..75defbe5a --- /dev/null +++ b/dev/setup-all.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env bash + +# Setup hosts +# Use second internal IP, use first IP will be SSH timeout +HOST_IP=$(hostname -I | cut -f2 -d" ") +echo $HOST_IP $(hostname) | sudo tee -a /etc/hosts + +# Install dependencies for building +$GITHUB_WORKSPACE/dev/install-build-deps-ubuntu.sh + +# Setup password-less & python3 +$GITHUB_WORKSPACE/dev/test-cluster/config-ssh.sh +$GITHUB_WORKSPACE/dev/test-cluster/setup-python3.sh + +# Setup cluster and envs +source $GITHUB_WORKSPACE/dev/test-cluster/setup-cluster.sh diff --git a/dev/test-cluster/ci-test-cluster.sh b/dev/test-cluster/ci-test-cluster.sh new file mode 100755 index 000000000..7a4600267 --- /dev/null +++ b/dev/test-cluster/ci-test-cluster.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env bash + +# Setup Spark envs +source $GITHUB_WORKSPACE/dev/test-cluster/setup-spark-envs.sh + +# Setup OAP MLlib envs +cp $GITHUB_WORKSPACE/dev/test-cluster/env.sh $GITHUB_WORKSPACE/conf + +cd $GITHUB_WORKSPACE/examples + +# Copy examples data to HDFS +hadoop fs -mkdir -p /user/$USER +hadoop fs -copyFromLocal data +hadoop fs -ls data + +# Build and run all examples +./build-all.sh +./run-all-scala.sh +./run-all-pyspark.sh diff --git a/dev/test-cluster/config-ssh.sh b/dev/test-cluster/config-ssh.sh index d093fa17a..a6fc2699e 100755 --- a/dev/test-cluster/config-ssh.sh +++ b/dev/test-cluster/config-ssh.sh @@ -2,5 +2,14 @@ ssh-keygen -q -N "" -t rsa -f ~/.ssh/id_rsa cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys +chmod 600 ~/.ssh/authorized_keys + +# Disable strict host key checking echo " StrictHostKeyChecking no " | sudo tee -a /etc/ssh/ssh_config -sudo service ssh restart +# Disable strict modes for less strict permission checking +echo "StrictModes no" | sudo tee -a /etc/ssh/sshd_config + +ls -ld ~/.ssh +ls -l ~/.ssh + +sudo systemctl restart ssh diff --git a/dev/test-cluster/env.sh b/dev/test-cluster/env.sh new file mode 100644 index 000000000..225db0b7b --- /dev/null +++ b/dev/test-cluster/env.sh @@ -0,0 +1,46 @@ +# == OAP MLlib users to customize the following environments for running examples ======= # + +# ============== Minimum Settings ============= # + +# Set OAP MLlib version (e.g. 
1.1.0) +OAP_MLLIB_VERSION=1.1.0 +# Set Spark master +SPARK_MASTER=yarn +# Set Hadoop home path +export HADOOP_HOME=$HADOOP_HOME +# Set Spark home path +export SPARK_HOME=$SPARK_HOME +# Set HDFS Root, should be hdfs://xxx or file://xxx +export HDFS_ROOT=hdfs://localhost:8020 +# Set OAP MLlib source code root directory +export OAP_MLLIB_ROOT=$GITHUB_WORKSPACE + +# ============================================= # + +# Set HADOOP_CONF_DIR for Spark +export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop + +# Set JAR name & path +OAP_MLLIB_JAR_NAME=oap-mllib-$OAP_MLLIB_VERSION.jar +OAP_MLLIB_JAR=$OAP_MLLIB_ROOT/mllib-dal/target/$OAP_MLLIB_JAR_NAME +# Set Spark driver & executor classpaths, +# absolute path for driver, relative path for executor +SPARK_DRIVER_CLASSPATH=$OAP_MLLIB_JAR +SPARK_EXECUTOR_CLASSPATH=./$OAP_MLLIB_JAR_NAME + +# Set Spark resources, can be overwritten in example +SPARK_DRIVER_MEMORY=1G +SPARK_NUM_EXECUTORS=2 +SPARK_EXECUTOR_CORES=1 +SPARK_EXECUTOR_MEMORY=1G +SPARK_DEFAULT_PARALLELISM=$(expr $SPARK_NUM_EXECUTORS '*' $SPARK_EXECUTOR_CORES '*' 2) + +# Checks + +for dir in $SPARK_HOME $HADOOP_HOME $OAP_MLLIB_JAR +do + if [[ ! -e $dir ]]; then + echo $dir does not exist! + exit 1 + fi +done diff --git a/dev/test-cluster/envs.sh b/dev/test-cluster/envs.sh deleted file mode 100644 index 71e8506e6..000000000 --- a/dev/test-cluster/envs.sh +++ /dev/null @@ -1,22 +0,0 @@ -# Set user Spark and Hadoop home directory -export HADOOP_HOME=~/opt/hadoop-2.7.7 -export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop -export SPARK_HOME=~/opt/spark-3.0.0-bin-hadoop2.7 - -export PYTHONPATH=$SPARK_HOME/python:$PYTHONPATH -export PYSPARK_PYTHON=python3 - -# Set user HDFS Root -export HDFS_ROOT=hdfs://localhost:8020 -export OAP_MLLIB_DATA_ROOT=OAPMLlib/Data -# Set user Intel MLlib Root directory -export OAP_MLLIB_ROOT=${GITHUB_WORKSPACE} - -# Target jar built -OAP_MLLIB_JAR_NAME=oap-mllib-1.1.0.jar -OAP_MLLIB_JAR=$OAP_MLLIB_ROOT/mllib-dal/target/$OAP_MLLIB_JAR_NAME - -# Use absolute path -SPARK_DRIVER_CLASSPATH=$OAP_MLLIB_JAR -# Use relative path -SPARK_EXECUTOR_CLASSPATH=./$OAP_MLLIB_JAR_NAME diff --git a/dev/test-cluster/hadoop-env.sh b/dev/test-cluster/hadoop-env.sh index bee6c1f69..f60b65a0b 100755 --- a/dev/test-cluster/hadoop-env.sh +++ b/dev/test-cluster/hadoop-env.sh @@ -22,8 +22,7 @@ # remote nodes. # The java implementation to use. -# export JAVA_HOME=${JAVA_HOME} -export JAVA_HOME=/usr/local/lib/jvm/openjdk8 +export JAVA_HOME=${JAVA_HOME} # The jsvc implementation to use. 
Jsvc is required to run secure datanodes # that bind to privileged ports to provide authentication of data transfer diff --git a/dev/test-cluster/setup-cluster.sh b/dev/test-cluster/setup-cluster.sh index eea058f80..b58c676ab 100755 --- a/dev/test-cluster/setup-cluster.sh +++ b/dev/test-cluster/setup-cluster.sh @@ -6,33 +6,38 @@ cd $WORK_DIR echo JAVA_HOME is $JAVA_HOME -mkdir ~/opt +[ -d ~/opt ] || mkdir ~/opt cd ~/opt -wget https://archive.apache.org/dist/spark/spark-3.0.0/spark-3.0.0-bin-hadoop2.7.tgz -tar -xzf spark-3.0.0-bin-hadoop2.7.tgz -wget https://archive.apache.org/dist/hadoop/core/hadoop-2.7.7/hadoop-2.7.7.tar.gz -tar -xzf hadoop-2.7.7.tar.gz +[ -f spark-3.0.0-bin-hadoop2.7.tgz ] || wget --no-verbose https://archive.apache.org/dist/spark/spark-3.0.0/spark-3.0.0-bin-hadoop2.7.tgz +[ -d spark-3.0.0-bin-hadoop2.7 ] || tar -xzf spark-3.0.0-bin-hadoop2.7.tgz +[ -f hadoop-2.7.7.tar.gz ] || wget --no-verbose https://archive.apache.org/dist/hadoop/core/hadoop-2.7.7/hadoop-2.7.7.tar.gz +[ -d hadoop-2.7.7 ] || tar -xzf hadoop-2.7.7.tar.gz cd $WORK_DIR +# Use second internal IP, use first IP will be SSH timeout +HOST_IP=$(hostname -I | cut -f2 -d" ") + +sed -i "s/localhost/$HOST_IP/g" core-site.xml +sed -i "s/localhost/$HOST_IP/g" yarn-site.xml + cp ./core-site.xml ~/opt/hadoop-2.7.7/etc/hadoop/ cp ./hdfs-site.xml ~/opt/hadoop-2.7.7/etc/hadoop/ cp ./yarn-site.xml ~/opt/hadoop-2.7.7/etc/hadoop/ cp ./hadoop-env.sh ~/opt/hadoop-2.7.7/etc/hadoop/ cp ./spark-defaults.conf ~/opt/spark-3.0.0-bin-hadoop2.7/conf +source ./setup-spark-envs.sh + +echo $HOST_IP > $HADOOP_HOME/etc/hadoop/slaves +echo $HOST_IP > $SPARK_HOME/conf/slaves + # create directories mkdir -p /tmp/run/hdfs/namenode mkdir -p /tmp/run/hdfs/datanode # hdfs format -~/opt/hadoop-2.7.7/bin/hdfs namenode -format - -export HADOOP_HOME=~/opt/hadoop-2.7.7 -export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop -export SPARK_HOME=~/opt/spark-3.0.0-bin-hadoop2.7 - -export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PATH +$HADOOP_HOME/bin/hdfs namenode -format # start hdfs and yarn $HADOOP_HOME/sbin/start-dfs.sh diff --git a/dev/test-cluster/setup-python3-env.sh b/dev/test-cluster/setup-python3.sh similarity index 100% rename from dev/test-cluster/setup-python3-env.sh rename to dev/test-cluster/setup-python3.sh diff --git a/dev/test-cluster/setup-spark-envs.sh b/dev/test-cluster/setup-spark-envs.sh new file mode 100755 index 000000000..3819f6ee8 --- /dev/null +++ b/dev/test-cluster/setup-spark-envs.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash + +export HADOOP_HOME=~/opt/hadoop-2.7.7 +export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop +export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$HADOOP_HOME/lib/native + +export SPARK_HOME=~/opt/spark-3.0.0-bin-hadoop2.7 +export PYTHONPATH=$SPARK_HOME/python:$PYTHONPATH +export PYSPARK_PYTHON=python3 + +export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PATH \ No newline at end of file diff --git a/dev/test-cluster/spark-defaults.conf b/dev/test-cluster/spark-defaults.conf index 1c25bb2ec..05f1c31e3 100644 --- a/dev/test-cluster/spark-defaults.conf +++ b/dev/test-cluster/spark-defaults.conf @@ -28,7 +28,7 @@ spark.master yarn spark.serializer org.apache.spark.serializer.KryoSerializer -spark.driver.memory 3g +spark.driver.memory 1g spark.executor.num 2 spark.executor.cores 1 -spark.executor.memory 4g +spark.executor.memory 2g diff --git a/dev/test-cluster/workloads/kmeans-pyspark.py b/dev/test-cluster/workloads/kmeans-pyspark.py deleted file mode 100644 index cf93e6034..000000000 --- a/dev/test-cluster/workloads/kmeans-pyspark.py +++ 
/dev/null @@ -1,70 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -""" -An example demonstrating k-means clustering. -Run with: - bin/spark-submit examples/src/main/python/ml/kmeans_example.py - -This example requires NumPy (http://www.numpy.org/). -""" -from __future__ import print_function -import sys - -# $example on$ -from pyspark.ml.clustering import KMeans -from pyspark.ml.evaluation import ClusteringEvaluator -# $example off$ - -from pyspark.sql import SparkSession - -if __name__ == "__main__": - spark = SparkSession\ - .builder\ - .appName("KMeansExample")\ - .getOrCreate() - - if (len(sys.argv) != 2) : - println("Require data file path as input parameter") - sys.exit(1) - - # $example on$ - # Loads data. - dataset = spark.read.format("libsvm").load(sys.argv[1]) - - # Trains a k-means model. - kmeans = KMeans().setK(2).setSeed(1) - model = kmeans.fit(dataset) - - # Make predictions - predictions = model.transform(dataset) - - # Evaluate clustering by computing Silhouette score - evaluator = ClusteringEvaluator() - - silhouette = evaluator.evaluate(predictions) - print("Silhouette with squared euclidean distance = " + str(silhouette)) - - # Shows the result. - centers = model.clusterCenters() - print("Cluster Centers: ") - for center in centers: - print(center) - # $example off$ - - spark.stop() - diff --git a/dev/test-cluster/workloads/run-kmeans-pyspark.sh b/dev/test-cluster/workloads/run-kmeans-pyspark.sh deleted file mode 100755 index e07f3f7b6..000000000 --- a/dev/test-cluster/workloads/run-kmeans-pyspark.sh +++ /dev/null @@ -1,48 +0,0 @@ -#!/usr/bin/env bash - -source ../envs.sh - -# Data file is from Spark Examples (data/mllib/sample_kmeans_data.txt), the data file should be copied to HDFS -$HADOOP_HOME/bin/hadoop fs -mkdir -p $OAP_MLLIB_DATA_ROOT -$HADOOP_HOME/bin/hadoop fs -copyFromLocal $SPARK_HOME/data/mllib/sample_kmeans_data.txt $OAP_MLLIB_DATA_ROOT - -# User should check the requested resources are acturally allocated by cluster manager or Intel MLlib will behave incorrectly -SPARK_MASTER=yarn -SPARK_DRIVER_MEMORY=1G -SPARK_NUM_EXECUTORS=2 -SPARK_EXECUTOR_CORES=1 -SPARK_EXECUTOR_MEMORY=1G - -SPARK_DEFAULT_PARALLELISM=$(expr $SPARK_NUM_EXECUTORS '*' $SPARK_EXECUTOR_CORES '*' 2) - -# ======================================================= # - -# Check env -if [[ -z $SPARK_HOME ]]; then - echo SPARK_HOME not defined! - exit 1 -fi - -if [[ -z $HADOOP_HOME ]]; then - echo HADOOP_HOME not defined! 
- exit 1 -fi - -APP_PY="$OAP_MLLIB_ROOT/dev/test-cluster/workloads/kmeans-pyspark.py" -DATA_FILE=$OAP_MLLIB_DATA_ROOT/sample_kmeans_data.txt - -$SPARK_HOME/bin/spark-submit --master $SPARK_MASTER -v \ - --num-executors $SPARK_NUM_EXECUTORS \ - --driver-memory $SPARK_DRIVER_MEMORY \ - --executor-cores $SPARK_EXECUTOR_CORES \ - --executor-memory $SPARK_EXECUTOR_MEMORY \ - --conf "spark.serializer=org.apache.spark.serializer.KryoSerializer" \ - --conf "spark.default.parallelism=$SPARK_DEFAULT_PARALLELISM" \ - --conf "spark.sql.shuffle.partitions=$SPARK_DEFAULT_PARALLELISM" \ - --conf "spark.driver.extraClassPath=$SPARK_DRIVER_CLASSPATH" \ - --conf "spark.executor.extraClassPath=$SPARK_EXECUTOR_CLASSPATH" \ - --conf "spark.shuffle.reduceLocality.enabled=false" \ - --conf "spark.network.timeout=1200s" \ - --conf "spark.task.maxFailures=1" \ - --jars $OAP_MLLIB_JAR \ - $APP_PY $DATA_FILE diff --git a/examples/als-hibench/pom.xml b/examples/als-hibench/pom.xml deleted file mode 100644 index 68e02c256..000000000 --- a/examples/als-hibench/pom.xml +++ /dev/null @@ -1,100 +0,0 @@ - - 4.0.0 - - com.intel.oap - oap-mllib-examples - 0.9.0-with-spark-3.0.0 - jar - - ALSHiBenchExample - https://github.com/Intel-bigdata/OAP - - - UTF-8 - 2.12.10 - 2.12 - 3.0.0 - - - - - - org.scala-lang - scala-library - 2.12.10 - - - - com.github.scopt - scopt_2.12 - 3.7.0 - - - - - - - - - - - org.apache.spark - spark-sql_2.12 - ${spark.version} - provided - - - - org.apache.spark - spark-mllib_2.12 - ${spark.version} - provided - - - - - - - - org.scala-tools - maven-scala-plugin - 2.15.2 - - - - compile - testCompile - - - - - ${scala.version} - - -target:jvm-1.8 - - - - - maven-assembly-plugin - 3.0.0 - - false - - jar-with-dependencies - - - - - assembly - package - - single - - - - - - - - diff --git a/examples/als-hibench/run-hibench-oap-mllib.sh b/examples/als-hibench/run-hibench-oap-mllib.sh deleted file mode 100755 index 050b80558..000000000 --- a/examples/als-hibench/run-hibench-oap-mllib.sh +++ /dev/null @@ -1,73 +0,0 @@ -#!/usr/bin/env bash - -export HDFS_ROOT=hdfs://sr591:8020 -export OAP_MLLIB_ROOT=/home/xiaochang/Works/OAP-xwu99-als/oap-mllib - -SPARK_MASTER=yarn -SPARK_DRIVER_MEMORY=16G -SPARK_NUM_EXECUTORS=6 -SPARK_EXECUTOR_CORES=28 -SPARK_EXECUTOR_MEMORY_OVERHEAD=25G -SPARK_EXECUTOR_MEMORY=100G - -SPARK_DEFAULT_PARALLELISM=$(expr $SPARK_NUM_EXECUTORS '*' $SPARK_EXECUTOR_CORES '*' 2) -#SPARK_DEFAULT_PARALLELISM=$(expr $SPARK_NUM_EXECUTORS '*' $SPARK_EXECUTOR_CORES) - -# ======================================================= # - -# for log suffix -SUFFIX=$( basename -s .sh "${BASH_SOURCE[0]}" ) - -# Check envs -if [[ -z $SPARK_HOME ]]; then - echo SPARK_HOME not defined! - exit 1 -fi - -if [[ -z $HADOOP_HOME ]]; then - echo HADOOP_HOME not defined! 
- exit 1 -fi - -export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop - -# Target jar built -OAP_MLLIB_JAR_NAME=oap-mllib-0.9.0-with-spark-3.0.0.jar -OAP_MLLIB_JAR=$OAP_MLLIB_ROOT/mllib-dal/target/$OAP_MLLIB_JAR_NAME - -# Use absolute path -SPARK_DRIVER_CLASSPATH=$OAP_MLLIB_JAR -# Use relative path -SPARK_EXECUTOR_CLASSPATH=./$OAP_MLLIB_JAR_NAME - -APP_JAR=target/oap-mllib-examples-0.9.0-with-spark-3.0.0.jar -APP_CLASS=com.intel.hibench.sparkbench.ml.ALSExample - -HDFS_INPUT=hdfs://sr591:8020/HiBench/ALS/Input -RANK=10 -NUM_ITERATIONS=1 -LAMBDA=0.1 -IMPLICIT=true - -/usr/bin/time -p $SPARK_HOME/bin/spark-submit --master $SPARK_MASTER -v \ - --num-executors $SPARK_NUM_EXECUTORS \ - --driver-memory $SPARK_DRIVER_MEMORY \ - --executor-cores $SPARK_EXECUTOR_CORES \ - --executor-memory $SPARK_EXECUTOR_MEMORY \ - --conf "spark.serializer=org.apache.spark.serializer.KryoSerializer" \ - --conf "spark.default.parallelism=$SPARK_DEFAULT_PARALLELISM" \ - --conf "spark.sql.shuffle.partitions=$SPARK_DEFAULT_PARALLELISM" \ - --conf "spark.driver.extraClassPath=$SPARK_DRIVER_CLASSPATH" \ - --conf "spark.executor.extraClassPath=$SPARK_EXECUTOR_CLASSPATH" \ - --conf "spark.shuffle.reduceLocality.enabled=false" \ - --conf "spark.executor.memoryOverhead=$SPARK_EXECUTOR_MEMORY_OVERHEAD" \ - --conf "spark.network.timeout=1200s" \ - --conf "spark.task.maxFailures=1" \ - --jars $OAP_MLLIB_JAR \ - --class $APP_CLASS \ - $APP_JAR \ - --rank $RANK --numIterations $NUM_ITERATIONS --implicitPrefs $IMPLICIT --lambda $LAMBDA \ - --numProductBlocks $SPARK_DEFAULT_PARALLELISM --numUserBlocks $SPARK_DEFAULT_PARALLELISM \ - $HDFS_INPUT \ - 2>&1 | tee ALS-$SUFFIX-$(date +%m%d_%H_%M_%S).log - diff --git a/examples/als-hibench/run-hibench-vanilla.sh b/examples/als-hibench/run-hibench-vanilla.sh deleted file mode 100755 index 6cb6b3ae7..000000000 --- a/examples/als-hibench/run-hibench-vanilla.sh +++ /dev/null @@ -1,61 +0,0 @@ -#!/usr/bin/env bash - -export HDFS_ROOT=hdfs://sr591:8020 - -SPARK_MASTER=yarn -SPARK_DRIVER_MEMORY=16G -SPARK_NUM_EXECUTORS=6 -SPARK_EXECUTOR_CORES=28 -SPARK_EXECUTOR_MEMORY_OVERHEAD=25G -SPARK_EXECUTOR_MEMORY=100G - -SPARK_DEFAULT_PARALLELISM=$(expr $SPARK_NUM_EXECUTORS '*' $SPARK_EXECUTOR_CORES '*' 2) - -# ======================================================= # - -# for log suffix -SUFFIX=$( basename -s .sh "${BASH_SOURCE[0]}" ) - -# Check envs -if [[ -z $SPARK_HOME ]]; then - echo SPARK_HOME not defined! - exit 1 -fi - -if [[ -z $HADOOP_HOME ]]; then - echo HADOOP_HOME not defined! 
- exit 1 -fi - -export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop - -APP_JAR=target/oap-mllib-examples-0.9.0-with-spark-3.0.0.jar -APP_CLASS=com.intel.hibench.sparkbench.ml.ALSExample - -HDFS_INPUT=hdfs://sr591:8020/HiBench/ALS/Input -RANK=10 -NUM_ITERATIONS=1 -LAMBDA=0.1 -IMPLICIT=true - -/usr/bin/time -p $SPARK_HOME/bin/spark-submit --master $SPARK_MASTER -v \ - --num-executors $SPARK_NUM_EXECUTORS \ - --driver-memory $SPARK_DRIVER_MEMORY \ - --executor-cores $SPARK_EXECUTOR_CORES \ - --executor-memory $SPARK_EXECUTOR_MEMORY \ - --conf "spark.serializer=org.apache.spark.serializer.KryoSerializer" \ - --conf "spark.default.parallelism=$SPARK_DEFAULT_PARALLELISM" \ - --conf "spark.sql.shuffle.partitions=$SPARK_DEFAULT_PARALLELISM" \ - --conf "spark.driver.extraClassPath=$SPARK_DRIVER_CLASSPATH" \ - --conf "spark.executor.extraClassPath=$SPARK_EXECUTOR_CLASSPATH" \ - --conf "spark.shuffle.reduceLocality.enabled=false" \ - --conf "spark.executor.memoryOverhead=$SPARK_EXECUTOR_MEMORY_OVERHEAD" \ - --conf "spark.network.timeout=1200s" \ - --conf "spark.task.maxFailures=1" \ - --class $APP_CLASS \ - $APP_JAR \ - --rank $RANK --numIterations $NUM_ITERATIONS --implicitPrefs $IMPLICIT --lambda $LAMBDA \ - --numProductBlocks $SPARK_DEFAULT_PARALLELISM --numUserBlocks $SPARK_DEFAULT_PARALLELISM \ - $HDFS_INPUT \ - 2>&1 | tee ALS-$SUFFIX-$(date +%m%d_%H_%M_%S).log - diff --git a/examples/als-hibench/src/main/scala/com/intel/hibench/sparkbench/ml/ALSExample.scala b/examples/als-hibench/src/main/scala/com/intel/hibench/sparkbench/ml/ALSExample.scala deleted file mode 100644 index 5a29bcc80..000000000 --- a/examples/als-hibench/src/main/scala/com/intel/hibench/sparkbench/ml/ALSExample.scala +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package com.intel.hibench.sparkbench.ml - -import org.apache.spark.ml.evaluation.RegressionEvaluator -import org.apache.spark.ml.recommendation.ALS -import org.apache.spark.ml.recommendation.ALS.Rating -import org.apache.spark.sql.SparkSession -import scopt.OptionParser - -object ALSExample { - - case class Params( - dataPath: String = null, - numIterations: Int = 10, - lambda: Double = 0.1, - rank: Int = 10, - numUserBlocks: Int = 10, - numItemBlocks: Int = 10, - implicitPrefs: Boolean = false) - - def main(args: Array[String]) { - val defaultParams = Params() - - val parser = new OptionParser[Params]("ALS") { - head("ALS: an example app for ALS on User-Item data.") - opt[Int]("rank") - .text(s"rank, default: ${defaultParams.rank}") - .action((x, c) => c.copy(rank = x)) - opt[Int]("numIterations") - .text(s"number of iterations, default: ${defaultParams.numIterations}") - .action((x, c) => c.copy(numIterations = x)) - opt[Double]("lambda") - .text(s"regularization parameter, default: ${defaultParams.lambda}") - .action((x, c) => c.copy(lambda = x)) - opt[Int]("numUserBlocks") - .text(s"number of user blocks, default: ${defaultParams.numUserBlocks}") - .action((x, c) => c.copy(numUserBlocks = x)) - opt[Int]("numProductBlocks") - .text(s"number of product blocks, default: ${defaultParams.numItemBlocks}") - .action((x, c) => c.copy(numItemBlocks = x)) - opt[Boolean]("implicitPrefs") - .text("implicit preference, default: ${defaultParams.implicitPrefs}") - .action((x, c) => c.copy(implicitPrefs = x)) - arg[String]("") - .required() - .text("Input paths to a User-Product dataset of ratings") - .action((x, c) => c.copy(dataPath = x)) - } - parser.parse(args, defaultParams) match { - case Some(params) => run(params) - case _ => sys.exit(1) - } - } - - def run(params: Params): Unit = { - val spark = SparkSession - .builder - .appName(s"ALS with $params") - .getOrCreate() - val sc = spark.sparkContext - - import spark.implicits._ - - val ratings = sc.objectFile[Rating[Int]](params.dataPath).toDF() - - val Array(training, test) = ratings.randomSplit(Array(0.8, 0.2), 1L) - - // Build the recommendation model using ALS on the training data - val als = new ALS() - .setRank(params.rank) - .setMaxIter(params.numIterations) - .setRegParam(params.lambda) - .setImplicitPrefs(params.implicitPrefs) - .setNumUserBlocks(params.numUserBlocks) - .setNumItemBlocks(params.numItemBlocks) - .setUserCol("user") - .setItemCol("item") - .setRatingCol("rating") - val model = als.fit(training) - - // Evaluate the model by computing the RMSE on the test data - // Note we set cold start strategy to 'drop' to ensure we don't get NaN evaluation metrics - model.setColdStartStrategy("drop") - val predictions = model.transform(test) - - val evaluator = new RegressionEvaluator() - .setMetricName("rmse") - .setLabelCol("rating") - .setPredictionCol("prediction") - val rmse = evaluator.evaluate(predictions) - println(s"Root-mean-square error = $rmse") - - spark.stop() - } -} diff --git a/examples/als-pyspark/als-pyspark.py b/examples/als-pyspark/als-pyspark.py index 8847ca2b9..12622a4fd 100644 --- a/examples/als-pyspark/als-pyspark.py +++ b/examples/als-pyspark/als-pyspark.py @@ -44,8 +44,8 @@ parts = lines.map(lambda row: row.value.split("::")) ratingsRDD = parts.map(lambda p: Row(userId=int(p[0]), movieId=int(p[1]), rating=float(p[2]))) - ratings = spark.createDataFrame(ratingsRDD) - # (training, test) = ratings.randomSplit([0.8, 0.2]) + ratings = spark.createDataFrame(ratingsRDD) + (training, test) = 
ratings.randomSplit([0.8, 0.2]) # Build the recommendation model using ALS on the training data # Note we set cold start strategy to 'drop' to ensure we don't get NaN evaluation metrics @@ -55,13 +55,13 @@ print("\nALS training with implicitPrefs={}, rank={}, maxIter={}, regParam={}, alpha={}, seed={}\n".format( als.getImplicitPrefs(), als.getRank(), als.getMaxIter(), als.getRegParam(), als.getAlpha(), als.getSeed() )) - model = als.fit(ratings) + model = als.fit(training) - # Evaluate the model by computing the RMSE on the test data - # predictions = model.transform(test) - # evaluator = RegressionEvaluator(metricName="rmse", labelCol="rating", - # predictionCol="prediction") - # rmse = evaluator.evaluate(predictions) - # print("Root-mean-square error = " + str(rmse)) + # Evaluate the model by computing the RMSE on the test data + predictions = model.transform(test) + evaluator = RegressionEvaluator(metricName="rmse", labelCol="rating", + predictionCol="prediction") + rmse = evaluator.evaluate(predictions) + print("Root-mean-square error = " + str(rmse)) spark.stop() diff --git a/examples/als-pyspark/run.sh b/examples/als-pyspark/run.sh index b3ba1b6d2..044c857f6 100755 --- a/examples/als-pyspark/run.sh +++ b/examples/als-pyspark/run.sh @@ -1,62 +1,14 @@ #!/usr/bin/env bash -# == User to customize the following environments ======= # +source ../../conf/env.sh -# Set user Spark and Hadoop home directory -#export SPARK_HOME=/path/to/your/spark/home -#export HADOOP_HOME=/path/to/your/hadoop/home -# Set user HDFS Root -export HDFS_ROOT=hdfs://sr549:8020 -# Set user Intel MLlib Root directory -export OAP_MLLIB_ROOT=/home/xiaochang/Works/OAP-xwu99-als/oap-mllib -# Set IP and Port for oneCCL KVS, you can select any one of the worker nodes and set CCL_KVS_IP_PORT to its IP and Port -# IP can be got with `hostname -I`, if multiple IPs are returned, the first IP should be used. Port can be any available port. -# For example, if one of the worker IP is 192.168.0.1 and an available port is 51234. -# CCL_KVS_IP_PORT can be set in the format of 192.168.0.1_51234 -# Incorrectly setting this value will result in hanging when oneCCL initialize -export CCL_KVS_IP_PORT=10.0.2.149_51234 - -# Data file is from Spark Examples (data/mllib/sample_kmeans_data.txt), the data file should be copied to HDFS +# Data file is converted from oneDAL examples ($DAALROOT/examples/daal/data/batch/implicit_als_csr.csv) +# The data file should be copied to $HDFS_ROOT before running examples DATA_FILE=data/onedal_als_csr_ratings.txt -# == User to customize Spark executor cores and memory == # - -# User should check the requested resources are acturally allocated by cluster manager or Intel MLlib will behave incorrectly -SPARK_MASTER=yarn -SPARK_DRIVER_MEMORY=1G -SPARK_NUM_EXECUTORS=2 -SPARK_EXECUTOR_CORES=1 -SPARK_EXECUTOR_MEMORY=1G - -SPARK_DEFAULT_PARALLELISM=$(expr $SPARK_NUM_EXECUTORS '*' $SPARK_EXECUTOR_CORES '*' 2) - -# ======================================================= # - -# Check env -if [[ -z $SPARK_HOME ]]; then - echo SPARK_HOME not defined! - exit 1 -fi - -if [[ -z $HADOOP_HOME ]]; then - echo HADOOP_HOME not defined! 
- exit 1 -fi - -export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop - -# Target jar built -OAP_MLLIB_JAR_NAME=oap-mllib-0.9.0-with-spark-3.0.0.jar -OAP_MLLIB_JAR=$OAP_MLLIB_ROOT/mllib-dal/target/$OAP_MLLIB_JAR_NAME - -# Use absolute path -SPARK_DRIVER_CLASSPATH=$OAP_MLLIB_JAR -# Use relative path -SPARK_EXECUTOR_CLASSPATH=./$OAP_MLLIB_JAR_NAME - APP_PY=als-pyspark.py -/usr/bin/time -p $SPARK_HOME/bin/spark-submit --master $SPARK_MASTER -v \ +time $SPARK_HOME/bin/spark-submit --master $SPARK_MASTER -v \ --num-executors $SPARK_NUM_EXECUTORS \ --driver-memory $SPARK_DRIVER_MEMORY \ --executor-cores $SPARK_EXECUTOR_CORES \ @@ -66,7 +18,6 @@ APP_PY=als-pyspark.py --conf "spark.sql.shuffle.partitions=$SPARK_DEFAULT_PARALLELISM" \ --conf "spark.driver.extraClassPath=$SPARK_DRIVER_CLASSPATH" \ --conf "spark.executor.extraClassPath=$SPARK_EXECUTOR_CLASSPATH" \ - --conf "spark.executorEnv.CCL_KVS_IP_PORT=$CCL_KVS_IP_PORT" \ --conf "spark.shuffle.reduceLocality.enabled=false" \ --conf "spark.network.timeout=1200s" \ --conf "spark.task.maxFailures=1" \ diff --git a/examples/als-hibench/build.sh b/examples/als/build.sh similarity index 52% rename from examples/als-hibench/build.sh rename to examples/als/build.sh index 8cbc692be..3c01d1689 100755 --- a/examples/als-hibench/build.sh +++ b/examples/als/build.sh @@ -1,3 +1,4 @@ #!/usr/bin/env bash -mvn clean package \ No newline at end of file +mvn clean package + diff --git a/examples/kmeans-hibench/pom.xml b/examples/als/pom.xml similarity index 91% rename from examples/kmeans-hibench/pom.xml rename to examples/als/pom.xml index 3f5b56e29..94a72189a 100644 --- a/examples/kmeans-hibench/pom.xml +++ b/examples/als/pom.xml @@ -4,14 +4,15 @@ com.intel.oap oap-mllib-examples - 1.1.0-with-spark-3.0.0 + ${oap.version}-with-spark-${spark.version} jar - KMeansHiBenchExample + ALSExample https://github.com/oap-project/oap-mllib.git UTF-8 + 1.1.0 2.12.10 2.12 3.0.0 @@ -31,12 +32,6 @@ 3.7.0 - - org.apache.mahout - mahout-hdfs - 14.1 - - org.apache.spark spark-sql_2.12 diff --git a/examples/als/run.sh b/examples/als/run.sh new file mode 100755 index 000000000..8a317dbc7 --- /dev/null +++ b/examples/als/run.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash + +source ../../conf/env.sh + +# Data file is converted from oneDAL examples ($DAALROOT/examples/daal/data/batch/implicit_als_csr.csv) +# The data file should be copied to $HDFS_ROOT before running examples +DATA_FILE=data/onedal_als_csr_ratings.txt + +APP_JAR=target/oap-mllib-examples-$OAP_MLLIB_VERSION-with-spark-3.0.0.jar +APP_CLASS=org.apache.spark.examples.ml.ALSExample + +time $SPARK_HOME/bin/spark-submit --master $SPARK_MASTER -v \ + --num-executors $SPARK_NUM_EXECUTORS \ + --driver-memory $SPARK_DRIVER_MEMORY \ + --executor-cores $SPARK_EXECUTOR_CORES \ + --executor-memory $SPARK_EXECUTOR_MEMORY \ + --conf "spark.serializer=org.apache.spark.serializer.KryoSerializer" \ + --conf "spark.default.parallelism=$SPARK_DEFAULT_PARALLELISM" \ + --conf "spark.sql.shuffle.partitions=$SPARK_DEFAULT_PARALLELISM" \ + --conf "spark.driver.extraClassPath=$SPARK_DRIVER_CLASSPATH" \ + --conf "spark.executor.extraClassPath=$SPARK_EXECUTOR_CLASSPATH" \ + --conf "spark.shuffle.reduceLocality.enabled=false" \ + --conf "spark.network.timeout=1200s" \ + --conf "spark.task.maxFailures=1" \ + --jars $OAP_MLLIB_JAR \ + --class $APP_CLASS \ + $APP_JAR $DATA_FILE \ + 2>&1 | tee ALS-$(date +%m%d_%H_%M_%S).log diff --git a/examples/als/src/main/scala/org/apache/spark/examples/ml/ALSExample.scala 
b/examples/als/src/main/scala/org/apache/spark/examples/ml/ALSExample.scala new file mode 100644 index 000000000..1071e906b --- /dev/null +++ b/examples/als/src/main/scala/org/apache/spark/examples/ml/ALSExample.scala @@ -0,0 +1,91 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// scalastyle:off println +package org.apache.spark.examples.ml + +// $example on$ +import org.apache.spark.ml.evaluation.RegressionEvaluator +import org.apache.spark.ml.recommendation.ALS +// $example off$ +import org.apache.spark.sql.SparkSession + +/** + * An example demonstrating ALS. + * Run with + * {{{ + * bin/run-example ml.ALSExample + * }}} + */ +object ALSExample { + + // $example on$ + case class Rating(userId: Int, movieId: Int, rating: Float) + def parseRating(str: String): Rating = { + val fields = str.split("::") + assert(fields.size == 3) + Rating(fields(0).toInt, fields(1).toInt, fields(2).toFloat) + } + // $example off$ + + def main(args: Array[String]): Unit = { + val spark = SparkSession + .builder + .appName("ALSExample") + .getOrCreate() + import spark.implicits._ + + if (args.length != 1) { + println("Require data file path as input parameter") + sys.exit(1) + } + + // $example on$ + val ratings = spark.read.textFile(args(0)) + .map(parseRating) + .toDF() + val Array(training, test) = ratings.randomSplit(Array(0.8, 0.2)) + + // Build the recommendation model using ALS on the training data + val als = new ALS() + .setImplicitPrefs(true) + .setRank(10) + .setMaxIter(5) + .setRegParam(0.01) + .setAlpha(40.0) + .setUserCol("userId") + .setItemCol("movieId") + .setRatingCol("rating") + val model = als.fit(training) + + // Evaluate the model by computing the RMSE on the test data + // Note we set cold start strategy to 'drop' to ensure we don't get NaN evaluation metrics + model.setColdStartStrategy("drop") + val predictions = model.transform(test) + + val evaluator = new RegressionEvaluator() + .setMetricName("rmse") + .setLabelCol("rating") + .setPredictionCol("prediction") + val rmse = evaluator.evaluate(predictions) + println(s"Root-mean-square error = $rmse") + + spark.stop() + } +} +// scalastyle:on println + diff --git a/examples/build-all.sh b/examples/build-all.sh new file mode 100755 index 000000000..62a95d255 --- /dev/null +++ b/examples/build-all.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +for dir in kmeans pca als +do + cd $dir + ./build.sh + cd .. 
+done diff --git a/examples/data/onedal_als_csr_ratings.txt b/examples/data/onedal_als_csr_ratings.txt new file mode 100644 index 000000000..e3f5d3fae --- /dev/null +++ b/examples/data/onedal_als_csr_ratings.txt @@ -0,0 +1,167 @@ +0::17::0.938283 +1::1::0.124207 +1::8::0.411504 +1::13::0.746992 +1::16::0.169437 +2::6::0.42021 +2::12::0.322446 +2::17::0.561519 +3::2::0.760795 +4::6::0.252532 +4::8::0.155777 +5::1::0.593943 +5::2::0.5289 +5::9::0.827276 +5::16::0.291741 +6::1::0.642664 +6::9::0.843839 +6::18::0.801948 +7::17::0.162832 +8::15::0.764005 +8::19::0.545399 +9::4::0.871051 +9::8::0.2094 +9::9::0.900738 +9::17::0.998866 +9::19::0.154139 +10::0::0.59703 +10::6::0.727774 +10::15::0.877197 +11::3::0.598636 +11::5::0.999655 +11::7::0.23638 +11::14::0.463678 +11::17::0.802767 +11::18::0.828629 +12::3::0.449008 +12::4::0.108126 +12::5::0.17944 +12::11::0.14992 +12::15::0.645085 +12::17::0.356908 +13::7::0.54838 +13::13::0.719667 +14::1::0.144589 +14::2::0.956232 +14::4::0.410129 +14::5::0.237406 +14::9::0.701227 +15::5::0.598455 +15::13::0.534545 +15::18::0.85741 +16::0::0.08512 +16::1::0.306062 +17::2::0.87395 +17::6::0.680554 +17::15::0.383043 +17::20::0.16813 +18::2::0.641488 +18::4::0.542261 +18::10::0.69714 +18::11::0.776203 +18::17::0.498716 +18::18::0.788093 +18::20::0.52406 +19::4::0.10402 +19::7::0.276732 +20::1::0.666263 +20::6::0.280048 +21::5::0.898574 +21::6::0.892768 +21::9::0.061185 +21::18::0.691028 +22::7::0.813807 +22::16::0.293614 +23::10::0.217541 +23::14::0.98958 +23::15::0.20269 +24::7::0.67432 +24::8::0.520428 +24::10::0.138665 +24::13::0.364809 +24::14::0.970167 +24::19::0.68381 +25::0::0.166145 +25::1::0.194913 +25::2::0.265607 +25::18::0.740052 +25::19::0.209377 +26::2::0.122306 +26::8::0.742562 +26::11::0.405206 +26::16::0.442783 +27::12::0.010994 +27::16::0.632512 +27::17::0.421555 +28::1::0.854519 +28::3::0.843519 +28::7::0.388753 +28::12::0.020689 +28::13::0.071531 +28::14::0.537579 +28::16::0.079456 +29::17::0.548573 +30::1::0.959732 +30::3::0.913432 +30::4::0.88553 +31::0::0.653987 +31::13::0.736684 +31::20::0.629751 +32::2::0.420538 +32::6::0.110444 +32::12::0.55993 +32::13::0.730668 +32::17::0.588223 +32::18::0.188579 +33::8::0.717314 +33::9::0.249797 +33::10::0.404286 +33::18::0.83197 +34::4::0.364628 +34::6::0.023655 +34::10::0.94169 +35::3::0.015393 +35::11::0.356229 +35::18::0.328241 +36::0::0.03866 +36::1::0.21685 +36::5::0.725101 +36::8::0.191972 +36::9::0.658415 +36::12::0.592436 +37::2::0.812225 +37::4::0.411506 +37::6::0.613151 +37::9::0.345352 +37::10::0.89008 +37::12::0.139664 +37::17::0.7633 +37::20::0.488679 +38::0::0.594923 +38::1::0.441561 +38::13::0.467085 +39::0::0.949957 +39::7::0.360488 +39::12::0.354949 +39::15::0.976556 +39::17::0.024024 +40::1::0.121904 +40::4::0.871203 +40::8::0.102956 +41::9::0.593112 +42::3::0.542693 +42::4::0.340404 +42::6::0.997438 +42::7::0.335679 +42::10::0.657767 +42::11::0.382666 +42::14::0.621782 +42::17::0.150028 +43::16::0.318803 +43::17::0.83869 +44::10::0.460685 +45::0::0.926797 +45::4::0.257822 +45::8::0.714351 +45::17::0.333358 +45::18::0.134587 diff --git a/examples/pca-pyspark/data/pca_data.csv b/examples/data/pca_data.csv similarity index 100% rename from examples/pca-pyspark/data/pca_data.csv rename to examples/data/pca_data.csv diff --git a/examples/data/sample_kmeans_data.txt b/examples/data/sample_kmeans_data.txt new file mode 100644 index 000000000..50013776b --- /dev/null +++ b/examples/data/sample_kmeans_data.txt @@ -0,0 +1,6 @@ +0 1:0.0 2:0.0 3:0.0 +1 1:0.1 2:0.1 3:0.1 +2 1:0.2 2:0.2 3:0.2 
+3 1:9.0 2:9.0 3:9.0 +4 1:9.1 2:9.1 3:9.1 +5 1:9.2 2:9.2 3:9.2 diff --git a/examples/kmeans-hibench/run-hibench-oap-mllib.sh b/examples/kmeans-hibench/run-hibench-oap-mllib.sh deleted file mode 100755 index caa42584f..000000000 --- a/examples/kmeans-hibench/run-hibench-oap-mllib.sh +++ /dev/null @@ -1,86 +0,0 @@ -#!/usr/bin/env bash - -# == User to customize the following environments ======= # - -# Set user Spark and Hadoop home directory -export SPARK_HOME=/path/to/your/spark/home -export HADOOP_HOME=/path/to/your/hadoop/home -# Set user HDFS Root -export HDFS_ROOT=hdfs://your_hostname:8020 -# Set user Intel MLlib Root directory -export OAP_MLLIB_ROOT=/path/to/your/OAP/oap-mllib -# Set IP and Port for oneCCL KVS, you can select any one of the worker nodes and set CCL_KVS_IP_PORT to its IP and Port -# IP can be got with `hostname -I`, if multiple IPs are returned, the first IP should be used. Port can be any available port. -# For example, if one of the worker IP is 192.168.0.1 and an available port is 51234. -# CCL_KVS_IP_PORT can be set in the format of 192.168.0.1_51234 -# Incorrectly setting this value will result in hanging when oneCCL initialize -export CCL_KVS_IP_PORT=192.168.0.1_51234 - -# == User to customize Spark executor cores and memory == # - -# User should check the requested resources are acturally allocated by cluster manager or Intel MLlib will behave incorrectly -SPARK_MASTER=yarn -SPARK_DRIVER_MEMORY=8G -SPARK_NUM_EXECUTORS=6 -SPARK_EXECUTOR_CORES=15 -SPARK_EXECUTOR_MEMORY_OVERHEAD=25G -SPARK_EXECUTOR_MEMORY=50G - -SPARK_DEFAULT_PARALLELISM=$(expr $SPARK_NUM_EXECUTORS '*' $SPARK_EXECUTOR_CORES '*' 2) - -# ======================================================= # - -# for log suffix -SUFFIX=$( basename -s .sh "${BASH_SOURCE[0]}" ) - -# Check envs -if [[ -z $SPARK_HOME ]]; then - echo SPARK_HOME not defined! - exit 1 -fi - -if [[ -z $HADOOP_HOME ]]; then - echo HADOOP_HOME not defined! 
- exit 1 -fi - -export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop - -# Target jar built -OAP_MLLIB_JAR_NAME=oap-mllib-0.9.0-with-spark-3.0.0.jar -OAP_MLLIB_JAR=$OAP_MLLIB_ROOT/mllib-dal/target/$OAP_MLLIB_JAR_NAME - -# Use absolute path -SPARK_DRIVER_CLASSPATH=$OAP_MLLIB_JAR -# Use relative path -SPARK_EXECUTOR_CLASSPATH=./$OAP_MLLIB_JAR_NAME - -APP_JAR=target/oap-mllib-examples-0.9.0-with-spark-3.0.0.jar -APP_CLASS=com.intel.hibench.sparkbench.ml.DenseKMeansDS - -K=200 -INIT_MODE=Random -MAX_ITERATION=20 -INPUT_HDFS=$HDFS_ROOT/HiBench/Kmeans/Input/samples - -/usr/bin/time -p $SPARK_HOME/bin/spark-submit --master $SPARK_MASTER -v \ - --num-executors $SPARK_NUM_EXECUTORS \ - --driver-memory $SPARK_DRIVER_MEMORY \ - --executor-cores $SPARK_EXECUTOR_CORES \ - --executor-memory $SPARK_EXECUTOR_MEMORY \ - --conf "spark.serializer=org.apache.spark.serializer.KryoSerializer" \ - --conf "spark.default.parallelism=$SPARK_DEFAULT_PARALLELISM" \ - --conf "spark.sql.shuffle.partitions=$SPARK_DEFAULT_PARALLELISM" \ - --conf "spark.driver.extraClassPath=$SPARK_DRIVER_CLASSPATH" \ - --conf "spark.executor.extraClassPath=$SPARK_EXECUTOR_CLASSPATH" \ - --conf "spark.executorEnv.CCL_KVS_IP_PORT=$CCL_KVS_IP_PORT" \ - --conf "spark.shuffle.reduceLocality.enabled=false" \ - --conf "spark.executor.memoryOverhead=$SPARK_EXECUTOR_MEMORY_OVERHEAD" \ - --conf "spark.memory.fraction=0.8" \ - --conf "spark.network.timeout=1200s" \ - --conf "spark.task.maxFailures=1" \ - --jars $OAP_MLLIB_JAR \ - --class $APP_CLASS \ - $APP_JAR \ - -k $K --initMode $INIT_MODE --numIterations $MAX_ITERATION $INPUT_HDFS \ - 2>&1 | tee KMeansHiBench-$SUFFIX-$(date +%m%d_%H_%M_%S).log diff --git a/examples/kmeans-hibench/run-hibench-vanilla.sh b/examples/kmeans-hibench/run-hibench-vanilla.sh deleted file mode 100755 index 475c25aff..000000000 --- a/examples/kmeans-hibench/run-hibench-vanilla.sh +++ /dev/null @@ -1,58 +0,0 @@ -#!/usr/bin/env bash - -# == User to customize the following environments ======= # - -# Set user Spark and Hadoop home directory -export SPARK_HOME=/path/to/your/spark/home -export HADOOP_HOME=/path/to/your/hadoop/home -# Set user HDFS Root -export HDFS_ROOT=hdfs://your_hostname:8020 - -# == User to customize Spark executor cores and memory == # - -SPARK_MASTER=yarn -SPARK_DRIVER_MEMORY=8G -SPARK_NUM_EXECUTORS=6 -SPARK_EXECUTOR_CORES=15 -SPARK_EXECUTOR_MEMORY=75G - -SPARK_DEFAULT_PARALLELISM=$(expr $SPARK_NUM_EXECUTORS '*' $SPARK_EXECUTOR_CORES '*' 2) - -# ======================================================= # - -# for log suffix -SUFFIX=$( basename -s .sh "${BASH_SOURCE[0]}" ) - -# Check envs -if [[ -z $SPARK_HOME ]]; then - echo SPARK_HOME not defined! - exit 1 -fi - -if [[ -z $HADOOP_HOME ]]; then - echo HADOOP_HOME not defined! 
- exit 1 -fi - -export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop - -APP_JAR=target/oap-mllib-examples-0.9.0-with-spark-3.0.0.jar -APP_CLASS=com.intel.hibench.sparkbench.ml.DenseKMeansDS - -K=200 -INIT_MODE=Random -MAX_ITERATION=20 -INPUT_HDFS=$HDFS_ROOT/HiBench/Kmeans/Input/samples - -/usr/bin/time -p $SPARK_HOME/bin/spark-submit --master $SPARK_MASTER -v \ - --num-executors $SPARK_NUM_EXECUTORS \ - --driver-memory $SPARK_DRIVER_MEMORY \ - --executor-cores $SPARK_EXECUTOR_CORES \ - --executor-memory $SPARK_EXECUTOR_MEMORY \ - --conf "spark.serializer=org.apache.spark.serializer.KryoSerializer" \ - --conf "spark.default.parallelism=$SPARK_DEFAULT_PARALLELISM" \ - --conf "spark.sql.shuffle.partitions=$SPARK_DEFAULT_PARALLELISM" \ - --class $APP_CLASS \ - $APP_JAR \ - -k $K --initMode $INIT_MODE --numIterations $MAX_ITERATION $INPUT_HDFS \ - 2>&1 | tee KMeansHiBench-$SUFFIX-$(date +%m%d_%H_%M_%S).log diff --git a/examples/kmeans-hibench/src/main/scala/com/intel/hibench/sparkbench/ml/DenseKMeansDS.scala b/examples/kmeans-hibench/src/main/scala/com/intel/hibench/sparkbench/ml/DenseKMeansDS.scala deleted file mode 100644 index 3a949bb1c..000000000 --- a/examples/kmeans-hibench/src/main/scala/com/intel/hibench/sparkbench/ml/DenseKMeansDS.scala +++ /dev/null @@ -1,107 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package com.intel.hibench.sparkbench.ml - -import breeze.linalg.DenseVector -import org.apache.hadoop.io.LongWritable -import org.apache.mahout.math.VectorWritable -import org.apache.spark.ml.clustering.KMeans -import org.apache.spark.ml.evaluation.ClusteringEvaluator -import org.apache.spark.ml.linalg.{Vector, Vectors} -import org.apache.spark.{SparkConf, SparkContext} -import org.apache.spark.sql._ -import scopt.OptionParser -import org.apache.spark.sql.SparkSession - -object DenseKMeansDS { - - object InitializationMode extends Enumeration { - type InitializationMode = Value - val Random, Parallel = Value - } - - import com.intel.hibench.sparkbench.ml.DenseKMeansDS.InitializationMode._ - - case class Params(input: String = null, - k: Int = -1, - numIterations: Int = 10, - initializationMode: InitializationMode = Random) - - def main(args: Array[String]) { - val defaultParams = Params() - - val parser = new OptionParser[Params]("DenseKMeans") { - head("DenseKMeans: an example k-means app for dense data.") - opt[Int]('k', "k") - .required() - .text(s"number of clusters, required") - .action((x, c) => c.copy(k = x)) - opt[Int]("numIterations") - .text(s"number of iterations, default; ${defaultParams.numIterations}") - .action((x, c) => c.copy(numIterations = x)) - opt[String]("initMode") - .text(s"initialization mode (${InitializationMode.values.mkString(",")}), " + - s"default: ${defaultParams.initializationMode}") - .action((x, c) => c.copy(initializationMode = InitializationMode.withName(x))) - arg[String]("") - .text("input paths to examples") - .required() - .action((x, c) => c.copy(input = x)) - } - - parser.parse(args, defaultParams).map { params => - run(params) - }.getOrElse { - sys.exit(1) - } - } - - def run(params: Params) { - val spark = SparkSession - .builder - .appName(s"DenseKMeansDS with $params") - .getOrCreate() - import spark.implicits._ - - val sc = spark.sparkContext - - val data = sc.sequenceFile[LongWritable, VectorWritable](params.input) - - // Should use Tuple1 to warp around for calling toDF - val dataset = data.map { case (k, v) => - var vector: Array[Double] = new Array[Double](v.get().size) - for (i <- 0 until v.get().size) vector(i) = v.get().get(i) - Tuple1(Vectors.dense(vector)) - }.toDF("features") - - val initMode = params.initializationMode match { - case Random => "random" - case Parallel => "k-means||" - } - - val model = new KMeans() - .setInitMode(initMode) - .setK(params.k) - .setMaxIter(params.numIterations) - .setSeed(1L) - .fit(dataset) - - spark.stop() - } -} - diff --git a/examples/kmeans-pyspark/run.sh b/examples/kmeans-pyspark/run.sh index d029bf294..0fa2a7bcb 100755 --- a/examples/kmeans-pyspark/run.sh +++ b/examples/kmeans-pyspark/run.sh @@ -1,62 +1,14 @@ #!/usr/bin/env bash -# == User to customize the following environments ======= # +source ../../conf/env.sh -# Set user Spark and Hadoop home directory -export SPARK_HOME=/path/to/your/spark/home -export HADOOP_HOME=/path/to/your/hadoop/home -# Set user HDFS Root -export HDFS_ROOT=hdfs://your_hostname:8020 -# Set user Intel MLlib Root directory -export OAP_MLLIB_ROOT=/path/to/your/OAP/oap-mllib -# Set IP and Port for oneCCL KVS, you can select any one of the worker nodes and set CCL_KVS_IP_PORT to its IP and Port -# IP can be got with `hostname -I`, if multiple IPs are returned, the first IP should be used. Port can be any available port. -# For example, if one of the worker IP is 192.168.0.1 and an available port is 51234. 
-# CCL_KVS_IP_PORT can be set in the format of 192.168.0.1_51234 -# Incorrectly setting this value will result in hanging when oneCCL initialize -export CCL_KVS_IP_PORT=192.168.0.1_51234 - -# Data file is from Spark Examples (data/mllib/sample_kmeans_data.txt), the data file should be copied to HDFS +# Data file is from Spark Examples (data/mllib/sample_kmeans_data.txt) and put in examples/data +# The data file should be copied to $HDFS_ROOT before running examples DATA_FILE=data/sample_kmeans_data.txt -# == User to customize Spark executor cores and memory == # - -# User should check the requested resources are acturally allocated by cluster manager or Intel MLlib will behave incorrectly -SPARK_MASTER=yarn -SPARK_DRIVER_MEMORY=1G -SPARK_NUM_EXECUTORS=2 -SPARK_EXECUTOR_CORES=1 -SPARK_EXECUTOR_MEMORY=1G - -SPARK_DEFAULT_PARALLELISM=$(expr $SPARK_NUM_EXECUTORS '*' $SPARK_EXECUTOR_CORES '*' 2) - -# ======================================================= # - -# Check env -if [[ -z $SPARK_HOME ]]; then - echo SPARK_HOME not defined! - exit 1 -fi - -if [[ -z $HADOOP_HOME ]]; then - echo HADOOP_HOME not defined! - exit 1 -fi - -export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop - -# Target jar built -OAP_MLLIB_JAR_NAME=oap-mllib-0.9.0-with-spark-3.0.0.jar -OAP_MLLIB_JAR=$OAP_MLLIB_ROOT/mllib-dal/target/$OAP_MLLIB_JAR_NAME - -# Use absolute path -SPARK_DRIVER_CLASSPATH=$OAP_MLLIB_JAR -# Use relative path -SPARK_EXECUTOR_CLASSPATH=./$OAP_MLLIB_JAR_NAME - APP_PY=kmeans-pyspark.py -/usr/bin/time -p $SPARK_HOME/bin/spark-submit --master $SPARK_MASTER -v \ +time $SPARK_HOME/bin/spark-submit --master $SPARK_MASTER -v \ --num-executors $SPARK_NUM_EXECUTORS \ --driver-memory $SPARK_DRIVER_MEMORY \ --executor-cores $SPARK_EXECUTOR_CORES \ @@ -66,7 +18,6 @@ APP_PY=kmeans-pyspark.py --conf "spark.sql.shuffle.partitions=$SPARK_DEFAULT_PARALLELISM" \ --conf "spark.driver.extraClassPath=$SPARK_DRIVER_CLASSPATH" \ --conf "spark.executor.extraClassPath=$SPARK_EXECUTOR_CLASSPATH" \ - --conf "spark.executorEnv.CCL_KVS_IP_PORT=$CCL_KVS_IP_PORT" \ --conf "spark.shuffle.reduceLocality.enabled=false" \ --conf "spark.network.timeout=1200s" \ --conf "spark.task.maxFailures=1" \ diff --git a/examples/kmeans/pom.xml b/examples/kmeans/pom.xml index 71c4ea6d7..476a98ce6 100644 --- a/examples/kmeans/pom.xml +++ b/examples/kmeans/pom.xml @@ -4,7 +4,7 @@ com.intel.oap oap-mllib-examples - 1.1.0-with-spark-3.0.0 + ${oap.version}-with-spark-${spark.version} jar KMeansExample @@ -12,6 +12,7 @@ UTF-8 + 1.1.0 2.12.10 2.12 3.0.0 diff --git a/examples/kmeans/run.sh b/examples/kmeans/run.sh index bb0f9ac78..00b782464 100755 --- a/examples/kmeans/run.sh +++ b/examples/kmeans/run.sh @@ -1,64 +1,15 @@ #!/usr/bin/env bash -# == User to customize the following environments ======= # +source ../../conf/env.sh -# Set user Spark and Hadoop home directory -export SPARK_HOME=/path/to/your/spark/home -export HADOOP_HOME=/path/to/your/hadoop/home -# Set user HDFS Root -export HDFS_ROOT=hdfs://your_hostname:8020 -# Set user Intel MLlib Root directory -export OAP_MLLIB_ROOT=/path/to/your/OAP/oap-mllib -# Set IP and Port for oneCCL KVS, you can select any one of the worker nodes and set CCL_KVS_IP_PORT to its IP and Port -# IP can be got with `hostname -I`, if multiple IPs are returned, the first IP should be used. Port can be any available port. -# For example, if one of the worker IP is 192.168.0.1 and an available port is 51234. 
-# CCL_KVS_IP_PORT can be set in the format of 192.168.0.1_51234 -# Incorrectly setting this value will result in hanging when oneCCL initialize -export CCL_KVS_IP_PORT=192.168.0.1_51234 - -# Data file is from Spark examples' data (data/mllib/sample_kmeans_data.txt) -# This data file should be copied to HDFS hdfs://your_hostname:8020/user//data/sample_kmeans_data.txt +# Data file is from Spark Examples (data/mllib/sample_kmeans_data.txt) and put in examples/data +# The data file should be copied to $HDFS_ROOT before running examples DATA_FILE=data/sample_kmeans_data.txt -# == User to customize Spark executor cores and memory == # - -# User should check the requested resources are acturally allocated by cluster manager or Intel MLlib will behave incorrectly -SPARK_MASTER=yarn -SPARK_DRIVER_MEMORY=1G -SPARK_NUM_EXECUTORS=2 -SPARK_EXECUTOR_CORES=1 -SPARK_EXECUTOR_MEMORY=1G - -SPARK_DEFAULT_PARALLELISM=$(expr $SPARK_NUM_EXECUTORS '*' $SPARK_EXECUTOR_CORES '*' 2) - -# ======================================================= # - -# Check envs -if [[ -z $SPARK_HOME ]]; then - echo SPARK_HOME not defined! - exit 1 -fi - -if [[ -z $HADOOP_HOME ]]; then - echo HADOOP_HOME not defined! - exit 1 -fi - -export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop - -# Target jar built -OAP_MLLIB_JAR_NAME=oap-mllib-0.9.0-with-spark-3.0.0.jar -OAP_MLLIB_JAR=$OAP_MLLIB_ROOT/mllib-dal/target/$OAP_MLLIB_JAR_NAME - -# Use absolute path -SPARK_DRIVER_CLASSPATH=$OAP_MLLIB_JAR -# Use relative path -SPARK_EXECUTOR_CLASSPATH=./$OAP_MLLIB_JAR_NAME - -APP_JAR=target/oap-mllib-examples-0.9.0-with-spark-3.0.0.jar +APP_JAR=target/oap-mllib-examples-$OAP_MLLIB_VERSION-with-spark-3.0.0.jar APP_CLASS=org.apache.spark.examples.ml.KMeansExample -/usr/bin/time -p $SPARK_HOME/bin/spark-submit --master $SPARK_MASTER -v \ +time $SPARK_HOME/bin/spark-submit --master $SPARK_MASTER -v \ --num-executors $SPARK_NUM_EXECUTORS \ --driver-memory $SPARK_DRIVER_MEMORY \ --executor-cores $SPARK_EXECUTOR_CORES \ @@ -68,7 +19,6 @@ APP_CLASS=org.apache.spark.examples.ml.KMeansExample --conf "spark.sql.shuffle.partitions=$SPARK_DEFAULT_PARALLELISM" \ --conf "spark.driver.extraClassPath=$SPARK_DRIVER_CLASSPATH" \ --conf "spark.executor.extraClassPath=$SPARK_EXECUTOR_CLASSPATH" \ - --conf "spark.executorEnv.CCL_KVS_IP_PORT=$CCL_KVS_IP_PORT" \ --conf "spark.shuffle.reduceLocality.enabled=false" \ --conf "spark.network.timeout=1200s" \ --conf "spark.task.maxFailures=1" \ diff --git a/examples/pca-pyspark/run.sh b/examples/pca-pyspark/run.sh index 20010ce6b..b3776e877 100755 --- a/examples/pca-pyspark/run.sh +++ b/examples/pca-pyspark/run.sh @@ -1,57 +1,15 @@ #!/usr/bin/env bash -# == User to customize the following environments ======= # +source ../../conf/env.sh -# Set user Spark and Hadoop home directory -export SPARK_HOME=/path/to/your/spark/home -export HADOOP_HOME=/path/to/your/hadoop/home -# Set user HDFS Root -export HDFS_ROOT=hdfs://your_hostname:8020 -# Set user Intel MLlib Root directory -export OAP_MLLIB_ROOT=/path/to/your/OAP/oap-mllib - -# CSV data is the same as in Spark example "ml/pca_example.py", the data file should be copied to HDFS +# CSV data is the same as in Spark example "ml/pca_example.py" +# The data file should be copied to $HDFS_ROOT before running examples DATA_FILE=data/pca_data.csv -# == User to customize Spark executor cores and memory == # - -# User should check the requested resources are acturally allocated by cluster manager or Intel MLlib will behave incorrectly -SPARK_MASTER=yarn -SPARK_DRIVER_MEMORY=1G 
-SPARK_NUM_EXECUTORS=2 -SPARK_EXECUTOR_CORES=1 -SPARK_EXECUTOR_MEMORY=1G - -SPARK_DEFAULT_PARALLELISM=$(expr $SPARK_NUM_EXECUTORS '*' $SPARK_EXECUTOR_CORES '*' 2) - -# ======================================================= # - -# Check env -if [[ -z $SPARK_HOME ]]; then - echo SPARK_HOME not defined! - exit 1 -fi - -if [[ -z $HADOOP_HOME ]]; then - echo HADOOP_HOME not defined! - exit 1 -fi - -export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop - -# Target jar built -OAP_MLLIB_JAR_NAME=oap-mllib-0.9.0-with-spark-3.0.0.jar -OAP_MLLIB_JAR=$OAP_MLLIB_ROOT/mllib-dal/target/$OAP_MLLIB_JAR_NAME - -# Use absolute path -SPARK_DRIVER_CLASSPATH=$OAP_MLLIB_JAR -# Use relative path -SPARK_EXECUTOR_CLASSPATH=./$OAP_MLLIB_JAR_NAME - APP_PY=pca-pyspark.py K=3 -/usr/bin/time -p $SPARK_HOME/bin/spark-submit --master $SPARK_MASTER -v \ +time $SPARK_HOME/bin/spark-submit --master $SPARK_MASTER -v \ --num-executors $SPARK_NUM_EXECUTORS \ --driver-memory $SPARK_DRIVER_MEMORY \ --executor-cores $SPARK_EXECUTOR_CORES \ diff --git a/examples/kmeans-hibench/build.sh b/examples/pca/build.sh similarity index 100% rename from examples/kmeans-hibench/build.sh rename to examples/pca/build.sh diff --git a/examples/pca/pom.xml b/examples/pca/pom.xml index 06cf2343c..75641da81 100644 --- a/examples/pca/pom.xml +++ b/examples/pca/pom.xml @@ -4,7 +4,7 @@ com.intel.oap oap-mllib-examples - 1.1.0-with-spark-3.0.0 + ${oap.version}-with-spark-${spark.version} jar PCAExample @@ -12,6 +12,7 @@ UTF-8 + 1.1.0 2.12.10 2.12 3.0.0 diff --git a/examples/pca/run.sh b/examples/pca/run.sh index da373645b..64d22c6fe 100755 --- a/examples/pca/run.sh +++ b/examples/pca/run.sh @@ -1,3 +1,24 @@ #!/usr/bin/env bash -mvn clean package +source ../../conf/env.sh + +APP_JAR=target/oap-mllib-examples-$OAP_MLLIB_VERSION-with-spark-3.0.0.jar +APP_CLASS=org.apache.spark.examples.ml.PCAExample + +time $SPARK_HOME/bin/spark-submit --master $SPARK_MASTER -v \ + --num-executors $SPARK_NUM_EXECUTORS \ + --driver-memory $SPARK_DRIVER_MEMORY \ + --executor-cores $SPARK_EXECUTOR_CORES \ + --executor-memory $SPARK_EXECUTOR_MEMORY \ + --conf "spark.serializer=org.apache.spark.serializer.KryoSerializer" \ + --conf "spark.default.parallelism=$SPARK_DEFAULT_PARALLELISM" \ + --conf "spark.sql.shuffle.partitions=$SPARK_DEFAULT_PARALLELISM" \ + --conf "spark.driver.extraClassPath=$SPARK_DRIVER_CLASSPATH" \ + --conf "spark.executor.extraClassPath=$SPARK_EXECUTOR_CLASSPATH" \ + --conf "spark.shuffle.reduceLocality.enabled=false" \ + --conf "spark.network.timeout=1200s" \ + --conf "spark.task.maxFailures=1" \ + --jars $OAP_MLLIB_JAR \ + --class $APP_CLASS \ + $APP_JAR $DATA_FILE $K \ + 2>&1 | tee PCA-$(date +%m%d_%H_%M_%S).log diff --git a/examples/run-all-pyspark.sh b/examples/run-all-pyspark.sh new file mode 100755 index 000000000..06decd242 --- /dev/null +++ b/examples/run-all-pyspark.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +for dir in kmeans-pyspark pca-pyspark als-pyspark +do + cd $dir + ./run.sh + cd .. +done diff --git a/examples/run-all-scala.sh b/examples/run-all-scala.sh new file mode 100755 index 000000000..65636c250 --- /dev/null +++ b/examples/run-all-scala.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +for dir in kmeans pca als +do + cd $dir + ./run.sh + cd .. +done diff --git a/mllib-dal/build.sh b/mllib-dal/build.sh index da1d8df75..bd88c998c 100755 --- a/mllib-dal/build.sh +++ b/mllib-dal/build.sh @@ -2,7 +2,12 @@ # Check envs for building if [[ -z $JAVA_HOME ]]; then - echo $JAVA_HOME not defined! + echo JAVA_HOME not defined! 
+ exit 1 +fi + +if [[ -z $(which mvn) ]]; then + echo Maven not found! exit 1 fi @@ -26,7 +31,8 @@ echo JAVA_HOME=$JAVA_HOME echo DAALROOT=$DAALROOT echo TBBROOT=$TBBROOT echo CCL_ROOT=$CCL_ROOT -echo GCC Version: $(gcc -dumpversion) +echo Maven Version: $(mvn -v | head -n 1 | cut -f3 -d" ") +echo Clang Version: $(clang -dumpversion) echo ============================= mvn -DskipTests clean package diff --git a/mllib-dal/pom.xml b/mllib-dal/pom.xml index 4e51f9157..979c53d06 100644 --- a/mllib-dal/pom.xml +++ b/mllib-dal/pom.xml @@ -17,6 +17,13 @@ 2.12.10 2.12 3.0.0 + 2021.2.0 + libtbb.so.12.2 + libtbbmalloc.so.2.2 + libJavaAPI.so.1.1 + libccl.so + libfabric.so.1 + libmpi.so.12.0.0 @@ -55,11 +62,11 @@ - com.intel.daal - daal - 2021.1 + com.intel.onedal + onedal + ${oneapi.version} system - ${env.ONEAPI_ROOT}/dal/latest/lib/onedal.jar + ${env.DAALROOT}/lib/onedal.jar @@ -218,14 +225,11 @@ ${env.CCL_ROOT}/lib - - - libmpi.so.12.0.0 - libfabric.so.1 - libccl.so - + ${ccl.lib} + ${ccl.mpi.lib} + ${ccl.fabric.lib} - + ${env.CCL_ROOT}/lib/prov @@ -236,8 +240,8 @@ ${env.TBBROOT}/lib/intel64/gcc4.8 - libtbb.so.12.1 - libtbbmalloc.so.2.1 + ${tbb.lib} + ${tbb.malloc.lib} @@ -263,23 +267,19 @@ + rename to workaround. See https://github.com/oneapi-src/oneDAL/issues/1254 --> - ${project.build.testOutputDirectory}/lib/libtbb.so.12.1 + ${project.build.testOutputDirectory}/lib/${tbb.lib} ${project.build.testOutputDirectory}/lib/libtbb.so.2 - ${project.build.testOutputDirectory}/lib/libtbbmalloc.so.2.1 + ${project.build.testOutputDirectory}/lib/${tbb.malloc.lib} ${project.build.testOutputDirectory}/lib/libtbbmalloc.so.2 - ${project.build.testOutputDirectory}/lib/libmpi.so.12.0.0 + ${project.build.testOutputDirectory}/lib/${ccl.mpi.lib} ${project.build.testOutputDirectory}/lib/libmpi.so.12 - - - - diff --git a/mllib-dal/src/assembly/assembly.xml b/mllib-dal/src/assembly/assembly.xml index 498b90e02..541c4f2bd 100644 --- a/mllib-dal/src/assembly/assembly.xml +++ b/mllib-dal/src/assembly/assembly.xml @@ -41,28 +41,28 @@ - ${env.TBBROOT}/lib/intel64/gcc4.8/libtbb.so.12.1 + ${env.TBBROOT}/lib/intel64/gcc4.8/${tbb.lib} lib libtbb.so.2 - ${env.TBBROOT}/lib/intel64/gcc4.8/libtbbmalloc.so.2.1 + ${env.TBBROOT}/lib/intel64/gcc4.8/${tbb.malloc.lib} lib libtbbmalloc.so.2 - ${env.DAALROOT}/lib/intel64/libJavaAPI.so.1.0 + ${env.DAALROOT}/lib/intel64/${dal.java.lib} lib libJavaAPI.so - ${env.CCL_ROOT}/lib/libfabric.so.1 + ${env.CCL_ROOT}/lib/${ccl.fabric.lib} lib - ${env.CCL_ROOT}/lib/libmpi.so.12.0.0 + ${env.CCL_ROOT}/lib/${ccl.mpi.lib} lib libmpi.so.12 @@ -75,4 +75,4 @@ lib - \ No newline at end of file + diff --git a/mllib-dal/src/main/native/ALSDALImpl.cpp b/mllib-dal/src/main/native/ALSDALImpl.cpp index 53212dc1d..29162fddd 100644 --- a/mllib-dal/src/main/native/ALSDALImpl.cpp +++ b/mllib-dal/src/main/native/ALSDALImpl.cpp @@ -613,8 +613,8 @@ JNIEXPORT jlong JNICALL Java_org_apache_spark_ml_recommendation_ALSDALImpl_cDALI std::cout << "\n=== Results for Rank " << rankId << "===\n" << std::endl; // std::cout << "Partition ID: " << partitionId << std::endl; - printNumericTable(pUser, "User Factors (first 10 rows):", 10); - printNumericTable(pItem, "Item Factors (first 10 rows):", 10); + printNumericTable(pUser, "User Factors (first 10 rows x 20 columns):", 10, 20); + printNumericTable(pItem, "Item Factors (first 10 rows x 20 columns):", 10, 20); std::cout << "User Offset: " << getOffsetFromOffsetTable(userOffset) << std::endl; std::cout << "Item Offset: " << getOffsetFromOffsetTable(itemOffset) << std::endl; std::cout 
<< std::endl; diff --git a/mllib-dal/src/main/native/Makefile b/mllib-dal/src/main/native/Makefile index 23222e646..e3a7e2161 100644 --- a/mllib-dal/src/main/native/Makefile +++ b/mllib-dal/src/main/native/Makefile @@ -12,11 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -CC := gcc -CXX := g++ +CC := clang +CXX := clang++ RM := rm -rf -CFLAGS := -g -Wall -fPIC -std=c++11 +CFLAGS := -g -Wall -Wno-deprecated-declarations -fPIC -std=c++11 # The following paths setting works for self-built libs from source code # https://github.com/oneapi-src/oneCCL. If oneCCL package in oneAPI Toolkit is used, @@ -33,7 +33,7 @@ INCS := -I $(JAVA_HOME)/include \ LIBS := -L${CCL_ROOT}/lib -lccl \ -L$(DAALROOT)/lib/intel64 -l:libdaal_core.a -l:libdaal_thread.a \ - -L$(TBBROOT)/lib -ltbb -ltbbmalloc + -L$(TBBROOT)/lib/intel64/gcc4.8 -ltbb -ltbbmalloc # TODO: Add signal chaining support, should fix linking, package so and loading # -L$(JAVA_HOME)/jre/lib/amd64 -ljsig diff --git a/mllib-dal/src/main/native/PCADALImpl.cpp b/mllib-dal/src/main/native/PCADALImpl.cpp index 33e2bc95d..95172d05f 100644 --- a/mllib-dal/src/main/native/PCADALImpl.cpp +++ b/mllib-dal/src/main/native/PCADALImpl.cpp @@ -125,8 +125,8 @@ JNIEXPORT jlong JNICALL Java_org_apache_spark_ml_feature_PCADALImpl_cPCATrainDAL std::cout << "PCA (native): master step took " << duration << " secs" << std::endl; /* Print the results */ - printNumericTable(result->get(pca::eigenvalues), "First 10 Eigenvalues:", 10); - printNumericTable(result->get(pca::eigenvectors), "First 10 Eigenvectors:", 10); + printNumericTable(result->get(pca::eigenvalues), "First 10 eigenvalues with first 20 dimensions:", 10, 20); + printNumericTable(result->get(pca::eigenvectors), "First 10 eigenvectors with first 20 dimensions:", 10, 20); // Return all eigenvalues & eigenvectors diff --git a/mllib-dal/src/main/scala/org/apache/spark/ml/util/Utils.scala b/mllib-dal/src/main/scala/org/apache/spark/ml/util/Utils.scala index aa8eb8979..0dd43d24f 100644 --- a/mllib-dal/src/main/scala/org/apache/spark/ml/util/Utils.scala +++ b/mllib-dal/src/main/scala/org/apache/spark/ml/util/Utils.scala @@ -72,6 +72,12 @@ object Utils { } def checkExecutorAvailPort(data: RDD[_], localIP: String) : Int = { + + if (localIP == "127.0.0.1" || localIP == "127.0.1.1") { + println(s"\nOneCCL: Error: doesn't support loopback IP ${localIP}, please assign IP address to your host.\n") + System.exit(-1) + } + val sc = data.sparkContext val result = data.mapPartitions { p => LibLoader.loadLibraries() diff --git a/mllib-dal/test-cluster.sh b/mllib-dal/test-cluster.sh deleted file mode 100755 index 4f5a6132a..000000000 --- a/mllib-dal/test-cluster.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/usr/bin/env bash - -cd ../dev/test-cluster/workloads - -./run-kmeans-pyspark.sh diff --git a/mllib-dal/test.sh b/mllib-dal/test.sh index 0157c22a4..befd66495 100755 --- a/mllib-dal/test.sh +++ b/mllib-dal/test.sh @@ -2,7 +2,12 @@ # Check envs for building if [[ -z $JAVA_HOME ]]; then - echo $JAVA_HOME not defined! + echo JAVA_HOME not defined! + exit 1 +fi + +if [[ -z $(which mvn) ]]; then + echo Maven not found! 
exit 1 fi @@ -21,16 +26,17 @@ if [[ -z $CCL_ROOT ]]; then exit 1 fi -echo === Building Environments === +echo === Testing Environments === echo JAVA_HOME=$JAVA_HOME echo DAALROOT=$DAALROOT echo TBBROOT=$TBBROOT echo CCL_ROOT=$CCL_ROOT -echo GCC Version: $(gcc -dumpversion) +echo Maven Version: $(mvn -v | head -n 1 | cut -f3 -d" ") +echo Clang Version: $(clang -dumpversion) echo ============================= # Enable signal chaining support for JNI -export LD_PRELOAD=$JAVA_HOME/jre/lib/amd64/libjsig.so +# export LD_PRELOAD=$JAVA_HOME/jre/lib/amd64/libjsig.so # -Dtest=none to turn off the Java tests
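
Note on the reworked example scripts: each run.sh in this patch now sources ../../conf/env.sh instead of defining its Spark, Hadoop and OAP MLlib settings inline. The following is a minimal, hypothetical sketch of the variables those scripts appear to expect from that file, using only names visible in the diffs above (SPARK_MASTER, executor settings, OAP_MLLIB_VERSION, OAP_MLLIB_JAR and the classpath variables); the actual conf/env.sh shipped with this patch may define more or use different defaults.

# Hypothetical sketch of conf/env.sh -- illustration only, not the file added by this patch
export SPARK_HOME=/path/to/your/spark/home
export HADOOP_HOME=/path/to/your/hadoop/home
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
export HDFS_ROOT=hdfs://your_hostname:8020
export OAP_MLLIB_ROOT=/path/to/your/oap-mllib

OAP_MLLIB_VERSION=1.1.0

# Spark resources; verify the cluster manager actually grants what is requested
SPARK_MASTER=yarn
SPARK_DRIVER_MEMORY=1G
SPARK_NUM_EXECUTORS=2
SPARK_EXECUTOR_CORES=1
SPARK_EXECUTOR_MEMORY=1G
SPARK_DEFAULT_PARALLELISM=$(expr $SPARK_NUM_EXECUTORS '*' $SPARK_EXECUTOR_CORES '*' 2)

# OAP MLlib jar and classpaths consumed by spark-submit in the example run.sh scripts
OAP_MLLIB_JAR_NAME=oap-mllib-$OAP_MLLIB_VERSION-with-spark-3.0.0.jar
OAP_MLLIB_JAR=$OAP_MLLIB_ROOT/mllib-dal/target/$OAP_MLLIB_JAR_NAME
SPARK_DRIVER_CLASSPATH=$OAP_MLLIB_JAR           # absolute path on the driver
SPARK_EXECUTOR_CLASSPATH=./$OAP_MLLIB_JAR_NAME  # relative path on executors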