diff --git a/.github/PULL_REQUEST_TEMPLATE b/.github/PULL_REQUEST_TEMPLATE
index b0289f4f6..3d8952163 100644
--- a/.github/PULL_REQUEST_TEMPLATE
+++ b/.github/PULL_REQUEST_TEMPLATE
@@ -1,3 +1,9 @@
## What changes were proposed in this pull request?
(Please fill in changes proposed in this fix)
+
+## Does this PR also require the following changes?
+
+- CI
+- Documentation
+- Example
diff --git a/.github/workflows/oap-mllib-ci.yml b/.github/workflows/oap-mllib-ci.yml
index f0c1ab3e5..1b28d0a79 100644
--- a/.github/workflows/oap-mllib-ci.yml
+++ b/.github/workflows/oap-mllib-ci.yml
@@ -20,7 +20,7 @@ jobs:
~/.m2/repository
/opt/intel/oneapi
~/opt
- key: ${{ runner.os }}_spark-3.1.1_hadoop-3.2.0_oneapi-2021.3.0
+ key: ${{ runner.os }}_spark-3.1.1_hadoop-3.2.0_oneapi-2021.4.0
restore-keys: |
${{ runner.os }}-
- name: Set up environments
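The cache key above now pins oneAPI 2021.4.0, so future toolkit upgrades must bump the key in step or stale cached components will be restored. A hedged way to cross-check the installed version against the key (assuming the default `/opt/intel/oneapi` prefix):

```bash
# Sketch: list installed oneAPI component versions to compare with the cache key.
ls /opt/intel/oneapi/compiler/ | grep -v latest   # e.g. 2021.4.0
```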
diff --git a/.gitignore b/.gitignore
index 1d621bdd4..b69b6d7f3 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,7 +1,7 @@
*.o
*.log
-.vscode
*.iml
+.vscode/
target/
.idea/
.idea_modules/
diff --git a/.vscode/c_cpp_properties.json b/.vscode/c_cpp_properties.json
new file mode 100644
index 000000000..baa3db3b1
--- /dev/null
+++ b/.vscode/c_cpp_properties.json
@@ -0,0 +1,19 @@
+{
+ "configurations": [
+ {
+ "name": "Linux",
+ "includePath": [
+ "${workspaceFolder}/mllib-dal/src/main/native/**",
+ "${CCL_ROOT}/include/**",
+ "${DAALROOT}/include/**",
+ "${JAVA_HOME}/include/**"
+ ],
+ "defines": [],
+ "compilerPath": "${CMPLR_ROOT}/linux/bin/clang",
+ "cStandard": "c17",
+ "cppStandard": "c++14",
+ "intelliSenseMode": "clang-x64"
+ }
+ ],
+ "version": 4
+}
\ No newline at end of file
diff --git a/.vscode/settings.json b/.vscode/settings.json
new file mode 100644
index 000000000..2edd51bcb
--- /dev/null
+++ b/.vscode/settings.json
@@ -0,0 +1,37 @@
+{
+ "files.associations": {
+ "*.tcc": "cpp",
+ "cctype": "cpp",
+ "chrono": "cpp",
+ "cstdint": "cpp",
+ "ctime": "cpp",
+ "cwchar": "cpp",
+ "exception": "cpp",
+ "initializer_list": "cpp",
+ "iosfwd": "cpp",
+ "iostream": "cpp",
+ "istream": "cpp",
+ "limits": "cpp",
+ "ostream": "cpp",
+ "ratio": "cpp",
+ "string_view": "cpp",
+ "type_traits": "cpp",
+ "clocale": "cpp",
+ "streambuf": "cpp",
+ "algorithm": "cpp",
+ "cstdarg": "cpp",
+ "cstddef": "cpp",
+ "cstdio": "cpp",
+ "deque": "cpp",
+ "vector": "cpp",
+ "functional": "cpp",
+ "memory_resource": "cpp",
+ "string": "cpp",
+ "utility": "cpp",
+ "fstream": "cpp",
+ "iomanip": "cpp",
+ "new": "cpp",
+ "sstream": "cpp",
+ "*.template": "shellscript"
+ }
+}
\ No newline at end of file
diff --git a/README.md b/README.md
index 6088d0350..0667dc69c 100644
--- a/README.md
+++ b/README.md
@@ -10,11 +10,11 @@ OAP MLlib is an optimized package to accelerate machine learning algorithms in
## Compatibility
-OAP MLlib maintains the same API interfaces with Spark MLlib. That means the application built with Spark MLlib can be running directly with minimum configuration.
+OAP MLlib maintains the same API interfaces as Spark MLlib, so applications built with Spark MLlib can run directly with minimal configuration.
-Most of the algorithms can produce the same results that are identical with Spark MLlib. However due to the nature of distributed float point operations, there may be some small deviation from the original result, we will make sure the error is within acceptable range and the accuracy is on par with Spark MLlib.
+Most of the algorithms produce results identical to Spark MLlib's. However, due to the nature of distributed floating-point operations, there may be small deviations from the original results; we make sure the error stays within an acceptable range and that accuracy is on par with Spark MLlib.
-For those algorithms that are not accelerated by OAP MLlib, the original Spark MLlib one will be used.
+For algorithms that are not accelerated by OAP MLlib, the original Spark MLlib implementation is used.
## Online Documentation
@@ -55,7 +55,7 @@ Intel® oneAPI Toolkits components used by the project are already included into
#### General Configuration
##### YARN Cluster Manager
-Users usually run Spark application on __YARN__ with __client__ mode. In that case, you only need to add the following configurations in `spark-defaults.conf` or in `spark-submit` command line before running.
+Users usually run Spark applications on __YARN__ in __client__ mode. In that case, you only need to add the following configurations to `spark-defaults.conf` or to the `spark-submit` command line before running.
```
# absolute path of the jar for uploading
@@ -85,14 +85,14 @@ OAP MLlib expects 1 executor acts as 1 oneCCL rank for compute. As `spark.shuffl
### Sanity Check
#### Setup `env.sh`
-```
+```bash
$ cd conf
$ cp env.sh.template env.sh
```
Edit related variables in "`Minimun Settings`" of `env.sh`
#### Upload example data files to HDFS
-```
+```bash
$ cd examples
$ hadoop fs -mkdir -p /user/$USER
$ hadoop fs -copyFromLocal data
@@ -100,7 +100,7 @@ Edit related variables in "`Minimun Settings`" of `env.sh`
```
#### Run K-means
-```
+```bash
$ cd examples/kmeans
$ ./build.sh
$ ./run.sh
@@ -119,45 +119,27 @@ We use [Apache Maven](https://maven.apache.org/) to manage and build source code
* JDK 8.0+
* Apache Maven 3.6.2+
* GNU GCC 4.8.5+
-* Intel® oneAPI Toolkits 2021.3.0 Components:
+* Intel® oneAPI Base Toolkit (>=2021.4.0) Components:
- DPC++/C++ Compiler (dpcpp/clang++)
- Data Analytics Library (oneDAL)
- Threading Building Blocks (oneTBB)
-* [Open Source Intel® oneAPI Collective Communications Library (oneCCL)](https://github.com/oneapi-src/oneCCL)
-
-Intel® oneAPI Toolkits and its components can be downloaded and install from [here](https://software.intel.com/content/www/us/en/develop/tools/oneapi.html). Installation process for oneAPI using Package Managers (YUM (DNF), APT, and ZYPPER) is also available. Generally you only need to install oneAPI Base Toolkit for Linux with all or selected components mentioned above. Instead of using oneCCL included in Intel® oneAPI Toolkits, we prefer to build from open source oneCCL to resolve some bugs.
+ - Collective Communications Library (oneCCL)
-More details about oneAPI can be found [here](https://software.intel.com/content/www/us/en/develop/tools/oneapi.html).
+Generally you only need to install __Intel® oneAPI Base Toolkit for Linux__ with all or selected components mentioned above. The Base Toolkit can be downloaded and installed from [here](https://software.intel.com/content/www/us/en/develop/tools/oneapi.html). Installing oneAPI through package managers (YUM (DNF), APT, and ZYPPER) is also supported. More details about oneAPI can be found [here](https://software.intel.com/content/www/us/en/develop/tools/oneapi.html).
-Scala and Java dependency descriptions are already included in Maven POM file.
+Scala and Java dependency descriptions are already included in the Maven POM file.
***Note:*** You can refer to [this script](dev/install-build-deps-centos.sh) to install correct dependencies: DPC++/C++, oneDAL, oneTBB, oneCCL.
### Build
-#### Building oneCCL
-
-To clone and build from open source oneCCL, run the following commands:
-```
- $ git clone https://github.com/oneapi-src/oneCCL
- $ cd oneCCL
- $ git checkout 2021.2.1
- $ mkdir build && cd build
- $ cmake ..
- $ make -j install
-```
-
-The generated files will be placed in `/your/oneCCL_source_code/build/_install`
-
-#### Building OAP MLlib
-
To clone and checkout source code, run the following commands:
-```
- $ git clone https://github.com/oap-project/oap-mllib.git
+```bash
+ $ git clone https://github.com/oap-project/oap-mllib.git
```
__Optional__ to checkout specific release branch:
-```
- $ cd oap-mllib && git checkout ${version}
+```bash
+ $ cd oap-mllib && git checkout ${version}
```
We rely on environment variables to find required toolchains and libraries. Please make sure the following environment variables are set for building:
@@ -171,25 +153,22 @@ CCL_ROOT | Path to oneCCL home directory
We suggest you to source `setvars.sh` script into current shell to setup building environments as following:
-```
+```bash
$ source /opt/intel/oneapi/setvars.sh
- $ source /your/oneCCL_source_code/build/_install/env/setvars.sh
```
-__Be noticed we are using our own built oneCCL instead, we should source oneCCL's `setvars.sh` to overwrite oneAPI one.__
-
You can also refer to [this CI script](dev/ci-test.sh) to setup the building environments.
-If you prefer to buid your own open source [oneDAL](https://github.com/oneapi-src/oneDAL), [oneTBB](https://github.com/oneapi-src/oneTBB) versions rather than use the ones included in oneAPI TookKits, you can refer to the related build instructions and manually source `setvars.sh` accordingly.
+If you prefer to build your own open source [oneDAL](https://github.com/oneapi-src/oneDAL), [oneTBB](https://github.com/oneapi-src/oneTBB), [oneCCL](https://github.com/oneapi-src/oneCCL) versions rather than use the ones included in the oneAPI Base Toolkit, you can refer to the related build instructions and manually source `setvars.sh` accordingly.
-To build, run the following commands:
-```
+To build, run the following commands:
+```bash
$ cd mllib-dal
$ ./build.sh
```
If no parameter is given, the Spark version __3.1.1__ will be activated by default. You can also specify a different Spark version with option `-p spark-x.x.x`. For example:
-```
+```bash
$ ./build.sh -p spark-3.0.0
```
@@ -206,6 +185,7 @@ pca | PCA example for Scala
als | ALS example for Scala
naive-bayes | Naive Bayes example for Scala
linear-regression | Linear Regression example for Scala
+correlation | Correlation example for Scala
### Python Examples
@@ -217,12 +197,11 @@ als-pyspark | ALS example for PySpark
## List of Accelerated Algorithms
-Algorithm | Category | Maturity
-------------------|----------|-------------
-K-Means | CPU | Stable
-K-Means | GPU | Experimental
-PCA | CPU | Stable
-PCA | GPU | Experimental
-ALS | CPU | Stable
-Naive Bayes | CPU | Experimental
-Linear Regression | CPU | Experimental
+Algorithm | CPU | GPU | Maturity
+------------------|-----|-----|---------
+K-Means | X | X | Stable
+PCA | X | X | Stable
+ALS | X | | Experimental
+Naive Bayes | X | | Stable
+Linear Regression | X | | Stable
+Correlation | X | X | Experimental
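Putting the revised README instructions together, a condensed sketch of the CPU build flow (default install paths assumed; adjust for your environment):

```bash
source /opt/intel/oneapi/setvars.sh   # toolchain env: dpcpp, oneDAL, oneTBB, oneCCL, MPI
cd mllib-dal
../dev/prepare-build-deps.sh          # stage runtime .so dependencies into src/main/resources/lib
./build.sh                            # defaults to spark-3.1.1; use -p spark-x.x.x to override
```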
diff --git a/RELEASE b/RELEASE
new file mode 100644
index 000000000..a72a32503
--- /dev/null
+++ b/RELEASE
@@ -0,0 +1 @@
+OAP_MLLIB_VERSION=1.2.0
\ No newline at end of file
diff --git a/conf/env.sh.template b/conf/env.sh.template
index 7bdb97f22..168f9d133 100644
--- a/conf/env.sh.template
+++ b/conf/env.sh.template
@@ -2,8 +2,6 @@
# ============== Minimum Settings ============= #
-# Set OAP MLlib version (e.g. 1.1.0)
-OAP_MLLIB_VERSION=x.x.x
# Set Spark master
SPARK_MASTER=yarn
# Set Hadoop home path
@@ -17,6 +15,9 @@ export OAP_MLLIB_ROOT=/path/to/oap-mllib/home
# ============================================= #
+# Import RELEASE envs
+source $OAP_MLLIB_ROOT/RELEASE
+
# Set HADOOP_CONF_DIR for Spark
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
@@ -42,7 +43,7 @@ SPARK_TOTAL_CORES=$((SPARK_NUM_EXECUTORS * SPARK_EXECUTOR_CORES))
SPARK_DEFAULT_PARALLELISM=$((SPARK_TOTAL_CORES * 2))
# Checks
-for dir in $SPARK_HOME $HADOOP_HOME $OAP_MLLIB_JAR
+for dir in $SPARK_HOME $HADOOP_HOME $OAP_MLLIB_JAR
do
if [[ ! -e $dir ]]; then
echo $dir does not exist!
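With the hardcoded version gone, `env.sh` now picks up `OAP_MLLIB_VERSION` from the top-level `RELEASE` file. A minimal sketch of the effect (value as defined in `RELEASE` above):

```bash
# RELEASE is a plain KEY=VALUE file, so sourcing it defines OAP_MLLIB_VERSION,
# which env.sh then uses to compute the jar name.
source $OAP_MLLIB_ROOT/RELEASE
echo "oap-mllib-$OAP_MLLIB_VERSION.jar"   # -> oap-mllib-1.2.0.jar
```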
diff --git a/dev/build-maven-local-repo.sh b/dev/build-maven-local-repo.sh
new file mode 100755
index 000000000..44a94a794
--- /dev/null
+++ b/dev/build-maven-local-repo.sh
@@ -0,0 +1,30 @@
+#!/usr/bin/env bash
+
+if [[ -z $DAALROOT ]]; then
+ echo DAALROOT not defined!
+ exit 1
+fi
+
+echo "Building Maven Repo for oneDAL ..."
+
+mkdir maven-repository
+mvn deploy:deploy-file -Dfile=$DAALROOT/lib/onedal.jar -DgroupId=com.intel.onedal -Dversion=2021.4.0 -Dpackaging=jar -Durl=file:./maven-repository -DrepositoryId=maven-repository -DupdateReleaseInfo=true
+
+echo "DONE"
+
+find ./maven-repository
+
+# Add the following into pom.xml:
+
+# <repositories>
+#     <repository>
+#         <id>maven-repository</id>
+#         <url>file:///${project.basedir}/maven-repository</url>
+#     </repository>
+# </repositories>
+
+# <dependency>
+#     <groupId>com.intel.dal</groupId>
+#     <artifactId>dal</artifactId>
+#     <version>2021.4.0</version>
+# </dependency>
\ No newline at end of file
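A hypothetical invocation sketch for this script (the oneAPI prefix is the default; `$DAALROOT` must resolve to the oneDAL install):

```bash
source /opt/intel/oneapi/setvars.sh                      # provides $DAALROOT
./dev/build-maven-local-repo.sh
ls maven-repository/com/intel/onedal/onedal/2021.4.0/    # deployed onedal.jar lands here
```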
diff --git a/dev/ci-test.sh b/dev/ci-test.sh
index ce079fe7d..59c64eb7d 100755
--- a/dev/ci-test.sh
+++ b/dev/ci-test.sh
@@ -1,11 +1,22 @@
#!/usr/bin/env bash
+# exit when any command fails
+set -e
+
+# keep track of the last executed command
+trap 'last_command=$current_command; current_command=$BASH_COMMAND' DEBUG
+# echo an error message before exiting
+trap 'echo "\"${last_command}\" failed with exit code $?."' EXIT
+
# Setup building envs
source /opt/intel/oneapi/setvars.sh
-source /tmp/oneCCL/build/_install/env/setvars.sh
-SupportedSparkVersions=("spark-3.0.0" "spark-3.0.1" "spark-3.0.2" "spark-3.1.1")
+# Prepare lib resources
+cd $GITHUB_WORKSPACE/mllib-dal
+../dev/prepare-build-deps.sh
+# Test for all versions
+SupportedSparkVersions=("spark-3.0.0" "spark-3.0.1" "spark-3.0.2" "spark-3.1.1")
for SparkVer in ${SupportedSparkVersions[*]}; do
echo
echo "========================================"
@@ -13,6 +24,7 @@ for SparkVer in ${SupportedSparkVersions[*]}; do
echo "========================================"
echo
cd $GITHUB_WORKSPACE/mllib-dal
+ ./build.sh -q
./test.sh -q -p $SparkVer
done
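One note on the DEBUG/EXIT trap pair introduced here: the EXIT trap also fires on success, so clean runs print a spurious failure message. A quieter alternative sketch using an ERR trap (Bash-specific):

```bash
#!/usr/bin/env bash
# With set -eE the ERR trap fires only for failing commands (and is inherited
# by functions), so successful runs exit silently.
set -eE
trap 'echo "\"${BASH_COMMAND}\" failed with exit code $?."' ERR
```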
diff --git a/dev/install-build-deps-centos.sh b/dev/install-build-deps-centos.sh
index 275222be8..877992228 100755
--- a/dev/install-build-deps-centos.sh
+++ b/dev/install-build-deps-centos.sh
@@ -15,17 +15,7 @@ EOF
sudo mv /tmp/oneAPI.repo /etc/yum.repos.d
# sudo yum groupinstall -y "Development Tools"
# sudo yum install -y cmake
- sudo yum install -y intel-oneapi-dpcpp-cpp-2021.3.0 intel-oneapi-dal-devel-2021.3.0 intel-oneapi-tbb-devel-2021.3.0
+ sudo yum install -y intel-oneapi-dpcpp-cpp-2021.4.0 intel-oneapi-dal-devel-2021.4.0 intel-oneapi-tbb-devel-2021.4.0 intel-oneapi-ccl-devel-2021.4.0 intel-oneapi-mpi-devel-2021.4.0
else
echo "oneAPI components already installed!"
fi
-
-echo "Building oneCCL ..."
-cd /tmp
-rm -rf oneCCL
-git clone https://github.com/oneapi-src/oneCCL
-cd oneCCL
-git checkout 2021.2.1
-mkdir build && cd build
-cmake ..
-make -j 2 install
diff --git a/dev/install-build-deps-ubuntu.sh b/dev/install-build-deps-ubuntu.sh
index a6379dae9..027956b74 100755
--- a/dev/install-build-deps-ubuntu.sh
+++ b/dev/install-build-deps-ubuntu.sh
@@ -9,17 +9,7 @@ if [ ! -d /opt/intel/oneapi ]; then
echo "deb https://apt.repos.intel.com/oneapi all main" | sudo tee /etc/apt/sources.list.d/oneAPI.list
sudo apt-get update
# sudo apt-get install -y build-essential cmake
- sudo apt-get install -y intel-oneapi-dpcpp-cpp-2021.3.0 intel-oneapi-dal-devel-2021.3.0 intel-oneapi-tbb-devel-2021.3.0
+ sudo apt-get install -y intel-oneapi-dpcpp-cpp-2021.4.0 intel-oneapi-dal-devel-2021.4.0 intel-oneapi-tbb-devel-2021.4.0 intel-oneapi-ccl-devel-2021.4.0 intel-oneapi-mpi-devel-2021.4.0
else
echo "oneAPI components already installed!"
fi
-
-echo "Building oneCCL ..."
-cd /tmp
-rm -rf oneCCL
-git clone https://github.com/oneapi-src/oneCCL
-cd oneCCL
-git checkout 2021.2.1
-mkdir build && cd build
-cmake ..
-make -j 2 install
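After either installer runs, a quick hedged sanity check that the pinned 2021.4.0 components actually landed (`dpkg` on Ubuntu, `rpm` on CentOS):

```bash
# Sketch: list the oneAPI packages pinned by the install scripts above.
dpkg -l 2>/dev/null | grep -E 'intel-oneapi-.*2021\.4\.0' \
  || rpm -qa | grep -E 'intel-oneapi-.*2021\.4\.0'
```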
diff --git a/dev/prepare-build-deps-gpu.sh b/dev/prepare-build-deps-gpu.sh
new file mode 100755
index 000000000..e6762e1c7
--- /dev/null
+++ b/dev/prepare-build-deps-gpu.sh
@@ -0,0 +1,71 @@
+#!/usr/bin/env bash
+
+if [ -z ${ONEAPI_ROOT} ]; then
+ echo Please source Intel oneAPI Toolkit environments!
+ exit 1
+fi
+
+if [[ -z $DAALROOT ]]; then
+ echo DAALROOT not defined!
+ exit 1
+fi
+
+if [[ -z $TBBROOT ]]; then
+ echo TBBROOT not defined!
+ exit 1
+fi
+
+if [[ -z $I_MPI_ROOT ]]; then
+ echo I_MPI_ROOT not defined!
+ exit 1
+fi
+
+if [[ -z $CCL_ROOT ]]; then
+ echo CCL_ROOT not defined!
+ exit 1
+fi
+
+# Use patchelf to change SONAME for libfabric
+if [[ -z $(which patchelf) ]]; then
+ echo Please install \"patchelf\"!
+ exit 1
+fi
+
+if [[ $(basename $(pwd)) != "mllib-dal" ]]; then
+ echo Please execute the script from \"mllib-dal\" directory!
+ exit 1
+fi
+
+TARGET_DIR=./src/main/resources/lib
+
+rm -f $TARGET_DIR/*.so*
+
+cp $CCL_ROOT/lib/cpu_icc/libccl.so.1.0 $TARGET_DIR/libccl.so.1
+
+cp $I_MPI_ROOT/libfabric/lib/libfabric.so.1 $TARGET_DIR/libfabric.so.1
+cp $I_MPI_ROOT/libfabric/lib/prov/libsockets-fi.so $TARGET_DIR
+
+# Workaround dlopen (libfabric.so) in oneCCL
+cp $I_MPI_ROOT/libfabric/lib/libfabric.so.1 $TARGET_DIR/libfabric.so
+patchelf --set-soname libfabric.so $TARGET_DIR/libfabric.so
+
+cp $I_MPI_ROOT/lib/release_mt/libmpi.so.12.0.0 $TARGET_DIR/libmpi.so.12
+
+cp $DAALROOT/lib/intel64/libJavaAPI.so.1.1 $TARGET_DIR/libJavaAPI.so
+
+cp $TBBROOT/lib/intel64/gcc4.8/libtbb.so.12.4 $TARGET_DIR/libtbb.so.12
+cp $TBBROOT/lib/intel64/gcc4.8/libtbbmalloc.so.2.4 $TARGET_DIR/libtbbmalloc.so.2
+
+# SYCL libs
+cp $CMPLR_ROOT/linux/compiler/lib/intel64_lin/libintlc.so.5 $TARGET_DIR
+cp $CMPLR_ROOT/linux/compiler/lib/intel64_lin/libsvml.so $TARGET_DIR
+
+# Workaround lib loading for JNI as libirng.so doesn't have soname
+cp $CMPLR_ROOT/linux/compiler/lib/intel64_lin/libirng.so $TARGET_DIR
+patchelf --set-soname libirng.so $TARGET_DIR/libirng.so
+
+cp $CMPLR_ROOT/linux/compiler/lib/intel64_lin/libimf.so $TARGET_DIR
+cp $CMPLR_ROOT/linux/lib/libOpenCL.so.1 $TARGET_DIR
+cp $CMPLR_ROOT/linux/lib/libsycl.so.5 $TARGET_DIR
+
+echo oneAPI Toolkit version: $(basename $CCL_ROOT) > $TARGET_DIR/VERSION
diff --git a/dev/prepare-build-deps.sh b/dev/prepare-build-deps.sh
new file mode 100755
index 000000000..6b74dfed2
--- /dev/null
+++ b/dev/prepare-build-deps.sh
@@ -0,0 +1,59 @@
+#!/usr/bin/env bash
+
+if [ -z ${ONEAPI_ROOT} ]; then
+ echo Please source Intel oneAPI Toolkit environments!
+ exit 1
+fi
+
+if [[ -z $DAALROOT ]]; then
+ echo DAALROOT not defined!
+ exit 1
+fi
+
+if [[ -z $TBBROOT ]]; then
+ echo TBBROOT not defined!
+ exit 1
+fi
+
+if [[ -z $I_MPI_ROOT ]]; then
+ echo I_MPI_ROOT not defined!
+ exit 1
+fi
+
+if [[ -z $CCL_ROOT ]]; then
+ echo CCL_ROOT not defined!
+ exit 1
+fi
+
+# Use patchelf to change SONAME for libfabric
+if [[ -z $(which patchelf) ]]; then
+ echo Please install \"patchelf\"!
+ exit 1
+fi
+
+if [[ $(basename $(pwd)) != "mllib-dal" ]]; then
+ echo Please execute the script from \"mllib-dal\" directory!
+ exit 1
+fi
+
+TARGET_DIR=./src/main/resources/lib
+
+rm -f $TARGET_DIR/*.so*
+
+cp $CCL_ROOT/lib/cpu_icc/libccl.so.1.0 $TARGET_DIR/libccl.so.1
+
+cp $I_MPI_ROOT/libfabric/lib/libfabric.so.1 $TARGET_DIR/libfabric.so.1
+cp $I_MPI_ROOT/libfabric/lib/prov/libsockets-fi.so $TARGET_DIR
+
+# Workaround dlopen (libfabric.so) in oneCCL
+cp $I_MPI_ROOT/libfabric/lib/libfabric.so.1 $TARGET_DIR/libfabric.so
+patchelf --set-soname libfabric.so $TARGET_DIR/libfabric.so
+
+cp $I_MPI_ROOT/lib/release_mt/libmpi.so.12.0.0 $TARGET_DIR/libmpi.so.12
+
+cp $DAALROOT/lib/intel64/libJavaAPI.so.1.1 $TARGET_DIR/libJavaAPI.so
+
+cp $TBBROOT/lib/intel64/gcc4.8/libtbb.so.12.4 $TARGET_DIR/libtbb.so.12
+cp $TBBROOT/lib/intel64/gcc4.8/libtbbmalloc.so.2.4 $TARGET_DIR/libtbbmalloc.so.2
+
+echo oneAPI Toolkit version: $(basename $CCL_ROOT) > $TARGET_DIR/VERSION
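Both prepare scripts depend on the `patchelf` SONAME rewrite working; a hedged verification sketch over the staged libraries (run from `mllib-dal`; `libirng.so` exists only in the GPU variant):

```bash
# Sketch: confirm the SONAME rewrite took effect on the staged libfabric copy.
patchelf --print-soname src/main/resources/lib/libfabric.so    # expect: libfabric.so
readelf -d src/main/resources/lib/libfabric.so.1 | grep SONAME # original soname kept
```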
diff --git a/dev/setup-all.sh b/dev/setup-all.sh
index 66510e85e..7c08ce0e4 100755
--- a/dev/setup-all.sh
+++ b/dev/setup-all.sh
@@ -1,5 +1,13 @@
#!/usr/bin/env bash
+# exit when any command fails
+set -e
+
+# keep track of the last executed command
+trap 'last_command=$current_command; current_command=$BASH_COMMAND' DEBUG
+# echo an error message before exiting
+trap 'echo "\"${last_command}\" failed with exit code $?."' EXIT
+
# Install dependencies for building
$GITHUB_WORKSPACE/dev/install-build-deps-ubuntu.sh
diff --git a/dev/test-cluster/ci-test-cluster.sh b/dev/test-cluster/ci-test-cluster.sh
index 7a4600267..d86d89aef 100755
--- a/dev/test-cluster/ci-test-cluster.sh
+++ b/dev/test-cluster/ci-test-cluster.sh
@@ -1,5 +1,13 @@
#!/usr/bin/env bash
+# exit when any command fails
+set -e
+
+# keep track of the last executed command
+trap 'last_command=$current_command; current_command=$BASH_COMMAND' DEBUG
+# echo an error message before exiting
+trap 'echo "\"${last_command}\" failed with exit code $?."' EXIT
+
# Setup Spark envs
source $GITHUB_WORKSPACE/dev/test-cluster/setup-spark-envs.sh
@@ -8,12 +16,14 @@ cp $GITHUB_WORKSPACE/dev/test-cluster/env.sh $GITHUB_WORKSPACE/conf
cd $GITHUB_WORKSPACE/examples
+HOST_NAME=$(hostname -f)
+export HDFS_ROOT=hdfs://$HOST_NAME:8020
+
# Copy examples data to HDFS
-hadoop fs -mkdir -p /user/$USER
-hadoop fs -copyFromLocal data
-hadoop fs -ls data
+hadoop fs -copyFromLocal data /
+hadoop fs -find /
# Build and run all examples
-./build-all.sh
+./build-all-scala.sh
./run-all-scala.sh
./run-all-pyspark.sh
diff --git a/dev/test-cluster/env.sh b/dev/test-cluster/env.sh
index 225db0b7b..0a92a1a10 100644
--- a/dev/test-cluster/env.sh
+++ b/dev/test-cluster/env.sh
@@ -2,8 +2,6 @@
# ============== Minimum Settings ============= #
-# Set OAP MLlib version (e.g. 1.1.0)
-OAP_MLLIB_VERSION=1.1.0
# Set Spark master
SPARK_MASTER=yarn
# Set Hadoop home path
@@ -11,33 +9,43 @@ export HADOOP_HOME=$HADOOP_HOME
# Set Spark home path
export SPARK_HOME=$SPARK_HOME
# Set HDFS Root, should be hdfs://xxx or file://xxx
-export HDFS_ROOT=hdfs://localhost:8020
+
+HOST_NAME=$(hostname -f)
+export HDFS_ROOT=hdfs://$HOST_NAME:8020
# Set OAP MLlib source code root directory
export OAP_MLLIB_ROOT=$GITHUB_WORKSPACE
# ============================================= #
+# Import RELEASE envs
+source $OAP_MLLIB_ROOT/RELEASE
+
# Set HADOOP_CONF_DIR for Spark
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
# Set JAR name & path
OAP_MLLIB_JAR_NAME=oap-mllib-$OAP_MLLIB_VERSION.jar
OAP_MLLIB_JAR=$OAP_MLLIB_ROOT/mllib-dal/target/$OAP_MLLIB_JAR_NAME
-# Set Spark driver & executor classpaths,
-# absolute path for driver, relative path for executor
+# Set Spark driver & executor classpaths
+# YARN mode: use absolute path for driver, relative path for executors
+# Standalone mode: use absolute path for both driver and executors
SPARK_DRIVER_CLASSPATH=$OAP_MLLIB_JAR
-SPARK_EXECUTOR_CLASSPATH=./$OAP_MLLIB_JAR_NAME
+if [[ $SPARK_MASTER == yarn ]]; then
+ SPARK_EXECUTOR_CLASSPATH=./$OAP_MLLIB_JAR_NAME
+else
+ SPARK_EXECUTOR_CLASSPATH=$OAP_MLLIB_JAR
+fi
# Set Spark resources, can be overwritten in example
SPARK_DRIVER_MEMORY=1G
SPARK_NUM_EXECUTORS=2
SPARK_EXECUTOR_CORES=1
SPARK_EXECUTOR_MEMORY=1G
-SPARK_DEFAULT_PARALLELISM=$(expr $SPARK_NUM_EXECUTORS '*' $SPARK_EXECUTOR_CORES '*' 2)
+SPARK_TOTAL_CORES=$((SPARK_NUM_EXECUTORS * SPARK_EXECUTOR_CORES))
+SPARK_DEFAULT_PARALLELISM=$((SPARK_TOTAL_CORES * 2))
# Checks
-
-for dir in $SPARK_HOME $HADOOP_HOME $OAP_MLLIB_JAR
+for dir in $SPARK_HOME $HADOOP_HOME $OAP_MLLIB_JAR
do
if [[ ! -e $dir ]]; then
echo $dir does not exist!
diff --git a/dev/test-cluster/log4j.properties b/dev/test-cluster/log4j.properties
new file mode 100644
index 000000000..ff29121c2
--- /dev/null
+++ b/dev/test-cluster/log4j.properties
@@ -0,0 +1,42 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Set everything to be logged to the console
+log4j.rootCategory=WARN, console
+log4j.appender.console=org.apache.log4j.ConsoleAppender
+log4j.appender.console.target=System.err
+log4j.appender.console.layout=org.apache.log4j.PatternLayout
+log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n
+
+# Set the default spark-shell log level to WARN. When running the spark-shell, the
+# log level for this class is used to overwrite the root logger's log level, so that
+# the user can have different defaults for the shell and regular Spark apps.
+log4j.logger.org.apache.spark.repl.Main=WARN
+
+# Settings to quiet third party logs that are too verbose
+log4j.logger.org.sparkproject.jetty=WARN
+log4j.logger.org.sparkproject.jetty.util.component.AbstractLifeCycle=ERROR
+log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO
+log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO
+log4j.logger.org.apache.parquet=ERROR
+log4j.logger.parquet=ERROR
+
+# SPARK-9183: Settings to avoid annoying messages when looking up nonexistent UDFs in SparkSQL with Hive support
+log4j.logger.org.apache.hadoop.hive.metastore.RetryingHMSHandler=FATAL
+log4j.logger.org.apache.hadoop.hive.ql.exec.FunctionRegistry=ERROR
+
+log4j.logger.org.apache.spark.ml.util.LibLoader=DEBUG
diff --git a/dev/test-cluster/setup-cluster.sh b/dev/test-cluster/setup-cluster.sh
index 633d848e9..a5b48490e 100755
--- a/dev/test-cluster/setup-cluster.sh
+++ b/dev/test-cluster/setup-cluster.sh
@@ -1,15 +1,21 @@
#!/usr/bin/env bash
-WORK_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
+# exit when any command fails
+set -e
-cd $WORK_DIR
+# keep track of the last executed command
+trap 'last_command=$current_command; current_command=$BASH_COMMAND' DEBUG
+# echo an error message before exiting
+trap 'echo "\"${last_command}\" failed with exit code $?."' EXIT
+
+SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
echo JAVA_HOME is $JAVA_HOME
-HADOOP_VERSION=3.2.0
-SPARK_VERSION=3.1.1
-SPARK_HADOOP_VERSION=hadoop3.2
+# setup envs
+source $SCRIPT_DIR/setup-spark-envs.sh
+# download spark & hadoop bins
[ -d ~/opt ] || mkdir ~/opt
cd ~/opt
[ -f spark-$SPARK_VERSION-bin-$SPARK_HADOOP_VERSION.tgz ] || wget --no-verbose https://archive.apache.org/dist/spark/spark-$SPARK_VERSION/spark-$SPARK_VERSION-bin-$SPARK_HADOOP_VERSION.tgz
@@ -17,7 +23,7 @@ cd ~/opt
[ -f hadoop-$HADOOP_VERSION.tar.gz ] || wget --no-verbose https://archive.apache.org/dist/hadoop/core/hadoop-$HADOOP_VERSION/hadoop-$HADOOP_VERSION.tar.gz
[ -d hadoop-$HADOOP_VERSION ] || tar -xzf hadoop-$HADOOP_VERSION.tar.gz
-cd $WORK_DIR
+cd $SCRIPT_DIR
HOST_IP=$(hostname -f)
@@ -28,13 +34,14 @@ cp ./core-site.xml ~/opt/hadoop-$HADOOP_VERSION/etc/hadoop/
cp ./hdfs-site.xml ~/opt/hadoop-$HADOOP_VERSION/etc/hadoop/
cp ./yarn-site.xml ~/opt/hadoop-$HADOOP_VERSION/etc/hadoop/
cp ./hadoop-env.sh ~/opt/hadoop-$HADOOP_VERSION/etc/hadoop/
+cp ./log4j.properties ~/opt/spark-$SPARK_VERSION-bin-$SPARK_HADOOP_VERSION/conf
cp ./spark-defaults.conf ~/opt/spark-$SPARK_VERSION-bin-$SPARK_HADOOP_VERSION/conf
-source ./setup-spark-envs.sh
-
echo $HOST_IP > $HADOOP_HOME/etc/hadoop/slaves
echo $HOST_IP > $SPARK_HOME/conf/slaves
+ls -l $SPARK_HOME/conf
+
# create directories
mkdir -p /tmp/run/hdfs/namenode
mkdir -p /tmp/run/hdfs/datanode
diff --git a/dev/test-cluster/setup-spark-envs.sh b/dev/test-cluster/setup-spark-envs.sh
index 6e4e06423..5e988c3a9 100755
--- a/dev/test-cluster/setup-spark-envs.sh
+++ b/dev/test-cluster/setup-spark-envs.sh
@@ -1,5 +1,7 @@
#!/usr/bin/env bash
+set -x
+
HADOOP_VERSION=3.2.0
SPARK_VERSION=3.1.1
SPARK_HADOOP_VERSION=hadoop3.2
@@ -12,4 +14,6 @@ export SPARK_HOME=~/opt/spark-$SPARK_VERSION-bin-$SPARK_HADOOP_VERSION
export PYTHONPATH=$SPARK_HOME/python:$PYTHONPATH
export PYSPARK_PYTHON=python3
-export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PATH
\ No newline at end of file
+export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PATH
+
+set +x
\ No newline at end of file
diff --git a/examples/build-all.sh b/examples/build-all-scala.sh
similarity index 100%
rename from examples/build-all.sh
rename to examples/build-all-scala.sh
diff --git a/examples/correlation/build.sh b/examples/correlation/build.sh
old mode 100644
new mode 100755
diff --git a/examples/correlation/run.sh b/examples/correlation/run.sh
old mode 100644
new mode 100755
diff --git a/mllib-dal/build-cpu-gpu.sh b/mllib-dal/build-cpu-gpu.sh
index 27b1777d9..4317471e1 100755
--- a/mllib-dal/build-cpu-gpu.sh
+++ b/mllib-dal/build-cpu-gpu.sh
@@ -26,6 +26,19 @@ if [[ -z $CCL_ROOT ]]; then
exit 1
fi
+# Check lib dependencies for building
+RESOURCE_PATH=src/main/resources/lib
+LIBS=(libccl.so.1 libfabric.so libfabric.so.1 libJavaAPI.so libmpi.so.12 \
+ libsockets-fi.so libtbbmalloc.so.2 libtbb.so.12 libintlc.so.5 libsvml.so libirng.so libimf.so \
+ libOpenCL.so.1 libsycl.so.5)
+for lib in ${LIBS[@]}
+do
+ if [[ ! -f ./$RESOURCE_PATH/$lib ]]; then
+ echo $RESOURCE_PATH/$lib does not exist, please run ../dev/prepare-build-deps-gpu.sh!
+ exit 1
+fi
+done
+
versionArray=(
spark-3.0.0 \
spark-3.0.1 \
@@ -45,7 +58,7 @@ print_usage() {
do
echo " $version"
done
- echo
+ echo
}
while getopts "hqp:" opt
diff --git a/mllib-dal/build.sh b/mllib-dal/build.sh
index 7ae84e01f..96393f1ca 100755
--- a/mllib-dal/build.sh
+++ b/mllib-dal/build.sh
@@ -26,6 +26,23 @@ if [[ -z $CCL_ROOT ]]; then
exit 1
fi
+# Check lib dependencies for building
+RESOURCE_PATH=src/main/resources/lib
+LIBS=(libccl.so.1 libfabric.so libfabric.so.1 libJavaAPI.so libmpi.so.12 \
+ libsockets-fi.so libtbbmalloc.so.2 libtbb.so.12)
+for lib in ${LIBS[@]}
+do
+ if [[ ! -f ./$RESOURCE_PATH/$lib ]]; then
+ echo $RESOURCE_PATH/$lib does not exist, please run ../dev/prepare-build-deps.sh!
+ exit 1
+fi
+done
+
+if [[ -f ./$RESOURCE_PATH/libsycl.so.5 ]]; then
+ echo GPU libs found! Please re-run ../dev/prepare-build-deps.sh!
+ exit 1
+fi
+
versionArray=(
spark-3.0.0 \
spark-3.0.1 \
@@ -45,7 +62,7 @@ print_usage() {
do
echo " $version"
done
- echo
+ echo
}
while getopts "hqp:" opt
diff --git a/mllib-dal/pom.xml b/mllib-dal/pom.xml
index a3f81ae3a..d2b74863d 100644
--- a/mllib-dal/pom.xml
+++ b/mllib-dal/pom.xml
@@ -1,6 +1,5 @@
+ xmlns="http://maven.apache.org/POM/4.0.0" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
4.0.0
com.intel.oap
@@ -29,63 +28,84 @@
src/assembly/assembly.xml
-
+    <repositories>
+        <repository>
+            <id>gcs-maven-central-mirror</id>
+            <name>GCS Maven Central mirror</name>
+            <url>https://maven-central.storage-download.googleapis.com/maven2/</url>
+            <releases>
+                <enabled>true</enabled>
+            </releases>
+            <snapshots>
+                <enabled>false</enabled>
+            </snapshots>
+        </repository>
+        <repository>
+            <id>central</id>
+            <name>Maven Repository</name>
+            <url>https://repo.maven.apache.org/maven2</url>
+            <releases>
+                <enabled>true</enabled>
+            </releases>
+            <snapshots>
+                <enabled>false</enabled>
+            </snapshots>
+        </repository>
+    </repositories>
+
org.scala-lang
scala-library
2.12.10
-
com.github.scopt
scopt_2.12
3.7.0
-
org.apache.spark
spark-core_2.12
${spark.version}
provided
-
org.apache.spark
spark-sql_2.12
${spark.version}
provided
-
org.apache.spark
spark-mllib_2.12
${spark.version}
provided
-
-            <groupId>com.intel.onedal</groupId>
-            <artifactId>onedal</artifactId>
-            <version>${oneapi.version}</version>
-            <scope>system</scope>
-            <systemPath>${env.DAALROOT}/lib/onedal.jar</systemPath>
+            <groupId>com.intel.dal</groupId>
+            <artifactId>dal</artifactId>
+            <version>2021.4.0.83</version>
-
junit
junit
4.12
test
-
org.scalatest
scalatest_${scala.binary.version}
${scalatest.version}
test
-
org.apache.spark
spark-mllib_2.12
@@ -93,7 +113,6 @@
test-jar
test
-
org.apache.spark
spark-mllib-local_${scala.binary.version}
@@ -101,7 +120,6 @@
test-jar
test
-
org.jpmml
pmml-model
@@ -114,7 +132,6 @@
-
org.apache.spark
spark-sql_2.12
@@ -122,7 +139,6 @@
test-jar
test
-
org.apache.spark
spark-core_2.12
@@ -130,7 +146,6 @@
test-jar
test
-
org.apache.spark
spark-catalyst_2.12
@@ -138,7 +153,6 @@
test-jar
test
-
org.apache.spark
spark-tags_2.12
@@ -146,11 +160,9 @@
test-jar
test
-
-
cpu-gpu
@@ -159,11 +171,7 @@
CPU_GPU_PROFILE
-
- src/assembly/assembly-cpu-gpu.xml
-
-
spark-3.0.0
@@ -171,7 +179,6 @@
3.0.8
-
spark-3.0.1
@@ -179,7 +186,6 @@
3.0.8
-
spark-3.0.2
@@ -187,7 +193,6 @@
3.0.8
-
spark-3.1.1
@@ -202,58 +207,58 @@
-
- org.codehaus.mojo
- build-helper-maven-plugin
- 3.2.0
-
+
+ org.codehaus.mojo
+ build-helper-maven-plugin
+ 3.2.0
+
- add-source
- generate-sources
-
- add-source
-
-
-
-
-
-
-
+ add-source
+ generate-sources
+
+ add-source
+
+
+
+
+
+
+
- add-test-source
- generate-sources
-
- add-test-source
-
-
-
-
-
-
+ add-test-source
+ generate-sources
+
+ add-test-source
+
+
+
+
+
+
-
-
+
+
net.alchim31.maven
scala-maven-plugin
4.4.0
-
- scala-compile-first
- process-resources
-
- add-source
- compile
-
-
-
- scala-test-compile
- process-test-resources
-
- testCompile
-
-
+
+ scala-compile-first
+ process-resources
+
+ add-source
+ compile
+
+
+
+ scala-test-compile
+ process-test-resources
+
+ testCompile
+
+
${scala.version}
@@ -329,7 +334,6 @@
true
-
org.scalatest
scalatest-maven-plugin
@@ -348,18 +352,16 @@
-
maven-antrun-plugin
1.8
- process-classes
+ process-resources
Building native code
-
+
@@ -369,89 +371,10 @@
-
maven-resources-plugin
3.0.2
-
- ${project.build.testOutputDirectory}/lib
-
-
- ${env.CCL_ROOT}/lib
-
- ${ccl.lib}
- ${ccl.mpi.lib}
- ${ccl.fabric.lib}
-
-
-
- ${env.CCL_ROOT}/lib/prov
-
- libsockets-fi.so
-
-
-
- ${env.TBBROOT}/lib/intel64/gcc4.8
-
- ${tbb.lib}
- ${tbb.malloc.lib}
-
-
-
- ${env.DAALROOT}/lib/intel64
-
- ${dal.java.lib}
-
-
-
- ${project.build.directory}
-
- libMLlibDAL.so
-
-
-
-
-
-
-
- com.coderplus.maven.plugins
- copy-rename-maven-plugin
- 1.0
-
-
- rename-file
- process-test-resources
-
- rename
-
-
-
-
- ${project.build.testOutputDirectory}/lib/${tbb.lib}
- ${project.build.testOutputDirectory}/lib/libtbb.so.12
-
-
-
- ${project.build.testOutputDirectory}/lib/${tbb.malloc.lib}
- ${project.build.testOutputDirectory}/lib/libtbbmalloc.so.2
-
-
-
- ${project.build.testOutputDirectory}/lib/${ccl.mpi.lib}
- ${project.build.testOutputDirectory}/lib/libmpi.so.12
-
-
-
- ${project.build.testOutputDirectory}/lib/${dal.java.lib}
- ${project.build.testOutputDirectory}/lib/libJavaAPI.so
-
-
-
-
-
-
-
maven-assembly-plugin
3.0.0
@@ -472,8 +395,6 @@
-
-
diff --git a/mllib-dal/src/assembly/assembly.xml b/mllib-dal/src/assembly/assembly.xml
index 1d6abe146..e0d177b95 100644
--- a/mllib-dal/src/assembly/assembly.xml
+++ b/mllib-dal/src/assembly/assembly.xml
@@ -13,12 +13,6 @@
true
runtime
-
-
- /
- true
- system
-
@@ -28,51 +22,8 @@
README*
LICENSE*
NOTICE*
-
-
-
- ${project.build.directory}
- lib
-
- *.so
+ RELEASE*
-
-
-
-
- lib
- libtbb.so.12
-
-
-
- lib
- libtbbmalloc.so.2
-
-
-
-
- lib
- libJavaAPI.so
-
-
-
-
- lib
-
-
-
- lib
- libmpi.so.12
-
-
-
- lib
-
-
-
- lib
-
-
diff --git a/mllib-dal/src/main/java/org/apache/spark/ml/util/LibLoader.java b/mllib-dal/src/main/java/org/apache/spark/ml/util/LibLoader.java
index 7741e29ce..52a898efd 100644
--- a/mllib-dal/src/main/java/org/apache/spark/ml/util/LibLoader.java
+++ b/mllib-dal/src/main/java/org/apache/spark/ml/util/LibLoader.java
@@ -28,7 +28,7 @@ public final class LibLoader {
// Make sure loading libraries from different temp directory for each process
private static final String subDir = "MLlibDAL_" + UUID.randomUUID();
- private static final Logger log = LoggerFactory.getLogger("LibLoader");
+ private static final Logger log = LoggerFactory.getLogger(LibLoader.class);
private static boolean isLoaded = false;
@@ -65,11 +65,15 @@ public static synchronized void loadLibraries() throws IOException {
private static synchronized void loadLibCCL() throws IOException {
// Load libfabric from system first, if failed load from jar
if (!loadFromSystem("libfabric.so.1")) {
+ // Fix dlopen(libfabric.so) error:
+ // $ cp libfabric.so.1 libfabric.so
+ // $ patchelf --set-soname libfabric.so libfabric.so
+ loadFromJar(subDir, "libfabric.so");
loadFromJar(subDir, "libfabric.so.1");
loadFromJar(subDir, "libsockets-fi.so");
}
loadFromJar(subDir, "libmpi.so.12");
- loadFromJar(subDir, "libccl.so");
+ loadFromJar(subDir, "libccl.so.1");
}
/**
@@ -140,8 +144,7 @@ private static void loadFromJar(String path, String name) throws IOException {
}
try (OutputStream streamOut = new FileOutputStream(fileOut)) {
- log.debug("Writing resource to temp file.");
-
+ // Writing resource to temp file
byte[] buffer = new byte[32768];
while (true) {
int read = streamIn.read(buffer);
@@ -158,8 +161,8 @@ private static void loadFromJar(String path, String name) throws IOException {
streamIn.close();
}
- System.load(fileOut.toString());
- log.debug("DONE: Loading library as resource.");
+ System.load(fileOut.toString());
+ log.debug("DONE: Loading library " + fileOut.toString() +" as resource.");
}
/**
diff --git a/mllib-dal/src/main/native/CorrelationDALImpl.cpp b/mllib-dal/src/main/native/CorrelationDALImpl.cpp
index 347f5afda..f2efb70ea 100644
--- a/mllib-dal/src/main/native/CorrelationDALImpl.cpp
+++ b/mllib-dal/src/main/native/CorrelationDALImpl.cpp
@@ -150,8 +150,6 @@ Java_org_apache_spark_ml_stat_CorrelationDALImpl_cCorrelationTrainDAL(
ccl::communicator &comm = getComm();
size_t rankId = comm.rank();
- std::cout << " rankId : " << rankId << " ! "
- << std::endl;
const size_t nBlocks = executor_num;
diff --git a/mllib-dal/src/main/native/Makefile b/mllib-dal/src/main/native/Makefile
index 4f18a363b..cdc79a071 100644
--- a/mllib-dal/src/main/native/Makefile
+++ b/mllib-dal/src/main/native/Makefile
@@ -33,20 +33,18 @@ else
exit 1
endif
-# The following paths setting works for self-built libs from source code
-# https://github.com/oneapi-src/oneCCL. If oneCCL package in oneAPI Toolkit is used,
-# Should change paths to $(CCL_ROOT)/{include,lib}/cpu_icc instead
INCS := -I $(JAVA_HOME)/include \
-I $(JAVA_HOME)/include/linux \
- -I $(CCL_ROOT)/include \
+ -I $(CCL_ROOT)/include/cpu_icc \
-I $(DAALROOT)/include \
-I ./javah \
-I ./
# Use static link if possible, TBB is only available as dynamic libs
-LIBS_COMMON := -L$(CCL_ROOT)/lib -lccl \
+LIBS_COMMON := -L$(CCL_ROOT)/lib/cpu_icc -lccl \
+ -L$(CMPLR_ROOT)/linux/compiler/lib/intel64_lin -l:libirc.a \
-L$(DAALROOT)/lib/intel64 -l:libonedal_core.a -l:libonedal_thread.a \
- -L$(TBBROOT)/lib/lib/intel64/gcc4.8 -ltbb -ltbbmalloc
+ -L$(TBBROOT)/lib/intel64/gcc4.8 -ltbb -ltbbmalloc
ifeq ($(PLATFORM_PROFILE),CPU_ONLY_PROFILE)
LIBS := $(LIBS_COMMON)
@@ -80,7 +78,7 @@ ifeq ($(PLATFORM_PROFILE),CPU_GPU_PROFILE)
endif
# Output Binary
-OUTPUT = ../../../target/libMLlibDAL.so
+OUTPUT = ../../../src/main/resources/lib/libMLlibDAL.so
all: $(OUTPUT)
diff --git a/mllib-dal/src/main/native/build.sh b/mllib-dal/src/main/native/build.sh
index d271c5d97..cfa1ef844 100755
--- a/mllib-dal/src/main/native/build.sh
+++ b/mllib-dal/src/main/native/build.sh
@@ -14,5 +14,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+if [[ $OAP_MLLIB_TESTING == "true" ]]; then
+ exit 0
+fi
+
make clean
make -j
diff --git a/mllib-dal/src/main/resources/lib/.gitignore b/mllib-dal/src/main/resources/lib/.gitignore
new file mode 100644
index 000000000..86d0cb272
--- /dev/null
+++ b/mllib-dal/src/main/resources/lib/.gitignore
@@ -0,0 +1,4 @@
+# Ignore everything in this directory
+*
+# Except this file
+!.gitignore
\ No newline at end of file
diff --git a/mllib-dal/src/main/resources/log4j.properties b/mllib-dal/src/main/resources/log4j.properties
new file mode 100644
index 000000000..a33c21109
--- /dev/null
+++ b/mllib-dal/src/main/resources/log4j.properties
@@ -0,0 +1 @@
+log4j.logger.org.apache.spark.ml.util.LibLoader=DEBUG
diff --git a/mllib-dal/src/main/scala/org/apache/spark/ml/util/OneCCL.scala b/mllib-dal/src/main/scala/org/apache/spark/ml/util/OneCCL.scala
index 7fccae192..643ed8f54 100644
--- a/mllib-dal/src/main/scala/org/apache/spark/ml/util/OneCCL.scala
+++ b/mllib-dal/src/main/scala/org/apache/spark/ml/util/OneCCL.scala
@@ -27,8 +27,10 @@ object OneCCL extends Logging {
// Run on Executor
def setExecutorEnv(): Unit = {
setEnv("CCL_ATL_TRANSPORT", "ofi")
+ // Set CCL_ROOT to work around the CCL_ROOT env read bug; remove when fixed upstream
+ setEnv("CCL_ROOT", "/opt/intel/oneapi/ccl/latest")
// Uncomment this if you whant to debug oneCCL
- // setEnv("CCL_LOG_LEVEL", "2")
+ // setEnv("CCL_LOG_LEVEL", "debug")
}
def init(executor_num: Int, rank: Int, ip_port: String): Unit = {
diff --git a/mllib-dal/src/test/resources/log4j.properties b/mllib-dal/src/test/resources/log4j.properties
new file mode 100644
index 000000000..ff29121c2
--- /dev/null
+++ b/mllib-dal/src/test/resources/log4j.properties
@@ -0,0 +1,42 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Set everything to be logged to the console
+log4j.rootCategory=WARN, console
+log4j.appender.console=org.apache.log4j.ConsoleAppender
+log4j.appender.console.target=System.err
+log4j.appender.console.layout=org.apache.log4j.PatternLayout
+log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n
+
+# Set the default spark-shell log level to WARN. When running the spark-shell, the
+# log level for this class is used to overwrite the root logger's log level, so that
+# the user can have different defaults for the shell and regular Spark apps.
+log4j.logger.org.apache.spark.repl.Main=WARN
+
+# Settings to quiet third party logs that are too verbose
+log4j.logger.org.sparkproject.jetty=WARN
+log4j.logger.org.sparkproject.jetty.util.component.AbstractLifeCycle=ERROR
+log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO
+log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO
+log4j.logger.org.apache.parquet=ERROR
+log4j.logger.parquet=ERROR
+
+# SPARK-9183: Settings to avoid annoying messages when looking up nonexistent UDFs in SparkSQL with Hive support
+log4j.logger.org.apache.hadoop.hive.metastore.RetryingHMSHandler=FATAL
+log4j.logger.org.apache.hadoop.hive.ql.exec.FunctionRegistry=ERROR
+
+log4j.logger.org.apache.spark.ml.util.LibLoader=DEBUG
diff --git a/mllib-dal/test.sh b/mllib-dal/test.sh
index b4c1cde36..13235cb26 100755
--- a/mllib-dal/test.sh
+++ b/mllib-dal/test.sh
@@ -1,31 +1,39 @@
#!/usr/bin/env bash
-# Check envs for building
-if [[ -z $JAVA_HOME ]]; then
- echo JAVA_HOME not defined!
- exit 1
+if [[ -n $DAALROOT ]]; then
+ echo
+ echo ====================================================================================
+ echo WARNING: DAALROOT detected. It is recommended to test without the oneAPI environment!
+ echo ====================================================================================
+ echo
fi
-if [[ -z $(which mvn) ]]; then
- echo Maven not found!
- exit 1
+# Unset FI_PROVIDER_PATH if present, otherwise tests may hang
+if [[ -n $FI_PROVIDER_PATH ]]; then
+ echo ====================================================================================
+ echo WARNING: FI_PROVIDER_PATH detected. Will unset FI_PROVIDER_PATH before proceeding!
+ unset FI_PROVIDER_PATH
+ echo ====================================================================================
fi
-if [[ -z $DAALROOT ]]; then
- echo DAALROOT not defined!
- exit 1
+if [[ ! -f target/oap-mllib-1.2.0.jar ]]; then
+ echo Please run ./build.sh first to do a complete build before testing!
+ exit 1
fi
-if [[ -z $TBBROOT ]]; then
- echo TBBROOT not defined!
+# Check envs for building
+if [[ -z $JAVA_HOME ]]; then
+ echo JAVA_HOME not defined!
exit 1
fi
-if [[ -z $CCL_ROOT ]]; then
- echo CCL_ROOT not defined!
+if [[ -z $(which mvn) ]]; then
+ echo Maven not found!
exit 1
fi
+export OAP_MLLIB_TESTING=true
+
versionArray=(
spark-3.0.0 \
spark-3.0.1 \
@@ -84,11 +92,7 @@ export PLATFORM_PROFILE=CPU_ONLY_PROFILE
echo === Testing Environments ===
echo JAVA_HOME=$JAVA_HOME
-echo DAALROOT=$DAALROOT
-echo TBBROOT=$TBBROOT
-echo CCL_ROOT=$CCL_ROOT
echo Maven Version: $(mvn -v | head -n 1 | cut -f3 -d" ")
-echo Clang Version: $(clang -dumpversion)
echo Spark Version: $SPARK_VER
echo Platform Profile: $PLATFORM_PROFILE
echo ============================
@@ -109,10 +113,10 @@ if [[ -z $SUITE ]]; then
echo
echo Testing ALL suites...
echo
- mvn $MVN_NO_TRANSFER_PROGRESS -P$SPARK_VER -Dtest=none clean test
+ mvn $MVN_NO_TRANSFER_PROGRESS -P$SPARK_VER -Dtest=none test
else
echo
echo Testing org.apache.spark.ml.$SUITE ...
echo
- mvn $MVN_NO_TRANSFER_PROGRESS -P$SPARK_VER -Dtest=none -DwildcardSuites=org.apache.spark.ml.$SUITE clean test
+ mvn $MVN_NO_TRANSFER_PROGRESS -P$SPARK_VER -Dtest=none -DwildcardSuites=org.apache.spark.ml.$SUITE test
fi
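Finally, a hypothetical end-to-end sketch of the revised test flow (flags follow the same `getopts "hqp:"` pattern as `build.sh`):

```bash
# test.sh now requires a prior full build and warns against a sourced oneAPI env.
cd mllib-dal
./build.sh                    # produces target/oap-mllib-<version>.jar
./test.sh -q -p spark-3.1.1   # run all suites quietly against one Spark profile
```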