diff --git a/.github/workflows/oap-mllib-ci.yml b/.github/workflows/oap-mllib-ci.yml
index 4f567086f..6528a4b98 100644
--- a/.github/workflows/oap-mllib-ci.yml
+++ b/.github/workflows/oap-mllib-ci.yml
@@ -27,5 +27,5 @@ jobs:
       run: |
         source ${{github.workspace}}/dev/setup-all.sh
     - name: Build and Test
-      run: |
+      run: |
        ${{github.workspace}}/dev/ci-test.sh
diff --git a/dev/ci-test.sh b/dev/ci-test.sh
index 7b5a1939c..e8e51b1ca 100755
--- a/dev/ci-test.sh
+++ b/dev/ci-test.sh
@@ -54,8 +54,9 @@ for SparkVer in ${SupportedSparkVersions[*]}; do
    mvn --no-transfer-progress -P$SparkVer -Dtest=none -DwildcardSuites=org.apache.spark.ml.clustering.IntelKMeansSuite test
    mvn --no-transfer-progress -P$SparkVer -Dtest=none -DwildcardSuites=org.apache.spark.ml.feature.IntelPCASuite test
-   # mvn -P$SparkVer -Dtest=none -DwildcardSuites=org.apache.spark.ml.recommendation.IntelALSSuite test
+   # mvn --no-transfer-progress -P$SparkVer -Dtest=none -DwildcardSuites=org.apache.spark.ml.recommendation.IntelALSSuite test
 done
 # Yarn cluster test without profile
-$GITHUB_WORKSPACE/dev/test-cluster/ci-test-cluster.sh
\ No newline at end of file
+$GITHUB_WORKSPACE/dev/ci-build.sh
+$GITHUB_WORKSPACE/dev/test-cluster/ci-test-cluster.sh
diff --git a/dev/codestyle/lint-scala.sh b/dev/codestyle/lint-scala.sh
index e18a731b4..2d947dda8 100755
--- a/dev/codestyle/lint-scala.sh
+++ b/dev/codestyle/lint-scala.sh
@@ -23,7 +23,7 @@ if [ -z $MVN ]; then
    exit 1
 fi
-ERRORS=$($MVN scalastyle:check | grep error)
+ERRORS=$($MVN scalastyle:check | grep "error file")
 if test ! -z "$ERRORS"; then
    echo -e "Scalastyle checks failed at following occurrences:\n$ERRORS"
diff --git a/mllib-dal/pom.xml b/mllib-dal/pom.xml
index 055308cd6..d37ff879c 100644
--- a/mllib-dal/pom.xml
+++ b/mllib-dal/pom.xml
@@ -1,5 +1,5 @@ - 4.0.0
@@ -273,7 +273,7 @@ ${basedir}/src/test/java - ${basedir}/../dev/codestyle/checkstyle.xml + ${basedir}/../dev/codestyle/checkstyle.xml ${basedir}/target/checkstyle-output.xml
@@ -303,7 +303,7 @@ true false false - ${basedir}/src/main/scala + ${basedir}/src/main/scala/org/apache/spark ${basedir}/src/test/scala scalastyle-config.xml ${basedir}/target/scalastyle-output.xml
@@ -344,7 +344,7 @@ - + maven-antrun-plugin
@@ -354,8 +354,9 @@ process-classes - Building native code - + Building native code +
@@ -375,11 +376,11 @@ ${env.CCL_ROOT}/lib - ${ccl.lib} + ${ccl.lib} ${ccl.mpi.lib} ${ccl.fabric.lib} - + ${env.CCL_ROOT}/lib/prov
@@ -418,18 +419,21 @@ + rename to workaround.
See https://github.com/oneapi-src/oneDAL/issues/1254 --> ${project.build.testOutputDirectory}/lib/${tbb.lib} - ${project.build.testOutputDirectory}/lib/libtbb.so.2 + ${project.build.testOutputDirectory}/lib/libtbb.so.2 + ${project.build.testOutputDirectory}/lib/${tbb.malloc.lib} - ${project.build.testOutputDirectory}/lib/libtbbmalloc.so.2 + ${project.build.testOutputDirectory}/lib/libtbbmalloc.so.2 + ${project.build.testOutputDirectory}/lib/${ccl.mpi.lib} - ${project.build.testOutputDirectory}/lib/libmpi.so.12 + ${project.build.testOutputDirectory}/lib/libmpi.so.12 + diff --git a/mllib-dal/scalastyle-config.xml b/mllib-dal/scalastyle-config.xml index c1dc57be5..7ddb59629 100644 --- a/mllib-dal/scalastyle-config.xml +++ b/mllib-dal/scalastyle-config.xml @@ -49,14 +49,13 @@ This file is divided into 3 sections: - + + ^println$ { - private final Iterator base; - private final int batchSize; - - public BatchIterator(Iterator base, int batchSize) { - this.base = base; - this.batchSize = batchSize; - } - - @Override - public boolean hasNext() { - return base.hasNext(); - } - - @Override - public DataBatch next() { - try { - int numRows = 0; - int numCols = -1; - List batch = new ArrayList<>(batchSize); - while (base.hasNext() && batch.size() < batchSize) { - double[] curValue = base.next(); - if (numCols == -1) { - numCols = curValue.length; - } else if (numCols != curValue.length) { - throw new RuntimeException("Feature size is not the same"); - } - batch.add(curValue); - - numRows++; - } - - long[] rowOffset = new long[numRows]; - double[] values = new double[numRows * numCols]; - - int offset = 0; - for (int i = 0; i < batch.size(); i++) { - double[] curValue = batch.get(i); - rowOffset[i] = i; - System.arraycopy(curValue, 0, values, offset, - curValue.length); - offset += curValue.length; - } - - return new DataBatch(rowOffset, values, numCols); - } catch (RuntimeException runtimeError) { - - return null; - } - } - - @Override - public void remove() { - throw new UnsupportedOperationException("DataBatch.BatchIterator.remove"); - } - } -} diff --git a/mllib-dal/src/main/java/org/apache/spark/ml/util/LibLoader.java b/mllib-dal/src/main/java/org/apache/spark/ml/util/LibLoader.java index d8ea09a23..eada9b20c 100644 --- a/mllib-dal/src/main/java/org/apache/spark/ml/util/LibLoader.java +++ b/mllib-dal/src/main/java/org/apache/spark/ml/util/LibLoader.java @@ -1,4 +1,4 @@ -/******************************************************************************* +/* * Copyright 2020 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,142 +12,141 @@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
- *******************************************************************************/ - -// Based on oneDAL Java com.intel.daal.utils.libUtils code + */ package org.apache.spark.ml.util; -import java.io.*; -import java.util.UUID; -import java.util.logging.Level; +import com.intel.daal.utils.LibUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.intel.daal.utils.LibUtils; +import java.io.*; +import java.util.UUID; public final class LibLoader { - private static final String LIBRARY_PATH_IN_JAR = "/lib"; - // Make sure loading libraries from different temp directory for each process - private final static String subDir = "MLlibDAL_" + UUID.randomUUID(); - - private static final Logger log = LoggerFactory.getLogger("LibLoader"); - - /** - * Get temp dir for exacting lib files - * @return path of temp dir - */ - public static String getTempSubDir() { - String tempSubDirectory = System.getProperty("java.io.tmpdir") + "/" + subDir + LIBRARY_PATH_IN_JAR; - return tempSubDirectory; - } - - - /** - * Load oneCCL and MLlibDAL libs - */ - public static synchronized void loadLibraries() throws IOException { - loadLibCCL(); - loadLibMLlibDAL(); + private static final String LIBRARY_PATH_IN_JAR = "/lib"; + // Make sure loading libraries from different temp directory for each process + private static final String subDir = "MLlibDAL_" + UUID.randomUUID(); + + private static final Logger log = LoggerFactory.getLogger("LibLoader"); + + /** + * Get temp dir for exacting lib files + * + * @return path of temp dir + */ + public static String getTempSubDir() { + String tempSubDirectory = System.getProperty("java.io.tmpdir") + + "/" + subDir + LIBRARY_PATH_IN_JAR; + return tempSubDirectory; + } + + /** + * Load oneCCL and MLlibDAL libs + */ + public static synchronized void loadLibraries() throws IOException { + loadLibCCL(); + loadLibMLlibDAL(); + } + + /** + * Load oneCCL libs in dependency order + */ + private static synchronized void loadLibCCL() throws IOException { + loadFromJar(subDir, "libfabric.so.1"); + loadFromJar(subDir, "libmpi.so.12"); + loadFromJar(subDir, "libccl.so"); + loadFromJar(subDir, "libsockets-fi.so"); + } + + /** + * Load MLlibDAL lib, it depends TBB libs that are loaded by oneDAL, so this + * function should be called after oneDAL loadLibrary + */ + private static synchronized void loadLibMLlibDAL() throws IOException { + // oneDAL Java API doesn't load correct libtbb version for oneAPI Beta 10 + // Rename in pom.xml and assembly.xml to workaround. 
+ // See https://github.com/oneapi-src/oneDAL/issues/1254 --> + LibUtils.loadLibrary(); + + loadFromJar(subDir, "libMLlibDAL.so"); + } + + /** + * Load lib as resource + * + * @param path sub folder (in temporary folder) name + * @param name library name + */ + private static void loadFromJar(String path, String name) throws IOException { + log.debug("Loading " + name + " ..."); + + File fileOut = createTempFile(path, name); + // File exists already + if (fileOut == null) { + log.debug("DONE: Loading library as resource."); + return; } - /** - * Load oneCCL libs in dependency order - */ - private static synchronized void loadLibCCL() throws IOException { - loadFromJar(subDir, "libfabric.so.1"); - loadFromJar(subDir, "libmpi.so.12"); - loadFromJar(subDir, "libccl.so"); - loadFromJar(subDir, "libsockets-fi.so"); + InputStream streamIn = LibLoader.class.getResourceAsStream(LIBRARY_PATH_IN_JAR + "/" + name); + if (streamIn == null) { + throw new IOException("Error: No resource found."); } - /** - * Load MLlibDAL lib, it depends TBB libs that are loaded by oneDAL, - * so this function should be called after oneDAL loadLibrary - */ - private static synchronized void loadLibMLlibDAL() throws IOException { - // oneDAL Java API doesn't load correct libtbb version for oneAPI Beta 10 - // Rename in pom.xml and assembly.xml to workaround. - // See https://github.com/oneapi-src/oneDAL/issues/1254 --> - LibUtils.loadLibrary(); - - loadFromJar(subDir, "libMLlibDAL.so"); - } + try (OutputStream streamOut = new FileOutputStream(fileOut)) { + log.debug("Writing resource to temp file."); - /** - * Load lib as resource - * - * @param path sub folder (in temporary folder) name - * @param name library name - */ - private static void loadFromJar(String path, String name) throws IOException { - log.debug("Loading " + name + " ..."); - - File fileOut = createTempFile(path, name); - // File exists already - if (fileOut == null) { - log.debug("DONE: Loading library as resource."); - return; + byte[] buffer = new byte[32768]; + while (true) { + int read = streamIn.read(buffer); + if (read < 0) { + break; } - - InputStream streamIn = LibLoader.class.getResourceAsStream(LIBRARY_PATH_IN_JAR + "/" + name); - if (streamIn == null) { - throw new IOException("Error: No resource found."); - } - - try (OutputStream streamOut = new FileOutputStream(fileOut)) { - log.debug("Writing resource to temp file."); - - byte[] buffer = new byte[32768]; - while (true) { - int read = streamIn.read(buffer); - if (read < 0) { - break; - } - streamOut.write(buffer, 0, read); - } - - streamOut.flush(); - } catch (IOException e) { - throw new IOException("Error: I/O error occurs from/to temp file."); - } finally { - streamIn.close(); - } - - System.load(fileOut.toString()); - log.debug("DONE: Loading library as resource."); + streamOut.write(buffer, 0, read); + } + + streamOut.flush(); + } catch (IOException e) { + throw new IOException("Error: I/O error occurs from/to temp file."); + } finally { + streamIn.close(); } - /** - * Create temporary file - * - * @param name library name - * @param tempSubDirName sub folder (in temporary folder) name - * @return temporary file handler. null if file exist already. 
- */ - private static File createTempFile(String tempSubDirName, String name) throws IOException { - File tempSubDirectory = new File(System.getProperty("java.io.tmpdir") + "/" + tempSubDirName + LIBRARY_PATH_IN_JAR); - - if (!tempSubDirectory.exists()) { - tempSubDirectory.mkdirs(); - // Check existance again, don't use return bool of mkdirs - if (!tempSubDirectory.exists()) { - throw new IOException("Error: Can`t create folder for temp file."); - } - } - - String tempFileName = tempSubDirectory + "/" + name; - File tempFile = new File(tempFileName); + System.load(fileOut.toString()); + log.debug("DONE: Loading library as resource."); + } + + /** + * Create temporary file + * + * @param name library name + * @param tempSubDirName sub folder (in temporary folder) name + * @return temporary file handler. null if file exist already. + */ + private static File createTempFile(String tempSubDirName, String name) throws IOException { + File tempSubDirectory = new File( + System.getProperty("java.io.tmpdir") + "/" + tempSubDirName + LIBRARY_PATH_IN_JAR); + + if (!tempSubDirectory.exists()) { + tempSubDirectory.mkdirs(); + // Check existance again, don't use return bool of mkdirs + if (!tempSubDirectory.exists()) { + throw new IOException("Error: Can`t create folder for temp file."); + } + } - if (tempFile == null) { - throw new IOException("Error: Can`t create temp file."); - } + String tempFileName = tempSubDirectory + "/" + name; + File tempFile = new File(tempFileName); - if (tempFile.exists()) { - return null; - } + if (tempFile == null) { + throw new IOException("Error: Can`t create temp file."); + } - return tempFile; + if (tempFile.exists()) { + return null; } + return tempFile; + } + } diff --git a/mllib-dal/src/main/java/org/apache/spark/ml/util/Service.java b/mllib-dal/src/main/java/org/apache/spark/ml/util/Service.java index 4a091562a..91456bc96 100644 --- a/mllib-dal/src/main/java/org/apache/spark/ml/util/Service.java +++ b/mllib-dal/src/main/java/org/apache/spark/ml/util/Service.java @@ -1,29 +1,29 @@ -/* file: Service.java */ -/******************************************************************************* -* Copyright 2014-2020 Intel Corporation -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*******************************************************************************/ - /* - // Based on oneDAL Java example code - // Content: - // Auxiliary functions used in Java examples - //////////////////////////////////////////////////////////////////////////////// + * Copyright 2020 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. */ +// Based on oneDAL Java example code + package org.apache.spark.ml.util; +import com.intel.daal.data_management.data.CSRNumericTable; +import com.intel.daal.data_management.data.HomogenNumericTable; +import com.intel.daal.data_management.data.NumericTable; +import com.intel.daal.services.DaalContext; +import com.intel.daal.services.ErrorHandling; + import java.io.BufferedReader; import java.io.FileReader; import java.io.IOException; @@ -32,562 +32,562 @@ import java.text.DecimalFormat; import java.util.ArrayList; -import com.intel.daal.data_management.data.CSRNumericTable; -import com.intel.daal.data_management.data.HomogenNumericTable; -import com.intel.daal.data_management.data.NumericTable; -import com.intel.daal.data_management.data.KeyValueDataCollection; -import com.intel.daal.data_management.data_source.*; -import com.intel.daal.services.DaalContext; -import com.intel.daal.services.ErrorHandling; - public class Service { - public static void readRow(String line, int offset, int nCols, double[] data) throws IOException { - if (line == null) { - throw new IOException("Unable to read input dataset"); - } - - String[] elements = line.split(","); - for (int j = 0; j < nCols; j++) { - data[offset + j] = Double.parseDouble(elements[j]); - } + public static void readRow(String line, int offset, int nCols, double[] data) throws IOException { + if (line == null) { + throw new IOException("Unable to read input dataset"); } - public static void readRow(String line, int offset, int nCols, long[] data) throws IOException { - if (line == null) { - throw new IOException("Unable to read input dataset"); - } - - String[] elements = line.split(","); - for (int j = 0; j < nCols; j++) { - data[offset + j] = Long.parseLong(elements[j]); - } + String[] elements = line.split(","); + for (int j = 0; j < nCols; j++) { + data[offset + j] = Double.parseDouble(elements[j]); } + } - public static void readRow(String line, int offset, int nCols, float[] data) throws IOException { - if (line == null) { - throw new IOException("Unable to read input dataset"); - } - - String[] elements = line.split(","); - for (int j = 0; j < nCols; j++) { - data[offset + j] = Float.parseFloat(elements[j]); - } + public static void readRow(String line, int offset, int nCols, long[] data) throws IOException { + if (line == null) { + throw new IOException("Unable to read input dataset"); } - public static void readSparseData(String dataset, int nVectors, int nNonZeroValues, long[] rowOffsets, - long[] colIndices, double[] data) { - try { - BufferedReader bufferedReader = new BufferedReader(new FileReader(dataset)); - readRow(bufferedReader.readLine(), 0, nVectors + 1, rowOffsets); - readRow(bufferedReader.readLine(), 0, nNonZeroValues, colIndices); - readRow(bufferedReader.readLine(), 0, nNonZeroValues, data); - bufferedReader.close(); - } catch (IOException e) { - ErrorHandling.printThrowable(e); - } catch (NumberFormatException e) { - ErrorHandling.printThrowable(e); - } + String[] elements = line.split(","); + for (int j = 0; j < nCols; j++) { + data[offset + j] = Long.parseLong(elements[j]); } + } - private static int getRowLength(String line) { - String[] elements = line.split(","); - return elements.length; + public static void readRow(String line, int offset, int nCols, float[] data) throws IOException { + if (line == null) { + throw new IOException("Unable to read input dataset"); } - public static CSRNumericTable 
createSparseTable(DaalContext context, String dataset) throws IOException { - BufferedReader bufferedReader = new BufferedReader(new FileReader(dataset)); + String[] elements = line.split(","); + for (int j = 0; j < nCols; j++) { + data[offset + j] = Float.parseFloat(elements[j]); + } + } + + public static void readSparseData(String dataset, int nVectors, int nNonZeroValues, + long[] rowOffsets, long[] colIndices, double[] data) { + try { + BufferedReader bufferedReader = new BufferedReader(new FileReader(dataset)); + readRow(bufferedReader.readLine(), 0, nVectors + 1, rowOffsets); + readRow(bufferedReader.readLine(), 0, nNonZeroValues, colIndices); + readRow(bufferedReader.readLine(), 0, nNonZeroValues, data); + bufferedReader.close(); + } catch (IOException e) { + ErrorHandling.printThrowable(e); + } catch (NumberFormatException e) { + ErrorHandling.printThrowable(e); + } + } - String rowIndexLine = bufferedReader.readLine(); - int nVectors = getRowLength(rowIndexLine); - long[] rowOffsets = new long[nVectors]; + private static int getRowLength(String line) { + String[] elements = line.split(","); + return elements.length; + } - readRow(rowIndexLine, 0, nVectors, rowOffsets); - nVectors = nVectors - 1; + public static CSRNumericTable createSparseTable(DaalContext context, + String dataset) throws IOException { + BufferedReader bufferedReader = new BufferedReader(new FileReader(dataset)); - String columnsLine = bufferedReader.readLine(); - int nCols = getRowLength(columnsLine); + String rowIndexLine = bufferedReader.readLine(); + int nVectors = getRowLength(rowIndexLine); + long[] rowOffsets = new long[nVectors]; - long[] colIndices = new long[nCols]; - readRow(columnsLine, 0, nCols, colIndices); + readRow(rowIndexLine, 0, nVectors, rowOffsets); + nVectors = nVectors - 1; - String valuesLine = bufferedReader.readLine(); - int nNonZeros = getRowLength(valuesLine); + String columnsLine = bufferedReader.readLine(); + int nCols = getRowLength(columnsLine); - float[] data = new float[nNonZeros]; - readRow(valuesLine, 0, nNonZeros, data); + long[] colIndices = new long[nCols]; + readRow(columnsLine, 0, nCols, colIndices); - bufferedReader.close(); + String valuesLine = bufferedReader.readLine(); + int nNonZeros = getRowLength(valuesLine); - long maxCol = 0; - for (int i = 0; i < nCols; i++) { - if (colIndices[i] > maxCol) { - maxCol = colIndices[i]; - } - } - int nFeatures = (int) maxCol; + float[] data = new float[nNonZeros]; + readRow(valuesLine, 0, nNonZeros, data); - if (nCols != nNonZeros || nNonZeros != (rowOffsets[nVectors] - 1) || nFeatures == 0 || nVectors == 0) { - throw new IOException("Unable to read input dataset"); - } + bufferedReader.close(); - return new CSRNumericTable(context, data, colIndices, rowOffsets, nFeatures, nVectors); + long maxCol = 0; + for (int i = 0; i < nCols; i++) { + if (colIndices[i] > maxCol) { + maxCol = colIndices[i]; + } } + int nFeatures = (int) maxCol; - public static void printClassificationResult(float[] groundTruth, float[] classificationResults, - String classificatorName) { - System.out.println(classificatorName + " classification:"); - System.out.println("Ground truth | Classification results"); - - for (int i = 0; i < Math.min(groundTruth.length, 20); i++) { - System.out.format("%+f\t\t%+f\n", groundTruth[i], classificationResults[i]); - } + if (nCols != nNonZeros || nNonZeros != (rowOffsets[nVectors] - 1) + || nFeatures == 0 || nVectors == 0) { + throw new IOException("Unable to read input dataset"); } - public static void 
printClassificationResult(NumericTable groundTruth, NumericTable classificationResults, - String header1, String header2, String message, int nMaxRows) { - int nCols = (int) groundTruth.getNumberOfColumns(); - int nRows = Math.min((int) groundTruth.getNumberOfRows(), nMaxRows); + return new CSRNumericTable(context, data, colIndices, rowOffsets, nFeatures, nVectors); + } - FloatBuffer dataGroundTruth = FloatBuffer.allocate(nCols * nRows); - FloatBuffer dataClassificationResults = FloatBuffer.allocate(nCols * nRows); - try { - dataGroundTruth = groundTruth.getBlockOfRows(0, nRows, dataGroundTruth); - dataClassificationResults = classificationResults.getBlockOfRows(0, nRows, dataClassificationResults); - } catch (IllegalAccessException e) { - ErrorHandling.printThrowable(e); - return; - } - System.out.println(message); - System.out.println(header1 + "\t" + header2); - for (int i = 0; i < nRows; i++) { - for (int j = 0; j < 1; j++) { - System.out.format("%+.0f\t\t%+.0f\n", dataGroundTruth.get(i * nCols + j), - dataClassificationResults.get(i * nCols + j)); - } - } + public static void printClassificationResult(float[] groundTruth, float[] classificationResults, + String classificatorName) { + System.out.println(classificatorName + " classification:"); + System.out.println("Ground truth | Classification results"); + + for (int i = 0; i < Math.min(groundTruth.length, 20); i++) { + System.out.format("%+f\t\t%+f\n", groundTruth[i], classificationResults[i]); + } + } + + public static void printClassificationResult(NumericTable groundTruth, + NumericTable classificationResults, + String header1, String header2, + String message, int nMaxRows) { + int nCols = (int) groundTruth.getNumberOfColumns(); + int nRows = Math.min((int) groundTruth.getNumberOfRows(), nMaxRows); + + FloatBuffer dataGroundTruth = FloatBuffer.allocate(nCols * nRows); + FloatBuffer dataClassificationResults = FloatBuffer.allocate(nCols * nRows); + try { + dataGroundTruth = groundTruth.getBlockOfRows(0, nRows, dataGroundTruth); + dataClassificationResults = classificationResults.getBlockOfRows(0, nRows, + dataClassificationResults); + } catch (IllegalAccessException e) { + ErrorHandling.printThrowable(e); + return; + } + System.out.println(message); + System.out.println(header1 + "\t" + header2); + for (int i = 0; i < nRows; i++) { + for (int j = 0; j < 1; j++) { + System.out.format("%+.0f\t\t%+.0f\n", dataGroundTruth.get(i * nCols + j), + dataClassificationResults.get(i * nCols + j)); + } } + } - public static void printClassificationResult(long[] groundTruth, long[] classificationResults, - String classificatorName) { - System.out.println(classificatorName + " classification:"); - System.out.println("Ground truth | Classification results"); + public static void printClassificationResult(long[] groundTruth, long[] classificationResults, + String classificatorName) { + System.out.println(classificatorName + " classification:"); + System.out.println("Ground truth | Classification results"); - for (int i = 0; i < Math.min(groundTruth.length, 20); i++) { - System.out.format("%+d\t\t%+d\n", groundTruth[i], classificationResults[i]); - } + for (int i = 0; i < Math.min(groundTruth.length, 20); i++) { + System.out.format("%+d\t\t%+d\n", groundTruth[i], classificationResults[i]); } + } - public static void printClassificationResult(long[] groundTruth, int[] classificationResults, - String classificatorName) { - System.out.println(classificatorName + " classification:"); - System.out.println("Ground truth | Classification results"); + 
public static void printClassificationResult(long[] groundTruth, int[] classificationResults, + String classificatorName) { + System.out.println(classificatorName + " classification:"); + System.out.println("Ground truth | Classification results"); - for (int i = 0; i < Math.min(groundTruth.length, 20); i++) { - System.out.format("%+d\t\t%+d\n", groundTruth[i], classificationResults[i]); - } + for (int i = 0; i < Math.min(groundTruth.length, 20); i++) { + System.out.format("%+d\t\t%+d\n", groundTruth[i], classificationResults[i]); } - - public static void printMatrix(double[] matrix, int nCols, int nRows, String header) { - System.out.println(header); - DecimalFormat numberFormat = new DecimalFormat("##0.00"); - for (int i = 0; i < nRows; i++) { - for (int j = 0; j < nCols; j++) { - System.out.print(numberFormat.format(matrix[i * nCols + j]) + "\t\t"); - } - System.out.println(); - } + } + + public static void printMatrix(double[] matrix, int nCols, int nRows, String header) { + System.out.println(header); + DecimalFormat numberFormat = new DecimalFormat("##0.00"); + for (int i = 0; i < nRows; i++) { + for (int j = 0; j < nCols; j++) { + System.out.print(numberFormat.format(matrix[i * nCols + j]) + "\t\t"); + } + System.out.println(); } - - public static void printTriangularMatrix(double[] triangularMatrix, int nDimensions, String header) { - int index = 0; - for (int i = 0; i < nDimensions; i++) { - for (int j = 0; j <= i; j++) { - System.out.print(triangularMatrix[index++] + " "); - } - System.out.println(); - } + } + + public static void printTriangularMatrix(double[] triangularMatrix, int nDimensions, + String header) { + int index = 0; + for (int i = 0; i < nDimensions; i++) { + for (int j = 0; j <= i; j++) { + System.out.print(triangularMatrix[index++] + " "); + } + System.out.println(); } - - public static void printPackedNumericTable(HomogenNumericTable nt, long nDimensions, String header) { - double[] results = nt.getDoubleArray(); - printTriangularMatrix(results, (int) nDimensions, header); + } + + public static void printPackedNumericTable(HomogenNumericTable nt, long nDimensions, + String header) { + double[] results = nt.getDoubleArray(); + printTriangularMatrix(results, (int) nDimensions, header); + } + + public static boolean isUpper(NumericTable.StorageLayout layout) { + return layout.ordinal() == NumericTable.StorageLayout.upperPackedSymmetricMatrix.ordinal() + || layout.ordinal() == NumericTable.StorageLayout.upperPackedTriangularMatrix.ordinal(); + } + + public static boolean isLower(NumericTable.StorageLayout layout) { + return layout.ordinal() == NumericTable.StorageLayout.lowerPackedSymmetricMatrix.ordinal() + || layout.ordinal() == NumericTable.StorageLayout.lowerPackedTriangularMatrix.ordinal(); + } + + public static void printNumericTable(String header, NumericTable nt, + long nPrintedRows, long nPrintedCols) { + long nNtCols = nt.getNumberOfColumns(); + long nNtRows = nt.getNumberOfRows(); + long nRows = nNtRows; + long nCols = nNtCols; + + NumericTable.StorageLayout layout = nt.getDataLayout(); + + if (nPrintedRows > 0) { + nRows = Math.min(nNtRows, nPrintedRows); } - public static boolean isUpper(NumericTable.StorageLayout layout) - { - return layout.ordinal() == NumericTable.StorageLayout.upperPackedSymmetricMatrix.ordinal() || - layout.ordinal() == NumericTable.StorageLayout.upperPackedTriangularMatrix.ordinal(); + FloatBuffer result = FloatBuffer.allocate((int) (nNtCols * nRows)); + try { + result = nt.getBlockOfRows(0, nRows, result); + } catch 
(IllegalAccessException e) { + ErrorHandling.printThrowable(e); + return; } - - public static boolean isLower(NumericTable.StorageLayout layout) - { - return layout.ordinal() == NumericTable.StorageLayout.lowerPackedSymmetricMatrix.ordinal() || - layout.ordinal() == NumericTable.StorageLayout.lowerPackedTriangularMatrix.ordinal(); + if (nPrintedCols > 0) { + nCols = Math.min(nNtCols, nPrintedCols); } - public static void printNumericTable(String header, NumericTable nt, long nPrintedRows, long nPrintedCols) { - long nNtCols = nt.getNumberOfColumns(); - long nNtRows = nt.getNumberOfRows(); - long nRows = nNtRows; - long nCols = nNtCols; - - NumericTable.StorageLayout layout = nt.getDataLayout(); - - if (nPrintedRows > 0) { - nRows = Math.min(nNtRows, nPrintedRows); - } + StringBuilder builder = new StringBuilder(); + builder.append(header); + builder.append("\n"); - FloatBuffer result = FloatBuffer.allocate((int) (nNtCols * nRows)); - try { - result = nt.getBlockOfRows(0, nRows, result); - } catch (IllegalAccessException e) { - ErrorHandling.printThrowable(e); - return; + if (isLower(layout)) { + for (long i = 0; i < nRows; i++) { + for (long j = 0; j <= i; j++) { + String tmp = String.format("%-6.3f ", result.get((int) (i * nNtCols + j))); + builder.append(tmp); } - if (nPrintedCols > 0) { - nCols = Math.min(nNtCols, nPrintedCols); - } - - StringBuilder builder = new StringBuilder(); - builder.append(header); builder.append("\n"); + } + } else if (isUpper(layout)) { - if( isLower(layout) ) - { - for (long i = 0; i < nRows; i++) { - for (long j = 0; j <= i; j++) { - String tmp = String.format("%-6.3f ", result.get((int) (i * nNtCols + j))); - builder.append(tmp); - } - builder.append("\n"); - } - } - else if( isUpper(layout) ) - { - - for (long i = 0; i < nRows; i++) { - for(int k=0; k < i; k++) - builder.append(" "); - for (long j = i; j < nCols; j++) { - String tmp = String.format("%-6.3f ", result.get((int) (i * nNtCols + j))); - builder.append(tmp); - } - builder.append("\n"); - } - - } - else if( isLower(layout) != true && isUpper(layout) != true) - { - for (long i = 0; i < nRows; i++) { - for (long j = 0; j < nCols; j++) { - String tmp = String.format("%-6.3f ", result.get((int) (i * nNtCols + j))); - builder.append(tmp); - } - builder.append("\n"); - } - } - System.out.println(builder.toString()); - } - - public static void printNumericTable(String header, CSRNumericTable nt, long nPrintedRows, long nPrintedCols) { - long[] rowOffsets = nt.getRowOffsetsArray(); - long[] colIndices = nt.getColIndicesArray(); - float[] values = nt.getFloatArray(); - - long nNtCols = nt.getNumberOfColumns(); - long nNtRows = nt.getNumberOfRows(); - long nRows = nNtRows; - long nCols = nNtCols; - - if (nPrintedRows > 0) { - nRows = Math.min(nNtRows, nPrintedRows); + for (long i = 0; i < nRows; i++) { + for (int k = 0; k < i; k++) { + builder.append(" "); } - - if (nPrintedCols > 0) { - nCols = Math.min(nNtCols, nPrintedCols); + for (long j = i; j < nCols; j++) { + String tmp = String.format("%-6.3f ", result.get((int) (i * nNtCols + j))); + builder.append(tmp); } - - StringBuilder builder = new StringBuilder(); - builder.append(header); builder.append("\n"); + } - float[] oneDenseRow = new float[(int) nCols]; - for (int i = 0; i < nRows; i++) { - for (int j = 0; j < nCols; j++) { - oneDenseRow[j] = 0; - } - int nElementsInRow = (int) (rowOffsets[i + 1] - rowOffsets[i]); - for (int k = 0; k < nElementsInRow; k++) { - oneDenseRow[(int) (colIndices[(int) (rowOffsets[i] - 1 + k)] - 1)] = values[(int) 
(rowOffsets[i] - 1 - + k)]; - } - for (int j = 0; j < nCols; j++) { - String tmp = String.format("%-6.3f ", oneDenseRow[j]); - builder.append(tmp); - } - builder.append("\n"); + } else if (isLower(layout) != true && isUpper(layout) != true) { + for (long i = 0; i < nRows; i++) { + for (long j = 0; j < nCols; j++) { + String tmp = String.format("%-6.3f ", result.get((int) (i * nNtCols + j))); + builder.append(tmp); } - System.out.println(builder.toString()); + builder.append("\n"); + } } - - public static void printNumericTable(String header, NumericTable nt, long nRows) { - printNumericTable(header, nt, nRows, nt.getNumberOfColumns()); + System.out.println(builder.toString()); + } + + public static void printNumericTable(String header, CSRNumericTable nt, + long nPrintedRows, long nPrintedCols) { + long[] rowOffsets = nt.getRowOffsetsArray(); + long[] colIndices = nt.getColIndicesArray(); + float[] values = nt.getFloatArray(); + + long nNtCols = nt.getNumberOfColumns(); + long nNtRows = nt.getNumberOfRows(); + long nRows = nNtRows; + long nCols = nNtCols; + + if (nPrintedRows > 0) { + nRows = Math.min(nNtRows, nPrintedRows); } - public static void printNumericTable(String header, NumericTable nt) { - printNumericTable(header, nt, nt.getNumberOfRows()); + if (nPrintedCols > 0) { + nCols = Math.min(nNtCols, nPrintedCols); } - public static void printNumericTable(String header, CSRNumericTable nt, long nRows) { - printNumericTable(header, nt, nRows, nt.getNumberOfColumns()); + StringBuilder builder = new StringBuilder(); + builder.append(header); + builder.append("\n"); + + float[] oneDenseRow = new float[(int) nCols]; + for (int i = 0; i < nRows; i++) { + for (int j = 0; j < nCols; j++) { + oneDenseRow[j] = 0; + } + int nElementsInRow = (int) (rowOffsets[i + 1] - rowOffsets[i]); + for (int k = 0; k < nElementsInRow; k++) { + oneDenseRow[(int) (colIndices[(int) (rowOffsets[i] - 1 + k)] - 1)] + = values[(int) (rowOffsets[i] - 1 + k)]; + } + for (int j = 0; j < nCols; j++) { + String tmp = String.format("%-6.3f ", oneDenseRow[j]); + builder.append(tmp); + } + builder.append("\n"); } - - public static void printNumericTable(String header, CSRNumericTable nt) { - printNumericTable(header, nt, nt.getNumberOfRows()); + System.out.println(builder.toString()); + } + + public static void printNumericTable(String header, NumericTable nt, long nRows) { + printNumericTable(header, nt, nRows, nt.getNumberOfColumns()); + } + + public static void printNumericTable(String header, NumericTable nt) { + printNumericTable(header, nt, nt.getNumberOfRows()); + } + + public static void printNumericTable(String header, CSRNumericTable nt, long nRows) { + printNumericTable(header, nt, nRows, nt.getNumberOfColumns()); + } + + public static void printNumericTable(String header, CSRNumericTable nt) { + printNumericTable(header, nt, nt.getNumberOfRows()); + } + + public static void printNumericTables(NumericTable dataTable1, NumericTable dataTable2, + String title1, String title2, + String message, long nPrintedRows) { + long nRows1 = dataTable1.getNumberOfRows(); + long nRows2 = dataTable2.getNumberOfRows(); + long nCols1 = dataTable1.getNumberOfColumns(); + long nCols2 = dataTable2.getNumberOfColumns(); + + long nRows = Math.min(nRows1, nRows2); + if (nPrintedRows > 0) { + nRows = Math.min(Math.min(nRows1, nRows2), nPrintedRows); } - public static void printNumericTables(NumericTable dataTable1, NumericTable dataTable2,String title1, String title2 , - String message, long nPrintedRows) - { - long nRows1 = 
dataTable1.getNumberOfRows(); - long nRows2 = dataTable2.getNumberOfRows(); - long nCols1 = dataTable1.getNumberOfColumns(); - long nCols2 = dataTable2.getNumberOfColumns(); - - long nRows = Math.min(nRows1, nRows2); - if (nPrintedRows > 0) - { - nRows = Math.min(Math.min(nRows1, nRows2), nPrintedRows); - } - - FloatBuffer result1 = FloatBuffer.allocate((int) (nCols1 * nRows)); - FloatBuffer result2 = FloatBuffer.allocate((int) (nCols2 * nRows)); - try { - result1 = dataTable1.getBlockOfRows(0, nRows, result1); - result2 = dataTable2.getBlockOfRows(0, nRows, result2); - } catch (IllegalAccessException e) { - ErrorHandling.printThrowable(e); - return; - } - StringBuilder builder = new StringBuilder(); - builder.append(message); - builder.append("\n"); - builder.append(title1); - - StringBuilder builderHelp = new StringBuilder(); - for (long j = 0; j < nCols1; j++) { - String tmp = String.format("%-6.3f ", result1.get((int) (0 * nCols1 + j))); - builderHelp.append(tmp); - } - int interval = builderHelp.length() - title1.length(); - - for(int i=0; i < interval; i++) - { - builder.append(" "); - } - builder.append(" "); - builder.append(title2); - builder.append("\n"); - - for (long i = 0; i < nRows; i++) { - for (long j = 0; j < nCols1; j++) { - String tmp = String.format("%-6.3f ", result1.get((int) (i * nCols1 + j))); - builder.append(tmp); - } - builder.append(" "); - for (long j = 0; j < nCols2; j++) { - String tmp = String.format("%-6.3f ", result2.get((int) (i * nCols2 + j))); - builder.append(tmp); - } - builder.append("\n"); - } - System.out.println(builder.toString()); - } - - public static void printAprioriItemsets(HomogenNumericTable largeItemsetsTable, - HomogenNumericTable largeItemsetsSupportTable) { - /* Get sizes of tables to store large item sets */ - int nItemsInLargeItemsets = (int) largeItemsetsTable.getNumberOfRows(); - int largeItemsetCount = (int) largeItemsetsSupportTable.getNumberOfRows(); - int nItemsetToPrint = 20; - - /* Get item sets and their support values */ - IntBuffer bufLargeItemsets = IntBuffer - .allocate(nItemsInLargeItemsets * (int) largeItemsetsTable.getNumberOfColumns()); - try { - bufLargeItemsets = largeItemsetsTable.getBlockOfRows(0, nItemsInLargeItemsets, bufLargeItemsets); - } catch (IllegalAccessException e) { - ErrorHandling.printThrowable(e); - return; - } - int[] largeItemsets = new int[bufLargeItemsets.capacity()]; - bufLargeItemsets.get(largeItemsets); - - IntBuffer bufLargeItemsetsSupportData = IntBuffer - .allocate(largeItemsetCount * (int) largeItemsetsSupportTable.getNumberOfColumns()); - try { - bufLargeItemsetsSupportData = largeItemsetsSupportTable.getBlockOfRows(0, largeItemsetCount, - bufLargeItemsetsSupportData); - } catch (IllegalAccessException e) { - ErrorHandling.printThrowable(e); - return; - } - int[] largeItemsetsSupportData = new int[bufLargeItemsetsSupportData.capacity()]; - bufLargeItemsetsSupportData.get(largeItemsetsSupportData); - - ArrayList> largeItemsetsVector = new ArrayList>(largeItemsetCount); - - for (int i = 0; i < largeItemsetCount; i++) { - largeItemsetsVector.add(new ArrayList()); - } + FloatBuffer result1 = FloatBuffer.allocate((int) (nCols1 * nRows)); + FloatBuffer result2 = FloatBuffer.allocate((int) (nCols2 * nRows)); + try { + result1 = dataTable1.getBlockOfRows(0, nRows, result1); + result2 = dataTable2.getBlockOfRows(0, nRows, result2); + } catch (IllegalAccessException e) { + ErrorHandling.printThrowable(e); + return; + } + StringBuilder builder = new StringBuilder(); + builder.append(message); + 
builder.append("\n"); + builder.append(title1); + + StringBuilder builderHelp = new StringBuilder(); + for (long j = 0; j < nCols1; j++) { + String tmp = String.format("%-6.3f ", result1.get((int) (0 * nCols1 + j))); + builderHelp.append(tmp); + } + int interval = builderHelp.length() - title1.length(); - for (int i = 0; i < nItemsInLargeItemsets; i++) { - largeItemsetsVector.get(largeItemsets[2 * i]).add(largeItemsets[2 * i + 1]); - } + for (int i = 0; i < interval; i++) { + builder.append(" "); + } + builder.append(" "); + builder.append(title2); + builder.append("\n"); + + for (long i = 0; i < nRows; i++) { + for (long j = 0; j < nCols1; j++) { + String tmp = String.format("%-6.3f ", result1.get((int) (i * nCols1 + j))); + builder.append(tmp); + } + builder.append(" "); + for (long j = 0; j < nCols2; j++) { + String tmp = String.format("%-6.3f ", result2.get((int) (i * nCols2 + j))); + builder.append(tmp); + } + builder.append("\n"); + } + System.out.println(builder.toString()); + } + + public static void printAprioriItemsets(HomogenNumericTable largeItemsetsTable, + HomogenNumericTable largeItemsetsSupportTable) { + /* Get sizes of tables to store large item sets */ + int nItemsInLargeItemsets = (int) largeItemsetsTable.getNumberOfRows(); + int largeItemsetCount = (int) largeItemsetsSupportTable.getNumberOfRows(); + int nItemsetToPrint = 20; + + /* Get item sets and their support values */ + IntBuffer bufLargeItemsets = IntBuffer + .allocate(nItemsInLargeItemsets * (int) largeItemsetsTable.getNumberOfColumns()); + try { + bufLargeItemsets = largeItemsetsTable.getBlockOfRows(0, nItemsInLargeItemsets, + bufLargeItemsets); + } catch (IllegalAccessException e) { + ErrorHandling.printThrowable(e); + return; + } + int[] largeItemsets = new int[bufLargeItemsets.capacity()]; + bufLargeItemsets.get(largeItemsets); + + IntBuffer bufLargeItemsetsSupportData = IntBuffer + .allocate(largeItemsetCount * (int) largeItemsetsSupportTable.getNumberOfColumns()); + try { + bufLargeItemsetsSupportData = largeItemsetsSupportTable.getBlockOfRows(0, largeItemsetCount, + bufLargeItemsetsSupportData); + } catch (IllegalAccessException e) { + ErrorHandling.printThrowable(e); + return; + } + int[] largeItemsetsSupportData = new int[bufLargeItemsetsSupportData.capacity()]; + bufLargeItemsetsSupportData.get(largeItemsetsSupportData); - ArrayList supportVector = new ArrayList(largeItemsetCount); - for (int i = 0; i < largeItemsetCount; i++) { - supportVector.add(0); - } + ArrayList> largeItemsetsVector + = new ArrayList>(largeItemsetCount); - for (int i = 0; i < largeItemsetCount; i++) { - int index = largeItemsetsSupportData[2 * i]; - supportVector.set(index, largeItemsetsSupportData[2 * i + 1]); - } + for (int i = 0; i < largeItemsetCount; i++) { + largeItemsetsVector.add(new ArrayList()); + } - System.out.println("\nApriori example program results"); - System.out.println("\nLast " + nItemsetToPrint + " large itemsets: "); - System.out.println("\nItemset\t\t\tSupport"); + for (int i = 0; i < nItemsInLargeItemsets; i++) { + largeItemsetsVector.get(largeItemsets[2 * i]).add(largeItemsets[2 * i + 1]); + } - int iMin = ((largeItemsetCount > nItemsetToPrint) ? 
largeItemsetCount - nItemsetToPrint : 0); - for (int i = iMin; i < largeItemsetCount; i++) { - System.out.print("{"); - for (int l = 0; l < largeItemsetsVector.get(i).size() - 1; l++) { - System.out.print(largeItemsetsVector.get(i).get(l) + ", "); - } - System.out.print(largeItemsetsVector.get(i).get(largeItemsetsVector.get(i).size() - 1) + "}\t\t"); + ArrayList supportVector = new ArrayList(largeItemsetCount); + for (int i = 0; i < largeItemsetCount; i++) { + supportVector.add(0); + } - System.out.println(supportVector.get(i)); - } + for (int i = 0; i < largeItemsetCount; i++) { + int index = largeItemsetsSupportData[2 * i]; + supportVector.set(index, largeItemsetsSupportData[2 * i + 1]); } - public static void printAprioriRules(HomogenNumericTable leftItemsTable, HomogenNumericTable rightItemsTable, - HomogenNumericTable confidenceTable) { - int nRulesToPrint = 20; - /* Get sizes of tables to store association rules */ - int nLeftItems = (int) leftItemsTable.getNumberOfRows(); - int nRightItems = (int) rightItemsTable.getNumberOfRows(); - int nRules = (int) confidenceTable.getNumberOfRows(); + System.out.println("\nApriori example program results"); + System.out.println("\nLast " + nItemsetToPrint + " large itemsets: "); + System.out.println("\nItemset\t\t\tSupport"); - /* Get association rules data */ + int iMin = ((largeItemsetCount > nItemsetToPrint) ? largeItemsetCount - nItemsetToPrint : 0); + for (int i = iMin; i < largeItemsetCount; i++) { + System.out.print("{"); + for (int l = 0; l < largeItemsetsVector.get(i).size() - 1; l++) { + System.out.print(largeItemsetsVector.get(i).get(l) + ", "); + } + System.out.print(largeItemsetsVector.get(i).get( + largeItemsetsVector.get(i).size() - 1) + "}\t\t"); - IntBuffer bufLeftItems = IntBuffer.allocate(nLeftItems * (int) leftItemsTable.getNumberOfColumns()); - try { - bufLeftItems = leftItemsTable.getBlockOfRows(0, nLeftItems, bufLeftItems); - } catch (IllegalAccessException e) { - ErrorHandling.printThrowable(e); - return; - } - int[] leftItems = new int[bufLeftItems.capacity()]; - bufLeftItems.get(leftItems); - - IntBuffer bufRightItems = IntBuffer.allocate(nRightItems * (int) rightItemsTable.getNumberOfColumns()); - try { - bufRightItems = rightItemsTable.getBlockOfRows(0, nRightItems, bufRightItems); - } catch (IllegalAccessException e) { - ErrorHandling.printThrowable(e); - return; - } - int[] rightItems = new int[bufRightItems.capacity()]; - bufRightItems.get(rightItems); - - FloatBuffer bufConfidence = FloatBuffer.allocate(nRules * (int) confidenceTable.getNumberOfColumns()); - try { - bufConfidence = confidenceTable.getBlockOfRows(0, nRules, bufConfidence); - } catch (IllegalAccessException e) { - ErrorHandling.printThrowable(e); - return; - } - float[] confidence = new float[bufConfidence.capacity()]; - bufConfidence.get(confidence); + System.out.println(supportVector.get(i)); + } + } + + public static void printAprioriRules(HomogenNumericTable leftItemsTable, + HomogenNumericTable rightItemsTable, + HomogenNumericTable confidenceTable) { + int nRulesToPrint = 20; + /* Get sizes of tables to store association rules */ + int nLeftItems = (int) leftItemsTable.getNumberOfRows(); + int nRightItems = (int) rightItemsTable.getNumberOfRows(); + int nRules = (int) confidenceTable.getNumberOfRows(); + + /* Get association rules data */ + + IntBuffer bufLeftItems = IntBuffer.allocate( + nLeftItems * (int) leftItemsTable.getNumberOfColumns()); + try { + bufLeftItems = leftItemsTable.getBlockOfRows(0, nLeftItems, bufLeftItems); + } catch 
(IllegalAccessException e) { + ErrorHandling.printThrowable(e); + return; + } + int[] leftItems = new int[bufLeftItems.capacity()]; + bufLeftItems.get(leftItems); + + IntBuffer bufRightItems = IntBuffer.allocate( + nRightItems * (int) rightItemsTable.getNumberOfColumns()); + try { + bufRightItems = rightItemsTable.getBlockOfRows(0, nRightItems, bufRightItems); + } catch (IllegalAccessException e) { + ErrorHandling.printThrowable(e); + return; + } + int[] rightItems = new int[bufRightItems.capacity()]; + bufRightItems.get(rightItems); + + FloatBuffer bufConfidence = FloatBuffer.allocate( + nRules * (int) confidenceTable.getNumberOfColumns()); + try { + bufConfidence = confidenceTable.getBlockOfRows(0, nRules, bufConfidence); + } catch (IllegalAccessException e) { + ErrorHandling.printThrowable(e); + return; + } + float[] confidence = new float[bufConfidence.capacity()]; + bufConfidence.get(confidence); - ArrayList> leftItemsVector = new ArrayList>(nRules); - for (int i = 0; i < nRules; i++) { - leftItemsVector.add(new ArrayList()); - } + ArrayList> leftItemsVector = new ArrayList>(nRules); + for (int i = 0; i < nRules; i++) { + leftItemsVector.add(new ArrayList()); + } - if (nRules == 0) { - System.out.println("No association rules were found "); - return; - } + if (nRules == 0) { + System.out.println("No association rules were found "); + return; + } - for (int i = 0; i < nLeftItems; i++) { - leftItemsVector.get((leftItems[2 * i])).add(leftItems[2 * i + 1]); - } + for (int i = 0; i < nLeftItems; i++) { + leftItemsVector.get((leftItems[2 * i])).add(leftItems[2 * i + 1]); + } - ArrayList> rightItemsVector = new ArrayList>(nRules); - for (int i = 0; i < nRules; i++) { - rightItemsVector.add(new ArrayList()); - } + ArrayList> rightItemsVector = new ArrayList>(nRules); + for (int i = 0; i < nRules; i++) { + rightItemsVector.add(new ArrayList()); + } - for (int i = 0; i < nRightItems; i++) { - rightItemsVector.get((rightItems[2 * i])).add(rightItems[2 * i + 1]); - } + for (int i = 0; i < nRightItems; i++) { + rightItemsVector.get((rightItems[2 * i])).add(rightItems[2 * i + 1]); + } - ArrayList confidenceVector = new ArrayList(nRules); - for (int i = 0; i < nRules; i++) { - confidenceVector.add(confidence[i]); - } + ArrayList confidenceVector = new ArrayList(nRules); + for (int i = 0; i < nRules; i++) { + confidenceVector.add(confidence[i]); + } - System.out.println("\nLast " + nRulesToPrint + " association rules: "); - System.out.println("\nRule" + "\t\t\t\tConfidence"); + System.out.println("\nLast " + nRulesToPrint + " association rules: "); + System.out.println("\nRule" + "\t\t\t\tConfidence"); - int iMin = ((nRules > nRulesToPrint) ? (nRules - nRulesToPrint) : 0); - for (int i = iMin; i < nRules; i++) { - System.out.print("{"); - for (int l = 0; l < leftItemsVector.get(i).size() - 1; l++) { - System.out.print(leftItemsVector.get(i).get(l) + ", "); - } - System.out.print(leftItemsVector.get(i).get(leftItemsVector.get(i).size() - 1) + "} => {"); + int iMin = ((nRules > nRulesToPrint) ? 
(nRules - nRulesToPrint) : 0); + for (int i = iMin; i < nRules; i++) { + System.out.print("{"); + for (int l = 0; l < leftItemsVector.get(i).size() - 1; l++) { + System.out.print(leftItemsVector.get(i).get(l) + ", "); + } + System.out.print(leftItemsVector.get(i).get(leftItemsVector.get(i).size() - 1) + "} => {"); - for (int l = 0; l < rightItemsVector.get(i).size() - 1; l++) { - System.out.print(rightItemsVector.get(i).get(l) + ", "); - } - System.out.print(rightItemsVector.get(i).get(rightItemsVector.get(i).size() - 1) + "}\t\t"); + for (int l = 0; l < rightItemsVector.get(i).size() - 1; l++) { + System.out.print(rightItemsVector.get(i).get(l) + ", "); + } + System.out.print(rightItemsVector.get(i).get(rightItemsVector.get(i).size() - 1) + "}\t\t"); - System.out.println(confidenceVector.get(i)); - } + System.out.println(confidenceVector.get(i)); } - - public static void printALSRatings(NumericTable usersOffsetTable, NumericTable itemsOffsetTable, - NumericTable ratings) { - long nUsers = ratings.getNumberOfRows(); - long nItems = ratings.getNumberOfColumns(); - - float[] ratingsData = ((HomogenNumericTable)ratings).getFloatArray(); - IntBuffer usersOffsetBuf = IntBuffer.allocate(1); - IntBuffer itemsOffsetBuf = IntBuffer.allocate(1); - try { - usersOffsetBuf = usersOffsetTable.getBlockOfRows(0, 1, usersOffsetBuf); - itemsOffsetBuf = itemsOffsetTable.getBlockOfRows(0, 1, itemsOffsetBuf); - } catch (IllegalAccessException e) { - ErrorHandling.printThrowable(e); - return; - } - int[] usersOffsetData = new int[1]; - int[] itemsOffsetData = new int[1]; - usersOffsetBuf.get(usersOffsetData); - itemsOffsetBuf.get(itemsOffsetData); - long usersOffset = (long)usersOffsetData[0]; - long itemsOffset = (long)itemsOffsetData[0]; - - System.out.println(" User ID, Item ID, rating"); - for (long i = 0; i < nUsers; i++) { - for (long j = 0; j < nItems; j++) { - long userId = i + usersOffset; - long itemId = j + itemsOffset; - System.out.println(userId + ", " + itemId + ", " + ratingsData[(int)(i * nItems + j)]); - } - } + } + + public static void printALSRatings(NumericTable usersOffsetTable, NumericTable itemsOffsetTable, + NumericTable ratings) { + long nUsers = ratings.getNumberOfRows(); + long nItems = ratings.getNumberOfColumns(); + + float[] ratingsData = ((HomogenNumericTable) ratings).getFloatArray(); + IntBuffer usersOffsetBuf = IntBuffer.allocate(1); + IntBuffer itemsOffsetBuf = IntBuffer.allocate(1); + try { + usersOffsetBuf = usersOffsetTable.getBlockOfRows(0, 1, usersOffsetBuf); + itemsOffsetBuf = itemsOffsetTable.getBlockOfRows(0, 1, itemsOffsetBuf); + } catch (IllegalAccessException e) { + ErrorHandling.printThrowable(e); + return; + } + int[] usersOffsetData = new int[1]; + int[] itemsOffsetData = new int[1]; + usersOffsetBuf.get(usersOffsetData); + itemsOffsetBuf.get(itemsOffsetData); + long usersOffset = usersOffsetData[0]; + long itemsOffset = itemsOffsetData[0]; + + System.out.println(" User ID, Item ID, rating"); + for (long i = 0; i < nUsers; i++) { + for (long j = 0; j < nItems; j++) { + long userId = i + usersOffset; + long itemId = j + itemsOffset; + System.out.println(userId + ", " + itemId + ", " + ratingsData[(int) (i * nItems + j)]); + } } + } } diff --git a/mllib-dal/src/main/native/.clang-format b/mllib-dal/src/main/native/.clang-format new file mode 100644 index 000000000..0a9ea9178 --- /dev/null +++ b/mllib-dal/src/main/native/.clang-format @@ -0,0 +1,136 @@ +--- +Language: Cpp +AccessModifierOffset: -2 +AlignAfterOpenBracket: Align +AlignConsecutiveMacros: false 
+AlignConsecutiveAssignments: false +AlignConsecutiveDeclarations: false +AlignEscapedNewlines: Right +AlignOperands: true +AlignTrailingComments: true +AllowAllArgumentsOnNextLine: true +AllowAllConstructorInitializersOnNextLine: true +AllowAllParametersOfDeclarationOnNextLine: true +AllowShortBlocksOnASingleLine: Never +AllowShortCaseLabelsOnASingleLine: false +AllowShortFunctionsOnASingleLine: All +AllowShortLambdasOnASingleLine: All +AllowShortIfStatementsOnASingleLine: Never +AllowShortLoopsOnASingleLine: false +AlwaysBreakAfterDefinitionReturnType: None +AlwaysBreakAfterReturnType: None +AlwaysBreakBeforeMultilineStrings: false +AlwaysBreakTemplateDeclarations: MultiLine +BinPackArguments: true +BinPackParameters: true +BraceWrapping: + AfterCaseLabel: false + AfterClass: false + AfterControlStatement: false + AfterEnum: false + AfterFunction: false + AfterNamespace: false + AfterObjCDeclaration: false + AfterStruct: false + AfterUnion: false + AfterExternBlock: false + BeforeCatch: false + BeforeElse: false + IndentBraces: false + SplitEmptyFunction: true + SplitEmptyRecord: true + SplitEmptyNamespace: true +BreakBeforeBinaryOperators: None +BreakBeforeBraces: Attach +BreakBeforeInheritanceComma: false +BreakInheritanceList: BeforeColon +BreakBeforeTernaryOperators: true +BreakConstructorInitializersBeforeComma: false +BreakConstructorInitializers: BeforeColon +BreakAfterJavaFieldAnnotations: false +BreakStringLiterals: true +ColumnLimit: 80 +CommentPragmas: '^ IWYU pragma:' +CompactNamespaces: false +ConstructorInitializerAllOnOneLineOrOnePerLine: false +ConstructorInitializerIndentWidth: 4 +ContinuationIndentWidth: 4 +Cpp11BracedListStyle: true +DeriveLineEnding: true +DerivePointerAlignment: false +DisableFormat: false +ExperimentalAutoDetectBinPacking: false +FixNamespaceComments: true +ForEachMacros: + - foreach + - Q_FOREACH + - BOOST_FOREACH +IncludeBlocks: Preserve +IncludeCategories: + - Regex: '^"(llvm|llvm-c|clang|clang-c)/' + Priority: 2 + SortPriority: 0 + - Regex: '^(<|"(gtest|gmock|isl|json)/)' + Priority: 3 + SortPriority: 0 + - Regex: '.*' + Priority: 1 + SortPriority: 0 +IncludeIsMainRegex: '(Test)?$' +IncludeIsMainSourceRegex: '' +IndentCaseLabels: false +IndentGotoLabels: true +IndentPPDirectives: None +IndentWidth: 4 +IndentWrappedFunctionNames: false +JavaScriptQuotes: Leave +JavaScriptWrapImports: true +KeepEmptyLinesAtTheStartOfBlocks: true +MacroBlockBegin: '' +MacroBlockEnd: '' +MaxEmptyLinesToKeep: 1 +NamespaceIndentation: None +ObjCBinPackProtocolList: Auto +ObjCBlockIndentWidth: 2 +ObjCSpaceAfterProperty: false +ObjCSpaceBeforeProtocolList: true +PenaltyBreakAssignment: 2 +PenaltyBreakBeforeFirstCallParameter: 19 +PenaltyBreakComment: 300 +PenaltyBreakFirstLessLess: 120 +PenaltyBreakString: 1000 +PenaltyBreakTemplateDeclaration: 10 +PenaltyExcessCharacter: 1000000 +PenaltyReturnTypeOnItsOwnLine: 60 +PointerAlignment: Right +ReflowComments: true +SortIncludes: true +SortUsingDeclarations: true +SpaceAfterCStyleCast: false +SpaceAfterLogicalNot: false +SpaceAfterTemplateKeyword: true +SpaceBeforeAssignmentOperators: true +SpaceBeforeCpp11BracedList: false +SpaceBeforeCtorInitializerColon: true +SpaceBeforeInheritanceColon: true +SpaceBeforeParens: ControlStatements +SpaceBeforeRangeBasedForLoopColon: true +SpaceInEmptyBlock: false +SpaceInEmptyParentheses: false +SpacesBeforeTrailingComments: 1 +SpacesInAngles: false +SpacesInConditionalStatement: false +SpacesInContainerLiterals: true +SpacesInCStyleCastParentheses: false +SpacesInParentheses: false 
+SpacesInSquareBrackets: false +SpaceBeforeSquareBrackets: false +Standard: Latest +StatementMacros: + - Q_UNUSED + - QT_REQUIRE_VERSION +TabWidth: 4 +UseCRLF: false +UseTab: Never +... + diff --git a/mllib-dal/src/main/native/ALSDALImpl.cpp b/mllib-dal/src/main/native/ALSDALImpl.cpp index 29162fddd..157b39bae 100644 --- a/mllib-dal/src/main/native/ALSDALImpl.cpp +++ b/mllib-dal/src/main/native/ALSDALImpl.cpp @@ -1,13 +1,30 @@ +/******************************************************************************* + * Copyright 2020 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *******************************************************************************/ + #include -#include #include +#include #include #include "OneCCL.h" -#include "ALSShuffle.h" #include "org_apache_spark_ml_recommendation_ALSDALImpl.h" #include "service.h" +#include "ALSShuffle.h" + using namespace std; using namespace daal; using namespace daal::algorithms; @@ -20,9 +37,6 @@ typedef float algorithmFPType; /* Algorithm floating-point type */ NumericTablePtr userOffset; NumericTablePtr itemOffset; -// KeyValueDataCollectionPtr userOffsetsOnMaster; -// KeyValueDataCollectionPtr itemOffsetsOnMaster; - CSRNumericTablePtr dataTable; CSRNumericTablePtr transposedDataTable; @@ -31,492 +45,406 @@ KeyValueDataCollectionPtr itemStep3LocalInput; training::DistributedPartialResultStep4Ptr itemsPartialResultLocal; training::DistributedPartialResultStep4Ptr usersPartialResultLocal; -std::vector itemsPartialResultsMaster; -std::vector usersPartialResultsMaster; +std::vector + itemsPartialResultsMaster; +std::vector + usersPartialResultsMaster; + +template +void gather(size_t rankId, ccl::communicator &comm, size_t nBlocks, + const ByteBuffer &nodeResults, T *result) { + vector perNodeArchLengthMaster(nBlocks); + size_t perNodeArchLength = nodeResults.size(); + ByteBuffer serializedData; + + vector recv_counts(nBlocks); + for (size_t i = 0; i < nBlocks; i++) + recv_counts[i] = sizeof(size_t); + + ccl::allgatherv(&perNodeArchLength, sizeof(size_t), + perNodeArchLengthMaster.data(), recv_counts, + ccl::datatype::uint8, comm) + .wait(); + + // should resize for all ranks for ccl_allgatherv + size_t memoryBuf = 0; + for (size_t i = 0; i < nBlocks; i++) { + memoryBuf += perNodeArchLengthMaster[i]; + } + serializedData.resize(memoryBuf); + + std::vector displs(nBlocks); + if (rankId == ccl_root) { + size_t shift = 0; + for (size_t i = 0; i < nBlocks; i++) { + displs[i] = shift; + shift += perNodeArchLengthMaster[i]; + } + } + + /* Transfer partial results to step 2 on the root node */ + ccl::allgatherv(&nodeResults[0], perNodeArchLength, &serializedData[0], + perNodeArchLengthMaster, ccl::datatype::uint8, comm) + .wait(); + + if (rankId == ccl_root) { + for (size_t i = 0; i < nBlocks; i++) { + /* Deserialize partial results from step 1 */ + result[i] = result[i]->cast(deserializeDAALObject( + &serializedData[0] + displs[i], perNodeArchLengthMaster[i])); + } + } +} template -void gather(size_t rankId, 
ccl::communicator &comm, size_t nBlocks, const ByteBuffer& nodeResults, T* result) { - vector perNodeArchLengthMaster(nBlocks); - size_t perNodeArchLength = nodeResults.size(); - ByteBuffer serializedData; - - vector recv_counts(nBlocks); - for (size_t i = 0; i < nBlocks; i++) recv_counts[i] = sizeof(size_t); - - // MPI_Gather(&perNodeArchLength, sizeof(int), MPI_CHAR, perNodeArchLengthMaster, - // sizeof(int), MPI_CHAR, ccl_root, MPI_COMM_WORLD); - ccl::allgatherv(&perNodeArchLength, sizeof(size_t), perNodeArchLengthMaster.data(), recv_counts, - ccl::datatype::uint8, comm).wait(); - - // should resize for all ranks for ccl_allgatherv - size_t memoryBuf = 0; - for (size_t i = 0; i < nBlocks; i++) { - memoryBuf += perNodeArchLengthMaster[i]; - } - serializedData.resize(memoryBuf); - - std::vector displs(nBlocks); - if (rankId == ccl_root) { +void all2all(ccl::communicator &comm, ByteBuffer *nodeResults, size_t nBlocks, + KeyValueDataCollectionPtr result) { + size_t memoryBuf = 0; size_t shift = 0; + vector perNodeArchLengths(nBlocks); + vector perNodeArchLengthsRecv(nBlocks); + std::vector sdispls(nBlocks); + ByteBuffer serializedSendData; + ByteBuffer serializedRecvData; + + for (size_t i = 0; i < nBlocks; i++) { + perNodeArchLengths[i] = nodeResults[i].size(); + memoryBuf += perNodeArchLengths[i]; + sdispls[i] = shift; + shift += perNodeArchLengths[i]; + } + serializedSendData.resize(memoryBuf); + + /* memcpy to avoid double compute */ + memoryBuf = 0; for (size_t i = 0; i < nBlocks; i++) { - displs[i] = shift; - shift += perNodeArchLengthMaster[i]; + for (size_t j = 0; j < perNodeArchLengths[i]; j++) + serializedSendData[memoryBuf + j] = nodeResults[i][j]; + memoryBuf += perNodeArchLengths[i]; } - } - /* Transfer partial results to step 2 on the root node */ - // MPI_Gatherv(&nodeResults[0], perNodeArchLength, MPI_CHAR, &serializedData[0], - // perNodeArchLengthMaster, displs, MPI_CHAR, ccl_root, - // MPI_COMM_WORLD); - ccl::allgatherv(&nodeResults[0], perNodeArchLength, &serializedData[0], - perNodeArchLengthMaster, ccl::datatype::uint8, comm).wait(); + ccl::alltoall(perNodeArchLengths.data(), perNodeArchLengthsRecv.data(), + sizeof(size_t), ccl::datatype::uint8, comm) + .wait(); - if (rankId == ccl_root) { + memoryBuf = 0; + shift = 0; + std::vector rdispls(nBlocks); for (size_t i = 0; i < nBlocks; i++) { - /* Deserialize partial results from step 1 */ - result[i] = result[i]->cast(deserializeDAALObject(&serializedData[0] + displs[i], - perNodeArchLengthMaster[i])); + memoryBuf += perNodeArchLengthsRecv[i]; + rdispls[i] = shift; + shift += perNodeArchLengthsRecv[i]; } - } -} -// void gatherUsers(const ByteBuffer & nodeResults, int nBlocks) -// { -// size_t perNodeArchLengthMaster[nBlocks]; -// size_t perNodeArchLength = nodeResults.size(); -// ByteBuffer serializedData; -// size_t recv_counts[nBlocks]; -// for (int i = 0; i < nBlocks; i++) { -// recv_counts[i] = sizeof(size_t); -// } - -// ccl_request_t request; -// // MPI_Allgather(&perNodeArchLength, sizeof(int), MPI_CHAR, -// perNodeArchLengthMaster, sizeof(int), MPI_CHAR, MPI_COMM_WORLD); -// ccl_allgatherv(&perNodeArchLength, sizeof(size_t), perNodeArchLengthMaster, -// recv_counts, ccl_dtype_char, NULL, NULL, NULL, &request); ccl_wait(request); - -// size_t memoryBuf = 0; -// for (int i = 0; i < nBlocks; i++) -// { -// memoryBuf += perNodeArchLengthMaster[i]; -// } -// serializedData.resize(memoryBuf); - -// size_t shift = 0; -// std::vector displs(nBlocks); -// for (int i = 0; i < nBlocks; i++) -// { -// displs[i] = shift; -// 
shift += perNodeArchLengthMaster[i]; -// } - -// /* Transfer partial results to step 2 on the root node */ -// // MPI_Allgatherv(&nodeResults[0], perNodeArchLength, MPI_CHAR, &serializedData[0], -// perNodeArchLengthMaster, displs, MPI_CHAR, MPI_COMM_WORLD); -// ccl_allgatherv(&nodeResults[0], perNodeArchLength, &serializedData[0], -// perNodeArchLengthMaster, ccl_dtype_char, NULL, NULL, NULL, &request); -// ccl_wait(request); - -// usersPartialResultsMaster.resize(nBlocks); -// for (int i = 0; i < nBlocks; i++) -// { -// /* Deserialize partial results from step 4 */ -// usersPartialResultsMaster[i] = -// training::DistributedPartialResultStep4::cast(deserializeDAALObject(&serializedData[0] -// + displs[i], perNodeArchLengthMaster[i])); -// } -// } - -// void gatherItems(const ByteBuffer & nodeResults, size_t nBlocks) -// { -// size_t perNodeArchLengthMaster[nBlocks]; -// size_t perNodeArchLength = nodeResults.size(); -// ByteBuffer serializedData; -// size_t recv_counts[nBlocks]; -// for (size_t i = 0; i < nBlocks; i++) { -// recv_counts[i] = sizeof(size_t); -// } - -// ccl_request_t request; -// // MPI_Allgather(&perNodeArchLength, sizeof(int), MPI_CHAR, -// perNodeArchLengthMaster, sizeof(int), MPI_CHAR, MPI_COMM_WORLD); -// ccl_allgatherv(&perNodeArchLength, sizeof(size_t), perNodeArchLengthMaster, -// recv_counts, ccl_dtype_char, NULL, NULL, NULL, &request); ccl_wait(request); - -// size_t memoryBuf = 0; -// for (size_t i = 0; i < nBlocks; i++) -// { -// memoryBuf += perNodeArchLengthMaster[i]; -// } -// serializedData.resize(memoryBuf); - -// size_t shift = 0; -// std::vector displs(nBlocks); -// for (size_t i = 0; i < nBlocks; i++) -// { -// displs[i] = shift; -// shift += perNodeArchLengthMaster[i]; -// } - -// /* Transfer partial results to step 2 on the root node */ -// // MPI_Allgatherv(&nodeResults[0], perNodeArchLength, MPI_CHAR, &serializedData[0], -// perNodeArchLengthMaster, displs, MPI_CHAR, MPI_COMM_WORLD); -// ccl_allgatherv(&nodeResults[0], perNodeArchLength, &serializedData[0], -// perNodeArchLengthMaster, ccl_dtype_char, NULL, NULL, NULL, &request); -// ccl_wait(request); - -// itemsPartialResultsMaster.resize(nBlocks); -// for (size_t i = 0; i < nBlocks; i++) -// { -// /* Deserialize partial results from step 4 */ -// itemsPartialResultsMaster[i] = -// training::DistributedPartialResultStep4::cast(deserializeDAALObject(&serializedData[0] -// + displs[i], perNodeArchLengthMaster[i])); -// } -// } + serializedRecvData.resize(memoryBuf); -template -void all2all(ccl::communicator &comm, ByteBuffer* nodeResults, size_t nBlocks, KeyValueDataCollectionPtr result) { - size_t memoryBuf = 0; - size_t shift = 0; - vector perNodeArchLengths(nBlocks); - vector perNodeArchLengthsRecv(nBlocks); - std::vector sdispls(nBlocks); - ByteBuffer serializedSendData; - ByteBuffer serializedRecvData; - - for (size_t i = 0; i < nBlocks; i++) { - perNodeArchLengths[i] = nodeResults[i].size(); - memoryBuf += perNodeArchLengths[i]; - sdispls[i] = shift; - shift += perNodeArchLengths[i]; - } - serializedSendData.resize(memoryBuf); - - /* memcpy to avoid double compute */ - memoryBuf = 0; - for (size_t i = 0; i < nBlocks; i++) { - for (size_t j = 0; j < perNodeArchLengths[i]; j++) - serializedSendData[memoryBuf + j] = nodeResults[i][j]; - memoryBuf += perNodeArchLengths[i]; - } - - // MPI_Alltoall(perNodeArchLengths, sizeof(int), MPI_CHAR, perNodeArchLengthsRecv, - // sizeof(int), MPI_CHAR, MPI_COMM_WORLD); - ccl::alltoall(perNodeArchLengths.data(), perNodeArchLengthsRecv.data(), sizeof(size_t), 
ccl::datatype::uint8, comm).wait(); - - memoryBuf = 0; - shift = 0; - std::vector rdispls(nBlocks); - for (size_t i = 0; i < nBlocks; i++) { - memoryBuf += perNodeArchLengthsRecv[i]; - rdispls[i] = shift; - shift += perNodeArchLengthsRecv[i]; - } - - serializedRecvData.resize(memoryBuf); - - /* Transfer partial results to step 2 on the root node */ - // MPI_Alltoallv(&serializedSendData[0], perNodeArchLengths, sdispls, MPI_CHAR, - // &serializedRecvData[0], perNodeArchLengthsRecv, rdispls, MPI_CHAR, - // MPI_COMM_WORLD); - ccl::alltoallv(&serializedSendData[0], perNodeArchLengths, &serializedRecvData[0], - perNodeArchLengthsRecv, ccl::datatype::uint8, comm).wait(); - - for (size_t i = 0; i < nBlocks; i++) { - (*result)[i] = T::cast(deserializeDAALObject(&serializedRecvData[rdispls[i]], - perNodeArchLengthsRecv[i])); - } + /* Transfer partial results to step 2 on the root node */ + ccl::alltoallv(&serializedSendData[0], perNodeArchLengths, + &serializedRecvData[0], perNodeArchLengthsRecv, + ccl::datatype::uint8, comm) + .wait(); + + for (size_t i = 0; i < nBlocks; i++) { + (*result)[i] = T::cast(deserializeDAALObject( + &serializedRecvData[rdispls[i]], perNodeArchLengthsRecv[i])); + } } -KeyValueDataCollectionPtr initializeStep1Local(size_t rankId, size_t partitionId, +KeyValueDataCollectionPtr initializeStep1Local(size_t rankId, + size_t partitionId, size_t nBlocks, size_t nUsers, size_t nFactors) { - int usersPartition[1] = {(int)nBlocks}; - - /* Create an algorithm object to initialize the implicit ALS model with the default - * method */ - training::init::Distributed - initAlgorithm; - initAlgorithm.parameter.fullNUsers = nUsers; - initAlgorithm.parameter.nFactors = nFactors; - initAlgorithm.parameter.seed += rankId; - initAlgorithm.parameter.partition.reset( - new HomogenNumericTable((int*)usersPartition, 1, 1)); - /* Pass a training data set and dependent values to the algorithm */ - initAlgorithm.input.set(training::init::data, dataTable); - - /* Initialize the implicit ALS model */ - initAlgorithm.compute(); - - training::init::PartialResultPtr partialResult = initAlgorithm.getPartialResult(); - itemStep3LocalInput = partialResult->get(training::init::outputOfInitForComputeStep3); - userOffset = partialResult->get(training::init::offsets, (size_t)rankId); - // if (rankId == ccl_root) - // { - // userOffsetsOnMaster = partialResult->get(training::init::offsets); - // } - PartialModelPtr partialModelLocal = partialResult->get(training::init::partialModel); - - itemsPartialResultLocal.reset(new training::DistributedPartialResultStep4()); - itemsPartialResultLocal->set(training::outputOfStep4ForStep1, partialModelLocal); - - return partialResult->get(training::init::outputOfStep1ForStep2); + int usersPartition[1] = {(int)nBlocks}; + + /* Create an algorithm object to initialize the implicit ALS model with the + * default method */ + training::init::Distributed + initAlgorithm; + initAlgorithm.parameter.fullNUsers = nUsers; + initAlgorithm.parameter.nFactors = nFactors; + initAlgorithm.parameter.seed += rankId; + initAlgorithm.parameter.partition.reset( + new HomogenNumericTable((int *)usersPartition, 1, 1)); + /* Pass a training data set and dependent values to the algorithm */ + initAlgorithm.input.set(training::init::data, dataTable); + + /* Initialize the implicit ALS model */ + initAlgorithm.compute(); + + training::init::PartialResultPtr partialResult = + initAlgorithm.getPartialResult(); + itemStep3LocalInput = + partialResult->get(training::init::outputOfInitForComputeStep3); + 
userOffset = partialResult->get(training::init::offsets, (size_t)rankId); + + PartialModelPtr partialModelLocal = + partialResult->get(training::init::partialModel); + + itemsPartialResultLocal.reset( + new training::DistributedPartialResultStep4()); + itemsPartialResultLocal->set(training::outputOfStep4ForStep1, + partialModelLocal); + + return partialResult->get(training::init::outputOfStep1ForStep2); } -void initializeStep2Local(size_t rankId, size_t partitionId, - const KeyValueDataCollectionPtr& initStep2LocalInput) { - /* Create an algorithm object to perform the second step of the implicit ALS - * initialization algorithm */ - training::init::Distributed - initAlgorithm; - - initAlgorithm.input.set(training::init::inputOfStep2FromStep1, initStep2LocalInput); - - /* Compute partial results of the second step on local nodes */ - initAlgorithm.compute(); - - training::init::DistributedPartialResultStep2Ptr partialResult = - initAlgorithm.getPartialResult(); - transposedDataTable = - CSRNumericTable::cast(partialResult->get(training::init::transposedData)); - userStep3LocalInput = partialResult->get(training::init::outputOfInitForComputeStep3); - itemOffset = partialResult->get(training::init::offsets, (size_t)rankId); - // if (rankId == ccl_root) - // { - // itemOffsetsOnMaster = partialResult->get(training::init::offsets); - // } +void initializeStep2Local( + size_t rankId, size_t partitionId, + const KeyValueDataCollectionPtr &initStep2LocalInput) { + /* Create an algorithm object to perform the second step of the implicit ALS + * initialization algorithm */ + training::init::Distributed + initAlgorithm; + + initAlgorithm.input.set(training::init::inputOfStep2FromStep1, + initStep2LocalInput); + + /* Compute partial results of the second step on local nodes */ + initAlgorithm.compute(); + + training::init::DistributedPartialResultStep2Ptr partialResult = + initAlgorithm.getPartialResult(); + transposedDataTable = CSRNumericTable::cast( + partialResult->get(training::init::transposedData)); + userStep3LocalInput = + partialResult->get(training::init::outputOfInitForComputeStep3); + itemOffset = partialResult->get(training::init::offsets, (size_t)rankId); } -void initializeModel(size_t rankId, ccl::communicator &comm, size_t partitionId, size_t nBlocks, size_t nUsers, - size_t nFactors) { - std::cout << "ALS (native): initializeModel " << std::endl; +void initializeModel(size_t rankId, ccl::communicator &comm, size_t partitionId, + size_t nBlocks, size_t nUsers, size_t nFactors) { + std::cout << "ALS (native): initializeModel " << std::endl; - auto t1 = std::chrono::high_resolution_clock::now(); + auto t1 = std::chrono::high_resolution_clock::now(); - KeyValueDataCollectionPtr initStep1LocalResult = - initializeStep1Local(rankId, partitionId, nBlocks, nUsers, nFactors); + KeyValueDataCollectionPtr initStep1LocalResult = + initializeStep1Local(rankId, partitionId, nBlocks, nUsers, nFactors); - /* MPI_Alltoallv to populate initStep2LocalInput */ - ByteBuffer nodeCPs[nBlocks]; - for (size_t i = 0; i < nBlocks; i++) { - serializeDAALObject((*initStep1LocalResult)[i].get(), nodeCPs[i]); - } - KeyValueDataCollectionPtr initStep2LocalInput(new KeyValueDataCollection()); - all2all(comm, nodeCPs, nBlocks, initStep2LocalInput); + ByteBuffer nodeCPs[nBlocks]; + for (size_t i = 0; i < nBlocks; i++) { + serializeDAALObject((*initStep1LocalResult)[i].get(), nodeCPs[i]); + } + KeyValueDataCollectionPtr initStep2LocalInput(new KeyValueDataCollection()); + all2all(comm, nodeCPs, nBlocks, 
initStep2LocalInput); - initializeStep2Local(rankId, partitionId, initStep2LocalInput); + initializeStep2Local(rankId, partitionId, initStep2LocalInput); - auto t2 = std::chrono::high_resolution_clock::now(); - auto duration = std::chrono::duration_cast(t2 - t1).count(); - std::cout << "ALS (native): initializeModel took " << duration << " secs" << std::endl; + auto t2 = std::chrono::high_resolution_clock::now(); + auto duration = + std::chrono::duration_cast(t2 - t1).count(); + std::cout << "ALS (native): initializeModel took " << duration << " secs" + << std::endl; } training::DistributedPartialResultStep1Ptr computeStep1Local( - const training::DistributedPartialResultStep4Ptr& partialResultLocal, + const training::DistributedPartialResultStep4Ptr &partialResultLocal, size_t nFactors) { - /* Create algorithm objects to compute implicit ALS algorithm in the distributed - * processing mode on the local node using the default method */ - training::Distributed algorithm; - algorithm.parameter.nFactors = nFactors; + /* Create algorithm objects to compute implicit ALS algorithm in the + * distributed processing mode on the local node using the default method */ + training::Distributed algorithm; + algorithm.parameter.nFactors = nFactors; - /* Set input objects for the algorithm */ - algorithm.input.set(training::partialModel, - partialResultLocal->get(training::outputOfStep4ForStep1)); + /* Set input objects for the algorithm */ + algorithm.input.set( + training::partialModel, + partialResultLocal->get(training::outputOfStep4ForStep1)); - /* Compute partial estimates on local nodes */ - algorithm.compute(); + /* Compute partial estimates on local nodes */ + algorithm.compute(); - /* Get the computed partial estimates */ - return algorithm.getPartialResult(); + /* Get the computed partial estimates */ + return algorithm.getPartialResult(); } NumericTablePtr computeStep2Master( - const training::DistributedPartialResultStep1Ptr* step1LocalResultsOnMaster, + const training::DistributedPartialResultStep1Ptr *step1LocalResultsOnMaster, size_t nFactors, size_t nBlocks) { - /* Create algorithm objects to compute implicit ALS algorithm in the distributed - * processing mode on the master node using the default method */ - training::Distributed algorithm; - algorithm.parameter.nFactors = nFactors; + /* Create algorithm objects to compute implicit ALS algorithm in the + * distributed processing mode on the master node using the default method + */ + training::Distributed algorithm; + algorithm.parameter.nFactors = nFactors; - /* Set input objects for the algorithm */ - for (size_t i = 0; i < nBlocks; i++) { - algorithm.input.add(training::inputOfStep2FromStep1, step1LocalResultsOnMaster[i]); - } + /* Set input objects for the algorithm */ + for (size_t i = 0; i < nBlocks; i++) { + algorithm.input.add(training::inputOfStep2FromStep1, + step1LocalResultsOnMaster[i]); + } - /* Compute a partial estimate on the master node from the partial estimates on local - * nodes */ - algorithm.compute(); + /* Compute a partial estimate on the master node from the partial estimates + * on local nodes */ + algorithm.compute(); - return algorithm.getPartialResult()->get(training::outputOfStep2ForStep4); + return algorithm.getPartialResult()->get(training::outputOfStep2ForStep4); } KeyValueDataCollectionPtr computeStep3Local( - const NumericTablePtr& offset, - const training::DistributedPartialResultStep4Ptr& partialResultLocal, - const KeyValueDataCollectionPtr& step3LocalInput, size_t nFactors) { - 
training::Distributed algorithm; - algorithm.parameter.nFactors = nFactors; + const NumericTablePtr &offset, + const training::DistributedPartialResultStep4Ptr &partialResultLocal, + const KeyValueDataCollectionPtr &step3LocalInput, size_t nFactors) { + training::Distributed algorithm; + algorithm.parameter.nFactors = nFactors; - algorithm.input.set(training::partialModel, - partialResultLocal->get(training::outputOfStep4ForStep3)); - algorithm.input.set(training::inputOfStep3FromInit, step3LocalInput); - algorithm.input.set(training::offset, offset); + algorithm.input.set( + training::partialModel, + partialResultLocal->get(training::outputOfStep4ForStep3)); + algorithm.input.set(training::inputOfStep3FromInit, step3LocalInput); + algorithm.input.set(training::offset, offset); - algorithm.compute(); + algorithm.compute(); - return algorithm.getPartialResult()->get(training::outputOfStep3ForStep4); + return algorithm.getPartialResult()->get(training::outputOfStep3ForStep4); } -training::DistributedPartialResultStep4Ptr computeStep4Local( - const CSRNumericTablePtr& dataTable, const NumericTablePtr& step2MasterResult, - const KeyValueDataCollectionPtr& step4LocalInput, size_t nFactors) { - training::Distributed algorithm; - algorithm.parameter.nFactors = nFactors; +training::DistributedPartialResultStep4Ptr +computeStep4Local(const CSRNumericTablePtr &dataTable, + const NumericTablePtr &step2MasterResult, + const KeyValueDataCollectionPtr &step4LocalInput, + size_t nFactors) { + training::Distributed algorithm; + algorithm.parameter.nFactors = nFactors; - algorithm.input.set(training::partialModels, step4LocalInput); - algorithm.input.set(training::partialData, dataTable); - algorithm.input.set(training::inputOfStep4FromStep2, step2MasterResult); + algorithm.input.set(training::partialModels, step4LocalInput); + algorithm.input.set(training::partialData, dataTable); + algorithm.input.set(training::inputOfStep4FromStep2, step2MasterResult); - algorithm.compute(); + algorithm.compute(); - return algorithm.getPartialResult(); + return algorithm.getPartialResult(); } -void trainModel(size_t rankId, ccl::communicator &comm, size_t partitionId, size_t nBlocks, size_t nFactors, - size_t maxIterations) { - std::cout << "ALS (native): trainModel" << std::endl; +void trainModel(size_t rankId, ccl::communicator &comm, size_t partitionId, + size_t nBlocks, size_t nFactors, size_t maxIterations) { + std::cout << "ALS (native): trainModel" << std::endl; - auto tStart = std::chrono::high_resolution_clock::now(); + auto tStart = std::chrono::high_resolution_clock::now(); - training::DistributedPartialResultStep1Ptr step1LocalResultsOnMaster[nBlocks]; - training::DistributedPartialResultStep1Ptr step1LocalResult; - NumericTablePtr step2MasterResult; - KeyValueDataCollectionPtr step3LocalResult; - KeyValueDataCollectionPtr step4LocalInput(new KeyValueDataCollection()); + training::DistributedPartialResultStep1Ptr + step1LocalResultsOnMaster[nBlocks]; + training::DistributedPartialResultStep1Ptr step1LocalResult; + NumericTablePtr step2MasterResult; + KeyValueDataCollectionPtr step3LocalResult; + KeyValueDataCollectionPtr step4LocalInput(new KeyValueDataCollection()); - ByteBuffer nodeCPs[nBlocks]; - ByteBuffer nodeResults; - ByteBuffer crossProductBuf; - int crossProductLen; + ByteBuffer nodeCPs[nBlocks]; + ByteBuffer nodeResults; + ByteBuffer crossProductBuf; + int crossProductLen; - for (size_t iteration = 0; iteration < maxIterations; iteration++) { - auto t1 = std::chrono::high_resolution_clock::now(); + 
for (size_t iteration = 0; iteration < maxIterations; iteration++) { + auto t1 = std::chrono::high_resolution_clock::now(); - // - // Update partial users factors - // - step1LocalResult = computeStep1Local(itemsPartialResultLocal, nFactors); + // + // Update partial users factors + // + step1LocalResult = computeStep1Local(itemsPartialResultLocal, nFactors); - serializeDAALObject(step1LocalResult.get(), nodeResults); + serializeDAALObject(step1LocalResult.get(), nodeResults); - /* Gathering step1LocalResult on the master */ - gather(rankId, comm, nBlocks, nodeResults, step1LocalResultsOnMaster); + /* Gathering step1LocalResult on the master */ + gather(rankId, comm, nBlocks, nodeResults, step1LocalResultsOnMaster); - if (rankId == ccl_root) { - step2MasterResult = - computeStep2Master(step1LocalResultsOnMaster, nFactors, nBlocks); - serializeDAALObject(step2MasterResult.get(), crossProductBuf); - crossProductLen = crossProductBuf.size(); - } + if (rankId == ccl_root) { + step2MasterResult = computeStep2Master(step1LocalResultsOnMaster, + nFactors, nBlocks); + serializeDAALObject(step2MasterResult.get(), crossProductBuf); + crossProductLen = crossProductBuf.size(); + } - // MPI_Bcast(&crossProductLen, sizeof(int), MPI_CHAR, ccl_root, MPI_COMM_WORLD); - ccl::broadcast(&crossProductLen, sizeof(int), ccl::datatype::uint8, ccl_root, comm).wait(); + ccl::broadcast(&crossProductLen, sizeof(int), ccl::datatype::uint8, + ccl_root, comm) + .wait(); - if (rankId != ccl_root) { - crossProductBuf.resize(crossProductLen); - } - // MPI_Bcast(&crossProductBuf[0], crossProductLen, MPI_CHAR, ccl_root, - // MPI_COMM_WORLD); - ccl::broadcast(&crossProductBuf[0], crossProductLen, ccl::datatype::uint8, ccl_root, comm).wait(); + if (rankId != ccl_root) { + crossProductBuf.resize(crossProductLen); + } - step2MasterResult = - NumericTable::cast(deserializeDAALObject(&crossProductBuf[0], crossProductLen)); + ccl::broadcast(&crossProductBuf[0], crossProductLen, + ccl::datatype::uint8, ccl_root, comm) + .wait(); - step3LocalResult = computeStep3Local(itemOffset, itemsPartialResultLocal, - itemStep3LocalInput, nFactors); + step2MasterResult = NumericTable::cast( + deserializeDAALObject(&crossProductBuf[0], crossProductLen)); - /* MPI_Alltoallv to populate step4LocalInput */ - for (size_t i = 0; i < nBlocks; i++) { - serializeDAALObject((*step3LocalResult)[i].get(), nodeCPs[i]); - } - all2all(comm, nodeCPs, nBlocks, step4LocalInput); + step3LocalResult = computeStep3Local( + itemOffset, itemsPartialResultLocal, itemStep3LocalInput, nFactors); - usersPartialResultLocal = computeStep4Local(transposedDataTable, step2MasterResult, - step4LocalInput, nFactors); + for (size_t i = 0; i < nBlocks; i++) { + serializeDAALObject((*step3LocalResult)[i].get(), nodeCPs[i]); + } + all2all(comm, nodeCPs, nBlocks, step4LocalInput); - // - // Update partial items factors - // - step1LocalResult = computeStep1Local(usersPartialResultLocal, nFactors); + usersPartialResultLocal = computeStep4Local( + transposedDataTable, step2MasterResult, step4LocalInput, nFactors); - serializeDAALObject(step1LocalResult.get(), nodeResults); + // + // Update partial items factors + // + step1LocalResult = computeStep1Local(usersPartialResultLocal, nFactors); - /* Gathering step1LocalResult on the master */ - gather(rankId, comm, nBlocks, nodeResults, step1LocalResultsOnMaster); + serializeDAALObject(step1LocalResult.get(), nodeResults); - if (rankId == ccl_root) { - step2MasterResult = - computeStep2Master(step1LocalResultsOnMaster, nFactors, nBlocks); - 
serializeDAALObject(step2MasterResult.get(), crossProductBuf); - crossProductLen = crossProductBuf.size(); - } + /* Gathering step1LocalResult on the master */ + gather(rankId, comm, nBlocks, nodeResults, step1LocalResultsOnMaster); - // MPI_Bcast(&crossProductLen, sizeof(int), MPI_CHAR, ccl_root, MPI_COMM_WORLD); - ccl::broadcast(&crossProductLen, sizeof(int), ccl::datatype::uint8, ccl_root, comm).wait(); + if (rankId == ccl_root) { + step2MasterResult = computeStep2Master(step1LocalResultsOnMaster, + nFactors, nBlocks); + serializeDAALObject(step2MasterResult.get(), crossProductBuf); + crossProductLen = crossProductBuf.size(); + } - if (rankId != ccl_root) { - crossProductBuf.resize(crossProductLen); - } + ccl::broadcast(&crossProductLen, sizeof(int), ccl::datatype::uint8, + ccl_root, comm) + .wait(); - // MPI_Bcast(&crossProductBuf[0], crossProductLen, MPI_CHAR, ccl_root, - // MPI_COMM_WORLD); - ccl::broadcast(&crossProductBuf[0], crossProductLen, ccl::datatype::uint8, ccl_root, comm).wait(); + if (rankId != ccl_root) { + crossProductBuf.resize(crossProductLen); + } - step2MasterResult = - NumericTable::cast(deserializeDAALObject(&crossProductBuf[0], crossProductLen)); + ccl::broadcast(&crossProductBuf[0], crossProductLen, + ccl::datatype::uint8, ccl_root, comm) + .wait(); - step3LocalResult = computeStep3Local(userOffset, usersPartialResultLocal, - userStep3LocalInput, nFactors); + step2MasterResult = NumericTable::cast( + deserializeDAALObject(&crossProductBuf[0], crossProductLen)); - /* MPI_Alltoallv to populate step4LocalInput */ - for (size_t i = 0; i < nBlocks; i++) { - serializeDAALObject((*step3LocalResult)[i].get(), nodeCPs[i]); - } - all2all(comm, nodeCPs, nBlocks, step4LocalInput); + step3LocalResult = computeStep3Local( + userOffset, usersPartialResultLocal, userStep3LocalInput, nFactors); - itemsPartialResultLocal = - computeStep4Local(dataTable, step2MasterResult, step4LocalInput, nFactors); + /* MPI_Alltoallv to populate step4LocalInput */ + for (size_t i = 0; i < nBlocks; i++) { + serializeDAALObject((*step3LocalResult)[i].get(), nodeCPs[i]); + } + all2all(comm, nodeCPs, nBlocks, step4LocalInput); + + itemsPartialResultLocal = computeStep4Local( + dataTable, step2MasterResult, step4LocalInput, nFactors); + + auto t2 = std::chrono::high_resolution_clock::now(); + auto duration = + std::chrono::duration_cast(t2 - t1).count(); + std::cout << "ALS (native): iteration " << iteration << " took " + << duration << " secs" << std::endl; + } - auto t2 = std::chrono::high_resolution_clock::now(); - auto duration = std::chrono::duration_cast(t2 - t1).count(); - std::cout << "ALS (native): iteration " << iteration << " took " << duration - << " secs" << std::endl; - } - - auto tEnd = std::chrono::high_resolution_clock::now(); - auto durationTotal = - std::chrono::duration_cast(tEnd - tStart).count(); - std::cout << "ALS (native): trainModel took " << durationTotal << " secs" << std::endl; - - /*Gather all itemsPartialResultLocal to itemsPartialResultsMaster on the master and - * distributing the result over other ranks*/ - // serializeDAALObject(itemsPartialResultLocal.get(), nodeResults); - // gatherItems(nodeResults, nBlocks); - - // serializeDAALObject(usersPartialResultLocal.get(), nodeResults); - // gatherUsers(nodeResults, nBlocks); + auto tEnd = std::chrono::high_resolution_clock::now(); + auto durationTotal = + std::chrono::duration_cast(tEnd - tStart).count(); + std::cout << "ALS (native): trainModel took " << durationTotal << " secs" + << std::endl; } static size_t 
getOffsetFromOffsetTable(NumericTablePtr offsetTable) { - size_t ret; - BlockDescriptor block; - offsetTable->getBlockOfRows(0, 1, readOnly, block); - ret = (size_t)((block.getBlockPtr())[0]); - offsetTable->releaseBlockOfRows(block); + size_t ret; + BlockDescriptor block; + offsetTable->getBlockOfRows(0, 1, readOnly, block); + ret = (size_t)((block.getBlockPtr())[0]); + offsetTable->releaseBlockOfRows(block); - return ret; + return ret; } /* @@ -525,47 +453,42 @@ static size_t getOffsetFromOffsetTable(NumericTablePtr offsetTable) { * Signature: * (Ljava/nio/ByteBuffer;IILorg/apache/spark/ml/recommendation/ALSPartitionInfo;)Ljava/nio/ByteBuffer; */ -JNIEXPORT jobject JNICALL Java_org_apache_spark_ml_recommendation_ALSDALImpl_cShuffleData( - JNIEnv* env, jobject obj, jobject dataBuffer, jint nTotalKeys, jint nBlocks, +JNIEXPORT jobject JNICALL +Java_org_apache_spark_ml_recommendation_ALSDALImpl_cShuffleData( + JNIEnv *env, jobject obj, jobject dataBuffer, jint nTotalKeys, jint nBlocks, jobject infoObj) { - // cout << "cShuffleData: rank " << rankId << endl; - cout << "RATING_SIZE: " << RATING_SIZE << endl; - - ccl::communicator &comm = getComm(); - - jbyte* ratingsBuf = (jbyte*)env->GetDirectBufferAddress(dataBuffer); - - jlong ratingsNum = env->GetDirectBufferCapacity(dataBuffer) / RATING_SIZE; - - std::vector ratingPartitions(nBlocks); - - for (int i = 0; i < ratingsNum; i++) { - Rating* rating = (Rating*)(ratingsBuf + RATING_SIZE * i); - int partition = getPartiton(rating->user, nTotalKeys, nBlocks); - ratingPartitions[partition].push_back(*rating); - } - - // for (int i = 0; i < nBlocks; i++) { - // cout << "Partition " << i << endl; - // for (auto r : ratingPartitions[i]) { - // cout << r.user << " " << r.item << " " << r.rating << endl; - // } - // } - - size_t newRatingsNum = 0; - size_t newCsrRowNum = 0; - Rating* ratings = shuffle_all2all(comm, ratingPartitions, nBlocks, newRatingsNum, newCsrRowNum); - - // Get the class of the input object - jclass clazz = env->GetObjectClass(infoObj); - // Get Field references - jfieldID ratingsNumField = env->GetFieldID(clazz, "ratingsNum", "I"); - jfieldID csrRowNumField = env->GetFieldID(clazz, "csrRowNum", "I"); - - env->SetIntField(infoObj, ratingsNumField, newRatingsNum); - env->SetIntField(infoObj, csrRowNumField, newCsrRowNum); - - return env->NewDirectByteBuffer(ratings, newRatingsNum*RATING_SIZE); + // cout << "cShuffleData: rank " << rankId << endl; + cout << "RATING_SIZE: " << RATING_SIZE << endl; + + ccl::communicator &comm = getComm(); + + jbyte *ratingsBuf = (jbyte *)env->GetDirectBufferAddress(dataBuffer); + + jlong ratingsNum = env->GetDirectBufferCapacity(dataBuffer) / RATING_SIZE; + + std::vector ratingPartitions(nBlocks); + + for (int i = 0; i < ratingsNum; i++) { + Rating *rating = (Rating *)(ratingsBuf + RATING_SIZE * i); + int partition = getPartiton(rating->user, nTotalKeys, nBlocks); + ratingPartitions[partition].push_back(*rating); + } + + size_t newRatingsNum = 0; + size_t newCsrRowNum = 0; + Rating *ratings = shuffle_all2all(comm, ratingPartitions, nBlocks, + newRatingsNum, newCsrRowNum); + + // Get the class of the input object + jclass clazz = env->GetObjectClass(infoObj); + // Get Field references + jfieldID ratingsNumField = env->GetFieldID(clazz, "ratingsNum", "I"); + jfieldID csrRowNumField = env->GetFieldID(clazz, "csrRowNum", "I"); + + env->SetIntField(infoObj, ratingsNumField, newRatingsNum); + env->SetIntField(infoObj, csrRowNumField, newCsrRowNum); + + return env->NewDirectByteBuffer(ratings, newRatingsNum * 
RATING_SIZE); } /* @@ -574,95 +497,80 @@ JNIEXPORT jobject JNICALL Java_org_apache_spark_ml_recommendation_ALSDALImpl_cSh * Signature: (JJIIDDIIILorg/apache/spark/ml/recommendation/ALSResult;)J */ -JNIEXPORT jlong JNICALL Java_org_apache_spark_ml_recommendation_ALSDALImpl_cDALImplictALS( - JNIEnv* env, jobject obj, jlong numTableAddr, jlong nUsers, jint nFactors, - jint maxIter, jdouble regParam, jdouble alpha, jint executor_num, jint executor_cores, - jint partitionId, jobject resultObj) { - - ccl::communicator &comm = getComm(); - size_t rankId = comm.rank(); - - dataTable = *((CSRNumericTablePtr*)numTableAddr); - // dataTable.reset(createFloatSparseTable("/home/xiaochang/github/oneDAL-upstream/samples/daal/cpp/mpi/data/distributed/implicit_als_csr_1.csv")); - - // printNumericTable(dataTable, "cDALImplictALS", 10); - cout << "ALS (native): Input info: " << endl; - cout << "- NumberOfRows: " << dataTable->getNumberOfRows() << endl; - cout << "- NumberOfColumns: " << dataTable->getNumberOfColumns() << endl; - cout << "- NumberOfRatings: " << dataTable->getDataSize() << endl; - cout << "- fullNUsers: " << nUsers << endl; - cout << "- nFactors: " << nFactors << endl; - - // Set number of threads for oneDAL to use for each rank - services::Environment::getInstance()->setNumberOfThreads(executor_cores); - int nThreadsNew = services::Environment::getInstance()->getNumberOfThreads(); - cout << "oneDAL (native): Number of threads used: " << nThreadsNew << endl; - - int nBlocks = executor_num; - initializeModel(rankId, comm, partitionId, nBlocks, nUsers, nFactors); - trainModel(rankId, comm, partitionId, executor_num, nFactors, maxIter); - - auto pUser = - usersPartialResultLocal->get(training::outputOfStep4ForStep1)->getFactors(); - // auto pUserIndices = - // usersPartialResultLocal->get(training::outputOfStep4ForStep1)->getIndices(); - auto pItem = - itemsPartialResultLocal->get(training::outputOfStep4ForStep1)->getFactors(); - // auto pItemIndices = - // itemsPartialResultsMaster[i]->get(training::outputOfStep4ForStep1)->getIndices(); - - std::cout << "\n=== Results for Rank " << rankId << "===\n" << std::endl; - // std::cout << "Partition ID: " << partitionId << std::endl; - printNumericTable(pUser, "User Factors (first 10 rows x 20 columns):", 10, 20); - printNumericTable(pItem, "Item Factors (first 10 rows x 20 columns):", 10, 20); - std::cout << "User Offset: " << getOffsetFromOffsetTable(userOffset) << std::endl; - std::cout << "Item Offset: " << getOffsetFromOffsetTable(itemOffset) << std::endl; - std::cout << std::endl; - - // printNumericTable(userOffset, "userOffset"); - // printNumericTable(itemOffset, "itemOffset"); - - // if (rankId == ccl_root) { - // for (int i = 0; i < nBlocks; i++) { - // printNumericTable(NumericTable::cast((*userOffsetsOnMaster)[i]), - // "userOffsetsOnMaster"); - // } - - // for (int i = 0; i < nBlocks; i++) { - // printNumericTable(NumericTable::cast((*itemOffsetsOnMaster)[i]), - // "itemOffsetsOnMaster"); - // } - // } - - // printf("native pUser %ld, pItem %ld", (jlong)&pUser, (jlong)&pItem); - - // Get the class of the input object - jclass clazz = env->GetObjectClass(resultObj); - - // Fill in rankId - jfieldID cRankIdField = env->GetFieldID(clazz, "rankId", "J"); - env->SetLongField(resultObj, cRankIdField, (jlong)rankId); - - // Fill in cUsersFactorsNumTab & cItemsFactorsNumTab - // Get Field references - jfieldID cUsersFactorsNumTabField = env->GetFieldID(clazz, "cUsersFactorsNumTab", "J"); - jfieldID cItemsFactorsNumTabField = 
env->GetFieldID(clazz, "cItemsFactorsNumTab", "J"); - // Set factors as result, should use heap memory - NumericTablePtr* retUser = new NumericTablePtr(pUser); - NumericTablePtr* retItem = new NumericTablePtr(pItem); - env->SetLongField(resultObj, cUsersFactorsNumTabField, (jlong)retUser); - env->SetLongField(resultObj, cItemsFactorsNumTabField, (jlong)retItem); - - // Fill in cUserOffset & cItemOffset - jfieldID cUserOffsetField = env->GetFieldID(clazz, "cUserOffset", "J"); - assert(cUserOffsetField != NULL); - env->SetLongField(resultObj, cUserOffsetField, - (jlong)getOffsetFromOffsetTable(userOffset)); - - jfieldID cItemOffsetField = env->GetFieldID(clazz, "cItemOffset", "J"); - assert(cItemOffsetField != NULL); - env->SetLongField(resultObj, cItemOffsetField, - (jlong)getOffsetFromOffsetTable(itemOffset)); - - return 0; +JNIEXPORT jlong JNICALL +Java_org_apache_spark_ml_recommendation_ALSDALImpl_cDALImplictALS( + JNIEnv *env, jobject obj, jlong numTableAddr, jlong nUsers, jint nFactors, + jint maxIter, jdouble regParam, jdouble alpha, jint executor_num, + jint executor_cores, jint partitionId, jobject resultObj) { + + ccl::communicator &comm = getComm(); + size_t rankId = comm.rank(); + + dataTable = *((CSRNumericTablePtr *)numTableAddr); + + cout << "ALS (native): Input info: " << endl; + cout << "- NumberOfRows: " << dataTable->getNumberOfRows() << endl; + cout << "- NumberOfColumns: " << dataTable->getNumberOfColumns() << endl; + cout << "- NumberOfRatings: " << dataTable->getDataSize() << endl; + cout << "- fullNUsers: " << nUsers << endl; + cout << "- nFactors: " << nFactors << endl; + + // Set number of threads for oneDAL to use for each rank + services::Environment::getInstance()->setNumberOfThreads(executor_cores); + int nThreadsNew = + services::Environment::getInstance()->getNumberOfThreads(); + cout << "oneDAL (native): Number of CPU threads used: " << nThreadsNew + << endl; + + int nBlocks = executor_num; + initializeModel(rankId, comm, partitionId, nBlocks, nUsers, nFactors); + trainModel(rankId, comm, partitionId, executor_num, nFactors, maxIter); + + auto pUser = usersPartialResultLocal->get(training::outputOfStep4ForStep1) + ->getFactors(); + auto pItem = itemsPartialResultLocal->get(training::outputOfStep4ForStep1) + ->getFactors(); + + std::cout << "\n=== Results for Rank " << rankId << "===\n" << std::endl; + printNumericTable(pUser, "User Factors (first 10 rows x 20 columns):", 10, + 20); + printNumericTable(pItem, "Item Factors (first 10 rows x 20 columns):", 10, + 20); + std::cout << "User Offset: " << getOffsetFromOffsetTable(userOffset) + << std::endl; + std::cout << "Item Offset: " << getOffsetFromOffsetTable(itemOffset) + << std::endl; + std::cout << std::endl; + + // Get the class of the input object + jclass clazz = env->GetObjectClass(resultObj); + + // Fill in rankId + jfieldID cRankIdField = env->GetFieldID(clazz, "rankId", "J"); + env->SetLongField(resultObj, cRankIdField, (jlong)rankId); + + // Fill in cUsersFactorsNumTab & cItemsFactorsNumTab + // Get Field references + jfieldID cUsersFactorsNumTabField = + env->GetFieldID(clazz, "cUsersFactorsNumTab", "J"); + jfieldID cItemsFactorsNumTabField = + env->GetFieldID(clazz, "cItemsFactorsNumTab", "J"); + // Set factors as result, should use heap memory + NumericTablePtr *retUser = new NumericTablePtr(pUser); + NumericTablePtr *retItem = new NumericTablePtr(pItem); + env->SetLongField(resultObj, cUsersFactorsNumTabField, (jlong)retUser); + env->SetLongField(resultObj, cItemsFactorsNumTabField, 
(jlong)retItem); + + // Fill in cUserOffset & cItemOffset + jfieldID cUserOffsetField = env->GetFieldID(clazz, "cUserOffset", "J"); + assert(cUserOffsetField != NULL); + env->SetLongField(resultObj, cUserOffsetField, + (jlong)getOffsetFromOffsetTable(userOffset)); + + jfieldID cItemOffsetField = env->GetFieldID(clazz, "cItemOffset", "J"); + assert(cItemOffsetField != NULL); + env->SetLongField(resultObj, cItemOffsetField, + (jlong)getOffsetFromOffsetTable(itemOffset)); + + return 0; } diff --git a/mllib-dal/src/main/native/ALSShuffle.cpp b/mllib-dal/src/main/native/ALSShuffle.cpp index 73440d253..313a0c393 100644 --- a/mllib-dal/src/main/native/ALSShuffle.cpp +++ b/mllib-dal/src/main/native/ALSShuffle.cpp @@ -1,9 +1,25 @@ +/******************************************************************************* + * Copyright 2020 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *******************************************************************************/ + +#include #include #include -#include -#include -#include #include +#include +#include #include "ALSShuffle.h" @@ -13,19 +29,18 @@ std::vector recvData; jlong getPartiton(jlong key, jlong totalKeys, long nBlocks) { - jlong itemsInBlock = totalKeys / nBlocks; + jlong itemsInBlock = totalKeys / nBlocks; - return min(key / itemsInBlock, nBlocks - 1); + return min(key / itemsInBlock, nBlocks - 1); } -// Compares two Rating according to userId. -bool compareRatingByUser(Rating r1, Rating r2) -{ - if (r1.user < r2.user) - return true; - if (r1.user == r2.user && r1.item < r2.item) - return true; - return false; +// Compares two Rating according to userId. 
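+// Note (descriptive): shuffle_all2all sorts the received ratings with this
+// comparator (by user, then by item) so that each user's rows are contiguous;
+// distinct_count then derives the CSR row count from the sorted data.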
+bool compareRatingByUser(Rating r1, Rating r2) { + if (r1.user < r2.user) + return true; + if (r1.user == r2.user && r1.item < r2.item) + return true; + return false; } bool compareRatingUserEquality(Rating &r1, Rating &r2) { @@ -33,70 +48,80 @@ bool compareRatingUserEquality(Rating &r1, Rating &r2) { } int distinct_count(std::vector &data) { - long curUser = -1; - long count = 0; - for (auto i : data) { - if (i.user > curUser) { - curUser = i.user; - count += 1; - } - } - return count; + long curUser = -1; + long count = 0; + for (auto i : data) { + if (i.user > curUser) { + curUser = i.user; + count += 1; + } + } + return count; } -Rating * shuffle_all2all(ccl::communicator &comm, std::vector &partitions, size_t nBlocks, size_t &newRatingsNum, size_t &newCsrRowNum) { - size_t sendBufSize = 0; - size_t recvBufSize = 0; - vector perNodeSendLens(nBlocks); - vector perNodeRecvLens(nBlocks); - - ByteBuffer sendData; - - // Calculate send buffer size - for (size_t i = 0; i < nBlocks; i++) { - perNodeSendLens[i] = partitions[i].size() * RATING_SIZE; - // cout << "rank " << rankId << " Send partition " << i << " size " << perNodeSendLens[i] << endl; - sendBufSize += perNodeSendLens[i]; - } - cout << "sendData size " << sendBufSize << endl; - sendData.resize(sendBufSize); - - // Fill in send buffer - size_t offset = 0; - for (size_t i = 0; i < nBlocks; i++) - { - memcpy(sendData.data()+offset, partitions[i].data(), perNodeSendLens[i]); - offset += perNodeSendLens[i]; - } - - // Send lens first - ccl::alltoall(perNodeSendLens.data(), perNodeRecvLens.data(), sizeof(size_t), ccl::datatype::uint8, comm).wait(); - - // Calculate recv buffer size - for (size_t i = 0; i < nBlocks; i++) { - // cout << "rank " << rankId << " Recv partition " << i << " size " << perNodeRecvLens[i] << endl; - recvBufSize += perNodeRecvLens[i]; - } - - int ratingsNum = recvBufSize / RATING_SIZE; - recvData.resize(ratingsNum); - - // Send data - ccl::alltoallv(sendData.data(), perNodeSendLens, recvData.data(), perNodeRecvLens, ccl::datatype::uint8, comm).wait(); - - sort(recvData.begin(), recvData.end(), compareRatingByUser); - - // for (auto r : recvData) { - // cout << r.user << " " << r.item << " " << r.rating << endl; - // } - - newRatingsNum = recvData.size(); - // RatingPartition::iterator iter = std::unique(recvData.begin(), recvData.end(), compareRatingUserEquality); - // newCsrRowNum = std::distance(recvData.begin(), iter); - newCsrRowNum = distinct_count(recvData); - - cout << "newRatingsNum: " << newRatingsNum << " newCsrRowNum: " << newCsrRowNum << endl; - - return recvData.data(); +Rating *shuffle_all2all(ccl::communicator &comm, + std::vector &partitions, + size_t nBlocks, size_t &newRatingsNum, + size_t &newCsrRowNum) { + size_t sendBufSize = 0; + size_t recvBufSize = 0; + vector perNodeSendLens(nBlocks); + vector perNodeRecvLens(nBlocks); + + ByteBuffer sendData; + + // Calculate send buffer size + for (size_t i = 0; i < nBlocks; i++) { + perNodeSendLens[i] = partitions[i].size() * RATING_SIZE; + // cout << "rank " << rankId << " Send partition " << i << " size " << + // perNodeSendLens[i] << endl; + sendBufSize += perNodeSendLens[i]; + } + cout << "sendData size " << sendBufSize << endl; + sendData.resize(sendBufSize); + + // Fill in send buffer + size_t offset = 0; + for (size_t i = 0; i < nBlocks; i++) { + memcpy(sendData.data() + offset, partitions[i].data(), + perNodeSendLens[i]); + offset += perNodeSendLens[i]; + } + + // Send lens first + ccl::alltoall(perNodeSendLens.data(), perNodeRecvLens.data(), + 
sizeof(size_t), ccl::datatype::uint8, comm) + .wait(); + + // Calculate recv buffer size + for (size_t i = 0; i < nBlocks; i++) { + // cout << "rank " << rankId << " Recv partition " << i << " size " << + // perNodeRecvLens[i] << endl; + recvBufSize += perNodeRecvLens[i]; + } + + int ratingsNum = recvBufSize / RATING_SIZE; + recvData.resize(ratingsNum); + + // Send data + ccl::alltoallv(sendData.data(), perNodeSendLens, recvData.data(), + perNodeRecvLens, ccl::datatype::uint8, comm) + .wait(); + + sort(recvData.begin(), recvData.end(), compareRatingByUser); + + // for (auto r : recvData) { + // cout << r.user << " " << r.item << " " << r.rating << endl; + // } + + newRatingsNum = recvData.size(); + // RatingPartition::iterator iter = std::unique(recvData.begin(), + // recvData.end(), compareRatingUserEquality); newCsrRowNum = + // std::distance(recvData.begin(), iter); + newCsrRowNum = distinct_count(recvData); + + cout << "newRatingsNum: " << newRatingsNum + << " newCsrRowNum: " << newCsrRowNum << endl; + + return recvData.data(); } - diff --git a/mllib-dal/src/main/native/ALSShuffle.h b/mllib-dal/src/main/native/ALSShuffle.h index dbe864978..62238586a 100644 --- a/mllib-dal/src/main/native/ALSShuffle.h +++ b/mllib-dal/src/main/native/ALSShuffle.h @@ -1,11 +1,28 @@ +/******************************************************************************* + * Copyright 2020 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *******************************************************************************/ + #pragma once #include +#include struct Rating { - jlong user; - jlong item; - jfloat rating; + jlong user; + jlong item; + jfloat rating; } __attribute__((packed)); const int RATING_SIZE = sizeof(Rating); @@ -14,4 +31,6 @@ typedef std::vector ByteBuffer; typedef std::vector RatingPartition; jlong getPartiton(jlong key, jlong totalKeys, long nBlocks); -Rating * shuffle_all2all(ccl::communicator &comm, std::vector &partitions, size_t nBlocks, size_t &ratingsNum, size_t &csrRowNum); +Rating *shuffle_all2all(ccl::communicator &comm, + std::vector &partitions, + size_t nBlocks, size_t &ratingsNum, size_t &csrRowNum); diff --git a/mllib-dal/src/main/native/KMeansDALImpl.cpp b/mllib-dal/src/main/native/KMeansDALImpl.cpp index d9c7a2f29..db8db80b1 100644 --- a/mllib-dal/src/main/native/KMeansDALImpl.cpp +++ b/mllib-dal/src/main/native/KMeansDALImpl.cpp @@ -1,27 +1,27 @@ /******************************************************************************* -* Copyright 2020 Intel Corporation -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-* See the License for the specific language governing permissions and -* limitations under the License. -*******************************************************************************/ + * Copyright 2020 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *******************************************************************************/ -#include +#include #include #include -#include +#include -#include "service.h" -#include "org_apache_spark_ml_clustering_KMeansDALImpl.h" #include "OneCCL.h" +#include "org_apache_spark_ml_clustering_KMeansDALImpl.h" +#include "service.h" using namespace std; using namespace daal; @@ -31,30 +31,36 @@ const int ccl_root = 0; typedef double algorithmFPType; /* Algorithm floating-point type */ -static NumericTablePtr kmeans_compute(int rankId, ccl::communicator &comm, - const NumericTablePtr & pData, const NumericTablePtr & initialCentroids, - size_t nClusters, size_t nBlocks, algorithmFPType &ret_cost) -{ - const bool isRoot = (rankId == ccl_root); +static NumericTablePtr kmeans_compute(int rankId, ccl::communicator &comm, + const NumericTablePtr &pData, + const NumericTablePtr &initialCentroids, + size_t nClusters, size_t nBlocks, + algorithmFPType &ret_cost) { + const bool isRoot = (rankId == ccl_root); size_t CentroidsArchLength = 0; InputDataArchive inputArch; - if (isRoot) - { - /*Retrieve the algorithm results and serialize them */ + if (isRoot) { + /* Retrieve the algorithm results and serialize them */ initialCentroids->serialize(inputArch); CentroidsArchLength = inputArch.getSizeOfArchive(); } /* Get partial results from the root node */ - ccl::broadcast(&CentroidsArchLength, sizeof(size_t), ccl::datatype::uint8, ccl_root, comm).wait(); + ccl::broadcast(&CentroidsArchLength, sizeof(size_t), ccl::datatype::uint8, + ccl_root, comm) + .wait(); ByteBuffer nodeCentroids(CentroidsArchLength); - if (isRoot) inputArch.copyArchiveToArray(&nodeCentroids[0], CentroidsArchLength); + if (isRoot) + inputArch.copyArchiveToArray(&nodeCentroids[0], CentroidsArchLength); - ccl::broadcast(&nodeCentroids[0], CentroidsArchLength, ccl::datatype::uint8, ccl_root, comm).wait(); + ccl::broadcast(&nodeCentroids[0], CentroidsArchLength, ccl::datatype::uint8, + ccl_root, comm) + .wait(); /* Deserialize centroids data */ - OutputDataArchive outArch(nodeCentroids.size() ? &nodeCentroids[0] : NULL, CentroidsArchLength); + OutputDataArchive outArch(nodeCentroids.size() ? 
&nodeCentroids[0] : NULL, + CentroidsArchLength); NumericTablePtr centroids(new HomogenNumericTable()); @@ -76,10 +82,10 @@ static NumericTablePtr kmeans_compute(int rankId, ccl::communicator &comm, size_t perNodeArchLength = dataArch.getSizeOfArchive(); ByteBuffer serializedData; - /* Serialized data is of equal size on each node if each node called compute() equal number of times */ + /* Serialized data is of equal size on each node if each node called + * compute() equal number of times */ vector recvCounts(nBlocks); - for (size_t i = 0; i < nBlocks; i++) - { + for (size_t i = 0; i < nBlocks; i++) { recvCounts[i] = perNodeArchLength; } serializedData.resize(perNodeArchLength * nBlocks); @@ -88,30 +94,37 @@ static NumericTablePtr kmeans_compute(int rankId, ccl::communicator &comm, dataArch.copyArchiveToArray(&nodeResults[0], perNodeArchLength); /* Transfer partial results to step 2 on the root node */ - ccl::allgatherv(&nodeResults[0], perNodeArchLength, &serializedData[0], recvCounts, ccl::datatype::uint8, comm).wait(); + ccl::allgatherv(&nodeResults[0], perNodeArchLength, &serializedData[0], + recvCounts, ccl::datatype::uint8, comm) + .wait(); - if (isRoot) - { + if (isRoot) { /* Create an algorithm to compute k-means on the master node */ - kmeans::Distributed masterAlgorithm(nClusters); + kmeans::Distributed masterAlgorithm( + nClusters); - for (size_t i = 0; i < nBlocks; i++) - { + for (size_t i = 0; i < nBlocks; i++) { /* Deserialize partial results from step 1 */ - OutputDataArchive dataArch(&serializedData[perNodeArchLength * i], perNodeArchLength); + OutputDataArchive dataArch(&serializedData[perNodeArchLength * i], + perNodeArchLength); - kmeans::PartialResultPtr dataForStep2FromStep1(new kmeans::PartialResult()); + kmeans::PartialResultPtr dataForStep2FromStep1( + new kmeans::PartialResult()); dataForStep2FromStep1->deserialize(dataArch); - /* Set local partial results as input for the master-node algorithm */ - masterAlgorithm.input.add(kmeans::partialResults, dataForStep2FromStep1); + /* Set local partial results as input for the master-node algorithm + */ + masterAlgorithm.input.add(kmeans::partialResults, + dataForStep2FromStep1); } /* Merge and finalizeCompute k-means on the master node */ masterAlgorithm.compute(); masterAlgorithm.finalizeCompute(); - ret_cost = masterAlgorithm.getResult()->get(kmeans::objectiveFunction)->getValue(0, 0); + ret_cost = masterAlgorithm.getResult() + ->get(kmeans::objectiveFunction) + ->getValue(0, 0); /* Retrieve the algorithm results */ return masterAlgorithm.getResult()->get(kmeans::centroids); @@ -119,7 +132,9 @@ static NumericTablePtr kmeans_compute(int rankId, ccl::communicator &comm, return NumericTablePtr(); } -static bool isCenterConverged(const algorithmFPType *oldCenter, const algorithmFPType *newCenter, size_t dim, double tolerance) { +static bool isCenterConverged(const algorithmFPType *oldCenter, + const algorithmFPType *newCenter, size_t dim, + double tolerance) { algorithmFPType sums = 0.0; @@ -129,7 +144,9 @@ static bool isCenterConverged(const algorithmFPType *oldCenter, const algorithmF return sums <= tolerance * tolerance; } -static bool areAllCentersConverged(const NumericTablePtr & oldCenters, const NumericTablePtr &newCenters, double tolerance) { +static bool areAllCentersConverged(const NumericTablePtr &oldCenters, + const NumericTablePtr &newCenters, + double tolerance) { size_t rows = oldCenters->getNumberOfRows(); size_t cols = oldCenters->getNumberOfColumns(); @@ -142,9 +159,8 @@ static bool 
areAllCentersConverged(const NumericTablePtr & oldCenters, const Num algorithmFPType *arrayNewCenters = blockNewCenters.getBlockPtr(); for (size_t i = 0; i < rows; i++) { - if (!isCenterConverged(&arrayOldCenters[i*cols], - &arrayNewCenters[i*cols], - cols, tolerance)) + if (!isCenterConverged(&arrayOldCenters[i * cols], + &arrayNewCenters[i * cols], cols, tolerance)) return false; } @@ -156,69 +172,78 @@ static bool areAllCentersConverged(const NumericTablePtr & oldCenters, const Num * Method: cKMeansDALComputeWithInitCenters * Signature: (JJIDIIILorg/apache/spark/ml/clustering/KMeansResult;)J */ -JNIEXPORT jlong JNICALL Java_org_apache_spark_ml_clustering_KMeansDALImpl_cKMeansDALComputeWithInitCenters - (JNIEnv *env, jobject obj, - jlong pNumTabData, jlong pNumTabCenters, - jint cluster_num, jdouble tolerance, jint iteration_num, - jint executor_num, jint executor_cores, - jobject resultObj) { +JNIEXPORT jlong JNICALL +Java_org_apache_spark_ml_clustering_KMeansDALImpl_cKMeansDALComputeWithInitCenters( + JNIEnv *env, jobject obj, jlong pNumTabData, jlong pNumTabCenters, + jint cluster_num, jdouble tolerance, jint iteration_num, jint executor_num, + jint executor_cores, jobject resultObj) { - ccl::communicator &comm = getComm(); - size_t rankId = comm.rank(); + ccl::communicator &comm = getComm(); + size_t rankId = comm.rank(); - NumericTablePtr pData = *((NumericTablePtr *)pNumTabData); - NumericTablePtr centroids = *((NumericTablePtr *)pNumTabCenters); + NumericTablePtr pData = *((NumericTablePtr *)pNumTabData); + NumericTablePtr centroids = *((NumericTablePtr *)pNumTabCenters); - // Set number of threads for oneDAL to use for each rank - services::Environment::getInstance()->setNumberOfThreads(executor_cores); + // Set number of threads for oneDAL to use for each rank + services::Environment::getInstance()->setNumberOfThreads(executor_cores); - int nThreadsNew = services::Environment::getInstance()->getNumberOfThreads(); - cout << "oneDAL (native): Number of threads used: " << nThreadsNew << endl; + int nThreadsNew = + services::Environment::getInstance()->getNumberOfThreads(); + cout << "oneDAL (native): Number of CPU threads used: " << nThreadsNew + << endl; - algorithmFPType totalCost; + algorithmFPType totalCost; - NumericTablePtr newCentroids; - bool converged = false; + NumericTablePtr newCentroids; + bool converged = false; - int it = 0; - for (it = 0; it < iteration_num && !converged; it++) { - auto t1 = std::chrono::high_resolution_clock::now(); + int it = 0; + for (it = 0; it < iteration_num && !converged; it++) { + auto t1 = std::chrono::high_resolution_clock::now(); - newCentroids = kmeans_compute(rankId, comm, pData, centroids, cluster_num, executor_num, totalCost); + newCentroids = kmeans_compute(rankId, comm, pData, centroids, + cluster_num, executor_num, totalCost); - if (rankId == ccl_root) { - converged = areAllCentersConverged(centroids, newCentroids, tolerance); + if (rankId == ccl_root) { + converged = + areAllCentersConverged(centroids, newCentroids, tolerance); + } + + // Sync converged status + ccl::broadcast(&converged, 1, ccl::datatype::uint8, ccl_root, comm) + .wait(); + + centroids = newCentroids; + + auto t2 = std::chrono::high_resolution_clock::now(); + auto duration = + std::chrono::duration_cast(t2 - t1).count(); + std::cout << "KMeans (native): iteration " << it << " took " << duration + << " secs" << std::endl; } - // Sync converged status - ccl::broadcast(&converged, 1, ccl::datatype::uint8, ccl_root, comm).wait(); - - centroids = newCentroids; - - 
auto t2 = std::chrono::high_resolution_clock::now(); - auto duration = std::chrono::duration_cast( t2 - t1 ).count(); - std::cout << "KMeans (native): iteration " << it << " took " << duration << " secs" << std::endl; - } - - if (rankId == ccl_root) { - if (it == iteration_num) - std::cout << "KMeans (native): reached " << iteration_num << " max iterations." << std::endl; - else - std::cout << "KMeans (native): converged in " << it << " iterations." << std::endl; - - // Get the class of the input object - jclass clazz = env->GetObjectClass(resultObj); - // Get Field references - jfieldID totalCostField = env->GetFieldID(clazz, "totalCost", "D"); - jfieldID iterationNumField = env->GetFieldID(clazz, "iterationNum", "I"); - - // Set iteration num for result - env->SetIntField(resultObj, iterationNumField, it); - // Set cost for result - env->SetDoubleField(resultObj, totalCostField, totalCost); - - NumericTablePtr *ret = new NumericTablePtr(centroids); - return (jlong)ret; - } else - return (jlong)0; -} \ No newline at end of file + if (rankId == ccl_root) { + if (it == iteration_num) + std::cout << "KMeans (native): reached " << iteration_num + << " max iterations." << std::endl; + else + std::cout << "KMeans (native): converged in " << it + << " iterations." << std::endl; + + // Get the class of the input object + jclass clazz = env->GetObjectClass(resultObj); + // Get Field references + jfieldID totalCostField = env->GetFieldID(clazz, "totalCost", "D"); + jfieldID iterationNumField = + env->GetFieldID(clazz, "iterationNum", "I"); + + // Set iteration num for result + env->SetIntField(resultObj, iterationNumField, it); + // Set cost for result + env->SetDoubleField(resultObj, totalCostField, totalCost); + + NumericTablePtr *ret = new NumericTablePtr(centroids); + return (jlong)ret; + } else + return (jlong)0; +} diff --git a/mllib-dal/src/main/native/Makefile b/mllib-dal/src/main/native/Makefile index e3a7e2161..06b8e0c13 100644 --- a/mllib-dal/src/main/native/Makefile +++ b/mllib-dal/src/main/native/Makefile @@ -21,7 +21,6 @@ CFLAGS := -g -Wall -Wno-deprecated-declarations -fPIC -std=c++11 # The following paths setting works for self-built libs from source code # https://github.com/oneapi-src/oneCCL. 
If oneCCL package in oneAPI Toolkit is used, # Should change paths to ${CCL_ROOT}/{include,lib}/cpu_icc instead - INCS := -I $(JAVA_HOME)/include \ -I $(JAVA_HOME)/include/linux \ -I ${CCL_ROOT}/include \ @@ -30,18 +29,21 @@ INCS := -I $(JAVA_HOME)/include \ -I ./ # Use static link if possible, TBB is only available as dynamic libs - LIBS := -L${CCL_ROOT}/lib -lccl \ -L$(DAALROOT)/lib/intel64 -l:libdaal_core.a -l:libdaal_thread.a \ -L$(TBBROOT)/lib/intel64/gcc4.8 -ltbb -ltbbmalloc -# TODO: Add signal chaining support, should fix linking, package so and loading -# -L$(JAVA_HOME)/jre/lib/amd64 -ljsig CPP_SRCS += \ -./OneCCL.cpp ./OneDAL.cpp ./KMeansDALImpl.cpp ./PCADALImpl.cpp ./ALSDALImpl.cpp ./ALSShuffle.cpp ./service.cpp ./error_handling.cpp + ./OneCCL.cpp ./OneDAL.cpp ./service.cpp ./error_handling.cpp \ + ./KMeansDALImpl.cpp \ + ./PCADALImpl.cpp \ + ./ALSDALImpl.cpp ./ALSShuffle.cpp OBJS += \ -./OneCCL.o ./OneDAL.o ./KMeansDALImpl.o ./PCADALImpl.o ./ALSDALImpl.o ./ALSShuffle.o ./service.o ./error_handling.o + ./OneCCL.o ./OneDAL.o ./service.o ./error_handling.o \ + ./KMeansDALImpl.o \ + ./PCADALImpl.o \ + ./ALSDALImpl.o ./ALSShuffle.o # Output Binary OUTPUT = ../../../target/libMLlibDAL.so diff --git a/mllib-dal/src/main/native/OneCCL.cpp b/mllib-dal/src/main/native/OneCCL.cpp index c733c7b33..9b1aa6e2c 100644 --- a/mllib-dal/src/main/native/OneCCL.cpp +++ b/mllib-dal/src/main/native/OneCCL.cpp @@ -1,65 +1,88 @@ -#include +/******************************************************************************* + * Copyright 2020 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *******************************************************************************/ + #include +#include #include #include #include #include -#include #include +#include #include #include +#include "OneCCL.h" #include "org_apache_spark_ml_util_OneCCL__.h" -// todo: fill initial comm_size and rank_id -size_t comm_size; -size_t rank_id; +static const int CCL_IP_LEN = 128; +static std::list local_host_ips; +static size_t comm_size = 0; +static size_t rank_id = 0; +static std::vector g_comms; -std::vector g_comms; +ccl::communicator &getComm() { return g_comms[0]; } -ccl::communicator &getComm() { - return g_comms[0]; -} +/* + * Class: org_apache_spark_ml_util_OneCCL__ + * Method: c_init + * Signature: (IILjava/lang/String;Lorg/apache/spark/ml/util/CCLParam;)I + */ +JNIEXPORT jint JNICALL Java_org_apache_spark_ml_util_OneCCL_00024_c_1init( + JNIEnv *env, jobject obj, jint size, jint rank, jstring ip_port, + jobject param) { -JNIEXPORT jint JNICALL Java_org_apache_spark_ml_util_OneCCL_00024_c_1init - (JNIEnv *env, jobject obj, jint size, jint rank, jstring ip_port, jobject param) { - - std::cerr << "OneCCL (native): init" << std::endl; + std::cerr << "OneCCL (native): init" << std::endl; - auto t1 = std::chrono::high_resolution_clock::now(); + auto t1 = std::chrono::high_resolution_clock::now(); - ccl::init(); + ccl::init(); - const char *str = env->GetStringUTFChars(ip_port, 0); - ccl::string ccl_ip_port(str); + const char *str = env->GetStringUTFChars(ip_port, 0); + ccl::string ccl_ip_port(str); - auto kvs_attr = ccl::create_kvs_attr(); - kvs_attr.set(ccl_ip_port); + auto kvs_attr = ccl::create_kvs_attr(); + kvs_attr.set(ccl_ip_port); - ccl::shared_ptr_class kvs; - kvs = ccl::create_main_kvs(kvs_attr); + ccl::shared_ptr_class kvs; + kvs = ccl::create_main_kvs(kvs_attr); - g_comms.push_back(ccl::create_communicator(size, rank, kvs)); + g_comms.push_back(ccl::create_communicator(size, rank, kvs)); - auto t2 = std::chrono::high_resolution_clock::now(); - auto duration = std::chrono::duration_cast( t2 - t1 ).count(); - std::cerr << "OneCCL (native): init took " << duration << " secs" << std::endl; + auto t2 = std::chrono::high_resolution_clock::now(); + auto duration = + std::chrono::duration_cast(t2 - t1).count(); + std::cerr << "OneCCL (native): init took " << duration << " secs" + << std::endl; - rank_id = getComm().rank(); - comm_size = getComm().size(); + rank_id = getComm().rank(); + comm_size = getComm().size(); - jclass cls = env->GetObjectClass(param); - jfieldID fid_comm_size = env->GetFieldID(cls, "commSize", "J"); - jfieldID fid_rank_id = env->GetFieldID(cls, "rankId", "J"); + jclass cls = env->GetObjectClass(param); + jfieldID fid_comm_size = env->GetFieldID(cls, "commSize", "J"); + jfieldID fid_rank_id = env->GetFieldID(cls, "rankId", "J"); - env->SetLongField(param, fid_comm_size, comm_size); - env->SetLongField(param, fid_rank_id, rank_id); - env->ReleaseStringUTFChars(ip_port, str); + env->SetLongField(param, fid_comm_size, comm_size); + env->SetLongField(param, fid_rank_id, rank_id); + env->ReleaseStringUTFChars(ip_port, str); - return 1; + return 1; } /* @@ -67,13 +90,12 @@ JNIEXPORT jint JNICALL Java_org_apache_spark_ml_util_OneCCL_00024_c_1init * Method: c_cleanup * Signature: ()V */ -JNIEXPORT void JNICALL Java_org_apache_spark_ml_util_OneCCL_00024_c_1cleanup - (JNIEnv *env, jobject obj) { - - g_comms.pop_back(); +JNIEXPORT void JNICALL Java_org_apache_spark_ml_util_OneCCL_00024_c_1cleanup( + JNIEnv *env, jobject obj) { - std::cerr << "OneCCL (native): cleanup" << std::endl; 
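// [Editor's note] Hedged sketch of the oneCCL bootstrap sequence that c_init performs
// above, with the JNI plumbing stripped away so the rendezvous flow is easier to follow.
// The kvs_attr_id::ip_port template argument is an assumption on my part: the original
// template parameters were lost when this patch was extracted, so treat the exact
// attribute id as illustrative rather than verbatim.
//
//     ccl::init();
//     ccl::string ccl_ip_port(str);                // ip/port string handed down from the Scala layer
//     auto kvs_attr = ccl::create_kvs_attr();
//     kvs_attr.set<ccl::kvs_attr_id::ip_port>(ccl_ip_port);   // root's address for the key-value store
//     auto kvs = ccl::create_main_kvs(kvs_attr);   // rendezvous point shared by all ranks
//     g_comms.push_back(ccl::create_communicator(size, rank, kvs));
//
// Every executor appears to call this with the same ip_port string, so all ranks meet at
// the same KVS and end up in a single communicator of `size` ranks; rank_id and comm_size
// are then read back from that communicator and reported to the JVM through the CCLParam
// fields set below.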
+ g_comms.pop_back(); + std::cerr << "OneCCL (native): cleanup" << std::endl; } /* @@ -81,8 +103,8 @@ JNIEXPORT void JNICALL Java_org_apache_spark_ml_util_OneCCL_00024_c_1cleanup * Method: isRoot * Signature: ()Z */ -JNIEXPORT jboolean JNICALL Java_org_apache_spark_ml_util_OneCCL_00024_isRoot - (JNIEnv *env, jobject obj) { +JNIEXPORT jboolean JNICALL +Java_org_apache_spark_ml_util_OneCCL_00024_isRoot(JNIEnv *env, jobject obj) { return getComm().rank() == 0; } @@ -92,8 +114,8 @@ JNIEXPORT jboolean JNICALL Java_org_apache_spark_ml_util_OneCCL_00024_isRoot * Method: rankID * Signature: ()I */ -JNIEXPORT jint JNICALL Java_org_apache_spark_ml_util_OneCCL_00024_rankID - (JNIEnv *env, jobject obj) { +JNIEXPORT jint JNICALL +Java_org_apache_spark_ml_util_OneCCL_00024_rankID(JNIEnv *env, jobject obj) { return getComm().rank(); } @@ -102,11 +124,11 @@ JNIEXPORT jint JNICALL Java_org_apache_spark_ml_util_OneCCL_00024_rankID * Method: setEnv * Signature: (Ljava/lang/String;Ljava/lang/String;Z)I */ -JNIEXPORT jint JNICALL Java_org_apache_spark_ml_util_OneCCL_00024_setEnv - (JNIEnv *env , jobject obj, jstring key, jstring value, jboolean overwrite) { +JNIEXPORT jint JNICALL Java_org_apache_spark_ml_util_OneCCL_00024_setEnv( + JNIEnv *env, jobject obj, jstring key, jstring value, jboolean overwrite) { - char* k = (char *) env->GetStringUTFChars(key, NULL); - char* v = (char *) env->GetStringUTFChars(value, NULL); + char *k = (char *)env->GetStringUTFChars(key, NULL); + char *v = (char *)env->GetStringUTFChars(value, NULL); int err = setenv(k, v, overwrite); @@ -116,15 +138,12 @@ JNIEXPORT jint JNICALL Java_org_apache_spark_ml_util_OneCCL_00024_setEnv return err; } -static const int CCL_IP_LEN = 128; -std::list local_host_ips; - static int fill_local_host_ip() { struct ifaddrs *ifaddr, *ifa; int family = AF_UNSPEC; char local_ip[CCL_IP_LEN]; if (getifaddrs(&ifaddr) < 0) { - // LOG_ERROR("fill_local_host_ip: can not get host IP"); + std::cerr << "OneCCL (native): can not get host IP" << std::endl; return -1; } @@ -140,16 +159,13 @@ static int fill_local_host_ip() { memset(local_ip, 0, CCL_IP_LEN); int res = getnameinfo( ifa->ifa_addr, - (family == AF_INET) ? sizeof(struct sockaddr_in) : sizeof(struct sockaddr_in6), - local_ip, - CCL_IP_LEN, - NULL, - 0, - NI_NUMERICHOST); + (family == AF_INET) ? 
sizeof(struct sockaddr_in) + : sizeof(struct sockaddr_in6), + local_ip, CCL_IP_LEN, NULL, 0, NI_NUMERICHOST); if (res != 0) { - std::string s("fill_local_host_ip: getnameinfo error > "); + std::string s("OneCCL (native): getnameinfo error > "); s.append(gai_strerror(res)); - // LOG_ERROR(s.c_str()); + std::cerr << s << std::endl; return -1; } local_host_ips.push_back(local_ip); @@ -157,30 +173,30 @@ static int fill_local_host_ip() { } } if (local_host_ips.empty()) { - // LOG_ERROR("fill_local_host_ip: can't find interface to get host IP"); + std::cerr << "OneCCL (native): can't find interface to get host IP" + << std::endl; return -1; } - // memset(local_host_ip, 0, CCL_IP_LEN); - // strncpy(local_host_ip, local_host_ips.front().c_str(), CCL_IP_LEN); - - // for (auto &ip : local_host_ips) - // cout << ip << endl; freeifaddrs(ifaddr); + return 0; } static bool is_valid_ip(char ip[]) { - if (fill_local_host_ip() == -1) { - std::cerr << "fill_local_host_ip error" << std::endl; - }; - for (std::list::iterator it = local_host_ips.begin(); it != local_host_ips.end(); ++it) { - if (*it == ip) { - return true; + if (fill_local_host_ip() == -1) { + std::cerr << "OneCCL (native): get local host ip error" << std::endl; + return false; + }; + + for (std::list::iterator it = local_host_ips.begin(); + it != local_host_ips.end(); ++it) { + if (*it == ip) { + return true; + } } - } - return false; + return false; } /* @@ -188,42 +204,44 @@ static bool is_valid_ip(char ip[]) { * Method: getAvailPort * Signature: (Ljava/lang/String;)I */ -JNIEXPORT jint JNICALL Java_org_apache_spark_ml_util_OneCCL_00024_c_1getAvailPort - (JNIEnv *env, jobject obj, jstring localIP) { - - // start from beginning of dynamic port - const int port_start_base = 3000; +JNIEXPORT jint JNICALL +Java_org_apache_spark_ml_util_OneCCL_00024_c_1getAvailPort(JNIEnv *env, + jobject obj, + jstring localIP) { - char* local_host_ip = (char *) env->GetStringUTFChars(localIP, NULL); + // start from beginning of dynamic port + const int port_start_base = 3000; - // check if the input ip is one of host's ips - if (!is_valid_ip(local_host_ip)) - return -1; + char *local_host_ip = (char *)env->GetStringUTFChars(localIP, NULL); - struct sockaddr_in main_server_address; - int server_listen_sock; - in_port_t port = port_start_base; + // check if the input ip is one of host's ips + if (!is_valid_ip(local_host_ip)) + return -1; - if ((server_listen_sock = socket(AF_INET, SOCK_STREAM, 0)) < 0) { - perror("OneCCL (native) getAvailPort error!"); - return -1; - } + struct sockaddr_in main_server_address; + int server_listen_sock; + in_port_t port = port_start_base; - main_server_address.sin_family = AF_INET; - main_server_address.sin_addr.s_addr = inet_addr(local_host_ip); - main_server_address.sin_port = htons(port); + if ((server_listen_sock = socket(AF_INET, SOCK_STREAM, 0)) < 0) { + perror("OneCCL (native) getAvailPort error!"); + return -1; + } - // search for available port - while (bind(server_listen_sock, - (const struct sockaddr *)&main_server_address, - sizeof(main_server_address)) < 0) { - port++; + main_server_address.sin_family = AF_INET; + main_server_address.sin_addr.s_addr = inet_addr(local_host_ip); main_server_address.sin_port = htons(port); - } - close(server_listen_sock); + // search for available port + while (bind(server_listen_sock, + (const struct sockaddr *)&main_server_address, + sizeof(main_server_address)) < 0) { + port++; + main_server_address.sin_port = htons(port); + } + + close(server_listen_sock); - 
env->ReleaseStringUTFChars(localIP, local_host_ip); + env->ReleaseStringUTFChars(localIP, local_host_ip); - return port; + return port; } diff --git a/mllib-dal/src/main/native/OneCCL.h b/mllib-dal/src/main/native/OneCCL.h index b579c4697..056d898d3 100644 --- a/mllib-dal/src/main/native/OneCCL.h +++ b/mllib-dal/src/main/native/OneCCL.h @@ -1,3 +1,19 @@ +/******************************************************************************* + * Copyright 2020 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *******************************************************************************/ + #pragma once #include diff --git a/mllib-dal/src/main/native/OneDAL.cpp b/mllib-dal/src/main/native/OneDAL.cpp index 792225c3e..9c3c48657 100644 --- a/mllib-dal/src/main/native/OneDAL.cpp +++ b/mllib-dal/src/main/native/OneDAL.cpp @@ -1,24 +1,24 @@ /******************************************************************************* -* Copyright 2020 Intel Corporation -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*******************************************************************************/ + * Copyright 2020 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *******************************************************************************/ +#include #include #include -#include -#include "org_apache_spark_ml_util_OneDAL__.h" +#include "org_apache_spark_ml_util_OneDAL__.h" #include "service.h" using namespace daal; @@ -32,97 +32,72 @@ extern bool daal_check_is_intel_cpu(); * Method: setNumericTableValue * Signature: (JIID)V */ -JNIEXPORT void JNICALL Java_org_apache_spark_ml_util_OneDAL_00024_setNumericTableValue - (JNIEnv *, jobject, jlong numTableAddr, jint row, jint column, jdouble value) { - - HomogenNumericTable * nt = static_cast *>(((SerializationIfacePtr *)numTableAddr)->get()); - (*nt)[row][column] = (double)value; - +JNIEXPORT void JNICALL +Java_org_apache_spark_ml_util_OneDAL_00024_setNumericTableValue( + JNIEnv *, jobject, jlong numTableAddr, jint row, jint column, + jdouble value) { + HomogenNumericTable *nt = + static_cast *>( + ((SerializationIfacePtr *)numTableAddr)->get()); + (*nt)[row][column] = (double)value; } -JNIEXPORT void JNICALL Java_org_apache_spark_ml_util_OneDAL_00024_cSetDoubleBatch - (JNIEnv *env, jobject, jlong numTableAddr, jint curRows, jdoubleArray batch, jint numRows, jint numCols) { - - HomogenNumericTable *nt = static_cast *>( - ((SerializationIfacePtr *)numTableAddr)->get()); - jdouble* values = (jdouble*)env->GetPrimitiveArrayCritical(batch, 0); +/* + * Class: org_apache_spark_ml_util_OneDAL__ + * Method: cSetDoubleBatch + * Signature: (JI[DII)V + */ +JNIEXPORT void JNICALL +Java_org_apache_spark_ml_util_OneDAL_00024_cSetDoubleBatch( + JNIEnv *env, jobject, jlong numTableAddr, jint curRows, jdoubleArray batch, + jint numRows, jint numCols) { + HomogenNumericTable *nt = + static_cast *>( + ((SerializationIfacePtr *)numTableAddr)->get()); + jdouble *values = (jdouble *)env->GetPrimitiveArrayCritical(batch, 0); std::memcpy((*nt)[curRows], values, numRows * numCols * sizeof(double)); env->ReleasePrimitiveArrayCritical(batch, values, JNI_ABORT); - } - - -JNIEXPORT void JNICALL Java_org_apache_spark_ml_util_OneDAL_00024_cSetDoubleIterator - (JNIEnv *env, jobject, jlong numTableAddr, jobject jiter, jint curRows) { - - jclass iterClass = env->FindClass("java/util/Iterator"); - jmethodID hasNext = env->GetMethodID(iterClass, - "hasNext", "()Z"); - jmethodID next = env->GetMethodID(iterClass, - "next", "()Ljava/lang/Object;"); - - HomogenNumericTable *nt = static_cast *>( - ((SerializationIfacePtr *)numTableAddr)->get()); - - while (env->CallBooleanMethod(jiter, hasNext)) { - jobject batch = env->CallObjectMethod(jiter, next); - - jclass batchClass = env->GetObjectClass(batch); - jlongArray joffset = (jlongArray)env->GetObjectField( - batch, env->GetFieldID(batchClass, "rowOffset", "[J")); - jdoubleArray jvalue = (jdoubleArray)env->GetObjectField( - batch, env->GetFieldID(batchClass, "values", "[D")); - jint jcols = env->GetIntField( - batch, env->GetFieldID(batchClass, "numCols", "I")); - - long numRows = env->GetArrayLength(joffset); - - jlong* rowOffset = env->GetLongArrayElements(joffset, 0); - - jdouble* values = env->GetDoubleArrayElements(jvalue, 0); - - for (int i = 0; i < numRows; i ++){ - jlong curRow = rowOffset[i] + curRows; - for(int j = 0; j < jcols; j ++) { - (*nt)[curRow][j] = values[rowOffset[i] * jcols + j]; - } - } - - env->ReleaseLongArrayElements(joffset, rowOffset, 0); - env->DeleteLocalRef(joffset); - env->ReleaseDoubleArrayElements(jvalue, values, 0); - env->DeleteLocalRef(jvalue); - env->DeleteLocalRef(batch); - env->DeleteLocalRef(batchClass); - } - env->DeleteLocalRef(iterClass); - - } - 
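// [Editor's note] Hedged sketch of the copy pattern that the surviving cSetDoubleBatch
// entry point relies on, assuming a HomogenNumericTable<double> whose rows are laid out
// contiguously; variable names follow the patch, the standalone framing is mine.
//
//     jdouble *values = (jdouble *)env->GetPrimitiveArrayCritical(batch, 0);   // pin the Java double[]
//     std::memcpy((*nt)[curRows], values, numRows * numCols * sizeof(double)); // bulk copy into the table
//     env->ReleasePrimitiveArrayCritical(batch, values, JNI_ABORT);            // JNI_ABORT: discard, nothing to copy back
//
// The cSetDoubleIterator function removed just above walked each DataBatch element by
// element through per-row JNI accesses; the batch path copies whole row blocks with a
// single memcpy while the array is pinned, which is presumably why the iterator variant
// could be dropped from this file.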
-JNIEXPORT void JNICALL Java_org_apache_spark_ml_util_OneDAL_00024_cAddNumericTable - (JNIEnv *, jobject, jlong rowMergedNumericTableAddr, jlong numericTableAddr) { - - data_management::RowMergedNumericTablePtr pRowMergedNumericTable = (*(data_management::RowMergedNumericTablePtr *)rowMergedNumericTableAddr); - data_management::NumericTablePtr pNumericTable = (*(data_management::NumericTablePtr *)numericTableAddr); - pRowMergedNumericTable->addNumericTable(pNumericTable); - - } +} -JNIEXPORT void JNICALL Java_org_apache_spark_ml_util_OneDAL_00024_cFreeDataMemory - (JNIEnv *, jobject, jlong numericTableAddr) { +/* + * Class: org_apache_spark_ml_util_OneDAL__ + * Method: cAddNumericTable + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL +Java_org_apache_spark_ml_util_OneDAL_00024_cAddNumericTable( + JNIEnv *, jobject, jlong rowMergedNumericTableAddr, + jlong numericTableAddr) { + data_management::RowMergedNumericTablePtr pRowMergedNumericTable = (*( + data_management::RowMergedNumericTablePtr *)rowMergedNumericTableAddr); + data_management::NumericTablePtr pNumericTable = + (*(data_management::NumericTablePtr *)numericTableAddr); + pRowMergedNumericTable->addNumericTable(pNumericTable); +} - data_management::NumericTablePtr pNumericTable = (*(data_management::NumericTablePtr *)numericTableAddr); +/* + * Class: org_apache_spark_ml_util_OneDAL__ + * Method: cFreeDataMemory + * Signature: (J)V + */ +JNIEXPORT void JNICALL +Java_org_apache_spark_ml_util_OneDAL_00024_cFreeDataMemory( + JNIEnv *, jobject, jlong numericTableAddr) { + data_management::NumericTablePtr pNumericTable = + (*(data_management::NumericTablePtr *)numericTableAddr); pNumericTable->freeDataMemory(); - - } +} /* * Class: org_apache_spark_ml_util_OneDAL__ * Method: cCheckPlatformCompatibility * Signature: ()Z */ -JNIEXPORT jboolean JNICALL Java_org_apache_spark_ml_util_OneDAL_00024_cCheckPlatformCompatibility - (JNIEnv *, jobject) { - // Only guarantee compatibility and performance on Intel platforms, use oneDAL lib function +JNIEXPORT jboolean JNICALL +Java_org_apache_spark_ml_util_OneDAL_00024_cCheckPlatformCompatibility( + JNIEnv *, jobject) { + // Only guarantee compatibility and performance on Intel platforms, use + // oneDAL lib function return daal_check_is_intel_cpu(); } @@ -131,35 +106,32 @@ JNIEXPORT jboolean JNICALL Java_org_apache_spark_ml_util_OneDAL_00024_cCheckPlat * Method: cNewCSRNumericTable * Signature: ([F[J[JJJ)J */ -JNIEXPORT jlong JNICALL Java_org_apache_spark_ml_util_OneDAL_00024_cNewCSRNumericTable - (JNIEnv *env, jobject, jfloatArray data, jlongArray colIndices, jlongArray rowOffsets, jlong nFeatures, jlong nVectors) { +JNIEXPORT jlong JNICALL +Java_org_apache_spark_ml_util_OneDAL_00024_cNewCSRNumericTable( + JNIEnv *env, jobject, jfloatArray data, jlongArray colIndices, + jlongArray rowOffsets, jlong nFeatures, jlong nVectors) { long numData = env->GetArrayLength(data); - // long numColIndices = numData; - // long numRowOffsets = env->GetArrayLength(rowOffsets); - size_t * resultRowOffsets = NULL; - size_t * resultColIndices = NULL; - float * resultData = NULL; - CSRNumericTable * numericTable = new CSRNumericTable(resultData, resultColIndices, resultRowOffsets, nFeatures, nVectors); - numericTable->allocateDataMemory(numData); - numericTable->getArrays(&resultData, &resultColIndices, &resultRowOffsets); + size_t *resultRowOffsets = NULL; + size_t *resultColIndices = NULL; + float *resultData = NULL; - size_t * pRowOffsets = (size_t *)env->GetLongArrayElements(rowOffsets, 0); - size_t * pColIndices = 
(size_t *)env->GetLongArrayElements(colIndices, 0); - float * pData = env->GetFloatArrayElements(data, 0); + CSRNumericTable *numericTable = new CSRNumericTable( + resultData, resultColIndices, resultRowOffsets, nFeatures, nVectors); + numericTable->allocateDataMemory(numData); + numericTable->getArrays(&resultData, &resultColIndices, + &resultRowOffsets); - // std::memcpy(resultRowOffsets, pRowOffsets, numRowOffsets*sizeof(jlong)); - // std::memcpy(resultColIndices, pColIndices, numColIndices*sizeof(jlong)); - // std::memcpy(resultData, pData, numData*sizeof(float)); + size_t *pRowOffsets = (size_t *)env->GetLongArrayElements(rowOffsets, 0); + size_t *pColIndices = (size_t *)env->GetLongArrayElements(colIndices, 0); + float *pData = env->GetFloatArrayElements(data, 0); - for (size_t i = 0; i < (size_t)numData; ++i) - { - resultData[i] = pData[i]; + for (size_t i = 0; i < (size_t)numData; ++i) { + resultData[i] = pData[i]; resultColIndices[i] = pColIndices[i]; } - for (size_t i = 0; i < (size_t)nVectors + 1; ++i) - { + for (size_t i = 0; i < (size_t)nVectors + 1; ++i) { resultRowOffsets[i] = pRowOffsets[i]; } @@ -169,7 +141,5 @@ JNIEXPORT jlong JNICALL Java_org_apache_spark_ml_util_OneDAL_00024_cNewCSRNumeri CSRNumericTablePtr *ret = new CSRNumericTablePtr(numericTable); - //printNumericTable(*ret, "cNewCSRNumericTable", 10); - return (jlong)ret; } diff --git a/mllib-dal/src/main/native/PCADALImpl.cpp b/mllib-dal/src/main/native/PCADALImpl.cpp index 95172d05f..c04484a09 100644 --- a/mllib-dal/src/main/native/PCADALImpl.cpp +++ b/mllib-dal/src/main/native/PCADALImpl.cpp @@ -1,12 +1,26 @@ -#include - -#include "service.h" +/******************************************************************************* + * Copyright 2020 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *******************************************************************************/ #include +#include #include -#include "org_apache_spark_ml_feature_PCADALImpl.h" #include "OneCCL.h" +#include "org_apache_spark_ml_feature_PCADALImpl.h" +#include "service.h" using namespace std; using namespace daal; @@ -21,127 +35,156 @@ typedef double algorithmFPType; /* Algorithm floating-point type */ * Method: cPCATrainDAL * Signature: (JIIILorg/apache/spark/ml/feature/PCAResult;)J */ -JNIEXPORT jlong JNICALL Java_org_apache_spark_ml_feature_PCADALImpl_cPCATrainDAL( - JNIEnv *env, jobject obj, jlong pNumTabData, jint k, jint executor_num, jint executor_cores, - jobject resultObj) { - - ccl::communicator &comm = getComm(); - size_t rankId = comm.rank(); - - const size_t nBlocks = executor_num; - const int comm_size = executor_num; - - NumericTablePtr pData = *((NumericTablePtr*)pNumTabData); - // Source data already normalized - pData->setNormalizationFlag(NumericTableIface::standardScoreNormalized); - - // Set number of threads for oneDAL to use for each rank - services::Environment::getInstance()->setNumberOfThreads(executor_cores); - - int nThreadsNew = services::Environment::getInstance()->getNumberOfThreads(); - cout << "oneDAL (native): Number of threads used: " << nThreadsNew << endl; - - auto t1 = std::chrono::high_resolution_clock::now(); - - pca::Distributed localAlgorithm; - - /* Set the input data set to the algorithm */ - localAlgorithm.input.set(pca::data, pData); - - /* Compute PCA decomposition */ - localAlgorithm.compute(); - - auto t2 = std::chrono::high_resolution_clock::now(); - auto duration = std::chrono::duration_cast( t2 - t1 ).count(); - std::cout << "PCA (native): local step took " << duration << " secs" << std::endl; +JNIEXPORT jlong JNICALL +Java_org_apache_spark_ml_feature_PCADALImpl_cPCATrainDAL( + JNIEnv *env, jobject obj, jlong pNumTabData, jint k, jint executor_num, + jint executor_cores, jobject resultObj) { - t1 = std::chrono::high_resolution_clock::now(); + ccl::communicator &comm = getComm(); + size_t rankId = comm.rank(); - /* Serialize partial results required by step 2 */ - services::SharedPtr serializedData; - InputDataArchive dataArch; - localAlgorithm.getPartialResult()->serialize(dataArch); - size_t perNodeArchLength = dataArch.getSizeOfArchive(); + const size_t nBlocks = executor_num; + const int comm_size = executor_num; - serializedData = services::SharedPtr(new byte[perNodeArchLength * nBlocks]); + NumericTablePtr pData = *((NumericTablePtr *)pNumTabData); + // Source data already normalized + pData->setNormalizationFlag(NumericTableIface::standardScoreNormalized); - byte* nodeResults = new byte[perNodeArchLength]; - dataArch.copyArchiveToArray(nodeResults, perNodeArchLength); + // Set number of threads for oneDAL to use for each rank + services::Environment::getInstance()->setNumberOfThreads(executor_cores); - t2 = std::chrono::high_resolution_clock::now(); + int nThreadsNew = + services::Environment::getInstance()->getNumberOfThreads(); + cout << "oneDAL (native): Number of CPU threads used: " << nThreadsNew + << endl; - duration = std::chrono::duration_cast( t2 - t1 ).count(); - std::cout << "PCA (native): serializing partial results took " << duration << " secs" << std::endl; - - vector recv_counts(comm_size * perNodeArchLength); - for (int i = 0; i < comm_size; i++) recv_counts[i] = perNodeArchLength; - - cout << "PCA (native): ccl_allgatherv receiving " << perNodeArchLength * nBlocks << " bytes" << endl; - - t1 = 
std::chrono::high_resolution_clock::now(); - - /* Transfer partial results to step 2 on the root node */ - // MPI_Gather(nodeResults, perNodeArchLength, MPI_CHAR, serializedData.get(), - // perNodeArchLength, MPI_CHAR, ccl_root, MPI_COMM_WORLD); - ccl::allgatherv(nodeResults, perNodeArchLength, serializedData.get(), recv_counts, - ccl::datatype::uint8, comm).wait(); - - t2 = std::chrono::high_resolution_clock::now(); - - duration = std::chrono::duration_cast( t2 - t1 ).count(); - std::cout << "PCA (native): ccl_allgatherv took " << duration << " secs" << std::endl; - - if (rankId == ccl_root) { auto t1 = std::chrono::high_resolution_clock::now(); - /* Create an algorithm for principal component analysis using the svdDense method - * on the master node */ - pca::Distributed masterAlgorithm; + pca::Distributed localAlgorithm; - for (size_t i = 0; i < nBlocks; i++) { - /* Deserialize partial results from step 1 */ - OutputDataArchive dataArch(serializedData.get() + perNodeArchLength * i, - perNodeArchLength); + /* Set the input data set to the algorithm */ + localAlgorithm.input.set(pca::data, pData); - services::SharedPtr > - dataForStep2FromStep1 = - services::SharedPtr >( - new pca::PartialResult()); - dataForStep2FromStep1->deserialize(dataArch); - - /* Set local partial results as input for the master-node algorithm */ - masterAlgorithm.input.add(pca::partialResults, dataForStep2FromStep1); - } - - /* Merge and finalizeCompute PCA decomposition on the master node */ - masterAlgorithm.compute(); - masterAlgorithm.finalizeCompute(); - - /* Retrieve the algorithm results */ - pca::ResultPtr result = masterAlgorithm.getResult(); + /* Compute PCA decomposition */ + localAlgorithm.compute(); auto t2 = std::chrono::high_resolution_clock::now(); - auto duration = std::chrono::duration_cast( t2 - t1 ).count(); - std::cout << "PCA (native): master step took " << duration << " secs" << std::endl; - - /* Print the results */ - printNumericTable(result->get(pca::eigenvalues), "First 10 eigenvalues with first 20 dimensions:", 10, 20); - printNumericTable(result->get(pca::eigenvectors), "First 10 eigenvectors with first 20 dimensions:", 10, 20); - - // Return all eigenvalues & eigenvectors - - // Get the class of the input object - jclass clazz = env->GetObjectClass(resultObj); - // Get Field references - jfieldID pcNumericTableField = env->GetFieldID(clazz, "pcNumericTable", "J"); - jfieldID explainedVarianceNumericTableField = env->GetFieldID(clazz, "explainedVarianceNumericTable", "J"); - - NumericTablePtr *eigenvalues = new NumericTablePtr(result->get(pca::eigenvalues)); - NumericTablePtr *eigenvectors = new NumericTablePtr(result->get(pca::eigenvectors)); - - env->SetLongField(resultObj, pcNumericTableField, (jlong)eigenvectors); - env->SetLongField(resultObj, explainedVarianceNumericTableField, (jlong)eigenvalues); - } + auto duration = + std::chrono::duration_cast(t2 - t1).count(); + std::cout << "PCA (native): local step took " << duration << " secs" + << std::endl; + + t1 = std::chrono::high_resolution_clock::now(); + + /* Serialize partial results required by step 2 */ + services::SharedPtr serializedData; + InputDataArchive dataArch; + localAlgorithm.getPartialResult()->serialize(dataArch); + size_t perNodeArchLength = dataArch.getSizeOfArchive(); + + serializedData = + services::SharedPtr(new byte[perNodeArchLength * nBlocks]); + + byte *nodeResults = new byte[perNodeArchLength]; + dataArch.copyArchiveToArray(nodeResults, perNodeArchLength); + + t2 = 
std::chrono::high_resolution_clock::now(); + + duration = + std::chrono::duration_cast(t2 - t1).count(); + std::cout << "PCA (native): serializing partial results took " << duration + << " secs" << std::endl; + + vector recv_counts(comm_size * perNodeArchLength); + for (int i = 0; i < comm_size; i++) + recv_counts[i] = perNodeArchLength; + + cout << "PCA (native): ccl_allgatherv receiving " + << perNodeArchLength * nBlocks << " bytes" << endl; + + t1 = std::chrono::high_resolution_clock::now(); + + /* Transfer partial results to step 2 on the root node */ + // MPI_Gather(nodeResults, perNodeArchLength, MPI_CHAR, + // serializedData.get(), perNodeArchLength, MPI_CHAR, ccl_root, + // MPI_COMM_WORLD); + ccl::allgatherv(nodeResults, perNodeArchLength, serializedData.get(), + recv_counts, ccl::datatype::uint8, comm) + .wait(); + + t2 = std::chrono::high_resolution_clock::now(); + + duration = + std::chrono::duration_cast(t2 - t1).count(); + std::cout << "PCA (native): ccl_allgatherv took " << duration << " secs" + << std::endl; + + if (rankId == ccl_root) { + auto t1 = std::chrono::high_resolution_clock::now(); + + /* Create an algorithm for principal component analysis using the + * svdDense method on the master node */ + pca::Distributed + masterAlgorithm; + + for (size_t i = 0; i < nBlocks; i++) { + /* Deserialize partial results from step 1 */ + OutputDataArchive dataArch(serializedData.get() + + perNodeArchLength * i, + perNodeArchLength); + + services::SharedPtr> + dataForStep2FromStep1 = + services::SharedPtr>( + new pca::PartialResult()); + dataForStep2FromStep1->deserialize(dataArch); + + /* Set local partial results as input for the master-node algorithm + */ + masterAlgorithm.input.add(pca::partialResults, + dataForStep2FromStep1); + } + + /* Merge and finalizeCompute PCA decomposition on the master node */ + masterAlgorithm.compute(); + masterAlgorithm.finalizeCompute(); + + /* Retrieve the algorithm results */ + pca::ResultPtr result = masterAlgorithm.getResult(); + + auto t2 = std::chrono::high_resolution_clock::now(); + auto duration = + std::chrono::duration_cast(t2 - t1).count(); + std::cout << "PCA (native): master step took " << duration << " secs" + << std::endl; + + /* Print the results */ + printNumericTable(result->get(pca::eigenvalues), + "First 10 eigenvalues with first 20 dimensions:", 10, + 20); + printNumericTable(result->get(pca::eigenvectors), + "First 10 eigenvectors with first 20 dimensions:", 10, + 20); + + // Return all eigenvalues & eigenvectors + + // Get the class of the input object + jclass clazz = env->GetObjectClass(resultObj); + // Get Field references + jfieldID pcNumericTableField = + env->GetFieldID(clazz, "pcNumericTable", "J"); + jfieldID explainedVarianceNumericTableField = + env->GetFieldID(clazz, "explainedVarianceNumericTable", "J"); + + NumericTablePtr *eigenvalues = + new NumericTablePtr(result->get(pca::eigenvalues)); + NumericTablePtr *eigenvectors = + new NumericTablePtr(result->get(pca::eigenvectors)); + + env->SetLongField(resultObj, pcNumericTableField, (jlong)eigenvectors); + env->SetLongField(resultObj, explainedVarianceNumericTableField, + (jlong)eigenvalues); + } - return 0; + return 0; } diff --git a/mllib-dal/src/main/native/build-jni.sh b/mllib-dal/src/main/native/build-jni.sh index dacd8382b..3a07d62fc 100755 --- a/mllib-dal/src/main/native/build-jni.sh +++ b/mllib-dal/src/main/native/build-jni.sh @@ -1,5 +1,19 @@ #!/usr/bin/env bash +# Copyright 2020 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the 
"License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + WORK_DIR="$( cd $( dirname "${BASH_SOURCE[0]}" ) && pwd )" DAAL_JAR=${ONEAPI_ROOT}/dal/latest/lib/onedal.jar diff --git a/mllib-dal/src/main/native/build.sh b/mllib-dal/src/main/native/build.sh index 763cf4bbe..d271c5d97 100755 --- a/mllib-dal/src/main/native/build.sh +++ b/mllib-dal/src/main/native/build.sh @@ -1,4 +1,18 @@ #!/usr/bin/env bash +# Copyright 2020 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + make clean make -j diff --git a/mllib-dal/src/main/native/error_handling.cpp b/mllib-dal/src/main/native/error_handling.cpp index 2cb9a7270..ebd196901 100644 --- a/mllib-dal/src/main/native/error_handling.cpp +++ b/mllib-dal/src/main/native/error_handling.cpp @@ -1,19 +1,19 @@ /* file: error_handling.h */ /******************************************************************************* -* Copyright 2017-2020 Intel Corporation -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*******************************************************************************/ + * Copyright 2017-2020 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *******************************************************************************/ /* ! 
Content: @@ -27,38 +27,31 @@ const int fileError = -1001; -void checkAllocation(void * ptr) -{ - if (!ptr) - { +void checkAllocation(void *ptr) { + if (!ptr) { std::cout << "Error: Memory allocation failed" << std::endl; exit(-1); } } -void checkPtr(void * ptr) -{ - if (!ptr) - { +void checkPtr(void *ptr) { + if (!ptr) { std::cout << "Error: NULL pointer" << std::endl; exit(-2); } } -void fileOpenError(const char * filename) -{ +void fileOpenError(const char *filename) { std::cout << "Unable to open file '" << filename << "'" << std::endl; exit(fileError); } -void fileReadError() -{ +void fileReadError() { std::cout << "Unable to read next line" << std::endl; exit(fileError); } -void sparceFileReadError() -{ +void sparceFileReadError() { std::cout << "Incorrect format of file" << std::endl; exit(fileError); } diff --git a/mllib-dal/src/main/native/error_handling.h b/mllib-dal/src/main/native/error_handling.h index c157a1ada..7852cab24 100644 --- a/mllib-dal/src/main/native/error_handling.h +++ b/mllib-dal/src/main/native/error_handling.h @@ -1,19 +1,19 @@ /* file: error_handling.h */ /******************************************************************************* -* Copyright 2017-2020 Intel Corporation -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*******************************************************************************/ + * Copyright 2017-2020 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *******************************************************************************/ /* ! Content: @@ -25,9 +25,9 @@ const int fileError = -1001; -void checkAllocation(void * ptr); -void checkPtr(void * ptr); -void fileOpenError(const char * filename); +void checkAllocation(void *ptr); +void checkPtr(void *ptr); +void fileOpenError(const char *filename); void fileReadError(); void sparceFileReadError(); diff --git a/mllib-dal/src/main/native/service.cpp b/mllib-dal/src/main/native/service.cpp index 623767406..7cb26c385 100644 --- a/mllib-dal/src/main/native/service.cpp +++ b/mllib-dal/src/main/native/service.cpp @@ -1,11 +1,32 @@ +/* file: service.cpp */ +/******************************************************************************* + * Copyright 2017-2020 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *******************************************************************************/ + +/* +! Content: +! Auxiliary functions used in C++ samples +!******************************************************************************/ + #include "service.h" #include "error_handling.h" -size_t readTextFile(const std::string & datasetFileName, daal::byte ** data) -{ - std::ifstream file(datasetFileName.c_str(), std::ios::binary | std::ios::ate); - if (!file.is_open()) - { +size_t readTextFile(const std::string &datasetFileName, daal::byte **data) { + std::ifstream file(datasetFileName.c_str(), + std::ios::binary | std::ios::ate); + if (!file.is_open()) { fileOpenError(datasetFileName.c_str()); } @@ -17,8 +38,7 @@ size_t readTextFile(const std::string & datasetFileName, daal::byte ** data) (*data) = new daal::byte[fileSize]; checkAllocation(data); - if (!file.read((char *)(*data), fileSize)) - { + if (!file.read((char *)(*data), fileSize)) { delete[] data; fileReadError(); } @@ -27,12 +47,11 @@ size_t readTextFile(const std::string & datasetFileName, daal::byte ** data) } template -void readLine(std::string & line, size_t nCols, item_type * data, size_t firstPos = 0) -{ +void readLine(std::string &line, size_t nCols, item_type *data, + size_t firstPos = 0) { std::stringstream iss(line); - for (size_t col = 0; col < nCols; ++col) - { + for (size_t col = 0; col < nCols; ++col) { std::string val; std::getline(iss, val, ','); @@ -42,138 +61,141 @@ void readLine(std::string & line, size_t nCols, item_type * data, size_t firstPo } template -void readRowUnknownLength(char * line, std::vector & data) -{ - size_t n = 0; - const char * prevDelim = line - 1; - char * ptr = line; - for (; *ptr; ++ptr) - { - if (*ptr == ',' || *ptr == '\r') - { - if (prevDelim != ptr - 1) ++n; - *ptr = ' '; +void readRowUnknownLength(char *line, std::vector &data) { + size_t n = 0; + const char *prevDelim = line - 1; + char *ptr = line; + for (; *ptr; ++ptr) { + if (*ptr == ',' || *ptr == '\r') { + if (prevDelim != ptr - 1) + ++n; + *ptr = ' '; prevDelim = ptr; } } - if (prevDelim != ptr - 1) ++n; + if (prevDelim != ptr - 1) + ++n; data.resize(n); std::stringstream iss(line); - for (size_t i = 0; i < n; ++i) - { + for (size_t i = 0; i < n; ++i) { iss >> data[i]; } } template -CSRNumericTable * createSparseTable(const std::string & datasetFileName) -{ +CSRNumericTable *createSparseTable(const std::string &datasetFileName) { std::ifstream file(datasetFileName.c_str()); - if (!file.is_open()) - { + if (!file.is_open()) { fileOpenError(datasetFileName.c_str()); } std::string str; - //read row offsets + // read row offsets std::getline(file, str); std::vector rowOffsets; readRowUnknownLength(&str[0], rowOffsets); - if (!rowOffsets.size()) return NULL; + if (!rowOffsets.size()) + return NULL; const size_t nVectors = rowOffsets.size() - 1; - //read cols indices + // read cols indices std::getline(file, str); std::vector colIndices; readRowUnknownLength(&str[0], colIndices); - //read values + // read values std::getline(file, str); std::vector data; readRowUnknownLength(&str[0], data); const size_t nNonZeros = 
data.size(); size_t maxCol = 0; - for (size_t i = 0; i < colIndices.size(); ++i) - { - if (colIndices[i] > maxCol) maxCol = colIndices[i]; + for (size_t i = 0; i < colIndices.size(); ++i) { + if (colIndices[i] > maxCol) + maxCol = colIndices[i]; } const size_t nFeatures = maxCol; - if (!nFeatures || !nVectors || colIndices.size() != nNonZeros || nNonZeros != (rowOffsets[nVectors] - 1)) - { + if (!nFeatures || !nVectors || colIndices.size() != nNonZeros || + nNonZeros != (rowOffsets[nVectors] - 1)) { sparceFileReadError(); } - size_t * resultRowOffsets = NULL; - size_t * resultColIndices = NULL; - item_type * resultData = NULL; - CSRNumericTable * numericTable = new CSRNumericTable(resultData, resultColIndices, resultRowOffsets, nFeatures, nVectors); + size_t *resultRowOffsets = NULL; + size_t *resultColIndices = NULL; + item_type *resultData = NULL; + CSRNumericTable *numericTable = new CSRNumericTable( + resultData, resultColIndices, resultRowOffsets, nFeatures, nVectors); numericTable->allocateDataMemory(nNonZeros); - numericTable->getArrays(&resultData, &resultColIndices, &resultRowOffsets); - for (size_t i = 0; i < nNonZeros; ++i) - { - resultData[i] = data[i]; + numericTable->getArrays(&resultData, &resultColIndices, + &resultRowOffsets); + for (size_t i = 0; i < nNonZeros; ++i) { + resultData[i] = data[i]; resultColIndices[i] = colIndices[i]; } - for (size_t i = 0; i < nVectors + 1; ++i) - { + for (size_t i = 0; i < nVectors + 1; ++i) { resultRowOffsets[i] = rowOffsets[i]; } return numericTable; } -CSRNumericTable * createFloatSparseTable(const std::string & datasetFileName) { +CSRNumericTable *createFloatSparseTable(const std::string &datasetFileName) { return createSparseTable(datasetFileName); } -void printAprioriItemsets(NumericTablePtr largeItemsetsTable, NumericTablePtr largeItemsetsSupportTable, size_t nItemsetToPrint = 20) -{ - size_t largeItemsetCount = largeItemsetsSupportTable->getNumberOfRows(); +void printAprioriItemsets(NumericTablePtr largeItemsetsTable, + NumericTablePtr largeItemsetsSupportTable, + size_t nItemsetToPrint = 20) { + size_t largeItemsetCount = largeItemsetsSupportTable->getNumberOfRows(); size_t nItemsInLargeItemsets = largeItemsetsTable->getNumberOfRows(); BlockDescriptor block1; - largeItemsetsTable->getBlockOfRows(0, nItemsInLargeItemsets, readOnly, block1); - int * largeItemsets = block1.getBlockPtr(); + largeItemsetsTable->getBlockOfRows(0, nItemsInLargeItemsets, readOnly, + block1); + int *largeItemsets = block1.getBlockPtr(); BlockDescriptor block2; - largeItemsetsSupportTable->getBlockOfRows(0, largeItemsetCount, readOnly, block2); - int * largeItemsetsSupportData = block2.getBlockPtr(); + largeItemsetsSupportTable->getBlockOfRows(0, largeItemsetCount, readOnly, + block2); + int *largeItemsetsSupportData = block2.getBlockPtr(); - std::vector > largeItemsetsVector; + std::vector> largeItemsetsVector; largeItemsetsVector.resize(largeItemsetCount); - for (size_t i = 0; i < nItemsInLargeItemsets; i++) - { - largeItemsetsVector[largeItemsets[2 * i]].push_back(largeItemsets[2 * i + 1]); + for (size_t i = 0; i < nItemsInLargeItemsets; i++) { + largeItemsetsVector[largeItemsets[2 * i]].push_back( + largeItemsets[2 * i + 1]); } std::vector supportVector; supportVector.resize(largeItemsetCount); - for (size_t i = 0; i < largeItemsetCount; i++) - { - supportVector[largeItemsetsSupportData[2 * i]] = largeItemsetsSupportData[2 * i + 1]; + for (size_t i = 0; i < largeItemsetCount; i++) { + supportVector[largeItemsetsSupportData[2 * i]] = + 
largeItemsetsSupportData[2 * i + 1]; } std::cout << std::endl << "Apriori example program results" << std::endl; - std::cout << std::endl << "Last " << nItemsetToPrint << " large itemsets: " << std::endl; + std::cout << std::endl + << "Last " << nItemsetToPrint << " large itemsets: " << std::endl; std::cout << std::endl << "Itemset" << "\t\t\tSupport" << std::endl; - size_t iMin = (((largeItemsetCount > nItemsetToPrint) && (nItemsetToPrint != 0)) ? largeItemsetCount - nItemsetToPrint : 0); - for (size_t i = iMin; i < largeItemsetCount; i++) - { + size_t iMin = + (((largeItemsetCount > nItemsetToPrint) && (nItemsetToPrint != 0)) + ? largeItemsetCount - nItemsetToPrint + : 0); + for (size_t i = iMin; i < largeItemsetCount; i++) { std::cout << "{"; - for (size_t l = 0; l < largeItemsetsVector[i].size() - 1; l++) - { + for (size_t l = 0; l < largeItemsetsVector[i].size() - 1; l++) { std::cout << largeItemsetsVector[i][l] << ", "; } - std::cout << largeItemsetsVector[i][largeItemsetsVector[i].size() - 1] << "}\t\t"; + std::cout << largeItemsetsVector[i][largeItemsetsVector[i].size() - 1] + << "}\t\t"; std::cout << supportVector[i] << std::endl; } @@ -182,74 +204,76 @@ void printAprioriItemsets(NumericTablePtr largeItemsetsTable, NumericTablePtr la largeItemsetsSupportTable->releaseBlockOfRows(block2); } -void printAprioriRules(NumericTablePtr leftItemsTable, NumericTablePtr rightItemsTable, NumericTablePtr confidenceTable, size_t nRulesToPrint = 20) -{ - size_t nRules = confidenceTable->getNumberOfRows(); - size_t nLeftItems = leftItemsTable->getNumberOfRows(); +void printAprioriRules(NumericTablePtr leftItemsTable, + NumericTablePtr rightItemsTable, + NumericTablePtr confidenceTable, + size_t nRulesToPrint = 20) { + size_t nRules = confidenceTable->getNumberOfRows(); + size_t nLeftItems = leftItemsTable->getNumberOfRows(); size_t nRightItems = rightItemsTable->getNumberOfRows(); BlockDescriptor block1; leftItemsTable->getBlockOfRows(0, nLeftItems, readOnly, block1); - int * leftItems = block1.getBlockPtr(); + int *leftItems = block1.getBlockPtr(); BlockDescriptor block2; rightItemsTable->getBlockOfRows(0, nRightItems, readOnly, block2); - int * rightItems = block2.getBlockPtr(); + int *rightItems = block2.getBlockPtr(); BlockDescriptor block3; confidenceTable->getBlockOfRows(0, nRules, readOnly, block3); - DAAL_DATA_TYPE * confidence = block3.getBlockPtr(); + DAAL_DATA_TYPE *confidence = block3.getBlockPtr(); - std::vector > leftItemsVector; + std::vector> leftItemsVector; leftItemsVector.resize(nRules); - if (nRules == 0) - { - std::cout << std::endl << "No association rules were found " << std::endl; + if (nRules == 0) { + std::cout << std::endl + << "No association rules were found " << std::endl; return; } - for (size_t i = 0; i < nLeftItems; i++) - { + for (size_t i = 0; i < nLeftItems; i++) { leftItemsVector[leftItems[2 * i]].push_back(leftItems[2 * i + 1]); } - std::vector > rightItemsVector; + std::vector> rightItemsVector; rightItemsVector.resize(nRules); - for (size_t i = 0; i < nRightItems; i++) - { + for (size_t i = 0; i < nRightItems; i++) { rightItemsVector[rightItems[2 * i]].push_back(rightItems[2 * i + 1]); } std::vector confidenceVector; confidenceVector.resize(nRules); - for (size_t i = 0; i < nRules; i++) - { + for (size_t i = 0; i < nRules; i++) { confidenceVector[i] = confidence[i]; } - std::cout << std::endl << "Last " << nRulesToPrint << " association rules: " << std::endl; + std::cout << std::endl + << "Last " << nRulesToPrint + << " association rules: " << std::endl; 
std::cout << std::endl << "Rule" << "\t\t\t\tConfidence" << std::endl; - size_t iMin = (((nRules > nRulesToPrint) && (nRulesToPrint != 0)) ? (nRules - nRulesToPrint) : 0); + size_t iMin = (((nRules > nRulesToPrint) && (nRulesToPrint != 0)) + ? (nRules - nRulesToPrint) + : 0); - for (size_t i = iMin; i < nRules; i++) - { + for (size_t i = iMin; i < nRules; i++) { std::cout << "{"; - for (size_t l = 0; l < leftItemsVector[i].size() - 1; l++) - { + for (size_t l = 0; l < leftItemsVector[i].size() - 1; l++) { std::cout << leftItemsVector[i][l] << ", "; } - std::cout << leftItemsVector[i][leftItemsVector[i].size() - 1] << "} => {"; + std::cout << leftItemsVector[i][leftItemsVector[i].size() - 1] + << "} => {"; - for (size_t l = 0; l < rightItemsVector[i].size() - 1; l++) - { + for (size_t l = 0; l < rightItemsVector[i].size() - 1; l++) { std::cout << rightItemsVector[i][l] << ", "; } - std::cout << rightItemsVector[i][rightItemsVector[i].size() - 1] << "}\t\t"; + std::cout << rightItemsVector[i][rightItemsVector[i].size() - 1] + << "}\t\t"; std::cout << confidenceVector[i] << std::endl; } @@ -259,44 +283,41 @@ void printAprioriRules(NumericTablePtr leftItemsTable, NumericTablePtr rightItem confidenceTable->releaseBlockOfRows(block3); } -bool isFull(NumericTableIface::StorageLayout layout) -{ +bool isFull(NumericTableIface::StorageLayout layout) { int layoutInt = (int)layout; - if (packed_mask & layoutInt) - { + if (packed_mask & layoutInt) { return false; } return true; } -bool isUpper(NumericTableIface::StorageLayout layout) -{ - if (layout == NumericTableIface::upperPackedSymmetricMatrix || layout == NumericTableIface::upperPackedTriangularMatrix) - { +bool isUpper(NumericTableIface::StorageLayout layout) { + if (layout == NumericTableIface::upperPackedSymmetricMatrix || + layout == NumericTableIface::upperPackedTriangularMatrix) { return true; } return false; } -bool isLower(NumericTableIface::StorageLayout layout) -{ - if (layout == NumericTableIface::lowerPackedSymmetricMatrix || layout == NumericTableIface::lowerPackedTriangularMatrix) - { +bool isLower(NumericTableIface::StorageLayout layout) { + if (layout == NumericTableIface::lowerPackedSymmetricMatrix || + layout == NumericTableIface::lowerPackedTriangularMatrix) { return true; } return false; } template -void printArray(T * array, const size_t nPrintedCols, const size_t nPrintedRows, const size_t nCols, const std::string& message, size_t interval = 10) -{ +void printArray(T *array, const size_t nPrintedCols, const size_t nPrintedRows, + const size_t nCols, const std::string &message, + size_t interval = 10) { std::cout << std::setiosflags(std::ios::left); std::cout << message << std::endl; - for (size_t i = 0; i < nPrintedRows; i++) - { - for (size_t j = 0; j < nPrintedCols; j++) - { - std::cout << std::setw(interval) << std::setiosflags(std::ios::fixed) << std::setprecision(3); + for (size_t i = 0; i < nPrintedRows; i++) { + for (size_t j = 0; j < nPrintedCols; j++) { + std::cout << std::setw(interval) + << std::setiosflags(std::ios::fixed) + << std::setprecision(3); std::cout << array[i * nCols + j]; } std::cout << std::endl; @@ -305,22 +326,22 @@ void printArray(T * array, const size_t nPrintedCols, const size_t nPrintedRows, } template -void printArray(T * array, const size_t nCols, const size_t nRows, const std::string& message, size_t interval = 10) -{ +void printArray(T *array, const size_t nCols, const size_t nRows, + const std::string &message, size_t interval = 10) { printArray(array, nCols, nRows, nCols, message, 
interval); } template -void printLowerArray(T * array, const size_t nPrintedRows, const std::string& message, size_t interval = 10) -{ +void printLowerArray(T *array, const size_t nPrintedRows, + const std::string &message, size_t interval = 10) { std::cout << std::setiosflags(std::ios::left); std::cout << message << std::endl; int ind = 0; - for (size_t i = 0; i < nPrintedRows; i++) - { - for (size_t j = 0; j <= i; j++) - { - std::cout << std::setw(interval) << std::setiosflags(std::ios::fixed) << std::setprecision(3); + for (size_t i = 0; i < nPrintedRows; i++) { + for (size_t j = 0; j <= i; j++) { + std::cout << std::setw(interval) + << std::setiosflags(std::ios::fixed) + << std::setprecision(3); std::cout << array[ind++]; } std::cout << std::endl; @@ -329,24 +350,23 @@ void printLowerArray(T * array, const size_t nPrintedRows, const std::string& me } template -void printUpperArray(T * array, const size_t nPrintedCols, const size_t nPrintedRows, const size_t nCols, const std::string& message, size_t interval = 10) -{ +void printUpperArray(T *array, const size_t nPrintedCols, + const size_t nPrintedRows, const size_t nCols, + const std::string &message, size_t interval = 10) { std::cout << std::setiosflags(std::ios::left); std::cout << message << std::endl; int ind = 0; - for (size_t i = 0; i < nPrintedRows; i++) - { - for (size_t j = 0; j < i; j++) - { + for (size_t i = 0; i < nPrintedRows; i++) { + for (size_t j = 0; j < i; j++) { std::cout << " "; } - for (size_t j = i; j < nPrintedCols; j++) - { - std::cout << std::setw(interval) << std::setiosflags(std::ios::fixed) << std::setprecision(3); + for (size_t j = i; j < nPrintedCols; j++) { + std::cout << std::setw(interval) + << std::setiosflags(std::ios::fixed) + << std::setprecision(3); std::cout << array[ind++]; } - for (size_t j = nPrintedCols; j < nCols; j++) - { + for (size_t j = nPrintedCols; j < nCols; j++) { ind++; } std::cout << std::endl; @@ -354,80 +374,77 @@ void printUpperArray(T * array, const size_t nPrintedCols, const size_t nPrinted std::cout << std::endl; } -void printNumericTable(NumericTable * dataTable, const char * message = "", size_t nPrintedRows = 0, size_t nPrintedCols = 0, size_t interval = 10) -{ - size_t nRows = dataTable->getNumberOfRows(); - size_t nCols = dataTable->getNumberOfColumns(); +void printNumericTable(NumericTable *dataTable, const char *message = "", + size_t nPrintedRows = 0, size_t nPrintedCols = 0, + size_t interval = 10) { + size_t nRows = dataTable->getNumberOfRows(); + size_t nCols = dataTable->getNumberOfColumns(); NumericTableIface::StorageLayout layout = dataTable->getDataLayout(); - if (nPrintedRows != 0) - { + if (nPrintedRows != 0) { nPrintedRows = std::min(nRows, nPrintedRows); - } - else - { + } else { nPrintedRows = nRows; } - if (nPrintedCols != 0) - { + if (nPrintedCols != 0) { nPrintedCols = std::min(nCols, nPrintedCols); - } - else - { + } else { nPrintedCols = nCols; } BlockDescriptor block; - if (isFull(layout) || layout == NumericTableIface::csrArray) - { + if (isFull(layout) || layout == NumericTableIface::csrArray) { dataTable->getBlockOfRows(0, nRows, readOnly, block); - printArray(block.getBlockPtr(), nPrintedCols, nPrintedRows, nCols, message, interval); + printArray(block.getBlockPtr(), nPrintedCols, + nPrintedRows, nCols, message, interval); dataTable->releaseBlockOfRows(block); - } - else - { - PackedArrayNumericTableIface * packedTable = dynamic_cast(dataTable); + } else { + PackedArrayNumericTableIface *packedTable = + dynamic_cast(dataTable); 
packedTable->getPackedArray(readOnly, block); - if (isLower(layout)) - { - printLowerArray(block.getBlockPtr(), nPrintedRows, message, interval); - } - else if (isUpper(layout)) - { - printUpperArray(block.getBlockPtr(), nPrintedCols, nPrintedRows, nCols, message, interval); + if (isLower(layout)) { + printLowerArray(block.getBlockPtr(), nPrintedRows, + message, interval); + } else if (isUpper(layout)) { + printUpperArray(block.getBlockPtr(), nPrintedCols, + nPrintedRows, nCols, message, + interval); } packedTable->releasePackedArray(block); } } -void printNumericTable(NumericTable & dataTable, const char * message = "", size_t nPrintedRows = 0, size_t nPrintedCols = 0, size_t interval = 10) -{ - printNumericTable(&dataTable, message, nPrintedRows, nPrintedCols, interval); +void printNumericTable(NumericTable &dataTable, const char *message = "", + size_t nPrintedRows = 0, size_t nPrintedCols = 0, + size_t interval = 10) { + printNumericTable(&dataTable, message, nPrintedRows, nPrintedCols, + interval); } -void printNumericTable(const NumericTablePtr & dataTable, const char * message, size_t nPrintedRows, size_t nPrintedCols, - size_t interval) -{ - printNumericTable(dataTable.get(), message, nPrintedRows, nPrintedCols, interval); +void printNumericTable(const NumericTablePtr &dataTable, const char *message, + size_t nPrintedRows, size_t nPrintedCols, + size_t interval) { + printNumericTable(dataTable.get(), message, nPrintedRows, nPrintedCols, + interval); } -void printPackedNumericTable(NumericTable * dataTable, size_t nFeatures, const char * message = "", size_t interval = 10) -{ +void printPackedNumericTable(NumericTable *dataTable, size_t nFeatures, + const char *message = "", size_t interval = 10) { BlockDescriptor block; dataTable->getBlockOfRows(0, 1, readOnly, block); - DAAL_DATA_TYPE * data = block.getBlockPtr(); + DAAL_DATA_TYPE *data = block.getBlockPtr(); std::cout << std::setiosflags(std::ios::left); std::cout << message << std::endl; size_t index = 0; - for (size_t i = 0; i < nFeatures; i++) - { - for (size_t j = 0; j <= i; j++, index++) - { - std::cout << std::setw(interval) << std::setiosflags(std::ios::fixed) << std::setprecision(3); + for (size_t i = 0; i < nFeatures; i++) { + for (size_t j = 0; j <= i; j++, index++) { + std::cout << std::setw(interval) + << std::setiosflags(std::ios::fixed) + << std::setprecision(3); std::cout << data[index]; } std::cout << std::endl; @@ -437,15 +454,16 @@ void printPackedNumericTable(NumericTable * dataTable, size_t nFeatures, const c dataTable->releaseBlockOfRows(block); } -void printPackedNumericTable(NumericTable & dataTable, size_t nFeatures, const char * message = "", size_t interval = 10) -{ +void printPackedNumericTable(NumericTable &dataTable, size_t nFeatures, + const char *message = "", size_t interval = 10) { printPackedNumericTable(&dataTable, nFeatures, message); } template -void printNumericTables(NumericTable * dataTable1, NumericTable * dataTable2, const char * title1 = "", const char * title2 = "", - const char * message = "", size_t nPrintedRows = 0, size_t interval = 15) -{ +void printNumericTables(NumericTable *dataTable1, NumericTable *dataTable2, + const char *title1 = "", const char *title2 = "", + const char *message = "", size_t nPrintedRows = 0, + size_t interval = 15) { size_t nRows1 = dataTable1->getNumberOfRows(); size_t nRows2 = dataTable2->getNumberOfRows(); size_t nCols1 = dataTable1->getNumberOfColumns(); @@ -455,31 +473,30 @@ void printNumericTables(NumericTable * dataTable1, NumericTable * dataTable2, 
co BlockDescriptor block2; size_t nRows = std::min(nRows1, nRows2); - if (nPrintedRows != 0) - { + if (nPrintedRows != 0) { nRows = std::min(std::min(nRows1, nRows2), nPrintedRows); } dataTable1->getBlockOfRows(0, nRows, readOnly, block1); dataTable2->getBlockOfRows(0, nRows, readOnly, block2); - type1 * data1 = block1.getBlockPtr(); - type2 * data2 = block2.getBlockPtr(); + type1 *data1 = block1.getBlockPtr(); + type2 *data2 = block2.getBlockPtr(); std::cout << std::setiosflags(std::ios::left); std::cout << message << std::endl; std::cout << std::setw(interval * nCols1) << title1; std::cout << std::setw(interval * nCols2) << title2 << std::endl; - for (size_t i = 0; i < nRows; i++) - { - for (size_t j = 0; j < nCols1; j++) - { - std::cout << std::setw(interval) << std::setiosflags(std::ios::fixed) << std::setprecision(3); + for (size_t i = 0; i < nRows; i++) { + for (size_t j = 0; j < nCols1; j++) { + std::cout << std::setw(interval) + << std::setiosflags(std::ios::fixed) + << std::setprecision(3); std::cout << data1[i * nCols1 + j]; } - for (size_t j = 0; j < nCols2; j++) - { - std::cout << std::setprecision(0) << std::setw(interval) << data2[i * nCols2 + j]; + for (size_t j = 0; j < nCols2; j++) { + std::cout << std::setprecision(0) << std::setw(interval) + << data2[i * nCols2 + j]; } std::cout << std::endl; } @@ -490,15 +507,18 @@ void printNumericTables(NumericTable * dataTable1, NumericTable * dataTable2, co } template -void printNumericTables(NumericTable * dataTable1, NumericTable & dataTable2, const char * title1 = "", const char * title2 = "", - const char * message = "", size_t nPrintedRows = 0, size_t interval = 10) -{ - printNumericTables(dataTable1, &dataTable2, title1, title2, message, nPrintedRows, interval); -} - -void printNumericTables(NumericTable * dataTable1, NumericTable * dataTable2, const char * title1 = "", const char * title2 = "", - const char * message = "", size_t nPrintedRows = 0, size_t interval = 10) -{ +void printNumericTables(NumericTable *dataTable1, NumericTable &dataTable2, + const char *title1 = "", const char *title2 = "", + const char *message = "", size_t nPrintedRows = 0, + size_t interval = 10) { + printNumericTables(dataTable1, &dataTable2, title1, title2, + message, nPrintedRows, interval); +} + +void printNumericTables(NumericTable *dataTable1, NumericTable *dataTable2, + const char *title1 = "", const char *title2 = "", + const char *message = "", size_t nPrintedRows = 0, + size_t interval = 10) { size_t nRows1 = dataTable1->getNumberOfRows(); size_t nRows2 = dataTable2->getNumberOfRows(); size_t nCols1 = dataTable1->getNumberOfColumns(); @@ -508,31 +528,30 @@ void printNumericTables(NumericTable * dataTable1, NumericTable * dataTable2, co BlockDescriptor block2; size_t nRows = std::min(nRows1, nRows2); - if (nPrintedRows != 0) - { + if (nPrintedRows != 0) { nRows = std::min(std::min(nRows1, nRows2), nPrintedRows); } dataTable1->getBlockOfRows(0, nRows, readOnly, block1); dataTable2->getBlockOfRows(0, nRows, readOnly, block2); - DAAL_DATA_TYPE * data1 = block1.getBlockPtr(); - DAAL_DATA_TYPE * data2 = block2.getBlockPtr(); + DAAL_DATA_TYPE *data1 = block1.getBlockPtr(); + DAAL_DATA_TYPE *data2 = block2.getBlockPtr(); std::cout << std::setiosflags(std::ios::left); std::cout << message << std::endl; std::cout << std::setw(interval * nCols1) << title1; std::cout << std::setw(interval * nCols2) << title2 << std::endl; - for (size_t i = 0; i < nRows; i++) - { - for (size_t j = 0; j < nCols1; j++) - { - std::cout << std::setw(interval) << 
std::setiosflags(std::ios::fixed) << std::setprecision(3); + for (size_t i = 0; i < nRows; i++) { + for (size_t j = 0; j < nCols1; j++) { + std::cout << std::setw(interval) + << std::setiosflags(std::ios::fixed) + << std::setprecision(3); std::cout << data1[i * nCols1 + j]; } - for (size_t j = 0; j < nCols2; j++) - { - std::cout << std::setprecision(0) << std::setw(interval) << data2[i * nCols2 + j]; + for (size_t j = 0; j < nCols2; j++) { + std::cout << std::setprecision(0) << std::setw(interval) + << data2[i * nCols2 + j]; } std::cout << std::endl; } @@ -542,112 +561,91 @@ void printNumericTables(NumericTable * dataTable1, NumericTable * dataTable2, co dataTable2->releaseBlockOfRows(block2); } -void printNumericTables(NumericTable * dataTable1, NumericTable & dataTable2, const char * title1 = "", const char * title2 = "", - const char * message = "", size_t nPrintedRows = 0, size_t interval = 10) -{ - printNumericTables(dataTable1, &dataTable2, title1, title2, message, nPrintedRows, interval); +void printNumericTables(NumericTable *dataTable1, NumericTable &dataTable2, + const char *title1 = "", const char *title2 = "", + const char *message = "", size_t nPrintedRows = 0, + size_t interval = 10) { + printNumericTables(dataTable1, &dataTable2, title1, title2, message, + nPrintedRows, interval); } template -void printNumericTables(NumericTablePtr dataTable1, NumericTablePtr dataTable2, const char * title1 = "", const char * title2 = "", - const char * message = "", size_t nPrintedRows = 0, size_t interval = 10) -{ - printNumericTables(dataTable1.get(), dataTable2.get(), title1, title2, message, nPrintedRows, interval); +void printNumericTables(NumericTablePtr dataTable1, NumericTablePtr dataTable2, + const char *title1 = "", const char *title2 = "", + const char *message = "", size_t nPrintedRows = 0, + size_t interval = 10) { + printNumericTables(dataTable1.get(), dataTable2.get(), title1, + title2, message, nPrintedRows, interval); } -bool checkFileIsAvailable(std::string filename, bool needExit = false) -{ +bool checkFileIsAvailable(std::string filename, bool needExit = false) { std::ifstream file(filename.c_str()); - if (file.good()) - { + if (file.good()) { return true; - } - else - { + } else { std::cout << "Can't open file " << filename << std::endl; - if (needExit) - { + if (needExit) { exit(fileError); } return false; } } -void checkArguments(int argc, char * argv[], int count, ...) -{ - std::string ** filelist = new std::string *[count]; +void checkArguments(int argc, char *argv[], int count, ...) 
{ + std::string **filelist = new std::string *[count]; va_list ap; va_start(ap, count); - for (int i = 0; i < count; i++) - { + for (int i = 0; i < count; i++) { filelist[i] = va_arg(ap, std::string *); } va_end(ap); - if (argc == 1) - { - for (int i = 0; i < count; i++) - { + if (argc == 1) { + for (int i = 0; i < count; i++) { checkFileIsAvailable(*(filelist[i]), true); } - } - else if (argc == (count + 1)) - { + } else if (argc == (count + 1)) { bool isAllCorrect = true; - for (int i = 0; i < count; i++) - { - if (!checkFileIsAvailable(argv[i + 1])) - { + for (int i = 0; i < count; i++) { + if (!checkFileIsAvailable(argv[i + 1])) { isAllCorrect = false; break; } } - if (isAllCorrect == true) - { - for (int i = 0; i < count; i++) - { + if (isAllCorrect == true) { + for (int i = 0; i < count; i++) { (*filelist[i]) = argv[i + 1]; } - } - else - { - std::cout << "Warning: Try to open default datasetFileNames" << std::endl; - for (int i = 0; i < count; i++) - { + } else { + std::cout << "Warning: Try to open default datasetFileNames" + << std::endl; + for (int i = 0; i < count; i++) { checkFileIsAvailable(*(filelist[i]), true); } } - } - else - { + } else { std::cout << "Usage: " << argv[0] << " [ "; - for (int i = 0; i < count; i++) - { + for (int i = 0; i < count; i++) { std::cout << " "; } std::cout << "]" << std::endl; - std::cout << "Warning: Try to open default datasetFileNames" << std::endl; - for (int i = 0; i < count; i++) - { + std::cout << "Warning: Try to open default datasetFileNames" + << std::endl; + for (int i = 0; i < count; i++) { checkFileIsAvailable(*(filelist[i]), true); } } delete[] filelist; } -void copyBytes(daal::byte * dst, daal::byte * src, size_t size) -{ - for (size_t i = 0; i < size; i++) - { +void copyBytes(daal::byte *dst, daal::byte *src, size_t size) { + for (size_t i = 0; i < size; i++) { dst[i] = src[i]; } } -size_t checkBytes(daal::byte * dst, daal::byte * src, size_t size) -{ - for (size_t i = 0; i < size; i++) - { - if (dst[i] != src[i]) - { +size_t checkBytes(daal::byte *dst, daal::byte *src, size_t size) { + for (size_t i = 0; i < size; i++) { + if (dst[i] != src[i]) { return i + 1; } } @@ -655,34 +653,53 @@ size_t checkBytes(daal::byte * dst, daal::byte * src, size_t size) } static const unsigned int crcRem[] = { - 0x00000000, 0x741B8CD6, 0xE83719AC, 0x9C2C957A, 0xA475BF8E, 0xD06E3358, 0x4C42A622, 0x38592AF4, 0x3CF0F3CA, 0x48EB7F1C, 0xD4C7EA66, 0xA0DC66B0, - 0x98854C44, 0xEC9EC092, 0x70B255E8, 0x04A9D93E, 0x79E1E794, 0x0DFA6B42, 0x91D6FE38, 0xE5CD72EE, 0xDD94581A, 0xA98FD4CC, 0x35A341B6, 0x41B8CD60, - 0x4511145E, 0x310A9888, 0xAD260DF2, 0xD93D8124, 0xE164ABD0, 0x957F2706, 0x0953B27C, 0x7D483EAA, 0xF3C3CF28, 0x87D843FE, 0x1BF4D684, 0x6FEF5A52, - 0x57B670A6, 0x23ADFC70, 0xBF81690A, 0xCB9AE5DC, 0xCF333CE2, 0xBB28B034, 0x2704254E, 0x531FA998, 0x6B46836C, 0x1F5D0FBA, 0x83719AC0, 0xF76A1616, - 0x8A2228BC, 0xFE39A46A, 0x62153110, 0x160EBDC6, 0x2E579732, 0x5A4C1BE4, 0xC6608E9E, 0xB27B0248, 0xB6D2DB76, 0xC2C957A0, 0x5EE5C2DA, 0x2AFE4E0C, - 0x12A764F8, 0x66BCE82E, 0xFA907D54, 0x8E8BF182, 0x939C1286, 0xE7879E50, 0x7BAB0B2A, 0x0FB087FC, 0x37E9AD08, 0x43F221DE, 0xDFDEB4A4, 0xABC53872, - 0xAF6CE14C, 0xDB776D9A, 0x475BF8E0, 0x33407436, 0x0B195EC2, 0x7F02D214, 0xE32E476E, 0x9735CBB8, 0xEA7DF512, 0x9E6679C4, 0x024AECBE, 0x76516068, - 0x4E084A9C, 0x3A13C64A, 0xA63F5330, 0xD224DFE6, 0xD68D06D8, 0xA2968A0E, 0x3EBA1F74, 0x4AA193A2, 0x72F8B956, 0x06E33580, 0x9ACFA0FA, 0xEED42C2C, - 0x605FDDAE, 0x14445178, 0x8868C402, 0xFC7348D4, 0xC42A6220, 0xB031EEF6, 0x2C1D7B8C, 
0x5806F75A, 0x5CAF2E64, 0x28B4A2B2, 0xB49837C8, 0xC083BB1E, - 0xF8DA91EA, 0x8CC11D3C, 0x10ED8846, 0x64F60490, 0x19BE3A3A, 0x6DA5B6EC, 0xF1892396, 0x8592AF40, 0xBDCB85B4, 0xC9D00962, 0x55FC9C18, 0x21E710CE, - 0x254EC9F0, 0x51554526, 0xCD79D05C, 0xB9625C8A, 0x813B767E, 0xF520FAA8, 0x690C6FD2, 0x1D17E304, 0x5323A9DA, 0x2738250C, 0xBB14B076, 0xCF0F3CA0, - 0xF7561654, 0x834D9A82, 0x1F610FF8, 0x6B7A832E, 0x6FD35A10, 0x1BC8D6C6, 0x87E443BC, 0xF3FFCF6A, 0xCBA6E59E, 0xBFBD6948, 0x2391FC32, 0x578A70E4, - 0x2AC24E4E, 0x5ED9C298, 0xC2F557E2, 0xB6EEDB34, 0x8EB7F1C0, 0xFAAC7D16, 0x6680E86C, 0x129B64BA, 0x1632BD84, 0x62293152, 0xFE05A428, 0x8A1E28FE, - 0xB247020A, 0xC65C8EDC, 0x5A701BA6, 0x2E6B9770, 0xA0E066F2, 0xD4FBEA24, 0x48D77F5E, 0x3CCCF388, 0x0495D97C, 0x708E55AA, 0xECA2C0D0, 0x98B94C06, - 0x9C109538, 0xE80B19EE, 0x74278C94, 0x003C0042, 0x38652AB6, 0x4C7EA660, 0xD052331A, 0xA449BFCC, 0xD9018166, 0xAD1A0DB0, 0x313698CA, 0x452D141C, - 0x7D743EE8, 0x096FB23E, 0x95432744, 0xE158AB92, 0xE5F172AC, 0x91EAFE7A, 0x0DC66B00, 0x79DDE7D6, 0x4184CD22, 0x359F41F4, 0xA9B3D48E, 0xDDA85858, - 0xC0BFBB5C, 0xB4A4378A, 0x2888A2F0, 0x5C932E26, 0x64CA04D2, 0x10D18804, 0x8CFD1D7E, 0xF8E691A8, 0xFC4F4896, 0x8854C440, 0x1478513A, 0x6063DDEC, - 0x583AF718, 0x2C217BCE, 0xB00DEEB4, 0xC4166262, 0xB95E5CC8, 0xCD45D01E, 0x51694564, 0x2572C9B2, 0x1D2BE346, 0x69306F90, 0xF51CFAEA, 0x8107763C, - 0x85AEAF02, 0xF1B523D4, 0x6D99B6AE, 0x19823A78, 0x21DB108C, 0x55C09C5A, 0xC9EC0920, 0xBDF785F6, 0x337C7474, 0x4767F8A2, 0xDB4B6DD8, 0xAF50E10E, - 0x9709CBFA, 0xE312472C, 0x7F3ED256, 0x0B255E80, 0x0F8C87BE, 0x7B970B68, 0xE7BB9E12, 0x93A012C4, 0xABF93830, 0xDFE2B4E6, 0x43CE219C, 0x37D5AD4A, - 0x4A9D93E0, 0x3E861F36, 0xA2AA8A4C, 0xD6B1069A, 0xEEE82C6E, 0x9AF3A0B8, 0x06DF35C2, 0x72C4B914, 0x766D602A, 0x0276ECFC, 0x9E5A7986, 0xEA41F550, - 0xD218DFA4, 0xA6035372, 0x3A2FC608, 0x4E344ADE -}; - -unsigned int getCRC32(daal::byte * input, unsigned int prevRes, size_t len) -{ + 0x00000000, 0x741B8CD6, 0xE83719AC, 0x9C2C957A, 0xA475BF8E, 0xD06E3358, + 0x4C42A622, 0x38592AF4, 0x3CF0F3CA, 0x48EB7F1C, 0xD4C7EA66, 0xA0DC66B0, + 0x98854C44, 0xEC9EC092, 0x70B255E8, 0x04A9D93E, 0x79E1E794, 0x0DFA6B42, + 0x91D6FE38, 0xE5CD72EE, 0xDD94581A, 0xA98FD4CC, 0x35A341B6, 0x41B8CD60, + 0x4511145E, 0x310A9888, 0xAD260DF2, 0xD93D8124, 0xE164ABD0, 0x957F2706, + 0x0953B27C, 0x7D483EAA, 0xF3C3CF28, 0x87D843FE, 0x1BF4D684, 0x6FEF5A52, + 0x57B670A6, 0x23ADFC70, 0xBF81690A, 0xCB9AE5DC, 0xCF333CE2, 0xBB28B034, + 0x2704254E, 0x531FA998, 0x6B46836C, 0x1F5D0FBA, 0x83719AC0, 0xF76A1616, + 0x8A2228BC, 0xFE39A46A, 0x62153110, 0x160EBDC6, 0x2E579732, 0x5A4C1BE4, + 0xC6608E9E, 0xB27B0248, 0xB6D2DB76, 0xC2C957A0, 0x5EE5C2DA, 0x2AFE4E0C, + 0x12A764F8, 0x66BCE82E, 0xFA907D54, 0x8E8BF182, 0x939C1286, 0xE7879E50, + 0x7BAB0B2A, 0x0FB087FC, 0x37E9AD08, 0x43F221DE, 0xDFDEB4A4, 0xABC53872, + 0xAF6CE14C, 0xDB776D9A, 0x475BF8E0, 0x33407436, 0x0B195EC2, 0x7F02D214, + 0xE32E476E, 0x9735CBB8, 0xEA7DF512, 0x9E6679C4, 0x024AECBE, 0x76516068, + 0x4E084A9C, 0x3A13C64A, 0xA63F5330, 0xD224DFE6, 0xD68D06D8, 0xA2968A0E, + 0x3EBA1F74, 0x4AA193A2, 0x72F8B956, 0x06E33580, 0x9ACFA0FA, 0xEED42C2C, + 0x605FDDAE, 0x14445178, 0x8868C402, 0xFC7348D4, 0xC42A6220, 0xB031EEF6, + 0x2C1D7B8C, 0x5806F75A, 0x5CAF2E64, 0x28B4A2B2, 0xB49837C8, 0xC083BB1E, + 0xF8DA91EA, 0x8CC11D3C, 0x10ED8846, 0x64F60490, 0x19BE3A3A, 0x6DA5B6EC, + 0xF1892396, 0x8592AF40, 0xBDCB85B4, 0xC9D00962, 0x55FC9C18, 0x21E710CE, + 0x254EC9F0, 0x51554526, 0xCD79D05C, 0xB9625C8A, 0x813B767E, 0xF520FAA8, + 0x690C6FD2, 0x1D17E304, 0x5323A9DA, 0x2738250C, 
0xBB14B076, 0xCF0F3CA0, + 0xF7561654, 0x834D9A82, 0x1F610FF8, 0x6B7A832E, 0x6FD35A10, 0x1BC8D6C6, + 0x87E443BC, 0xF3FFCF6A, 0xCBA6E59E, 0xBFBD6948, 0x2391FC32, 0x578A70E4, + 0x2AC24E4E, 0x5ED9C298, 0xC2F557E2, 0xB6EEDB34, 0x8EB7F1C0, 0xFAAC7D16, + 0x6680E86C, 0x129B64BA, 0x1632BD84, 0x62293152, 0xFE05A428, 0x8A1E28FE, + 0xB247020A, 0xC65C8EDC, 0x5A701BA6, 0x2E6B9770, 0xA0E066F2, 0xD4FBEA24, + 0x48D77F5E, 0x3CCCF388, 0x0495D97C, 0x708E55AA, 0xECA2C0D0, 0x98B94C06, + 0x9C109538, 0xE80B19EE, 0x74278C94, 0x003C0042, 0x38652AB6, 0x4C7EA660, + 0xD052331A, 0xA449BFCC, 0xD9018166, 0xAD1A0DB0, 0x313698CA, 0x452D141C, + 0x7D743EE8, 0x096FB23E, 0x95432744, 0xE158AB92, 0xE5F172AC, 0x91EAFE7A, + 0x0DC66B00, 0x79DDE7D6, 0x4184CD22, 0x359F41F4, 0xA9B3D48E, 0xDDA85858, + 0xC0BFBB5C, 0xB4A4378A, 0x2888A2F0, 0x5C932E26, 0x64CA04D2, 0x10D18804, + 0x8CFD1D7E, 0xF8E691A8, 0xFC4F4896, 0x8854C440, 0x1478513A, 0x6063DDEC, + 0x583AF718, 0x2C217BCE, 0xB00DEEB4, 0xC4166262, 0xB95E5CC8, 0xCD45D01E, + 0x51694564, 0x2572C9B2, 0x1D2BE346, 0x69306F90, 0xF51CFAEA, 0x8107763C, + 0x85AEAF02, 0xF1B523D4, 0x6D99B6AE, 0x19823A78, 0x21DB108C, 0x55C09C5A, + 0xC9EC0920, 0xBDF785F6, 0x337C7474, 0x4767F8A2, 0xDB4B6DD8, 0xAF50E10E, + 0x9709CBFA, 0xE312472C, 0x7F3ED256, 0x0B255E80, 0x0F8C87BE, 0x7B970B68, + 0xE7BB9E12, 0x93A012C4, 0xABF93830, 0xDFE2B4E6, 0x43CE219C, 0x37D5AD4A, + 0x4A9D93E0, 0x3E861F36, 0xA2AA8A4C, 0xD6B1069A, 0xEEE82C6E, 0x9AF3A0B8, + 0x06DF35C2, 0x72C4B914, 0x766D602A, 0x0276ECFC, 0x9E5A7986, 0xEA41F550, + 0xD218DFA4, 0xA6035372, 0x3A2FC608, 0x4E344ADE}; + +unsigned int getCRC32(daal::byte *input, unsigned int prevRes, size_t len) { size_t i; - daal::byte * p; + daal::byte *p; unsigned int res, highDigit, nextDigit; const unsigned int crcPoly = 0xBA0DC66B; @@ -691,30 +708,29 @@ unsigned int getCRC32(daal::byte * input, unsigned int prevRes, size_t len) res = prevRes; - for (i = 0; i < len; i++) - { + for (i = 0; i < len; i++) { highDigit = res >> 24; nextDigit = (unsigned int)(p[len - 1 - i]); - res = (res << 8) ^ nextDigit; - res = res ^ crcRem[highDigit]; + res = (res << 8) ^ nextDigit; + res = res ^ crcRem[highDigit]; } - if (res >= crcPoly) - { + if (res >= crcPoly) { res = res ^ crcPoly; } return res; } -void printALSRatings(NumericTablePtr usersOffsetTable, NumericTablePtr itemsOffsetTable, NumericTablePtr ratings) -{ +void printALSRatings(NumericTablePtr usersOffsetTable, + NumericTablePtr itemsOffsetTable, + NumericTablePtr ratings) { size_t nUsers = ratings->getNumberOfRows(); size_t nItems = ratings->getNumberOfColumns(); BlockDescriptor block1; ratings->getBlockOfRows(0, nUsers, readOnly, block1); - DAAL_DATA_TYPE * ratingsData = block1.getBlockPtr(); + DAAL_DATA_TYPE *ratingsData = block1.getBlockPtr(); size_t usersOffset, itemsOffset; BlockDescriptor block; @@ -727,18 +743,16 @@ void printALSRatings(NumericTablePtr usersOffsetTable, NumericTablePtr itemsOffs itemsOffsetTable->releaseBlockOfRows(block); std::cout << " User ID, Item ID, rating" << std::endl; - for (size_t i = 0; i < nUsers; i++) - { - for (size_t j = 0; j < nItems; j++) - { - std::cout << i + usersOffset << ", " << j + itemsOffset << ", " << ratingsData[i * nItems + j] << std::endl; + for (size_t i = 0; i < nUsers; i++) { + for (size_t j = 0; j < nItems; j++) { + std::cout << i + usersOffset << ", " << j + itemsOffset << ", " + << ratingsData[i * nItems + j] << std::endl; } } ratings->releaseBlockOfRows(block1); } -size_t serializeDAALObject(SerializationIface * pData, ByteBuffer & buffer) -{ +size_t 
serializeDAALObject(SerializationIface *pData, ByteBuffer &buffer) { /* Create a data archive to serialize the numeric table */ InputDataArchive dataArch; @@ -750,12 +764,12 @@ size_t serializeDAALObject(SerializationIface * pData, ByteBuffer & buffer) /* Store the serialized data in an array */ buffer.resize(length); - if (length) dataArch.copyArchiveToArray(&buffer[0], length); + if (length) + dataArch.copyArchiveToArray(&buffer[0], length); return length; } -SerializationIfacePtr deserializeDAALObject(daal::byte * buff, size_t length) -{ +SerializationIfacePtr deserializeDAALObject(daal::byte *buff, size_t length) { /* Create a data archive to deserialize the object */ OutputDataArchive dataArch(buff, length); diff --git a/mllib-dal/src/main/native/service.h b/mllib-dal/src/main/native/service.h index b6a2cc5c5..8696993b5 100644 --- a/mllib-dal/src/main/native/service.h +++ b/mllib-dal/src/main/native/service.h @@ -1,19 +1,19 @@ /* file: service.h */ /******************************************************************************* -* Copyright 2017-2020 Intel Corporation -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*******************************************************************************/ + * Copyright 2017-2020 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *******************************************************************************/ /* ! 
Content: @@ -28,23 +28,24 @@ using namespace daal::data_management; #include -#include -#include +#include #include -#include #include -#include -#include +#include #include +#include +#include +#include #include "error_handling.h" typedef std::vector ByteBuffer; -void printNumericTable(const NumericTablePtr & dataTable, const char * message = "", size_t nPrintedRows = 0, size_t nPrintedCols = 0, - size_t interval = 10); -size_t serializeDAALObject(SerializationIface * pData, ByteBuffer & buffer); -SerializationIfacePtr deserializeDAALObject(daal::byte * buff, size_t length); -CSRNumericTable * createFloatSparseTable(const std::string & datasetFileName); +void printNumericTable(const NumericTablePtr &dataTable, + const char *message = "", size_t nPrintedRows = 0, + size_t nPrintedCols = 0, size_t interval = 10); +size_t serializeDAALObject(SerializationIface *pData, ByteBuffer &buffer); +SerializationIfacePtr deserializeDAALObject(daal::byte *buff, size_t length); +CSRNumericTable *createFloatSparseTable(const std::string &datasetFileName); #endif diff --git a/mllib-dal/src/main/scala/org/apache/spark-3.0.0/ml/clustering/KMeans.scala b/mllib-dal/src/main/scala/org/apache/spark-3.0.0/ml/clustering/KMeans.scala index 77eb1e928..dc3dbbd6e 100644 --- a/mllib-dal/src/main/scala/org/apache/spark-3.0.0/ml/clustering/KMeans.scala +++ b/mllib-dal/src/main/scala/org/apache/spark-3.0.0/ml/clustering/KMeans.scala @@ -334,12 +334,8 @@ class KMeans @Since("1.5.0") ( override def fit(dataset: Dataset[_]): KMeansModel = instrumented { instr => transformSchema(dataset.schema, logging = true) - val isPlatformSupported = Utils.checkClusterPlatformCompatibility(dataset.sparkSession.sparkContext) + val handlePersistence = (dataset.storageLevel == StorageLevel.NONE) val handleWeight = isDefined(weightCol) && $(weightCol).nonEmpty - val useKMeansDAL = isPlatformSupported && $(distanceMeasure) == "euclidean" && !handleWeight - - // will handle persistence only for trainWithML - val handlePersistence = (dataset.storageLevel == StorageLevel.NONE && !useKMeansDAL) val w = if (handleWeight) { col($(weightCol)).cast(DoubleType) } else { @@ -351,25 +347,19 @@ class KMeans @Since("1.5.0") ( case Row(point: Vector, weight: Double) => (point, weight) } - if (handlePersistence) { - instances.persist(StorageLevel.MEMORY_AND_DISK) - } - instr.logPipelineStage(this) instr.logDataset(dataset) instr.logParams(this, featuresCol, predictionCol, k, initMode, initSteps, distanceMeasure, maxIter, seed, tol, weightCol) + val isPlatformSupported = Utils.checkClusterPlatformCompatibility( + dataset.sparkSession.sparkContext) + val useKMeansDAL = isPlatformSupported && $(distanceMeasure) == "euclidean" && !handleWeight + val model = if (useKMeansDAL) { - val offheapEnabled=instances.sparkContext.getConf.getBoolean("spark.memory.offHeap.enabled", false) - if (offheapEnabled) { - instances.setName("instancesRDD").persist(StorageLevel.OFF_HEAP) - } else { - instances.setName("instancesRDD").persist(StorageLevel.MEMORY_AND_DISK) - } - trainWithDAL(instances) + trainWithDAL(instances, handlePersistence) } else { - trainWithML(instances) + trainWithML(instances, handlePersistence) } val summary = new KMeansSummary( @@ -382,13 +372,12 @@ class KMeans @Since("1.5.0") ( model.setSummary(Some(summary)) instr.logNamedValue("clusterSizes", summary.clusterSizes) - if (handlePersistence) { - instances.unpersist() - } + model } - private def trainWithDAL(instances: RDD[(Vector, Double)]): KMeansModel = instrumented { instr => + private def 
trainWithDAL(instances: RDD[(Vector, Double)], + handlePersistence: Boolean): KMeansModel = instrumented { instr => val sc = instances.sparkContext @@ -414,12 +403,18 @@ class KMeans @Since("1.5.0") ( val dataWithNorm = instances.map { case (point: Vector, weight: Double) => new VectorWithNorm(point) } + + // Cache for init + dataWithNorm.persist(StorageLevel.MEMORY_AND_DISK) + val centersWithNorm = if ($(initMode) == "random") { mllibKMeans.initRandom(dataWithNorm) } else { mllibKMeans.initKMeansParallel(dataWithNorm, distanceMeasureInstance) } + dataWithNorm.unpersist() + val centers = centersWithNorm.map(_.vector) val initTimeInSeconds = (System.nanoTime() - initStartTime) / 1e9 @@ -427,6 +422,10 @@ class KMeans @Since("1.5.0") ( val strInitMode = $(initMode) logInfo(f"Initialization with $strInitMode took $initTimeInSeconds%.3f seconds.") + if (handlePersistence) { + instances.persist(StorageLevel.MEMORY_AND_DISK) + } + val inputData = instances.map { case (point: Vector, weight: Double) => point } @@ -434,31 +433,44 @@ class KMeans @Since("1.5.0") ( val kmeansDAL = new KMeansDALImpl(getK, getMaxIter, getTol, DistanceMeasure.EUCLIDEAN, centers, executor_num, executor_cores) - val parentModel = kmeansDAL.runWithRDDVector(inputData, Option(instr)) + val parentModel = kmeansDAL.train(inputData, Option(instr)) val model = copyValues(new KMeansModel(uid, parentModel).setParent(this)) - model + if (handlePersistence) { + instances.unpersist() + } + model } - private def trainWithML(instances: RDD[(Vector, Double)]): KMeansModel = instrumented { instr => - val oldVectorInstances = instances.map { - case (point: Vector, weight: Double) => (OldVectors.fromML(point), weight) - } - val algo = new MLlibKMeans() - .setK($(k)) - .setInitializationMode($(initMode)) - .setInitializationSteps($(initSteps)) - .setMaxIterations($(maxIter)) - .setSeed($(seed)) - .setEpsilon($(tol)) - .setDistanceMeasure($(distanceMeasure)) - val parentModel = algo.runWithWeight(oldVectorInstances, Option(instr)) - val model = copyValues(new KMeansModel(uid, parentModel).setParent(this)) - model + private def trainWithML(instances: RDD[(Vector, Double)], + handlePersistence: Boolean): KMeansModel = instrumented { instr => + if (handlePersistence) { + instances.persist(StorageLevel.MEMORY_AND_DISK) + } + + val oldVectorInstances = instances.map { + case (point: Vector, weight: Double) => (OldVectors.fromML(point), weight) + } + val algo = new MLlibKMeans() + .setK($(k)) + .setInitializationMode($(initMode)) + .setInitializationSteps($(initSteps)) + .setMaxIterations($(maxIter)) + .setSeed($(seed)) + .setEpsilon($(tol)) + .setDistanceMeasure($(distanceMeasure)) + val parentModel = algo.runWithWeight(oldVectorInstances, Option(instr)) + val model = copyValues(new KMeansModel(uid, parentModel).setParent(this)) + + if (handlePersistence) { + instances.unpersist() } + model + } + @Since("1.5.0") override def transformSchema(schema: StructType): StructType = { validateAndTransformSchema(schema) diff --git a/mllib-dal/src/main/scala/org/apache/spark-3.0.0/ml/feature/PCA.scala b/mllib-dal/src/main/scala/org/apache/spark-3.0.0/ml/feature/PCA.scala index 0c9c8ad9e..14e9a2ce1 100644 --- a/mllib-dal/src/main/scala/org/apache/spark-3.0.0/ml/feature/PCA.scala +++ b/mllib-dal/src/main/scala/org/apache/spark-3.0.0/ml/feature/PCA.scala @@ -96,14 +96,15 @@ class PCA @Since("1.5.0") ( s"source vector size $numFeatures must be no less than k=$k") val sc = dataset.sparkSession.sparkContext - val isPlatformSupported = 
Utils.checkClusterPlatformCompatibility(dataset.sparkSession.sparkContext) + val isPlatformSupported = Utils.checkClusterPlatformCompatibility( + dataset.sparkSession.sparkContext) // Call oneDAL Correlation PCA implementation when numFeatures < 65535 and fall back otherwise val parentModel = if (numFeatures < 65535 && isPlatformSupported) { val executor_num = Utils.sparkExecutorNum(dataset.sparkSession.sparkContext) val executor_cores = Utils.sparkExecutorCores() val pca = new PCADALImpl(k = $(k), executor_num, executor_cores) - val pcaModel = pca.fitWithDAL(inputVectors) + val pcaModel = pca.train(inputVectors) pcaModel } else { val inputOldVectors = inputVectors.map { diff --git a/mllib-dal/src/main/scala/org/apache/spark-3.0.0/ml/recommendation/ALS.scala b/mllib-dal/src/main/scala/org/apache/spark-3.0.0/ml/recommendation/ALS.scala index 9196873fb..e59c642c9 100644 --- a/mllib-dal/src/main/scala/org/apache/spark-3.0.0/ml/recommendation/ALS.scala +++ b/mllib-dal/src/main/scala/org/apache/spark-3.0.0/ml/recommendation/ALS.scala @@ -923,7 +923,7 @@ object ALS extends DefaultParamsReadable[ALS] with Logging { val (userIdAndFactors, itemIdAndFactors) = if (implicitPrefs && isPlatformSupported) { - new ALSDALImpl(ratings, rank, maxIter, regParam, alpha, seed).run() + new ALSDALImpl(ratings, rank, maxIter, regParam, alpha, seed).train() } else { trainMLlib(ratings, rank, numUserBlocks, numItemBlocks, maxIter, regParam, implicitPrefs, alpha, nonnegative, intermediateRDDStorageLevel, finalRDDStorageLevel, diff --git a/mllib-dal/src/main/scala/org/apache/spark-3.0.1/ml/clustering/KMeans.scala b/mllib-dal/src/main/scala/org/apache/spark-3.0.1/ml/clustering/KMeans.scala index 1aa016af7..7af0ffacf 100644 --- a/mllib-dal/src/main/scala/org/apache/spark-3.0.1/ml/clustering/KMeans.scala +++ b/mllib-dal/src/main/scala/org/apache/spark-3.0.1/ml/clustering/KMeans.scala @@ -329,51 +329,30 @@ class KMeans @Since("1.5.0") ( override def fit(dataset: Dataset[_]): KMeansModel = instrumented { instr => transformSchema(dataset.schema, logging = true) - val isPlatformSupported = Utils.checkClusterPlatformCompatibility(dataset.sparkSession.sparkContext) - val handleWeight = isDefined(weightCol) && $(weightCol).nonEmpty - val useKMeansDAL = isPlatformSupported && $(distanceMeasure) == "euclidean" && !handleWeight - logInfo(s"useKMeansDAL = $useKMeansDAL") - - // will handle persistence only for trainWithML - // val handlePersistence = (dataset.storageLevel == StorageLevel.NONE && !useKMeansDAL) - // val w = if (handleWeight) { - // col($(weightCol)).cast(DoubleType) - // } else { - // lit(1.0) - // } - - // val instances: RDD[(Vector, Double)] = dataset - // .select(DatasetUtils.columnToVector(dataset, getFeaturesCol), w).rdd.map { - // case Row(point: Vector, weight: Double) => (point, weight) - // } - - // if (handlePersistence) { - // instances.persist(StorageLevel.MEMORY_AND_DISK) - // } - instr.logPipelineStage(this) instr.logDataset(dataset) instr.logParams(this, featuresCol, predictionCol, k, initMode, initSteps, distanceMeasure, maxIter, seed, tol, weightCol) + val handlePersistence = (dataset.storageLevel == StorageLevel.NONE) + val handleWeight = isDefined(weightCol) && $(weightCol).nonEmpty val w = if (handleWeight) { col($(weightCol)).cast(DoubleType) } else { lit(1.0) } - val instances = dataset.select(DatasetUtils.columnToVector(dataset, getFeaturesCol), w) - .rdd.map { case Row(point: Vector, weight: Double) => (point, weight) } - val handlePersistence = (dataset.storageLevel == StorageLevel.NONE && 
!useKMeansDAL) + val instances: RDD[(Vector, Double)] = dataset + .select(DatasetUtils.columnToVector(dataset, getFeaturesCol), w).rdd.map { + case Row(point: Vector, weight: Double) => (point, weight) + } + + val isPlatformSupported = Utils.checkClusterPlatformCompatibility( + dataset.sparkSession.sparkContext) + val useKMeansDAL = isPlatformSupported && $(distanceMeasure) == "euclidean" && !handleWeight val model = if (useKMeansDAL) { - val offheapEnabled=instances.sparkContext.getConf.getBoolean("spark.memory.offHeap.enabled", false) - if (offheapEnabled) { - instances.setName("instancesRDD").persist(StorageLevel.OFF_HEAP) - } else { - instances.setName("instancesRDD").persist(StorageLevel.MEMORY_AND_DISK) - } - trainWithDAL(instances) + trainWithDAL(instances, handlePersistence) } else { trainWithML(instances, handlePersistence) } @@ -388,13 +367,12 @@ class KMeans @Since("1.5.0") ( model.setSummary(Some(summary)) instr.logNamedValue("clusterSizes", summary.clusterSizes) - // if (handlePersistence) { - // instances.unpersist() - // } + model } - private def trainWithDAL(instances: RDD[(Vector, Double)]): KMeansModel = instrumented { instr => + private def trainWithDAL(instances: RDD[(Vector, Double)], + handlePersistence: Boolean): KMeansModel = instrumented { instr => val sc = instances.sparkContext @@ -420,12 +398,18 @@ class KMeans @Since("1.5.0") ( val dataWithNorm = instances.map { case (point: Vector, weight: Double) => new VectorWithNorm(point) } + + // Cache for init + dataWithNorm.persist(StorageLevel.MEMORY_AND_DISK) + val centersWithNorm = if ($(initMode) == "random") { mllibKMeans.initRandom(dataWithNorm) } else { mllibKMeans.initKMeansParallel(dataWithNorm, distanceMeasureInstance) } + dataWithNorm.unpersist() + val centers = centersWithNorm.map(_.vector) val initTimeInSeconds = (System.nanoTime() - initStartTime) / 1e9 @@ -433,6 +417,10 @@ class KMeans @Since("1.5.0") ( val strInitMode = $(initMode) logInfo(f"Initialization with $strInitMode took $initTimeInSeconds%.3f seconds.") + if (handlePersistence) { + instances.persist(StorageLevel.MEMORY_AND_DISK) + } + val inputData = instances.map { case (point: Vector, weight: Double) => point } @@ -440,32 +428,35 @@ class KMeans @Since("1.5.0") ( val kmeansDAL = new KMeansDALImpl(getK, getMaxIter, getTol, DistanceMeasure.EUCLIDEAN, centers, executor_num, executor_cores) - val parentModel = kmeansDAL.runWithRDDVector(inputData, Option(instr)) + val parentModel = kmeansDAL.train(inputData, Option(instr)) val model = copyValues(new KMeansModel(uid, parentModel).setParent(this)) - model + if (handlePersistence) { + instances.unpersist() + } + model } - private def trainWithML( - instances: RDD[(Vector, Double)], - handlePersistence: Boolean): KMeansModel = instrumented { instr => - val oldVectorInstances = instances.map { - case (point: Vector, weight: Double) => (OldVectors.fromML(point), weight) - } - val algo = new MLlibKMeans() - .setK($(k)) - .setInitializationMode($(initMode)) - .setInitializationSteps($(initSteps)) - .setMaxIterations($(maxIter)) - .setSeed($(seed)) - .setEpsilon($(tol)) - .setDistanceMeasure($(distanceMeasure)) - val parentModel = algo.runWithWeight(oldVectorInstances, handlePersistence, Some(instr)) - val model = copyValues(new KMeansModel(uid, parentModel).setParent(this)) - model + private def trainWithML(instances: RDD[(Vector, Double)], + handlePersistence: Boolean): KMeansModel = instrumented { instr => + val oldVectorInstances = instances.map { + case (point: Vector, weight: Double) => 
(OldVectors.fromML(point), weight) } + val algo = new MLlibKMeans() + .setK($(k)) + .setInitializationMode($(initMode)) + .setInitializationSteps($(initSteps)) + .setMaxIterations($(maxIter)) + .setSeed($(seed)) + .setEpsilon($(tol)) + .setDistanceMeasure($(distanceMeasure)) + val parentModel = algo.runWithWeight(oldVectorInstances, handlePersistence, Some(instr)) + val model = copyValues(new KMeansModel(uid, parentModel).setParent(this)) + + model + } @Since("1.5.0") override def transformSchema(schema: StructType): StructType = { diff --git a/mllib-dal/src/main/scala/org/apache/spark-3.0.1/ml/feature/PCA.scala b/mllib-dal/src/main/scala/org/apache/spark-3.0.1/ml/feature/PCA.scala index 0c9c8ad9e..14e9a2ce1 100644 --- a/mllib-dal/src/main/scala/org/apache/spark-3.0.1/ml/feature/PCA.scala +++ b/mllib-dal/src/main/scala/org/apache/spark-3.0.1/ml/feature/PCA.scala @@ -96,14 +96,15 @@ class PCA @Since("1.5.0") ( s"source vector size $numFeatures must be no less than k=$k") val sc = dataset.sparkSession.sparkContext - val isPlatformSupported = Utils.checkClusterPlatformCompatibility(dataset.sparkSession.sparkContext) + val isPlatformSupported = Utils.checkClusterPlatformCompatibility( + dataset.sparkSession.sparkContext) // Call oneDAL Correlation PCA implementation when numFeatures < 65535 and fall back otherwise val parentModel = if (numFeatures < 65535 && isPlatformSupported) { val executor_num = Utils.sparkExecutorNum(dataset.sparkSession.sparkContext) val executor_cores = Utils.sparkExecutorCores() val pca = new PCADALImpl(k = $(k), executor_num, executor_cores) - val pcaModel = pca.fitWithDAL(inputVectors) + val pcaModel = pca.train(inputVectors) pcaModel } else { val inputOldVectors = inputVectors.map { diff --git a/mllib-dal/src/main/scala/org/apache/spark-3.0.1/ml/recommendation/ALS.scala b/mllib-dal/src/main/scala/org/apache/spark-3.0.1/ml/recommendation/ALS.scala index 9196873fb..e59c642c9 100644 --- a/mllib-dal/src/main/scala/org/apache/spark-3.0.1/ml/recommendation/ALS.scala +++ b/mllib-dal/src/main/scala/org/apache/spark-3.0.1/ml/recommendation/ALS.scala @@ -923,7 +923,7 @@ object ALS extends DefaultParamsReadable[ALS] with Logging { val (userIdAndFactors, itemIdAndFactors) = if (implicitPrefs && isPlatformSupported) { - new ALSDALImpl(ratings, rank, maxIter, regParam, alpha, seed).run() + new ALSDALImpl(ratings, rank, maxIter, regParam, alpha, seed).train() } else { trainMLlib(ratings, rank, numUserBlocks, numItemBlocks, maxIter, regParam, implicitPrefs, alpha, nonnegative, intermediateRDDStorageLevel, finalRDDStorageLevel, diff --git a/mllib-dal/src/main/scala/org/apache/spark-3.0.2/ml/clustering/KMeans.scala b/mllib-dal/src/main/scala/org/apache/spark-3.0.2/ml/clustering/KMeans.scala index 1aa016af7..54b406f3e 100644 --- a/mllib-dal/src/main/scala/org/apache/spark-3.0.2/ml/clustering/KMeans.scala +++ b/mllib-dal/src/main/scala/org/apache/spark-3.0.2/ml/clustering/KMeans.scala @@ -329,51 +329,30 @@ class KMeans @Since("1.5.0") ( override def fit(dataset: Dataset[_]): KMeansModel = instrumented { instr => transformSchema(dataset.schema, logging = true) - val isPlatformSupported = Utils.checkClusterPlatformCompatibility(dataset.sparkSession.sparkContext) - val handleWeight = isDefined(weightCol) && $(weightCol).nonEmpty - val useKMeansDAL = isPlatformSupported && $(distanceMeasure) == "euclidean" && !handleWeight - logInfo(s"useKMeansDAL = $useKMeansDAL") - - // will handle persistence only for trainWithML - // val handlePersistence = (dataset.storageLevel == StorageLevel.NONE && 
!useKMeansDAL) - // val w = if (handleWeight) { - // col($(weightCol)).cast(DoubleType) - // } else { - // lit(1.0) - // } - - // val instances: RDD[(Vector, Double)] = dataset - // .select(DatasetUtils.columnToVector(dataset, getFeaturesCol), w).rdd.map { - // case Row(point: Vector, weight: Double) => (point, weight) - // } - - // if (handlePersistence) { - // instances.persist(StorageLevel.MEMORY_AND_DISK) - // } - instr.logPipelineStage(this) instr.logDataset(dataset) instr.logParams(this, featuresCol, predictionCol, k, initMode, initSteps, distanceMeasure, maxIter, seed, tol, weightCol) + val handlePersistence = (dataset.storageLevel == StorageLevel.NONE) + val handleWeight = isDefined(weightCol) && $(weightCol).nonEmpty val w = if (handleWeight) { col($(weightCol)).cast(DoubleType) } else { lit(1.0) } - val instances = dataset.select(DatasetUtils.columnToVector(dataset, getFeaturesCol), w) - .rdd.map { case Row(point: Vector, weight: Double) => (point, weight) } - val handlePersistence = (dataset.storageLevel == StorageLevel.NONE && !useKMeansDAL) + val instances: RDD[(Vector, Double)] = dataset + .select(DatasetUtils.columnToVector(dataset, getFeaturesCol), w).rdd.map { + case Row(point: Vector, weight: Double) => (point, weight) + } + + val isPlatformSupported = Utils.checkClusterPlatformCompatibility( + dataset.sparkSession.sparkContext) + val useKMeansDAL = isPlatformSupported && $(distanceMeasure) == "euclidean" && !handleWeight val model = if (useKMeansDAL) { - val offheapEnabled=instances.sparkContext.getConf.getBoolean("spark.memory.offHeap.enabled", false) - if (offheapEnabled) { - instances.setName("instancesRDD").persist(StorageLevel.OFF_HEAP) - } else { - instances.setName("instancesRDD").persist(StorageLevel.MEMORY_AND_DISK) - } - trainWithDAL(instances) + trainWithDAL(instances, handlePersistence) } else { trainWithML(instances, handlePersistence) } @@ -388,13 +367,12 @@ class KMeans @Since("1.5.0") ( model.setSummary(Some(summary)) instr.logNamedValue("clusterSizes", summary.clusterSizes) - // if (handlePersistence) { - // instances.unpersist() - // } + model } - private def trainWithDAL(instances: RDD[(Vector, Double)]): KMeansModel = instrumented { instr => + private def trainWithDAL(instances: RDD[(Vector, Double)], + handlePersistence: Boolean): KMeansModel = instrumented { instr => val sc = instances.sparkContext @@ -420,12 +398,18 @@ class KMeans @Since("1.5.0") ( val dataWithNorm = instances.map { case (point: Vector, weight: Double) => new VectorWithNorm(point) } + + // Cache for init + dataWithNorm.persist(StorageLevel.MEMORY_AND_DISK) + val centersWithNorm = if ($(initMode) == "random") { mllibKMeans.initRandom(dataWithNorm) } else { mllibKMeans.initKMeansParallel(dataWithNorm, distanceMeasureInstance) } + dataWithNorm.unpersist() + val centers = centersWithNorm.map(_.vector) val initTimeInSeconds = (System.nanoTime() - initStartTime) / 1e9 @@ -433,6 +417,10 @@ class KMeans @Since("1.5.0") ( val strInitMode = $(initMode) logInfo(f"Initialization with $strInitMode took $initTimeInSeconds%.3f seconds.") + if (handlePersistence) { + instances.persist(StorageLevel.MEMORY_AND_DISK) + } + val inputData = instances.map { case (point: Vector, weight: Double) => point } @@ -440,17 +428,19 @@ class KMeans @Since("1.5.0") ( val kmeansDAL = new KMeansDALImpl(getK, getMaxIter, getTol, DistanceMeasure.EUCLIDEAN, centers, executor_num, executor_cores) - val parentModel = kmeansDAL.runWithRDDVector(inputData, Option(instr)) + val parentModel = kmeansDAL.train(inputData, 
Option(instr)) val model = copyValues(new KMeansModel(uid, parentModel).setParent(this)) - model + if (handlePersistence) { + instances.unpersist() + } + model } - private def trainWithML( - instances: RDD[(Vector, Double)], - handlePersistence: Boolean): KMeansModel = instrumented { instr => + private def trainWithML(instances: RDD[(Vector, Double)], + handlePersistence: Boolean): KMeansModel = instrumented { instr => val oldVectorInstances = instances.map { case (point: Vector, weight: Double) => (OldVectors.fromML(point), weight) } diff --git a/mllib-dal/src/main/scala/org/apache/spark-3.0.2/ml/feature/PCA.scala b/mllib-dal/src/main/scala/org/apache/spark-3.0.2/ml/feature/PCA.scala index 0c9c8ad9e..14e9a2ce1 100644 --- a/mllib-dal/src/main/scala/org/apache/spark-3.0.2/ml/feature/PCA.scala +++ b/mllib-dal/src/main/scala/org/apache/spark-3.0.2/ml/feature/PCA.scala @@ -96,14 +96,15 @@ class PCA @Since("1.5.0") ( s"source vector size $numFeatures must be no less than k=$k") val sc = dataset.sparkSession.sparkContext - val isPlatformSupported = Utils.checkClusterPlatformCompatibility(dataset.sparkSession.sparkContext) + val isPlatformSupported = Utils.checkClusterPlatformCompatibility( + dataset.sparkSession.sparkContext) // Call oneDAL Correlation PCA implementation when numFeatures < 65535 and fall back otherwise val parentModel = if (numFeatures < 65535 && isPlatformSupported) { val executor_num = Utils.sparkExecutorNum(dataset.sparkSession.sparkContext) val executor_cores = Utils.sparkExecutorCores() val pca = new PCADALImpl(k = $(k), executor_num, executor_cores) - val pcaModel = pca.fitWithDAL(inputVectors) + val pcaModel = pca.train(inputVectors) pcaModel } else { val inputOldVectors = inputVectors.map { diff --git a/mllib-dal/src/main/scala/org/apache/spark-3.0.2/ml/recommendation/ALS.scala b/mllib-dal/src/main/scala/org/apache/spark-3.0.2/ml/recommendation/ALS.scala index 9196873fb..e59c642c9 100644 --- a/mllib-dal/src/main/scala/org/apache/spark-3.0.2/ml/recommendation/ALS.scala +++ b/mllib-dal/src/main/scala/org/apache/spark-3.0.2/ml/recommendation/ALS.scala @@ -923,7 +923,7 @@ object ALS extends DefaultParamsReadable[ALS] with Logging { val (userIdAndFactors, itemIdAndFactors) = if (implicitPrefs && isPlatformSupported) { - new ALSDALImpl(ratings, rank, maxIter, regParam, alpha, seed).run() + new ALSDALImpl(ratings, rank, maxIter, regParam, alpha, seed).train() } else { trainMLlib(ratings, rank, numUserBlocks, numItemBlocks, maxIter, regParam, implicitPrefs, alpha, nonnegative, intermediateRDDStorageLevel, finalRDDStorageLevel, diff --git a/mllib-dal/src/main/scala/org/apache/spark-3.1.1/ml/clustering/KMeans.scala b/mllib-dal/src/main/scala/org/apache/spark-3.1.1/ml/clustering/KMeans.scala index a3c8b8568..0878c146d 100644 --- a/mllib-dal/src/main/scala/org/apache/spark-3.1.1/ml/clustering/KMeans.scala +++ b/mllib-dal/src/main/scala/org/apache/spark-3.1.1/ml/clustering/KMeans.scala @@ -330,33 +330,12 @@ class KMeans @Since("1.5.0") ( override def fit(dataset: Dataset[_]): KMeansModel = instrumented { instr => transformSchema(dataset.schema, logging = true) - val isPlatformSupported = Utils.checkClusterPlatformCompatibility(dataset.sparkSession.sparkContext) - val handleWeight = isDefined(weightCol) && $(weightCol).nonEmpty - val useKMeansDAL = isPlatformSupported && $(distanceMeasure) == "euclidean" && !handleWeight - logInfo(s"useKMeansDAL = $useKMeansDAL") - - // will handle persistence only for trainWithML - // val handlePersistence = (dataset.storageLevel == StorageLevel.NONE 
&& !useKMeansDAL) - // val w = if (handleWeight) { - // col($(weightCol)).cast(DoubleType) - // } else { - // lit(1.0) - // } - - // val instances: RDD[(Vector, Double)] = dataset - // .select(DatasetUtils.columnToVector(dataset, getFeaturesCol), w).rdd.map { - // case Row(point: Vector, weight: Double) => (point, weight) - // } - - // if (handlePersistence) { - // instances.persist(StorageLevel.MEMORY_AND_DISK) - // } - instr.logPipelineStage(this) instr.logDataset(dataset) instr.logParams(this, featuresCol, predictionCol, k, initMode, initSteps, distanceMeasure, maxIter, seed, tol, weightCol) + val handleWeight = isDefined(weightCol) && $(weightCol).nonEmpty val w = if (handleWeight) { checkNonNegativeWeight(col($(weightCol)).cast(DoubleType)) } else { @@ -365,16 +344,14 @@ class KMeans @Since("1.5.0") ( val instances = dataset.select(DatasetUtils.columnToVector(dataset, getFeaturesCol), w) .rdd.map { case Row(point: Vector, weight: Double) => (point, weight) } - val handlePersistence = (dataset.storageLevel == StorageLevel.NONE && !useKMeansDAL) + val handlePersistence = (dataset.storageLevel == StorageLevel.NONE) + + val isPlatformSupported = Utils.checkClusterPlatformCompatibility( + dataset.sparkSession.sparkContext) + val useKMeansDAL = isPlatformSupported && $(distanceMeasure) == "euclidean" && !handleWeight val model = if (useKMeansDAL) { - val offheapEnabled=instances.sparkContext.getConf.getBoolean("spark.memory.offHeap.enabled", false) - if (offheapEnabled) { - instances.setName("instancesRDD").persist(StorageLevel.OFF_HEAP) - } else { - instances.setName("instancesRDD").persist(StorageLevel.MEMORY_AND_DISK) - } - trainWithDAL(instances) + trainWithDAL(instances, handlePersistence) } else { trainWithML(instances, handlePersistence) } @@ -389,13 +366,12 @@ class KMeans @Since("1.5.0") ( model.setSummary(Some(summary)) instr.logNamedValue("clusterSizes", summary.clusterSizes) - // if (handlePersistence) { - // instances.unpersist() - // } + model } - private def trainWithDAL(instances: RDD[(Vector, Double)]): KMeansModel = instrumented { instr => + private def trainWithDAL(instances: RDD[(Vector, Double)], + handlePersistence: Boolean): KMeansModel = instrumented { instr => val sc = instances.sparkContext @@ -421,12 +397,18 @@ class KMeans @Since("1.5.0") ( val dataWithNorm = instances.map { case (point: Vector, weight: Double) => new VectorWithNorm(point) } + + // Cache for init + dataWithNorm.persist(StorageLevel.MEMORY_AND_DISK) + val centersWithNorm = if ($(initMode) == "random") { mllibKMeans.initRandom(dataWithNorm) } else { mllibKMeans.initKMeansParallel(dataWithNorm, distanceMeasureInstance) } + dataWithNorm.unpersist() + val centers = centersWithNorm.map(_.vector) val initTimeInSeconds = (System.nanoTime() - initStartTime) / 1e9 @@ -434,6 +416,10 @@ class KMeans @Since("1.5.0") ( val strInitMode = $(initMode) logInfo(f"Initialization with $strInitMode took $initTimeInSeconds%.3f seconds.") + if (handlePersistence) { + instances.persist(StorageLevel.MEMORY_AND_DISK) + } + val inputData = instances.map { case (point: Vector, weight: Double) => point } @@ -441,17 +427,19 @@ class KMeans @Since("1.5.0") ( val kmeansDAL = new KMeansDALImpl(getK, getMaxIter, getTol, DistanceMeasure.EUCLIDEAN, centers, executor_num, executor_cores) - val parentModel = kmeansDAL.runWithRDDVector(inputData, Option(instr)) + val parentModel = kmeansDAL.train(inputData, Option(instr)) val model = copyValues(new KMeansModel(uid, parentModel).setParent(this)) - model + if (handlePersistence) { + 
instances.unpersist() + } + model } - private def trainWithML( - instances: RDD[(Vector, Double)], - handlePersistence: Boolean): KMeansModel = instrumented { instr => + private def trainWithML(instances: RDD[(Vector, Double)], + handlePersistence: Boolean): KMeansModel = instrumented { instr => val oldVectorInstances = instances.map { case (point: Vector, weight: Double) => (OldVectors.fromML(point), weight) } diff --git a/mllib-dal/src/main/scala/org/apache/spark-3.1.1/ml/feature/PCA.scala b/mllib-dal/src/main/scala/org/apache/spark-3.1.1/ml/feature/PCA.scala index 0c9c8ad9e..14e9a2ce1 100644 --- a/mllib-dal/src/main/scala/org/apache/spark-3.1.1/ml/feature/PCA.scala +++ b/mllib-dal/src/main/scala/org/apache/spark-3.1.1/ml/feature/PCA.scala @@ -96,14 +96,15 @@ class PCA @Since("1.5.0") ( s"source vector size $numFeatures must be no less than k=$k") val sc = dataset.sparkSession.sparkContext - val isPlatformSupported = Utils.checkClusterPlatformCompatibility(dataset.sparkSession.sparkContext) + val isPlatformSupported = Utils.checkClusterPlatformCompatibility( + dataset.sparkSession.sparkContext) // Call oneDAL Correlation PCA implementation when numFeatures < 65535 and fall back otherwise val parentModel = if (numFeatures < 65535 && isPlatformSupported) { val executor_num = Utils.sparkExecutorNum(dataset.sparkSession.sparkContext) val executor_cores = Utils.sparkExecutorCores() val pca = new PCADALImpl(k = $(k), executor_num, executor_cores) - val pcaModel = pca.fitWithDAL(inputVectors) + val pcaModel = pca.train(inputVectors) pcaModel } else { val inputOldVectors = inputVectors.map { diff --git a/mllib-dal/src/main/scala/org/apache/spark-3.1.1/ml/recommendation/ALS.scala b/mllib-dal/src/main/scala/org/apache/spark-3.1.1/ml/recommendation/ALS.scala index 9196873fb..e59c642c9 100644 --- a/mllib-dal/src/main/scala/org/apache/spark-3.1.1/ml/recommendation/ALS.scala +++ b/mllib-dal/src/main/scala/org/apache/spark-3.1.1/ml/recommendation/ALS.scala @@ -923,7 +923,7 @@ object ALS extends DefaultParamsReadable[ALS] with Logging { val (userIdAndFactors, itemIdAndFactors) = if (implicitPrefs && isPlatformSupported) { - new ALSDALImpl(ratings, rank, maxIter, regParam, alpha, seed).run() + new ALSDALImpl(ratings, rank, maxIter, regParam, alpha, seed).train() } else { trainMLlib(ratings, rank, numUserBlocks, numItemBlocks, maxIter, regParam, implicitPrefs, alpha, nonnegative, intermediateRDDStorageLevel, finalRDDStorageLevel, diff --git a/mllib-dal/src/main/scala/org/apache/spark/ml/clustering/KMeansDALImpl.scala b/mllib-dal/src/main/scala/org/apache/spark/ml/clustering/KMeansDALImpl.scala index e9e7ec36d..f2d0bbe5e 100644 --- a/mllib-dal/src/main/scala/org/apache/spark/ml/clustering/KMeansDALImpl.scala +++ b/mllib-dal/src/main/scala/org/apache/spark/ml/clustering/KMeansDALImpl.scala @@ -1,12 +1,11 @@ /* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at + * Copyright 2020 Intel Corporation * - * http://www.apache.org/licenses/LICENSE-2.0 + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -17,104 +16,34 @@ package org.apache.spark.ml.clustering -import com.intel.daal.data_management.data.{NumericTable, RowMergedNumericTable, Matrix => DALMatrix} -import com.intel.daal.services.DaalContext import org.apache.spark.internal.Logging import org.apache.spark.ml.linalg.Vector import org.apache.spark.ml.util._ import org.apache.spark.mllib.clustering.{KMeansModel => MLlibKMeansModel} import org.apache.spark.mllib.linalg.{Vector => OldVector, Vectors => OldVectors} -import org.apache.spark.rdd.{ExecutorInProcessCoalescePartitioner, RDD} - -class KMeansDALImpl ( - var nClusters : Int, - var maxIterations : Int, - var tolerance : Double, - val distanceMeasure: String, - val centers: Array[OldVector], - val executorNum: Int, - val executorCores: Int -) extends Serializable with Logging { - - def runWithRDDVector(data: RDD[Vector], instr: Option[Instrumentation]) : MLlibKMeansModel = { - - instr.foreach(_.logInfo(s"Processing partitions with $executorNum executors")) - - // repartition to executorNum if not enough partitions - val dataForConversion = if (data.getNumPartitions < executorNum) { - data.repartition(executorNum).setName("Repartitioned for conversion").cache() - } else { - data - } +import org.apache.spark.rdd.RDD - val executorIPAddress = Utils.sparkFirstExecutorIP(dataForConversion.sparkContext) - val kvsIP = dataForConversion.sparkContext.conf.get("spark.oap.mllib.oneccl.kvs.ip", executorIPAddress) - val kvsPortDetected = Utils.checkExecutorAvailPort(dataForConversion, kvsIP) - val kvsPort = dataForConversion.sparkContext.conf.getInt("spark.oap.mllib.oneccl.kvs.port", kvsPortDetected) +class KMeansDALImpl(var nClusters: Int, + var maxIterations: Int, + var tolerance: Double, + val distanceMeasure: String, + val centers: Array[OldVector], + val executorNum: Int, + val executorCores: Int + ) extends Serializable with Logging { - val kvsIPPort = kvsIP+"_"+kvsPort + def train(data: RDD[Vector], instr: Option[Instrumentation]): MLlibKMeansModel = { - val partitionDims = Utils.getPartitionDims(dataForConversion) + val coalescedTables = OneDAL.vectorsToMergedNumericTables(data, executorNum) - // filter the empty partitions - val partRows = dataForConversion.mapPartitionsWithIndex { (index: Int, it: Iterator[Vector]) => - Iterator(Tuple3(partitionDims(index)._1, index, it)) - } - val nonEmptyPart = partRows.filter{entry => { entry._1 > 0 }} - - // convert RDD[Vector] to RDD[HomogenNumericTable] - val numericTables = nonEmptyPart.map { entry => - val numRows = entry._1 - val index = entry._2 - val it = entry._3 - val numCols = partitionDims(index)._2 - - logDebug(s"KMeansDALImpl: Partition index: $index, numCols: $numCols, numRows: $numRows") - - // Build DALMatrix, this will load libJavaAPI, libtbb, libtbbmalloc - val context = new DaalContext() - val matrix = new DALMatrix(context, classOf[java.lang.Double], - numCols.toLong, numRows.toLong, NumericTable.AllocationFlag.DoAllocate) - - logDebug("KMeansDALImpl: Loading native libraries" ) - // oneDAL libs should be loaded by now, extract libMLlibDAL.so to temp file and load - LibLoader.loadLibraries() - - import scala.collection.JavaConverters._ - - var dalRow = 0 - - it.foreach { curVector => - val rowArr = curVector.toArray - OneDAL.cSetDoubleBatch(matrix.getCNumericTable, dalRow, rowArr, 1, 
numCols) - dalRow += 1 - } + val executorIPAddress = Utils.sparkFirstExecutorIP(coalescedTables.sparkContext) + val kvsIP = coalescedTables.sparkContext.conf.get("spark.oap.mllib.oneccl.kvs.ip", + executorIPAddress) + val kvsPortDetected = Utils.checkExecutorAvailPort(coalescedTables, kvsIP) + val kvsPort = coalescedTables.sparkContext.conf.getInt("spark.oap.mllib.oneccl.kvs.port", + kvsPortDetected) - Iterator(matrix.getCNumericTable) - - }.cache() - - // workaround to fix the bug of multi executors handling same partition. - numericTables.foreachPartition(() => _) - numericTables.count() - - val cachedRdds = data.sparkContext.getPersistentRDDs - cachedRdds.filter(r => r._2.name=="instancesRDD").foreach (r => r._2.unpersist()) - - val coalescedRdd = numericTables.coalesce(1, - partitionCoalescer = Some(new ExecutorInProcessCoalescePartitioner())) - - val coalescedTables = coalescedRdd.mapPartitions { iter => - val context = new DaalContext() - val mergedData = new RowMergedNumericTable(context) - - iter.foreach{ curIter => - val address = curIter.next() - OneDAL.cAddNumericTable(mergedData.getCNumericTable, address ) - } - Iterator(mergedData.getCNumericTable) - - }.cache() + val kvsIPPort = kvsIP + "_" + kvsPort val results = coalescedTables.mapPartitionsWithIndex { (rank, table) => val tableArr = table.next() @@ -146,16 +75,12 @@ class KMeansDALImpl ( ret }.collect() - // Release the native memory allocated by NumericTable. - numericTables.foreach( tables => - tables.foreach { address => - OneDAL.cFreeDataMemory(address) - } - ) - // Make sure there is only one result from rank 0 assert(results.length == 1) + // Release native memory for numeric tables + OneDAL.releaseNumericTables(data.sparkContext) + val centerVectors = results(0)._1 val totalCost = results(0)._2 val iterationNum = results(0)._3 diff --git a/mllib-dal/src/main/scala/org/apache/spark/ml/feature/PCADALImpl.scala b/mllib-dal/src/main/scala/org/apache/spark/ml/feature/PCADALImpl.scala index e1bba3d37..f2b8645a2 100644 --- a/mllib-dal/src/main/scala/org/apache/spark/ml/feature/PCADALImpl.scala +++ b/mllib-dal/src/main/scala/org/apache/spark/ml/feature/PCADALImpl.scala @@ -1,12 +1,11 @@ /* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at + * Copyright 2020 Intel Corporation * - * http://www.apache.org/licenses/LICENSE-2.0 + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -18,42 +17,36 @@ package org.apache.spark.ml.feature import java.util.Arrays + import com.intel.daal.data_management.data.{HomogenNumericTable, NumericTable} + import org.apache.spark.internal.Logging import org.apache.spark.ml.linalg._ import org.apache.spark.ml.util.{OneCCL, OneDAL, Utils} -import org.apache.spark.mllib.feature.{PCAModel => MLlibPCAModel} +import org.apache.spark.mllib.feature.{PCAModel => MLlibPCAModel, StandardScaler => MLlibStandardScaler} import org.apache.spark.mllib.linalg.{DenseMatrix => OldDenseMatrix, Vectors => OldVectors} import org.apache.spark.rdd.RDD -import org.apache.spark.mllib.feature.{StandardScaler => MLlibStandardScaler} -class PCADALImpl ( - val k: Int, - val executorNum: Int, - val executorCores: Int) +class PCADALImpl(val k: Int, + val executorNum: Int, + val executorCores: Int) extends Serializable with Logging { - // Normalize data before apply fitWithDAL - private def normalizeData(input: RDD[Vector]) : RDD[Vector] = { - val vectors = input.map(OldVectors.fromML(_)) - val scaler = new MLlibStandardScaler(withMean = true, withStd = false).fit(vectors) - val res = scaler.transform(vectors) - res.map(_.asML) - } - - def fitWithDAL(data: RDD[Vector]) : MLlibPCAModel = { + def train(data: RDD[Vector]): MLlibPCAModel = { val normalizedData = normalizeData(data) - val coalescedTables = OneDAL.rddVectorToNumericTables(normalizedData, executorNum) + val coalescedTables = OneDAL.vectorsToMergedNumericTables(normalizedData, executorNum) val executorIPAddress = Utils.sparkFirstExecutorIP(coalescedTables.sparkContext) - val kvsIP = coalescedTables.sparkContext.conf.get("spark.oap.mllib.oneccl.kvs.ip", executorIPAddress) + val kvsIP = coalescedTables.sparkContext.conf.get("spark.oap.mllib.oneccl.kvs.ip", + executorIPAddress) val kvsPortDetected = Utils.checkExecutorAvailPort(coalescedTables, kvsIP) - val kvsPort = coalescedTables.sparkContext.conf.getInt("spark.oap.mllib.oneccl.kvs.port", kvsPortDetected) + val kvsPort = coalescedTables.sparkContext.conf.getInt("spark.oap.mllib.oneccl.kvs.port", + kvsPortDetected) - val kvsIPPort = kvsIP+"_"+kvsPort + val kvsIPPort = kvsIP + "_" + kvsPort val results = coalescedTables.mapPartitionsWithIndex { (rank, table) => val tableArr = table.next() @@ -71,7 +64,8 @@ class PCADALImpl ( val ret = if (OneCCL.isRoot()) { val pcNumericTable = OneDAL.makeNumericTable(result.pcNumericTable) - val explainedVarianceNumericTable = OneDAL.makeNumericTable(result.explainedVarianceNumericTable) + val explainedVarianceNumericTable = OneDAL.makeNumericTable( + result.explainedVarianceNumericTable) val principleComponents = getPrincipleComponentsFromDAL(pcNumericTable, k) val explainedVariance = getExplainedVarianceFromDAL(explainedVarianceNumericTable, k) @@ -89,6 +83,9 @@ class PCADALImpl ( // Make sure there is only one result from rank 0 assert(results.length == 1) + // Release native memory for numeric tables + OneDAL.releaseNumericTables(data.sparkContext) + val pc = results(0)._1 val explainedVariance = results(0)._2 @@ -100,6 +97,14 @@ class PCADALImpl ( parentModel } + // Normalize data before training + private def normalizeData(input: RDD[Vector]): RDD[Vector] = { + val vectors = input.map(OldVectors.fromML(_)) + val scaler = new MLlibStandardScaler(withMean = true, withStd = 
false).fit(vectors) + val res = scaler.transform(vectors) + res.map(_.asML) + } + private def getPrincipleComponentsFromDAL(table: NumericTable, k: Int): DenseMatrix = { val data = table.asInstanceOf[HomogenNumericTable].getDoubleArray() @@ -124,7 +129,7 @@ class PCADALImpl ( val data = table_1xn.asInstanceOf[HomogenNumericTable].getDoubleArray() val sum = data.sum val topK = Arrays.copyOfRange(data, 0, k) - for ( i <- 0 until k ) + for (i <- 0 until k) topK(i) = topK(i) / sum new DenseVector(topK) } diff --git a/mllib-dal/src/main/scala/org/apache/spark/ml/recommendation/ALSDALImpl.scala b/mllib-dal/src/main/scala/org/apache/spark/ml/recommendation/ALSDALImpl.scala index bcb95ca1f..1e16c97c3 100644 --- a/mllib-dal/src/main/scala/org/apache/spark/ml/recommendation/ALSDALImpl.scala +++ b/mllib-dal/src/main/scala/org/apache/spark/ml/recommendation/ALSDALImpl.scala @@ -1,185 +1,61 @@ +/* + * Copyright 2020 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package org.apache.spark.ml.recommendation -import com.intel.daal.data_management.data.CSRNumericTable.Indexing -import org.apache.spark.rdd.{ExecutorInProcessCoalescePartitioner, RDD} +import java.nio.{ByteBuffer, ByteOrder, FloatBuffer} +import scala.collection.mutable.ArrayBuffer import scala.reflect.ClassTag -import com.intel.daal.data_management.data.{CSRNumericTable, HomogenNumericTable, RowMergedNumericTable, Matrix => DALMatrix} + +import com.intel.daal.data_management.data.CSRNumericTable import com.intel.daal.services.DaalContext + import org.apache.spark.Partitioner import org.apache.spark.internal.Logging import org.apache.spark.ml.recommendation.ALS.Rating import org.apache.spark.ml.util._ - -import java.nio.{ByteBuffer, ByteOrder} -import scala.collection.mutable.ArrayBuffer -//import java.nio.DoubleBuffer -import java.nio.FloatBuffer +import org.apache.spark.rdd.RDD class ALSDataPartitioner(blocks: Int, itemsInBlock: Long) extends Partitioner { def numPartitions: Int = blocks + def getPartition(key: Any): Int = { val k = key.asInstanceOf[Long] // itemsInBlock = numItems / partitions // remaining records will belog to the last partition // 21 => 5, 5, 5, 6 // 46 => 11, 11, 11, 13 - math.min((k / itemsInBlock).toInt, blocks-1) + math.min((k / itemsInBlock).toInt, blocks - 1) } } -class ALSDALImpl[@specialized(Int, Long) ID: ClassTag]( - data: RDD[Rating[ID]], - nFactors: Int, - maxIter: Int, - regParam: Double, - alpha: Double, - seed: Long, -) extends Serializable with Logging { +class ALSDALImpl[@specialized(Int, Long) ID: ClassTag]( data: RDD[Rating[ID]], + nFactors: Int, + maxIter: Int, + regParam: Double, + alpha: Double, + seed: Long + ) extends Serializable with Logging { // Rating struct size is size of Long+Long+Float val RATING_SIZE = 8 + 8 + 4 - // Return Map partitionId -> (ratingsNum, csrRowNum, rowOffset) - private def getRatingsPartitionInfo(data: RDD[Rating[ID]]): Map[Int, (Int, Int, Int)] = { - val collectd = data.mapPartitionsWithIndex { case (index: Int, it: Iterator[Rating[ID]]) 
=> - var ratingsNum = 0 - var s = Set[ID]() - it.foreach { v => - s += v.user - ratingsNum += 1 - } - Iterator((index, (ratingsNum, s.count(_ => true)))) - }.collect - - var ret = Map[Int, (Int, Int, Int)]() - var rowOffset = 0 - collectd.foreach { v => - val partitionId = v._1 - val ratingsNum = v._2._1 - val csrRowNum = v._2._2 - ret += ( partitionId -> (ratingsNum, csrRowNum, rowOffset)) - rowOffset = rowOffset + csrRowNum - } - - ret - } - - private def ratingsToCSRNumericTables(ratings: RDD[Rating[ID]], - nVectors: Long, nFeatures: Long, nBlocks: Long): RDD[CSRNumericTable] = { - -// val rowSortedRatings = ratings.sortBy(_.user.toString.toLong) - -// val itemsInBlock = (nFeatures + nBlocks - 1) / nBlocks - val itemsInBlock = nFeatures / nBlocks -// val rowSortedGrouped = rowSortedRatings.groupBy(value => value.user.toString.toLong / itemsInBlock).flatMap(_._2) - val rowSortedGrouped = ratings - // Transpose the dataset - .map { p => - Rating(p.item, p.user, p.rating) - } - .groupBy(value => value.user.toString.toLong) - .partitionBy(new ALSDataPartitioner(nBlocks.toInt, itemsInBlock)) - .flatMap(_._2).mapPartitions { p => - p.toArray.sortBy(_.user.toString.toLong).toIterator - } - - println("rowSortedGrouped partition number: ", rowSortedGrouped.getNumPartitions) - - // rowSortedGrouped.mapPartitionsWithIndex { case (partitionId, partition) => -// println("partitionId", partitionId) -// partition.foreach { p => -// println(p.user, p.item, p.rating) } -// Iterator(partitionId) -// }.collect() - - val ratingsPartitionInfo = getRatingsPartitionInfo(rowSortedGrouped) - println("ratingsPartitionInfo:", ratingsPartitionInfo) - - rowSortedGrouped.mapPartitionsWithIndex { case (partitionId, partition) => - val ratingsNum = ratingsPartitionInfo(partitionId)._1 - val csrRowNum = ratingsPartitionInfo(partitionId)._2 - val values = Array.fill(ratingsNum) { 0.0f } - val columnIndices = Array.fill(ratingsNum) { 0L } - val rowOffsets = ArrayBuffer[Long](1L) - - - var index = 0 - var curRow = 0L - // Each partition converted to one CSRNumericTable - partition.foreach { p => - // Modify row index for each partition (start from 0) - val row = p.user.toString.toLong - ratingsPartitionInfo(partitionId)._3 - val column = p.item.toString.toLong - val rating = p.rating - - values(index) = rating - // one-based index - columnIndices(index) = column + 1 - - if (row > curRow) { - curRow = row - // one-based index - rowOffsets += index + 1 - } - - index = index + 1 - } - // one-based row index - rowOffsets += index+1 - - println("PartitionId:", partitionId) - println("csrRowNum", csrRowNum) -// println("rowOffsets", rowOffsets.mkString(",")) -// println("columnIndices", columnIndices.mkString(",")) -// println("values", values.mkString(",")) - - val contextLocal = new DaalContext() - - println("ALSDALImpl: Loading native libraries ..." 
) - LibLoader.loadLibraries() - - val cTable = OneDAL.cNewCSRNumericTable(values, columnIndices, rowOffsets.toArray, nVectors, csrRowNum) - val table = new CSRNumericTable(contextLocal, cTable) -// table.pack() - - println("Input dimensions:", table.getNumberOfRows, table.getNumberOfColumns) - - // There is a bug https://github.com/oneapi-src/oneDAL/pull/1288, - // printNumericTable can't print correct result for CSRNumericTable, use C++ printNumericTable - // Service.printNumericTable("Input: ", table) - - Iterator(table) - }.cache() - } - -// def factorsToRDD(cUsersFactorsNumTab: Long, cItemsFactorsNumTab: Long) -// :(RDD[(ID, Array[Float])], RDD[(ID, Array[Float])]) = { -// val usersFactorsNumTab = OneDAL.makeNumericTable(cUsersFactorsNumTab) -// val itemsFactorsNumTab = OneDAL.makeNumericTable(cItemsFactorsNumTab) -// -// Service.printNumericTable("usersFactorsNumTab", usersFactorsNumTab) -// Service.printNumericTable("itemsFactorsNumTab", itemsFactorsNumTab) -// -// null -// } - - def ratingsToByteBuffer(ratings: Array[Rating[ID]]): ByteBuffer = { -// println("ratings len", ratings.length) - - val buffer= ByteBuffer.allocateDirect(ratings.length*(8+8+4)) - // Use little endian - buffer.order(ByteOrder.LITTLE_ENDIAN) - ratings.foreach { rating => - buffer.putLong(rating.user.toString.toLong) - buffer.putLong(rating.item.toString.toLong) - buffer.putFloat(rating.rating) - } - buffer - } - - def run(): (RDD[(ID, Array[Float])], RDD[(ID, Array[Float])]) = { + def train(): (RDD[(ID, Array[Float])], RDD[(ID, Array[Float])]) = { val executorNum = Utils.sparkExecutorNum(data.sparkContext) val executorCores = Utils.sparkExecutorCores() @@ -193,35 +69,32 @@ class ALSDALImpl[@specialized(Int, Long) ID: ClassTag]( Ordering[Long].compare(x.user.toString.toLong, y.user.toString.toLong) }).user.toString.toLong + 1 -// val largestItems = data.sortBy(_.item.toString.toLong, ascending = false).take(1) -// val nFeatures = largestItems(0).item.toString.toLong + 1 - -// val largestUsers = data.sortBy(_.user.toString.toLong, ascending = false).take(1) -// val nVectors = largestUsers(0).user.toString.toLong + 1 - val nBlocks = executorNum -// val nRatings = data.count() - - logInfo(s"ALSDAL fit using $executorNum Executors for $nVectors vectors and $nFeatures features") + logInfo(s"ALSDAL fit using $executorNum Executors " + + s"for $nVectors vectors and $nFeatures features") - val numericTables = data.repartition(executorNum).setName("Repartitioned for conversion").cache() + val numericTables = data.repartition(executorNum) + .setName("Repartitioned for conversion").cache() val executorIPAddress = Utils.sparkFirstExecutorIP(numericTables.sparkContext) - val kvsIP = numericTables.sparkContext.conf.get("spark.oap.mllib.oneccl.kvs.ip", executorIPAddress) + val kvsIP = numericTables.sparkContext.conf.get( + "spark.oap.mllib.oneccl.kvs.ip", executorIPAddress) val kvsPortDetected = Utils.checkExecutorAvailPort(numericTables, kvsIP) - val kvsPort = numericTables.sparkContext.conf.getInt("spark.oap.mllib.oneccl.kvs.port", kvsPortDetected) + val kvsPort = numericTables.sparkContext.conf.getInt( + "spark.oap.mllib.oneccl.kvs.port", kvsPortDetected) - val kvsIPPort = kvsIP+"_"+kvsPort + val kvsIPPort = kvsIP + "_" + kvsPort val results = numericTables // Transpose the dataset .map { p => - Rating(p.item, p.user, p.rating) } + Rating(p.item, p.user, p.rating) + } .mapPartitionsWithIndex { (rank, iter) => val context = new DaalContext() - println("ALSDALImpl: Loading libMLlibDAL.so" ) + println("ALSDALImpl: Loading 
libMLlibDAL.so") LibLoader.loadLibraries() OneCCL.init(executorNum, rank, kvsIPPort) @@ -233,7 +106,8 @@ class ALSDALImpl[@specialized(Int, Long) ID: ClassTag]( val bufferInfo = new ALSPartitionInfo val shuffledBuffer = cShuffleData(buffer, nFeatures.toInt, nBlocks, bufferInfo) - val table = bufferToCSRNumericTable(shuffledBuffer, bufferInfo, nVectors.toInt, nFeatures.toInt, nBlocks, rankId) + val table = bufferToCSRNumericTable(shuffledBuffer, bufferInfo, + nVectors.toInt, nFeatures.toInt, nBlocks, rankId) val result = new ALSResult() cDALImplictALS( @@ -245,87 +119,82 @@ class ALSDALImpl[@specialized(Int, Long) ID: ClassTag]( result ) Iterator(result) - }.cache() - -// results.foreach { p => -//// val usersFactorsNumTab = OneDAL.makeNumericTable(p.cUsersFactorsNumTab) -//// println("foreach", p.cUsersFactorsNumTab, p.cItemsFactorsNumTab) -// println("result", p.rankId, p.cUserOffset, p.cItemOffset); -// } - -// val usersFactorsRDD = results.mapPartitionsWithIndex { (index: Int, partiton: Iterator[ALSResult]) => -// partiton.foreach { p => -// val usersFactorsNumTab = OneDAL.makeNumericTable(p.cUsersFactorsNumTab) -// Service.printNumericTable("usersFactorsNumTab", usersFactorsNumTab) -// } -// Iterator() -// }.collect() - - val usersFactorsRDD = results.mapPartitionsWithIndex { (index: Int, partiton: Iterator[ALSResult]) => - val ret = partiton.flatMap { p => - val userOffset = p.cUserOffset.toInt - val usersFactorsNumTab = OneDAL.makeNumericTable(p.cUsersFactorsNumTab) - val nRows = usersFactorsNumTab.getNumberOfRows.toInt - val nCols = usersFactorsNumTab.getNumberOfColumns.toInt - var buffer = FloatBuffer.allocate(nCols * nRows) - // should use returned buffer - buffer = usersFactorsNumTab.getBlockOfRows(0, nRows, buffer) - (0 until nRows).map { index => - val array = Array.fill(nCols){0.0f} - buffer.get(array, 0, nCols) - ((index+userOffset).asInstanceOf[ID], array) - }.toIterator - } - ret - }.setName("userFactors").cache() - - val itemsFactorsRDD = results.mapPartitionsWithIndex { (index: Int, partiton: Iterator[ALSResult]) => - val ret = partiton.flatMap { p => - val itemOffset = p.cItemOffset.toInt - val itemsFactorsNumTab = OneDAL.makeNumericTable(p.cItemsFactorsNumTab) - val nRows = itemsFactorsNumTab.getNumberOfRows.toInt - val nCols = itemsFactorsNumTab.getNumberOfColumns.toInt - var buffer = FloatBuffer.allocate(nCols * nRows) - // should use returned buffer - buffer = itemsFactorsNumTab.getBlockOfRows(0, nRows, buffer) - (0 until nRows).map { index => - val array = Array.fill(nCols){0.0f} - buffer.get(array, 0, nCols) - ((index+itemOffset).asInstanceOf[ID], array) - }.toIterator - } - ret - }.setName("itemFactors").cache() + }.cache() + + val usersFactorsRDD = results + .mapPartitionsWithIndex { (index: Int, partiton: Iterator[ALSResult]) => + val ret = partiton.flatMap { p => + val userOffset = p.cUserOffset.toInt + val usersFactorsNumTab = OneDAL.makeNumericTable(p.cUsersFactorsNumTab) + val nRows = usersFactorsNumTab.getNumberOfRows.toInt + val nCols = usersFactorsNumTab.getNumberOfColumns.toInt + var buffer = FloatBuffer.allocate(nCols * nRows) + // should use returned buffer + buffer = usersFactorsNumTab.getBlockOfRows(0, nRows, buffer) + (0 until nRows).map { index => + val array = Array.fill(nCols) { + 0.0f + } + buffer.get(array, 0, nCols) + ((index + userOffset).asInstanceOf[ID], array) + }.toIterator + } + ret + }.setName("userFactors").cache() + + val itemsFactorsRDD = results + .mapPartitionsWithIndex { (index: Int, partiton: Iterator[ALSResult]) => + val ret = 
partiton.flatMap { p => + val itemOffset = p.cItemOffset.toInt + val itemsFactorsNumTab = OneDAL.makeNumericTable(p.cItemsFactorsNumTab) + val nRows = itemsFactorsNumTab.getNumberOfRows.toInt + val nCols = itemsFactorsNumTab.getNumberOfColumns.toInt + var buffer = FloatBuffer.allocate(nCols * nRows) + // should use returned buffer + buffer = itemsFactorsNumTab.getBlockOfRows(0, nRows, buffer) + (0 until nRows).map { index => + val array = Array.fill(nCols) { + 0.0f + } + buffer.get(array, 0, nCols) + ((index + itemOffset).asInstanceOf[ID], array) + }.toIterator + } + ret + }.setName("itemFactors").cache() usersFactorsRDD.count() itemsFactorsRDD.count() -// usersFactorsRDD.foreach { case (id, array) => -// println("usersFactorsRDD", id, array.mkString(", ")) -// } -// -// itemsFactorsRDD.foreach { case (id, array) => -// println("itemsFactorsRDD", id, array.mkString(", ")) -// } - (usersFactorsRDD, itemsFactorsRDD) } - private def getPartitionOffset(partitionId: Int, nRatings: Int, nBlocks: Int): Int = { - require(partitionId >=0 && partitionId < nBlocks) - val itemsInBlock = nRatings / nBlocks - return partitionId * itemsInBlock + def ratingsToByteBuffer(ratings: Array[Rating[ID]]): ByteBuffer = { + val buffer = ByteBuffer.allocateDirect(ratings.length * (8 + 8 + 4)) + // Use little endian + buffer.order(ByteOrder.LITTLE_ENDIAN) + ratings.foreach { rating => + buffer.putLong(rating.user.toString.toLong) + buffer.putLong(rating.item.toString.toLong) + buffer.putFloat(rating.rating) + } + buffer } private def bufferToCSRNumericTable(buffer: ByteBuffer, info: ALSPartitionInfo, - nVectors: Int, nFeatures: Int, nBlocks: Int, rankId: Int): CSRNumericTable = { + nVectors: Int, nFeatures: Int, + nBlocks: Int, rankId: Int): CSRNumericTable = { // Use little endian buffer.order(ByteOrder.LITTLE_ENDIAN) val ratingsNum = info.ratingsNum val csrRowNum = info.csrRowNum - val values = Array.fill(ratingsNum) { 0.0f } - val columnIndices = Array.fill(ratingsNum) { 0L } + val values = Array.fill(ratingsNum) { + 0.0f + } + val columnIndices = Array.fill(ratingsNum) { + 0L + } val rowOffsets = ArrayBuffer[Long](1L) var index = 0 @@ -333,9 +202,9 @@ class ALSDALImpl[@specialized(Int, Long) ID: ClassTag]( // Each partition converted to one CSRNumericTable for (i <- 0 until ratingsNum) { // Modify row index for each partition (start from 0) - val row = buffer.getLong(i*RATING_SIZE) - getPartitionOffset(rankId, nFeatures, nBlocks) - val column = buffer.getLong(i*RATING_SIZE+8) - val rating = buffer.getFloat(i*RATING_SIZE+16) + val row = buffer.getLong(i * RATING_SIZE) - getPartitionOffset(rankId, nFeatures, nBlocks) + val column = buffer.getLong(i * RATING_SIZE + 8) + val rating = buffer.getFloat(i * RATING_SIZE + 16) values(index) = rating // one-based index @@ -350,27 +219,49 @@ class ALSDALImpl[@specialized(Int, Long) ID: ClassTag]( index = index + 1 } // one-based row index - rowOffsets += index+1 - -// println("rankId:", rankId) -// println("csrRowNum", csrRowNum) - -// println(rowOffsets.mkString(" ")) -// println(columnIndices.mkString(" ")) -// println(values.mkString(" ")) + rowOffsets += index + 1 val contextLocal = new DaalContext() - val cTable = OneDAL.cNewCSRNumericTable(values, columnIndices, rowOffsets.toArray, nVectors, csrRowNum) + val cTable = OneDAL.cNewCSRNumericTable(values, columnIndices, rowOffsets.toArray, + nVectors, csrRowNum) val table = new CSRNumericTable(contextLocal, cTable) - println("Input dimensions:", table.getNumberOfRows, table.getNumberOfColumns) -// 
Service.printNumericTable("Input NumericTable", table) - table } + private def getPartitionOffset(partitionId: Int, nRatings: Int, nBlocks: Int): Int = { + require(partitionId >= 0 && partitionId < nBlocks) + val itemsInBlock = nRatings / nBlocks + return partitionId * itemsInBlock + } + + // Return Map partitionId -> (ratingsNum, csrRowNum, rowOffset) + private def getRatingsPartitionInfo(data: RDD[Rating[ID]]): Map[Int, (Int, Int, Int)] = { + val collectd = data.mapPartitionsWithIndex { case (index: Int, it: Iterator[Rating[ID]]) => + var ratingsNum = 0 + var s = Set[ID]() + it.foreach { v => + s += v.user + ratingsNum += 1 + } + Iterator((index, (ratingsNum, s.count(_ => true)))) + }.collect + + var ret = Map[Int, (Int, Int, Int)]() + var rowOffset = 0 + collectd.foreach { v => + val partitionId = v._1 + val ratingsNum = v._2._1 + val csrRowNum = v._2._2 + ret += (partitionId -> (ratingsNum, csrRowNum, rowOffset)) + rowOffset = rowOffset + csrRowNum + } + + ret + } + // Single entry to call Implict ALS DAL backend - @native private def cDALImplictALS(data: Long, + @native private def cDALImplictALS(data: Long, nUsers: Long, nFactors: Int, maxIter: Int, @@ -380,6 +271,7 @@ class ALSDALImpl[@specialized(Int, Long) ID: ClassTag]( executor_cores: Int, rankId: Int, result: ALSResult): Long + @native private def cShuffleData(data: ByteBuffer, nTotalKeys: Int, nBlocks: Int, diff --git a/mllib-dal/src/main/scala/org/apache/spark/ml/util/OneCCL.scala b/mllib-dal/src/main/scala/org/apache/spark/ml/util/OneCCL.scala index 7581a1003..7ea7cb694 100644 --- a/mllib-dal/src/main/scala/org/apache/spark/ml/util/OneCCL.scala +++ b/mllib-dal/src/main/scala/org/apache/spark/ml/util/OneCCL.scala @@ -1,12 +1,11 @@ /* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at + * Copyright 2020 Intel Corporation * - * http://www.apache.org/licenses/LICENSE-2.0 + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -25,12 +24,12 @@ object OneCCL extends Logging { // Run on Executor def setExecutorEnv(): Unit = { - setEnv("CCL_ATL_TRANSPORT","ofi") + setEnv("CCL_ATL_TRANSPORT", "ofi") // Uncomment this if you whant to debug oneCCL // setEnv("CCL_LOG_LEVEL", "2") } - def init(executor_num: Int, rank: Int, ip_port: String) = { + def init(executor_num: Int, rank: Int, ip_port: String): Unit = { setExecutorEnv() @@ -42,7 +41,8 @@ object OneCCL extends Logging { // executor number should equal to oneCCL world size assert(executor_num == cclParam.commSize, "executor number should equal to oneCCL world size") - logInfo(s"Initialized with executorNum: $executor_num, commSize, ${cclParam.commSize}, rankId: ${cclParam.rankId}") + logInfo(s"Initialized with executorNum: $executor_num, " + + s"commSize, ${cclParam.commSize}, rankId: ${cclParam.rankId}") } // Run on Executor @@ -62,4 +62,4 @@ object OneCCL extends Logging { @native def setEnv(key: String, value: String, overwrite: Boolean = true): Int @native def c_getAvailPort(localIP: String): Int -} \ No newline at end of file +} diff --git a/mllib-dal/src/main/scala/org/apache/spark/ml/util/OneDAL.scala b/mllib-dal/src/main/scala/org/apache/spark/ml/util/OneDAL.scala index 9b6c0f6c7..62a803dc3 100644 --- a/mllib-dal/src/main/scala/org/apache/spark/ml/util/OneDAL.scala +++ b/mllib-dal/src/main/scala/org/apache/spark/ml/util/OneDAL.scala @@ -1,12 +1,11 @@ /* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at + * Copyright 2020 Intel Corporation * - * http://www.apache.org/licenses/LICENSE-2.0 + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -17,14 +16,17 @@ package org.apache.spark.ml.util -import java.nio.DoubleBuffer +import java.util.logging.{Level, Logger} -import com.intel.daal.data_management.data.{HomogenNumericTable, NumericTable, RowMergedNumericTable, Matrix => DALMatrix} +import com.intel.daal.data_management.data.{HomogenNumericTable, Matrix => DALMatrix, NumericTable, + RowMergedNumericTable} import com.intel.daal.services.DaalContext -import org.apache.spark.ml.linalg.{DenseMatrix, DenseVector, Vector, Vectors} + +import org.apache.spark.SparkContext +import org.apache.spark.ml.linalg.{Vector, Vectors} import org.apache.spark.mllib.linalg.{Vector => OldVector} import org.apache.spark.rdd.{ExecutorInProcessCoalescePartitioner, RDD} -import java.util.logging.{Logger, Level} +import org.apache.spark.storage.StorageLevel object OneDAL { @@ -49,7 +51,7 @@ object OneDAL { resArray } - def makeNumericTable (cData: Long) : NumericTable = { + def makeNumericTable(cData: Long): NumericTable = { val context = new DaalContext() val table = new HomogenNumericTable(context, cData) @@ -57,7 +59,7 @@ object OneDAL { table } - def makeNumericTable (arrayVectors: Array[OldVector]): NumericTable = { + def makeNumericTable(arrayVectors: Array[OldVector]): NumericTable = { val numCols = arrayVectors.head.size val numRows: Int = arrayVectors.size @@ -68,42 +70,64 @@ object OneDAL { arrayVectors.zipWithIndex.foreach { case (v, rowIndex) => - for (colIndex <- 0 until numCols) - // matrix.set(rowIndex, colIndex, row.getString(colIndex).toDouble) + for (colIndex <- 0 until numCols) { setNumericTableValue(matrix.getCNumericTable, rowIndex, colIndex, v(colIndex)) + } } matrix } - def rddVectorToNumericTables(vectors: RDD[Vector], executorNum: Int): RDD[Long] = { - // repartition to executorNum if not enough partitions + def releaseNumericTables(sparkContext: SparkContext): Unit = { + sparkContext.getPersistentRDDs + .filter(r => r._2.name == "numericTables") + .foreach { rdd => + val numericTables = rdd._2.asInstanceOf[RDD[Long]] + numericTables.foreach { address => + OneDAL.cFreeDataMemory(address) + } + } + } + + def vectorsToMergedNumericTables(vectors: RDD[Vector], executorNum: Int): RDD[Long] = { + require(executorNum > 0) + + logger.info(s"Processing partitions with $executorNum executors") + + // Repartition to executorNum if not enough partitions val dataForConversion = if (vectors.getNumPartitions < executorNum) { vectors.repartition(executorNum).setName("Repartitioned for conversion").cache() } else { vectors } + // Get dimensions for each partition val partitionDims = Utils.getPartitionDims(dataForConversion) - // filter out empty partitions - val nonEmptyPartitions = dataForConversion.mapPartitionsWithIndex { (index: Int, it: Iterator[Vector]) => - Iterator(Tuple3(partitionDims(index)._1, index, it)) - }.filter { entry => { entry._1 > 0 }} + // Filter out empty partitions + val nonEmptyPartitions = dataForConversion.mapPartitionsWithIndex { + (index: Int, it: Iterator[Vector]) => Iterator(Tuple3(partitionDims(index)._1, index, it)) + }.filter { entry => { + entry._1 > 0 + } + } + // Convert to RDD[HomogenNumericTable] val numericTables = nonEmptyPartitions.map { entry => val numRows = entry._1 val index = entry._2 val it = entry._3 val numCols = partitionDims(index)._2 + logger.info(s"Partition 
index: $index, numCols: $numCols, numRows: $numRows") + // Build DALMatrix, this will load libJavaAPI, libtbb, libtbbmalloc val context = new DaalContext() val matrix = new DALMatrix(context, classOf[java.lang.Double], numCols.toLong, numRows.toLong, NumericTable.AllocationFlag.DoAllocate) // oneDAL libs should be loaded by now, loading other native libs - logger.log(logLevel, "IntelMLlib: Loading other native libraries ...") + logger.info("Loading native libraries") LibLoader.loadLibraries() var dalRow = 0 @@ -115,16 +139,17 @@ object OneDAL { } matrix.getCNumericTable - }.cache() + }.setName("numericTables").cache() - // workaroud to fix the bug of multi executors handling same partition. - numericTables.foreachPartition(() => _) numericTables.count() - val cachedRdds = vectors.sparkContext.getPersistentRDDs - cachedRdds.filter(r => r._2.name=="instancesRDD").foreach (r => r._2.unpersist()) + // Unpersist instances RDD + if (vectors.getStorageLevel != StorageLevel.NONE) { + vectors.unpersist() + } - val coalescedRdd = numericTables.coalesce(1, + // Coalesce partitions belonging to the same executor + val coalescedRdd = numericTables.coalesce(executorNum, partitionCoalescer = Some(new ExecutorInProcessCoalescePartitioner())) val coalescedTables = coalescedRdd.mapPartitions { iter => @@ -144,12 +169,14 @@ object OneDAL { @native def cAddNumericTable(cObject: Long, numericTableAddr: Long) - @native def cSetDoubleBatch(numTableAddr: Long, curRows: Int, batch: Array[Double], numRows: Int, numCols: Int) - + @native def cSetDoubleBatch(numTableAddr: Long, curRows: Int, batch: Array[Double], + numRows: Int, numCols: Int) + @native def cFreeDataMemory(numTableAddr: Long) - @native def cCheckPlatformCompatibility() : Boolean + @native def cCheckPlatformCompatibility(): Boolean - @native def cNewCSRNumericTable(data: Array[Float], colIndices: Array[Long], rowOffsets: Array[Long], nFeatures: Long, - nVectors: Long) : Long + @native def cNewCSRNumericTable(data: Array[Float], + colIndices: Array[Long], rowOffsets: Array[Long], + nFeatures: Long, nVectors: Long): Long } diff --git a/mllib-dal/src/main/scala/org/apache/spark/ml/util/Utils.scala b/mllib-dal/src/main/scala/org/apache/spark/ml/util/Utils.scala index 0dd43d24f..525afc78b 100644 --- a/mllib-dal/src/main/scala/org/apache/spark/ml/util/Utils.scala +++ b/mllib-dal/src/main/scala/org/apache/spark/ml/util/Utils.scala @@ -1,10 +1,26 @@ +/* + * Copyright 2020 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + package org.apache.spark.ml.util import java.net.InetAddress import org.apache.spark.{SparkConf, SparkContext} -import org.apache.spark.rdd.RDD import org.apache.spark.ml.linalg.Vector +import org.apache.spark.rdd.RDD object Utils { @@ -32,21 +48,6 @@ object Utils { ret } - def sparkExecutorNum(sc: SparkContext): Int = { - - if (sc.master.contains("local")) - return 1 - - // Create empty partitions to start executors - sc.parallelize(Seq[Int]()).count() - - // Get running executors infos - val executorInfos = sc.statusTracker.getExecutorInfos - - // Return executor number (exclude driver) - executorInfos.length - 1 - } - def sparkExecutorCores(): Int = { val conf = new SparkConf(true) @@ -63,18 +64,20 @@ object Utils { val info = sc.statusTracker.getExecutorInfos // get first executor, info(0) is driver - val host = if (sc.master.startsWith("local")) + val host = if (sc.master.startsWith("local")) { info(0).host() - else + } else { info(1).host() + } val ip = InetAddress.getByName(host).getHostAddress ip } - def checkExecutorAvailPort(data: RDD[_], localIP: String) : Int = { + def checkExecutorAvailPort(data: RDD[_], localIP: String): Int = { if (localIP == "127.0.0.1" || localIP == "127.0.1.1") { - println(s"\nOneCCL: Error: doesn't support loopback IP ${localIP}, please assign IP address to your host.\n") + println(s"\nOneCCL: Error: doesn't support loopback IP ${localIP}, " + + s"please assign IP address to your host.\n") System.exit(-1) } @@ -82,21 +85,23 @@ object Utils { val result = data.mapPartitions { p => LibLoader.loadLibraries() val port = OneCCL.getAvailPort(localIP) - if (port != -1) + if (port != -1) { Iterator(port) - else + } else { Iterator() + } }.collect() - return result(0) + result(0) } - def checkClusterPlatformCompatibility(sc: SparkContext) : Boolean = { + def checkClusterPlatformCompatibility(sc: SparkContext): Boolean = { LibLoader.loadLibraries() // check driver platform compatibility - if (!OneDAL.cCheckPlatformCompatibility()) + if (!OneDAL.cCheckPlatformCompatibility()) { return false + } // check workers' platform compatibility val executor_num = Utils.sparkExecutorNum(sc) @@ -106,6 +111,22 @@ object Utils { OneDAL.cCheckPlatformCompatibility() }.collect() - return result.forall( _ == true) + result.forall(_ == true) + } + + def sparkExecutorNum(sc: SparkContext): Int = { + + if (sc.master.contains("local")) { + return 1 + } + + // Create empty partitions to start executors + sc.parallelize(Seq[Int]()).count() + + // Get running executors infos + val executorInfos = sc.statusTracker.getExecutorInfos + + // Return executor number (exclude driver) + executorInfos.length - 1 } } diff --git a/mllib-dal/src/main/scala/org/apache/spark/rdd/ExecutorInProcessCoalescePartitioner.scala b/mllib-dal/src/main/scala/org/apache/spark/rdd/ExecutorInProcessCoalescePartitioner.scala index 6a19990e8..12f045a79 100644 --- a/mllib-dal/src/main/scala/org/apache/spark/rdd/ExecutorInProcessCoalescePartitioner.scala +++ b/mllib-dal/src/main/scala/org/apache/spark/rdd/ExecutorInProcessCoalescePartitioner.scala @@ -1,12 +1,11 @@ /* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at + * Copyright 2020 Intel Corporation * - * http://www.apache.org/licenses/LICENSE-2.0 + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -17,35 +16,28 @@ package org.apache.spark.rdd -import org.apache.commons.logging.LogFactory - -import org.apache.spark.Partition -import org.apache.spark.SparkException -import org.apache.spark.scheduler.ExecutorCacheTaskLocation -import org.apache.spark.scheduler.TaskLocation - import scala.collection.mutable import scala.collection.mutable.ArrayBuffer +import org.apache.spark.{Partition, SparkException} +import org.apache.spark.scheduler.{ExecutorCacheTaskLocation, TaskLocation} + class ExecutorInProcessCoalescePartitioner extends PartitionCoalescer with Serializable { def coalesce(maxPartitions: Int, prev: RDD[_]): Array[PartitionGroup] = { val map = new mutable.HashMap[String, mutable.HashSet[Partition]]() - val groupArr = ArrayBuffer[PartitionGroup]() prev.partitions.foreach(p => { val loc = prev.context.getPreferredLocs(prev, p.index) - loc.foreach{ - case location : ExecutorCacheTaskLocation => - - val execLoc = "executor_" + location.host + "_" + location.executorId - val partValue = map.getOrElse(execLoc, new mutable.HashSet[Partition]()) - partValue.add(p) - map.put(execLoc, partValue) - case loc : TaskLocation => - throw new SparkException("Invalid location !!!") - + loc.foreach { + case location : ExecutorCacheTaskLocation => + val execLoc = "executor_" + location.host + "_" + location.executorId + val partValue = map.getOrElse(execLoc, new mutable.HashSet[Partition]()) + partValue.add(p) + map.put(execLoc, partValue) + case _ : TaskLocation => + throw new SparkException("ExecutorInProcessCoalescePartitioner: Invalid task location!") } }) map.foreach(x => { @@ -54,12 +46,13 @@ class ExecutorInProcessCoalescePartitioner list.foreach(part => pg.partitions += part) groupArr += pg }) - if (groupArr.length == 0) throw new SparkException("No partitions or" + - " no locations for partitions found.") + if (groupArr.length == 0) { + throw new SparkException( + "ExecutorInProcessCoalescePartitioner: No partitions or no locations for partitions found.") + } val sortedGroupArr = groupArr.sortWith(_.partitions(0).index < _.partitions(0).index) - return sortedGroupArr.toArray + sortedGroupArr.toArray } } - diff --git a/mllib-dal/test.sh b/mllib-dal/test.sh index 63ae4eccb..d7df508fe 100755 --- a/mllib-dal/test.sh +++ b/mllib-dal/test.sh @@ -27,7 +27,7 @@ if [[ -z $CCL_ROOT ]]; then fi if [[ -z $1 ]]; then - echo SPARK_VER not defined, using default (3.0.0). + echo SPARK_VER not defined, using default version spark-3.0.0. 
else SPARK_VER=$1 fi @@ -42,13 +42,8 @@ echo Clang Version: $(clang -dumpversion) echo SPARK_VER=$SPARK_VER echo ============================= -# Enable signal chaining support for JNI -# export LD_PRELOAD=$JAVA_HOME/jre/lib/amd64/libjsig.so - -# -Dtest=none to turn off the Java tests - -# Test all -# mvn -Dtest=none -Dmaven.test.skip=false test +# Clean +mvn clean # Individual test if [[ -z $SPARK_VER ]]; then @@ -56,7 +51,7 @@ if [[ -z $SPARK_VER ]]; then mvn -Dtest=none -DwildcardSuites=org.apache.spark.ml.feature.IntelPCASuite test # mvn -Dtest=none -DwildcardSuites=org.apache.spark.ml.recommendation.IntelALSSuite test else - mvn -Dtest=none -DwildcardSuites=org.apache.spark.ml.clustering.IntelKMeansSuite test -P$SPARK_VER - mvn -Dtest=none -DwildcardSuites=org.apache.spark.ml.feature.IntelPCASuite test -P$SPARK_VER -# mvn -Dtest=none -DwildcardSuites=org.apache.spark.ml.recommendation.IntelALSSuite test -P$SPARK_VER + mvn -P$SPARK_VER -Dtest=none -DwildcardSuites=org.apache.spark.ml.clustering.IntelKMeansSuite test + mvn -P$SPARK_VER -Dtest=none -DwildcardSuites=org.apache.spark.ml.feature.IntelPCASuite test +# mvn -P$SPARK_VER -Dtest=none -DwildcardSuites=org.apache.spark.ml.recommendation.IntelALSSuite test fi