From 0ee1996ccc03614848c2342f6565288159f8daa4 Mon Sep 17 00:00:00 2001 From: Wang Yanzhang Date: Tue, 20 Sep 2016 17:22:35 +0800 Subject: [PATCH 01/12] support for mkl dnn api, which is migrated from WebscaleML. --- .../sparkdl/nn/mkl/BatchNormalization.scala | 203 ++++++ .../analytics/sparkdl/nn/mkl/Linear.scala | 256 ++++++++ .../LocalNormalizationAcrossChannels.scala | 159 +++++ .../analytics/sparkdl/nn/mkl/Pooling.scala | 205 +++++++ .../intel/analytics/sparkdl/nn/mkl/ReLU.scala | 125 ++++ .../sparkdl/nn/mkl/SpatialConvolution.scala | 337 ++++++++++ .../com/intel/analytics/sparkdl/mkl/MKL.java | 178 ++++++ mkl/native/pom.xml | 19 +- mkl/native/src/main/c/jni/.clang-format | 90 +++ mkl/native/src/main/c/jni/MKLWrapper.h | 471 ++++++++++++++ mkl/native/src/main/c/jni/batch_norm.cpp | 428 +++++++++++++ mkl/native/src/main/c/jni/convolution.cpp | 580 ++++++++++++++++++ mkl/native/src/main/c/jni/debug.cpp | 37 ++ mkl/native/src/main/c/jni/debug.h | 93 +++ mkl/native/src/main/c/jni/layer.cpp | 23 + mkl/native/src/main/c/jni/layer.h | 112 ++++ mkl/native/src/main/c/jni/linear.cpp | 501 +++++++++++++++ mkl/native/src/main/c/jni/lrn.cpp | 306 +++++++++ mkl/native/src/main/c/jni/memory.h | 425 +++++++++++++ .../src/main/c/jni/{mkl.c => omp_threads.cpp} | 11 +- mkl/native/src/main/c/jni/pooling.cpp | 364 +++++++++++ mkl/native/src/main/c/jni/relu.cpp | 288 +++++++++ mkl/native/src/main/c/jni/utils.cpp | 45 ++ mkl/native/src/main/c/jni/utils.h | 7 + 24 files changed, 5256 insertions(+), 7 deletions(-) create mode 100644 dl/src/main/scala/com/intel/analytics/sparkdl/nn/mkl/BatchNormalization.scala create mode 100644 dl/src/main/scala/com/intel/analytics/sparkdl/nn/mkl/Linear.scala create mode 100644 dl/src/main/scala/com/intel/analytics/sparkdl/nn/mkl/LocalNormalizationAcrossChannels.scala create mode 100644 dl/src/main/scala/com/intel/analytics/sparkdl/nn/mkl/Pooling.scala create mode 100644 dl/src/main/scala/com/intel/analytics/sparkdl/nn/mkl/ReLU.scala create mode 100644 dl/src/main/scala/com/intel/analytics/sparkdl/nn/mkl/SpatialConvolution.scala create mode 100644 mkl/native/src/main/c/jni/.clang-format create mode 100644 mkl/native/src/main/c/jni/MKLWrapper.h create mode 100644 mkl/native/src/main/c/jni/batch_norm.cpp create mode 100644 mkl/native/src/main/c/jni/convolution.cpp create mode 100644 mkl/native/src/main/c/jni/debug.cpp create mode 100644 mkl/native/src/main/c/jni/debug.h create mode 100644 mkl/native/src/main/c/jni/layer.cpp create mode 100644 mkl/native/src/main/c/jni/layer.h create mode 100644 mkl/native/src/main/c/jni/linear.cpp create mode 100644 mkl/native/src/main/c/jni/lrn.cpp create mode 100644 mkl/native/src/main/c/jni/memory.h rename mkl/native/src/main/c/jni/{mkl.c => omp_threads.cpp} (71%) create mode 100644 mkl/native/src/main/c/jni/pooling.cpp create mode 100644 mkl/native/src/main/c/jni/relu.cpp create mode 100644 mkl/native/src/main/c/jni/utils.cpp create mode 100644 mkl/native/src/main/c/jni/utils.h diff --git a/dl/src/main/scala/com/intel/analytics/sparkdl/nn/mkl/BatchNormalization.scala b/dl/src/main/scala/com/intel/analytics/sparkdl/nn/mkl/BatchNormalization.scala new file mode 100644 index 00000000000..6a1f9dee787 --- /dev/null +++ b/dl/src/main/scala/com/intel/analytics/sparkdl/nn/mkl/BatchNormalization.scala @@ -0,0 +1,203 @@ +package com.intel.analytics.sparkdl.nn.mkl + +import com.intel.analytics.sparkdl.tensor.TensorNumericMath.TensorNumeric +import com.intel.analytics.sparkdl.tensor.Tensor +import com.intel.analytics.sparkdl.utils.RandomGenerator._ 
+import com.intel.analytics.sparkdl.nn.Module +import com.intel.analytics.sparkdl.mkl.MKL + +import scala.language.implicitConversions + +import scala.reflect.ClassTag + +/** + * Created by wyz on 16-9-5. + */ +class SpatialBatchNormalization[@specialized(Float, Double) T:ClassTag] (val nOutput: Int, + val eps: Double = 1e-5, + val momentum: Double = 0.1, + val affine: Boolean = true) + (implicit ev: TensorNumeric[T]) extends Module[T] { + + require(nOutput > 0, "To set affine=false call SpatialBatchNormalization(nFeature, eps, momentum, false)") + + val nDim = 2 + val runningMean = Tensor[T](nOutput) + val runningVar = Tensor[T](nOutput).fill(ev.fromType[Int](1)) + val saveMean = Tensor[T](nOutput) + val saveStd = Tensor[T](nOutput).fill(ev.fromType[Int](1)) + + private var prevLayout : Array[Long] = Array() + private var nextLayout : Array[Long] = Array() + private var usePrev = false + private var useNext = false + private var forNext = false + private var forPrev = false + + private var classPtr = 0L + private var firstPass = true + + override def getClassPtr(): Long = classPtr + + val weight: Tensor[T] = if (affine) Tensor[T](nOutput) else null + val bias: Tensor[T] = if (affine) Tensor[T](nOutput) else null + gradWeight = if (affine) Tensor[T](nOutput) else null + gradBias = if (affine) Tensor[T](nOutput) else null + + val useWeight : Boolean = if (weight != null) true else false + val useBias : Boolean = if (bias != null) true else false + + if (affine) { + reset() + } + + override def reset(): Unit = { + if (null != weight) { + weight.apply1(_ => ev.fromType[Double](RNG.uniform(0, 1))) + } + + if (null != bias) { + bias.fill(ev.fromType[Int](0)) + } + + runningMean.zero() + runningVar.fill(ev.fromType[Int](1)) + } + + def checkInputDim(input : Tensor[T]): Unit ={ + require(input.dim() == nDim, s"only mini-batch supported (${nDim}D tensor), got ${input.dim()}D tensor instead") + require(input.size(2) == runningMean.nElement(), s"got ${input.size(2)}-feature tensor, expected ${runningMean.nElement()}") + } + + override def updateOutput(input : Tensor[T]) : Tensor[T] = { + //checkInputDim(input) + + output.resizeAs(input) + //saveMean.resizeAs(runningMean) + //saveStd.resizeAs(runningVar) + + val inputOffset = input.storageOffset() - 1; + val outputOffset = output.storageOffset() - 1; + + // +---------+-------+-------+ + // | | 3-dim | 4-dim | + // +=========+=======+=======+ + // | Number | ? 
| 1 | + // +---------+-------+-------+ + // | Channel | 1 | 2 | + // +---------+-------+-------+ + // | Height | 2 | 3 | + // +---------+-------+-------+ + // | Width | 3 | 4 | + // +---------+-------+-------+ + // Table: Index of 3-dim/4-dim input + + val inputWidth = input.size(input.dim()) + val inputHeight = input.size(input.dim() - 1) + val inputChannel = if (input.dim() <= 2) 1 else input.size(input.dim() - 2) + val inputNumber = if (input.dim() <= 3) 1 else input.size(input.dim() - 3) + // TODO we may set input.size(input.dim() - 3) == 1 if input.dim() == 3 + + val kernelOffset = weight.storageOffset() - 1 + val biasOffset = bias.storageOffset() - 1 + + implicit def bool2int(b:Boolean) = if (b) 1 else 0 + if (firstPass) { + ev.getType() match { + case "Float" => classPtr = MKL.BatchNormInitFloat( + inputNumber, inputChannel, inputHeight, inputWidth, + eps, useWeight, useBias, 4) + case "Double" => classPtr = MKL.BatchNormInitDouble( + inputNumber, inputChannel, inputHeight, inputWidth, + eps, useBias, useBias, 4) + case _ => throw new UnsupportedOperationException(s"Only Float/Double supported") + } + firstPass = false + } + + ev.getType() match { + case "Float" => MKL.BatchNormForwardFloat( + input.storage().array().asInstanceOf[Array[Float]], inputOffset, + output.storage().array().asInstanceOf[Array[Float]], outputOffset, + weight.storage().array().asInstanceOf[Array[Float]], kernelOffset, + bias.storage().array().asInstanceOf[Array[Float]], biasOffset, classPtr) + case "Double" => MKL.BatchNormForwardDouble( + input.storage().array().asInstanceOf[Array[Double]], inputOffset, + output.storage().array().asInstanceOf[Array[Double]], outputOffset, + weight.storage().array().asInstanceOf[Array[Double]], kernelOffset, + bias.storage().array().asInstanceOf[Array[Double]], biasOffset, classPtr) + case _ => throw new UnsupportedOperationException(s"Only Float/Double supported") + } + output + } + + override def updateGradInput(input: Tensor[T], gradOutput: Tensor[T]): Tensor[T] = { + gradInput.resizeAs(input) + + val inputOffset = input.storageOffset() - 1; + val outputOffset = output.storageOffset() - 1; + + // +---------+-------+-------+ + // | | 3-dim | 4-dim | + // +=========+=======+=======+ + // | Number | ? 
| 1 | + // +---------+-------+-------+ + // | Channel | 1 | 2 | + // +---------+-------+-------+ + // | Height | 2 | 3 | + // +---------+-------+-------+ + // | Width | 3 | 4 | + // +---------+-------+-------+ + // Table: Index of 3-dim/4-dim input + + val inputWidth = input.size(input.dim()) + val inputHeight = input.size(input.dim() - 1) + val inputChannel = if (input.dim() <= 2) 1 else input.size(input.dim() - 2) + val inputNumber = if (input.dim() <= 3) 1 else input.size(input.dim() - 3) + // TODO we may set input.size(input.dim() - 3) == 1 if input.dim() == 3 + + val kernelOffset = weight.storageOffset() - 1 + val biasOffset = bias.storageOffset() - 1 + + val kernelDiffOffset = gradWeight.storageOffset() - 1 + val biasDiffOffset = gradBias.storageOffset() - 1 + + val gradOutputOffset = gradOutput.storageOffset() - 1 + val gradInputOffset = gradInput.storageOffset() -1 + + implicit def bool2int(b:Boolean) = if (b) 1 else 0 + ev.getType() match { + case "Float" => MKL.BatchNormBackwardFloat( + input.storage().array().asInstanceOf[Array[Float]], inputOffset, + gradOutput.storage().array().asInstanceOf[Array[Float]], gradOutputOffset, + gradInput.storage().array().asInstanceOf[Array[Float]], gradInputOffset, + gradWeight.storage().array().asInstanceOf[Array[Float]], kernelDiffOffset, + gradBias.storage().array().asInstanceOf[Array[Float]], biasDiffOffset, classPtr) + case "Double" => MKL.BatchNormBackwardDouble( + input.storage().array().asInstanceOf[Array[Double]], inputOffset, + gradOutput.storage().array().asInstanceOf[Array[Double]], gradOutputOffset, + gradInput.storage().array().asInstanceOf[Array[Double]], gradInputOffset, + gradWeight.storage().array().asInstanceOf[Array[Double]], kernelDiffOffset, + gradBias.storage().array().asInstanceOf[Array[Double]], biasDiffOffset, classPtr) + case _ => throw new UnsupportedOperationException(s"Only Float/Double supported") + } + + gradInput + } + + override def accGradParameters(input: Tensor[T], gradOutput: Tensor[T], scale : Double): Unit = { + } + + override def zeroGradParameters(): Unit = { + gradWeight.zero() + gradBias.zero() + } + + override def parameters(): (Array[Tensor[T]], Array[Tensor[T]]) ={ + (Array(this.weight, this.bias), Array(this.gradWeight, this.gradBias)) + } + + override def toString(): String ={ + s"mkl.BatchNormalization[${ev.getType()}]($nOutput, $eps, $momentum, $affine)" + } +} diff --git a/dl/src/main/scala/com/intel/analytics/sparkdl/nn/mkl/Linear.scala b/dl/src/main/scala/com/intel/analytics/sparkdl/nn/mkl/Linear.scala new file mode 100644 index 00000000000..ec7455b8f1b --- /dev/null +++ b/dl/src/main/scala/com/intel/analytics/sparkdl/nn/mkl/Linear.scala @@ -0,0 +1,256 @@ +package com.intel.analytics.sparkdl.nn.mkl + +import com.intel.analytics.sparkdl.mkl.MKL +import com.intel.analytics.sparkdl.nn.{Default, InitializationMethod, Module, Xavier} +import com.intel.analytics.sparkdl.utils.RandomGenerator._ +import com.intel.analytics.sparkdl.tensor.TensorNumericMath.TensorNumeric +import com.intel.analytics.sparkdl.tensor.Tensor + +import scala.reflect.ClassTag + +class Linear[@specialized(Float, Double) T: ClassTag]( + inputSize: Int, + outputSize:Int, + val needCompute : Boolean = true, + private var initMethod : InitializationMethod = Default +)(implicit ev: TensorNumeric[T]) extends Module[T]{ + val weight: Tensor[T] = Tensor[T](outputSize,inputSize) + val bias: Tensor[T] = Tensor[T](outputSize) + val addBuffer: Tensor[T] = Tensor[T]() + this.gradWeight = Tensor[T](outputSize,inputSize) + this.gradBias = 
Tensor[T](outputSize) + + private var classPtr = 0L + private var firstPass = true + + override def getClassPtr(): Long = classPtr + + reset() + + // this is pointer to the layout of MKL used internal and the memory is allocated in native code. + // the magic codes are: + // layoutMKL(0) -> input + // layoutMKL(1) -> inputDiff / gradInput + // layoutMKL(2) -> output + // layoutMKL(3) -> outputDiff + // layoutMKL(4) -> kernel / filter + // layoutMKL(5) -> kernelDiff / gradWeight + // layoutMKL(6) -> bias + // layoutMKL(7) -> biasDiff / gradBias + val layoutMKL = Array.fill[Long](8)(-1) + + def setInitMethod(initMethod : InitializationMethod) : this.type = { + this.initMethod = initMethod + this + } + + + override def reset(): Unit ={ + initMethod match { + case Default => + val stdv = 1.0 /math.sqrt(weight.size(2)) + weight.apply1(_=> ev.fromType[Double](RNG.uniform(0,1)*2*stdv - stdv)) //todo, better to support uniform + bias.apply1(_ => ev.fromType[Double](RNG.uniform(0,1)*2*stdv - stdv)) + case Xavier => + val fanIn = weight.size(2) + val fanOut = weight.size(1) + val stdv = math.sqrt(3 / (fanIn + fanOut)) + weight.apply1(_=>ev.fromType[Double](RNG.uniform(0,1)*2*stdv - stdv)) //todo, better to support uniform + bias.fill(ev.fromType(0)) + case _ => ??? + } + } + + override def updateOutput(input: Tensor[T]): Tensor[T] ={ + require(input.dim() == 2, "only batch mode supported") + + val inputWidth = input.size(input.dim()) + val inputHeight = input.size(input.dim() - 1) + + + val nFrame = input.size(1) + val nElement = output.nElement + output.resize(Array(nFrame, bias.size(1))) + if(output.nElement() != nElement) + output.zero() + + val inputOffset = input.storageOffset() - 1 + val outputOffset = output.storageOffset() - 1 + val biasOffset = bias.storageOffset() - 1 + val kernelOffset = weight.storageOffset() - 1 + + val kernelHeight = outputSize + val kernelWidth = inputSize + val outputChannels = outputSize + + if (firstPass) { + ev.getType() match { + case "Double" => classPtr = MKL.LinearInitDouble(inputHeight, inputWidth, outputChannels, + kernelHeight, kernelWidth) + case "Float" => classPtr = MKL.LinearInitFloat(inputHeight, inputWidth, outputChannels, + kernelHeight, kernelWidth) + case _ => throw new UnsupportedOperationException(s"Only Float supported") + } + + firstPass = false + } + + ev.getType() match { + case "Double" => MKL.LinearForwardDouble( + input.storage().array().asInstanceOf[Array[Double]], inputOffset, + output.storage().array().asInstanceOf[Array[Double]], outputOffset, + weight.storage().array().asInstanceOf[Array[Double]], kernelOffset, + bias.storage().array().asInstanceOf[Array[Double]], biasOffset, + classPtr) + case "Float" => MKL.LinearForwardFloat( + input.storage().array().asInstanceOf[Array[Float]], inputOffset, + output.storage().array().asInstanceOf[Array[Float]], outputOffset, + weight.storage().array().asInstanceOf[Array[Float]], kernelOffset, + bias.storage().array().asInstanceOf[Array[Float]], biasOffset, + classPtr) + case _ => throw new UnsupportedOperationException(s"Only Float supported") + } + output + } + + override def updateGradInput(input: Tensor[T], gradOutput: Tensor[T]): Tensor[T] ={ + require(input.dim() == 2, "only batch mode supported") + val nElement = gradInput.nElement() + gradInput.resizeAs(input) + if(nElement != gradInput.nElement()) { + gradInput.zero() + } + + val inputWidth = input.size(input.dim()) + val inputHeight = input.size(input.dim() - 1) + + val inputOffset = input.storageOffset() - 1 + val kernelOffset = 
weight.storageOffset() - 1 + val biasOffset = bias.storageOffset() - 1 + val gradOutputOffset = gradOutput.storageOffset() - 1 + val gradInputOffset = gradInput.storageOffset() - 1 + val gradWeightOffset = gradWeight.storageOffset() - 1 + val gradBiasOffset = gradBias.storageOffset() - 1 + + val kernelHeight = outputSize + val kernelWidth = inputSize + val outputChannels = outputSize + + if(needCompute) { + ev.getType() match { + case "Double" => MKL.LinearBackwardDataDouble( + input.storage().array().asInstanceOf[Array[Double]], inputOffset, + gradOutput.storage().array().asInstanceOf[Array[Double]], gradOutputOffset, + gradInput.storage().array().asInstanceOf[Array[Double]], gradInputOffset, + weight.storage().array().asInstanceOf[Array[Double]], kernelOffset, + bias.storage().array().asInstanceOf[Array[Double]], biasOffset, classPtr) + case "Float" => MKL.LinearBackwardDataFloat( + input.storage().array().asInstanceOf[Array[Float]], inputOffset, + gradOutput.storage().array().asInstanceOf[Array[Float]], gradOutputOffset, + gradInput.storage().array().asInstanceOf[Array[Float]], gradInputOffset, + weight.storage().array().asInstanceOf[Array[Float]], kernelOffset, + bias.storage().array().asInstanceOf[Array[Float]], biasOffset, classPtr) + case _ => throw new UnsupportedOperationException(s"Only Float supported") + } + } + + ev.getType() match { + case "Double" => MKL.LinearBackwardKernelDouble( + input.storage().array().asInstanceOf[Array[Double]], inputOffset, + gradOutput.storage().array().asInstanceOf[Array[Double]], gradOutputOffset, + gradWeight.storage().array().asInstanceOf[Array[Double]], gradWeightOffset, + weight.storage().array().asInstanceOf[Array[Double]], kernelOffset, + bias.storage().array().asInstanceOf[Array[Double]], biasOffset, + classPtr) + + case "Float" => MKL.LinearBackwardKernelFloat( + input.storage().array().asInstanceOf[Array[Float]], inputOffset, + gradOutput.storage().array().asInstanceOf[Array[Float]], gradOutputOffset, + gradWeight.storage().array().asInstanceOf[Array[Float]], gradWeightOffset, + weight.storage().array().asInstanceOf[Array[Float]], kernelOffset, + bias.storage().array().asInstanceOf[Array[Float]], biasOffset, + classPtr) + + case _ => throw new UnsupportedOperationException(s"Only Float/Double supported") + } + + ev.getType() match { + case "Double" => MKL.LinearBackwardBiasDouble( + input.storage().array().asInstanceOf[Array[Double]], inputOffset, + gradOutput.storage().array().asInstanceOf[Array[Double]], gradOutputOffset, + gradBias.storage().array().asInstanceOf[Array[Double]], gradBiasOffset, + weight.storage().array().asInstanceOf[Array[Double]], kernelOffset, + bias.storage().array().asInstanceOf[Array[Double]], biasOffset, + classPtr) + + case "Float" => MKL.LinearBackwardBiasFloat( + input.storage().array().asInstanceOf[Array[Float]], inputOffset, + gradOutput.storage().array().asInstanceOf[Array[Float]], gradOutputOffset, + gradBias.storage().array().asInstanceOf[Array[Float]], gradBiasOffset, + weight.storage().array().asInstanceOf[Array[Float]], kernelOffset, + bias.storage().array().asInstanceOf[Array[Float]], biasOffset, + classPtr) + + case _ => throw new UnsupportedOperationException(s"Only Float/Double supported") + } + + gradInput + } + +// override def accGradParameters(input: Tensor[T], gradOutput: Tensor[T], scale: Double = 1.0): Unit ={ +// require(input.dim() == 2, "only batch mode supported") +// require(input.dim() == 1 || input.dim() == 2, "input must be vector or matrix") +// val value = 
ev.fromType[Double](scale) +// if(input.dim() == 1) { +// gradWeight.addr(value, gradOutput, input) +// gradBias.add(value, gradOutput) +// } +// else if(input.dim() == 2) { +// gradWeight.addmm(value, gradOutput.t, input) +// gradBias.addmv(value, gradOutput.t, addBuffer) +// } +// } + + override def updateParameters(learningRate:T): Unit ={ + //weight.map(gradWeight,(a,b)=>a - learningRate*b) + weight.add(ev.negative(learningRate), gradWeight) + //bias.map(gradBias,(a,b)=>a - learningRate*b) + bias.add(ev.negative(learningRate), gradBias) + } + + override def zeroGradParameters(): Unit = { + gradWeight.zero() + gradBias.zero() + } + + override def parameters(): (Array[Tensor[T]], Array[Tensor[T]]) ={ + (Array(this.weight, this.bias), Array(this.gradWeight, this.gradBias)) + } + + override def equals(obj : Any) : Boolean = { + + if(!super.equals(obj)) { + return false + } + + if(!obj.isInstanceOf[Linear[T]]) + return false + val other = obj.asInstanceOf[Linear[T]] + if(this.eq(other)) + return true + + gradWeight == other.gradWeight && + gradBias == other.gradBias && + weight == other.weight && + bias == other.bias + } + + override def toString() : String = { + s"nn.mkl.Linear($inputSize -> $outputSize)" + } + + override def findModel(paramOffset : Int, indexes : Array[Int]) : (Module[T], Int, Array[Int]) = { + (this, paramOffset - outputSize * inputSize - outputSize, indexes) + } + +} diff --git a/dl/src/main/scala/com/intel/analytics/sparkdl/nn/mkl/LocalNormalizationAcrossChannels.scala b/dl/src/main/scala/com/intel/analytics/sparkdl/nn/mkl/LocalNormalizationAcrossChannels.scala new file mode 100644 index 00000000000..7b5fff5544c --- /dev/null +++ b/dl/src/main/scala/com/intel/analytics/sparkdl/nn/mkl/LocalNormalizationAcrossChannels.scala @@ -0,0 +1,159 @@ +package com.intel.analytics.sparkdl.nn.mkl + +import com.intel.analytics.sparkdl.mkl.MKL +import com.intel.analytics.sparkdl.nn.Module +import com.intel.analytics.sparkdl.tensor.TensorNumericMath.TensorNumeric +import com.intel.analytics.sparkdl.tensor._ +import com.intel.analytics.sparkdl.utils.RandomGenerator._ +import scala.reflect.ClassTag +import scala.language.implicitConversions + +/** + * Created by wyz on 16-9-7. 
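+ *
+ * Cross-channel local response normalization (AlexNet/Caffe style): each element is
+ * divided by (k + alpha / size * sum(x_j^2))^beta, where the sum runs over `size`
+ * adjacent channels centred on the current one. The actual computation is delegated
+ * to the native MKL primitive, so the exact treatment of alpha (divided by size or
+ * not) follows that implementation; the formula above is only the usual convention.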
+ */ +class LocalNormalizationAcrossChannels[@specialized(Float, Double) T: ClassTag] +(val size : Int = 5, val alpha : Double = 1.0, val beta : Double = 0.75, val k : Double = 1.0)( + implicit ev: TensorNumeric[T]) extends Module[T] { + + private val scale = Tensor[T]() + private val paddedSquare = Tensor[T]() + private val paddedRatio = Tensor[T]() + private val accumRatio = Tensor[T]() + private val accumRatioTimeInput = Tensor[T]() + + require(size % 2 == 1, "LRN only supports odd values for size") + val prePad = (size - 1) / 2 + + var classPtr = 0L + private var firstPass = true + + val layoutMKL = Array.fill[Long](8)(-1) + + override def getClassPtr(): Long = classPtr + + override def equals(obj: Any): Boolean = { + if (!super.equals(obj)) { + return false + } + + if (!obj.isInstanceOf[LocalNormalizationAcrossChannels[T]]) + return false + val other = obj.asInstanceOf[LocalNormalizationAcrossChannels[T]] + if (this.eq(other)) + return true + + size == other.size && + alpha == other.alpha && beta == other.beta && k == other.k + } + + override def toString(): String = { + s"mkl.LocalResponseNormalizationAcrossChannels($size, $alpha, $beta, $k)" + } + + override def updateOutput(input: Tensor[T]): Tensor[T] = { + require(input.nDimension() == 4, "Input must have 4 dimensions, corresponding to (batch, channels, height, width)") + require(input.isContiguous(), "Input is not contiguous") + + output.resizeAs(input) + + val inputOffset = input.storageOffset() - 1; + val outputOffset = output.storageOffset() - 1; + + // +---------+-------+-------+ + // | | 3-dim | 4-dim | + // +=========+=======+=======+ + // | Number | ? | 1 | + // +---------+-------+-------+ + // | Channel | 1 | 2 | + // +---------+-------+-------+ + // | Height | 2 | 3 | + // +---------+-------+-------+ + // | Width | 3 | 4 | + // +---------+-------+-------+ + // Table: Index of 3-dim/4-dim input + + val inputWidth = input.size(input.dim()) + val inputHeight = input.size(input.dim() - 1) + val inputChannel = if (input.dim() <= 3) 1 else input.size(input.dim() - 2) + val inputNumber = if (input.dim() <= 3) 1 else input.size(input.dim() - 3) + // TODO we may set input.size(input.dim() - 3) == 1 if input.dim() == 3 + + if (firstPass) { + ev.getType() match { + case "Float" => classPtr = MKL.LRNInitFloat( + inputNumber, inputChannel, inputHeight, inputWidth, + size, alpha.toFloat, beta.toFloat, k.toFloat, 4) + case "Double" => classPtr = MKL.LRNInitDouble( + inputNumber, inputChannel, inputHeight, inputWidth, + size, alpha.toDouble, beta.toDouble, k.toDouble, 4) + case _ => throw new UnsupportedOperationException(s"Only Float/Double supported") + } + firstPass = false + } + + implicit def bool2int(b:Boolean) = if (b) 1 else 0 + ev.getType() match { + case "Float" => MKL.LRNForwardFloat( + input.storage().array().asInstanceOf[Array[Float]], inputOffset, + output.storage().array().asInstanceOf[Array[Float]], outputOffset, + classPtr + ) + case "Double" => MKL.LRNForwardDouble( + input.storage().array().asInstanceOf[Array[Double]], inputOffset, + output.storage().array().asInstanceOf[Array[Double]], outputOffset, + classPtr + ) + case _ => throw new UnsupportedOperationException(s"Only Float/Double supported") + } + + output + } + + override def updateGradInput(input: Tensor[T], gradOutput: Tensor[T]): Tensor[T] = { + require(input.nDimension() == 4, "Input must have 4 dimensions, corresponding to (batch, channels, height, width)") + require(gradOutput.isContiguous(), "gradOutput is not contiguous") + + gradInput.resizeAs(input) 
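+
+    // NOTE: classPtr is created lazily in updateOutput (firstPass), so this backward
+    // call assumes a forward pass has already run on an input of the same shape;
+    // calling it beforehand would hand an uninitialized (0L) pointer to the native code.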
+ + val inputOffset = input.storageOffset() - 1; + val outputOffset = output.storageOffset() - 1; + + // +---------+-------+-------+ + // | | 3-dim | 4-dim | + // +=========+=======+=======+ + // | Number | ? | 1 | + // +---------+-------+-------+ + // | Channel | 1 | 2 | + // +---------+-------+-------+ + // | Height | 2 | 3 | + // +---------+-------+-------+ + // | Width | 3 | 4 | + // +---------+-------+-------+ + // Table: Index of 3-dim/4-dim input + + val inputWidth = input.size(input.dim()) + val inputHeight = input.size(input.dim() - 1) + val inputChannel = input.size(input.dim() - 2) + val inputNumber = if (input.dim() == 3) 1 else input.size(input.dim() - 3) + // TODO we may set input.size(input.dim() - 3) == 1 if input.dim() == 3 + + val gradOutputOffset = gradOutput.storageOffset() - 1 + val gradInputOffset = gradInput.storageOffset() -1 + + ev.getType() match { + case "Float" => MKL.LRNBackwardFloat( + input.storage().array().asInstanceOf[Array[Float]], inputOffset, + gradOutput.storage().array().asInstanceOf[Array[Float]], gradOutputOffset, + gradInput.storage().array().asInstanceOf[Array[Float]], gradInputOffset, + classPtr) + case "Double" => MKL.LRNBackwardDouble( + input.storage().array().asInstanceOf[Array[Double]], inputOffset, + gradOutput.storage().array().asInstanceOf[Array[Double]], gradOutputOffset, + gradInput.storage().array().asInstanceOf[Array[Double]], gradInputOffset, + classPtr) + case _ => throw new UnsupportedOperationException(s"Only Float/Double supported") + } + + gradInput + } +} diff --git a/dl/src/main/scala/com/intel/analytics/sparkdl/nn/mkl/Pooling.scala b/dl/src/main/scala/com/intel/analytics/sparkdl/nn/mkl/Pooling.scala new file mode 100644 index 00000000000..5aa2b1347a3 --- /dev/null +++ b/dl/src/main/scala/com/intel/analytics/sparkdl/nn/mkl/Pooling.scala @@ -0,0 +1,205 @@ +package com.intel.analytics.sparkdl.nn.mkl + +import com.intel.analytics.sparkdl.mkl.MKL +import com.intel.analytics.sparkdl.nn.Module +import com.intel.analytics.sparkdl.tensor.TensorNumericMath.TensorNumeric +import com.intel.analytics.sparkdl.utils.RandomGenerator +import com.intel.analytics.sparkdl.tensor.Tensor + +import scala.language.implicitConversions + +import scala.reflect.ClassTag + +class SpatialPooling[@specialized(Float, Double) T: ClassTag](val kernelWidth: Int, + val kernelHeight: Int, + val strideWidth: Int, + val strideHeight: Int, + val padWidth: Int = 0, + val padHeight: Int = 0) + (implicit ev: TensorNumeric[T]) extends Module[T] { + implicit def bool2int(b: Boolean) = if (b) 1 else 0 + + var classPtr: Long = 0L + private var firstPass = true + + val algorithm = 0; + + override def getClassPtr(): Long = classPtr + + // TODO just for adopt to the testcase + var ceil_mode = false + def ceil(): SpatialPooling[T] = { + ceil_mode = true + this + } + + def floor(): SpatialPooling[T] = { + ceil_mode = false + this + } + + override def toString() : String = { + s"mkl.Pooling" + } + + def this(kernelWidth: Int, kernelHeight: Int)(implicit ev: TensorNumeric[T]){ + this(kernelWidth, kernelHeight, kernelWidth, kernelHeight) + } + + // compute the output height and width + def computeOut(input:Int, pad:Int, kernel:Int, stride:Int): Int = { + if (ceil_mode) + math.ceil(1.0 * (input + 2 * pad - kernel) / stride).toInt + 1 + else + math.floor(1.0 * (input + 2 * pad - kernel) / stride).toInt + 1 + } + + override def updateGradInput(input: Tensor[T], gradOutput: Tensor[T]): Tensor[T] = { + gradInput.resizeAs(input) + + val inputOffset = input.storageOffset() - 1; + val 
outputOffset = output.storageOffset() - 1; + val gradInputOffset = gradInput.storageOffset() - 1; + val gradOutputOffset = gradOutput.storageOffset() - 1; + + // +---------+-------+-------+ + // | | 3-dim | 4-dim | + // +=========+=======+=======+ + // | Number | ? | 1 | + // +---------+-------+-------+ + // | Channel | 1 | 2 | + // +---------+-------+-------+ + // | Height | 2 | 3 | + // +---------+-------+-------+ + // | Width | 3 | 4 | + // +---------+-------+-------+ + // Table: Index of 3-dim/4-dim input + + val inputWidth = input.size(input.dim()) + val inputHeight = input.size(input.dim() - 1) + val inputChannel = input.size(input.dim() - 2) + val inputNumber = if (input.dim() == 3) 1 else input.size(input.dim() - 3) + // TODO we may set input.size(input.dim() - 3) == 1 if input.dim() == 3 + + val outputHeight = computeOut(inputHeight, padHeight, kernelHeight, strideHeight) + val outputWidth = computeOut(inputWidth, padHeight, kernelWidth, strideWidth) + val outputChannel = inputChannel + val outputNumber = inputNumber + + ev.getType() match { + case "Float" => MKL.PoolingBackwardFloat( + input.storage().array().asInstanceOf[Array[Float]], inputOffset, + gradOutput.storage().array().asInstanceOf[Array[Float]], gradOutputOffset, + gradInput.storage().array().asInstanceOf[Array[Float]], gradInputOffset, + classPtr) + case "Double" => MKL.PoolingBackwardDouble( + input.storage().array().asInstanceOf[Array[Double]], inputOffset, + gradOutput.storage().array().asInstanceOf[Array[Double]], gradOutputOffset, + gradInput.storage().array().asInstanceOf[Array[Double]], gradOutputOffset, + classPtr) + case _ => throw new UnsupportedOperationException(s"Only Float/Double supported") + } + + gradInput + } + + override def updateOutput(input: Tensor[T]): Tensor[T] = { + // +---------+-------+-------+ + // | | 3-dim | 4-dim | + // +=========+=======+=======+ + // | Number | ? 
| 1 | + // +---------+-------+-------+ + // | Channel | 1 | 2 | + // +---------+-------+-------+ + // | Height | 2 | 3 | + // +---------+-------+-------+ + // | Width | 3 | 4 | + // +---------+-------+-------+ + // Table: Index of 3-dim/4-dim input + + val inputWidth = input.size(input.dim()) + val inputHeight = input.size(input.dim() - 1) + val inputChannel = input.size(input.dim() - 2) + val inputNumber = if (input.dim() == 3) 1 else input.size(input.dim() - 3) + // TODO we may set input.size(input.dim() - 3) == 1 if input.dim() == 3 + + val outputHeight = computeOut(inputHeight, padHeight, kernelHeight, strideHeight) + val outputWidth = computeOut(inputWidth, padWidth, kernelWidth, strideWidth) + val outputChannel = inputChannel + val outputNumber = inputNumber + + val inputOffset = input.storageOffset() - 1; + val outputOffset = output.storageOffset() - 1; + + if (input.dim() == 3) + output.resize(Array(outputChannel, outputHeight, outputWidth)) + else + output.resize(Array(outputNumber, outputChannel, outputHeight, outputWidth)) + + // TODO algorithm = 0 means using MAX + val algorithm = 0 + + if (firstPass) { + ev.getType() match { + case "Float" => classPtr = MKL.PoolingInitFloat( + inputNumber, inputChannel, inputHeight, inputWidth, + kernelHeight, kernelWidth, strideHeight, strideWidth, padHeight, padWidth, 4, + ceil_mode, algorithm) + case "Double" => classPtr = MKL.PoolingInitDouble( + inputNumber, inputChannel, inputHeight, inputWidth, + kernelHeight, kernelWidth, strideHeight, strideWidth, padHeight, padWidth, 4, + ceil_mode, algorithm) + case _ => throw new UnsupportedOperationException(s"Only Float/Double supported") + } + + firstPass = false + } + + ev.getType() match { + case "Float" => MKL.PoolingForwardFloat( + input.storage().array.asInstanceOf[Array[Float]], inputOffset, + output.storage().array.asInstanceOf[Array[Float]], outputOffset, classPtr) + case "Double" => MKL.PoolingForwardDouble( + input.storage().array.asInstanceOf[Array[Double]], inputOffset, + output.storage().array.asInstanceOf[Array[Double]], outputOffset, classPtr) + case _ => throw new UnsupportedOperationException(s"Only Float/Double supported") + } + output + } +} + +class SpatialMaxPooling[T: ClassTag](kernelWidth: Int, + kernelHeight: Int, + strideWidth : Int, + strideHeight: Int, + padWidth: Int = 0, + padHeight: Int = 0) + (implicit ev: TensorNumeric[T]) + extends SpatialPooling[T](kernelWidth, kernelHeight, strideWidth, strideHeight, padWidth, padHeight) +{ + override val algorithm: Int = 0 + def this(kernelWidth: Int, kernelHeight: Int)(implicit ev: TensorNumeric[T]){ + this(kernelWidth, kernelHeight, kernelWidth, kernelHeight) + } + override def toString() : String = { + s"mkl.SpatialMaxPooling" + } +} + +class SpatialAveragePooling[T: ClassTag](kernelWidth: Int, + kernelHeight: Int, + strideWidth: Int, + strideHeight: Int, + padWidth: Int = 0, + padHeight: Int = 0) + (implicit ev: TensorNumeric[T]) + extends SpatialPooling[T](kernelWidth, kernelHeight, strideWidth, strideHeight, padWidth, padHeight) +{ + override val algorithm: Int = 1 + def this(kernelWidth: Int, kernelHeight: Int)(implicit ev: TensorNumeric[T]){ + this(kernelWidth, kernelHeight, kernelWidth, kernelHeight) + } + override def toString() : String = { + s"mkl.SpatialAvgPooling" + } +} diff --git a/dl/src/main/scala/com/intel/analytics/sparkdl/nn/mkl/ReLU.scala b/dl/src/main/scala/com/intel/analytics/sparkdl/nn/mkl/ReLU.scala new file mode 100644 index 00000000000..5d2a650515b --- /dev/null +++ 
b/dl/src/main/scala/com/intel/analytics/sparkdl/nn/mkl/ReLU.scala @@ -0,0 +1,125 @@ +package com.intel.analytics.sparkdl.nn.mkl + +import com.intel.analytics.sparkdl.mkl.MKL +import com.intel.analytics.sparkdl.nn.Module +import com.intel.analytics.sparkdl.tensor.Tensor +import com.intel.analytics.sparkdl.tensor.TensorNumericMath.TensorNumeric + +import scala.language.implicitConversions + +import scala.reflect.ClassTag + +class ReLU[@specialized(Float, Double) T: ClassTag](ip:Boolean = false)(implicit ev: TensorNumeric[T]) extends Module[T]{ + override def toString() : String = { + s"mkl.ReLU" + } + + private var firstPass = true + var classPtr = 0L; + + override def getClassPtr(): Long = classPtr + + override def updateGradInput(input: Tensor[T], gradOutput: Tensor[T]): Tensor[T] = { + gradInput.resizeAs(gradOutput) + // TODO Why does copy in mkl_dnn? Because it costs so much time, I comment is out. + // gradInput.copy(gradOutput) + + val inputOffset = input.storageOffset() - 1; + val outputOffset = output.storageOffset() - 1; + val gradInputOffset = gradInput.storageOffset() - 1; + val gradOutputOffset = gradOutput.storageOffset() - 1; + + // +---------+-------+-------+ + // | | 3-dim | 4-dim | + // +=========+=======+=======+ + // | Number | ? | 1 | + // +---------+-------+-------+ + // | Channel | 1 | 2 | + // +---------+-------+-------+ + // | Height | 2 | 3 | + // +---------+-------+-------+ + // | Width | 3 | 4 | + // +---------+-------+-------+ + // Table: Index of 3-dim/4-dim input + + val inputWidth = input.size(input.dim()) + val inputHeight = input.size(input.dim() - 1) + val inputChannel = if (input.dim() <= 2) 1 else input.size(input.dim() - 2) + val inputNumber = if (input.dim() <= 3) 1 else input.size(input.dim() - 3) + // TODO we may set input.size(input.dim() - 3) == 1 if input.dim() == 3 + + implicit def bool2int(b:Boolean) = if (b) 1 else 0 + val start = System.nanoTime() + ev.getType() match { + case "Float" => MKL.ReLUBackwardFloat( + input.storage().array().asInstanceOf[Array[Float]], inputOffset, + gradOutput.storage().array().asInstanceOf[Array[Float]], gradOutputOffset, + gradInput.storage().array().asInstanceOf[Array[Float]], gradInputOffset, classPtr) + + case "Double" => MKL.ReLUBackwardDouble( + input.storage().array().asInstanceOf[Array[Double]], inputOffset, + gradOutput.storage().array().asInstanceOf[Array[Double]], gradOutputOffset, + gradInput.storage().array().asInstanceOf[Array[Double]], gradInputOffset, classPtr) + + case _ => throw new UnsupportedOperationException(s"Only Float/Double supported") + } + //println("[SCALA] ReLU backward call JNI " + (System.nanoTime() - start) / 1e6) + + gradInput + } + + override def updateOutput(input: Tensor[T]): Tensor[T] = { + output.resizeAs(input) + + val inputOffset = input.storageOffset() - 1; + val outputOffset = output.storageOffset() - 1; + + // +---------+-------+-------+ + // | | 3-dim | 4-dim | + // +=========+=======+=======+ + // | Number | ? 
| 1 | + // +---------+-------+-------+ + // | Channel | 1 | 2 | + // +---------+-------+-------+ + // | Height | 2 | 3 | + // +---------+-------+-------+ + // | Width | 3 | 4 | + // +---------+-------+-------+ + // Table: Index of 3-dim/4-dim input + + val inputWidth = input.size(input.dim()) + val inputHeight = input.size(input.dim() - 1) + val inputChannel = if (input.dim() <= 2) 1 else input.size(input.dim() - 2) + val inputNumber = if (input.dim() <= 3) 1 else input.size(input.dim() - 3) + // TODO we may set input.size(input.dim() - 3) == 1 if input.dim() == 3 + + + if (firstPass) { + ev.getType() match { + case "Float" => classPtr = MKL.ReLUInitFloat( + inputNumber, inputChannel, inputHeight, inputWidth, 4); + case "Double" => classPtr = MKL.ReLUInitDouble( + inputNumber, inputChannel, inputHeight, inputWidth, 4); + case _ => throw new UnsupportedOperationException(s"Only Float/Double supported") + } + firstPass = false + } + + implicit def bool2int(b:Boolean) = if (b) 1 else 0 + val start = System.nanoTime() + ev.getType() match { + case "Float" => MKL.ReLUForwardFloat( + input.storage().array().asInstanceOf[Array[Float]], inputOffset, + output.storage().array().asInstanceOf[Array[Float]], outputOffset, classPtr) + + case "Double" => MKL.ReLUForwardDouble( + input.storage().array().asInstanceOf[Array[Double]], inputOffset, + output.storage().array().asInstanceOf[Array[Double]], outputOffset, classPtr) + + case _ => throw new UnsupportedOperationException(s"Only Float/Double supported") + } + //println("[SCALA] ReLU forward call JNI " + (System.nanoTime() - start) / 1e6) + + output + } +} diff --git a/dl/src/main/scala/com/intel/analytics/sparkdl/nn/mkl/SpatialConvolution.scala b/dl/src/main/scala/com/intel/analytics/sparkdl/nn/mkl/SpatialConvolution.scala new file mode 100644 index 00000000000..518283aa764 --- /dev/null +++ b/dl/src/main/scala/com/intel/analytics/sparkdl/nn/mkl/SpatialConvolution.scala @@ -0,0 +1,337 @@ +package com.intel.analytics.sparkdl.nn.mkl + +import com.intel.analytics.sparkdl.mkl.MKL +import com.intel.analytics.sparkdl.nn.Module +import com.intel.analytics.sparkdl.tensor.TensorNumericMath.TensorNumeric +import com.intel.analytics.sparkdl.tensor._ +import com.intel.analytics.sparkdl.utils.RandomGenerator._ + +import scala.language.implicitConversions + +import com.intel.analytics.sparkdl.nn.InitializationMethod +import com.intel.analytics.sparkdl.nn.Default +import com.intel.analytics.sparkdl.nn.Xavier + +import scala.reflect.ClassTag + +class SpatialConvolution[@specialized(Float, Double) T:ClassTag] ( + val nInputPlane : Int, // The number of expected input planes in the image given into forward() + val nOutputPlane : Int, // The number of output planes the convolution layer will produce. + val kernelWidth : Int, // The kernel width of the convolution + val kernelHeight : Int, // The kernel height of the convolution + val strideWidth : Int = 1, // The step of the convolution in the width dimension. + val strideHeight : Int = 1, //The step of the convolution in the height dimension + val padWidth : Int = 0, // The additional zeros added per width to the input planes. A good number is (kW-1)/2. + val padHeight : Int = 0, // The additional zeros added per height to the input planes. A good number is (kH-1)/2. 
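+    // Illustrative note: the spatial output size follows the usual convolution arithmetic,
+    //   out = (in + 2 * pad - kernel) / stride + 1   (integer division, see computeOut in updateOutput),
+    // e.g. a 224x224 input with kernel 7, stride 2, pad 3 yields (224 + 6 - 7) / 2 + 1 = 112.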
+ val needCompute : Boolean = true, + val groups: Int = 1, + private var initMethod: InitializationMethod = Default + )(implicit ev: TensorNumeric[T]) extends Module[T] { + val weight : Tensor[T] = Tensor[T](nOutputPlane, nInputPlane, kernelHeight, kernelWidth) + val bias : Tensor[T] = Tensor[T](nOutputPlane) + this.gradInput = Tensor[T](nOutputPlane, nInputPlane, kernelHeight, kernelWidth) + this.gradBias = Tensor[T](nOutputPlane) + this.gradWeight = Tensor[T](nOutputPlane, nInputPlane, kernelHeight, kernelWidth) + val fInput = Tensor[T]() + val fGradInput = Tensor[T]() + reset() + + private var im2colTime = 0L + private var col2imTime = 0L + + var classPtr = 0L + private var firstPass = true + + override def getClassPtr(): Long = classPtr + + def getIm2ColTime() = im2colTime + def getCol2ImgTime() = col2imTime + + def setInitMethod(initMethod: InitializationMethod): this.type = { + this.initMethod = initMethod + this + } + + // this is pointer to the layout of MKL used internal and the memory is allocated in native code. + // the magic codes are: + // layoutMKL(0) -> input + // layoutMKL(1) -> inputDiff / gradInput + // layoutMKL(2) -> output + // layoutMKL(3) -> outputDiff + // layoutMKL(4) -> kernel / filter + // layoutMKL(5) -> kernelDiff / gradWeight + // layoutMKL(6) -> bias + // layoutMKL(7) -> biasDiff / gradBias + val layoutMKL = Array.fill[Long](10)(-1) + + override def reset(): Unit ={ + val stdv = 1.0 /math.sqrt(kernelWidth * kernelHeight * nInputPlane) + weight.apply1(_=>ev.fromType[Double](RNG.uniform(0,1)*2*stdv - stdv)) //todo, better to support uniform + bias.apply1(_=>ev.fromType[Double](RNG.uniform(0,1)*2*stdv - stdv)) + } + + override def updateOutput(input: Tensor[T]): Tensor[T] = { + //var time = System.nanoTime() + require(input.dim() == 3 || input.dim() == 4, "Only support 3D or 4D(batch mode) input") + // TODO the requirement of contiguous input may be not necessary for MKL 2017. + // because it supports the api of groups convolution. + require(input.isContiguous(), "input is not contiguous") + + // compute the output height and width + def computeOut(input:Int, pad:Int, kernel:Int, stride:Int): Int = { + (input + 2 * pad - kernel) / stride + 1 + } + + // +---------+-------+-------+ + // | | 3-dim | 4-dim | + // +=========+=======+=======+ + // | Number | ? 
| 1 | + // +---------+-------+-------+ + // | Channel | 1 | 2 | + // +---------+-------+-------+ + // | Height | 2 | 3 | + // +---------+-------+-------+ + // | Width | 3 | 4 | + // +---------+-------+-------+ + // Table: Index of 3-dim/4-dim input + + /* + for (i <- 1 to input.dim()) printf("%d\t", input.size(i)) + println("") + for (i <- 1 to input.dim()) printf("%d\t", input.stride(i)) + println("") + */ + + val inputWidth = input.size(input.dim()) + val inputHeight = input.size(input.dim() - 1) + val inputChannel = input.size(input.dim() - 2) + // TODO we may set input.size(input.dim() - 3) == 1 if input.dim() == 3 + val inputNumber = if (input.dim() == 3) 1 else input.size(input.dim() - 3) + + // output number is as same as input number + val outputNumber = inputNumber + val outputChannel = nOutputPlane + val outputWidth = computeOut(inputWidth, padWidth, kernelWidth, strideWidth) + val outputHeight = computeOut(inputHeight, padHeight, kernelHeight, strideHeight) + + require(outputWidth >= 1 && outputHeight >= 1, "output size is too small") + if (input.dim() == 3) + output.resize(Array(outputChannel, outputHeight, outputWidth)) + else + output.resize(Array(outputNumber, outputChannel, outputHeight, outputWidth)) + + // kernel number and bias number are as same as nOutputPlane + val biasNumber = nOutputPlane + val kernelNumber = nOutputPlane + // TODO kernel channel equals to input channel now + val kernelChannel = inputChannel + + val inputOffset = input.storageOffset() - 1 + val outputOffset = output.storageOffset() - 1 + val biasOffset = bias.storageOffset() - 1 + val kernelOffset = weight.storageOffset() - 1 + + if (firstPass) { + ev.getType() match { + case "Double" => classPtr = MKL.ConvolutionInitDouble( + inputNumber, inputChannel, inputHeight, inputWidth, + kernelNumber, kernelChannel, kernelHeight, kernelWidth, strideHeight, strideWidth, padHeight, + padWidth, 4, groups) + case "Float" => classPtr = MKL.ConvolutionInitFloat( + inputNumber, inputChannel, inputHeight, inputWidth, + kernelNumber, kernelChannel, kernelHeight, kernelWidth, strideHeight, strideWidth, padHeight, + padWidth, 4, groups) + case _ => throw new UnsupportedOperationException(s"Only Float supported") + } + firstPass = false + } + + implicit def bool2int(b:Boolean) = if (b) 1 else 0 + val start = System.nanoTime() + ev.getType() match { + case "Double" => MKL.ConvolutionForwardDouble( + input.storage().array().asInstanceOf[Array[Double]], inputOffset, + output.storage().array().asInstanceOf[Array[Double]], outputOffset, + weight.storage().array().asInstanceOf[Array[Double]], kernelOffset, + bias.storage().array().asInstanceOf[Array[Double]], biasOffset, + classPtr + ) + case "Float" => MKL.ConvolutionForwardFloat( + input.storage().array().asInstanceOf[Array[Float]], inputOffset, + output.storage().array().asInstanceOf[Array[Float]], outputOffset, + weight.storage().array().asInstanceOf[Array[Float]], kernelOffset, + bias.storage().array().asInstanceOf[Array[Float]], biasOffset, + classPtr + ) + + case _ => throw new UnsupportedOperationException(s"Only Float supported") + } + //println("[SCALA] spatialconvolution forward call JNI " + (System.nanoTime() - start) / 1e6) + + output + } + + override def updateGradInput(input: Tensor[T], gradOutput: Tensor[T]) : Tensor[T] = { + require(input.nDimension() == 3 || input.nDimension() == 4, "Only support 3D or 4D input") + require(nOutputPlane == (if (input.nDimension() == 3) gradOutput.size(1) else gradOutput.size(2)), + "Number of output features is not equal to 
nOutputPlane") + require(input.isContiguous(), "input is not contiguous") + require(gradInput.isContiguous(), "gradInput is not contiguous") + gradInput.resizeAs(input) + + val gradInputOffset = gradInput.storageOffset() - 1 + val gradKernelOffset = gradWeight.storageOffset() - 1 + val gradOutputOffset = gradOutput.storageOffset() - 1 + val gradBiasOffset = gradBias.storageOffset() - 1 + + // +---------+-------+-------+ + // | | 3-dim | 4-dim | + // +=========+=======+=======+ + // | Number | ? | 1 | + // +---------+-------+-------+ + // | Channel | 1 | 2 | + // +---------+-------+-------+ + // | Height | 2 | 3 | + // +---------+-------+-------+ + // | Width | 3 | 4 | + // +---------+-------+-------+ + // Table: Index of 3-dim/4-dim input + + val inputWidth = input.size(input.dim()) + val inputHeight = input.size(input.dim() - 1) + val inputChannel = input.size(input.dim() - 2) + // TODO we may set input.size(input.dim() - 3) == 1 if input.dim() == 3 + val inputNumber = if (input.dim() == 3) 1 else input.size(input.dim() - 3) + + val kernelNumber = nOutputPlane + val kernelChannel = inputChannel + + val inputOffset = input.storageOffset() - 1 + val biasOffset = bias.storageOffset() - 1 + val kernelOffset = weight.storageOffset() - 1 + + implicit def bool2int(b:Boolean) = if (b) 1 else 0 + val start = System.nanoTime() + if (needCompute) { + ev.getType() match { + case "Double" => MKL.ConvolutionBackwardDataDouble( + input.storage().array().asInstanceOf[Array[Double]], inputOffset, + gradOutput.storage().array().asInstanceOf[Array[Double]], gradOutputOffset, + gradInput.storage().array().asInstanceOf[Array[Double]], gradInputOffset, + weight.storage().array().asInstanceOf[Array[Double]], kernelOffset, + bias.storage().array().asInstanceOf[Array[Double]], biasOffset, classPtr + ) + case "Float" => MKL.ConvolutionBackwardDataFloat( + input.storage().array().asInstanceOf[Array[Float]], inputOffset, + gradOutput.storage().array().asInstanceOf[Array[Float]], gradOutputOffset, + gradInput.storage().array().asInstanceOf[Array[Float]], gradInputOffset, + weight.storage().array().asInstanceOf[Array[Float]], kernelOffset, + bias.storage().array().asInstanceOf[Array[Float]], biasOffset, classPtr + ) + + case _ => throw new UnsupportedOperationException(s"Only Float/Double supported") + } + } + ev.getType() match { + case "Double" => + MKL.ConvolutionBackwardKernelDouble( + input.storage().array().asInstanceOf[Array[Double]], inputOffset, + gradOutput.storage().array().asInstanceOf[Array[Double]], gradOutputOffset, + gradWeight.storage().array().asInstanceOf[Array[Double]], gradKernelOffset, + weight.storage().array().asInstanceOf[Array[Double]], kernelOffset, + bias.storage().array().asInstanceOf[Array[Double]], biasOffset, classPtr + ) + case "Float" => + MKL.ConvolutionBackwardKernelFloat( + input.storage().array().asInstanceOf[Array[Float]], inputOffset, + gradOutput.storage().array().asInstanceOf[Array[Float]], gradOutputOffset, + gradWeight.storage().array().asInstanceOf[Array[Float]], gradKernelOffset, + weight.storage().array().asInstanceOf[Array[Float]], kernelOffset, + bias.storage().array().asInstanceOf[Array[Float]], biasOffset, classPtr + ) + case _ => throw new UnsupportedOperationException(s"Only Float/Double supported") + } + ev.getType() match { + case "Double" => + MKL.ConvolutionBackwardBiasDouble( + input.storage().array().asInstanceOf[Array[Double]], inputOffset, + gradOutput.storage().array().asInstanceOf[Array[Double]], gradOutputOffset, + 
gradBias.storage().array().asInstanceOf[Array[Double]], gradBiasOffset, + weight.storage().array().asInstanceOf[Array[Double]], kernelOffset, + bias.storage().array().asInstanceOf[Array[Double]], biasOffset, classPtr + ) + + case "Float" => + MKL.ConvolutionBackwardBiasFloat( + input.storage().array().asInstanceOf[Array[Float]], inputOffset, + gradOutput.storage().array().asInstanceOf[Array[Float]], gradOutputOffset, + gradBias.storage().array().asInstanceOf[Array[Float]], gradBiasOffset, + weight.storage().array().asInstanceOf[Array[Float]], kernelOffset, + bias.storage().array().asInstanceOf[Array[Float]], biasOffset, classPtr + ) + + case _ => throw new UnsupportedOperationException(s"Only Float/Double supported") + } + //println("[SCALA] spatialconvolution backward call JNI " + (System.nanoTime() - start) / 1e6) + gradInput + } + + override def updateParameters(learningRate:T): Unit ={ + weight.map(gradWeight, (a, b)=>ev.minus(a, ev.times(learningRate,b))) + bias.map(gradBias,(a,b)=>ev.minus(a, ev.times(learningRate,b))) + } + + override def zeroGradParameters(): Unit = { + gradWeight.zero() + gradBias.zero() + } + + override def parameters(): (Array[Tensor[T]], Array[Tensor[T]]) ={ + (Array(this.weight, this.bias), Array(this.gradWeight, this.gradBias)) + } + + override def equals(obj : Any) : Boolean = { + if(!super.equals(obj)) { + return false + } + + if(!obj.isInstanceOf[SpatialConvolution[T]]) + return false + val other = obj.asInstanceOf[SpatialConvolution[T]] + if(this.eq(other)) + return true + + nInputPlane == other.nInputPlane && + nOutputPlane == other.nOutputPlane && + kernelWidth == other.kernelWidth && + kernelHeight == other.kernelHeight && + strideWidth == other.strideWidth && + strideHeight == other.strideHeight && + padWidth == other.padWidth && + padHeight == other.padHeight && + weight == other.weight && + bias == other.bias && + gradWeight == other.gradWeight && + gradBias == other.gradBias + } + + override def toString() : String = { + s"mkl.SpatialConvolution($nInputPlane -> $nOutputPlane, $kernelWidth x $kernelHeight, $strideWidth, $strideHeight, $padWidth, $padHeight)" + } + + override def findModel(paramOffset : Int, indexes : Array[Int]) : (Module[T], Int, Array[Int]) = { + (this, paramOffset - nOutputPlane * nInputPlane * kernelHeight * kernelWidth - nOutputPlane, indexes) + } + + /*mkl-dnn's convolution_backward has done updateGradInput and accGradParameters, so accGradParameters does nothing + * + override def updateGradInput(input: Tensor[T], gradOutput: Tensor[T]): Tensor[T] = { + backward(input, gradOutput) + } + */ + + override def accGradParameters(input: Tensor[T], gradOutput: Tensor[T], scale: Double = 1.0): Unit = { + + } +} + diff --git a/mkl/jni/src/main/java/com/intel/analytics/sparkdl/mkl/MKL.java b/mkl/jni/src/main/java/com/intel/analytics/sparkdl/mkl/MKL.java index 42e19c689b0..08cb838f9f7 100644 --- a/mkl/jni/src/main/java/com/intel/analytics/sparkdl/mkl/MKL.java +++ b/mkl/jni/src/main/java/com/intel/analytics/sparkdl/mkl/MKL.java @@ -83,4 +83,182 @@ private static File file(String path) throws IOException { String name = new File(path).getName(); return createTempFile("jniloader", name); } + + /* Convolution API */ + public native static long ConvolutionInitFloat( + int inputNumber, int inputChannel, int inputHeight, int inputWidth, + int kernelNumber, int kernelChannel, int kernelHeight, int kernelWidth, + int strideHeight, int strideWidth, int padHeight, int padWidth, + int dimension, int groups); + public native static void 
ConvolutionForwardFloat( + float[] input, int inputOffset, float[] output, int outputOffset, + float[] kernel, int kernelOffset, float[] bias, int biasOffset, long classPtr); + public native static void ConvolutionBackwardDataFloat( + float[] input, int inputOffset, float[] gradOutput, int gradOutputOffset, + float[] gradInput, int gradInputOffset, + float[] kernel, int kernelOffset, float[] bias, int biasOffset, long classPtr); + public native static void ConvolutionBackwardKernelFloat( + float[] input, int inputOffset, float[] gradOutput, int gradOutputOffset, + float[] gradKernel, int gradKernelOffset, + float[] kernel, int kernelOffset, float[] bias, int biasOffset, long classPtr); + public native static void ConvolutionBackwardBiasFloat( + float[] input, int inputOffset, float[] gradOutput, int gradOutputOffset, + float[] gradBias, int gradBiasOffset, + float[] kernel, int kernelOffset, float[] bias, int biasOffset, long classPtr); + + public native static long ConvolutionInitDouble( + int inputNumber, int inputChannel, int inputHeight, int inputWidth, + int kernelNumber, int kernelChannel, int kernelHeight, int kernelWidth, + int strideHeight, int strideWidth, int padHeight, int padWidth, + int dimension, int groups); + public native static void ConvolutionForwardDouble( + double[] input, int inputOffset, double[] output, int outputOffset, + double[] kernel, int kernelOffset, double[] bias, int biasOffset, long classPtr); + public native static void ConvolutionBackwardDataDouble( + double[] input, int inputOffset, double[] gradOutput, int gradOutputOffset, + double[] gradInput, int gradInputOffset, + double[] kernel, int kernelOffset, double[] bias, int biasOffset, long classPtr); + public native static void ConvolutionBackwardKernelDouble( + double[] input, int inputOffset, double[] gradOutput, int gradOutputOffset, + double[] gradKernel, int gradKernelOffset, + double[] kernel, int kernelOffset, double[] bias, int biasOffset, long classPtr); + public native static void ConvolutionBackwardBiasDouble( + double[] input, int inputOffset, double[] gradOutput, int gradOutputOffset, + double[] gradBias, int gradBiasOffset, + double[] kernel, int kernelOffset, double[] bias, int biasOffset, long classPtr); + + /* ReLU API */ + public native static long ReLUInitFloat( + int inputNumber, int inputChannel, int inputHeight, int inputWidth, int dimension); + public native static void ReLUForwardFloat( + float[] input, int inputOffset, float[] output, int outputOffset, long classPtr); + public native static void ReLUBackwardFloat( + float[] input, int inputOffset, float[] gradOutput, int gradOutputOffset, + float[] gradInput, int gradInputOffset, long classPtr); + + public native static long ReLUInitDouble( + int inputNumber, int inputChannel, int inputHeight, int inputWidth, int dimension); + public native static void ReLUForwardDouble( + double[] input, int inputOffset, double[] output, int outputOffset, long classPtr); + public native static void ReLUBackwardDouble( + double[] input, int inputOffset, double[] gradOutput, int gradOutputOffset, + double[] gradInput, int gradInputOffset, long classPtr); + + /* Pooling API */ + public native static long PoolingInitFloat( + int inputNumber, int inputChannel, int inputHeight, int inputWidth, + int kernelHeight, int kernelWidth, int strideHeight, int strideWidth, + int padHeight, int padWidth, int dimension, int ceilMode, + int algorithm); + public native static void PoolingForwardFloat( + float[] input, int inputOffset, float[] output, int 
outputOffset, + long classPtr); + public native static void PoolingBackwardFloat( + float[] input, int inputOffset, float[] outputDiff, + int outputDiffOffset, float[] inputDiff, int inputDiffOffset, + long classPtr); + + public native static long PoolingInitDouble( + int inputNumber, int inputChannel, int inputHeight, int inputWidth, + int kernelHeight, int kernelWidth, int strideHeight, int strideWidth, + int padHeight, int padWidth, int dimension, int ceilMode, + int algorithm); + public native static void PoolingForwardDouble( + double[] input, int inputOffset, double[] output, int outputOffset, + long classPtr); + public native static void PoolingBackwardDouble( + double[] input, int inputOffset, double[] outputDiff, + int outputDiffOffset, double[] inputDiff, int inputDiffOffset, + long classPtr); + + /* Batch Normalization */ + public native static long BatchNormInitFloat( + int inputNumber, int inputChannel, int inputHeight, int inputWidth, + double eps, int useKernel, int useBias, + int dimension); + public native static void BatchNormForwardFloat( + float[] input, int inputOffset, float[] output, int outputOffset, + float[] kernel, int kernelOffset, float[] bias, int biasOffset, long classPtr); + public native static void BatchNormBackwardFloat( + float[] input, int inputOffset, float[] gradOutput, int gradOutputOffset, + float[] gradInput, int gradInputOffset, + float[] kernelDiff, int kernelDiffOffset, float[] biasDiff, int biasDiffOffset, long classPtr); + + public native static long BatchNormInitDouble( + int inputNumber, int inputChannel, int inputHeight, int inputWidth, + double eps, int useKernel, int useBias, + int dimension); + public native static void BatchNormForwardDouble( + double[] input, int inputOffset, double[] output, int outputOffset, + double[] kernel, int kernelOffset, double[] bias, int biasOffset, long classPtr); + public native static void BatchNormBackwardDouble( + double[] input, int inputOffset, double[] gradOutput, int gradOutputOffset, + double[] gradInput, int gradInputOffset, + double[] kernelDiff, int kernelDiffOffset, double[] biasDiff, int biasDiffOffset, long classPtr); + + /* LRN API*/ + public native static long LRNInitFloat(int inputNumber, int inputChannel, int inputHeight, int inputWidth, + int size, float alpha, float beta, float k, int dimension); + public native static void LRNForwardFloat(float[] input, int inputOffset, float[] output, int outputOffset, long classPtr); + public native static void LRNBackwardFloat(float[] input, int inputOffset, + float[] outputDiff, int outputOffsetDiff, + float[] inputDiff, int inputDiffOffset, + long classPtr); + public native static long LRNInitDouble(int inputNumber, int inputChannel, int inputHeight, int inputWidth, + int size, double alpha, double beta, double k, int dimension); + public native static void LRNForwardDouble(double[] input, int inputOffset, double[] output, int outputOffset, long classPtr); + public native static void LRNBackwardDouble(double[] input, int inputOffset, + double[] outputDiff, int outputOffsetDiff, + double[] inputDiff, int inputDiffOffset, + long classPtr); + + + /* Init MKL Model */ + public native static void SetPrevFloat(long prev, long current); + public native static void SetPrevDouble(long prev, long current); + + /* Delete all memmory allocated */ + public native static void ReleaseAllMemFloat(long classPtr); + public native static void ReleaseAllMemDouble(long classPtr); + + + // TODO + /* Linear API */ + public native static long LinearInitFloat( + int 
inputHeight, int inputWidth, int outputChannel, + int kernelHeight, int kernelWidth); + public native static void LinearForwardFloat( + float[] input, int inputOffset, float[] output, int outputOffset, + float[] kernel, int kernelOffset, float[] bias, int biasOffset, long classPtr); + public native static void LinearBackwardDataFloat( + float[] input, int inputOffset, float[] gradOutput, int gradOutputOffset, + float[] gradInput, int gradInputOffset, + float[] kernel, int kernelOffset, float[] bias, int biasOffset, long classPtr); + public native static void LinearBackwardKernelFloat( + float[] input, int inputOffset, float[] gradOutput, int gradOutputOffset, + float[] gradKernel, int gradKernelOffset, + float[] kernel, int kernelOffset, float[] bias, int biasOffset, long classPtr); + public native static void LinearBackwardBiasFloat( + float[] input, int inputOffset, float[] gradOutput, int gradOutputOffset, + float[] gradBias, int gradBiasOffset, + float[] kernel, int kernelOffset, float[] bias, int biasOffset, long classPtr); + + public native static long LinearInitDouble( + int inputHeight, int inputWidth, int outputChannel, + int kernelHeight, int kernelWidth); + public native static void LinearForwardDouble( + double[] input, int inputOffset, double[] output, int outputOffset, + double[] kernel, int kernelOffset, double[] bias, int biasOffset, long classPtr); + public native static void LinearBackwardDataDouble( + double[] input, int inputOffset, double[] gradOutput, int gradOutputOffset, + double[] gradInput, int gradInputOffset, + double[] kernel, int kernelOffset, double[] bias, int biasOffset, long classPtr); + public native static void LinearBackwardKernelDouble( + double[] input, int inputOffset, double[] gradOutput, int gradOutputOffset, + double[] gradKernel, int gradKernelOffset, + double[] kernel, int kernelOffset, double[] bias, int biasOffset, long classPtr); + public native static void LinearBackwardBiasDouble( + double[] input, int inputOffset, double[] gradOutput, int gradOutputOffset, + double[] gradBias, int gradBiasOffset, + double[] kernel, int kernelOffset, double[] bias, int biasOffset, long classPtr); } diff --git a/mkl/native/pom.xml b/mkl/native/pom.xml index 3f695449888..1eaad8f69e9 100644 --- a/mkl/native/pom.xml +++ b/mkl/native/pom.xml @@ -46,7 +46,16 @@ ${basedir}/src/main/c/jni - mkl.c + omp_threads.cpp + layer.cpp + convolution.cpp + pooling.cpp + lrn.cpp + linear.cpp + relu.cpp + batch_norm.cpp + utils.cpp + debug.cpp @@ -63,7 +72,11 @@ -fPIC -fopenmp -Wall - -std=c99 + -std=c++11 + -I ${JAVA_HOME}/include/ @@ -73,6 +86,8 @@ -lpthread -lm -lrt + -lrt + -lmkl_rt -shared diff --git a/mkl/native/src/main/c/jni/.clang-format b/mkl/native/src/main/c/jni/.clang-format new file mode 100644 index 00000000000..4c24541ff91 --- /dev/null +++ b/mkl/native/src/main/c/jni/.clang-format @@ -0,0 +1,90 @@ +--- +Language: Cpp +BasedOnStyle: llvm +AccessModifierOffset: -1 +AlignAfterOpenBracket: Align +AlignConsecutiveAssignments: false +AlignConsecutiveDeclarations: false +AlignEscapedNewlinesLeft: true +AlignOperands: true +AlignTrailingComments: true +AllowAllParametersOfDeclarationOnNextLine: true +AllowShortBlocksOnASingleLine: false +AllowShortCaseLabelsOnASingleLine: false +AllowShortFunctionsOnASingleLine: All +AllowShortIfStatementsOnASingleLine: true +AllowShortLoopsOnASingleLine: true +AlwaysBreakAfterDefinitionReturnType: None +AlwaysBreakAfterReturnType: None +AlwaysBreakBeforeMultilineStrings: true +AlwaysBreakTemplateDeclarations: true +BinPackArguments: 
true +BinPackParameters: true +BraceWrapping: + AfterClass: false + AfterControlStatement: false + AfterEnum: false + AfterFunction: false + AfterNamespace: false + AfterObjCDeclaration: false + AfterStruct: false + AfterUnion: false + BeforeCatch: false + BeforeElse: false + IndentBraces: false +BreakBeforeBinaryOperators: None +BreakBeforeBraces: Linux +BreakBeforeTernaryOperators: true +BreakConstructorInitializersBeforeComma: false +ColumnLimit: 80 +CommentPragmas: '^ IWYU pragma:' +ConstructorInitializerAllOnOneLineOrOnePerLine: true +ConstructorInitializerIndentWidth: 4 +ContinuationIndentWidth: 4 +Cpp11BracedListStyle: true +DerivePointerAlignment: true +DisableFormat: false +ExperimentalAutoDetectBinPacking: false +ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ] +IncludeCategories: + - Regex: '^<.*\.h>' + Priority: 1 + - Regex: '^<.*' + Priority: 2 + - Regex: '.*' + Priority: 3 +IndentCaseLabels: true +IndentWidth: 2 +IndentWrappedFunctionNames: false +KeepEmptyLinesAtTheStartOfBlocks: false +MacroBlockBegin: '' +MacroBlockEnd: '' +MaxEmptyLinesToKeep: 1 +NamespaceIndentation: None +ObjCBlockIndentWidth: 2 +ObjCSpaceAfterProperty: false +ObjCSpaceBeforeProtocolList: false +PenaltyBreakBeforeFirstCallParameter: 1 +PenaltyBreakComment: 300 +PenaltyBreakFirstLessLess: 120 +PenaltyBreakString: 1000 +PenaltyExcessCharacter: 1000000 +PenaltyReturnTypeOnItsOwnLine: 200 +PointerAlignment: Left +ReflowComments: true +SortIncludes: true +SpaceAfterCStyleCast: false +SpaceBeforeAssignmentOperators: true +SpaceBeforeParens: ControlStatements +SpaceInEmptyParentheses: false +SpacesBeforeTrailingComments: 2 +SpacesInAngles: false +SpacesInContainerLiterals: true +SpacesInCStyleCastParentheses: false +SpacesInParentheses: false +SpacesInSquareBrackets: false +Standard: Auto +TabWidth: 8 +UseTab: Never +AlignConsecutiveAssignments: true +AlignOperands: true diff --git a/mkl/native/src/main/c/jni/MKLWrapper.h b/mkl/native/src/main/c/jni/MKLWrapper.h new file mode 100644 index 00000000000..09da9adee8d --- /dev/null +++ b/mkl/native/src/main/c/jni/MKLWrapper.h @@ -0,0 +1,471 @@ +#ifndef _MKLWARPPER_H +#define _MKLWARPPER_H +#include +#include +#include + +template +dnnError_t dnnGroupsConvolutionCreateForwardBias( + dnnPrimitive_t *pConvolution, dnnPrimitiveAttributes_t attributes, + dnnAlgorithm_t algorithm, size_t groups, size_t dimension, + const size_t srcSize[], const size_t dstSize[], const size_t filterSize[], + const size_t convolutionStrides[], const int inputOffset[], + const dnnBorder_t borderType) +{ + return dnnGroupsConvolutionCreateForwardBias_F32( + pConvolution, attributes, algorithm, groups, dimension, srcSize, dstSize, + filterSize, convolutionStrides, inputOffset, borderType); +} +template <> +dnnError_t dnnGroupsConvolutionCreateForwardBias( + dnnPrimitive_t *pConvolution, dnnPrimitiveAttributes_t attributes, + dnnAlgorithm_t algorithm, size_t groups, size_t dimension, + const size_t srcSize[], const size_t dstSize[], const size_t filterSize[], + const size_t convolutionStrides[], const int inputOffset[], + const dnnBorder_t borderType) +{ + return dnnGroupsConvolutionCreateForwardBias_F64( + pConvolution, attributes, algorithm, groups, dimension, srcSize, dstSize, + filterSize, convolutionStrides, inputOffset, borderType); +} + +template +dnnError_t dnnGroupsConvolutionCreateBackwardData( + dnnPrimitive_t *pConvolution, dnnPrimitiveAttributes_t attributes, + dnnAlgorithm_t algorithm, size_t groups, size_t dimension, + const size_t srcSize[], const size_t dstSize[], const 
size_t filterSize[], + const size_t convolutionStrides[], const int inputOffset[], + const dnnBorder_t borderType) +{ + return dnnGroupsConvolutionCreateBackwardData_F32( + pConvolution, attributes, algorithm, groups, dimension, srcSize, dstSize, + filterSize, convolutionStrides, inputOffset, borderType); +} +template <> +dnnError_t dnnGroupsConvolutionCreateBackwardData( + dnnPrimitive_t *pConvolution, dnnPrimitiveAttributes_t attributes, + dnnAlgorithm_t algorithm, size_t groups, size_t dimension, + const size_t srcSize[], const size_t dstSize[], const size_t filterSize[], + const size_t convolutionStrides[], const int inputOffset[], + const dnnBorder_t borderType) +{ + return dnnGroupsConvolutionCreateBackwardData_F64( + pConvolution, attributes, algorithm, groups, dimension, srcSize, dstSize, + filterSize, convolutionStrides, inputOffset, borderType); +} +template +dnnError_t dnnGroupsConvolutionCreateBackwardFilter( + dnnPrimitive_t *pConvolution, dnnPrimitiveAttributes_t attributes, + dnnAlgorithm_t algorithm, size_t groups, size_t dimension, + const size_t srcSize[], const size_t dstSize[], const size_t filterSize[], + const size_t convolutionStrides[], const int inputOffset[], + const dnnBorder_t borderType) +{ + return dnnGroupsConvolutionCreateBackwardFilter_F32( + pConvolution, attributes, algorithm, groups, dimension, srcSize, dstSize, + filterSize, convolutionStrides, inputOffset, borderType); +} +template <> +dnnError_t dnnGroupsConvolutionCreateBackwardFilter( + dnnPrimitive_t *pConvolution, dnnPrimitiveAttributes_t attributes, + dnnAlgorithm_t algorithm, size_t groups, size_t dimension, + const size_t srcSize[], const size_t dstSize[], const size_t filterSize[], + const size_t convolutionStrides[], const int inputOffset[], + const dnnBorder_t borderType) +{ + return dnnGroupsConvolutionCreateBackwardFilter_F64( + pConvolution, attributes, algorithm, groups, dimension, srcSize, dstSize, + filterSize, convolutionStrides, inputOffset, borderType); +} +template +dnnError_t dnnGroupsConvolutionCreateBackwardBias( + dnnPrimitive_t *pConvolution, dnnPrimitiveAttributes_t attributes, + dnnAlgorithm_t algorithm, size_t groups, size_t dimension, + const size_t dstSize[]) +{ + return dnnGroupsConvolutionCreateBackwardBias_F32( + pConvolution, attributes, algorithm, groups, dimension, dstSize); +} +template <> +dnnError_t dnnGroupsConvolutionCreateBackwardBias( + dnnPrimitive_t *pConvolution, dnnPrimitiveAttributes_t attributes, + dnnAlgorithm_t algorithm, size_t groups, size_t dimension, + const size_t dstSize[]) +{ + return dnnGroupsConvolutionCreateBackwardBias_F64( + pConvolution, attributes, algorithm, groups, dimension, dstSize); +} + +template +dnnError_t dnnExecute(dnnPrimitive_t primitive, void *resources[]) +{ + return dnnExecute_F32(primitive, resources); +} +template <> +dnnError_t dnnExecute(dnnPrimitive_t primitive, void *resources[]) +{ + return dnnExecute_F64(primitive, resources); +} + +template +dnnError_t dnnReLUCreateForward(dnnPrimitive_t *pRelu, + dnnPrimitiveAttributes_t attributes, + const dnnLayout_t dataLayout, + Type negativeSlope) +{ + return dnnReLUCreateForward_F32(pRelu, attributes, dataLayout, negativeSlope); +} +template <> +dnnError_t dnnReLUCreateForward(dnnPrimitive_t *pRelu, + dnnPrimitiveAttributes_t attributes, + const dnnLayout_t dataLayout, + double negativeSlope) +{ + return dnnReLUCreateForward_F64(pRelu, attributes, dataLayout, negativeSlope); +} +template +dnnError_t dnnReLUCreateBackward(dnnPrimitive_t *pRelu, + dnnPrimitiveAttributes_t 
attributes, + const dnnLayout_t diffLayout, + const dnnLayout_t dataLayout, + Type negativeSlope) +{ + return dnnReLUCreateBackward_F32(pRelu, attributes, diffLayout, dataLayout, + negativeSlope); +} +template <> +dnnError_t dnnReLUCreateBackward(dnnPrimitive_t *pRelu, + dnnPrimitiveAttributes_t attributes, + const dnnLayout_t diffLayout, + const dnnLayout_t dataLayout, + double negativeSlope) +{ + return dnnReLUCreateBackward_F64(pRelu, attributes, diffLayout, dataLayout, + negativeSlope); +} + +template +dnnError_t dnnLayoutCreate(dnnLayout_t *pLayout, size_t dimension, + const size_t size[], const size_t strides[]) +{ + return dnnLayoutCreate_F32(pLayout, dimension, size, strides); +} + +template <> +dnnError_t dnnLayoutCreate(dnnLayout_t *pLayout, size_t dimension, + const size_t size[], const size_t strides[]) +{ + return dnnLayoutCreate_F64(pLayout, dimension, size, strides); +} + +template +dnnError_t dnnPoolingCreateForward( + dnnPrimitive_t *pPooling, dnnPrimitiveAttributes_t attributes, + dnnAlgorithm_t op, const dnnLayout_t srcLayout, const size_t kernelSize[], + const size_t kernelStride[], const int inputOffset[], + const dnnBorder_t borderType) +{ + return dnnPoolingCreateForward_F32(pPooling, attributes, op, srcLayout, + kernelSize, kernelStride, inputOffset, + borderType); +} + +template <> +dnnError_t dnnPoolingCreateForward( + dnnPrimitive_t *pPooling, dnnPrimitiveAttributes_t attributes, + dnnAlgorithm_t op, const dnnLayout_t srcLayout, const size_t kernelSize[], + const size_t kernelStride[], const int inputOffset[], + const dnnBorder_t borderType) +{ + return dnnPoolingCreateForward_F64(pPooling, attributes, op, srcLayout, + kernelSize, kernelStride, inputOffset, + borderType); +} + +template +dnnError_t dnnPoolingCreateBackward( + dnnPrimitive_t *pPooling, dnnPrimitiveAttributes_t attributes, + dnnAlgorithm_t op, const dnnLayout_t srcLayout, const size_t kernelSize[], + const size_t kernelStride[], const int inputOffset[], + const dnnBorder_t borderType) +{ + return dnnPoolingCreateBackward_F32(pPooling, attributes, op, srcLayout, + kernelSize, kernelStride, inputOffset, + borderType); +} + +template <> +dnnError_t dnnPoolingCreateBackward( + dnnPrimitive_t *pPooling, dnnPrimitiveAttributes_t attributes, + dnnAlgorithm_t op, const dnnLayout_t srcLayout, const size_t kernelSize[], + const size_t kernelStride[], const int inputOffset[], + const dnnBorder_t borderType) +{ + return dnnPoolingCreateBackward_F64(pPooling, attributes, op, srcLayout, + kernelSize, kernelStride, inputOffset, + borderType); +} + +template +dnnError_t dnnLayoutCreateFromPrimitive(dnnLayout_t *pLayout, + const dnnPrimitive_t primitive, + dnnResourceType_t type) +{ + return dnnLayoutCreateFromPrimitive_F32(pLayout, primitive, type); +} + +template <> +dnnError_t dnnLayoutCreateFromPrimitive(dnnLayout_t *pLayout, + const dnnPrimitive_t primitive, + dnnResourceType_t type) +{ + return dnnLayoutCreateFromPrimitive_F64(pLayout, primitive, type); +} + +template +dnnError_t dnnDelete(dnnPrimitive_t primitive) +{ + return dnnDelete_F32(primitive); +} + +template <> +dnnError_t dnnDelete(dnnPrimitive_t primitive) +{ + return dnnDelete_F64(primitive); +} + +template +dnnError_t dnnLayoutDelete(dnnLayout_t layout) +{ + return dnnLayoutDelete_F32(layout); +} +template <> +dnnError_t dnnLayoutDelete(dnnLayout_t layout) +{ + return dnnLayoutDelete_F64(layout); +} + +template +int dnnLayoutCompare(const dnnLayout_t L1, const dnnLayout_t L2) +{ + return dnnLayoutCompare_F32(L1, L2); +} +template <> +int 
dnnLayoutCompare(const dnnLayout_t L1, const dnnLayout_t L2) +{ + return dnnLayoutCompare_F64(L1, L2); +} + +template +size_t dnnLayoutGetMemorySize(const dnnLayout_t Layout) +{ + return dnnLayoutGetMemorySize_F32(Layout); +} +template <> +size_t dnnLayoutGetMemorySize(const dnnLayout_t Layout) +{ + return dnnLayoutGetMemorySize_F64(Layout); +} + +template +dnnError_t dnnAllocateBuffer(void **pPtr, dnnLayout_t layout) +{ + return dnnAllocateBuffer_F32(pPtr, layout); +} +template <> +dnnError_t dnnAllocateBuffer(void **pPtr, dnnLayout_t layout) +{ + return dnnAllocateBuffer_F64(pPtr, layout); +} + +template +dnnError_t dnnConversionCreate(dnnPrimitive_t *pConversion, + const dnnLayout_t from, const dnnLayout_t to) +{ + return dnnConversionCreate_F32(pConversion, from, to); +} +template <> +dnnError_t dnnConversionCreate(dnnPrimitive_t *pConversion, + const dnnLayout_t from, + const dnnLayout_t to) +{ + return dnnConversionCreate_F64(pConversion, from, to); +} + +template +dnnError_t dnnReleaseBuffer(void *pPtr) +{ + return dnnReleaseBuffer_F32(pPtr); +} +template <> +dnnError_t dnnReleaseBuffer(void *pPtr) +{ + return dnnReleaseBuffer_F64(pPtr); +} + +template +dnnError_t dnnBatchNormalizationCreateForward( + dnnPrimitive_t *pBatchNormalization, dnnPrimitiveAttributes_t attributes, + const dnnLayout_t dataLayout, float eps) +{ + return dnnBatchNormalizationCreateForward_F32(pBatchNormalization, attributes, + dataLayout, eps); +} + +template <> +dnnError_t dnnBatchNormalizationCreateForward( + dnnPrimitive_t *pBatchNormalization, dnnPrimitiveAttributes_t attributes, + const dnnLayout_t dataLayout, float eps) +{ + return dnnBatchNormalizationCreateForward_F64(pBatchNormalization, attributes, + dataLayout, eps); +} + +template +dnnError_t dnnBatchNormalizationCreateBackwardScaleShift( + dnnPrimitive_t *pBatchNormalization, dnnPrimitiveAttributes_t attributes, + const dnnLayout_t dataLayout, float eps) +{ + return dnnBatchNormalizationCreateBackwardScaleShift_F32( + pBatchNormalization, attributes, dataLayout, eps); +} + +template <> +dnnError_t dnnBatchNormalizationCreateBackwardScaleShift( + dnnPrimitive_t *pBatchNormalization, dnnPrimitiveAttributes_t attributes, + const dnnLayout_t dataLayout, float eps) +{ + return dnnBatchNormalizationCreateBackwardScaleShift_F64( + pBatchNormalization, attributes, dataLayout, eps); +} + +template +dnnError_t dnnBatchNormalizationCreateBackwardData( + dnnPrimitive_t *pBatchNormalization, dnnPrimitiveAttributes_t attributes, + const dnnLayout_t dataLayout, float eps) +{ + return dnnBatchNormalizationCreateBackwardData_F32( + pBatchNormalization, attributes, dataLayout, eps); +} + +template <> +dnnError_t dnnBatchNormalizationCreateBackwardData( + dnnPrimitive_t *pBatchNormalization, dnnPrimitiveAttributes_t attributes, + const dnnLayout_t dataLayout, float eps) +{ + return dnnBatchNormalizationCreateBackwardData_F64( + pBatchNormalization, attributes, dataLayout, eps); +} + +template +dnnError_t dnnLRNCreateForward(dnnPrimitive_t *pLrn, + dnnPrimitiveAttributes_t attributes, + const dnnLayout_t dataLayout, size_t kernelSie, + float alpha, float beta, float k) +{ + return dnnLRNCreateForward_F32(pLrn, attributes, dataLayout, kernelSie, alpha, + beta, k); +} + +template <> +dnnError_t dnnLRNCreateForward(dnnPrimitive_t *pLrn, + dnnPrimitiveAttributes_t attributes, + const dnnLayout_t dataLayout, + size_t kernelSie, float alpha, + float beta, float k) +{ + return dnnLRNCreateForward_F64(pLrn, attributes, dataLayout, kernelSie, alpha, + beta, k); +} + 
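+/* Illustrative sketch only -- not referenced elsewhere in this patch.  It
+ * shows how the precision-dispatching wrappers above keep layer code generic:
+ * a single helper (the name createUsrLayoutAndBuffer is hypothetical)
+ * instantiates to the _F32 entry points for float and to the _F64 entry
+ * points for double. */
+template <typename DType>
+static dnnError_t createUsrLayoutAndBuffer(size_t dimension,
+                                           const size_t size[],
+                                           const size_t strides[],
+                                           dnnLayout_t *layout, void **buffer)
+{
+  // Describe the plain (user) memory layout, then let MKL allocate a buffer
+  // that matches it.
+  dnnError_t status = dnnLayoutCreate<DType>(layout, dimension, size, strides);
+  if (status != E_SUCCESS) return status;
+  return dnnAllocateBuffer<DType>(buffer, *layout);
+}
+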
+template +dnnError_t dnnLRNCreateBackward(dnnPrimitive_t *pLrn, + dnnPrimitiveAttributes_t attributes, + const dnnLayout_t diffLayout, + const dnnLayout_t dataLayout, size_t kernelSize, + float alpha, float beta, float k) +{ + return dnnLRNCreateBackward_F32(pLrn, attributes, diffLayout, dataLayout, + kernelSize, alpha, beta, k); +} + +template <> +dnnError_t dnnLRNCreateBackward(dnnPrimitive_t *pLrn, + dnnPrimitiveAttributes_t attributes, + const dnnLayout_t diffLayout, + const dnnLayout_t dataLayout, + size_t kernelSize, float alpha, + float beta, float k) +{ + return dnnLRNCreateBackward_F64(pLrn, attributes, diffLayout, dataLayout, + kernelSize, alpha, beta, k); +} + +template +dnnError_t dnnInnerProductCreateForwardBias(dnnPrimitive_t *pInnerProduct, + dnnPrimitiveAttributes_t attributes, + size_t dimentions, + const size_t srcSize[], + size_t outputChannels) +{ + return dnnInnerProductCreateForwardBias_F32( + pInnerProduct, attributes, dimentions, srcSize, outputChannels); +} +template <> +dnnError_t dnnInnerProductCreateForwardBias( + dnnPrimitive_t *pInnerProduct, dnnPrimitiveAttributes_t attributes, + size_t dimentions, const size_t srcSize[], size_t outputChannels) +{ + return dnnInnerProductCreateForwardBias_F64( + pInnerProduct, attributes, dimentions, srcSize, outputChannels); +} + +template +dnnError_t dnnInnerProductCreateBackwardData( + dnnPrimitive_t *pInnerProduct, dnnPrimitiveAttributes_t attributes, + size_t dimentions, const size_t srcSize[], size_t outputChannels) +{ + return dnnInnerProductCreateBackwardData_F32( + pInnerProduct, attributes, dimentions, srcSize, outputChannels); +} +template <> +dnnError_t dnnInnerProductCreateBackwardData( + dnnPrimitive_t *pInnerProduct, dnnPrimitiveAttributes_t attributes, + size_t dimentions, const size_t srcSize[], size_t outputChannels) +{ + return dnnInnerProductCreateBackwardData_F64( + pInnerProduct, attributes, dimentions, srcSize, outputChannels); +} +template +dnnError_t dnnInnerProductCreateBackwardFilter( + dnnPrimitive_t *pInnerProduct, dnnPrimitiveAttributes_t attributes, + size_t dimentions, const size_t srcSize[], size_t outputChannels) +{ + return dnnInnerProductCreateBackwardFilter_F32( + pInnerProduct, attributes, dimentions, srcSize, outputChannels); +} +template <> +dnnError_t dnnInnerProductCreateBackwardFilter( + dnnPrimitive_t *pInnerProduct, dnnPrimitiveAttributes_t attributes, + size_t dimentions, const size_t srcSize[], size_t outputChannels) +{ + return dnnInnerProductCreateBackwardFilter_F64( + pInnerProduct, attributes, dimentions, srcSize, outputChannels); +} +template +dnnError_t dnnInnerProductCreateBackwardBias( + dnnPrimitive_t *pInnerProduct, dnnPrimitiveAttributes_t attributes, + size_t dimentions, const size_t dstSize[]) +{ + return dnnInnerProductCreateBackwardBias_F32(pInnerProduct, attributes, + dimentions, dstSize); +} +template <> +dnnError_t dnnInnerProductCreateBackwardBias( + dnnPrimitive_t *pInnerProduct, dnnPrimitiveAttributes_t attributes, + size_t dimentions, const size_t dstSize[]) +{ + return dnnInnerProductCreateBackwardBias_F64(pInnerProduct, attributes, + dimentions, dstSize); +} +#endif diff --git a/mkl/native/src/main/c/jni/batch_norm.cpp b/mkl/native/src/main/c/jni/batch_norm.cpp new file mode 100644 index 00000000000..c648e5c5ef1 --- /dev/null +++ b/mkl/native/src/main/c/jni/batch_norm.cpp @@ -0,0 +1,428 @@ +#include + +#include "debug.h" +#include "layer.h" +#include "memory.h" +#include "utils.h" + +template +class MKLBatchNorm : public MKLLayer +{ + public: + 
MKLBatchNorm(); + ~MKLBatchNorm(); + + void init(size_t inputNumber, size_t inputChannel, size_t inputHeight, + size_t inputWidth, double eps, int useKernel, int useBias, + int dimension); + + void updateOutput(DType *input, DType *output); + void updateGradInput(DType *input, DType *gradOutput, DType *gradInput); + + void setKernel(DType *ptr); + void setBias(DType *ptr); + void setGradKernel(DType *ptr); + void setGradBias(DType *ptr); + + private: + // this method is not the same as createMklLayout in MKLMemory + void firstPass(); + void preExecute(DType *input); + + std::shared_ptr> scaleShift; + std::shared_ptr> workspace; + + size_t inputSize[4]; + size_t inputStrides[4]; + + size_t outputSize[4]; + size_t outputStrides[4]; + + double eps; + bool useKernel; + bool useBias; + + DType *kernel; + DType *bias; + DType *gradKernel; + DType *gradBias; + + dnnPrimitive_t scaleShiftPrim; +}; + +template +MKLBatchNorm::MKLBatchNorm() + : scaleShift(new MKLData), + workspace(new MKLData), + kernel(NULL), + bias(NULL), + gradKernel(NULL), + gradBias(NULL), + scaleShiftPrim(NULL) +{ + eps = 0.00001; +} + +template +MKLBatchNorm::~MKLBatchNorm() +{ + dnnDelete(scaleShiftPrim); +} + +template +void MKLBatchNorm::setKernel(DType *ptr) +{ + kernel = ptr; +} +template +void MKLBatchNorm::setBias(DType *ptr) +{ + bias = ptr; +} +template +void MKLBatchNorm::setGradKernel(DType *ptr) +{ + gradKernel = ptr; +} +template +void MKLBatchNorm::setGradBias(DType *ptr) +{ + gradBias = ptr; +} + +template +void MKLBatchNorm::init(size_t inputNumber, size_t inputChannel, + size_t inputHeight, size_t inputWidth, + double eps, int useKernel, int useBias, + int dimension) +{ + this->dimension = dimension; + + inputSize[0] = inputWidth; + inputSize[1] = inputHeight; + inputSize[2] = inputChannel; + inputSize[3] = inputNumber; + + inputStrides[0] = 1; + for (int i = 1; i < 4; i++) + inputStrides[i] = inputStrides[i - 1] * inputSize[i - 1]; + + // the output channel is as same as the number of kernel. + // and the output number must be as same as the number of input too. + outputSize[0] = inputWidth; + outputSize[1] = inputHeight; + outputSize[2] = inputChannel; + outputSize[3] = inputNumber; + + outputStrides[0] = 1; + for (int i = 1; i < 4; i++) + outputStrides[i] = outputStrides[i - 1] * outputSize[i - 1]; + + this->eps = eps; + this->useKernel = useKernel > 0 ? true : false; + this->useBias = useBias > 0 ? 
true : false; + + // create usr layout + this->input->createUsrLayout(dimension, inputSize, inputStrides); + this->output->createUsrLayout(dimension, outputSize, outputStrides); + + this->gradInput->createUsrLayout(dimension, inputSize, inputStrides); + this->gradOutput->createUsrLayout(dimension, outputSize, outputStrides); +} + +template +void MKLBatchNorm::firstPass() +{ + dnnError_t status = E_UNIMPLEMENTED; + dnnLayout_t layout; + + status = + dnnLayoutCreate(&layout, this->dimension, inputSize, inputStrides); + CHECK_EQ(status, E_SUCCESS); + + // forward + status = dnnBatchNormalizationCreateForward(&(this->forwardPrim), NULL, + layout, eps); + CHECK_EQ(status, E_SUCCESS); + + this->input->createMklLayout(this->forwardPrim, dnnResourceSrc); + this->output->createMklLayout(this->forwardPrim, dnnResourceDst); + + // backward data + status = dnnBatchNormalizationCreateBackwardData(&(this->backwardPrim), + NULL, layout, eps); + CHECK_EQ(status, E_SUCCESS); + + this->gradOutput->createMklLayout(this->backwardPrim, dnnResourceDiffDst); + this->gradInput->createMklLayout(this->backwardPrim, dnnResourceDiffSrc); + + // scaleshift + this->scaleShift->createMklLayout(this->forwardPrim, dnnResourceScaleShift); + this->scaleShift->createConversion(true); + if (useKernel) { + status = dnnBatchNormalizationCreateBackwardScaleShift( + &scaleShiftPrim, NULL, layout, eps); + CHECK_EQ(status, E_SUCCESS); + } + + // workspace + this->workspace->createMklLayout(this->forwardPrim, dnnResourceWorkspace); + this->workspace->createConversion(true); + + // we create the layout only at the first time + this->isFirstPass = false; + + // delte the layout + dnnLayoutDelete(layout); +} + +template +void MKLBatchNorm::preExecute(DType *input) +{ + this->input->createConversion(); +} + +template +void MKLBatchNorm::updateOutput(DType *input, DType *output) +{ + if (this->isFirstPass) firstPass(); + + // Because the address will change every time, so we need create conversion + // every forward/backward. + // TODO Should we set the kernel and bias address every time? 
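+  // The forward pass below converts the JVM input array to the layout chosen
+  // by the forward primitive, packs the scale-shift buffer as
+  // [kernel(0..C-1), bias(C..2C-1)] (ones and zeros when useKernel is false),
+  // executes the primitive, and copies the result back to the user layout
+  // unless the next layer consumes the MKL layout directly.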
+ preExecute(input); + this->output->createConversion(); + + DType *ptr = reinterpret_cast(scaleShift->getData()); + + // pad the scale shift with kernel and bias + if (useKernel) { + for (int i = 0; i < inputSize[2]; i++) { + ptr[i] = kernel[i]; + if (useBias) + ptr[i + inputSize[2]] = bias[i]; + else + ptr[i + inputSize[2]] = 0; + } + } else { + for (int i = 0; i < inputSize[2]; i++) { + ptr[i] = 1.0; + ptr[i + inputSize[2]] = 0; + } + } +#ifdef DEBUG + printData(reinterpret_cast(this->input->getUsrData()), + this->inputSize[3], this->inputSize[2], this->inputSize[1], + this->inputSize[0], "Forward input"); +#endif + + dnnError_t status; + void *resources[dnnResourceNumber]; + + resources[dnnResourceSrc] = this->input->getConvertedData(); + resources[dnnResourceDst] = this->output->getData(); + resources[dnnResourceScaleShift] = scaleShift->getData(); + resources[dnnResourceWorkspace] = workspace->getData(); + + PERFSTART(); + status = dnnExecute(this->forwardPrim, resources); + PERFEND("main computing"); + CHECK_EQ(status, E_SUCCESS); + +#ifdef DEBUG + printData(reinterpret_cast(this->output->getData()), + outputSize[3], outputSize[2], outputSize[1], outputSize[0], + "Forward output"); +#endif + + if (!this->output->isUseNext()) { + this->output->backToUsr(); + } +} + +template +void MKLBatchNorm::updateGradInput(DType *input, DType *gradOutput, + DType *gradInput) +{ + dnnError_t status; + void *resources[dnnResourceNumber]; + + preExecute(input); + + this->gradOutput->createConversion(); + this->gradInput->createConversion(); + + resources[dnnResourceDiffDst] = this->gradOutput->getConvertedData(); + resources[dnnResourceDiffSrc] = this->gradInput->getData(); + resources[dnnResourceSrc] = this->input->getConvertedData(); + resources[dnnResourceScaleShift] = scaleShift->getData(); + resources[dnnResourceWorkspace] = workspace->getData(); + + // 4. main computing parts. 
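+  // The data-gradient primitive runs first; when useKernel is set, the extra
+  // scaleShiftPrim below writes the kernel/bias gradients into the shared
+  // scaleShift buffer, which is then unpacked into gradKernel and, if useBias
+  // is set, gradBias.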
+ PERFSTART(); + status = dnnExecute(this->backwardPrim, resources); + CHECK_EQ(status, E_SUCCESS); + PERFEND("main computing"); + + if (useKernel) { + void *diffRes[dnnResourceNumber]; + diffRes[dnnResourceDiffDst] = this->gradOutput->getConvertedData(); + diffRes[dnnResourceSrc] = this->input->getConvertedData(); + diffRes[dnnResourceDiffScaleShift] = scaleShift->getData(); + diffRes[dnnResourceWorkspace] = workspace->getData(); + + PERFSTART(); + status = dnnExecute(scaleShiftPrim, diffRes); + CHECK_EQ(status, E_SUCCESS); + PERFEND("weight and bias diff main computing"); + + DType *ptr = reinterpret_cast(scaleShift->getData()); + for (int i = 0; i < inputSize[2]; i++) { + gradKernel[i] = ptr[i]; + if (useBias) { + gradBias[i] = ptr[i + inputSize[2]]; + } + } + } + + if (!this->gradInput->isUsePrev()) { + this->gradInput->backToUsr(); + } + +#ifdef DEBUG + printData(reinterpret_cast(this->gradInput->getUsrData()), + inputSize[3], inputSize[2], inputSize[1], inputSize[0], + "backward gradient input"); +#endif +} + +template +jlong JNIBatchNormInit(JNIEnv *env, jclass thisClass, jint inputNumber, + jint inputChannel, jint inputHeight, jint inputWidth, + double eps, jint useKernel, jint useBias, jint dimension) +{ + MKLBatchNorm *ptr = new MKLBatchNorm(); + ptr->init(inputNumber, inputChannel, inputHeight, inputWidth, eps, useKernel, + useBias, dimension); + + return reinterpret_cast(ptr); +} + +template +void JNIBatchNormUpdateOutput(JNIEnv *env, jclass thisClass, ArrayType input, + jint inputOffset, ArrayType output, + jint outputOffset, ArrayType kernel, + jint kernelOffset, ArrayType bias, + jint biasOffset, long classPtr) +{ + MKLBatchNorm *ptr = reinterpret_cast *>(classPtr); + + std::shared_ptr> jInput( + new ZipArray(env, input, inputOffset, ptr->input)); + + std::shared_ptr> jOutput( + new ZipArray(env, output, outputOffset, ptr->output)); + + std::shared_ptr> jKernel( + new ZipArray(env, kernel, kernelOffset, NULL)); + + std::shared_ptr> jBias( + new ZipArray(env, bias, biasOffset, NULL)); + + ptr->setKernel(jKernel->getPtr()); + ptr->setBias(jBias->getPtr()); + + ptr->updateOutput(jInput->getPtr(), jOutput->getPtr()); +} + +template +void JNIBatchNormUpdateGradInput(JNIEnv *env, jclass thisClass, ArrayType input, + jint inputOffset, ArrayType outputDiff, + jint outputDiffOffset, ArrayType inputDiff, + jint inputDiffOffset, ArrayType kernelDiff, + jint kernelDiffOffset, ArrayType biasDiff, + jint biasDiffOffset, long classPtr) +{ + MKLBatchNorm *ptr = reinterpret_cast *>(classPtr); + std::shared_ptr> jInput( + new ZipArray(env, input, inputOffset, ptr->input)); + + std::shared_ptr> jOutputDiff( + new ZipArray(env, outputDiff, outputDiffOffset, + ptr->gradOutput)); + + std::shared_ptr> jInputDiff( + new ZipArray(env, inputDiff, inputDiffOffset, + ptr->gradInput)); + + std::shared_ptr> jKernelDiff( + new ZipArray(env, kernelDiff, kernelDiffOffset, NULL)); + + std::shared_ptr> jBiasDiff( + new ZipArray(env, biasDiff, biasDiffOffset, NULL)); + + ptr->setGradKernel(jKernelDiff->getPtr()); + ptr->setGradBias(jBiasDiff->getPtr()); + + ptr->updateGradInput(jInput->getPtr(), jOutputDiff->getPtr(), + jInputDiff->getPtr()); +} + +// Macro +#define BatchNormInit(DType, JType, JArrayType) \ + JNIEXPORT \ + jlong JNICALL Java_com_intel_analytics_sparkdl_mkl_MKL_BatchNormInit##DType( \ + JNIEnv *env, jclass thisClass, jint inputNumber, jint inputChannel, \ + jint inputHeight, jint inputWidth, jdouble eps, jint useKernel, \ + jint useBias, jint dimension) \ + { \ + return JNIBatchNormInit( \ + 
env, thisClass, inputNumber, inputChannel, inputHeight, inputWidth, \ + eps, useKernel, useBias, dimension); \ + } + +#define BatchNormForward(DType, JType, JArrayType) \ + JNIEXPORT \ + void JNICALL \ + Java_com_intel_analytics_sparkdl_mkl_MKL_BatchNormForward##DType( \ + JNIEnv *env, jclass thisClass, JArrayType input, jint inputOffset, \ + JArrayType output, jint outputOffset, JArrayType kernel, \ + jint kernelOffset, JArrayType bias, jint biasOffset, long classPtr) \ + { \ + JNIBatchNormUpdateOutput( \ + env, thisClass, input, inputOffset, output, outputOffset, kernel, \ + kernelOffset, bias, biasOffset, classPtr); \ + } + +#define BatchNormBackward(DType, JType, JArrayType) \ + JNIEXPORT \ + void JNICALL \ + Java_com_intel_analytics_sparkdl_mkl_MKL_BatchNormBackward##DType( \ + JNIEnv *env, jclass thisClass, JArrayType input, jint inputOffset, \ + JArrayType outputDiff, jint outputDiffOffset, JArrayType inputDiff, \ + jint inputDiffOffset, JArrayType kernelDiff, jint kernelDiffOffset, \ + JArrayType biasDiff, jint biasDiffOffset, long classPtr) \ + { \ + JNIBatchNormUpdateGradInput( \ + env, thisClass, input, inputOffset, outputDiff, outputDiffOffset, \ + inputDiff, inputDiffOffset, kernelDiff, kernelDiffOffset, biasDiff, \ + biasDiffOffset, classPtr); \ + } + +#ifdef __cplusplus +extern "C" { +#endif + +// double +BatchNormInit(Double, jdouble, jdoubleArray); +BatchNormForward(Double, jdouble, jdoubleArray); +BatchNormBackward(Double, jdouble, jdoubleArray); + +// float +BatchNormInit(Float, jfloat, jfloatArray); +BatchNormForward(Float, jfloat, jfloatArray); +BatchNormBackward(Float, jfloat, jfloatArray); + +#ifdef __cplusplus +} +#endif diff --git a/mkl/native/src/main/c/jni/convolution.cpp b/mkl/native/src/main/c/jni/convolution.cpp new file mode 100644 index 00000000000..36c821ba7aa --- /dev/null +++ b/mkl/native/src/main/c/jni/convolution.cpp @@ -0,0 +1,580 @@ +#include + +#include "debug.h" +#include "layer.h" +#include "memory.h" +#include "utils.h" + +static int getMKLBuildDate() +{ + static int build = 0; + if (build == 0) { + MKLVersion v; + mkl_get_version(&v); + build = atoi(v.Build); + } + return build; +} + +template +class MKLConvolution : public MKLLayer +{ + public: + MKLConvolution(); + ~MKLConvolution(); + + void init(size_t inputNumber, size_t inputChannel, size_t inputHeight, + size_t inputWidth, size_t kernelNumber, size_t kernelChannel, + size_t kernelHeight, size_t kernelWidth, size_t strideHeight, + size_t strideWidth, int padHeight, int padWidth, int dimension, + int groups); + + void updateOutput(DType *input, DType *output); + void updateGradInput(DType *input, DType *gradOutput, DType *gradInput); + void updateGradKernel(DType *input, DType *gradOutput, DType *gradKernel); + void updateGradBias(DType *input, DType *gradOutput, DType *gradBias); + + std::shared_ptr> kernel; + std::shared_ptr> bias; + + std::shared_ptr> gradKernel; + std::shared_ptr> gradBias; + + private: + // this method is not the same as createMklLayout in MKLMemory + void firstPass(); + void preExecute(DType *input); + + DType *kernelAdr; + DType *biasAdr; + + dnnPrimitive_t kernelPrim, biasPrim; + + size_t groups; + + size_t inputSize[4]; + size_t inputStrides[4]; + + size_t outputSize[4]; + size_t outputStrides[4]; + + size_t kernelDimension; + size_t kernelSize[5]; + size_t kernelStrides[5]; + + size_t biasSize[1]; + size_t biasStrides[1]; + + size_t stride[2]; + int pad[2]; +}; + +template +MKLConvolution::MKLConvolution() + : kernel(new MKLData), + bias(new MKLData), + 
gradKernel(new MKLData), + gradBias(new MKLData), + kernelAdr(NULL), + biasAdr(NULL), + kernelPrim(NULL), + biasPrim(NULL) +{ +} + +template +MKLConvolution::~MKLConvolution() +{ + dnnDelete(kernelPrim); + dnnDelete(biasPrim); +} + +template +void MKLConvolution::init(size_t inputNumber, size_t inputChannel, + size_t inputHeight, size_t inputWidth, + size_t kernelNumber, size_t kernelChannel, + size_t kernelHeight, size_t kernelWidth, + size_t strideHeight, size_t strideWidth, + int padHeight, int padWidth, int dimension, + int groups) +{ + this->dimension = dimension; + this->groups = groups; + + inputSize[0] = inputWidth; + inputSize[1] = inputHeight; + inputSize[2] = inputChannel; + inputSize[3] = inputNumber; + + inputStrides[0] = 1; + for (int i = 1; i < 4; i++) + inputStrides[i] = inputStrides[i - 1] * inputSize[i - 1]; + + size_t outputWidth = + computeOut(inputWidth, padWidth, kernelWidth, strideWidth, false); + size_t outputHeight = + computeOut(inputHeight, padHeight, kernelHeight, strideHeight, false); + + // the output channel is as same as the number of kernel. + // and the output number must be as same as the number of input too. + outputSize[0] = outputWidth; + outputSize[1] = outputHeight; + outputSize[2] = kernelNumber; + outputSize[3] = inputNumber; + + outputStrides[0] = 1; + for (int i = 1; i < 4; i++) + outputStrides[i] = outputStrides[i - 1] * outputSize[i - 1]; + + // comes from IntelCaffe. + size_t groupsMKL = groups; + kernelDimension = this->dimension + (groups != 1); + if (getMKLBuildDate() < 20160701) { + kernelDimension = this->dimension; + groupsMKL = 1; + } + + kernelSize[0] = kernelWidth; + kernelSize[1] = kernelHeight; + kernelSize[2] = kernelChannel / groups; + kernelSize[3] = kernelNumber / groupsMKL; + kernelSize[4] = groupsMKL; + + kernelStrides[0] = 1; + for (int i = 1; i < 5; i++) + kernelStrides[i] = kernelStrides[i - 1] * kernelSize[i - 1]; + + biasSize[0] = kernelNumber; + biasStrides[0] = 1; + + stride[0] = strideWidth; + stride[1] = strideHeight; + + pad[0] = -padWidth; + pad[1] = -padHeight; + + // create usr layout + this->input->createUsrLayout(dimension, inputSize, inputStrides); + this->output->createUsrLayout(dimension, outputSize, outputStrides); + this->kernel->createUsrLayout(kernelDimension, kernelSize, kernelStrides); + this->bias->createUsrLayout(1, biasSize, biasStrides); + + this->gradInput->createUsrLayout(dimension, inputSize, inputStrides); + this->gradOutput->createUsrLayout(dimension, outputSize, outputStrides); + this->gradKernel->createUsrLayout(kernelDimension, kernelSize, kernelStrides); + // bias dimension is 1 + this->gradBias->createUsrLayout(1, biasSize, biasStrides); +} + +template +void MKLConvolution::firstPass() +{ + dnnError_t status = E_UNIMPLEMENTED; + // forward + status = dnnGroupsConvolutionCreateForwardBias( + &(this->forwardPrim), NULL, dnnAlgorithmConvolutionDirect, groups, + this->dimension, inputSize, outputSize, kernelSize, stride, pad, + dnnBorderZeros); + CHECK_EQ(status, E_SUCCESS); + + this->input->createMklLayout(this->forwardPrim, dnnResourceSrc); + this->output->createMklLayout(this->forwardPrim, dnnResourceDst); + this->kernel->createMklLayout(this->forwardPrim, dnnResourceFilter); + this->bias->createMklLayout(this->forwardPrim, dnnResourceBias); + + // backward data + status = dnnGroupsConvolutionCreateBackwardData( + &(this->backwardPrim), NULL, dnnAlgorithmConvolutionDirect, groups, + this->dimension, inputSize, outputSize, kernelSize, stride, pad, + dnnBorderZeros); + CHECK_EQ(status, 
E_SUCCESS); + + this->gradOutput->createMklLayout(this->backwardPrim, dnnResourceDiffDst); + this->gradInput->createMklLayout(this->backwardPrim, dnnResourceDiffSrc); + + // backward kernel + status = dnnGroupsConvolutionCreateBackwardFilter( + &kernelPrim, NULL, dnnAlgorithmConvolutionDirect, groups, this->dimension, + inputSize, outputSize, kernelSize, stride, pad, dnnBorderZeros); + CHECK_EQ(status, E_SUCCESS); + + this->gradKernel->createMklLayout(this->kernelPrim, dnnResourceDiffFilter); + + // backward bias + status = dnnGroupsConvolutionCreateBackwardBias( + &biasPrim, NULL, dnnAlgorithmConvolutionDirect, groups, this->dimension, + outputSize); + CHECK_EQ(status, E_SUCCESS); + + this->gradBias->createMklLayout(this->biasPrim, dnnResourceDiffBias); + + // we create the layout only at the first time + this->isFirstPass = false; +} + +template +void MKLConvolution::preExecute(DType *input) +{ + this->input->createConversion(); + this->kernel->createConversion(); + this->bias->createConversion(); +} + +template +void MKLConvolution::updateOutput(DType *input, DType *output) +{ + if (this->isFirstPass) firstPass(); + + // Because the address will change every time, so we need create conversion + // every forward/backward. + // TODO Should we set the kernel and bias address every time? + preExecute(input); + this->output->createConversion(); + +#ifdef DEBUG + printData(reinterpret_cast(this->input->getUsrData()), + this->inputSize[3], this->inputSize[2], this->inputSize[1], + this->inputSize[0], "Forward input"); +#endif + + dnnError_t status; + void *resources[dnnResourceNumber]; + + resources[dnnResourceFilter] = this->kernel->getConvertedData(); + resources[dnnResourceBias] = this->bias->getConvertedData(); + resources[dnnResourceSrc] = this->input->getConvertedData(); + resources[dnnResourceDst] = this->output->getData(); + + PERFSTART(); + status = dnnExecute(this->forwardPrim, resources); + PERFEND("main computing"); + CHECK_EQ(status, E_SUCCESS); + +#ifdef DEBUG + printData(reinterpret_cast(this->output->getData()), + outputSize[3], outputSize[2], outputSize[1], outputSize[0], + "Forward output"); +#endif + + if (!this->output->isUseNext()) { + this->output->backToUsr(); + } +} + +template +void MKLConvolution::updateGradInput(DType *input, DType *gradOutput, + DType *gradInput) +{ + dnnError_t status; + void *resources[dnnResourceNumber]; + + preExecute(input); + + this->gradOutput->createConversion(); + this->gradInput->createConversion(); + + resources[dnnResourceDiffDst] = this->gradOutput->getConvertedData(); + resources[dnnResourceFilter] = this->kernel->getConvertedData(); + resources[dnnResourceDiffSrc] = this->gradInput->getData(); + + // 4. main computing parts. 
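+  // The backward-data primitive consumes the converted gradOutput and kernel,
+  // writes into gradInput's MKL buffer, and the result is copied back to the
+  // JVM array only when the previous layer cannot share the MKL layout.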
+ PERFSTART(); + status = dnnExecute(this->backwardPrim, resources); + CHECK_EQ(status, E_SUCCESS); + PERFEND("main computing"); + + if (!this->gradInput->isUsePrev()) { + this->gradInput->backToUsr(); + } + +#ifdef DEBUG + printData(reinterpret_cast(this->gradInput->getUsrData()), + inputSize[3], inputSize[2], inputSize[1], inputSize[0], + "backward gradient input"); +#endif +} +template +void MKLConvolution::updateGradKernel(DType *input, DType *gradOutput, + DType *gradKernel) +{ + dnnError_t status; + void *resources[dnnResourceNumber]; + + preExecute(input); + + this->gradOutput->createConversion(); + this->gradKernel->createConversion(); + + resources[dnnResourceDiffDst] = this->gradOutput->getConvertedData(); + resources[dnnResourceSrc] = this->input->getConvertedData(); + resources[dnnResourceDiffFilter] = this->gradKernel->getData(); + + // 4. main computing parts. + PERFSTART(); + status = dnnExecute(this->kernelPrim, resources); + CHECK_EQ(status, E_SUCCESS); + PERFEND("main computing"); + + // the kernel need not re-use for previous layer + this->gradKernel->backToUsr(); +} + +template +void MKLConvolution::updateGradBias(DType *input, DType *gradOutput, + DType *gradBias) +{ + dnnError_t status; + void *resources[dnnResourceNumber]; + + preExecute(input); + + this->gradOutput->createConversion(); + this->gradBias->createConversion(); + + resources[dnnResourceDiffDst] = this->gradOutput->getConvertedData(); + resources[dnnResourceDiffBias] = this->gradBias->getData(); + + // 4. main computing parts. + PERFSTART(); + status = dnnExecute(this->biasPrim, resources); + CHECK_EQ(status, E_SUCCESS); + PERFEND("main computing"); + + this->gradBias->backToUsr(); +} + +template +jlong JNIConvolutionInit(JNIEnv *env, jclass thisClass, jint inputNumber, + jint inputChannel, jint inputHeight, jint inputWidth, + jint kernelNumber, jint kernelChannel, + jint kernelHeight, jint kernelWidth, jint strideHeight, + jint strideWidth, jint padHeight, jint padWidth, + jint dimension, jint groups) +{ + MKLConvolution *conv = new MKLConvolution(); + conv->init(inputNumber, inputChannel, inputHeight, inputWidth, kernelNumber, + kernelChannel, kernelHeight, kernelWidth, strideHeight, + strideWidth, padHeight, padWidth, dimension, groups); + + return reinterpret_cast(conv); +} + +template +void JNIConvolutionUpdateOutput(JNIEnv *env, jclass thisClass, ArrayType input, + jint inputOffset, ArrayType output, + jint outputOffset, ArrayType kernel, + jint kernelOffset, ArrayType bias, + jint biasOffset, long classPtr) +{ + MKLConvolution *ptr = + reinterpret_cast *>(classPtr); + + std::shared_ptr> jInput( + new ZipArray(env, input, inputOffset, ptr->input)); + + std::shared_ptr> jOutput( + new ZipArray(env, output, outputOffset, ptr->output)); + + std::shared_ptr> jKernel( + new ZipArray(env, kernel, kernelOffset, ptr->kernel)); + + std::shared_ptr> jBias( + new ZipArray(env, bias, biasOffset, ptr->bias)); + + ptr->updateOutput(jInput->getPtr(), jOutput->getPtr()); +} + +template +void JNIConvolutionUpdateGradInput(JNIEnv *env, jclass thisClass, + ArrayType input, jint inputOffset, + ArrayType outputDiff, jint outputDiffOffset, + ArrayType inputDiff, jint inputDiffOffset, + ArrayType kernel, jint kernelOffset, + ArrayType bias, jint biasOffset, + long classPtr) +{ + MKLConvolution *ptr = + reinterpret_cast *>(classPtr); + std::shared_ptr> jInput( + new ZipArray(env, input, inputOffset, ptr->input)); + + std::shared_ptr> jOutputDiff( + new ZipArray(env, outputDiff, outputDiffOffset, + ptr->gradOutput)); + + 
std::shared_ptr> jInputDiff( + new ZipArray(env, inputDiff, inputDiffOffset, + ptr->gradInput)); + + std::shared_ptr> jKernel( + new ZipArray(env, kernel, kernelOffset, ptr->kernel)); + + std::shared_ptr> jBias( + new ZipArray(env, bias, biasOffset, ptr->bias)); + + ptr->updateGradInput(jInput->getPtr(), jOutputDiff->getPtr(), + jInputDiff->getPtr()); +} + +template +void JNIConvolutionUpdateGradKernel(JNIEnv *env, jclass thisClass, + ArrayType input, jint inputOffset, + ArrayType outputDiff, jint outputDiffOffset, + ArrayType kernelDiff, jint kernelDiffOffset, + ArrayType kernel, jint kernelOffset, + ArrayType bias, jint biasOffset, + long classPtr) +{ + MKLConvolution *ptr = + reinterpret_cast *>(classPtr); + + std::shared_ptr> jInput( + new ZipArray(env, input, inputOffset, ptr->input)); + + std::shared_ptr> jOutputDiff( + new ZipArray(env, outputDiff, outputDiffOffset, + ptr->gradOutput)); + + std::shared_ptr> jKernelDiff( + new ZipArray(env, kernelDiff, kernelDiffOffset, + ptr->gradKernel)); + + std::shared_ptr> jKernel( + new ZipArray(env, kernel, kernelOffset, ptr->kernel)); + + std::shared_ptr> jBias( + new ZipArray(env, bias, biasOffset, ptr->bias)); + + ptr->updateGradKernel(jInput->getPtr(), jOutputDiff->getPtr(), + jKernelDiff->getPtr()); +} + +template +void JNIConvolutionUpdateGradBias(JNIEnv *env, jclass thisClass, + ArrayType input, jint inputOffset, + ArrayType outputDiff, jint outputDiffOffset, + ArrayType biasDiff, jint biasDiffOffset, + ArrayType kernel, jint kernelOffset, + ArrayType bias, jint biasOffset, + long classPtr) +{ + MKLConvolution *ptr = + reinterpret_cast *>(classPtr); + + std::shared_ptr> jInput( + new ZipArray(env, input, inputOffset, ptr->input)); + + std::shared_ptr> jOutputDiff( + new ZipArray(env, outputDiff, outputDiffOffset, + ptr->gradOutput)); + + std::shared_ptr> jBiasDiff( + new ZipArray(env, biasDiff, biasDiffOffset, + ptr->gradBias)); + + std::shared_ptr> jKernel( + new ZipArray(env, kernel, kernelOffset, ptr->kernel)); + + std::shared_ptr> jBias( + new ZipArray(env, bias, biasOffset, ptr->bias)); + + ptr->updateGradBias(jInput->getPtr(), jOutputDiff->getPtr(), + jBiasDiff->getPtr()); +} + +// Macro +#define ConvolutionInit(DType, JType, JArrayType) \ + JNIEXPORT \ + jlong JNICALL \ + Java_com_intel_analytics_sparkdl_mkl_MKL_ConvolutionInit##DType( \ + JNIEnv *env, jclass thisClass, jint inputNumber, jint inputChannel, \ + jint inputHeight, jint inputWidth, jint kernelNumber, \ + jint kernelChannel, jint kernelHeight, jint kernelWidth, \ + jint strideHeight, jint strideWidth, jint padHeight, jint padWidth, \ + jint dimension, jint groups) \ + { \ + return JNIConvolutionInit( \ + env, thisClass, inputNumber, inputChannel, inputHeight, inputWidth, \ + kernelNumber, kernelChannel, kernelHeight, kernelWidth, strideHeight, \ + strideWidth, padHeight, padWidth, dimension, groups); \ + } + +#define ConvolutionForward(DType, JType, JArrayType) \ + JNIEXPORT \ + void JNICALL \ + Java_com_intel_analytics_sparkdl_mkl_MKL_ConvolutionForward##DType( \ + JNIEnv *env, jclass thisClass, JArrayType input, jint inputOffset, \ + JArrayType output, jint outputOffset, JArrayType kernel, \ + jint kernelOffset, JArrayType bias, jint biasOffset, long classPtr) \ + { \ + JNIConvolutionUpdateOutput( \ + env, thisClass, input, inputOffset, output, outputOffset, kernel, \ + kernelOffset, bias, biasOffset, classPtr); \ + } + +#define ConvolutionBackwardData(DType, JType, JArrayType) \ + JNIEXPORT \ + void JNICALL \ + 
Java_com_intel_analytics_sparkdl_mkl_MKL_ConvolutionBackwardData##DType( \
+      JNIEnv *env, jclass thisClass, JArrayType input, jint inputOffset, \
+      JArrayType outputDiff, jint outputDiffOffset, JArrayType inputDiff, \
+      jint inputDiffOffset, JArrayType kernel, jint kernelOffset, \
+      JArrayType bias, jint biasOffset, long classPtr) \
+  { \
+    JNIConvolutionUpdateGradInput( \
+        env, thisClass, input, inputOffset, outputDiff, outputDiffOffset, \
+        inputDiff, inputDiffOffset, kernel, kernelOffset, bias, biasOffset, \
+        classPtr); \
+  }
+
+#define ConvolutionBackwardKernel(DType, JType, JArrayType) \
+  JNIEXPORT \
+  void JNICALL \
+  Java_com_intel_analytics_sparkdl_mkl_MKL_ConvolutionBackwardKernel##DType( \
+      JNIEnv *env, jclass thisClass, JArrayType input, jint inputOffset, \
+      JArrayType outputDiff, jint outputDiffOffset, JArrayType kernelDiff, \
+      jint kernelDiffOffset, JArrayType kernel, jint kernelOffset, \
+      JArrayType bias, jint biasOffset, long classPtr) \
+  { \
+    JNIConvolutionUpdateGradKernel( \
+        env, thisClass, input, inputOffset, outputDiff, outputDiffOffset, \
+        kernelDiff, kernelDiffOffset, kernel, kernelOffset, bias, biasOffset, \
+        classPtr); \
+  }
+
+#define ConvolutionBackwardBias(DType, JType, JArrayType) \
+  JNIEXPORT \
+  void JNICALL \
+  Java_com_intel_analytics_sparkdl_mkl_MKL_ConvolutionBackwardBias##DType( \
+      JNIEnv *env, jclass thisClass, JArrayType input, jint inputOffset, \
+      JArrayType outputDiff, jint outputDiffOffset, JArrayType biasDiff, \
+      jint biasDiffOffset, JArrayType kernel, jint kernelOffset, \
+      JArrayType bias, jint biasOffset, long classPtr) \
+  { \
+    JNIConvolutionUpdateGradBias( \
+        env, thisClass, input, inputOffset, outputDiff, outputDiffOffset, \
+        biasDiff, biasDiffOffset, kernel, kernelOffset, bias, biasOffset, \
+        classPtr); \
+  }
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// double
+ConvolutionInit(Double, jdouble, jdoubleArray);
+ConvolutionForward(Double, jdouble, jdoubleArray);
+ConvolutionBackwardData(Double, jdouble, jdoubleArray);
+ConvolutionBackwardKernel(Double, jdouble, jdoubleArray);
+ConvolutionBackwardBias(Double, jdouble, jdoubleArray);
+
+// float
+ConvolutionInit(Float, jfloat, jfloatArray);
+ConvolutionForward(Float, jfloat, jfloatArray);
+ConvolutionBackwardData(Float, jfloat, jfloatArray);
+ConvolutionBackwardKernel(Float, jfloat, jfloatArray);
+ConvolutionBackwardBias(Float, jfloat, jfloatArray);
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/mkl/native/src/main/c/jni/debug.cpp b/mkl/native/src/main/c/jni/debug.cpp
new file mode 100644
index 00000000000..a542a04c9af
--- /dev/null
+++ b/mkl/native/src/main/c/jni/debug.cpp
@@ -0,0 +1,37 @@
+#include
+#include
+#include
+#include "debug.h"
+
+LogMessage::LogMessage(const char *file, int line, LogType type)
+{
+  int len = strlen(file) + 20;
+  char *buf = new char[len];
+  type_ = type;
+
+  const char *lastSlash = strrchr(file, '/');
+  const char *fileName = (lastSlash == NULL) ? file : lastSlash + 1;
+
+  snprintf(buf, len, "%c %s %s:%d] ", "DIWEFI"[type], "MKL", fileName, line);
+  stream() << buf;
+
+  delete[] buf;
+}
+
+LogMessage::~LogMessage()
+{
+  stream() << std::endl;
+  if (type_ == FATAL) {
+    stream() << "Aborting..." << std::endl;
+    abort();
+  }
+}
+
+std::ostream& LogMessage::stream()
+{
+  if (type_ >= WARNNING) {
+    return std::cerr;
+  } else {
+    return std::cout;
+  }
+}
diff --git a/mkl/native/src/main/c/jni/debug.h b/mkl/native/src/main/c/jni/debug.h
new file mode 100644
index 00000000000..1545bf22481
--- /dev/null
+++ b/mkl/native/src/main/c/jni/debug.h
@@ -0,0 +1,93 @@
+#ifndef _DEBUG_H_
+#define _DEBUG_H_
+
+#include
+
+const int DBG = 0, INFO = 1, WARNNING = 2, ERROR = 3, FATAL = 4, DEFALT = 5;
+typedef int LogType;
+
+class LogMessage
+{
+ public:
+  LogMessage(const char *file, int line, LogType type);
+  ~LogMessage();
+  std::ostream &stream();
+
+ private:
+  LogType type_;
+};
+
+#define CHECK(x) \
+  if (!(x)) \
+    LogMessage(__FILE__, __LINE__, WARNNING).stream() << "Check failed " #x;
+
+//#define CHECK_EQ(x, y) CHECK((x) == (y))
+#define CHECK_EQ(x, y) \
+  if (!((x) == (y))) \
+    LogMessage(__FILE__, __LINE__, WARNNING).stream() \
+        << "Check failed. " #x << " = " << x << ", which should be " #y
+#define CHECK_NE(x, y) CHECK((x) != (y))
+
+#define LOG(x) LogMessage(__FILE__, __LINE__, x).stream()
+
+#ifdef PERF
+const int INPERF = 1;
+#else
+const int INPERF = 0;
+#endif
+
+#define PERFSTART() \
+  do { \
+    struct timespec start, end; \
+    if (INPERF) { \
+      clock_gettime(CLOCK_MONOTONIC, &start); \
+    }
+
+#define PERFEND(msg) \
+  if (INPERF) { \
+    clock_gettime(CLOCK_MONOTONIC, &end); \
+    LOG(INFO) << __func__ << " " << msg << " costs: " \
+              << (end.tv_sec - start.tv_sec) * 1000 + \
+                     (double)(end.tv_nsec - start.tv_nsec) / 1000000; \
+  } \
+  } \
+  while (0) \
+  ;
+
+/**
+ * @brief print data of 4 dimensions
+ *
+ * Because the input/output is organized as a vector, this is more human
+ * readable when we debug the generated result.
+ *
+ * @param input input/output data which is organized as vector/array.
+ * @param num how many images
+ * @param channel how many channels, like 3
+ * @param height image height
+ * @param width image width
+ * @param msg user-defined message
+ */
+template <typename Type>
+void printData(Type *input, size_t num, size_t channel, size_t height,
+               size_t width, const char *msg)
+{
+  std::cout << std::string(msg) << " CHECK IN CPP..." << std::endl;
+
+  for (int i = 0; i < num; i++) {
+    std::cout << "The " << i << " num." << std::endl;
+    for (int j = 0; j < channel; j++) {
+      std::cout << "The " << j << " channel."
<< std::endl; + for (int k = 0; k < height; k++) { + for (int t = 0; t < width; t++) { + int index = ((i * channel + j) * height + k) * width + t; + std::cout << input[index] << '\t'; + } + std::cout << std::endl; + } + std::cout << std::endl; + } + std::cout << std::endl; + } +} + +#endif diff --git a/mkl/native/src/main/c/jni/layer.cpp b/mkl/native/src/main/c/jni/layer.cpp new file mode 100644 index 00000000000..59867fe0bcb --- /dev/null +++ b/mkl/native/src/main/c/jni/layer.cpp @@ -0,0 +1,23 @@ +#include "layer.h" + +#ifdef __cplusplus +extern "C" { +#endif + +JNIEXPORT +void JNICALL Java_com_intel_analytics_sparkdl_mkl_MKL_SetPrevFloat( + JNIEnv *env, jclass thisClass, long prev, long curr) +{ + MKLLayer::setPrev(prev, curr); +} + +JNIEXPORT +void JNICALL Java_com_intel_analytics_sparkdl_mkl_MKL_SetPrevDouble( + JNIEnv *env, jclass thisClass, long prev, long curr) +{ + MKLLayer::setPrev(prev, curr); +} + +#ifdef __cplusplus +} +#endif diff --git a/mkl/native/src/main/c/jni/layer.h b/mkl/native/src/main/c/jni/layer.h new file mode 100644 index 00000000000..88189178842 --- /dev/null +++ b/mkl/native/src/main/c/jni/layer.h @@ -0,0 +1,112 @@ +#ifndef _MKL_LAYER_H +#define _MKL_LAYER_H +#include + +#include "MKLWrapper.h" +#include "memory.h" + +template +class MKLLayer +{ + public: + MKLLayer(); + ~MKLLayer(); + + static void setPrev(long prev, long curr); + + void init(size_t inputNumber, size_t inputChannel, size_t inputHeight, + size_t inputWidth, size_t dimension); + + std::shared_ptr> input, output, gradInput, gradOutput; + + int dimension; + + // parameters of pooling layer + size_t inputSize[4]; + size_t inputStrides[4]; + + // If it's the first pass, we should create some conversions. + // After that, we need not do that again. + // Default is true. + // + // Note: + // 1. Defaultly, we assume that the address of input will not change. + // 2. The address of input is real address of Array in JVM. + // 3. TODO It will set to false after an iteration (forward and backward). 
+ bool isFirstPass; + + dnnPrimitive_t forwardPrim, backwardPrim; +}; + +template +void MKLLayer::init(size_t inputNumber, size_t inputChannel, + size_t inputHeight, size_t inputWidth, + size_t dimension) +{ + inputSize[0] = inputWidth; + inputSize[1] = inputHeight; + inputSize[2] = inputChannel; + inputSize[3] = inputNumber; + + this->dimension = dimension; + + inputStrides[0] = 1; + for (int i = 1; i < 4; i++) { + inputStrides[i] = inputStrides[i - 1] * inputSize[i - 1]; + } + + input->createUsrLayout(dimension, inputSize, inputStrides); + gradInput->createUsrLayout(dimension, inputSize, inputStrides); +} + +template +MKLLayer::MKLLayer() + : input(new MKLData()), + output(new MKLData()), + gradInput(new MKLData()), + gradOutput(new MKLData()), + isFirstPass(true), + forwardPrim(NULL), + backwardPrim(NULL) +{ +} + +template +MKLLayer::~MKLLayer() +{ + if (forwardPrim) { + dnnDelete(forwardPrim); + forwardPrim = NULL; + } + + if (backwardPrim) { + dnnDelete(backwardPrim); + backwardPrim = NULL; + } +} + +template +void MKLLayer::setPrev(long prev, long curr) +{ + MKLLayer *prevLayer = reinterpret_cast *>(prev); + MKLLayer *currLayer = reinterpret_cast *>(curr); + + dnnLayout_t prevLayout = prevLayer->gradOutput->getMklLayout(); + dnnLayout_t currLayout = currLayer->gradInput->getMklLayout(); + + if (dnnLayoutCompare(prevLayout, currLayout)) { + prevLayer->gradOutput->setUseNext(true); + prevLayer->gradOutput = currLayer->gradInput; + currLayer->gradInput->setUsePrev(true); + } + + prevLayout = prevLayer->output->getMklLayout(); + currLayout = currLayer->input->getMklLayout(); + + if (dnnLayoutCompare(prevLayout, currLayout)) { + prevLayer->output->setUseNext(true); + currLayer->input = prevLayer->output; + currLayer->input->setUsePrev(true); + } +} +#endif diff --git a/mkl/native/src/main/c/jni/linear.cpp b/mkl/native/src/main/c/jni/linear.cpp new file mode 100644 index 00000000000..ca6e14bef4e --- /dev/null +++ b/mkl/native/src/main/c/jni/linear.cpp @@ -0,0 +1,501 @@ +#include + +#include "debug.h" +#include "layer.h" +#include "memory.h" +#include "utils.h" + +template +class MKLLinear : public MKLLayer +{ + public: + MKLLinear(); + ~MKLLinear(); + + void init(size_t inputHeight, size_t inputWidth, size_t outputChannel, + size_t kernelHeight, size_t kernelWidth); + + void updateOutput(DType *input, DType *output); + void updateGradInput(DType *input, DType *gradOutput, DType *gradInput); + void updateGradKernel(DType *input, DType *gradOutput, DType *gradKernel); + void updateGradBias(DType *input, DType *gradOutput, DType *gradBias); + + std::shared_ptr> kernel; + std::shared_ptr> bias; + + std::shared_ptr> gradKernel; + std::shared_ptr> gradBias; + + private: + // this method is not the same as createMklLayout in MKLMemory + void firstPass(); + void preExecute(DType *input); + + size_t inputSize[2]; + size_t inputStrides[2]; + + size_t outputSize[2]; + size_t outputStrides[2]; + + size_t kernelSize[2]; + size_t kernelStrides[2]; + + size_t biasSize[1]; + size_t biasStrides[1]; + + size_t outputChannel; + + dnnPrimitive_t gradKernelPrim, gradBiasPrim; +}; + +template +MKLLinear::MKLLinear() + : kernel(new MKLData), + bias(new MKLData), + gradKernel(new MKLData), + gradBias(new MKLData), + outputChannel(0), + gradKernelPrim(NULL), + gradBiasPrim(NULL) +{ +} + +template +MKLLinear::~MKLLinear() +{ + dnnDelete(gradKernelPrim); + dnnDelete(gradBiasPrim); +} + +template +void MKLLinear::init(size_t inputHeight, size_t inputWidth, + size_t outputChannel, size_t kernelHeight, + size_t 
kernelWidth) +{ + this->dimension = 2; + + inputSize[0] = inputWidth; + inputSize[1] = inputHeight; + + outputSize[0] = outputChannel; + outputSize[1] = inputHeight; + + kernelSize[0] = kernelWidth; + kernelSize[1] = kernelHeight; + + inputStrides[0] = 1; + kernelStrides[0] = 1; + outputStrides[0] = 1; + for (int i = 1; i < this->dimension; i++) { + inputStrides[i] = inputStrides[i - 1] * inputSize[i - 1]; + kernelStrides[i] = kernelStrides[i - 1] * kernelSize[i - 1]; + outputStrides[i] = outputStrides[i - 1] * outputSize[i - 1]; + } + + biasSize[0] = outputChannel; + biasStrides[0] = 1; + + this->outputChannel = outputChannel; + + // create usr layout + this->input->createUsrLayout(this->dimension, inputSize, inputStrides); + this->output->createUsrLayout(this->dimension, outputSize, outputStrides); + this->kernel->createUsrLayout(this->dimension, kernelSize, kernelStrides); + this->bias->createUsrLayout(1, biasSize, biasStrides); + + this->gradInput->createUsrLayout(this->dimension, inputSize, inputStrides); + this->gradOutput->createUsrLayout(this->dimension, outputSize, outputStrides); + this->gradKernel->createUsrLayout(this->dimension, kernelSize, kernelStrides); + // bias dimension is 1 + this->gradBias->createUsrLayout(1, biasSize, biasStrides); +} + +template +void MKLLinear::firstPass() +{ + dnnError_t status = E_UNIMPLEMENTED; + // forward + status = dnnInnerProductCreateForwardBias( + &(this->forwardPrim), NULL, this->dimension, inputSize, outputChannel); + CHECK_EQ(status, E_SUCCESS); + + this->input->createMklLayout(this->forwardPrim, dnnResourceSrc); + this->output->createMklLayout(this->forwardPrim, dnnResourceDst); + this->kernel->createMklLayout(this->forwardPrim, dnnResourceFilter); + this->bias->createMklLayout(this->forwardPrim, dnnResourceBias); + + // backward data + status = dnnInnerProductCreateBackwardData( + &(this->backwardPrim), NULL, this->dimension, inputSize, outputChannel); + CHECK_EQ(status, E_SUCCESS); + + this->gradOutput->createMklLayout(this->backwardPrim, dnnResourceDiffDst); + this->gradInput->createMklLayout(this->backwardPrim, dnnResourceDiffSrc); + + // backward kernel + status = dnnInnerProductCreateBackwardFilter( + &gradKernelPrim, NULL, this->dimension, inputSize, outputChannel); + CHECK_EQ(status, E_SUCCESS); + + this->gradKernel->createMklLayout(this->gradKernelPrim, + dnnResourceDiffFilter); + + // backward bias + status = dnnInnerProductCreateBackwardBias( + &gradBiasPrim, NULL, this->dimension, outputSize); + CHECK_EQ(status, E_SUCCESS); + + this->gradBias->createMklLayout(this->gradBiasPrim, dnnResourceDiffBias); + + // we create the layout only at the first time + this->isFirstPass = false; +} + +template +void MKLLinear::preExecute(DType *input) +{ + this->input->createConversion(); + this->kernel->createConversion(); + this->bias->createConversion(); +} + +template +void MKLLinear::updateOutput(DType *input, DType *output) +{ + if (this->isFirstPass) firstPass(); + + // Because the address will change every time, so we need create conversion + // every forward/backward. + // TODO Should we set the kernel and bias address every time? 
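For orientation, a rough sketch of how init() above would be parameterized for an ordinary fully connected layer, written as if it sat later in this file. The sizes are invented, and the mapping (the "height" arguments carrying the batch and output-feature counts, the "width" arguments the input-feature count) is inferred from the size arrays set up in init(), not taken from a call site in this patch:

void exampleLinearSetup()
{
  // a 32-sample batch mapping 512 input features to 10 output features
  MKLLinear<float> linear;
  linear.init(/* inputHeight   = */ 32,    // batch size
              /* inputWidth    = */ 512,   // input features
              /* outputChannel = */ 10,    // output features
              /* kernelHeight  = */ 10,    // weight rows: output features
              /* kernelWidth   = */ 512);  // weight columns: input features
}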
+ preExecute(input); + this->output->createConversion(); + +#ifdef DEBUG + printData(reinterpret_cast(this->input->getUsrData()), + this->inputSize[3], this->inputSize[2], this->inputSize[1], + this->inputSize[0], "Forward input"); +#endif + + dnnError_t status; + void *resources[dnnResourceNumber]; + + resources[dnnResourceFilter] = this->kernel->getConvertedData(); + resources[dnnResourceBias] = this->bias->getConvertedData(); + resources[dnnResourceSrc] = this->input->getConvertedData(); + resources[dnnResourceDst] = this->output->getData(); + + PERFSTART(); + status = dnnExecute(this->forwardPrim, resources); + PERFEND("main computing"); + CHECK_EQ(status, E_SUCCESS); + +#ifdef DEBUG + printData(reinterpret_cast(this->output->getData()), + outputSize[3], outputSize[2], outputSize[1], outputSize[0], + "Forward output"); +#endif + + if (!this->output->isUseNext()) { + this->output->backToUsr(); + } +} + +template +void MKLLinear::updateGradInput(DType *input, DType *gradOutput, + DType *gradInput) +{ + dnnError_t status; + void *resources[dnnResourceNumber]; + + preExecute(input); + + this->gradOutput->createConversion(); + this->gradInput->createConversion(); + + resources[dnnResourceDiffDst] = this->gradOutput->getConvertedData(); + resources[dnnResourceFilter] = this->kernel->getConvertedData(); + resources[dnnResourceDiffSrc] = this->gradInput->getData(); + + // 4. main computing parts. + PERFSTART(); + status = dnnExecute(this->backwardPrim, resources); + CHECK_EQ(status, E_SUCCESS); + PERFEND("main computing"); + + if (!this->gradInput->isUsePrev()) { + this->gradInput->backToUsr(); + } + +#ifdef DEBUG + printData(reinterpret_cast(this->gradInput->getUsrData()), + inputSize[3], inputSize[2], inputSize[1], inputSize[0], + "backward gradient input"); +#endif +} + +template +void MKLLinear::updateGradKernel(DType *input, DType *gradOutput, + DType *gradKernel) +{ + dnnError_t status; + void *resources[dnnResourceNumber]; + + preExecute(input); + + this->gradOutput->createConversion(); + this->gradKernel->createConversion(); + + resources[dnnResourceDiffDst] = this->gradOutput->getConvertedData(); + resources[dnnResourceSrc] = this->input->getConvertedData(); + resources[dnnResourceDiffFilter] = this->gradKernel->getData(); + + // 4. main computing parts. + PERFSTART(); + status = dnnExecute(this->gradKernelPrim, resources); + CHECK_EQ(status, E_SUCCESS); + PERFEND("main computing"); + + // the kernel need not re-use for previous layer + this->gradKernel->backToUsr(); +} + +template +void MKLLinear::updateGradBias(DType *input, DType *gradOutput, + DType *gradBias) +{ + dnnError_t status; + void *resources[dnnResourceNumber]; + + preExecute(input); + + this->gradOutput->createConversion(); + this->gradBias->createConversion(); + + resources[dnnResourceDiffDst] = this->gradOutput->getConvertedData(); + resources[dnnResourceDiffBias] = this->gradBias->getData(); + + // 4. main computing parts. 
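// For reference, the resource slots the four passes of this layer fill before
// calling dnnExecute (taken from the code above and below):
//   forward         : Src, Filter, Bias -> Dst
//   backward data   : DiffDst, Filter   -> DiffSrc
//   backward filter : Src, DiffDst      -> DiffFilter
//   backward bias   : DiffDst           -> DiffBias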
+ PERFSTART(); + status = dnnExecute(this->gradBiasPrim, resources); + CHECK_EQ(status, E_SUCCESS); + PERFEND("main computing"); + + this->gradBias->backToUsr(); +} + +template +jlong JNILinearInit(JNIEnv *env, jclass thisClass, jint inputHeight, + jint inputWidth, jint outputChannel, jint kernelHeight, + jint kernelWidth) +{ + MKLLinear *ptr = new MKLLinear(); + ptr->init(inputHeight, inputWidth, outputChannel, kernelHeight, kernelWidth); + + return reinterpret_cast(ptr); +} + +template +void JNILinearUpdateOutput(JNIEnv *env, jclass thisClass, ArrayType input, + jint inputOffset, ArrayType output, + jint outputOffset, ArrayType kernel, + jint kernelOffset, ArrayType bias, jint biasOffset, + long classPtr) +{ + MKLLinear *ptr = reinterpret_cast *>(classPtr); + + std::shared_ptr> jInput( + new ZipArray(env, input, inputOffset, ptr->input)); + + std::shared_ptr> jOutput( + new ZipArray(env, output, outputOffset, ptr->output)); + + std::shared_ptr> jKernel( + new ZipArray(env, kernel, kernelOffset, ptr->kernel)); + + std::shared_ptr> jBias( + new ZipArray(env, bias, biasOffset, ptr->bias)); + + ptr->updateOutput(jInput->getPtr(), jOutput->getPtr()); +} + +template +void JNILinearUpdateGradInput(JNIEnv *env, jclass thisClass, ArrayType input, + jint inputOffset, ArrayType outputDiff, + jint outputDiffOffset, ArrayType inputDiff, + jint inputDiffOffset, ArrayType kernel, + jint kernelOffset, ArrayType bias, + jint biasOffset, long classPtr) +{ + MKLLinear *ptr = reinterpret_cast *>(classPtr); + std::shared_ptr> jInput( + new ZipArray(env, input, inputOffset, ptr->input)); + + std::shared_ptr> jOutputDiff( + new ZipArray(env, outputDiff, outputDiffOffset, + ptr->gradOutput)); + + std::shared_ptr> jInputDiff( + new ZipArray(env, inputDiff, inputDiffOffset, + ptr->gradInput)); + + std::shared_ptr> jKernel( + new ZipArray(env, kernel, kernelOffset, ptr->kernel)); + + std::shared_ptr> jBias( + new ZipArray(env, bias, biasOffset, ptr->bias)); + + ptr->updateGradInput(jInput->getPtr(), jOutputDiff->getPtr(), + jInputDiff->getPtr()); +} + +template +void JNILinearUpdateGradKernel(JNIEnv *env, jclass thisClass, ArrayType input, + jint inputOffset, ArrayType outputDiff, + jint outputDiffOffset, ArrayType kernelDiff, + jint kernelDiffOffset, ArrayType kernel, + jint kernelOffset, ArrayType bias, + jint biasOffset, long classPtr) +{ + MKLLinear *ptr = reinterpret_cast *>(classPtr); + + std::shared_ptr> jInput( + new ZipArray(env, input, inputOffset, ptr->input)); + + std::shared_ptr> jOutputDiff( + new ZipArray(env, outputDiff, outputDiffOffset, + ptr->gradOutput)); + + std::shared_ptr> jKernelDiff( + new ZipArray(env, kernelDiff, kernelDiffOffset, + ptr->gradKernel)); + + std::shared_ptr> jKernel( + new ZipArray(env, kernel, kernelOffset, ptr->kernel)); + + std::shared_ptr> jBias( + new ZipArray(env, bias, biasOffset, ptr->bias)); + + ptr->updateGradKernel(jInput->getPtr(), jOutputDiff->getPtr(), + jKernelDiff->getPtr()); +} + +template +void JNILinearUpdateGradBias(JNIEnv *env, jclass thisClass, ArrayType input, + jint inputOffset, ArrayType outputDiff, + jint outputDiffOffset, ArrayType biasDiff, + jint biasDiffOffset, ArrayType kernel, + jint kernelOffset, ArrayType bias, jint biasOffset, + long classPtr) +{ + MKLLinear *ptr = reinterpret_cast *>(classPtr); + + std::shared_ptr> jInput( + new ZipArray(env, input, inputOffset, ptr->input)); + + std::shared_ptr> jOutputDiff( + new ZipArray(env, outputDiff, outputDiffOffset, + ptr->gradOutput)); + + std::shared_ptr> jBiasDiff( + new ZipArray(env, 
biasDiff, biasDiffOffset, + ptr->gradBias)); + + std::shared_ptr> jKernel( + new ZipArray(env, kernel, kernelOffset, ptr->kernel)); + + std::shared_ptr> jBias( + new ZipArray(env, bias, biasOffset, ptr->bias)); + + ptr->updateGradBias(jInput->getPtr(), jOutputDiff->getPtr(), + jBiasDiff->getPtr()); +} +// Macro +#define LinearInit(DType, JType, JArrayType) \ + JNIEXPORT \ + jlong JNICALL Java_com_intel_analytics_sparkdl_mkl_MKL_LinearInit##DType( \ + JNIEnv *env, jclass thisClass, jint inputHeight, jint inputWidth, \ + jint outputChannel, jint kernelHeight, jint kernelWidth) \ + { \ + return JNILinearInit(env, thisClass, inputHeight, \ + inputWidth, outputChannel, \ + kernelHeight, kernelWidth); \ + } + +#define LinearForward(DType, JType, JArrayType) \ + JNIEXPORT \ + void JNICALL Java_com_intel_analytics_sparkdl_mkl_MKL_LinearForward##DType( \ + JNIEnv *env, jclass thisClass, JArrayType input, jint inputOffset, \ + JArrayType output, jint outputOffset, JArrayType kernel, \ + jint kernelOffset, JArrayType bias, jint biasOffset, long classPtr) \ + { \ + JNILinearUpdateOutput( \ + env, thisClass, input, inputOffset, output, outputOffset, kernel, \ + kernelOffset, bias, biasOffset, classPtr); \ + } + +#define LinearBackwardData(DType, JType, JArrayType) \ + JNIEXPORT \ + void JNICALL \ + Java_com_intel_analytics_sparkdl_mkl_MKL_LinearBackwardData##DType( \ + JNIEnv *env, jclass thisClass, JArrayType input, jint inputOffset, \ + JArrayType outputDiff, jint outputDiffOffset, JArrayType inputDiff, \ + jint inputDiffOffset, JArrayType kernel, jint kernelOffset, \ + JArrayType bias, jint biasOffset, long classPtr) \ + { \ + JNILinearUpdateGradInput( \ + env, thisClass, input, inputOffset, outputDiff, outputDiffOffset, \ + inputDiff, inputDiffOffset, kernel, kernelOffset, bias, biasOffset, \ + classPtr); \ + } + +#define LinearBackwardKernel(DType, JType, JArrayType) \ + JNIEXPORT \ + void JNICALL \ + Java_com_intel_analytics_sparkdl_mkl_MKL_LinearBackwardKernel##DType( \ + JNIEnv *env, jclass thisClass, JArrayType input, jint inputOffset, \ + JArrayType outputDiff, jint outputDiffOffset, JArrayType kernelDiff, \ + jint kernelDiffOffset, JArrayType kernel, jint kernelOffset, \ + JArrayType bias, jint biasOffset, long classPtr) \ + { \ + JNILinearUpdateGradKernel( \ + env, thisClass, input, inputOffset, outputDiff, outputDiffOffset, \ + kernelDiff, kernelDiffOffset, kernel, kernelOffset, bias, biasOffset, \ + classPtr); \ + } + +#define LinearBackwardBias(DType, JType, JArrayType) \ + JNIEXPORT \ + void JNICALL \ + Java_com_intel_analytics_sparkdl_mkl_MKL_LinearBackwardBias##DType( \ + JNIEnv *env, jclass thisClass, JArrayType input, jint inputOffset, \ + JArrayType outputDiff, jint outputDiffOffset, JArrayType biasDiff, \ + jint biasDiffOffset, JArrayType kernel, jint kernelOffset, \ + JArrayType bias, jint biasOffset, long classPtr) \ + { \ + JNILinearUpdateGradBias( \ + env, thisClass, input, inputOffset, outputDiff, outputDiffOffset, \ + biasDiff, biasDiffOffset, kernel, kernelOffset, bias, biasOffset, \ + classPtr); \ + } + +#ifdef __cplusplus +extern "C" { +#endif + +// double +LinearInit(Double, jdouble, jdoubleArray); +LinearForward(Double, jdouble, jdoubleArray); +LinearBackwardData(Double, jdouble, jdoubleArray); +LinearBackwardKernel(Double, jdouble, jdoubleArray); +LinearBackwardBias(Double, jdouble, jdoubleArray); + +// float +LinearInit(Float, jfloat, jfloatArray); +LinearForward(Float, jfloat, jfloatArray); +LinearBackwardData(Float, jfloat, jfloatArray); +LinearBackwardKernel(Float, 
jfloat, jfloatArray); +LinearBackwardBias(Float, jfloat, jfloatArray); + +#ifdef __cplusplus +} +#endif diff --git a/mkl/native/src/main/c/jni/lrn.cpp b/mkl/native/src/main/c/jni/lrn.cpp new file mode 100644 index 00000000000..bead038a6f8 --- /dev/null +++ b/mkl/native/src/main/c/jni/lrn.cpp @@ -0,0 +1,306 @@ +#include + +#include "debug.h" +#include "layer.h" +#include "memory.h" +#include "utils.h" + +template +class MKLLRN : public MKLLayer +{ + public: + MKLLRN(); + ~MKLLRN(); + + void init(size_t inputNumber, size_t inputChannel, size_t inputHeight, + size_t inputWidth, int size, DType alpha, DType beta, DType k, + int dimension); + + void updateOutput(DType *input, DType *output); + void updateGradInput(DType *input, DType *gradOutput, DType *gradInput); + + private: + // this method is not the same as createMklLayout in MKLMemory + void firstPass(); + void preExecute(DType *input); + + std::shared_ptr> workspace; + + int size; + DType alpha; + DType beta; + DType k; + + size_t inputSize[4]; + size_t inputStrides[4]; + + size_t outputSize[4]; + size_t outputStrides[4]; +}; + +template +MKLLRN::MKLLRN() : workspace(new MKLData) +{ +} + +template +MKLLRN::~MKLLRN() +{ +} + +template +void MKLLRN::init(size_t inputNumber, size_t inputChannel, + size_t inputHeight, size_t inputWidth, int size, + DType alpha, DType beta, DType k, int dimension) +{ + this->dimension = dimension; + + inputSize[0] = inputWidth; + inputSize[1] = inputHeight; + inputSize[2] = inputChannel; + inputSize[3] = inputNumber; + + inputStrides[0] = 1; + for (int i = 1; i < 4; i++) + inputStrides[i] = inputStrides[i - 1] * inputSize[i - 1]; + + // the output channel is as same as the number of kernel. + // and the output number must be as same as the number of input too. + outputSize[0] = inputWidth; + outputSize[1] = inputHeight; + outputSize[2] = inputChannel; + outputSize[3] = inputNumber; + + outputStrides[0] = 1; + for (int i = 1; i < 4; i++) + outputStrides[i] = outputStrides[i - 1] * outputSize[i - 1]; + + this->size = size; + this->alpha = alpha; + this->beta = beta; + this->k = k; + + // create usr layout + this->input->createUsrLayout(dimension, inputSize, inputStrides); + this->output->createUsrLayout(dimension, outputSize, outputStrides); + + this->gradInput->createUsrLayout(dimension, inputSize, inputStrides); + this->gradOutput->createUsrLayout(dimension, outputSize, outputStrides); +} + +template +void MKLLRN::firstPass() +{ + dnnError_t status = E_UNIMPLEMENTED; + dnnLayout_t layout; + + status = + dnnLayoutCreate(&layout, this->dimension, inputSize, inputStrides); + CHECK_EQ(status, E_SUCCESS); + + status = dnnLRNCreateForward(&(this->forwardPrim), NULL, layout, size, + alpha, beta, k); + CHECK_EQ(status, E_SUCCESS); + + this->input->createMklLayout(this->forwardPrim, dnnResourceSrc); + this->output->createMklLayout(this->forwardPrim, dnnResourceDst); + + status = dnnLRNCreateBackward(&(this->backwardPrim), NULL, layout, + layout, size, alpha, beta, k); + CHECK_EQ(status, E_SUCCESS); + + this->gradOutput->createMklLayout(this->backwardPrim, dnnResourceDiffDst); + this->gradInput->createMklLayout(this->backwardPrim, dnnResourceDiffSrc); + + // create workspace + this->workspace->createMklLayout(this->forwardPrim, dnnResourceWorkspace); + this->workspace->createConversion(true); + + dnnLayoutDelete(layout); + + // we create the layout only at the first time + this->isFirstPass = false; +} + +template +void MKLLRN::preExecute(DType *input) +{ + this->input->createConversion(); +} + +template +void 
MKLLRN::updateOutput(DType *input, DType *output) +{ + if (this->isFirstPass) firstPass(); + + // Because the address will change every time, so we need create conversion + // every forward/backward. + // TODO Should we set the kernel and bias address every time? + preExecute(input); + this->output->createConversion(); + +#ifdef DEBUG + printData(reinterpret_cast(this->input->getUsrData()), + this->inputSize[3], this->inputSize[2], this->inputSize[1], + this->inputSize[0], "Forward input"); +#endif + + dnnError_t status; + void *resources[dnnResourceNumber]; + + resources[dnnResourceSrc] = this->input->getConvertedData(); + resources[dnnResourceDst] = this->output->getData(); + resources[dnnResourceWorkspace] = this->workspace->getData(); + + PERFSTART(); + status = dnnExecute(this->forwardPrim, resources); + PERFEND("main computing"); + CHECK_EQ(status, E_SUCCESS); + +#ifdef DEBUG + printData(reinterpret_cast(this->output->getData()), + outputSize[3], outputSize[2], outputSize[1], outputSize[0], + "Forward output"); +#endif + + if (!this->output->isUseNext()) { + this->output->backToUsr(); + } +} + +template +void MKLLRN::updateGradInput(DType *input, DType *gradOutput, + DType *gradInput) +{ + dnnError_t status; + void *resources[dnnResourceNumber]; + + preExecute(input); + + this->gradOutput->createConversion(); + this->gradInput->createConversion(); + + resources[dnnResourceDiffDst] = this->gradOutput->getConvertedData(); + resources[dnnResourceDiffSrc] = this->gradInput->getData(); + resources[dnnResourceSrc] = this->input->getConvertedData(); + resources[dnnResourceWorkspace] = this->workspace->getData(); + + // 4. main computing parts. + PERFSTART(); + status = dnnExecute(this->backwardPrim, resources); + CHECK_EQ(status, E_SUCCESS); + PERFEND("main computing"); + + if (!this->gradInput->isUsePrev()) { + this->gradInput->backToUsr(); + } + +#ifdef DEBUG + printData(reinterpret_cast(this->gradInput->getUsrData()), + inputSize[3], inputSize[2], inputSize[1], inputSize[0], + "backward gradient input"); +#endif +} + +template +jlong JNILRNInit(JNIEnv *env, jclass thisClass, jint inputNumber, + jint inputChannel, jint inputHeight, jint inputWidth, + jint size, DType alpha, DType beta, DType k, jint dimension) +{ + MKLLRN *lrn = new MKLLRN(); + lrn->init(inputNumber, inputChannel, inputHeight, inputWidth, size, alpha, + beta, k, dimension); + + return reinterpret_cast(lrn); +} + +template +void JNILRNUpdateOutput(JNIEnv *env, jclass thisClass, ArrayType input, + jint inputOffset, ArrayType output, jint outputOffset, + long classPtr) +{ + MKLLRN *ptr = reinterpret_cast *>(classPtr); + + std::shared_ptr> jInput( + new ZipArray(env, input, inputOffset, ptr->input)); + + std::shared_ptr> jOutput( + new ZipArray(env, output, outputOffset, ptr->output)); + + ptr->updateOutput(jInput->getPtr(), jOutput->getPtr()); +} + +template +void JNILRNUpdateGradInput(JNIEnv *env, jclass thisClass, ArrayType input, + jint inputOffset, ArrayType outputDiff, + jint outputDiffOffset, ArrayType inputDiff, + jint inputDiffOffset, long classPtr) +{ + MKLLRN *ptr = reinterpret_cast *>(classPtr); + std::shared_ptr> jInput( + new ZipArray(env, input, inputOffset, ptr->input)); + + std::shared_ptr> jOutputDiff( + new ZipArray(env, outputDiff, outputDiffOffset, + ptr->gradOutput)); + + std::shared_ptr> jInputDiff( + new ZipArray(env, inputDiff, inputDiffOffset, + ptr->gradInput)); + + ptr->updateGradInput(jInput->getPtr(), jOutputDiff->getPtr(), + jInputDiff->getPtr()); +} + +// Macro +#define LRNInit(DType, JType, 
JArrayType) \ + JNIEXPORT \ + jlong JNICALL Java_com_intel_analytics_sparkdl_mkl_MKL_LRNInit##DType( \ + JNIEnv *env, jclass thisClass, jint inputNumber, jint inputChannel, \ + jint inputHeight, jint inputWidth, jint size, JType alpha, JType beta, \ + JType k, jint dimension) \ + { \ + return JNILRNInit( \ + env, thisClass, inputNumber, inputChannel, inputHeight, inputWidth, \ + size, alpha, beta, k, dimension); \ + } + +#define LRNForward(DType, JType, JArrayType) \ + JNIEXPORT \ + void JNICALL Java_com_intel_analytics_sparkdl_mkl_MKL_LRNForward##DType( \ + JNIEnv *env, jclass thisClass, JArrayType input, jint inputOffset, \ + JArrayType output, jint outputOffset, long classPtr) \ + { \ + JNILRNUpdateOutput(env, thisClass, input, inputOffset, \ + output, outputOffset, classPtr); \ + } + +#define LRNBackward(DType, JType, JArrayType) \ + JNIEXPORT \ + void JNICALL Java_com_intel_analytics_sparkdl_mkl_MKL_LRNBackward##DType( \ + JNIEnv *env, jclass thisClass, JArrayType input, jint inputOffset, \ + JArrayType outputDiff, jint outputDiffOffset, JArrayType inputDiff, \ + jint inputDiffOffset, long classPtr) \ + { \ + JNILRNUpdateGradInput( \ + env, thisClass, input, inputOffset, outputDiff, outputDiffOffset, \ + inputDiff, inputDiffOffset, classPtr); \ + } + +#ifdef __cplusplus +extern "C" { +#endif + +// double +LRNInit(Double, jdouble, jdoubleArray); +LRNForward(Double, jdouble, jdoubleArray); +LRNBackward(Double, jdouble, jdoubleArray); + +// float +LRNInit(Float, jfloat, jfloatArray); +LRNForward(Float, jfloat, jfloatArray); +LRNBackward(Float, jfloat, jfloatArray); + +#ifdef __cplusplus +} +#endif diff --git a/mkl/native/src/main/c/jni/memory.h b/mkl/native/src/main/c/jni/memory.h new file mode 100644 index 00000000000..1d531f51d42 --- /dev/null +++ b/mkl/native/src/main/c/jni/memory.h @@ -0,0 +1,425 @@ +#ifndef _MKL_MEMORY_H +#define _MKL_MEMORY_H + +#include +#include +#include +#include "MKLWrapper.h" +#include "debug.h" + +template +class MKLData +{ + public: + MKLData(); + ~MKLData(); + + template + friend class ZipArray; + + // set + void createUsrLayout(int dimensions, size_t *size, size_t *stride); + void createMklLayout(dnnPrimitive_t primitive, dnnResourceType_t type); + /** + * @brief create an mkl conversion + * + * @param doNotCreateConversion This argument is only for pooling. Because it + * can't be converted when the mode is floor. + */ + void createConversion(bool doNotCreateConversion = false); + void backToUsr(); + // TODO If the input always the same, we should not have a set method. + void setUsrData(void *ptr); + // this is only for re-using previous layer memory. + void setMklData(void *ptr); + + // get + dnnLayout_t getUsrLayout(); + dnnLayout_t getMklLayout(); + + // TODO should we combine this two versions of getData -> one version? + void *getData(); + void *getConvertedData(); + + // for debug + void *getUsrData(); + void *getMklData(); + + // for re-using output generated by mkl. + bool isUseNext(); + bool isUsePrev(); + + void setUseNext(bool val); + void setUsePrev(bool val); + // ------------------------------------ + + // Currently, this two method substitude the backToUsr in pooling layer. + /** + * @brief cut the last row and column of every matrix in 4-D data. + * + * Note: MUST be used in mkl -> usr data. + * + * @param fromSize mkl data size. + * @param fromStrides mkl data strides. + * @param toStrides usr data strides. 
+ */ + void cutLastRowColumn(size_t *fromSize, size_t *fromStrides, + size_t *toStrides); + /** + * @brief pad the last row and column of every matrix in 4-D data. + * + * Note: MUST be used in usr -> mkl data. + * + * @param fromSize usr data size + * @param fromStrides usr data strides + * @param toSize mkl data size + * @param toStrides mkl data strides + */ + void padLastRowColumn(size_t *fromSize, size_t *fromStrides, size_t *toSize, + size_t *toStrides); + + size_t getMklLayoutSize(); + + private: + // call dnnAllocateBuffer to allocate a new block of mem + void allocate(); + void convert(dnnPrimitive_t primitive, void *from, void *to); + + dnnLayout_t layoutUsr; + dnnLayout_t layoutMkl; + + void *dataUsr; + void *dataMkl; + + dnnPrimitive_t mklToUsr; + dnnPrimitive_t usrToMkl; + + bool useNext; + bool usePrev; +}; + +template +MKLData::MKLData() +{ + dataUsr = NULL; + dataMkl = NULL; + + layoutUsr = NULL; + layoutMkl = NULL; + + mklToUsr = NULL; + usrToMkl = NULL; + + useNext = false; + usePrev = false; +} + +template +MKLData::~MKLData() +{ + if (layoutUsr) { + dnnLayoutDelete(layoutUsr); + layoutUsr = NULL; + } + if (layoutMkl) { + dnnLayoutDelete(layoutMkl); + layoutMkl = NULL; + } + if (dataMkl) { + dnnReleaseBuffer(dataMkl); + dataMkl = NULL; + } + + dnnDelete(mklToUsr); + dnnDelete(usrToMkl); + + LOG(DBG) << "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"; +} + +template +void MKLData::createUsrLayout(int dimension, size_t *size, + size_t *stride) +{ + dnnError_t status; + status = dnnLayoutCreate(&layoutUsr, dimension, size, stride); + CHECK_EQ(status, E_SUCCESS); +} + +template +void MKLData::createMklLayout(dnnPrimitive_t primitive, + dnnResourceType_t type) +{ + dnnError_t status; + status = dnnLayoutCreateFromPrimitive(&layoutMkl, primitive, type); + CHECK_EQ(status, E_SUCCESS); +} + +template +void MKLData::createConversion(bool doNotCreateConversion) +{ + if (!layoutUsr && !layoutMkl) return; + + if (isUsePrev() || isUseNext()) return; + + // this->willToUsr = willToUsr; + int isSame = dnnLayoutCompare(layoutUsr, layoutMkl); + // it not unnecessary to convert when the layout in scala and mkl is the same. + // But we shoud pay attention to that it's not sure layout must be the same + // when the dnnLayoutGetMemorySize is the same. + if (!isSame) { + if (!dataMkl) { + allocate(); + } + + if (!doNotCreateConversion) { + if (mklToUsr) { + dnnDelete(mklToUsr); + mklToUsr = NULL; + } + if (usrToMkl) { + dnnDelete(usrToMkl); + usrToMkl = NULL; + } + dnnError_t status; + status = dnnConversionCreate(&mklToUsr, layoutMkl, layoutUsr); + CHECK_EQ(status, E_SUCCESS); + + status = dnnConversionCreate(&usrToMkl, layoutUsr, layoutMkl); + CHECK_EQ(status, E_SUCCESS); + } + } +} + +template +void MKLData::backToUsr() +{ + // TODO we should put the if statement of isUseNex here. 
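Putting the pieces of this class together, a minimal usage sketch for an output-side buffer, assuming a float instantiation and an already created primitive; the function and variable names are illustrative only:

#include "memory.h"

// `prim` is a layer's forward primitive, `jvmOutput` the plain JVM array.
void prepareOutput(dnnPrimitive_t prim, size_t size[4], size_t strides[4],
                   float *jvmOutput)
{
  MKLData<float> dst;
  dst.createUsrLayout(4, size, strides);      // layout of the plain JVM array
  dst.createMklLayout(prim, dnnResourceDst);  // layout the primitive produces
  dst.setUsrData(jvmOutput);
  dst.createConversion();   // allocates an MKL buffer and the two conversion
                            // primitives only when the layouts really differ
  // ... resources[dnnResourceDst] = dst.getData(); dnnExecute(prim, resources); ...
  dst.backToUsr();          // MKL -> usr copy of the result; a no-op when no
                            // MKL buffer was needed
}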
+ if (dataUsr && dataMkl) { + convert(mklToUsr, dataMkl, dataUsr); + } +} + +template +void MKLData::allocate() +{ + dnnError_t status; + status = dnnAllocateBuffer(&dataMkl, layoutMkl); + CHECK_EQ(status, E_SUCCESS); + + size_t size = dnnLayoutGetMemorySize(layoutMkl); + memset(dataMkl, 0, size); + + LOG(INFO) << "Allocating layout memory -> " << size << " bytes..."; +} + +template +void MKLData::convert(dnnPrimitive_t primitive, void *from, void *to) +{ + dnnError_t status; + void *resources[dnnResourceNumber]; + + resources[dnnResourceFrom] = from; + resources[dnnResourceTo] = to; + + PERFSTART(); + status = dnnExecute(primitive, resources); + CHECK_EQ(status, E_SUCCESS); + PERFEND("main computing"); +} + +template +void *MKLData::getConvertedData() +{ + void *ret = dataUsr; + + // TODO something wrong + // 1. The data of previous layer we use should be allocated by mkl + // 2. Default it always convert the data. + if (usrToMkl) { + if (!isUsePrev() && !isUseNext()) { + convert(usrToMkl, dataUsr, dataMkl); + } + ret = dataMkl; + } else if (dataMkl) { + // sometimes, we need create memory for mkl, like workspace in pooling. + ret = dataMkl; + } + + return ret; +} + +template +void *MKLData::getData() +{ + void *ret = dataUsr; + + if (dataMkl) { + // sometimes, we need create memory for mkl, like workspace in pooling. + ret = dataMkl; + } + + return ret; +} + +template +void MKLData::setUsrData(void *ptr) +{ + dataUsr = ptr; +} + +template +void *MKLData::getUsrData() +{ + return dataUsr; +} + +template +void *MKLData::getMklData() +{ + return dataMkl; +} + +template +bool MKLData::isUseNext() +{ + return useNext; +} + +template +bool MKLData::isUsePrev() +{ + return usePrev; +} + +template +void MKLData::setUseNext(bool val) +{ + useNext = val; +} + +template +void MKLData::setUsePrev(bool val) +{ + usePrev = val; +} + +template +void MKLData::cutLastRowColumn(size_t *fromStrides, size_t *toSize, + size_t *toStrides) +{ + // TODO this should be optimized. It's terrible. + // The funciton of four depth loop cuts off the last column and + // the last row of every matrix (height * weight) in output generated by + // MKL2017. memcpy may be much better. + // Fortunately, it doesn't occur frequently and it will not cost so much. + // + // TODO the default dimension is 4 + DType *from = reinterpret_cast(dataMkl); + DType *to = reinterpret_cast(dataUsr); + PERFSTART(); + for (int n = 0; n < toSize[3]; n++) + for (int c = 0; c < toSize[2]; c++) + for (int h = 0; h < toSize[1]; h++) // height + for (int w = 0; w < toSize[0]; w++) { // width + int toIndex = + n * toStrides[3] + c * toStrides[2] + h * toStrides[1] + w; + int fromIndex = + n * fromStrides[3] + c * fromStrides[2] + h * fromStrides[1] + w; + *(to + toIndex) = *(from + fromIndex); + } + PERFEND("convert : cut last row and column of a matrix"); +} + +template +void MKLData::padLastRowColumn(size_t *fromSize, size_t *fromStrides, + size_t *toSize, size_t *toStrides) +{ + DType *from = reinterpret_cast(dataUsr); + DType *to = reinterpret_cast(dataMkl); + + PERFSTART(); + for (int n = 0; n < fromSize[3]; n++) { + for (int c = 0; c < fromSize[2]; c++) { + int baseIndex = n * toStrides[3] + c * toStrides[2]; + + for (int h = 0; h < fromSize[1]; h++) { // height + memcpy(to + baseIndex + h * toStrides[1], + from + baseIndex + h * fromStrides[1], + fromSize[0] * sizeof(DType)); + + // the last column of a matrix with 0. 
we only need to set + // one element to 0, because 0 <= ceil - floor <= 1 + if (toSize[0] != fromSize[0]) { + int end = baseIndex + h * toStrides[1] + fromSize[0]; + *(to + end) = 0; + } + } + + // pad the last row of a matrix with 0 * width + if (toSize[1] != fromSize[1]) { + int end = baseIndex + toSize[1] * toStrides[1]; + memset(to + end, 0, toSize[0] * sizeof(DType)); + } + } + } + PERFEND("convert : pad last row and column of a matrix with 0"); +} + +template +size_t MKLData::getMklLayoutSize() +{ + if (layoutMkl) + return dnnLayoutGetMemorySize(layoutMkl); + else + return 0; +} + +template +dnnLayout_t MKLData::getMklLayout() +{ + return layoutMkl; +} + +template +class ZipArray +{ + public: + ZipArray(JNIEnv *env, JArrayType array, jint offset, + std::shared_ptr> mklData); + ~ZipArray(); + + JType *getPtr(); + + private: + void *ptr; + JArrayType array; + JNIEnv *env; +}; + +template +ZipArray::ZipArray(JNIEnv *env, JArrayType array, + jint offset, + std::shared_ptr> mklData) +{ + this->ptr = env->GetPrimitiveArrayCritical(array, 0); + this->env = env; + this->array = array; + + JType *usrPtr = reinterpret_cast(ptr) + offset; + + if (mklData) mklData->setUsrData(usrPtr); +} + +template +ZipArray::~ZipArray() +{ + env->ReleasePrimitiveArrayCritical(array, ptr, 0); +} + +template +JType *ZipArray::getPtr() +{ + return reinterpret_cast(ptr); +} + +#endif diff --git a/mkl/native/src/main/c/jni/mkl.c b/mkl/native/src/main/c/jni/omp_threads.cpp similarity index 71% rename from mkl/native/src/main/c/jni/mkl.c rename to mkl/native/src/main/c/jni/omp_threads.cpp index fcb600f70b0..db7e50f2789 100644 --- a/mkl/native/src/main/c/jni/mkl.c +++ b/mkl/native/src/main/c/jni/omp_threads.cpp @@ -9,19 +9,20 @@ extern "C" { * Method: setNumThreads * Signature: (I)V */ -JNIEXPORT void JNICALL Java_com_intel_analytics_sparkdl_mkl_MKL_setNumThreads - (JNIEnv * env, jclass cls, jint num_threads) { +JNIEXPORT void JNICALL Java_com_intel_analytics_sparkdl_mkl_MKL_setNumThreads( + JNIEnv* env, jclass cls, jint num_threads) +{ omp_set_num_threads(num_threads); } - /* * Class: com_intel_webscaleml_mkl_MKL * Method: getNumThreads * Signature: ()I */ -JNIEXPORT jint JNICALL Java_com_intel_analytics_sparkdl_mkl_MKL_getNumThreads - (JNIEnv * env, jclass cls) { +JNIEXPORT jint JNICALL +Java_com_intel_analytics_sparkdl_mkl_MKL_getNumThreads(JNIEnv* env, jclass cls) +{ return omp_get_max_threads(); } diff --git a/mkl/native/src/main/c/jni/pooling.cpp b/mkl/native/src/main/c/jni/pooling.cpp new file mode 100644 index 00000000000..9ab1fbee322 --- /dev/null +++ b/mkl/native/src/main/c/jni/pooling.cpp @@ -0,0 +1,364 @@ +#include + +#include "debug.h" +#include "layer.h" +#include "memory.h" +#include "utils.h" + +enum Algorithm { MAX, AVG, MIN }; + +template +class MKLPooling : public MKLLayer +{ + public: + MKLPooling(); + ~MKLPooling(); + + void init(size_t inputNumber, size_t inputChannel, size_t inputHeight, + size_t inputWidth, size_t kernelHeight, size_t kernelWidth, + size_t strideHeight, size_t strideWidth, int padHeight, + int padWidth, int dimension, bool ceilMode, Algorithm pAl); + + void updateOutput(DType *input, DType *output); + void updateGradInput(DType *input, DType *gradOutput, DType *gradInput); + + private: + std::shared_ptr> workspace; + + size_t kernelSize[2]; + + size_t outputSizeCeil[4]; + size_t outputStridesCeil[4]; + + size_t outputSizeFloor[4]; + size_t outputStridesFloor[4]; + + size_t stride[2]; + int pad[2]; + + // Algorithm for pooling : max, average, min. 
The default is MAX + dnnAlgorithm_t algorithm; + // When $mod(input + 2 * pad - kernel)$ is not eqal 0, the divisible will be + // false. + bool ceilMode; +}; + +template +MKLPooling::MKLPooling() : workspace(new MKLData) +{ +} + +template +MKLPooling::~MKLPooling() +{ +} + +template +void MKLPooling::init(size_t inputNumber, size_t inputChannel, + size_t inputHeight, size_t inputWidth, + size_t kernelHeight, size_t kernelWidth, + size_t strideHeight, size_t strideWidth, + int padHeight, int padWidth, int dimension, + bool ceilMode, Algorithm pAl) +{ + MKLLayer::init(inputNumber, inputChannel, inputHeight, inputWidth, + dimension); + + switch (pAl) { + case MAX: + algorithm = dnnAlgorithmPoolingMax; + break; + case AVG: + algorithm = dnnAlgorithmPoolingAvg; + break; + case MIN: + algorithm = dnnAlgorithmPoolingMin; + break; + default: + algorithm = dnnAlgorithmPoolingMax; + } + + stride[0] = strideWidth; + stride[1] = strideHeight; + + kernelSize[0] = kernelWidth; + kernelSize[1] = kernelHeight; + + pad[0] = -padWidth; + pad[1] = -padHeight; + + this->ceilMode = ceilMode; + + // compute output + outputSizeCeil[0] = + computeOut(inputWidth, padWidth, kernelWidth, strideWidth, true); + outputSizeCeil[1] = + computeOut(inputHeight, padHeight, kernelHeight, strideHeight, true); + outputSizeCeil[2] = this->inputSize[2]; + outputSizeCeil[3] = this->inputSize[3]; + + outputSizeFloor[0] = + computeOut(inputWidth, padWidth, kernelWidth, strideWidth, false); + outputSizeFloor[1] = + computeOut(inputHeight, padHeight, kernelHeight, strideHeight, false); + outputSizeFloor[2] = this->inputSize[2]; + outputSizeFloor[3] = this->inputSize[3]; + + // strides of input, kernel, output + outputStridesFloor[0] = 1; + outputStridesCeil[0] = 1; + for (int i = 1; i < 4; i++) { + outputStridesFloor[i] = outputStridesFloor[i - 1] * outputSizeFloor[i - 1]; + outputStridesCeil[i] = outputStridesCeil[i - 1] * outputSizeCeil[i - 1]; + } + + if (outputSizeCeil[0] == outputSizeFloor[0] && + outputSizeCeil[1] == outputSizeFloor[1]) + this->ceilMode = true; + + // create usr layout. + if (this->ceilMode) { + this->output->createUsrLayout(dimension, outputSizeCeil, outputStridesCeil); + this->gradOutput->createUsrLayout(dimension, outputSizeCeil, + outputStridesCeil); + } else { + this->output->createUsrLayout(dimension, outputSizeFloor, + outputStridesFloor); + this->gradOutput->createUsrLayout(dimension, outputSizeFloor, + outputStridesFloor); + } +} + +template +void MKLPooling::updateOutput(DType *input, DType *output) +{ + dnnError_t status = E_UNIMPLEMENTED; + dnnLayout_t layout = NULL; + +// It's very stange, the address of input changes every time. 
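A worked example of the ceil/floor split handled in init() above; the numbers are invented, and computeOut is the helper declared in utils.h later in this patch:

// width 4, pad 0, kernel 3, stride 2: (4 + 2*0 - 3) / 2 = 0.5
int outCeil  = computeOut(4, 0, 3, 2, /* ceilMode = */ true);   // ceil(0.5)  + 1 == 2
int outFloor = computeOut(4, 0, 3, 2, /* ceilMode = */ false);  // floor(0.5) + 1 == 1
// When the two agree, init() forces ceilMode to true and the MKL output can be
// copied back as is; otherwise MKL works on the ceil-sized buffer and
// cutLastRowColumn() / padLastRowColumn() reconcile it with the floor-sized
// tensor the JVM side expects.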
+#ifdef DEBUG + if (this->input->getUsrData() && this->input->getUsrData() != input) + LOG(DBG) << "the address of input is not the same with preserved."; +#endif + + if (this->isFirstPass) { + status = dnnLayoutCreate(&layout, this->dimension, this->inputSize, + this->inputStrides); + CHECK_EQ(status, E_SUCCESS); + + // forward + status = dnnPoolingCreateForward(&(this->forwardPrim), NULL, + algorithm, layout, kernelSize, + stride, pad, dnnBorderZeros); + CHECK_EQ(status, E_SUCCESS); + this->input->createMklLayout(this->forwardPrim, dnnResourceSrc); + this->output->createMklLayout(this->forwardPrim, dnnResourceDst); + this->workspace->createMklLayout(this->forwardPrim, dnnResourceWorkspace); + this->workspace->createConversion(true); + + // backward + status = dnnPoolingCreateBackward(&(this->backwardPrim), NULL, + algorithm, layout, kernelSize, + stride, pad, dnnBorderZeros); + CHECK_EQ(status, E_SUCCESS); + + this->gradInput->createMklLayout(this->backwardPrim, dnnResourceDiffSrc); + this->gradOutput->createMklLayout(this->backwardPrim, dnnResourceDiffDst); + dnnLayoutDelete(layout); + + // the first pass we only create the layout, primitive, which are only + // created the first time and not change. + this->isFirstPass = false; + } + + // Because the address will change every time, so we need create conversion + // every forward/backward. + this->input->setUsrData(input); + this->input->createConversion(); + + this->output->setUsrData(output); + this->output->createConversion(!(ceilMode)); + + void *resources[dnnResourceNumber]; + resources[dnnResourceSrc] = this->input->getConvertedData(); + resources[dnnResourceDst] = this->output->getData(); + resources[dnnResourceWorkspace] = this->workspace->getData(); + + PERFSTART(); + status = dnnExecute(this->forwardPrim, resources); + CHECK_EQ(status, E_SUCCESS); + PERFEND("main computing"); + +#ifdef DEBUG + printData(reinterpret_cast(this->output->getUsrData()), + outputSizeCeil[3], outputSizeCeil[2], outputSizeCeil[1], + outputSizeCeil[0], + "Pooling forward output data generated by MKL2017"); +#endif + + if (!this->output->isUseNext()) { + if (ceilMode) { + this->output->backToUsr(); + } else { + this->output->cutLastRowColumn(outputStridesCeil, outputSizeFloor, + outputStridesFloor); + } + } + +#ifdef DEBUG + printData(reinterpret_cast(this->output->getUsrData()), + outputSizeFloor[3], outputSizeFloor[2], outputSizeFloor[1], + outputSizeCeil[0], + "Pooling forward output data generated by MKL2017"); +#endif +} + +template +void MKLPooling::updateGradInput(DType *input, DType *gradOutput, + DType *gradInput) +{ +#ifdef DEBUG + LOG(DBG) << "gradOutput = " << gradOutput + << " dataUsr = " << this->gradOutput->getUsrData(); +#endif + + // Because the address will change every time, so we need create conversion + // every forward/backward. 
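// Note on the workspace handed to dnnExecute below: it is the same buffer the
// forward pass filled (for max pooling it effectively records which element of
// each window was selected), which is why it is allocated once in the first
// pass via createConversion(true) and is never converted back to the usr
// layout.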
+ this->gradInput->setUsrData(gradInput); + this->gradInput->createConversion(); + + this->gradOutput->setUsrData(gradOutput); + this->gradOutput->createConversion(!(ceilMode)); + + if (!ceilMode) + this->gradOutput->padLastRowColumn(outputSizeFloor, outputStridesFloor, + outputSizeCeil, outputStridesCeil); + + void *resources[dnnResourceNumber]; + resources[dnnResourceDiffDst] = this->gradOutput->getConvertedData(); + resources[dnnResourceDiffSrc] = this->gradInput->getData(); + resources[dnnResourceWorkspace] = this->workspace->getData(); + + dnnError_t status; + PERFSTART(); + status = dnnExecute(this->backwardPrim, resources); + CHECK_EQ(status, E_SUCCESS); + PERFEND("main computing"); + + if (!this->gradInput->isUsePrev()) this->gradInput->backToUsr(); +} + +template +jlong JNIPoolingInit(jint inputNumber, jint inputChannel, jint inputHeight, + jint inputWidth, jint kernelHeight, jint kernelWidth, + jint strideHeight, jint strideWidth, jint padHeight, + jint padWidth, jint dimension, jint ceilMode, jint pAl) +{ + MKLPooling *pool = new MKLPooling(); + pool->init(inputNumber, inputChannel, inputHeight, inputWidth, kernelHeight, + kernelWidth, strideHeight, strideWidth, padHeight, padWidth, + dimension, ceilMode, static_cast(pAl)); + + return reinterpret_cast(pool); +} + +template +void JNIPoolingUpdateOutput(JNIEnv *env, jclass thisClass, ArrayType input, + jint inputOffset, ArrayType output, + jint outputOffset, long classPtr) +{ + DType *jInputStart = + reinterpret_cast(env->GetPrimitiveArrayCritical(input, 0)); + DType *jOutputStart = + reinterpret_cast(env->GetPrimitiveArrayCritical(output, 0)); + + DType *jInput = jInputStart + inputOffset; + DType *jOutput = jOutputStart + outputOffset; + + MKLPooling *ptr = reinterpret_cast *>(classPtr); + ptr->updateOutput(jInput, jOutput); + + env->ReleasePrimitiveArrayCritical(input, jInputStart, 0); + env->ReleasePrimitiveArrayCritical(output, jOutputStart, 0); +} + +template +void JNIPoolingUpdateGradInput(JNIEnv *env, jclass thisClass, ArrayType input, + jint inputOffset, ArrayType outputDiff, + jint outputDiffOffset, ArrayType inputDiff, + jint inputDiffOffset, long classPtr) +{ + DType *jInputStart = + reinterpret_cast(env->GetPrimitiveArrayCritical(input, 0)); + DType *jOutputDiffStart = + reinterpret_cast(env->GetPrimitiveArrayCritical(outputDiff, 0)); + DType *jInputDiffStart = + reinterpret_cast(env->GetPrimitiveArrayCritical(inputDiff, 0)); + + DType *jInput = jInputStart + inputOffset; + DType *jOutputDiff = jOutputDiffStart + outputDiffOffset; + DType *jInputDiff = jInputDiffStart + inputDiffOffset; + + MKLPooling *ptr = reinterpret_cast *>(classPtr); + ptr->updateGradInput(jInput, jOutputDiff, jInputDiff); + + env->ReleasePrimitiveArrayCritical(input, jInputStart, 0); + env->ReleasePrimitiveArrayCritical(outputDiff, jOutputDiffStart, 0); + env->ReleasePrimitiveArrayCritical(inputDiff, jInputDiffStart, 0); +} + +// Macro +#define PoolingInit(DType, JType, JArrayType) \ + JNIEXPORT \ + jlong JNICALL Java_com_intel_analytics_sparkdl_mkl_MKL_PoolingInit##DType( \ + JNIEnv *env, jclass thisClass, jint inputNumber, jint inputChannel, \ + jint inputHeight, jint inputWidth, jint kernelHeight, jint kernelWidth, \ + jint strideHeight, jint strideWidth, jint padHeight, jint padWidth, \ + jint dimension, jint ceilMode, jint pAl) \ + { \ + return JNIPoolingInit( \ + inputNumber, inputChannel, inputHeight, inputWidth, kernelHeight, \ + kernelWidth, strideHeight, strideWidth, padHeight, padWidth, \ + dimension, ceilMode, pAl); \ + } + +#define 
PoolingForward(DType, JType, JArrayType) \ + JNIEXPORT \ + void JNICALL Java_com_intel_analytics_sparkdl_mkl_MKL_PoolingForward##DType( \ + JNIEnv *env, jclass thisClass, JArrayType input, jint inputOffset, \ + JArrayType output, jint outputOffset, long classPtr) \ + { \ + JNIPoolingUpdateOutput( \ + env, thisClass, input, inputOffset, output, outputOffset, classPtr); \ + } + +#define PoolingBackward(DType, JType, JArrayType) \ + JNIEXPORT \ + void JNICALL \ + Java_com_intel_analytics_sparkdl_mkl_MKL_PoolingBackward##DType( \ + JNIEnv *env, jclass thisClass, JArrayType input, jint inputOffset, \ + JArrayType outputDiff, jint outputDiffOffset, JArrayType inputDiff, \ + jint inputDiffOffset, long classPtr) \ + { \ + JNIPoolingUpdateGradInput( \ + env, thisClass, input, inputOffset, outputDiff, outputDiffOffset, \ + inputDiff, inputDiffOffset, classPtr); \ + } + +#ifdef __cplusplus +extern "C" { +#endif + +// Double +PoolingInit(Double, jdouble, jdoubleArray) + PoolingForward(Double, jdouble, jdoubleArray) + PoolingBackward(Double, jdouble, jdoubleArray) + + // Float + PoolingInit(Float, jfloat, jfloatArray) + PoolingForward(Float, jfloat, jfloatArray) + PoolingBackward(Float, jfloat, jfloatArray) + +#ifdef __cplusplus +} +#endif diff --git a/mkl/native/src/main/c/jni/relu.cpp b/mkl/native/src/main/c/jni/relu.cpp new file mode 100644 index 00000000000..ad51a695b32 --- /dev/null +++ b/mkl/native/src/main/c/jni/relu.cpp @@ -0,0 +1,288 @@ +#include + +#include "debug.h" +#include "layer.h" +#include "memory.h" +#include "utils.h" + +template +class MKLReLU : public MKLLayer +{ + public: + MKLReLU(); + ~MKLReLU(); + + void init(size_t inputNumber, size_t inputChannel, size_t inputHeight, + size_t inputWidth, int dimension); + + void updateOutput(DType *input, DType *output); + void updateGradInput(DType *input, DType *gradOutput, DType *gradInput); + + private: + // this method is not the same as createMklLayout in MKLMemory + void firstPass(); + void preExecute(DType *input); + + size_t inputSize[4]; + size_t inputStrides[4]; + + size_t outputSize[4]; + size_t outputStrides[4]; + + DType nagtiveSlope; +}; + +template +MKLReLU::MKLReLU() +{ + nagtiveSlope = static_cast(0.0); +} + +template +MKLReLU::~MKLReLU() +{ +} + +template +void MKLReLU::init(size_t inputNumber, size_t inputChannel, + size_t inputHeight, size_t inputWidth, int dimension) +{ + this->dimension = dimension; + + inputSize[0] = inputWidth; + inputSize[1] = inputHeight; + inputSize[2] = inputChannel; + inputSize[3] = inputNumber; + + inputStrides[0] = 1; + for (int i = 1; i < 4; i++) + inputStrides[i] = inputStrides[i - 1] * inputSize[i - 1]; + + // the output channel is as same as the number of kernel. + // and the output number must be as same as the number of input too. 
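For reference, the element-wise semantics the primitive created in firstPass() below implements; a plain sketch that is independent of MKL. The member is spelled "nagtiveSlope" in this file and the constructor pins it to 0, so this is the standard, non-leaky ReLU:

static inline float reluForward(float x)                 { return x > 0 ? x : 0; }
static inline float reluBackward(float x, float gradOut) { return x > 0 ? gradOut : 0; }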
+ outputSize[0] = inputWidth; + outputSize[1] = inputHeight; + outputSize[2] = inputChannel; + outputSize[3] = inputNumber; + + outputStrides[0] = 1; + for (int i = 1; i < 4; i++) + outputStrides[i] = outputStrides[i - 1] * outputSize[i - 1]; + + // create usr layout + this->input->createUsrLayout(dimension, inputSize, inputStrides); + this->output->createUsrLayout(dimension, outputSize, outputStrides); + + this->gradInput->createUsrLayout(dimension, inputSize, inputStrides); + this->gradOutput->createUsrLayout(dimension, outputSize, outputStrides); +} + +template +void MKLReLU::firstPass() +{ + dnnError_t status = E_UNIMPLEMENTED; + dnnLayout_t layout; + + status = + dnnLayoutCreate(&layout, this->dimension, inputSize, inputStrides); + CHECK_EQ(status, E_SUCCESS); + + // forward + status = dnnReLUCreateForward(&(this->forwardPrim), NULL, layout, + nagtiveSlope); + CHECK_EQ(status, E_SUCCESS); + + this->input->createMklLayout(this->forwardPrim, dnnResourceSrc); + this->output->createMklLayout(this->forwardPrim, dnnResourceDst); + + // backward data + // the input layout is as same as input diff layout + status = dnnReLUCreateBackward(&(this->backwardPrim), NULL, layout, + layout, nagtiveSlope); + CHECK_EQ(status, E_SUCCESS); + + this->gradOutput->createMklLayout(this->backwardPrim, dnnResourceDiffDst); + this->gradInput->createMklLayout(this->backwardPrim, dnnResourceDiffSrc); + + // we create the layout only at the first time + this->isFirstPass = false; +} + +template +void MKLReLU::preExecute(DType *input) +{ + this->input->createConversion(); +} + +template +void MKLReLU::updateOutput(DType *input, DType *output) +{ + if (this->isFirstPass) firstPass(); + + // Because the address will change every time, so we need create conversion + // every forward/backward. + // TODO Should we set the kernel and bias address every time? + preExecute(input); + this->output->createConversion(); + +#ifdef DEBUG + printData(reinterpret_cast(this->input->getUsrData()), + this->inputSize[3], this->inputSize[2], this->inputSize[1], + this->inputSize[0], "Forward input"); +#endif + + dnnError_t status; + void *resources[dnnResourceNumber]; + + resources[dnnResourceSrc] = this->input->getConvertedData(); + resources[dnnResourceDst] = this->output->getData(); + + PERFSTART(); + status = dnnExecute(this->forwardPrim, resources); + PERFEND("main computing"); + CHECK_EQ(status, E_SUCCESS); + +#ifdef DEBUG + printData(reinterpret_cast(this->output->getData()), + outputSize[3], outputSize[2], outputSize[1], outputSize[0], + "Forward output"); +#endif + + if (!this->output->isUseNext()) { + this->output->backToUsr(); + } +} + +template +void MKLReLU::updateGradInput(DType *input, DType *gradOutput, + DType *gradInput) +{ + dnnError_t status; + void *resources[dnnResourceNumber]; + + preExecute(input); + + this->gradOutput->createConversion(); + this->gradInput->createConversion(); + + resources[dnnResourceDiffDst] = this->gradOutput->getConvertedData(); + resources[dnnResourceDiffSrc] = this->gradInput->getData(); + resources[dnnResourceSrc] = this->input->getConvertedData(); + + // 4. main computing parts. 
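// Note: the backward primitive also consumes dnnResourceSrc (set above), since
// MKL derives the pass-through mask from the forward input rather than from
// the output; that is why preExecute(input) converts the input again here.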
+ PERFSTART(); + status = dnnExecute(this->backwardPrim, resources); + CHECK_EQ(status, E_SUCCESS); + PERFEND("main computing"); + + if (!this->gradInput->isUsePrev()) { + this->gradInput->backToUsr(); + } + +#ifdef DEBUG + printData(reinterpret_cast(this->gradInput->getUsrData()), + inputSize[3], inputSize[2], inputSize[1], inputSize[0], + "backward gradient input"); +#endif +} + +template +jlong JNIReLUInit(JNIEnv *env, jclass thisClass, jint inputNumber, + jint inputChannel, jint inputHeight, jint inputWidth, + jint dimension) +{ + MKLReLU *ptr = new MKLReLU(); + ptr->init(inputNumber, inputChannel, inputHeight, inputWidth, dimension); + + return reinterpret_cast(ptr); +} + +template +void JNIReLUUpdateOutput(JNIEnv *env, jclass thisClass, ArrayType input, + jint inputOffset, ArrayType output, jint outputOffset, + long classPtr) +{ + MKLReLU *ptr = reinterpret_cast *>(classPtr); + + std::shared_ptr> jInput( + new ZipArray(env, input, inputOffset, ptr->input)); + + std::shared_ptr> jOutput( + new ZipArray(env, output, outputOffset, ptr->output)); + + ptr->updateOutput(jInput->getPtr(), jOutput->getPtr()); +} + +template +void JNIReLUUpdateGradInput(JNIEnv *env, jclass thisClass, ArrayType input, + jint inputOffset, ArrayType outputDiff, + jint outputDiffOffset, ArrayType inputDiff, + jint inputDiffOffset, long classPtr) +{ + MKLReLU *ptr = reinterpret_cast *>(classPtr); + std::shared_ptr> jInput( + new ZipArray(env, input, inputOffset, ptr->input)); + + std::shared_ptr> jOutputDiff( + new ZipArray(env, outputDiff, outputDiffOffset, + ptr->gradOutput)); + + std::shared_ptr> jInputDiff( + new ZipArray(env, inputDiff, inputDiffOffset, + ptr->gradInput)); + + ptr->updateGradInput(jInput->getPtr(), jOutputDiff->getPtr(), + jInputDiff->getPtr()); +} + +// Macro +#define ReLUInit(DType, JType, JArrayType) \ + JNIEXPORT \ + jlong JNICALL Java_com_intel_analytics_sparkdl_mkl_MKL_ReLUInit##DType( \ + JNIEnv *env, jclass thisClass, jint inputNumber, jint inputChannel, \ + jint inputHeight, jint inputWidth, jint dimension) \ + { \ + return JNIReLUInit(env, thisClass, inputNumber, \ + inputChannel, inputHeight, \ + inputWidth, dimension); \ + } + +#define ReLUForward(DType, JType, JArrayType) \ + JNIEXPORT \ + void JNICALL Java_com_intel_analytics_sparkdl_mkl_MKL_ReLUForward##DType( \ + JNIEnv *env, jclass thisClass, JArrayType input, jint inputOffset, \ + JArrayType output, jint outputOffset, long classPtr) \ + { \ + JNIReLUUpdateOutput(env, thisClass, input, inputOffset, \ + output, outputOffset, classPtr); \ + } + +#define ReLUBackward(DType, JType, JArrayType) \ + JNIEXPORT \ + void JNICALL Java_com_intel_analytics_sparkdl_mkl_MKL_ReLUBackward##DType( \ + JNIEnv *env, jclass thisClass, JArrayType input, jint inputOffset, \ + JArrayType outputDiff, jint outputDiffOffset, JArrayType inputDiff, \ + jint inputDiffOffset, long classPtr) \ + { \ + JNIReLUUpdateGradInput( \ + env, thisClass, input, inputOffset, outputDiff, outputDiffOffset, \ + inputDiff, inputDiffOffset, classPtr); \ + } + +#ifdef __cplusplus +extern "C" { +#endif + +// double +ReLUInit(Double, jdouble, jdoubleArray); +ReLUForward(Double, jdouble, jdoubleArray); +ReLUBackward(Double, jdouble, jdoubleArray); + +// float +ReLUInit(Float, jfloat, jfloatArray); +ReLUForward(Float, jfloat, jfloatArray); +ReLUBackward(Float, jfloat, jfloatArray); + +#ifdef __cplusplus +} +#endif diff --git a/mkl/native/src/main/c/jni/utils.cpp b/mkl/native/src/main/c/jni/utils.cpp new file mode 100644 index 00000000000..3e1a8381c2d --- /dev/null +++ 
b/mkl/native/src/main/c/jni/utils.cpp @@ -0,0 +1,45 @@ +#include "utils.h" +#include +#include +#include + +#if 0 +int computeOut(int input, int pad, int kernel, int stride) +{ + // if (((input + 2 * pad - kernel) % stride) != 0) + // printf("%d %d %d %d\n", input, pad, kernel, stride); + // TODO Should we substitute with ceil or floor when compute the output? + //std::cout << static_cast(ceil(static_cast((input + 2 * pad - kernel) / stride) + 1)) << std::endl; + //std::cout << ((input + 2 * pad - kernel) / stride) + 1 << std::endl; + //return static_cast(floor(static_cast((input + 2 * pad - kernel) / stride) + 1)); + // return static_cast( + // static_cast((input + 2 * pad - kernel) / stride) + 1); + //return ((input + 2 * pad - kernel) / stride) + 1; + int tmp = ((input + 2 * pad - kernel) / stride) + 1; + //if (((input + 2 * pad - kernel) % stride) != 0) + // tmp += 1; + return tmp; +} +#endif + +int computeOut(int input, int pad, int kernel, int stride, bool ceilMode) +{ + if (ceilMode) { + return static_cast(ceil(static_cast( + input + 2 * pad - kernel) / stride)) + 1; + } else { + return static_cast(floor(static_cast( + input + 2 * pad - kernel) / stride)) + 1; + } +} + +int main() +{ + std::cout << computeOut(4, 0, 3, 2, true); + std::cout << computeOut(4, 0, 3, 2, false); + + std::cout << computeOut(3, 1, 2, 1, true); + std::cout << computeOut(3, 1, 2, 1, false); + + return 0; +} diff --git a/mkl/native/src/main/c/jni/utils.h b/mkl/native/src/main/c/jni/utils.h new file mode 100644 index 00000000000..117bfef15f2 --- /dev/null +++ b/mkl/native/src/main/c/jni/utils.h @@ -0,0 +1,7 @@ +#ifndef _UTILS_H_ +#define _UTILS_H_ + +int computeOut(int input, int pad, int kernle, int stride, + bool ceilMode = false); + +#endif From 2886ed2f208a3fecf65251726c2984d731a73e85 Mon Sep 17 00:00:00 2001 From: Wang Yanzhang Date: Tue, 20 Sep 2016 17:40:22 +0800 Subject: [PATCH 02/12] delete the unused codes --- .../sparkdl/nn/mkl/BatchNormalization.scala | 7 ------- .../com/intel/analytics/sparkdl/nn/mkl/Linear.scala | 13 ------------- .../nn/mkl/LocalNormalizationAcrossChannels.scala | 2 -- 3 files changed, 22 deletions(-) diff --git a/dl/src/main/scala/com/intel/analytics/sparkdl/nn/mkl/BatchNormalization.scala b/dl/src/main/scala/com/intel/analytics/sparkdl/nn/mkl/BatchNormalization.scala index 6a1f9dee787..e6264c860f6 100644 --- a/dl/src/main/scala/com/intel/analytics/sparkdl/nn/mkl/BatchNormalization.scala +++ b/dl/src/main/scala/com/intel/analytics/sparkdl/nn/mkl/BatchNormalization.scala @@ -27,13 +27,6 @@ class SpatialBatchNormalization[@specialized(Float, Double) T:ClassTag] (val nOu val saveMean = Tensor[T](nOutput) val saveStd = Tensor[T](nOutput).fill(ev.fromType[Int](1)) - private var prevLayout : Array[Long] = Array() - private var nextLayout : Array[Long] = Array() - private var usePrev = false - private var useNext = false - private var forNext = false - private var forPrev = false - private var classPtr = 0L private var firstPass = true diff --git a/dl/src/main/scala/com/intel/analytics/sparkdl/nn/mkl/Linear.scala b/dl/src/main/scala/com/intel/analytics/sparkdl/nn/mkl/Linear.scala index ec7455b8f1b..947d16892b9 100644 --- a/dl/src/main/scala/com/intel/analytics/sparkdl/nn/mkl/Linear.scala +++ b/dl/src/main/scala/com/intel/analytics/sparkdl/nn/mkl/Linear.scala @@ -27,24 +27,11 @@ class Linear[@specialized(Float, Double) T: ClassTag]( reset() - // this is pointer to the layout of MKL used internal and the memory is allocated in native code. 
- // the magic codes are: - // layoutMKL(0) -> input - // layoutMKL(1) -> inputDiff / gradInput - // layoutMKL(2) -> output - // layoutMKL(3) -> outputDiff - // layoutMKL(4) -> kernel / filter - // layoutMKL(5) -> kernelDiff / gradWeight - // layoutMKL(6) -> bias - // layoutMKL(7) -> biasDiff / gradBias - val layoutMKL = Array.fill[Long](8)(-1) - def setInitMethod(initMethod : InitializationMethod) : this.type = { this.initMethod = initMethod this } - override def reset(): Unit ={ initMethod match { case Default => diff --git a/dl/src/main/scala/com/intel/analytics/sparkdl/nn/mkl/LocalNormalizationAcrossChannels.scala b/dl/src/main/scala/com/intel/analytics/sparkdl/nn/mkl/LocalNormalizationAcrossChannels.scala index 7b5fff5544c..bcb29736669 100644 --- a/dl/src/main/scala/com/intel/analytics/sparkdl/nn/mkl/LocalNormalizationAcrossChannels.scala +++ b/dl/src/main/scala/com/intel/analytics/sparkdl/nn/mkl/LocalNormalizationAcrossChannels.scala @@ -27,8 +27,6 @@ class LocalNormalizationAcrossChannels[@specialized(Float, Double) T: ClassTag] var classPtr = 0L private var firstPass = true - val layoutMKL = Array.fill[Long](8)(-1) - override def getClassPtr(): Long = classPtr override def equals(obj: Any): Boolean = { From 8157e8b407517af07f91a63f49753b80b3620384 Mon Sep 17 00:00:00 2001 From: Wang Yanzhang Date: Tue, 20 Sep 2016 19:35:33 +0800 Subject: [PATCH 03/12] support for cancel the data conversion between two mkl layers --- .../analytics/sparkdl/nn/Container.scala | 20 +++++++++++++++++++ .../intel/analytics/sparkdl/nn/Module.scala | 4 ++++ 2 files changed, 24 insertions(+) diff --git a/dl/src/main/scala/com/intel/analytics/sparkdl/nn/Container.scala b/dl/src/main/scala/com/intel/analytics/sparkdl/nn/Container.scala index 40b73ac80be..0827c05a3be 100644 --- a/dl/src/main/scala/com/intel/analytics/sparkdl/nn/Container.scala +++ b/dl/src/main/scala/com/intel/analytics/sparkdl/nn/Container.scala @@ -19,6 +19,7 @@ package com.intel.analytics.sparkdl.nn import com.intel.analytics.sparkdl.tensor.Tensor import com.intel.analytics.sparkdl.tensor.TensorNumericMath.TensorNumeric +import com.intel.analytics.sparkdl.mkl.MKL import scala.collection.mutable.ArrayBuffer import scala.reflect.ClassTag @@ -93,4 +94,23 @@ private[nn] abstract class Container[@specialized(Float, Double) T: ClassTag]( }) (result, offset, newIndexes) } + + override def initMkl() : Unit = { + def containMkl(module : Module[T]) : Boolean = { + return if (module.toString.startsWith("mkl.")) true else false + } + + for (i <- 0 until modules.length) { + if (containMkl(modules(i))) { + if (i >= 1 && containMkl(modules(i - 1))) { + ev.getType() match { + case "Float" => MKL.SetPrevFloat(modules(i - 1).getClassPtr(), modules(i).getClassPtr()) + case "Double" => MKL.SetPrevDouble(modules(i - 1).getClassPtr(), modules(i).getClassPtr()) + } + } + } else { + modules(i).initMkl() + } + } + } } diff --git a/dl/src/main/scala/com/intel/analytics/sparkdl/nn/Module.scala b/dl/src/main/scala/com/intel/analytics/sparkdl/nn/Module.scala index 026cc3e3b69..a874a5ee90c 100644 --- a/dl/src/main/scala/com/intel/analytics/sparkdl/nn/Module.scala +++ b/dl/src/main/scala/com/intel/analytics/sparkdl/nn/Module.scala @@ -199,6 +199,10 @@ abstract class Module[T: ClassTag](implicit ev: TensorNumeric[T]) extends Serial def cloneModule(): Module[T] = { SerializationUtils.clone(this) } + + // Support for mkl init. 
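// The two defs added below tie into the Container change earlier in this
// patch: getClassPtr exposes the native layer handle (0L for modules without
// an MKL backend) and initMkl is a no-op by default, while Container.initMkl
// walks its children and calls MKL.SetPrevFloat / MKL.SetPrevDouble on every
// adjacent pair of "mkl." modules so their native layers can share buffers and
// skip the usr <-> mkl conversions in between.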
+ def getClassPtr() : Long = {0L} + def initMkl() : Unit = {} } object Module { From fafbcd406aec7c3020d6dc193f1a98d523033927 Mon Sep 17 00:00:00 2001 From: Wang Yanzhang Date: Tue, 20 Sep 2016 20:14:00 +0800 Subject: [PATCH 04/12] fix the codestyle of scala source code --- .../analytics/sparkdl/nn/Container.scala | 6 +- .../sparkdl/nn/mkl/BatchNormalization.scala | 208 +++++---- .../analytics/sparkdl/nn/mkl/Linear.scala | 296 ++++++++----- .../LocalNormalizationAcrossChannels.scala | 187 ++++---- .../analytics/sparkdl/nn/mkl/Pooling.scala | 255 ++++++----- .../intel/analytics/sparkdl/nn/mkl/ReLU.scala | 147 ++++--- .../sparkdl/nn/mkl/SpatialConvolution.scala | 406 +++++++++++------- .../sparkdl/nn/mkl/GoogLeNetSpec.scala | 27 ++ 8 files changed, 916 insertions(+), 616 deletions(-) create mode 100644 dl/src/test/scala/com/intel/analytics/sparkdl/nn/mkl/GoogLeNetSpec.scala diff --git a/dl/src/main/scala/com/intel/analytics/sparkdl/nn/Container.scala b/dl/src/main/scala/com/intel/analytics/sparkdl/nn/Container.scala index 0827c05a3be..a90cf9b0187 100644 --- a/dl/src/main/scala/com/intel/analytics/sparkdl/nn/Container.scala +++ b/dl/src/main/scala/com/intel/analytics/sparkdl/nn/Container.scala @@ -104,8 +104,10 @@ private[nn] abstract class Container[@specialized(Float, Double) T: ClassTag]( if (containMkl(modules(i))) { if (i >= 1 && containMkl(modules(i - 1))) { ev.getType() match { - case "Float" => MKL.SetPrevFloat(modules(i - 1).getClassPtr(), modules(i).getClassPtr()) - case "Double" => MKL.SetPrevDouble(modules(i - 1).getClassPtr(), modules(i).getClassPtr()) + case "Float" => MKL.SetPrevFloat(modules(i - 1).getClassPtr(), + modules(i).getClassPtr()) + case "Double" => MKL.SetPrevDouble(modules(i - 1).getClassPtr(), + modules(i).getClassPtr()) } } } else { diff --git a/dl/src/main/scala/com/intel/analytics/sparkdl/nn/mkl/BatchNormalization.scala b/dl/src/main/scala/com/intel/analytics/sparkdl/nn/mkl/BatchNormalization.scala index e6264c860f6..6eebabdc02c 100644 --- a/dl/src/main/scala/com/intel/analytics/sparkdl/nn/mkl/BatchNormalization.scala +++ b/dl/src/main/scala/com/intel/analytics/sparkdl/nn/mkl/BatchNormalization.scala @@ -1,3 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package com.intel.analytics.sparkdl.nn.mkl import com.intel.analytics.sparkdl.tensor.TensorNumericMath.TensorNumeric @@ -10,16 +27,15 @@ import scala.language.implicitConversions import scala.reflect.ClassTag -/** - * Created by wyz on 16-9-5. 
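To make the data-conversion cancelling in Container.initMkl above easier to follow, here is a self-contained Scala sketch of the same adjacency scan; the module names and the println are illustrative stand-ins for the real getClassPtr() / MKL.SetPrevFloat calls and are not part of the patch.

// Pair up consecutive MKL-backed children so MKL can keep its internal data
// layout between them instead of converting back to the default layout.
val isMkl: String => Boolean = _.startsWith("mkl.")
val children = Seq("mkl.SpatialConvolution", "mkl.ReLU", "nn.Dropout", "mkl.Linear")
for (i <- children.indices) {
  if (isMkl(children(i)) && i >= 1 && isMkl(children(i - 1))) {
    // real code: MKL.SetPrevFloat(modules(i - 1).getClassPtr(), modules(i).getClassPtr())
    println(s"link ${children(i - 1)} -> ${children(i)}")
  }
}
// prints only: link mkl.SpatialConvolution -> mkl.ReLU (the non-MKL boundary is skipped)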
- */ -class SpatialBatchNormalization[@specialized(Float, Double) T:ClassTag] (val nOutput: Int, - val eps: Double = 1e-5, - val momentum: Double = 0.1, - val affine: Boolean = true) - (implicit ev: TensorNumeric[T]) extends Module[T] { +class SpatialBatchNormalization[@specialized(Float, Double) T: ClassTag]( + val nOutput: Int, + val eps: Double = 1e-5, + val momentum: Double = 0.1, + val affine: Boolean = true)(implicit ev: TensorNumeric[T]) + extends Module[T] { - require(nOutput > 0, "To set affine=false call SpatialBatchNormalization(nFeature, eps, momentum, false)") + require(nOutput > 0, + "To set affine=false call SpatialBatchNormalization(nFeature, eps, momentum, false)") val nDim = 2 val runningMean = Tensor[T](nOutput) @@ -29,7 +45,7 @@ class SpatialBatchNormalization[@specialized(Float, Double) T:ClassTag] (val nOu private var classPtr = 0L private var firstPass = true - + override def getClassPtr(): Long = classPtr val weight: Tensor[T] = if (affine) Tensor[T](nOutput) else null @@ -37,8 +53,8 @@ class SpatialBatchNormalization[@specialized(Float, Double) T:ClassTag] (val nOu gradWeight = if (affine) Tensor[T](nOutput) else null gradBias = if (affine) Tensor[T](nOutput) else null - val useWeight : Boolean = if (weight != null) true else false - val useBias : Boolean = if (bias != null) true else false + val useWeight: Boolean = if (weight != null) true else false + val useBias: Boolean = if (bias != null) true else false if (affine) { reset() @@ -57,69 +73,78 @@ class SpatialBatchNormalization[@specialized(Float, Double) T:ClassTag] (val nOu runningVar.fill(ev.fromType[Int](1)) } - def checkInputDim(input : Tensor[T]): Unit ={ - require(input.dim() == nDim, s"only mini-batch supported (${nDim}D tensor), got ${input.dim()}D tensor instead") - require(input.size(2) == runningMean.nElement(), s"got ${input.size(2)}-feature tensor, expected ${runningMean.nElement()}") + def checkInputDim(input: Tensor[T]): Unit = { + require(input.dim() == nDim, + s"only mini-batch supported (${nDim}D tensor), got ${input.dim()}D tensor instead") + require(input.size(2) == runningMean.nElement(), + s"got ${input.size(2)}-feature tensor, expected ${runningMean.nElement()}") } - override def updateOutput(input : Tensor[T]) : Tensor[T] = { - //checkInputDim(input) - + override def updateOutput(input: Tensor[T]): Tensor[T] = { output.resizeAs(input) - //saveMean.resizeAs(runningMean) - //saveStd.resizeAs(runningVar) - val inputOffset = input.storageOffset() - 1; + val inputOffset = input.storageOffset() - 1; val outputOffset = output.storageOffset() - 1; - // +---------+-------+-------+ - // | | 3-dim | 4-dim | - // +=========+=======+=======+ - // | Number | ? 
| 1 | - // +---------+-------+-------+ - // | Channel | 1 | 2 | - // +---------+-------+-------+ - // | Height | 2 | 3 | - // +---------+-------+-------+ - // | Width | 3 | 4 | - // +---------+-------+-------+ - // Table: Index of 3-dim/4-dim input - - val inputWidth = input.size(input.dim()) - val inputHeight = input.size(input.dim() - 1) + val inputWidth = input.size(input.dim()) + val inputHeight = input.size(input.dim() - 1) val inputChannel = if (input.dim() <= 2) 1 else input.size(input.dim() - 2) - val inputNumber = if (input.dim() <= 3) 1 else input.size(input.dim() - 3) + val inputNumber = if (input.dim() <= 3) 1 else input.size(input.dim() - 3) // TODO we may set input.size(input.dim() - 3) == 1 if input.dim() == 3 val kernelOffset = weight.storageOffset() - 1 val biasOffset = bias.storageOffset() - 1 - implicit def bool2int(b:Boolean) = if (b) 1 else 0 + implicit def bool2int(b: Boolean) = if (b) 1 else 0 if (firstPass) { ev.getType() match { - case "Float" => classPtr = MKL.BatchNormInitFloat( - inputNumber, inputChannel, inputHeight, inputWidth, - eps, useWeight, useBias, 4) - case "Double" => classPtr = MKL.BatchNormInitDouble( - inputNumber, inputChannel, inputHeight, inputWidth, - eps, useBias, useBias, 4) - case _ => throw new UnsupportedOperationException(s"Only Float/Double supported") + case "Float" => + classPtr = MKL.BatchNormInitFloat(inputNumber, + inputChannel, + inputHeight, + inputWidth, + eps, + useWeight, + useBias, + 4) + case "Double" => + classPtr = MKL.BatchNormInitDouble(inputNumber, + inputChannel, + inputHeight, + inputWidth, + eps, + useBias, + useBias, + 4) + case _ => + throw new UnsupportedOperationException(s"Only Float/Double supported") } firstPass = false } ev.getType() match { - case "Float" => MKL.BatchNormForwardFloat( - input.storage().array().asInstanceOf[Array[Float]], inputOffset, - output.storage().array().asInstanceOf[Array[Float]], outputOffset, - weight.storage().array().asInstanceOf[Array[Float]], kernelOffset, - bias.storage().array().asInstanceOf[Array[Float]], biasOffset, classPtr) - case "Double" => MKL.BatchNormForwardDouble( - input.storage().array().asInstanceOf[Array[Double]], inputOffset, - output.storage().array().asInstanceOf[Array[Double]], outputOffset, - weight.storage().array().asInstanceOf[Array[Double]], kernelOffset, - bias.storage().array().asInstanceOf[Array[Double]], biasOffset, classPtr) - case _ => throw new UnsupportedOperationException(s"Only Float/Double supported") + case "Float" => + MKL.BatchNormForwardFloat(input.storage().array().asInstanceOf[Array[Float]], + inputOffset, + output.storage().array().asInstanceOf[Array[Float]], + outputOffset, + weight.storage().array().asInstanceOf[Array[Float]], + kernelOffset, + bias.storage().array().asInstanceOf[Array[Float]], + biasOffset, + classPtr) + case "Double" => + MKL.BatchNormForwardDouble(input.storage().array().asInstanceOf[Array[Double]], + inputOffset, + output.storage().array().asInstanceOf[Array[Double]], + outputOffset, + weight.storage().array().asInstanceOf[Array[Double]], + kernelOffset, + bias.storage().array().asInstanceOf[Array[Double]], + biasOffset, + classPtr) + case _ => + throw new UnsupportedOperationException(s"Only Float/Double supported") } output } @@ -127,26 +152,13 @@ class SpatialBatchNormalization[@specialized(Float, Double) T:ClassTag] (val nOu override def updateGradInput(input: Tensor[T], gradOutput: Tensor[T]): Tensor[T] = { gradInput.resizeAs(input) - val inputOffset = input.storageOffset() - 1; + val inputOffset = 
input.storageOffset() - 1; val outputOffset = output.storageOffset() - 1; - // +---------+-------+-------+ - // | | 3-dim | 4-dim | - // +=========+=======+=======+ - // | Number | ? | 1 | - // +---------+-------+-------+ - // | Channel | 1 | 2 | - // +---------+-------+-------+ - // | Height | 2 | 3 | - // +---------+-------+-------+ - // | Width | 3 | 4 | - // +---------+-------+-------+ - // Table: Index of 3-dim/4-dim input - - val inputWidth = input.size(input.dim()) - val inputHeight = input.size(input.dim() - 1) + val inputWidth = input.size(input.dim()) + val inputHeight = input.size(input.dim() - 1) val inputChannel = if (input.dim() <= 2) 1 else input.size(input.dim() - 2) - val inputNumber = if (input.dim() <= 3) 1 else input.size(input.dim() - 3) + val inputNumber = if (input.dim() <= 3) 1 else input.size(input.dim() - 3) // TODO we may set input.size(input.dim() - 3) == 1 if input.dim() == 3 val kernelOffset = weight.storageOffset() - 1 @@ -156,41 +168,53 @@ class SpatialBatchNormalization[@specialized(Float, Double) T:ClassTag] (val nOu val biasDiffOffset = gradBias.storageOffset() - 1 val gradOutputOffset = gradOutput.storageOffset() - 1 - val gradInputOffset = gradInput.storageOffset() -1 + val gradInputOffset = gradInput.storageOffset() - 1 - implicit def bool2int(b:Boolean) = if (b) 1 else 0 + implicit def bool2int(b: Boolean) = if (b) 1 else 0 ev.getType() match { - case "Float" => MKL.BatchNormBackwardFloat( - input.storage().array().asInstanceOf[Array[Float]], inputOffset, - gradOutput.storage().array().asInstanceOf[Array[Float]], gradOutputOffset, - gradInput.storage().array().asInstanceOf[Array[Float]], gradInputOffset, - gradWeight.storage().array().asInstanceOf[Array[Float]], kernelDiffOffset, - gradBias.storage().array().asInstanceOf[Array[Float]], biasDiffOffset, classPtr) - case "Double" => MKL.BatchNormBackwardDouble( - input.storage().array().asInstanceOf[Array[Double]], inputOffset, - gradOutput.storage().array().asInstanceOf[Array[Double]], gradOutputOffset, - gradInput.storage().array().asInstanceOf[Array[Double]], gradInputOffset, - gradWeight.storage().array().asInstanceOf[Array[Double]], kernelDiffOffset, - gradBias.storage().array().asInstanceOf[Array[Double]], biasDiffOffset, classPtr) - case _ => throw new UnsupportedOperationException(s"Only Float/Double supported") + case "Float" => + MKL.BatchNormBackwardFloat(input.storage().array().asInstanceOf[Array[Float]], + inputOffset, + gradOutput.storage().array().asInstanceOf[Array[Float]], + gradOutputOffset, + gradInput.storage().array().asInstanceOf[Array[Float]], + gradInputOffset, + gradWeight.storage().array().asInstanceOf[Array[Float]], + kernelDiffOffset, + gradBias.storage().array().asInstanceOf[Array[Float]], + biasDiffOffset, + classPtr) + case "Double" => + MKL.BatchNormBackwardDouble(input.storage().array().asInstanceOf[Array[Double]], + inputOffset, + gradOutput.storage().array().asInstanceOf[Array[Double]], + gradOutputOffset, + gradInput.storage().array().asInstanceOf[Array[Double]], + gradInputOffset, + gradWeight.storage().array().asInstanceOf[Array[Double]], + kernelDiffOffset, + gradBias.storage().array().asInstanceOf[Array[Double]], + biasDiffOffset, + classPtr) + case _ => + throw new UnsupportedOperationException(s"Only Float/Double supported") } gradInput } - override def accGradParameters(input: Tensor[T], gradOutput: Tensor[T], scale : Double): Unit = { - } + override def accGradParameters(input: Tensor[T], gradOutput: Tensor[T], scale: Double): Unit = {} override def 
zeroGradParameters(): Unit = { gradWeight.zero() gradBias.zero() } - override def parameters(): (Array[Tensor[T]], Array[Tensor[T]]) ={ + override def parameters(): (Array[Tensor[T]], Array[Tensor[T]]) = { (Array(this.weight, this.bias), Array(this.gradWeight, this.gradBias)) } - override def toString(): String ={ + override def toString(): String = { s"mkl.BatchNormalization[${ev.getType()}]($nOutput, $eps, $momentum, $affine)" } } diff --git a/dl/src/main/scala/com/intel/analytics/sparkdl/nn/mkl/Linear.scala b/dl/src/main/scala/com/intel/analytics/sparkdl/nn/mkl/Linear.scala index 947d16892b9..f049b31cff7 100644 --- a/dl/src/main/scala/com/intel/analytics/sparkdl/nn/mkl/Linear.scala +++ b/dl/src/main/scala/com/intel/analytics/sparkdl/nn/mkl/Linear.scala @@ -1,3 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package com.intel.analytics.sparkdl.nn.mkl import com.intel.analytics.sparkdl.mkl.MKL @@ -9,15 +26,16 @@ import com.intel.analytics.sparkdl.tensor.Tensor import scala.reflect.ClassTag class Linear[@specialized(Float, Double) T: ClassTag]( - inputSize: Int, - outputSize:Int, - val needCompute : Boolean = true, - private var initMethod : InitializationMethod = Default -)(implicit ev: TensorNumeric[T]) extends Module[T]{ - val weight: Tensor[T] = Tensor[T](outputSize,inputSize) + inputSize: Int, + outputSize: Int, + val needCompute: Boolean = true, + private var initMethod: InitializationMethod = Default +)(implicit ev: TensorNumeric[T]) + extends Module[T] { + val weight: Tensor[T] = Tensor[T](outputSize, inputSize) val bias: Tensor[T] = Tensor[T](outputSize) val addBuffer: Tensor[T] = Tensor[T]() - this.gradWeight = Tensor[T](outputSize,inputSize) + this.gradWeight = Tensor[T](outputSize, inputSize) this.gradBias = Tensor[T](outputSize) private var classPtr = 0L @@ -27,43 +45,42 @@ class Linear[@specialized(Float, Double) T: ClassTag]( reset() - def setInitMethod(initMethod : InitializationMethod) : this.type = { + def setInitMethod(initMethod: InitializationMethod): this.type = { this.initMethod = initMethod this } - override def reset(): Unit ={ + override def reset(): Unit = { initMethod match { case Default => - val stdv = 1.0 /math.sqrt(weight.size(2)) - weight.apply1(_=> ev.fromType[Double](RNG.uniform(0,1)*2*stdv - stdv)) //todo, better to support uniform - bias.apply1(_ => ev.fromType[Double](RNG.uniform(0,1)*2*stdv - stdv)) + val stdv = 1.0 / math.sqrt(weight.size(2)) // todo, better to support uniform + weight.apply1(_ => ev.fromType[Double](RNG.uniform(0, 1) * 2 * stdv - stdv)) + bias.apply1(_ => ev.fromType[Double](RNG.uniform(0, 1) * 2 * stdv - stdv)) case Xavier => val fanIn = weight.size(2) val fanOut = weight.size(1) - val stdv = math.sqrt(3 / (fanIn + fanOut)) - 
weight.apply1(_=>ev.fromType[Double](RNG.uniform(0,1)*2*stdv - stdv)) //todo, better to support uniform + val stdv = math.sqrt(3 / (fanIn + fanOut)) // todo, better to support uniform + weight.apply1(_ => ev.fromType[Double](RNG.uniform(0, 1) * 2 * stdv - stdv)) bias.fill(ev.fromType(0)) - case _ => ??? + case _ => + throw new UnsupportedOperationException(s"Only Default / Xavier supported") } } - override def updateOutput(input: Tensor[T]): Tensor[T] ={ + override def updateOutput(input: Tensor[T]): Tensor[T] = { require(input.dim() == 2, "only batch mode supported") val inputWidth = input.size(input.dim()) val inputHeight = input.size(input.dim() - 1) - val nFrame = input.size(1) val nElement = output.nElement output.resize(Array(nFrame, bias.size(1))) - if(output.nElement() != nElement) - output.zero() + if (output.nElement() != nElement) { output.zero() } - val inputOffset = input.storageOffset() - 1 + val inputOffset = input.storageOffset() - 1 val outputOffset = output.storageOffset() - 1 - val biasOffset = bias.storageOffset() - 1 + val biasOffset = bias.storageOffset() - 1 val kernelOffset = weight.storageOffset() - 1 val kernelHeight = outputSize @@ -72,48 +89,60 @@ class Linear[@specialized(Float, Double) T: ClassTag]( if (firstPass) { ev.getType() match { - case "Double" => classPtr = MKL.LinearInitDouble(inputHeight, inputWidth, outputChannels, - kernelHeight, kernelWidth) - case "Float" => classPtr = MKL.LinearInitFloat(inputHeight, inputWidth, outputChannels, - kernelHeight, kernelWidth) - case _ => throw new UnsupportedOperationException(s"Only Float supported") + case "Double" => + classPtr = MKL + .LinearInitDouble(inputHeight, inputWidth, outputChannels, kernelHeight, kernelWidth) + case "Float" => + classPtr = + MKL.LinearInitFloat(inputHeight, inputWidth, outputChannels, kernelHeight, kernelWidth) + case _ => + throw new UnsupportedOperationException(s"Only Float/Double supported") } firstPass = false } ev.getType() match { - case "Double" => MKL.LinearForwardDouble( - input.storage().array().asInstanceOf[Array[Double]], inputOffset, - output.storage().array().asInstanceOf[Array[Double]], outputOffset, - weight.storage().array().asInstanceOf[Array[Double]], kernelOffset, - bias.storage().array().asInstanceOf[Array[Double]], biasOffset, - classPtr) - case "Float" => MKL.LinearForwardFloat( - input.storage().array().asInstanceOf[Array[Float]], inputOffset, - output.storage().array().asInstanceOf[Array[Float]], outputOffset, - weight.storage().array().asInstanceOf[Array[Float]], kernelOffset, - bias.storage().array().asInstanceOf[Array[Float]], biasOffset, - classPtr) - case _ => throw new UnsupportedOperationException(s"Only Float supported") + case "Double" => + MKL.LinearForwardDouble(input.storage().array().asInstanceOf[Array[Double]], + inputOffset, + output.storage().array().asInstanceOf[Array[Double]], + outputOffset, + weight.storage().array().asInstanceOf[Array[Double]], + kernelOffset, + bias.storage().array().asInstanceOf[Array[Double]], + biasOffset, + classPtr) + case "Float" => + MKL.LinearForwardFloat(input.storage().array().asInstanceOf[Array[Float]], + inputOffset, + output.storage().array().asInstanceOf[Array[Float]], + outputOffset, + weight.storage().array().asInstanceOf[Array[Float]], + kernelOffset, + bias.storage().array().asInstanceOf[Array[Float]], + biasOffset, + classPtr) + case _ => + throw new UnsupportedOperationException(s"Only Float supported") } output } - override def updateGradInput(input: Tensor[T], gradOutput: Tensor[T]): Tensor[T] ={ + 
override def updateGradInput(input: Tensor[T], gradOutput: Tensor[T]): Tensor[T] = { require(input.dim() == 2, "only batch mode supported") val nElement = gradInput.nElement() gradInput.resizeAs(input) - if(nElement != gradInput.nElement()) { + if (nElement != gradInput.nElement()) { gradInput.zero() } val inputWidth = input.size(input.dim()) val inputHeight = input.size(input.dim() - 1) - val inputOffset = input.storageOffset() - 1 + val inputOffset = input.storageOffset() - 1 val kernelOffset = weight.storageOffset() - 1 - val biasOffset = bias.storageOffset() - 1 + val biasOffset = bias.storageOffset() - 1 val gradOutputOffset = gradOutput.storageOffset() - 1 val gradInputOffset = gradInput.storageOffset() - 1 val gradWeightOffset = gradWeight.storageOffset() - 1 @@ -123,85 +152,121 @@ class Linear[@specialized(Float, Double) T: ClassTag]( val kernelWidth = inputSize val outputChannels = outputSize - if(needCompute) { + if (needCompute) { ev.getType() match { - case "Double" => MKL.LinearBackwardDataDouble( - input.storage().array().asInstanceOf[Array[Double]], inputOffset, - gradOutput.storage().array().asInstanceOf[Array[Double]], gradOutputOffset, - gradInput.storage().array().asInstanceOf[Array[Double]], gradInputOffset, - weight.storage().array().asInstanceOf[Array[Double]], kernelOffset, - bias.storage().array().asInstanceOf[Array[Double]], biasOffset, classPtr) - case "Float" => MKL.LinearBackwardDataFloat( - input.storage().array().asInstanceOf[Array[Float]], inputOffset, - gradOutput.storage().array().asInstanceOf[Array[Float]], gradOutputOffset, - gradInput.storage().array().asInstanceOf[Array[Float]], gradInputOffset, - weight.storage().array().asInstanceOf[Array[Float]], kernelOffset, - bias.storage().array().asInstanceOf[Array[Float]], biasOffset, classPtr) - case _ => throw new UnsupportedOperationException(s"Only Float supported") + case "Double" => + MKL.LinearBackwardDataDouble(input.storage().array().asInstanceOf[Array[Double]], + inputOffset, + gradOutput.storage().array().asInstanceOf[Array[Double]], + gradOutputOffset, + gradInput.storage().array().asInstanceOf[Array[Double]], + gradInputOffset, + weight.storage().array().asInstanceOf[Array[Double]], + kernelOffset, + bias.storage().array().asInstanceOf[Array[Double]], + biasOffset, + classPtr) + case "Float" => + MKL.LinearBackwardDataFloat(input.storage().array().asInstanceOf[Array[Float]], + inputOffset, + gradOutput.storage().array().asInstanceOf[Array[Float]], + gradOutputOffset, + gradInput.storage().array().asInstanceOf[Array[Float]], + gradInputOffset, + weight.storage().array().asInstanceOf[Array[Float]], + kernelOffset, + bias.storage().array().asInstanceOf[Array[Float]], + biasOffset, + classPtr) + case _ => + throw new UnsupportedOperationException(s"Only Float supported") } } ev.getType() match { - case "Double" => MKL.LinearBackwardKernelDouble( - input.storage().array().asInstanceOf[Array[Double]], inputOffset, - gradOutput.storage().array().asInstanceOf[Array[Double]], gradOutputOffset, - gradWeight.storage().array().asInstanceOf[Array[Double]], gradWeightOffset, - weight.storage().array().asInstanceOf[Array[Double]], kernelOffset, - bias.storage().array().asInstanceOf[Array[Double]], biasOffset, - classPtr) - - case "Float" => MKL.LinearBackwardKernelFloat( - input.storage().array().asInstanceOf[Array[Float]], inputOffset, - gradOutput.storage().array().asInstanceOf[Array[Float]], gradOutputOffset, - gradWeight.storage().array().asInstanceOf[Array[Float]], gradWeightOffset, - 
weight.storage().array().asInstanceOf[Array[Float]], kernelOffset, - bias.storage().array().asInstanceOf[Array[Float]], biasOffset, - classPtr) - - case _ => throw new UnsupportedOperationException(s"Only Float/Double supported") + case "Double" => + MKL.LinearBackwardKernelDouble(input.storage().array().asInstanceOf[Array[Double]], + inputOffset, + gradOutput.storage().array().asInstanceOf[Array[Double]], + gradOutputOffset, + gradWeight.storage().array().asInstanceOf[Array[Double]], + gradWeightOffset, + weight.storage().array().asInstanceOf[Array[Double]], + kernelOffset, + bias.storage().array().asInstanceOf[Array[Double]], + biasOffset, + classPtr) + + case "Float" => + MKL.LinearBackwardKernelFloat(input.storage().array().asInstanceOf[Array[Float]], + inputOffset, + gradOutput.storage().array().asInstanceOf[Array[Float]], + gradOutputOffset, + gradWeight.storage().array().asInstanceOf[Array[Float]], + gradWeightOffset, + weight.storage().array().asInstanceOf[Array[Float]], + kernelOffset, + bias.storage().array().asInstanceOf[Array[Float]], + biasOffset, + classPtr) + + case _ => + throw new UnsupportedOperationException(s"Only Float/Double supported") } ev.getType() match { - case "Double" => MKL.LinearBackwardBiasDouble( - input.storage().array().asInstanceOf[Array[Double]], inputOffset, - gradOutput.storage().array().asInstanceOf[Array[Double]], gradOutputOffset, - gradBias.storage().array().asInstanceOf[Array[Double]], gradBiasOffset, - weight.storage().array().asInstanceOf[Array[Double]], kernelOffset, - bias.storage().array().asInstanceOf[Array[Double]], biasOffset, - classPtr) - - case "Float" => MKL.LinearBackwardBiasFloat( - input.storage().array().asInstanceOf[Array[Float]], inputOffset, - gradOutput.storage().array().asInstanceOf[Array[Float]], gradOutputOffset, - gradBias.storage().array().asInstanceOf[Array[Float]], gradBiasOffset, - weight.storage().array().asInstanceOf[Array[Float]], kernelOffset, - bias.storage().array().asInstanceOf[Array[Float]], biasOffset, - classPtr) - - case _ => throw new UnsupportedOperationException(s"Only Float/Double supported") + case "Double" => + MKL.LinearBackwardBiasDouble(input.storage().array().asInstanceOf[Array[Double]], + inputOffset, + gradOutput.storage().array().asInstanceOf[Array[Double]], + gradOutputOffset, + gradBias.storage().array().asInstanceOf[Array[Double]], + gradBiasOffset, + weight.storage().array().asInstanceOf[Array[Double]], + kernelOffset, + bias.storage().array().asInstanceOf[Array[Double]], + biasOffset, + classPtr) + + case "Float" => + MKL.LinearBackwardBiasFloat(input.storage().array().asInstanceOf[Array[Float]], + inputOffset, + gradOutput.storage().array().asInstanceOf[Array[Float]], + gradOutputOffset, + gradBias.storage().array().asInstanceOf[Array[Float]], + gradBiasOffset, + weight.storage().array().asInstanceOf[Array[Float]], + kernelOffset, + bias.storage().array().asInstanceOf[Array[Float]], + biasOffset, + classPtr) + + case _ => + throw new UnsupportedOperationException(s"Only Float/Double supported") } gradInput } -// override def accGradParameters(input: Tensor[T], gradOutput: Tensor[T], scale: Double = 1.0): Unit ={ -// require(input.dim() == 2, "only batch mode supported") +// override def accGradParameters(input: Tensor[T], +// gradOutput: Tensor[T], +// scale: Double = 1.0): Unit = { +// require(input.dim() == 2, "only batch mode supported") // require(input.dim() == 1 || input.dim() == 2, "input must be vector or matrix") // val value = ev.fromType[Double](scale) -// if(input.dim() == 1) { 
+// if (input.dim() == 1) { // gradWeight.addr(value, gradOutput, input) // gradBias.add(value, gradOutput) -// } -// else if(input.dim() == 2) { +// } else if (input.dim() == 2) { // gradWeight.addmm(value, gradOutput.t, input) // gradBias.addmv(value, gradOutput.t, addBuffer) // } // } - override def updateParameters(learningRate:T): Unit ={ - //weight.map(gradWeight,(a,b)=>a - learningRate*b) + override def updateParameters(learningRate: T): Unit = { + // weight.map(gradWeight,(a,b)=>a - learningRate*b) weight.add(ev.negative(learningRate), gradWeight) - //bias.map(gradBias,(a,b)=>a - learningRate*b) + // bias.map(gradBias,(a,b)=>a - learningRate*b) bias.add(ev.negative(learningRate), gradBias) } @@ -210,33 +275,42 @@ class Linear[@specialized(Float, Double) T: ClassTag]( gradBias.zero() } - override def parameters(): (Array[Tensor[T]], Array[Tensor[T]]) ={ + override def parameters(): (Array[Tensor[T]], Array[Tensor[T]]) = { (Array(this.weight, this.bias), Array(this.gradWeight, this.gradBias)) } - override def equals(obj : Any) : Boolean = { + override def equals(obj: Any): Boolean = { - if(!super.equals(obj)) { + if (!super.equals(obj)) { return false } - if(!obj.isInstanceOf[Linear[T]]) - return false + if (!obj.isInstanceOf[Linear[T]]) { return false } val other = obj.asInstanceOf[Linear[T]] - if(this.eq(other)) - return true + if (this.eq(other)) { return true } gradWeight == other.gradWeight && - gradBias == other.gradBias && - weight == other.weight && - bias == other.bias + gradBias == other.gradBias && + weight == other.weight && + bias == other.bias + } + + override def hashCode() : Int = { + val seed = 37 + var hash = super.hashCode() + hash = hash * seed + gradWeight.hashCode() + hash = hash * seed + gradBias.hashCode() + hash = hash * seed + weight.hashCode() + hash = hash * seed + bias.hashCode() + + hash } - override def toString() : String = { + override def toString(): String = { s"nn.mkl.Linear($inputSize -> $outputSize)" } - override def findModel(paramOffset : Int, indexes : Array[Int]) : (Module[T], Int, Array[Int]) = { + override def findModel(paramOffset: Int, indexes: Array[Int]): (Module[T], Int, Array[Int]) = { (this, paramOffset - outputSize * inputSize - outputSize, indexes) } diff --git a/dl/src/main/scala/com/intel/analytics/sparkdl/nn/mkl/LocalNormalizationAcrossChannels.scala b/dl/src/main/scala/com/intel/analytics/sparkdl/nn/mkl/LocalNormalizationAcrossChannels.scala index bcb29736669..30e185c258f 100644 --- a/dl/src/main/scala/com/intel/analytics/sparkdl/nn/mkl/LocalNormalizationAcrossChannels.scala +++ b/dl/src/main/scala/com/intel/analytics/sparkdl/nn/mkl/LocalNormalizationAcrossChannels.scala @@ -1,3 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
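Linear.updateParameters above is a plain SGD step, weight := weight - learningRate * gradWeight (and the same for the bias). A raw-array Scala sketch of that update, purely for illustration:

// Equivalent of weight.add(ev.negative(learningRate), gradWeight) on flat arrays.
def sgdStep(w: Array[Float], gw: Array[Float], lr: Float): Unit = {
  var i = 0
  while (i < w.length) {
    w(i) -= lr * gw(i)
    i += 1
  }
}
// Example: val w = Array(1.0f, 2.0f); sgdStep(w, Array(0.5f, 0.5f), 0.1f) leaves w = (0.95, 1.95).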
+ */ + package com.intel.analytics.sparkdl.nn.mkl import com.intel.analytics.sparkdl.mkl.MKL @@ -8,12 +25,12 @@ import com.intel.analytics.sparkdl.utils.RandomGenerator._ import scala.reflect.ClassTag import scala.language.implicitConversions -/** - * Created by wyz on 16-9-7. - */ -class LocalNormalizationAcrossChannels[@specialized(Float, Double) T: ClassTag] -(val size : Int = 5, val alpha : Double = 1.0, val beta : Double = 0.75, val k : Double = 1.0)( - implicit ev: TensorNumeric[T]) extends Module[T] { +class LocalNormalizationAcrossChannels[@specialized(Float, Double) T: ClassTag]( + val size: Int = 5, + val alpha: Double = 1.0, + val beta: Double = 0.75, + val k: Double = 1.0)(implicit ev: TensorNumeric[T]) + extends Module[T] { private val scale = Tensor[T]() private val paddedSquare = Tensor[T]() @@ -34,14 +51,23 @@ class LocalNormalizationAcrossChannels[@specialized(Float, Double) T: ClassTag] return false } - if (!obj.isInstanceOf[LocalNormalizationAcrossChannels[T]]) - return false + if (!obj.isInstanceOf[LocalNormalizationAcrossChannels[T]]) { return false } val other = obj.asInstanceOf[LocalNormalizationAcrossChannels[T]] - if (this.eq(other)) - return true + if (this.eq(other)) { return true } size == other.size && - alpha == other.alpha && beta == other.beta && k == other.k + alpha == other.alpha && beta == other.beta && k == other.k + } + + override def hashCode() : Int = { + val seed = 37 + var hash = super.hashCode() + hash = hash * seed + size.hashCode() + hash = hash * seed + alpha.hashCode() + hash = hash * seed + beta.hashCode() + hash = hash * seed + k.hashCode() + + hash } override def toString(): String = { @@ -49,107 +75,112 @@ class LocalNormalizationAcrossChannels[@specialized(Float, Double) T: ClassTag] } override def updateOutput(input: Tensor[T]): Tensor[T] = { - require(input.nDimension() == 4, "Input must have 4 dimensions, corresponding to (batch, channels, height, width)") + require(input.nDimension() == 4, + "Input must have 4 dimensions, corresponding to (batch, channels, height, width)") require(input.isContiguous(), "Input is not contiguous") output.resizeAs(input) - val inputOffset = input.storageOffset() - 1; + val inputOffset = input.storageOffset() - 1; val outputOffset = output.storageOffset() - 1; - // +---------+-------+-------+ - // | | 3-dim | 4-dim | - // +=========+=======+=======+ - // | Number | ? 
| 1 | - // +---------+-------+-------+ - // | Channel | 1 | 2 | - // +---------+-------+-------+ - // | Height | 2 | 3 | - // +---------+-------+-------+ - // | Width | 3 | 4 | - // +---------+-------+-------+ - // Table: Index of 3-dim/4-dim input - - val inputWidth = input.size(input.dim()) - val inputHeight = input.size(input.dim() - 1) + val inputWidth = input.size(input.dim()) + val inputHeight = input.size(input.dim() - 1) val inputChannel = if (input.dim() <= 3) 1 else input.size(input.dim() - 2) - val inputNumber = if (input.dim() <= 3) 1 else input.size(input.dim() - 3) + val inputNumber = if (input.dim() <= 3) 1 else input.size(input.dim() - 3) // TODO we may set input.size(input.dim() - 3) == 1 if input.dim() == 3 if (firstPass) { ev.getType() match { - case "Float" => classPtr = MKL.LRNInitFloat( - inputNumber, inputChannel, inputHeight, inputWidth, - size, alpha.toFloat, beta.toFloat, k.toFloat, 4) - case "Double" => classPtr = MKL.LRNInitDouble( - inputNumber, inputChannel, inputHeight, inputWidth, - size, alpha.toDouble, beta.toDouble, k.toDouble, 4) - case _ => throw new UnsupportedOperationException(s"Only Float/Double supported") + case "Float" => + classPtr = MKL.LRNInitFloat(inputNumber, + inputChannel, + inputHeight, + inputWidth, + size, + alpha.toFloat, + beta.toFloat, + k.toFloat, + 4) + case "Double" => + classPtr = MKL.LRNInitDouble(inputNumber, + inputChannel, + inputHeight, + inputWidth, + size, + alpha.toDouble, + beta.toDouble, + k.toDouble, + 4) + case _ => + throw new UnsupportedOperationException(s"Only Float/Double supported") } firstPass = false } - implicit def bool2int(b:Boolean) = if (b) 1 else 0 + implicit def bool2int(b: Boolean) = if (b) 1 else 0 ev.getType() match { - case "Float" => MKL.LRNForwardFloat( - input.storage().array().asInstanceOf[Array[Float]], inputOffset, - output.storage().array().asInstanceOf[Array[Float]], outputOffset, - classPtr - ) - case "Double" => MKL.LRNForwardDouble( - input.storage().array().asInstanceOf[Array[Double]], inputOffset, - output.storage().array().asInstanceOf[Array[Double]], outputOffset, - classPtr - ) - case _ => throw new UnsupportedOperationException(s"Only Float/Double supported") + case "Float" => + MKL.LRNForwardFloat( + input.storage().array().asInstanceOf[Array[Float]], + inputOffset, + output.storage().array().asInstanceOf[Array[Float]], + outputOffset, + classPtr + ) + case "Double" => + MKL.LRNForwardDouble( + input.storage().array().asInstanceOf[Array[Double]], + inputOffset, + output.storage().array().asInstanceOf[Array[Double]], + outputOffset, + classPtr + ) + case _ => + throw new UnsupportedOperationException(s"Only Float/Double supported") } output } override def updateGradInput(input: Tensor[T], gradOutput: Tensor[T]): Tensor[T] = { - require(input.nDimension() == 4, "Input must have 4 dimensions, corresponding to (batch, channels, height, width)") + require(input.nDimension() == 4, + "Input must have 4 dimensions, corresponding to (batch, channels, height, width)") require(gradOutput.isContiguous(), "gradOutput is not contiguous") gradInput.resizeAs(input) - val inputOffset = input.storageOffset() - 1; + val inputOffset = input.storageOffset() - 1; val outputOffset = output.storageOffset() - 1; - // +---------+-------+-------+ - // | | 3-dim | 4-dim | - // +=========+=======+=======+ - // | Number | ? 
| 1 | - // +---------+-------+-------+ - // | Channel | 1 | 2 | - // +---------+-------+-------+ - // | Height | 2 | 3 | - // +---------+-------+-------+ - // | Width | 3 | 4 | - // +---------+-------+-------+ - // Table: Index of 3-dim/4-dim input - - val inputWidth = input.size(input.dim()) - val inputHeight = input.size(input.dim() - 1) + val inputWidth = input.size(input.dim()) + val inputHeight = input.size(input.dim() - 1) val inputChannel = input.size(input.dim() - 2) - val inputNumber = if (input.dim() == 3) 1 else input.size(input.dim() - 3) + val inputNumber = if (input.dim() == 3) 1 else input.size(input.dim() - 3) // TODO we may set input.size(input.dim() - 3) == 1 if input.dim() == 3 val gradOutputOffset = gradOutput.storageOffset() - 1 - val gradInputOffset = gradInput.storageOffset() -1 + val gradInputOffset = gradInput.storageOffset() - 1 ev.getType() match { - case "Float" => MKL.LRNBackwardFloat( - input.storage().array().asInstanceOf[Array[Float]], inputOffset, - gradOutput.storage().array().asInstanceOf[Array[Float]], gradOutputOffset, - gradInput.storage().array().asInstanceOf[Array[Float]], gradInputOffset, - classPtr) - case "Double" => MKL.LRNBackwardDouble( - input.storage().array().asInstanceOf[Array[Double]], inputOffset, - gradOutput.storage().array().asInstanceOf[Array[Double]], gradOutputOffset, - gradInput.storage().array().asInstanceOf[Array[Double]], gradInputOffset, - classPtr) - case _ => throw new UnsupportedOperationException(s"Only Float/Double supported") + case "Float" => + MKL.LRNBackwardFloat(input.storage().array().asInstanceOf[Array[Float]], + inputOffset, + gradOutput.storage().array().asInstanceOf[Array[Float]], + gradOutputOffset, + gradInput.storage().array().asInstanceOf[Array[Float]], + gradInputOffset, + classPtr) + case "Double" => + MKL.LRNBackwardDouble(input.storage().array().asInstanceOf[Array[Double]], + inputOffset, + gradOutput.storage().array().asInstanceOf[Array[Double]], + gradOutputOffset, + gradInput.storage().array().asInstanceOf[Array[Double]], + gradInputOffset, + classPtr) + case _ => + throw new UnsupportedOperationException(s"Only Float/Double supported") } gradInput diff --git a/dl/src/main/scala/com/intel/analytics/sparkdl/nn/mkl/Pooling.scala b/dl/src/main/scala/com/intel/analytics/sparkdl/nn/mkl/Pooling.scala index 5aa2b1347a3..796652b7104 100644 --- a/dl/src/main/scala/com/intel/analytics/sparkdl/nn/mkl/Pooling.scala +++ b/dl/src/main/scala/com/intel/analytics/sparkdl/nn/mkl/Pooling.scala @@ -1,3 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
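Throughout these layers the NCHW geometry is read with the 1-based tensor accessors input.size(input.dim()), input.size(input.dim() - 1), and so on. The following Scala sketch restates that indexing on a plain 0-based array of sizes, as a reading aid only:

// Sizes of a 4-D NCHW tensor, e.g. batch 32, 64 channels, 28x28 feature maps.
val sizes   = Array(32, 64, 28, 28)
val dim     = sizes.length
val width   = sizes(dim - 1)                       // input.size(input.dim())
val height  = sizes(dim - 2)                       // input.size(input.dim() - 1)
val channel = if (dim <= 2) 1 else sizes(dim - 3)  // input.size(input.dim() - 2)
val number  = if (dim <= 3) 1 else sizes(dim - 4)  // input.size(input.dim() - 3)
// Here: width = 28, height = 28, channel = 64, number = 32.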
+ */ + package com.intel.analytics.sparkdl.nn.mkl import com.intel.analytics.sparkdl.mkl.MKL @@ -10,22 +27,26 @@ import scala.language.implicitConversions import scala.reflect.ClassTag -class SpatialPooling[@specialized(Float, Double) T: ClassTag](val kernelWidth: Int, - val kernelHeight: Int, - val strideWidth: Int, - val strideHeight: Int, - val padWidth: Int = 0, - val padHeight: Int = 0) - (implicit ev: TensorNumeric[T]) extends Module[T] { - implicit def bool2int(b: Boolean) = if (b) 1 else 0 +class SpatialPooling[@specialized(Float, Double) T: ClassTag]( + val kernelWidth: Int, + val kernelHeight: Int, + val strideWidth: Int, + val strideHeight: Int, + val padWidth: Int = 0, + val padHeight: Int = 0)(implicit ev: TensorNumeric[T]) + extends Module[T] { + + implicit def bool2int(b: Boolean) : Int = if (b) 1 else 0 var classPtr: Long = 0L private var firstPass = true - val algorithm = 0; - override def getClassPtr(): Long = classPtr + // algorithm = 0 -> max + // algorithm = 0 -> avg + val algorithm = 0; + // TODO just for adopt to the testcase var ceil_mode = false def ceil(): SpatialPooling[T] = { @@ -38,168 +59,190 @@ class SpatialPooling[@specialized(Float, Double) T: ClassTag](val kernelWidth: I this } - override def toString() : String = { - s"mkl.Pooling" - } - - def this(kernelWidth: Int, kernelHeight: Int)(implicit ev: TensorNumeric[T]){ + def this(kernelWidth: Int, kernelHeight: Int)(implicit ev: TensorNumeric[T]) { this(kernelWidth, kernelHeight, kernelWidth, kernelHeight) } // compute the output height and width - def computeOut(input:Int, pad:Int, kernel:Int, stride:Int): Int = { - if (ceil_mode) + def computeOut(input: Int, pad: Int, kernel: Int, stride: Int): Int = { + if (ceil_mode) { math.ceil(1.0 * (input + 2 * pad - kernel) / stride).toInt + 1 - else + } else { math.floor(1.0 * (input + 2 * pad - kernel) / stride).toInt + 1 + } } override def updateGradInput(input: Tensor[T], gradOutput: Tensor[T]): Tensor[T] = { gradInput.resizeAs(input) - val inputOffset = input.storageOffset() - 1; - val outputOffset = output.storageOffset() - 1; - val gradInputOffset = gradInput.storageOffset() - 1; + val inputOffset = input.storageOffset() - 1; + val outputOffset = output.storageOffset() - 1; + val gradInputOffset = gradInput.storageOffset() - 1; val gradOutputOffset = gradOutput.storageOffset() - 1; - // +---------+-------+-------+ - // | | 3-dim | 4-dim | - // +=========+=======+=======+ - // | Number | ? 
| 1 | - // +---------+-------+-------+ - // | Channel | 1 | 2 | - // +---------+-------+-------+ - // | Height | 2 | 3 | - // +---------+-------+-------+ - // | Width | 3 | 4 | - // +---------+-------+-------+ - // Table: Index of 3-dim/4-dim input - - val inputWidth = input.size(input.dim()) - val inputHeight = input.size(input.dim() - 1) + val inputWidth = input.size(input.dim()) + val inputHeight = input.size(input.dim() - 1) val inputChannel = input.size(input.dim() - 2) - val inputNumber = if (input.dim() == 3) 1 else input.size(input.dim() - 3) + val inputNumber = if (input.dim() == 3) 1 else input.size(input.dim() - 3) // TODO we may set input.size(input.dim() - 3) == 1 if input.dim() == 3 - val outputHeight = computeOut(inputHeight, padHeight, kernelHeight, strideHeight) - val outputWidth = computeOut(inputWidth, padHeight, kernelWidth, strideWidth) + val outputHeight = + computeOut(inputHeight, padHeight, kernelHeight, strideHeight) + val outputWidth = + computeOut(inputWidth, padHeight, kernelWidth, strideWidth) val outputChannel = inputChannel - val outputNumber = inputNumber + val outputNumber = inputNumber ev.getType() match { - case "Float" => MKL.PoolingBackwardFloat( - input.storage().array().asInstanceOf[Array[Float]], inputOffset, - gradOutput.storage().array().asInstanceOf[Array[Float]], gradOutputOffset, - gradInput.storage().array().asInstanceOf[Array[Float]], gradInputOffset, - classPtr) - case "Double" => MKL.PoolingBackwardDouble( - input.storage().array().asInstanceOf[Array[Double]], inputOffset, - gradOutput.storage().array().asInstanceOf[Array[Double]], gradOutputOffset, - gradInput.storage().array().asInstanceOf[Array[Double]], gradOutputOffset, - classPtr) - case _ => throw new UnsupportedOperationException(s"Only Float/Double supported") + case "Float" => + MKL.PoolingBackwardFloat(input.storage().array().asInstanceOf[Array[Float]], + inputOffset, + gradOutput.storage().array().asInstanceOf[Array[Float]], + gradOutputOffset, + gradInput.storage().array().asInstanceOf[Array[Float]], + gradInputOffset, + classPtr) + case "Double" => + MKL.PoolingBackwardDouble(input.storage().array().asInstanceOf[Array[Double]], + inputOffset, + gradOutput.storage().array().asInstanceOf[Array[Double]], + gradOutputOffset, + gradInput.storage().array().asInstanceOf[Array[Double]], + gradOutputOffset, + classPtr) + case _ => + throw new UnsupportedOperationException(s"Only Float/Double supported") } gradInput } override def updateOutput(input: Tensor[T]): Tensor[T] = { - // +---------+-------+-------+ - // | | 3-dim | 4-dim | - // +=========+=======+=======+ - // | Number | ? 
| 1 | - // +---------+-------+-------+ - // | Channel | 1 | 2 | - // +---------+-------+-------+ - // | Height | 2 | 3 | - // +---------+-------+-------+ - // | Width | 3 | 4 | - // +---------+-------+-------+ - // Table: Index of 3-dim/4-dim input - - val inputWidth = input.size(input.dim()) - val inputHeight = input.size(input.dim() - 1) + val inputWidth = input.size(input.dim()) + val inputHeight = input.size(input.dim() - 1) val inputChannel = input.size(input.dim() - 2) - val inputNumber = if (input.dim() == 3) 1 else input.size(input.dim() - 3) + val inputNumber = if (input.dim() == 3) 1 else input.size(input.dim() - 3) // TODO we may set input.size(input.dim() - 3) == 1 if input.dim() == 3 - val outputHeight = computeOut(inputHeight, padHeight, kernelHeight, strideHeight) - val outputWidth = computeOut(inputWidth, padWidth, kernelWidth, strideWidth) + val outputHeight = + computeOut(inputHeight, padHeight, kernelHeight, strideHeight) + val outputWidth = + computeOut(inputWidth, padWidth, kernelWidth, strideWidth) val outputChannel = inputChannel - val outputNumber = inputNumber + val outputNumber = inputNumber - val inputOffset = input.storageOffset() - 1; + val inputOffset = input.storageOffset() - 1; val outputOffset = output.storageOffset() - 1; - if (input.dim() == 3) + if (input.dim() == 3) { output.resize(Array(outputChannel, outputHeight, outputWidth)) - else + } else { output.resize(Array(outputNumber, outputChannel, outputHeight, outputWidth)) + } // TODO algorithm = 0 means using MAX val algorithm = 0 if (firstPass) { ev.getType() match { - case "Float" => classPtr = MKL.PoolingInitFloat( - inputNumber, inputChannel, inputHeight, inputWidth, - kernelHeight, kernelWidth, strideHeight, strideWidth, padHeight, padWidth, 4, - ceil_mode, algorithm) - case "Double" => classPtr = MKL.PoolingInitDouble( - inputNumber, inputChannel, inputHeight, inputWidth, - kernelHeight, kernelWidth, strideHeight, strideWidth, padHeight, padWidth, 4, - ceil_mode, algorithm) - case _ => throw new UnsupportedOperationException(s"Only Float/Double supported") + case "Float" => + classPtr = MKL.PoolingInitFloat(inputNumber, + inputChannel, + inputHeight, + inputWidth, + kernelHeight, + kernelWidth, + strideHeight, + strideWidth, + padHeight, + padWidth, + 4, + ceil_mode, + algorithm) + case "Double" => + classPtr = MKL.PoolingInitDouble(inputNumber, + inputChannel, + inputHeight, + inputWidth, + kernelHeight, + kernelWidth, + strideHeight, + strideWidth, + padHeight, + padWidth, + 4, + ceil_mode, + algorithm) + case _ => + throw new UnsupportedOperationException(s"Only Float/Double supported") } firstPass = false } ev.getType() match { - case "Float" => MKL.PoolingForwardFloat( - input.storage().array.asInstanceOf[Array[Float]], inputOffset, - output.storage().array.asInstanceOf[Array[Float]], outputOffset, classPtr) - case "Double" => MKL.PoolingForwardDouble( - input.storage().array.asInstanceOf[Array[Double]], inputOffset, - output.storage().array.asInstanceOf[Array[Double]], outputOffset, classPtr) - case _ => throw new UnsupportedOperationException(s"Only Float/Double supported") + case "Float" => + MKL.PoolingForwardFloat(input.storage().array.asInstanceOf[Array[Float]], + inputOffset, + output.storage().array.asInstanceOf[Array[Float]], + outputOffset, + classPtr) + case "Double" => + MKL.PoolingForwardDouble(input.storage().array.asInstanceOf[Array[Double]], + inputOffset, + output.storage().array.asInstanceOf[Array[Double]], + outputOffset, + classPtr) + case _ => + throw new 
UnsupportedOperationException(s"Only Float/Double supported") } output } + + override def toString(): String = { + s"mkl.Pooling" + } + } class SpatialMaxPooling[T: ClassTag](kernelWidth: Int, kernelHeight: Int, - strideWidth : Int, + strideWidth: Int, strideHeight: Int, padWidth: Int = 0, - padHeight: Int = 0) - (implicit ev: TensorNumeric[T]) - extends SpatialPooling[T](kernelWidth, kernelHeight, strideWidth, strideHeight, padWidth, padHeight) -{ + padHeight: Int = 0)(implicit ev: TensorNumeric[T]) + extends SpatialPooling[T](kernelWidth, + kernelHeight, + strideWidth, + strideHeight, + padWidth, + padHeight) { override val algorithm: Int = 0 - def this(kernelWidth: Int, kernelHeight: Int)(implicit ev: TensorNumeric[T]){ + def this(kernelWidth: Int, kernelHeight: Int)(implicit ev: TensorNumeric[T]) { this(kernelWidth, kernelHeight, kernelWidth, kernelHeight) } - override def toString() : String = { + override def toString(): String = { s"mkl.SpatialMaxPooling" } } class SpatialAveragePooling[T: ClassTag](kernelWidth: Int, - kernelHeight: Int, - strideWidth: Int, - strideHeight: Int, - padWidth: Int = 0, - padHeight: Int = 0) - (implicit ev: TensorNumeric[T]) - extends SpatialPooling[T](kernelWidth, kernelHeight, strideWidth, strideHeight, padWidth, padHeight) -{ + kernelHeight: Int, + strideWidth: Int, + strideHeight: Int, + padWidth: Int = 0, + padHeight: Int = 0)(implicit ev: TensorNumeric[T]) + extends SpatialPooling[T](kernelWidth, + kernelHeight, + strideWidth, + strideHeight, + padWidth, + padHeight) { override val algorithm: Int = 1 - def this(kernelWidth: Int, kernelHeight: Int)(implicit ev: TensorNumeric[T]){ + def this(kernelWidth: Int, kernelHeight: Int)(implicit ev: TensorNumeric[T]) { this(kernelWidth, kernelHeight, kernelWidth, kernelHeight) } - override def toString() : String = { + override def toString(): String = { s"mkl.SpatialAvgPooling" } } diff --git a/dl/src/main/scala/com/intel/analytics/sparkdl/nn/mkl/ReLU.scala b/dl/src/main/scala/com/intel/analytics/sparkdl/nn/mkl/ReLU.scala index 5d2a650515b..77fb16e903d 100644 --- a/dl/src/main/scala/com/intel/analytics/sparkdl/nn/mkl/ReLU.scala +++ b/dl/src/main/scala/com/intel/analytics/sparkdl/nn/mkl/ReLU.scala @@ -1,3 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
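A short usage sketch for the two pooling flavours defined above; the constructors and ceil() come from this patch, while the 112x112 input size is made up and the expected outputs follow from the layer's computeOut arithmetic (ceil or floor of (input + 2 * pad - kernel) / stride, plus one).

// Assumes an implicit TensorNumeric[Float] is in scope, as elsewhere in sparkdl.
val maxPool = new SpatialMaxPooling[Float](3, 3, 2, 2).ceil() // 3x3 kernel, stride 2, algorithm = 0 (max)
val avgPool = new SpatialAveragePooling[Float](5, 5, 3, 3)    // 5x5 kernel, stride 3, algorithm = 1 (average)
// For a 112x112 input plane with no padding:
//   max pool : math.ceil((112 - 3) / 2.0).toInt + 1  == 56
//   avg pool : math.floor((112 - 5) / 3.0).toInt + 1 == 36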
+ */ + package com.intel.analytics.sparkdl.nn.mkl import com.intel.analytics.sparkdl.mkl.MKL @@ -9,8 +26,11 @@ import scala.language.implicitConversions import scala.reflect.ClassTag -class ReLU[@specialized(Float, Double) T: ClassTag](ip:Boolean = false)(implicit ev: TensorNumeric[T]) extends Module[T]{ - override def toString() : String = { +class ReLU[@specialized(Float, Double) T: ClassTag](ip: Boolean = false)( + implicit ev: TensorNumeric[T]) + extends Module[T] { + + override def toString(): String = { s"mkl.ReLU" } @@ -24,101 +44,90 @@ class ReLU[@specialized(Float, Double) T: ClassTag](ip:Boolean = false)(implicit // TODO Why does copy in mkl_dnn? Because it costs so much time, I comment is out. // gradInput.copy(gradOutput) - val inputOffset = input.storageOffset() - 1; - val outputOffset = output.storageOffset() - 1; - val gradInputOffset = gradInput.storageOffset() - 1; + val inputOffset = input.storageOffset() - 1; + val outputOffset = output.storageOffset() - 1; + val gradInputOffset = gradInput.storageOffset() - 1; val gradOutputOffset = gradOutput.storageOffset() - 1; - // +---------+-------+-------+ - // | | 3-dim | 4-dim | - // +=========+=======+=======+ - // | Number | ? | 1 | - // +---------+-------+-------+ - // | Channel | 1 | 2 | - // +---------+-------+-------+ - // | Height | 2 | 3 | - // +---------+-------+-------+ - // | Width | 3 | 4 | - // +---------+-------+-------+ - // Table: Index of 3-dim/4-dim input - - val inputWidth = input.size(input.dim()) - val inputHeight = input.size(input.dim() - 1) + val inputWidth = input.size(input.dim()) + val inputHeight = input.size(input.dim() - 1) val inputChannel = if (input.dim() <= 2) 1 else input.size(input.dim() - 2) - val inputNumber = if (input.dim() <= 3) 1 else input.size(input.dim() - 3) + val inputNumber = if (input.dim() <= 3) 1 else input.size(input.dim() - 3) // TODO we may set input.size(input.dim() - 3) == 1 if input.dim() == 3 - implicit def bool2int(b:Boolean) = if (b) 1 else 0 + implicit def bool2int(b: Boolean) = if (b) 1 else 0 val start = System.nanoTime() ev.getType() match { - case "Float" => MKL.ReLUBackwardFloat( - input.storage().array().asInstanceOf[Array[Float]], inputOffset, - gradOutput.storage().array().asInstanceOf[Array[Float]], gradOutputOffset, - gradInput.storage().array().asInstanceOf[Array[Float]], gradInputOffset, classPtr) - - case "Double" => MKL.ReLUBackwardDouble( - input.storage().array().asInstanceOf[Array[Double]], inputOffset, - gradOutput.storage().array().asInstanceOf[Array[Double]], gradOutputOffset, - gradInput.storage().array().asInstanceOf[Array[Double]], gradInputOffset, classPtr) - - case _ => throw new UnsupportedOperationException(s"Only Float/Double supported") + case "Float" => + MKL.ReLUBackwardFloat(input.storage().array().asInstanceOf[Array[Float]], + inputOffset, + gradOutput.storage().array().asInstanceOf[Array[Float]], + gradOutputOffset, + gradInput.storage().array().asInstanceOf[Array[Float]], + gradInputOffset, + classPtr) + + case "Double" => + MKL.ReLUBackwardDouble(input.storage().array().asInstanceOf[Array[Double]], + inputOffset, + gradOutput.storage().array().asInstanceOf[Array[Double]], + gradOutputOffset, + gradInput.storage().array().asInstanceOf[Array[Double]], + gradInputOffset, + classPtr) + + case _ => + throw new UnsupportedOperationException(s"Only Float/Double supported") } - //println("[SCALA] ReLU backward call JNI " + (System.nanoTime() - start) / 1e6) gradInput } - override def updateOutput(input: Tensor[T]): Tensor[T] = { + override 
def updateOutput(input: Tensor[T]): Tensor[T] = { output.resizeAs(input) - val inputOffset = input.storageOffset() - 1; + val inputOffset = input.storageOffset() - 1; val outputOffset = output.storageOffset() - 1; - // +---------+-------+-------+ - // | | 3-dim | 4-dim | - // +=========+=======+=======+ - // | Number | ? | 1 | - // +---------+-------+-------+ - // | Channel | 1 | 2 | - // +---------+-------+-------+ - // | Height | 2 | 3 | - // +---------+-------+-------+ - // | Width | 3 | 4 | - // +---------+-------+-------+ - // Table: Index of 3-dim/4-dim input - - val inputWidth = input.size(input.dim()) - val inputHeight = input.size(input.dim() - 1) + val inputWidth = input.size(input.dim()) + val inputHeight = input.size(input.dim() - 1) val inputChannel = if (input.dim() <= 2) 1 else input.size(input.dim() - 2) - val inputNumber = if (input.dim() <= 3) 1 else input.size(input.dim() - 3) + val inputNumber = if (input.dim() <= 3) 1 else input.size(input.dim() - 3) // TODO we may set input.size(input.dim() - 3) == 1 if input.dim() == 3 - if (firstPass) { ev.getType() match { - case "Float" => classPtr = MKL.ReLUInitFloat( - inputNumber, inputChannel, inputHeight, inputWidth, 4); - case "Double" => classPtr = MKL.ReLUInitDouble( - inputNumber, inputChannel, inputHeight, inputWidth, 4); - case _ => throw new UnsupportedOperationException(s"Only Float/Double supported") + case "Float" => + classPtr = MKL.ReLUInitFloat(inputNumber, inputChannel, inputHeight, inputWidth, 4); + case "Double" => + classPtr = MKL.ReLUInitDouble(inputNumber, inputChannel, inputHeight, inputWidth, 4); + case _ => + throw new UnsupportedOperationException(s"Only Float/Double supported") } firstPass = false } - implicit def bool2int(b:Boolean) = if (b) 1 else 0 + implicit def bool2int(b: Boolean) = if (b) 1 else 0 val start = System.nanoTime() ev.getType() match { - case "Float" => MKL.ReLUForwardFloat( - input.storage().array().asInstanceOf[Array[Float]], inputOffset, - output.storage().array().asInstanceOf[Array[Float]], outputOffset, classPtr) - - case "Double" => MKL.ReLUForwardDouble( - input.storage().array().asInstanceOf[Array[Double]], inputOffset, - output.storage().array().asInstanceOf[Array[Double]], outputOffset, classPtr) - - case _ => throw new UnsupportedOperationException(s"Only Float/Double supported") + case "Float" => + MKL.ReLUForwardFloat(input.storage().array().asInstanceOf[Array[Float]], + inputOffset, + output.storage().array().asInstanceOf[Array[Float]], + outputOffset, + classPtr) + + case "Double" => + MKL.ReLUForwardDouble(input.storage().array().asInstanceOf[Array[Double]], + inputOffset, + output.storage().array().asInstanceOf[Array[Double]], + outputOffset, + classPtr) + + case _ => + throw new UnsupportedOperationException(s"Only Float/Double supported") } - //println("[SCALA] ReLU forward call JNI " + (System.nanoTime() - start) / 1e6) + // println("[SCALA] ReLU forward call JNI " + (System.nanoTime() - start) / 1e6) output } diff --git a/dl/src/main/scala/com/intel/analytics/sparkdl/nn/mkl/SpatialConvolution.scala b/dl/src/main/scala/com/intel/analytics/sparkdl/nn/mkl/SpatialConvolution.scala index 518283aa764..0c610d45ab2 100644 --- a/dl/src/main/scala/com/intel/analytics/sparkdl/nn/mkl/SpatialConvolution.scala +++ b/dl/src/main/scala/com/intel/analytics/sparkdl/nn/mkl/SpatialConvolution.scala @@ -1,3 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
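The ReLU code above also shows the lazy-initialisation pattern shared by all of these layers: the native handle (classPtr) is created on the first forward pass from the observed input geometry and then reused. A stripped-down Scala sketch of that pattern, with a dummy stand-in for the JNI init call:

class LazyNativeLayer {
  private var classPtr  = 0L
  private var firstPass = true

  def forward(n: Int, c: Int, h: Int, w: Int): Unit = {
    if (firstPass) {
      // real code: classPtr = MKL.ReLUInitFloat(n, c, h, w, 4)
      classPtr = dummyInit(n, c, h, w)
      firstPass = false
    }
    // real code: MKL.ReLUForwardFloat(inputArray, inputOffset, outputArray, outputOffset, classPtr)
  }

  private def dummyInit(n: Int, c: Int, h: Int, w: Int): Long = n.toLong * c * h * w
}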
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package com.intel.analytics.sparkdl.nn.mkl import com.intel.analytics.sparkdl.mkl.MKL @@ -14,26 +31,28 @@ import com.intel.analytics.sparkdl.nn.Xavier import scala.reflect.ClassTag -class SpatialConvolution[@specialized(Float, Double) T:ClassTag] ( - val nInputPlane : Int, // The number of expected input planes in the image given into forward() - val nOutputPlane : Int, // The number of output planes the convolution layer will produce. - val kernelWidth : Int, // The kernel width of the convolution - val kernelHeight : Int, // The kernel height of the convolution - val strideWidth : Int = 1, // The step of the convolution in the width dimension. - val strideHeight : Int = 1, //The step of the convolution in the height dimension - val padWidth : Int = 0, // The additional zeros added per width to the input planes. A good number is (kW-1)/2. - val padHeight : Int = 0, // The additional zeros added per height to the input planes. A good number is (kH-1)/2. - val needCompute : Boolean = true, - val groups: Int = 1, - private var initMethod: InitializationMethod = Default - )(implicit ev: TensorNumeric[T]) extends Module[T] { - val weight : Tensor[T] = Tensor[T](nOutputPlane, nInputPlane, kernelHeight, kernelWidth) - val bias : Tensor[T] = Tensor[T](nOutputPlane) - this.gradInput = Tensor[T](nOutputPlane, nInputPlane, kernelHeight, kernelWidth) - this.gradBias = Tensor[T](nOutputPlane) - this.gradWeight = Tensor[T](nOutputPlane, nInputPlane, kernelHeight, kernelWidth) - val fInput = Tensor[T]() - val fGradInput = Tensor[T]() +class SpatialConvolution[@specialized(Float, Double) T: ClassTag]( + val nInputPlane: Int, + val nOutputPlane: Int, + val kernelWidth: Int, + val kernelHeight: Int, + val strideWidth: Int = 1, + val strideHeight: Int = 1, + val padWidth: Int = 0, + val padHeight: Int = 0, + val needCompute: Boolean = true, + val groups: Int = 1, + private var initMethod: InitializationMethod = Default +)(implicit ev: TensorNumeric[T]) + extends Module[T] { + val weight: Tensor[T] = + Tensor[T](nOutputPlane, nInputPlane, kernelHeight, kernelWidth) + val bias: Tensor[T] = Tensor[T](nOutputPlane) + this.gradInput = Tensor[T](nOutputPlane, nInputPlane, kernelHeight, kernelWidth) + this.gradBias = Tensor[T](nOutputPlane) + this.gradWeight = Tensor[T](nOutputPlane, nInputPlane, kernelHeight, kernelWidth) + val fInput = Tensor[T]() + val fGradInput = Tensor[T]() reset() private var im2colTime = 0L @@ -44,41 +63,29 @@ class SpatialConvolution[@specialized(Float, Double) T:ClassTag] ( override def getClassPtr(): Long = classPtr - def getIm2ColTime() = im2colTime - def getCol2ImgTime() = col2imTime + def getIm2ColTime() : Long = im2colTime + def getCol2ImgTime() : Long = col2imTime def setInitMethod(initMethod: InitializationMethod): this.type = { this.initMethod = initMethod this } - // this is pointer to the 
layout of MKL used internal and the memory is allocated in native code. - // the magic codes are: - // layoutMKL(0) -> input - // layoutMKL(1) -> inputDiff / gradInput - // layoutMKL(2) -> output - // layoutMKL(3) -> outputDiff - // layoutMKL(4) -> kernel / filter - // layoutMKL(5) -> kernelDiff / gradWeight - // layoutMKL(6) -> bias - // layoutMKL(7) -> biasDiff / gradBias - val layoutMKL = Array.fill[Long](10)(-1) - - override def reset(): Unit ={ - val stdv = 1.0 /math.sqrt(kernelWidth * kernelHeight * nInputPlane) - weight.apply1(_=>ev.fromType[Double](RNG.uniform(0,1)*2*stdv - stdv)) //todo, better to support uniform - bias.apply1(_=>ev.fromType[Double](RNG.uniform(0,1)*2*stdv - stdv)) + override def reset(): Unit = { + val stdv = 1.0 / math.sqrt(kernelWidth * kernelHeight * nInputPlane) + // todo, better to support uniform + weight.apply1(_ => ev.fromType[Double](RNG.uniform(0, 1) * 2 * stdv - stdv)) + bias.apply1(_ => ev.fromType[Double](RNG.uniform(0, 1) * 2 * stdv - stdv)) } override def updateOutput(input: Tensor[T]): Tensor[T] = { - //var time = System.nanoTime() require(input.dim() == 3 || input.dim() == 4, "Only support 3D or 4D(batch mode) input") // TODO the requirement of contiguous input may be not necessary for MKL 2017. // because it supports the api of groups convolution. require(input.isContiguous(), "input is not contiguous") // compute the output height and width - def computeOut(input:Int, pad:Int, kernel:Int, stride:Int): Int = { + def computeOut(input: Int, pad: Int, kernel: Int, stride: Int): Int = { (input + 2 * pad - kernel) / stride + 1 } @@ -95,13 +102,6 @@ class SpatialConvolution[@specialized(Float, Double) T:ClassTag] ( // +---------+-------+-------+ // Table: Index of 3-dim/4-dim input - /* - for (i <- 1 to input.dim()) printf("%d\t", input.size(i)) - println("") - for (i <- 1 to input.dim()) printf("%d\t", input.stride(i)) - println("") - */ - val inputWidth = input.size(input.dim()) val inputHeight = input.size(input.dim() - 1) val inputChannel = input.size(input.dim() - 2) @@ -111,70 +111,102 @@ class SpatialConvolution[@specialized(Float, Double) T:ClassTag] ( // output number is as same as input number val outputNumber = inputNumber val outputChannel = nOutputPlane - val outputWidth = computeOut(inputWidth, padWidth, kernelWidth, strideWidth) - val outputHeight = computeOut(inputHeight, padHeight, kernelHeight, strideHeight) + val outputWidth = + computeOut(inputWidth, padWidth, kernelWidth, strideWidth) + val outputHeight = + computeOut(inputHeight, padHeight, kernelHeight, strideHeight) require(outputWidth >= 1 && outputHeight >= 1, "output size is too small") - if (input.dim() == 3) + if (input.dim() == 3) { output.resize(Array(outputChannel, outputHeight, outputWidth)) - else + } else { output.resize(Array(outputNumber, outputChannel, outputHeight, outputWidth)) + } // kernel number and bias number are as same as nOutputPlane - val biasNumber = nOutputPlane + val biasNumber = nOutputPlane val kernelNumber = nOutputPlane // TODO kernel channel equals to input channel now val kernelChannel = inputChannel - val inputOffset = input.storageOffset() - 1 + val inputOffset = input.storageOffset() - 1 val outputOffset = output.storageOffset() - 1 - val biasOffset = bias.storageOffset() - 1 + val biasOffset = bias.storageOffset() - 1 val kernelOffset = weight.storageOffset() - 1 if (firstPass) { ev.getType() match { - case "Double" => classPtr = MKL.ConvolutionInitDouble( - inputNumber, inputChannel, inputHeight, inputWidth, - kernelNumber, kernelChannel, 
kernelHeight, kernelWidth, strideHeight, strideWidth, padHeight, - padWidth, 4, groups) - case "Float" => classPtr = MKL.ConvolutionInitFloat( - inputNumber, inputChannel, inputHeight, inputWidth, - kernelNumber, kernelChannel, kernelHeight, kernelWidth, strideHeight, strideWidth, padHeight, - padWidth, 4, groups) - case _ => throw new UnsupportedOperationException(s"Only Float supported") + case "Double" => + classPtr = MKL.ConvolutionInitDouble(inputNumber, + inputChannel, + inputHeight, + inputWidth, + kernelNumber, + kernelChannel, + kernelHeight, + kernelWidth, + strideHeight, + strideWidth, + padHeight, + padWidth, + 4, + groups) + case "Float" => + classPtr = MKL.ConvolutionInitFloat(inputNumber, + inputChannel, + inputHeight, + inputWidth, + kernelNumber, + kernelChannel, + kernelHeight, + kernelWidth, + strideHeight, + strideWidth, + padHeight, + padWidth, + 4, + groups) + case _ => + throw new UnsupportedOperationException(s"Only Float supported") } firstPass = false } - implicit def bool2int(b:Boolean) = if (b) 1 else 0 + implicit def bool2int(b: Boolean) = if (b) 1 else 0 val start = System.nanoTime() ev.getType() match { - case "Double" => MKL.ConvolutionForwardDouble( - input.storage().array().asInstanceOf[Array[Double]], inputOffset, - output.storage().array().asInstanceOf[Array[Double]], outputOffset, - weight.storage().array().asInstanceOf[Array[Double]], kernelOffset, - bias.storage().array().asInstanceOf[Array[Double]], biasOffset, - classPtr - ) - case "Float" => MKL.ConvolutionForwardFloat( - input.storage().array().asInstanceOf[Array[Float]], inputOffset, - output.storage().array().asInstanceOf[Array[Float]], outputOffset, - weight.storage().array().asInstanceOf[Array[Float]], kernelOffset, - bias.storage().array().asInstanceOf[Array[Float]], biasOffset, - classPtr - ) - - case _ => throw new UnsupportedOperationException(s"Only Float supported") + case "Double" => + MKL.ConvolutionForwardDouble(input.storage().array().asInstanceOf[Array[Double]], + inputOffset, + output.storage().array().asInstanceOf[Array[Double]], + outputOffset, + weight.storage().array().asInstanceOf[Array[Double]], + kernelOffset, + bias.storage().array().asInstanceOf[Array[Double]], + biasOffset, + classPtr) + case "Float" => + MKL.ConvolutionForwardFloat(input.storage().array().asInstanceOf[Array[Float]], + inputOffset, + output.storage().array().asInstanceOf[Array[Float]], + outputOffset, + weight.storage().array().asInstanceOf[Array[Float]], + kernelOffset, + bias.storage().array().asInstanceOf[Array[Float]], + biasOffset, + classPtr) + + case _ => + throw new UnsupportedOperationException(s"Only Float supported") } - //println("[SCALA] spatialconvolution forward call JNI " + (System.nanoTime() - start) / 1e6) - output } - override def updateGradInput(input: Tensor[T], gradOutput: Tensor[T]) : Tensor[T] = { + override def updateGradInput(input: Tensor[T], gradOutput: Tensor[T]): Tensor[T] = { require(input.nDimension() == 3 || input.nDimension() == 4, "Only support 3D or 4D input") - require(nOutputPlane == (if (input.nDimension() == 3) gradOutput.size(1) else gradOutput.size(2)), - "Number of output features is not equal to nOutputPlane") + require(nOutputPlane == (if (input.nDimension() == 3) gradOutput.size(1) + else gradOutput.size(2)), + "Number of output features is not equal to nOutputPlane") require(input.isContiguous(), "input is not contiguous") require(gradInput.isContiguous(), "gradInput is not contiguous") gradInput.resizeAs(input) @@ -210,75 +242,115 @@ class 
SpatialConvolution[@specialized(Float, Double) T:ClassTag] ( val biasOffset = bias.storageOffset() - 1 val kernelOffset = weight.storageOffset() - 1 - implicit def bool2int(b:Boolean) = if (b) 1 else 0 + implicit def bool2int(b: Boolean) = if (b) 1 else 0 val start = System.nanoTime() if (needCompute) { ev.getType() match { - case "Double" => MKL.ConvolutionBackwardDataDouble( - input.storage().array().asInstanceOf[Array[Double]], inputOffset, - gradOutput.storage().array().asInstanceOf[Array[Double]], gradOutputOffset, - gradInput.storage().array().asInstanceOf[Array[Double]], gradInputOffset, - weight.storage().array().asInstanceOf[Array[Double]], kernelOffset, - bias.storage().array().asInstanceOf[Array[Double]], biasOffset, classPtr - ) - case "Float" => MKL.ConvolutionBackwardDataFloat( - input.storage().array().asInstanceOf[Array[Float]], inputOffset, - gradOutput.storage().array().asInstanceOf[Array[Float]], gradOutputOffset, - gradInput.storage().array().asInstanceOf[Array[Float]], gradInputOffset, - weight.storage().array().asInstanceOf[Array[Float]], kernelOffset, - bias.storage().array().asInstanceOf[Array[Float]], biasOffset, classPtr - ) - - case _ => throw new UnsupportedOperationException(s"Only Float/Double supported") + case "Double" => + MKL.ConvolutionBackwardDataDouble( + input.storage().array().asInstanceOf[Array[Double]], + inputOffset, + gradOutput.storage().array().asInstanceOf[Array[Double]], + gradOutputOffset, + gradInput.storage().array().asInstanceOf[Array[Double]], + gradInputOffset, + weight.storage().array().asInstanceOf[Array[Double]], + kernelOffset, + bias.storage().array().asInstanceOf[Array[Double]], + biasOffset, + classPtr + ) + case "Float" => + MKL.ConvolutionBackwardDataFloat( + input.storage().array().asInstanceOf[Array[Float]], + inputOffset, + gradOutput.storage().array().asInstanceOf[Array[Float]], + gradOutputOffset, + gradInput.storage().array().asInstanceOf[Array[Float]], + gradInputOffset, + weight.storage().array().asInstanceOf[Array[Float]], + kernelOffset, + bias.storage().array().asInstanceOf[Array[Float]], + biasOffset, + classPtr + ) + + case _ => + throw new UnsupportedOperationException(s"Only Float/Double supported") } } ev.getType() match { case "Double" => MKL.ConvolutionBackwardKernelDouble( - input.storage().array().asInstanceOf[Array[Double]], inputOffset, - gradOutput.storage().array().asInstanceOf[Array[Double]], gradOutputOffset, - gradWeight.storage().array().asInstanceOf[Array[Double]], gradKernelOffset, - weight.storage().array().asInstanceOf[Array[Double]], kernelOffset, - bias.storage().array().asInstanceOf[Array[Double]], biasOffset, classPtr + input.storage().array().asInstanceOf[Array[Double]], + inputOffset, + gradOutput.storage().array().asInstanceOf[Array[Double]], + gradOutputOffset, + gradWeight.storage().array().asInstanceOf[Array[Double]], + gradKernelOffset, + weight.storage().array().asInstanceOf[Array[Double]], + kernelOffset, + bias.storage().array().asInstanceOf[Array[Double]], + biasOffset, + classPtr ) case "Float" => MKL.ConvolutionBackwardKernelFloat( - input.storage().array().asInstanceOf[Array[Float]], inputOffset, - gradOutput.storage().array().asInstanceOf[Array[Float]], gradOutputOffset, - gradWeight.storage().array().asInstanceOf[Array[Float]], gradKernelOffset, - weight.storage().array().asInstanceOf[Array[Float]], kernelOffset, - bias.storage().array().asInstanceOf[Array[Float]], biasOffset, classPtr + input.storage().array().asInstanceOf[Array[Float]], + inputOffset, + 
gradOutput.storage().array().asInstanceOf[Array[Float]], + gradOutputOffset, + gradWeight.storage().array().asInstanceOf[Array[Float]], + gradKernelOffset, + weight.storage().array().asInstanceOf[Array[Float]], + kernelOffset, + bias.storage().array().asInstanceOf[Array[Float]], + biasOffset, + classPtr ) - case _ => throw new UnsupportedOperationException(s"Only Float/Double supported") + case _ => + throw new UnsupportedOperationException(s"Only Float/Double supported") } ev.getType() match { case "Double" => MKL.ConvolutionBackwardBiasDouble( - input.storage().array().asInstanceOf[Array[Double]], inputOffset, - gradOutput.storage().array().asInstanceOf[Array[Double]], gradOutputOffset, - gradBias.storage().array().asInstanceOf[Array[Double]], gradBiasOffset, - weight.storage().array().asInstanceOf[Array[Double]], kernelOffset, - bias.storage().array().asInstanceOf[Array[Double]], biasOffset, classPtr + input.storage().array().asInstanceOf[Array[Double]], + inputOffset, + gradOutput.storage().array().asInstanceOf[Array[Double]], + gradOutputOffset, + gradBias.storage().array().asInstanceOf[Array[Double]], + gradBiasOffset, + weight.storage().array().asInstanceOf[Array[Double]], + kernelOffset, + bias.storage().array().asInstanceOf[Array[Double]], + biasOffset, + classPtr ) case "Float" => MKL.ConvolutionBackwardBiasFloat( - input.storage().array().asInstanceOf[Array[Float]], inputOffset, - gradOutput.storage().array().asInstanceOf[Array[Float]], gradOutputOffset, - gradBias.storage().array().asInstanceOf[Array[Float]], gradBiasOffset, - weight.storage().array().asInstanceOf[Array[Float]], kernelOffset, - bias.storage().array().asInstanceOf[Array[Float]], biasOffset, classPtr + input.storage().array().asInstanceOf[Array[Float]], + inputOffset, + gradOutput.storage().array().asInstanceOf[Array[Float]], + gradOutputOffset, + gradBias.storage().array().asInstanceOf[Array[Float]], + gradBiasOffset, + weight.storage().array().asInstanceOf[Array[Float]], + kernelOffset, + bias.storage().array().asInstanceOf[Array[Float]], + biasOffset, + classPtr ) - case _ => throw new UnsupportedOperationException(s"Only Float/Double supported") + case _ => + throw new UnsupportedOperationException(s"Only Float/Double supported") } - //println("[SCALA] spatialconvolution backward call JNI " + (System.nanoTime() - start) / 1e6) gradInput } - override def updateParameters(learningRate:T): Unit ={ - weight.map(gradWeight, (a, b)=>ev.minus(a, ev.times(learningRate,b))) - bias.map(gradBias,(a,b)=>ev.minus(a, ev.times(learningRate,b))) + override def updateParameters(learningRate: T): Unit = { + weight.map(gradWeight, (a, b) => ev.minus(a, ev.times(learningRate, b))) + bias.map(gradBias, (a, b) => ev.minus(a, ev.times(learningRate, b))) } override def zeroGradParameters(): Unit = { @@ -286,52 +358,70 @@ class SpatialConvolution[@specialized(Float, Double) T:ClassTag] ( gradBias.zero() } - override def parameters(): (Array[Tensor[T]], Array[Tensor[T]]) ={ + override def parameters(): (Array[Tensor[T]], Array[Tensor[T]]) = { (Array(this.weight, this.bias), Array(this.gradWeight, this.gradBias)) } - override def equals(obj : Any) : Boolean = { - if(!super.equals(obj)) { + override def equals(obj: Any): Boolean = { + if (!super.equals(obj)) { return false } - if(!obj.isInstanceOf[SpatialConvolution[T]]) - return false + if (!obj.isInstanceOf[SpatialConvolution[T]]) { return false } val other = obj.asInstanceOf[SpatialConvolution[T]] - if(this.eq(other)) - return true + if (this.eq(other)) { return true } nInputPlane == 
other.nInputPlane && - nOutputPlane == other.nOutputPlane && - kernelWidth == other.kernelWidth && - kernelHeight == other.kernelHeight && - strideWidth == other.strideWidth && - strideHeight == other.strideHeight && - padWidth == other.padWidth && - padHeight == other.padHeight && - weight == other.weight && - bias == other.bias && - gradWeight == other.gradWeight && - gradBias == other.gradBias + nOutputPlane == other.nOutputPlane && + kernelWidth == other.kernelWidth && + kernelHeight == other.kernelHeight && + strideWidth == other.strideWidth && + strideHeight == other.strideHeight && + padWidth == other.padWidth && + padHeight == other.padHeight && + weight == other.weight && + bias == other.bias && + gradWeight == other.gradWeight && + gradBias == other.gradBias } - override def toString() : String = { - s"mkl.SpatialConvolution($nInputPlane -> $nOutputPlane, $kernelWidth x $kernelHeight, $strideWidth, $strideHeight, $padWidth, $padHeight)" + override def hashCode() : Int = { + val seed = 37 + var hash = super.hashCode() + hash = hash * seed + nInputPlane.hashCode() + hash = hash * seed + nOutputPlane.hashCode() + hash = hash * seed + kernelWidth.hashCode() + hash = hash * seed + kernelHeight.hashCode() + hash = hash * seed + strideWidth.hashCode() + hash = hash * seed + strideHeight.hashCode() + hash = hash * seed + padWidth.hashCode() + hash = hash * seed + padWidth.hashCode() + hash = hash * seed + weight.hashCode() + hash = hash * seed + bias.hashCode() + hash = hash * seed + gradWeight.hashCode() + hash = hash * seed + gradBias.hashCode() + + hash } - override def findModel(paramOffset : Int, indexes : Array[Int]) : (Module[T], Int, Array[Int]) = { - (this, paramOffset - nOutputPlane * nInputPlane * kernelHeight * kernelWidth - nOutputPlane, indexes) + override def toString(): String = { + s"""mkl.SpatialConvolution($nInputPlane -> $nOutputPlane, $kernelWidth x $kernelHeight, + $strideWidth, $strideHeight, $padWidth, $padHeight)""" } - /*mkl-dnn's convolution_backward has done updateGradInput and accGradParameters, so accGradParameters does nothing - * - override def updateGradInput(input: Tensor[T], gradOutput: Tensor[T]): Tensor[T] = { - backward(input, gradOutput) + override def findModel(paramOffset: Int, indexes: Array[Int]): (Module[T], Int, Array[Int]) = { + (this, + paramOffset - nOutputPlane * nInputPlane * kernelHeight * kernelWidth - nOutputPlane, + indexes) } - */ - override def accGradParameters(input: Tensor[T], gradOutput: Tensor[T], scale: Double = 1.0): Unit = { + // mkl-dnn's convolution_backward has done updateGradInput and accGradParameters, + // so accGradParameters does nothing + // override def updateGradInput(input: Tensor[T], gradOutput: Tensor[T]): Tensor[T] = { + // backward(input, gradOutput) + // } - } + override def accGradParameters(input: Tensor[T], + gradOutput: Tensor[T], + scale: Double = 1.0): Unit = {} } - diff --git a/dl/src/test/scala/com/intel/analytics/sparkdl/nn/mkl/GoogLeNetSpec.scala b/dl/src/test/scala/com/intel/analytics/sparkdl/nn/mkl/GoogLeNetSpec.scala new file mode 100644 index 00000000000..cc127c24ff3 --- /dev/null +++ b/dl/src/test/scala/com/intel/analytics/sparkdl/nn/mkl/GoogLeNetSpec.scala @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.intel.analytics.sparkdl.nn.mkl + +import com.intel.analytics.sparkdl.models._ +import org.scalatest.FlatSpec + +class GoogLeNetSpec extends FlatSpec{ + "GoogLeNet V1 with mkl dnn" should "ends with no segment fault" in { + Perf.performance[Float](new Params(batchSize = 32, module = "alexnet")) + } +} From 70c0b6b574333fd6e55129f26ec8460c0cb2dacb Mon Sep 17 00:00:00 2001 From: Wang Yanzhang Date: Sat, 24 Sep 2016 12:23:05 +0800 Subject: [PATCH 05/12] add input size and strides to pooling --- .gitignore | 1 + mkl/native/src/main/c/jni/pooling.cpp | 30 ++++++++++++++++++++------- 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/.gitignore b/.gitignore index 796f2a7c355..3ef13efe3ba 100644 --- a/.gitignore +++ b/.gitignore @@ -20,3 +20,4 @@ project/plugins/project/ # other *.txt +*.swp # vim swap file diff --git a/mkl/native/src/main/c/jni/pooling.cpp b/mkl/native/src/main/c/jni/pooling.cpp index 9ab1fbee322..be3b077b9b3 100644 --- a/mkl/native/src/main/c/jni/pooling.cpp +++ b/mkl/native/src/main/c/jni/pooling.cpp @@ -25,6 +25,9 @@ class MKLPooling : public MKLLayer private: std::shared_ptr> workspace; + size_t inputSize[4]; + size_t inputStrides[4]; + size_t kernelSize[2]; size_t outputSizeCeil[4]; @@ -89,6 +92,15 @@ void MKLPooling::init(size_t inputNumber, size_t inputChannel, this->ceilMode = ceilMode; + inputSize[0] = inputWidth; + inputSize[1] = inputHeight; + inputSize[2] = inputChannel; + inputSize[3] = inputNumber; + + inputStrides[0] = 1; + for (int i = 1; i < 4; i++) + inputStrides[i] = inputStrides[i - 1] * inputSize[i - 1]; + // compute output outputSizeCeil[0] = computeOut(inputWidth, padWidth, kernelWidth, strideWidth, true); @@ -117,6 +129,8 @@ void MKLPooling::init(size_t inputNumber, size_t inputChannel, this->ceilMode = true; // create usr layout. 
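The pooling init above fills inputSize in (width, height, channel, number) order and derives inputStrides cumulatively before handing both to the createUsrLayout calls just below, so the usr layout describes the flat Java array exactly as the tensor stores it. A minimal Scala sketch of that layout arithmetic, assuming a contiguous NCHW tensor; the object name and the sample shape are illustrative, not taken from the patch:

object LayoutSketch {
  // Sizes are ordered fastest-varying first, as in the native code:
  // (width, height, channel, number) for an NCHW tensor.
  def sizesAndStrides(n: Int, c: Int, h: Int, w: Int): (Array[Int], Array[Int]) = {
    val size = Array(w, h, c, n)
    val strides = new Array[Int](4)
    strides(0) = 1
    for (i <- 1 until 4) strides(i) = strides(i - 1) * size(i - 1)
    (size, strides)
  }

  def main(args: Array[String]): Unit = {
    val (size, strides) = sizesAndStrides(n = 32, c = 3, h = 224, w = 224)
    println(size.mkString(", "))    // 224, 224, 3, 32
    println(strides.mkString(", ")) // 1, 224, 50176, 150528
  }
}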
+ this->input->createUsrLayout(dimension, inputSize, inputStrides); + this->gradInput->createUsrLayout(dimension, inputSize, inputStrides); if (this->ceilMode) { this->output->createUsrLayout(dimension, outputSizeCeil, outputStridesCeil); this->gradOutput->createUsrLayout(dimension, outputSizeCeil, @@ -349,15 +363,15 @@ void JNIPoolingUpdateGradInput(JNIEnv *env, jclass thisClass, ArrayType input, extern "C" { #endif -// Double -PoolingInit(Double, jdouble, jdoubleArray) - PoolingForward(Double, jdouble, jdoubleArray) - PoolingBackward(Double, jdouble, jdoubleArray) + // Double + PoolingInit(Double, jdouble, jdoubleArray); + PoolingForward(Double, jdouble, jdoubleArray); + PoolingBackward(Double, jdouble, jdoubleArray); - // Float - PoolingInit(Float, jfloat, jfloatArray) - PoolingForward(Float, jfloat, jfloatArray) - PoolingBackward(Float, jfloat, jfloatArray) + // Float + PoolingInit(Float, jfloat, jfloatArray); + PoolingForward(Float, jfloat, jfloatArray); + PoolingBackward(Float, jfloat, jfloatArray); #ifdef __cplusplus } From 88f005275e5a30ddebcdc5bc3276af526f6b9825 Mon Sep 17 00:00:00 2001 From: Wang Yanzhang Date: Sat, 24 Sep 2016 12:26:52 +0800 Subject: [PATCH 06/12] add concat support --- .../analytics/sparkdl/nn/mkl/Concat.scala | 255 ++++++++++++++ .../com/intel/analytics/sparkdl/mkl/MKL.java | 8 + mkl/native/pom.xml | 1 + mkl/native/src/main/c/jni/MKLWrapper.h | 39 +++ mkl/native/src/main/c/jni/concat.cpp | 331 ++++++++++++++++++ mkl/native/src/main/c/jni/memory.h | 11 +- 6 files changed, 644 insertions(+), 1 deletion(-) create mode 100644 dl/src/main/scala/com/intel/analytics/sparkdl/nn/mkl/Concat.scala create mode 100644 mkl/native/src/main/c/jni/concat.cpp diff --git a/dl/src/main/scala/com/intel/analytics/sparkdl/nn/mkl/Concat.scala b/dl/src/main/scala/com/intel/analytics/sparkdl/nn/mkl/Concat.scala new file mode 100644 index 00000000000..9d3af1cb0dd --- /dev/null +++ b/dl/src/main/scala/com/intel/analytics/sparkdl/nn/mkl/Concat.scala @@ -0,0 +1,255 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * ATTENTION: MKL version. The start and end layer must be MKL version too. + * Currently, it supports BatchNormalization, Linear, LRN, Pooling(Avg, Max), + * ReLU and SpatialConvolution. 
+ */ + +package com.intel.analytics.sparkdl.nn.mkl + +import com.intel.analytics.sparkdl.nn.{Container, Module} +import com.intel.analytics.sparkdl.tensor.Tensor +import com.intel.analytics.sparkdl.tensor.TensorNumericMath.TensorNumeric +import com.intel.analytics.sparkdl.mkl.MKL + +import scala.reflect.ClassTag + +class Concat[T: ClassTag](val dimension: Int)(implicit ev: TensorNumeric[T]) extends Container[T] { + + private var size: Array[Int] = null + private var gradouts: Array[Tensor[T]] = null + private var gradOutputs: Array[Array[T]] = Array[Array[T]]() + + var classPtr : Long = 0L + var firstPass: Boolean = true + + override def getClassPtr(): Long = classPtr + + def getSize(): Array[Int] = { + return size + } + + override def updateOutput(input: Tensor[T]): Tensor[T] = { + // TODO should check the size of every tensor. It must be same as the first tensor + val outs = new Array[Tensor[T]](this.modules.length) + var i = 0 + while (i < this.modules.length) { + val currentOutput = this.modules(i).updateOutput(input) + outs(i) = currentOutput + if (i == 0) { + this.size = currentOutput.size() + } else { + this.size(this.dimension - 1) += currentOutput.size(this.dimension) + } + i += 1 + } + + this.output.resize(this.size) + // TODO call mkl native code to update output + // TODO dimension here is different with "dimension" in MKL 2017 + // TODO check all dimensions of input tensors are same + if (firstPass) { + val nDimension = outs(0).nDimension() + val inputSize: Array[Int] = new Array[Int](this.modules.length * nDimension) + + for (i <- 0 until this.modules.length) { + for (j <- 0 until nDimension) { + inputSize(i * nDimension + j) = outs(i).size(nDimension - j) + } + } + + ev.getType() match { + case "Double" => + classPtr = MKL.ConcatInitDouble(this.modules.length, nDimension, inputSize) + case "Float" => + classPtr = MKL.ConcatInitFloat(this.modules.length, nDimension, inputSize) + case _ => + throw new UnsupportedOperationException(s"Only Float supported") + } + firstPass = false + } + + // get all of the tensors in outs to float/double array + val inputs: Array[Array[T]] = new Array[Array[T]](this.modules.length) + val inputsOffset: Array[Int] = new Array[Int](this.modules.length) + for (i <- 0 until this.modules.length) { + inputs(i) = outs(i).storage().array() + inputsOffset(i) = outs(i).storageOffset() - 1 + } + + + ev.getType() match { + case "Double" => + MKL.ConcatForwardDouble(inputs.asInstanceOf[Array[Array[Double]]], + inputsOffset, + output.storage().array().asInstanceOf[Array[Double]], + output.storageOffset() - 1, + classPtr) + case "Float" => + MKL.ConcatForwardFloat(inputs.asInstanceOf[Array[Array[Float]]], + inputsOffset, + output.storage().array().asInstanceOf[Array[Float]], + output.storageOffset() - 1, + classPtr) + case _ => + throw new UnsupportedOperationException(s"Only Float supported") + } + + this.output + } + + // TODO should we implement this function, what's the difference from @backward + override def updateGradInput(input: Tensor[T], gradOutput: Tensor[T]): Tensor[T] = { +// this.gradInput.resizeAs(input) +// +// var offset = 1 +// var i = 0 +// while (i < this.modules.length) { +// val currentOutput = this.modules(i).output +// val currentGradInput = this.modules(i).updateGradInput(input, +// gradOutput.narrow(dimension, offset, currentOutput.size(dimension))) +// +// if (currentGradInput != null) { +// if (i == 0) { +// this.gradInput.copy(currentGradInput) +// } else { +// this.gradInput.add(currentGradInput) +// } +// } +// i += 1 +// offset 
+= currentOutput.size(dimension) +// } + + this.gradInput + } + + override def backward(input: Tensor[T], gradOutput: Tensor[T]): Tensor[T] = { + // TODO call mkl native code to update gradient input + var totalSize : Long = 0L + this.gradInput.resizeAs(input) + if (gradouts == null || gradouts.length != this.modules.length) { + gradouts = new Array[Tensor[T]](this.modules.length) + } + val gradOutputs: Array[Array[T]] = new Array[Array[T]](this.modules.length) + val gradOutputsOffset: Array[Int] = new Array[Int](this.modules.length) + for (i <- 0 until this.modules.length) { + if (gradouts(i) == null) gradouts(i) = Tensor() + gradouts(i).resizeAs(this.modules(i).output) + gradOutputs(i) = gradouts(i).storage().array() + gradOutputsOffset(i) = gradouts(i).storageOffset() - 1 + } + + ev.getType() match { + case "Double" => + MKL.ConcatBackwardDouble(gradOutputs.asInstanceOf[Array[Array[Double]]], + gradOutputsOffset, + gradOutput.storage().array().asInstanceOf[Array[Double]], + gradOutput.storageOffset() - 1, + classPtr) + case "Float" => + MKL.ConcatBackwardFloat(gradOutputs.asInstanceOf[Array[Array[Float]]], + gradOutputsOffset, + gradOutput.storage().array().asInstanceOf[Array[Float]], + gradOutput.storageOffset() - 1, + classPtr) + case _ => + throw new UnsupportedOperationException(s"Only Float / Double is supported") + } + + for (i <- 0 until this.modules.length) { + val currentOutput = this.modules(i).output + val currentGradInput = this.modules(i).backward(input, gradouts(i)) + + // It can't be converted to mkl dnn concat forward, becaus the size of all + // gradient input is the same. + // copy method here doesn't costs too much + // TODO convert to eltwise + if (currentGradInput != null) { + if (i == 0) { + this.gradInput.copy(currentGradInput) + } else { + this.gradInput.add(currentGradInput) + } + } + } + + this.gradInput + } + + override def equals(obj: Any): Boolean = { + if (!super.equals(obj)) { + return false + } + + if (!obj.isInstanceOf[Concat[T]]) { + return false + } + val other = obj.asInstanceOf[Concat[T]] + if (this.eq(other)) { + return true + } + if (dimension != other.dimension) { + return false + } + + if (this.modules.length != other.modules.length) { + return false + } + + val moduleLength = modules.length + var i = 0 + while (i < moduleLength) { + if (modules(i) != other.modules(i)) { + return false + } + i += 1 + } + + true + } + override def hashCode(): Int = { + + val seed = 37 + var hash = super.hashCode() + var i = 0 + val moduleLength = modules.length + while (i < moduleLength) { + hash = hash * seed + modules(i).hashCode() + i += 1 + } + + hash + } + + override def toString(): String = { + val tab = " " + val next = " |`-> " + val last = " ... 
-> " + val ext = " | " + val extlast = " " + s"mkl.Concat {$line${tab}input$line${modules.zipWithIndex.map { + case (model: Module[T], index: Int) => + s"$tab$next(${index + 1}): ${if (index == modules.length - 1) { + model.setLine(line + tab + extlast) + } else { + model.setLine(line + tab + ext) + }}" + }.mkString(line)}$line$tab${last}output$line$tab}" + } +} diff --git a/mkl/jni/src/main/java/com/intel/analytics/sparkdl/mkl/MKL.java b/mkl/jni/src/main/java/com/intel/analytics/sparkdl/mkl/MKL.java index 08cb838f9f7..4f6c882e63f 100644 --- a/mkl/jni/src/main/java/com/intel/analytics/sparkdl/mkl/MKL.java +++ b/mkl/jni/src/main/java/com/intel/analytics/sparkdl/mkl/MKL.java @@ -261,4 +261,12 @@ public native static void LinearBackwardBiasDouble( double[] input, int inputOffset, double[] gradOutput, int gradOutputOffset, double[] gradBias, int gradBiasOffset, double[] kernel, int kernelOffset, double[] bias, int biasOffset, long classPtr); + + /* Concat API */ + public native static long ConcatInitFloat(int numChannels, int dimension, int[] size); + public native static void ConcatForwardFloat(float[][] input, int[] inputOffset, float[] output, int outputOffset, long classPtr); + public native static void ConcatBackwardFloat(float[][] gradInput, int[] gradInputOffset, float[] output, int outputOffset, long classPtr); + public native static long ConcatInitDouble(int numChannels, int dimension, int[] size); + public native static void ConcatForwardDouble(double[][] input, int[] inputOffset, double[] output, int outputOffset, long classPtr); + public native static void ConcatBackwardDouble(double[][] gradInput, int[] gradInputOffset, double[] output, int outputOffset, long classPtr); } diff --git a/mkl/native/pom.xml b/mkl/native/pom.xml index 1eaad8f69e9..e48e148d391 100644 --- a/mkl/native/pom.xml +++ b/mkl/native/pom.xml @@ -54,6 +54,7 @@ linear.cpp relu.cpp batch_norm.cpp + concat.cpp utils.cpp debug.cpp diff --git a/mkl/native/src/main/c/jni/MKLWrapper.h b/mkl/native/src/main/c/jni/MKLWrapper.h index 09da9adee8d..9b1bf4a70e8 100644 --- a/mkl/native/src/main/c/jni/MKLWrapper.h +++ b/mkl/native/src/main/c/jni/MKLWrapper.h @@ -468,4 +468,43 @@ dnnError_t dnnInnerProductCreateBackwardBias( return dnnInnerProductCreateBackwardBias_F64(pInnerProduct, attributes, dimentions, dstSize); } + +template +dnnError_t dnnConcatCreate(dnnPrimitive_t *pConcat, + dnnPrimitiveAttributes_t attributes, + size_t nSrcTensors, dnnLayout_t *src) +{ + return dnnConcatCreate_F32(pConcat, attributes, nSrcTensors, src); +} + +template <> +dnnError_t dnnConcatCreate(dnnPrimitive_t *pConcat, + dnnPrimitiveAttributes_t attributes, + size_t nSrcTensors, dnnLayout_t *src) +{ + return dnnConcatCreate_F64(pConcat, attributes, nSrcTensors, src); +} + +template +dnnError_t dnnSplitCreate(dnnPrimitive_t *pSplit, + dnnPrimitiveAttributes_t attributes, + const size_t nDstTensors, dnnLayout_t layout, + size_t dstChannelSize[]) +{ + + return dnnSplitCreate_F32(pSplit, attributes, nDstTensors, layout, + dstChannelSize); +} + +template <> +dnnError_t dnnSplitCreate(dnnPrimitive_t *pSplit, + dnnPrimitiveAttributes_t attributes, + const size_t nDstTensors, dnnLayout_t layout, + size_t dstChannelSize[]) +{ + + return dnnSplitCreate_F64(pSplit, attributes, nDstTensors, layout, + dstChannelSize); +} + #endif diff --git a/mkl/native/src/main/c/jni/concat.cpp b/mkl/native/src/main/c/jni/concat.cpp new file mode 100644 index 00000000000..f3b8fb557f6 --- /dev/null +++ b/mkl/native/src/main/c/jni/concat.cpp @@ -0,0 +1,331 @@ +#include 
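The MKLConcat layer implemented in this new file takes every branch's flat output array and lays the branches side by side along the channel axis of a single output blob, whose channel count is the sum of the branch channel counts. A self-contained Scala sketch of that forward semantics, assuming contiguous NCHW storage; the object name and sample shapes are illustrative, not taken from the patch:

object ConcatSketch {
  // Concatenate per-branch NCHW blobs (stored flat, row-major) along the
  // channel axis into one blob whose channel count is the sum of the parts.
  def concatChannels(inputs: Seq[Array[Float]], n: Int, channels: Seq[Int],
                     h: Int, w: Int): Array[Float] = {
    require(inputs.length == channels.length)
    val totalC = channels.sum
    val plane = h * w
    val out = new Array[Float](n * totalC * plane)
    for (img <- 0 until n) {
      var cOut = 0
      for ((branch, c) <- inputs.zip(channels)) {
        System.arraycopy(branch, img * c * plane, out, (img * totalC + cOut) * plane, c * plane)
        cOut += c
      }
    }
    out
  }

  def main(args: Array[String]): Unit = {
    val a = Array.fill(1 * 2 * 2 * 2)(1f) // N = 1, C = 2, H = W = 2
    val b = Array.fill(1 * 3 * 2 * 2)(2f) // N = 1, C = 3, H = W = 2
    val out = concatChannels(Seq(a, b), n = 1, channels = Seq(2, 3), h = 2, w = 2)
    println(out.mkString(" ")) // eight 1.0 values followed by twelve 2.0 values
  }
}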
+#include + +#include "debug.h" +#include "layer.h" +#include "memory.h" +#include "utils.h" + +using namespace std; + +template +class MKLConcat : public MKLLayer +{ + public: + MKLConcat(); + ~MKLConcat(); + + void init(int numConcats, int dimension, int *size); + + void updateOutput(DType **input, DType *output); + void updateGradInput(DType **gradInput, DType *gradOutput); + + // attention, we will override the four variables of MKLLayer + vector>> input; + vector>> gradInput; + + private: + // this method is not the same as createMklLayout in MKLMemory + void firstPass(); + void preExecute(DType *input); + + int numConcats; // number of concats + size_t *numSplits; +}; + +template +MKLConcat::MKLConcat() : numSplits(NULL), numConcats(0) +{ + // TODO +} + +template +MKLConcat::~MKLConcat() +{ + // TODO + delete[] numSplits; +} + +template +void MKLConcat::init(int numConcats, int dimension, int *size) +{ + this->numConcats = numConcats; + this->dimension = dimension; + this->numSplits = new size_t[numConcats]; + + size_t inputSize[dimension]; + size_t inputStrides[dimension]; + size_t outputSize[dimension]; + size_t outputStrides[dimension]; + + int offset = 0; + size_t channels = 0; + + for (int i = 0; i < numConcats; i++) { + input.push_back(shared_ptr>(new MKLData)); + gradInput.push_back(shared_ptr>(new MKLData)); + + // set the size. + // the size of every channel should be gaved in size. + // the dimension of every channel should be the same. + inputStrides[0] = 1; + inputSize[0] = size[offset]; + for (int j = 1; j < dimension; j++) { + inputSize[j] = size[offset + j]; + inputStrides[j] = inputStrides[j - 1] * inputSize[j - 1]; + } + offset += dimension; + + // we must be sure that inputSize[2] is channels, or it will be 1 + // if dimension == 2, which means there are only height and width. -> height + // if dimension > 2, which means there is channel in the tensor, -> channel + numSplits[i] = dimension <= 2 ? inputSize[1] : inputSize[2]; + channels += numSplits[i]; + + this->input[i]->createUsrLayout(dimension, inputSize, inputStrides); + this->gradInput[i]->createUsrLayout(dimension, inputSize, inputStrides); + } + + // the output size should be equal to the first input size, besides channel + // the channel of output (outputSize[2]) should be the sum of all + // input channels. 
+ // the number of output is only 1 + outputStrides[0] = 1; + outputSize[0] = inputSize[0]; + for (int i = 1; i < dimension; i++) { + if (i == 2) + outputSize[i] = channels; + else + outputSize[i] = inputSize[i]; + outputStrides[i] = outputStrides[i - 1] * outputSize[i - 1]; + } + + this->output->createUsrLayout(dimension, outputSize, outputStrides); + this->gradOutput->createUsrLayout(dimension, outputSize, outputStrides); +} + +template +void MKLConcat::firstPass() +{ + dnnLayout_t *layouts = new dnnLayout_t[numConcats]; + + for (int i = 0; i < numConcats; i++) { + layouts[i] = this->input[i]->getUsrLayout(); + } + + dnnError_t status = E_UNIMPLEMENTED; + status = + dnnConcatCreate(&(this->forwardPrim), NULL, numConcats, layouts); + CHECK_EQ(status, E_SUCCESS); + + this->output->createMklLayout(this->forwardPrim, dnnResourceDst); + this->gradOutput->createMklLayout(this->forwardPrim, dnnResourceDst); + + // backward + status = dnnSplitCreate(&(this->backwardPrim), NULL, numConcats, + this->gradOutput->getMklLayout(), numSplits); + CHECK_EQ(status, E_SUCCESS); + + for (int i = 0; i < numConcats; i++) { + this->input[i]->createMklLayout( + this->forwardPrim, (dnnResourceType_t)(dnnResourceMultipleSrc + i)); + + // TODO comes from caffe, it's different with others (DiffSrc/DiffDst) + this->gradInput[i]->createMklLayout( + this->backwardPrim, (dnnResourceType_t)(dnnResourceMultipleDst + i)); + } + + delete[] layouts; + + this->isFirstPass = false; +} + +template +void MKLConcat::updateOutput(DType **input, DType *output) +{ + if (this->isFirstPass) firstPass(); + + for (int i = 0; i < numConcats; i++) { + this->input[i]->setUsrData(input[i]); + this->input[i]->createConversion(); + } + this->output->setUsrData(output); + this->output->createConversion(); + + dnnError_t status; + void *resources[dnnResourceNumber]; + + for (int i = 0; i < numConcats; i++) { + resources[dnnResourceMultipleSrc + i] = this->input[i]->getConvertedData(); + } + resources[dnnResourceDst] = this->output->getData(); + + PERFSTART(); + status = dnnExecute(this->forwardPrim, resources); + PERFEND("main computing"); + + if (!this->output->isUseNext()) this->output->backToUsr(); +} + +template +void MKLConcat::updateGradInput(DType **gradInput, DType *gradOutput) +{ + for (int i = 0; i < numConcats; i++) { + this->gradInput[i]->setUsrData(gradInput[i]); + this->gradInput[i]->createConversion(); + } + this->gradOutput->setUsrData(gradOutput); + this->gradOutput->createConversion(); + + dnnError_t status; + void *resources[dnnResourceNumber]; + + for (int i = 0; i < numConcats; i++) { + resources[dnnResourceMultipleDst + i] = this->gradInput[i]->getData(); + } + resources[dnnResourceSrc] = this->gradOutput->getConvertedData(); + + PERFSTART(); + status = dnnExecute(this->backwardPrim, resources); + PERFEND("main computing"); + + for (int i = 0; i < numConcats; i++) { + if (!this->gradInput[i]->isUsePrev()) this->gradInput[i]->backToUsr(); + } +} + +template +jlong JNIConcatInit(JNIEnv *env, jclass thisClass, int numConcats, + int dimension, jintArray size) +{ + MKLConcat *ptr = new MKLConcat(); + + jint *jSize = + reinterpret_cast(env->GetPrimitiveArrayCritical(size, 0)); + ptr->init(numConcats, dimension, jSize); + env->ReleasePrimitiveArrayCritical(size, jSize, 0); + + return reinterpret_cast(ptr); +} + +template +void JNIConcatUpdateOutput(JNIEnv *env, jclass thisClass, jobjectArray input, + jintArray inputOffset, ArrayType output, + jint outputOffset, long classPtr) +{ + MKLConcat *ptr = reinterpret_cast *>(classPtr); 
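For the backward pass the layer builds a split primitive (the dnnSplitCreate call above) whose per-destination channel counts come from numSplits, so the incoming gradOutput is sliced back into one gradient blob per branch. A Scala sketch of that slicing under the same contiguous NCHW assumption; names and shapes are illustrative, not taken from the patch:

object SplitSketch {
  // Inverse of the channel concat: slice a flat NCHW gradOutput back into
  // per-branch blobs; splits holds each branch's channel count.
  def splitChannels(grad: Array[Float], n: Int, splits: Seq[Int],
                    h: Int, w: Int): Seq[Array[Float]] = {
    val totalC = splits.sum
    val plane = h * w
    val outs = splits.map(c => new Array[Float](n * c * plane))
    for (img <- 0 until n) {
      var cIn = 0
      for ((dst, c) <- outs.zip(splits)) {
        System.arraycopy(grad, (img * totalC + cIn) * plane, dst, img * c * plane, c * plane)
        cIn += c
      }
    }
    outs
  }

  def main(args: Array[String]): Unit = {
    val grad = Array.tabulate(1 * 5 * 1 * 1)(_.toFloat) // N = 1, C = 5, H = W = 1
    val Seq(g1, g2) = splitChannels(grad, n = 1, splits = Seq(2, 3), h = 1, w = 1)
    println(g1.mkString(" ")) // 0.0 1.0
    println(g2.mkString(" ")) // 2.0 3.0 4.0
  }
}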
+ + jint *jInputOffset = + reinterpret_cast(env->GetPrimitiveArrayCritical(inputOffset, 0)); + + // TODO we should re-write, this version makes a little complict. + int len = env->GetArrayLength(input); + DType *inputArrStart[len]; + DType *inputArr[len]; + ArrayType jInputArr[len]; + for (int i = 0; i < len; i++) { + jInputArr[i] = (ArrayType)(env->GetObjectArrayElement(input, i)); + inputArrStart[i] = reinterpret_cast( + env->GetPrimitiveArrayCritical(jInputArr[i], 0)); + inputArr[i] = inputArrStart[i] + jInputOffset[i]; + } + + std::shared_ptr> jOutput( + new ZipArray(env, output, outputOffset, ptr->output)); + + ptr->updateOutput(inputArr, jOutput->getPtr()); + + for (int i = 0; i < len; i++) { + env->ReleasePrimitiveArrayCritical(jInputArr[i], inputArrStart[i], 0); + } + + env->ReleasePrimitiveArrayCritical(inputOffset, jInputOffset, 0); +} + +template +void JNIConcatUpdateGradInput(JNIEnv *env, jclass thisClass, + jobjectArray inputDiff, jintArray inputDiffOffset, + ArrayType outputDiff, jint outputDiffOffset, + long classPtr) +{ + MKLConcat *ptr = reinterpret_cast *>(classPtr); + + jint *jInputDiffOffset = reinterpret_cast( + env->GetPrimitiveArrayCritical(inputDiffOffset, 0)); + + int len = env->GetArrayLength(inputDiff); + DType *inputDiffArrStart[len]; + DType *inputDiffArr[len]; + ArrayType jInputDiffArr[len]; + for (int i = 0; i < len; i++) { + jInputDiffArr[i] = (ArrayType)(env->GetObjectArrayElement(inputDiff, i)); + inputDiffArrStart[i] = reinterpret_cast( + env->GetPrimitiveArrayCritical(jInputDiffArr[i], 0)); + inputDiffArr[i] = inputDiffArrStart[i] + jInputDiffOffset[i]; + } + + std::shared_ptr> jOutputDiff( + new ZipArray(env, outputDiff, outputDiffOffset, + ptr->gradOutput)); + + ptr->updateGradInput(inputDiffArr, jOutputDiff->getPtr()); + + for (int i = 0; i < len; i++) { + env->ReleasePrimitiveArrayCritical(jInputDiffArr[i], inputDiffArrStart[i], + 0); + } + + env->ReleasePrimitiveArrayCritical(inputDiffOffset, jInputDiffOffset, 0); +} + +// Macro +#define ConcatInit(DType, JType, JArrayType) \ + JNIEXPORT \ + jlong JNICALL Java_com_intel_analytics_sparkdl_mkl_MKL_ConcatInit##DType( \ + JNIEnv *env, jclass thisClass, jint numConcats, jint dimension, \ + jintArray size) \ + { \ + return JNIConcatInit(env, thisClass, numConcats, \ + dimension, size); \ + } + +#define ConcatForward(DType, JType, JArrayType) \ + JNIEXPORT \ + void JNICALL Java_com_intel_analytics_sparkdl_mkl_MKL_ConcatForward##DType( \ + JNIEnv *env, jclass thisClass, jobjectArray input, \ + jintArray inputOffset, JArrayType output, jint outputOffset, \ + long classPtr) \ + { \ + JNIConcatUpdateOutput( \ + env, thisClass, input, inputOffset, output, outputOffset, classPtr); \ + } + +#define ConcatBackward(DType, JType, JArrayType) \ + JNIEXPORT \ + void JNICALL Java_com_intel_analytics_sparkdl_mkl_MKL_ConcatBackward##DType( \ + JNIEnv *env, jclass thisClass, jobjectArray inputDiff, \ + jintArray inputDiffOffset, JArrayType outputDiff, jint outputDiffOffset, \ + long classPtr) \ + { \ + JNIConcatUpdateGradInput(env, thisClass, inputDiff, \ + inputDiffOffset, outputDiff, \ + outputDiffOffset, classPtr); \ + } + +#ifdef __cplusplus +extern "C" { +#endif + +// Double +ConcatInit(Double, jdouble, jdoubleArray); +ConcatForward(Double, jdouble, jdoubleArray); +ConcatBackward(Double, jdouble, jdoubleArray); + +// Float +ConcatInit(Float, jfloat, jfloatArray); +ConcatForward(Float, jfloat, jfloatArray); +ConcatBackward(Float, jfloat, jfloatArray); + +#ifdef __cplusplus +} +#endif diff --git 
a/mkl/native/src/main/c/jni/memory.h b/mkl/native/src/main/c/jni/memory.h index 1d531f51d42..9d2b8b9ec98 100644 --- a/mkl/native/src/main/c/jni/memory.h +++ b/mkl/native/src/main/c/jni/memory.h @@ -374,10 +374,19 @@ size_t MKLData::getMklLayoutSize() return 0; } +template +dnnLayout_t MKLData::getUsrLayout() +{ + return layoutUsr; +} + template dnnLayout_t MKLData::getMklLayout() { - return layoutMkl; + if (layoutMkl) + return layoutMkl; + else + return layoutUsr; } template From e87bee4f57d60c4bf516a91d2a936497cd99db2b Mon Sep 17 00:00:00 2001 From: Wang Yanzhang Date: Sat, 24 Sep 2016 12:31:59 +0800 Subject: [PATCH 07/12] change the api of convolution to the same as nn --- .../scala/com/intel/analytics/sparkdl/nn/Module.scala | 11 +++++++++++ .../analytics/sparkdl/nn/mkl/SpatialConvolution.scala | 6 ++---- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/dl/src/main/scala/com/intel/analytics/sparkdl/nn/Module.scala b/dl/src/main/scala/com/intel/analytics/sparkdl/nn/Module.scala index a874a5ee90c..ebe61457f38 100644 --- a/dl/src/main/scala/com/intel/analytics/sparkdl/nn/Module.scala +++ b/dl/src/main/scala/com/intel/analytics/sparkdl/nn/Module.scala @@ -43,6 +43,17 @@ abstract class Module[T: ClassTag](implicit ev: TensorNumeric[T]) extends Serial if (this.name == null) this.toString else this.name } + private var needComputeBack = true + + def setNeedComputeBack(need: Boolean): this.type = { + needComputeBack = need + this + } + + def isNeedComputeBack(): Boolean = { + needComputeBack + } + // list of sub modules val modules: ArrayBuffer[Module[T]] = ArrayBuffer[Module[T]]() diff --git a/dl/src/main/scala/com/intel/analytics/sparkdl/nn/mkl/SpatialConvolution.scala b/dl/src/main/scala/com/intel/analytics/sparkdl/nn/mkl/SpatialConvolution.scala index 0c610d45ab2..5e024697109 100644 --- a/dl/src/main/scala/com/intel/analytics/sparkdl/nn/mkl/SpatialConvolution.scala +++ b/dl/src/main/scala/com/intel/analytics/sparkdl/nn/mkl/SpatialConvolution.scala @@ -40,7 +40,6 @@ class SpatialConvolution[@specialized(Float, Double) T: ClassTag]( val strideHeight: Int = 1, val padWidth: Int = 0, val padHeight: Int = 0, - val needCompute: Boolean = true, val groups: Int = 1, private var initMethod: InitializationMethod = Default )(implicit ev: TensorNumeric[T]) @@ -244,7 +243,7 @@ class SpatialConvolution[@specialized(Float, Double) T: ClassTag]( implicit def bool2int(b: Boolean) = if (b) 1 else 0 val start = System.nanoTime() - if (needCompute) { + if (isNeedComputeBack()) { ev.getType() match { case "Double" => MKL.ConvolutionBackwardDataDouble( @@ -405,8 +404,7 @@ class SpatialConvolution[@specialized(Float, Double) T: ClassTag]( } override def toString(): String = { - s"""mkl.SpatialConvolution($nInputPlane -> $nOutputPlane, $kernelWidth x $kernelHeight, - $strideWidth, $strideHeight, $padWidth, $padHeight)""" + s"""mkl.SpatialConvolution($nInputPlane -> $nOutputPlane, $kernelWidth x $kernelHeight, $strideWidth, $strideHeight, $padWidth, $padHeight)""" } override def findModel(paramOffset: Int, indexes: Array[Int]): (Module[T], Int, Array[Int]) = { From 6e0b5bd70023a545eb63e18985fcd41e7798363d Mon Sep 17 00:00:00 2001 From: Wang Yanzhang Date: Sat, 24 Sep 2016 15:57:10 +0800 Subject: [PATCH 08/12] add support for sum --- .../analytics/sparkdl/nn/mkl/Concat.scala | 94 ++++++-- .../com/intel/analytics/sparkdl/mkl/MKL.java | 6 + mkl/native/pom.xml | 1 + mkl/native/src/main/c/jni/MKLWrapper.h | 17 ++ mkl/native/src/main/c/jni/sum.cpp | 221 ++++++++++++++++++ 5 files changed, 317 insertions(+), 
22 deletions(-) create mode 100644 mkl/native/src/main/c/jni/sum.cpp diff --git a/dl/src/main/scala/com/intel/analytics/sparkdl/nn/mkl/Concat.scala b/dl/src/main/scala/com/intel/analytics/sparkdl/nn/mkl/Concat.scala index 9d3af1cb0dd..5ec16d1026f 100644 --- a/dl/src/main/scala/com/intel/analytics/sparkdl/nn/mkl/Concat.scala +++ b/dl/src/main/scala/com/intel/analytics/sparkdl/nn/mkl/Concat.scala @@ -36,10 +36,13 @@ class Concat[T: ClassTag](val dimension: Int)(implicit ev: TensorNumeric[T]) ext private var gradouts: Array[Tensor[T]] = null private var gradOutputs: Array[Array[T]] = Array[Array[T]]() - var classPtr : Long = 0L - var firstPass: Boolean = true + var concatPtr : Long = 0L + var concat1Pass: Boolean = true - override def getClassPtr(): Long = classPtr + var sumPtr : Long = 0L + var sum1Pass : Boolean = true + + override def getClassPtr(): Long = concatPtr def getSize(): Array[Int] = { return size @@ -64,7 +67,7 @@ class Concat[T: ClassTag](val dimension: Int)(implicit ev: TensorNumeric[T]) ext // TODO call mkl native code to update output // TODO dimension here is different with "dimension" in MKL 2017 // TODO check all dimensions of input tensors are same - if (firstPass) { + if (concat1Pass) { val nDimension = outs(0).nDimension() val inputSize: Array[Int] = new Array[Int](this.modules.length * nDimension) @@ -76,13 +79,13 @@ class Concat[T: ClassTag](val dimension: Int)(implicit ev: TensorNumeric[T]) ext ev.getType() match { case "Double" => - classPtr = MKL.ConcatInitDouble(this.modules.length, nDimension, inputSize) + concatPtr = MKL.ConcatInitDouble(this.modules.length, nDimension, inputSize) case "Float" => - classPtr = MKL.ConcatInitFloat(this.modules.length, nDimension, inputSize) + concatPtr = MKL.ConcatInitFloat(this.modules.length, nDimension, inputSize) case _ => throw new UnsupportedOperationException(s"Only Float supported") } - firstPass = false + concat1Pass = false } // get all of the tensors in outs to float/double array @@ -100,13 +103,13 @@ class Concat[T: ClassTag](val dimension: Int)(implicit ev: TensorNumeric[T]) ext inputsOffset, output.storage().array().asInstanceOf[Array[Double]], output.storageOffset() - 1, - classPtr) + concatPtr) case "Float" => MKL.ConcatForwardFloat(inputs.asInstanceOf[Array[Array[Float]]], inputsOffset, output.storage().array().asInstanceOf[Array[Float]], output.storageOffset() - 1, - classPtr) + concatPtr) case _ => throw new UnsupportedOperationException(s"Only Float supported") } @@ -161,32 +164,79 @@ class Concat[T: ClassTag](val dimension: Int)(implicit ev: TensorNumeric[T]) ext gradOutputsOffset, gradOutput.storage().array().asInstanceOf[Array[Double]], gradOutput.storageOffset() - 1, - classPtr) + concatPtr) case "Float" => MKL.ConcatBackwardFloat(gradOutputs.asInstanceOf[Array[Array[Float]]], gradOutputsOffset, gradOutput.storage().array().asInstanceOf[Array[Float]], gradOutput.storageOffset() - 1, - classPtr) + concatPtr) case _ => throw new UnsupportedOperationException(s"Only Float / Double is supported") } + val tmpGradInputs : Array[Tensor[T]] = new Array[Tensor[T]](this.modules.length) + for (i <- 0 until this.modules.length) { val currentOutput = this.modules(i).output - val currentGradInput = this.modules(i).backward(input, gradouts(i)) - - // It can't be converted to mkl dnn concat forward, becaus the size of all - // gradient input is the same. 
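Whichever path is taken, the gradient reaching Concat's input is the element-wise accumulation of the branches' gradInput tensors, which all share the input's size: the copy-then-add loop removed here and the Sum primitive used further below compute the same result. A minimal Scala sketch of that accumulation; the object name and sample values are illustrative, not taken from the patch:

object GradAccumulateSketch {
  // Element-wise accumulation of equally sized branch gradients, which is
  // what the Sum primitive computes with all of its coefficients set to 1.
  def accumulate(grads: Seq[Array[Float]]): Array[Float] = {
    require(grads.nonEmpty && grads.forall(_.length == grads.head.length))
    val acc = grads.head.clone()
    for (g <- grads.tail; i <- acc.indices) acc(i) += g(i)
    acc
  }

  def main(args: Array[String]): Unit = {
    val g1 = Array(1f, 2f, 3f)
    val g2 = Array(10f, 20f, 30f)
    println(accumulate(Seq(g1, g2)).mkString(" ")) // 11.0 22.0 33.0
  }
}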
- // copy method here doesn't costs too much - // TODO convert to eltwise - if (currentGradInput != null) { - if (i == 0) { - this.gradInput.copy(currentGradInput) - } else { - this.gradInput.add(currentGradInput) + tmpGradInputs(i) = this.modules(i).backward(input, gradouts(i)) + } + + // It can't be converted to mkl dnn concat forward, becaus the size of all + // gradient input is the same. + // copy method here doesn't costs too much + // TODO convert to eltwise + //if (currentGradInput != null) { + // if (i == 0) { + // this.gradInput.copy(currentGradInput) + // } else { + // this.gradInput.add(currentGradInput) + // } + //} + + val subGradInputs: Array[Array[T]] = new Array[Array[T]](this.modules.length) + val subGradInputsOffset: Array[Int] = new Array[Int](this.modules.length) + for (i <- 0 until this.modules.length) { + subGradInputs(i) = tmpGradInputs(i).storage().array() + subGradInputsOffset(i) = tmpGradInputs(i).storageOffset() - 1 + } + + if (sum1Pass) { + val nDimension = tmpGradInputs(0).nDimension() + val subGradInputSize: Array[Int] = new Array[Int](this.modules.length * nDimension) + + for (i <- 0 until this.modules.length) { + for (j <- 0 until nDimension) { + subGradInputSize(i * nDimension + j) = tmpGradInputs(i).size(nDimension - j) } } + + ev.getType() match { + case "Double" => + sumPtr = MKL.SumInitDouble(this.modules.length, nDimension, subGradInputSize) + case "Float" => + sumPtr = MKL.SumInitFloat(this.modules.length, nDimension, subGradInputSize) + case _ => + throw new UnsupportedOperationException(s"Only Float supported") + } + sum1Pass = false + } + + ev.getType() match { + case "Double" => + MKL.SumForwardDouble(subGradInputs.asInstanceOf[Array[Array[Double]]], + subGradInputsOffset, + gradInput.storage().array().asInstanceOf[Array[Double]], + gradInput.storageOffset() - 1, + sumPtr) + case "Float" => + MKL.SumForwardFloat(subGradInputs.asInstanceOf[Array[Array[Float]]], + subGradInputsOffset, + gradInput.storage().array().asInstanceOf[Array[Float]], + gradInput.storageOffset() - 1, + sumPtr) + case _ => + throw new UnsupportedOperationException(s"Only Float supported") } this.gradInput diff --git a/mkl/jni/src/main/java/com/intel/analytics/sparkdl/mkl/MKL.java b/mkl/jni/src/main/java/com/intel/analytics/sparkdl/mkl/MKL.java index 4f6c882e63f..4e2796a95e1 100644 --- a/mkl/jni/src/main/java/com/intel/analytics/sparkdl/mkl/MKL.java +++ b/mkl/jni/src/main/java/com/intel/analytics/sparkdl/mkl/MKL.java @@ -269,4 +269,10 @@ public native static void LinearBackwardBiasDouble( public native static long ConcatInitDouble(int numChannels, int dimension, int[] size); public native static void ConcatForwardDouble(double[][] input, int[] inputOffset, double[] output, int outputOffset, long classPtr); public native static void ConcatBackwardDouble(double[][] gradInput, int[] gradInputOffset, double[] output, int outputOffset, long classPtr); + + /* Sum API */ + public native static long SumInitFloat(int numChannels, int dimension, int[] size); + public native static void SumForwardFloat(float[][] input, int[] inputOffset, float[] output, int outputOffset, long classPtr); + public native static long SumInitDouble(int numChannels, int dimension, int[] size); + public native static void SumForwardDouble(double[][] input, int[] inputOffset, double[] output, int outputOffset, long classPtr); } diff --git a/mkl/native/pom.xml b/mkl/native/pom.xml index e48e148d391..bfe1c0bb6e5 100644 --- a/mkl/native/pom.xml +++ b/mkl/native/pom.xml @@ -55,6 +55,7 @@ relu.cpp batch_norm.cpp 
concat.cpp + sum.cpp utils.cpp debug.cpp diff --git a/mkl/native/src/main/c/jni/MKLWrapper.h b/mkl/native/src/main/c/jni/MKLWrapper.h index 9b1bf4a70e8..5d75ddd5385 100644 --- a/mkl/native/src/main/c/jni/MKLWrapper.h +++ b/mkl/native/src/main/c/jni/MKLWrapper.h @@ -507,4 +507,21 @@ dnnError_t dnnSplitCreate(dnnPrimitive_t *pSplit, dstChannelSize); } +template +dnnError_t dnnSumCreate( + dnnPrimitive_t *pSum, + dnnPrimitiveAttributes_t attributes, const size_t nSummands, + dnnLayout_t layout, Type *coefficients) +{ + return dnnSumCreate_F32(pSum, attributes, nSummands, layout, coefficients); +} + +template <> +dnnError_t dnnSumCreate( + dnnPrimitive_t *pSum, + dnnPrimitiveAttributes_t attributes, const size_t nSummands, + dnnLayout_t layout, double *coefficients) +{ + return dnnSumCreate_F64(pSum, attributes, nSummands, layout, coefficients); +} #endif diff --git a/mkl/native/src/main/c/jni/sum.cpp b/mkl/native/src/main/c/jni/sum.cpp new file mode 100644 index 00000000000..037e6fcd606 --- /dev/null +++ b/mkl/native/src/main/c/jni/sum.cpp @@ -0,0 +1,221 @@ +#include +#include + +#include "debug.h" +#include "layer.h" +#include "memory.h" +#include "utils.h" + +using namespace std; + +template +class MKLSum : public MKLLayer +{ + public: + MKLSum(); + ~MKLSum(); + + void init(int numSums, int dimension, int *size); + + void updateOutput(DType **input, DType *output); + void updateGradInput(DType **gradInput, DType *gradOutput); + + // attention, we will override the four variables of MKLLayer + vector>> input; + + private: + void firstPass(); + void preExecute(DType *input); + + int numSums; // number of concats + DType *coefficients; +}; + +template +MKLSum::MKLSum() : numSums(0) +{ + // TODO +} + +template +MKLSum::~MKLSum() +{ + // TODO +} + +template +void MKLSum::init(int numSums, int dimension, int *size) +{ + this->numSums = numSums; + this->dimension = dimension; + this->coefficients = new DType[numSums]; + + size_t inputSize[dimension]; + size_t inputStrides[dimension]; + size_t outputSize[dimension]; + size_t outputStrides[dimension]; + + int offset = 0; + + for (int i = 0; i < numSums; i++) { + input.push_back(shared_ptr>(new MKLData)); + + // set the size. + // the size of every channel should be gaved in size. + // the dimension of every channel should be the same. 
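Both ConcatInit and SumInit receive one flattened Int array holding every branch's size: branch i occupies nDimension consecutive slots, and the Scala wrappers write the dimensions in reverse order so that width comes first, matching the native size/stride convention; the loop just below reads the array back one branch at a time by advancing offset by dimension. A Scala sketch of the flattening, with illustrative shapes:

object SizeFlattenSketch {
  // Flatten per-branch (N, C, H, W) sizes into one array for the native init:
  // branch i occupies slots [i * nDim, (i + 1) * nDim), stored in reverse
  // order so the fastest-varying dimension (width) comes first.
  def flattenSizes(branchSizes: Seq[Array[Int]]): Array[Int] = {
    val nDim = branchSizes.head.length
    require(branchSizes.forall(_.length == nDim))
    val flat = new Array[Int](branchSizes.length * nDim)
    for ((size, i) <- branchSizes.zipWithIndex; j <- 0 until nDim) {
      flat(i * nDim + j) = size(nDim - 1 - j)
    }
    flat
  }

  def main(args: Array[String]): Unit = {
    // Two branches shaped (N, C, H, W) = (32, 64, 28, 28) and (32, 96, 28, 28).
    val flat = flattenSizes(Seq(Array(32, 64, 28, 28), Array(32, 96, 28, 28)))
    println(flat.mkString(", ")) // 28, 28, 64, 32, 28, 28, 96, 32
  }
}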
+ inputStrides[0] = 1; + inputSize[0] = size[offset]; + for (int j = 1; j < dimension; j++) { + inputSize[j] = size[offset + j]; + inputStrides[j] = inputStrides[j - 1] * inputSize[j - 1]; + } + offset += dimension; + + this->input[i]->createUsrLayout(dimension, inputSize, inputStrides); + this->coefficients[i] = 1; + } + + // TODO check size of all input, they should be the same + + outputStrides[0] = 1; + outputSize[0] = inputSize[0]; + for (int i = 1; i < dimension; i++) { + outputSize[i] = inputSize[i]; + outputStrides[i] = outputStrides[i - 1] * outputSize[i - 1]; + } + + this->output->createUsrLayout(dimension, outputSize, outputStrides); +} + +template +void MKLSum::firstPass() +{ + dnnLayout_t layout = this->input[0]->getMklLayout(); + + dnnError_t status = E_UNIMPLEMENTED; + status = dnnSumCreate(&(this->forwardPrim), NULL, numSums, layout, + this->coefficients); + CHECK_EQ(status, E_SUCCESS); + + this->output->createMklLayout(this->forwardPrim, dnnResourceDst); + + for (int i = 0; i < numSums; i++) { + this->input[i]->createMklLayout( + this->forwardPrim, (dnnResourceType_t)(dnnResourceMultipleSrc + i)); + } + + this->isFirstPass = false; +} + +template +void MKLSum::updateOutput(DType **input, DType *output) +{ + if (this->isFirstPass) firstPass(); + + for (int i = 0; i < numSums; i++) { + this->input[i]->setUsrData(input[i]); + this->input[i]->createConversion(); + } + this->output->setUsrData(output); + this->output->createConversion(); + + dnnError_t status; + void *resources[dnnResourceNumber]; + + for (int i = 0; i < numSums; i++) { + resources[dnnResourceMultipleSrc + i] = this->input[i]->getConvertedData(); + } + resources[dnnResourceDst] = this->output->getData(); + + PERFSTART(); + status = dnnExecute(this->forwardPrim, resources); + PERFEND("main computing"); + + if (!this->output->isUseNext()) this->output->backToUsr(); +} + +template +jlong JNISumInit(JNIEnv *env, jclass thisClass, int numSums, int dimension, + jintArray size) +{ + MKLSum *ptr = new MKLSum(); + + jint *jSize = + reinterpret_cast(env->GetPrimitiveArrayCritical(size, 0)); + ptr->init(numSums, dimension, jSize); + env->ReleasePrimitiveArrayCritical(size, jSize, 0); + + return reinterpret_cast(ptr); +} + +template +void JNISumUpdateOutput(JNIEnv *env, jclass thisClass, jobjectArray input, + jintArray inputOffset, ArrayType output, + jint outputOffset, long classPtr) +{ + MKLSum *ptr = reinterpret_cast *>(classPtr); + + jint *jInputOffset = + reinterpret_cast(env->GetPrimitiveArrayCritical(inputOffset, 0)); + + // TODO we should re-write, this version makes a little complict. 
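+  // Pin each per-module gradient array with GetPrimitiveArrayCritical, apply
+  // its storage offset, hand the raw pointers to the native layer, and release
+  // every array again once updateOutput has finished.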
+ int len = env->GetArrayLength(input); + DType *inputArrStart[len]; + DType *inputArr[len]; + ArrayType jInputArr[len]; + for (int i = 0; i < len; i++) { + jInputArr[i] = (ArrayType)(env->GetObjectArrayElement(input, i)); + inputArrStart[i] = reinterpret_cast( + env->GetPrimitiveArrayCritical(jInputArr[i], 0)); + inputArr[i] = inputArrStart[i] + jInputOffset[i]; + } + + std::shared_ptr> jOutput( + new ZipArray(env, output, outputOffset, ptr->output)); + + ptr->updateOutput(inputArr, jOutput->getPtr()); + + for (int i = 0; i < len; i++) { + env->ReleasePrimitiveArrayCritical(jInputArr[i], inputArrStart[i], 0); + } + + env->ReleasePrimitiveArrayCritical(inputOffset, jInputOffset, 0); +} + +// Macro +#define SumInit(DType, JType, JArrayType) \ + JNIEXPORT \ + jlong JNICALL Java_com_intel_analytics_sparkdl_mkl_MKL_SumInit##DType( \ + JNIEnv *env, jclass thisClass, jint numSums, jint dimension, \ + jintArray size) \ + { \ + return JNISumInit(env, thisClass, numSums, dimension, \ + size); \ + } + +#define SumForward(DType, JType, JArrayType) \ + JNIEXPORT \ + void JNICALL Java_com_intel_analytics_sparkdl_mkl_MKL_SumForward##DType( \ + JNIEnv *env, jclass thisClass, jobjectArray input, \ + jintArray inputOffset, JArrayType output, jint outputOffset, \ + long classPtr) \ + { \ + JNISumUpdateOutput(env, thisClass, input, inputOffset, \ + output, outputOffset, classPtr); \ + } + +#ifdef __cplusplus +extern "C" { +#endif + +// Double +SumInit(Double, jdouble, jdoubleArray); +SumForward(Double, jdouble, jdoubleArray); + +// Float +SumInit(Float, jfloat, jfloatArray); +SumForward(Float, jfloat, jfloatArray); + +#ifdef __cplusplus +} +#endif From 48d1ebce80621300275a2dc4fd27582baf58b587 Mon Sep 17 00:00:00 2001 From: ian Date: Wed, 21 Sep 2016 10:57:23 +0800 Subject: [PATCH 09/12] Set seed before training to avoid random failure --- .../intel/analytics/sparkdl/optim/EpochOptimizerSpec.scala | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dl/src/test/scala/com/intel/analytics/sparkdl/optim/EpochOptimizerSpec.scala b/dl/src/test/scala/com/intel/analytics/sparkdl/optim/EpochOptimizerSpec.scala index 0284d54dff3..599fb1a0021 100644 --- a/dl/src/test/scala/com/intel/analytics/sparkdl/optim/EpochOptimizerSpec.scala +++ b/dl/src/test/scala/com/intel/analytics/sparkdl/optim/EpochOptimizerSpec.scala @@ -20,7 +20,7 @@ package com.intel.analytics.sparkdl.optim import com.intel.analytics.sparkdl.nn._ import com.intel.analytics.sparkdl.ps.{AllReduceParameterManager, OneReduceParameterManager} import com.intel.analytics.sparkdl.tensor.{Storage, Tensor} -import com.intel.analytics.sparkdl.utils.{Engine, T} +import com.intel.analytics.sparkdl.utils.{RandomGenerator, Engine, T} import org.apache.log4j.{Level, Logger} import org.apache.spark.SparkContext import org.scalatest.{BeforeAndAfter, FlatSpec, Matchers} @@ -38,6 +38,7 @@ class EpochOptimizerSpec extends FlatSpec with Matchers with BeforeAndAfter { "An Artificial Neural Network with MSE and LBFGS" should "be trained with good result" in { Logger.getLogger("org").setLevel(Level.WARN) Logger.getLogger("akka").setLevel(Level.WARN) + RandomGenerator.RNG.setSeed(1000) sc = new SparkContext("local[1]", "SerialOptimizerSpec") @@ -98,6 +99,7 @@ class EpochOptimizerSpec extends FlatSpec with Matchers with BeforeAndAfter { Logger.getLogger("org").setLevel(Level.WARN) Logger.getLogger("akka").setLevel(Level.WARN) + RandomGenerator.RNG.setSeed(1000) sc = new SparkContext("local[1]", "SerialOptimizerSpec") // Prepare two kinds of input and their 
corresponding label From aefa30910c6ace5bd0e5467cea8fc8976a364c7c Mon Sep 17 00:00:00 2001 From: Yiheng Wang Date: Wed, 21 Sep 2016 16:19:20 +0800 Subject: [PATCH 10/12] Support top5 and save model in local mode training --- .../sparkdl/example/ImageNetLocal.scala | 190 ++---------------- 1 file changed, 18 insertions(+), 172 deletions(-) diff --git a/dl/src/main/scala/com/intel/analytics/sparkdl/example/ImageNetLocal.scala b/dl/src/main/scala/com/intel/analytics/sparkdl/example/ImageNetLocal.scala index c58c9e9b563..dbfd76fed72 100644 --- a/dl/src/main/scala/com/intel/analytics/sparkdl/example/ImageNetLocal.scala +++ b/dl/src/main/scala/com/intel/analytics/sparkdl/example/ImageNetLocal.scala @@ -21,7 +21,7 @@ import java.awt.color.ColorSpace import java.util import com.intel.analytics.sparkdl.nn.ClassNLLCriterion -import com.intel.analytics.sparkdl.optim.SGD +import com.intel.analytics.sparkdl.optim.{EvaluateMethods, SGD} import com.intel.analytics.sparkdl.tensor.Tensor import com.intel.analytics.sparkdl.utils.{File, T} @@ -49,160 +49,9 @@ object ImageNetLocal { println(s"[${(System.nanoTime() - startTime) / 1e9}s] $msg") } - def runDouble(donkey: Donkey, dataSet: DataSets, netType: String, classNum: Int, + def run(donkey: Donkey, dataSet: DataSets, netType: String, classNum: Int, labelsMap: Map[String, Double], testInterval: Int, donkeyVal: Donkey, - dataSetVal: DataSets, batchSize: Int): Unit = { - // Compute Mean on amount of samples - val samples = 10000 - log(s"Start to calculate Mean on $samples samples") - var (meanR, meanG, meanB) = Array.tabulate(samples)(n => { - print(".") - val data = donkey.pull - dataSet.post(data._2) - ImageNetUtils.computeMean(data._1, data._2.dataOffset) - }).reduce((a, b) => (a._1 + b._1, a._2 + b._2, a._3 + b._3)) - meanR /= samples - meanG /= samples - meanB /= samples - println() - - // Compute std on amount of samples - log(s"Start to calculate std on $samples samples") - var (varR, varG, varB) = Array.tabulate(samples)(n => { - print(".") - val data = donkey.pull - dataSet.post(data._2) - ImageNetUtils.computeVar(data._1, meanR, meanG, meanB, data._2.dataOffset) - }).reduce((a, b) => (a._1 + b._1, a._2 + b._2, a._3 + b._3)) - varR /= samples - varG /= samples - varB /= samples - - val model = netType match { - case "alexnet" => AlexNet.getModel[Double](classNum) - case "googlenet" => GoogleNet.getModel[Double](classNum) - case "googlenet-bn" => GoogleNet.getModel[Double](classNum, "googlenet-bn") - case "googlenet-cf" => GoogleNet.getModelCaffe[Double](classNum) - case _ => throw new IllegalArgumentException - } - val (weights, grad) = model.getParameters() - println(s"modelsize ${weights.nElement()}") - println(model) - val criterion = new ClassNLLCriterion[Double]() - val epochNum = 90 - val featureShape = Array(3, 224, 224) - val targetShape = Array(1) - val sgd = new SGD[Double] - val state = T("momentum" -> 0.9, "dampening" -> 0.0) - val stageImgs = new util.ArrayDeque[Image](batchSize) - val input = Tensor[Double](batchSize, 3, 224, 224) - val target = Tensor[Double](batchSize) - val iter = ImageNetUtils.toTensorDouble( - donkey.map(d => { - stageImgs.push(d._2) - (labelsMap(d._2.label), d._1) - }), - featureShape, - targetShape, - batchSize, - (meanR, meanG, meanB), - (varR, varG, varB), - input, - target - ) - - val stageImgsVal = new util.ArrayDeque[Image](batchSize) - val iterVal = ImageNetUtils.toTensorDouble( - donkeyVal.map(d => { - stageImgsVal.push(d._2) - (labelsMap(d._2.label), d._1) - }), - featureShape, - targetShape, - batchSize, 
- (meanR, meanG, meanB), - (varR, varG, varB), - input, - target - ) - - log(s"meanR is $meanR meanG is $meanG meanB is $meanB") - log(s"varR is $varR varG is $varG varB is $varB") - log("Start to train...") - - var wallClockTime = 0L - for (i <- 1 to epochNum) { - println(s"Epoch[$i] Train") - - for (regime <- regimes(netType)) { - if (i >= regime._1 && i <= regime._2) { - state("learningRate") = regime._3 - state("weightDecay") = regime._4 - } - } - - var j = 0 - var c = 0 - model.training() - while (j < dataSet.getTotal) { - val start = System.nanoTime() - val (input, target) = iter.next() - val readImgTime = System.nanoTime() - model.zeroGradParameters() - val output = model.forward(input) - val loss = criterion.forward(output, target) - val gradOutput = criterion.backward(output, target) - model.backward(input, gradOutput) - sgd.optimize(_ => (loss, grad), weights, state, state) - val end = System.nanoTime() - wallClockTime += end - start - log(s"Epoch[$i][Iteration $c $j/${dataSet.getTotal}][Wall Clock ${wallClockTime / 1e9}s]" + - s" loss is $loss time ${(end - start) / 1e9}s read " + - s"time ${(readImgTime - start) / 1e9}s train time ${(end - readImgTime) / 1e9}s." + - s" Throughput is ${input.size(1).toDouble / (end - start) * 1e9} img / second") - while (!stageImgs.isEmpty) { - dataSet.post(stageImgs.poll()) - } - j += input.size(1) - c += 1 - } - - if (i % testInterval == 0) { - model.evaluate() - var correct = 0 - var k = 0 - while (k < dataSetVal.getTotal) { - val (input, target) = iterVal.next() - val output = model.forward(input) - output.max(2)._2.squeeze().map(target, (a, b) => { - if (a == b) { - correct += 1 - } - a - }) - while (!stageImgsVal.isEmpty) { - dataSetVal.post(stageImgsVal.poll()) - } - k += input.size(1) - } - - val accuracy = correct.toDouble / dataSetVal.getTotal - println(s"[Wall Clock ${wallClockTime / 1e9}s] Accuracy is $accuracy") - - // Save model to a file each epoch - File.save(model, s"${netType}${accuracy}.model${i}", true) - File.save(state, s"${netType}${accuracy}.state${i}", true) - } - - log("shuffle") - dataSet.shuffle - log("shuffle end") - } - } - - def runFloat(donkey: Donkey, dataSet: DataSets, netType: String, classNum: Int, - labelsMap: Map[String, Double], testInterval: Int, donkeyVal: Donkey, - dataSetVal: DataSets, batchSize: Int): Unit = { + dataSetVal: DataSets, batchSize: Int, modelPath : String): Unit = { // Compute Mean on amount of samples val samples = 10000 log(s"Start to calculate Mean on $samples samples") @@ -327,25 +176,27 @@ object ImageNetLocal { if (i % testInterval == 0) { model.evaluate() - var correct = 0 + var top1Correct = 0 + var top5Correct = 0 var k = 0 while (k < dataSetVal.getTotal) { val (input, target) = iterVal.next() val output = model.forward(input) - output.max(2)._2.squeeze().map(target, (a, b) => { - if (a == b) { - correct += 1 - } - a - }) + top1Correct += EvaluateMethods.calcAccuracy(output, target)._1 + top5Correct += EvaluateMethods.calcTop5Accuracy(output, target)._1 while (!stageImgsVal.isEmpty) { dataSetVal.post(stageImgsVal.poll()) } k += input.size(1) } - val accuracy = correct.toDouble / dataSetVal.getTotal - println(s"[Wall Clock ${wallClockTime / 1e9}s] Accuracy is $accuracy") + val top1Accuracy = top1Correct.toDouble / dataSetVal.getTotal + val top5Accuracy = top5Correct.toDouble / dataSetVal.getTotal + println(s"[Wall Clock ${wallClockTime / 1e9}s] Top-1 Accuracy is $top1Accuracy") + println(s"[Wall Clock ${wallClockTime / 1e9}s] Top-5 Accuracy is $top5Accuracy") + println(s"Save model 
and state to $modelPath-$i") + File.save(model, modelPath + s"-$i.model") + File.save(state, modelPath + s"-$i.state") } log("shuffle") @@ -371,8 +222,8 @@ object ImageNetLocal { val testInterval = args(4).toInt val netType = args(5) val classNum = args(6).toInt - val dataType = args(7) - val batchSize = args(8).toInt + val batchSize = args(7).toInt + val modelPath = args(8) val dataSet = new DataSets(path, classNum, labelsMap) val donkey = new Donkey(parallelism, dataSet) @@ -383,12 +234,7 @@ object ImageNetLocal { dataSet.shuffle log("shuffle end") - dataType match { - case "double" => runDouble(donkey, dataSet, netType, classNum, labelsMap, testInterval, - donkeyVal, dataSetVal, batchSize) - case "float" => runFloat(donkey, dataSet, netType, classNum, labelsMap, testInterval, - donkeyVal, dataSetVal, batchSize) - case _ => throw new IllegalArgumentException - } + run(donkey, dataSet, netType, classNum, labelsMap, testInterval, + donkeyVal, dataSetVal, batchSize, modelPath) } } From 87e279b7971a0e08704676b249bb5d6ec00c35af Mon Sep 17 00:00:00 2001 From: ian Date: Wed, 21 Sep 2016 17:03:40 +0800 Subject: [PATCH 11/12] Fix a bug when merge conflict code --- .../main/scala/com/intel/analytics/sparkdl/models/Perf.scala | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/dl/src/main/scala/com/intel/analytics/sparkdl/models/Perf.scala b/dl/src/main/scala/com/intel/analytics/sparkdl/models/Perf.scala index d6be3bdb702..6191e890b2a 100644 --- a/dl/src/main/scala/com/intel/analytics/sparkdl/models/Perf.scala +++ b/dl/src/main/scala/com/intel/analytics/sparkdl/models/Perf.scala @@ -79,7 +79,7 @@ object Perf { def performance[T: ClassTag](param: Params)(implicit tn: TensorNumeric[T]): Unit = { val (model, input) = param.module match { - case "alexnet" => (AlexNet(1000), Tensor[T](param.batchSize, 3, 224, 224)) + case "alexnet" => (AlexNet(1000), Tensor[T](param.batchSize, 3, 227, 227)) case "alexnetowt" => (AlexNet_OWT(1000), Tensor[T](param.batchSize, 3, 224, 224)) case "googlenet_v1" => (GoogleNet_v1(1000), Tensor[T](param.batchSize, 3, 224, 224)) case "googlenet_v2" => (GoogleNet_v2(1000), Tensor[T](param.batchSize, 3, 224, 224)) @@ -139,8 +139,6 @@ object Perf { } } -case class TestCase[T](input: Tensor[T], target: Tensor[T], model: Module[T]) - case class Params( batchSize: Int = 128, iteration: Int = 10, From 10c178cb1cdb4582ed45c190c933b0704120009a Mon Sep 17 00:00:00 2001 From: ian Date: Wed, 21 Sep 2016 21:04:12 +0800 Subject: [PATCH 12/12] fix a layer name in googlenetv2 --- .../scala/com/intel/analytics/sparkdl/models/GoogleNet.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dl/src/main/scala/com/intel/analytics/sparkdl/models/GoogleNet.scala b/dl/src/main/scala/com/intel/analytics/sparkdl/models/GoogleNet.scala index 12c1a41f100..cec63aefce5 100644 --- a/dl/src/main/scala/com/intel/analytics/sparkdl/models/GoogleNet.scala +++ b/dl/src/main/scala/com/intel/analytics/sparkdl/models/GoogleNet.scala @@ -232,7 +232,7 @@ object GoogleNet_v2 { val conv3 = new Sequential[D] conv3.add(new SpatialConvolution[D](inputSize, config[Table](2)(1), 1, 1, 1, 1) - .setName(namePrefix + "3x3_s2")) + .setName(namePrefix + "3x3_reduce")) conv3.add(new SpatialBatchNormalization(config[Table](2)(1), 1e-3) .setName(namePrefix + "3x3_reduce/bn")) conv3.add(new ReLU[D](true). setName(namePrefix + "3x3_reduce/bn/sc/relu"))
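
The MKL Sum support added above (SumInitFloat/SumForwardFloat and the native MKLSum layer) accumulates the per-module gradients element-wise with all coefficients fixed to 1. A minimal reference implementation in plain Scala can serve as a test oracle for the new Concat backward path; referenceSum below is a hypothetical helper, not part of the patch series.

    // Reference semantics of the Sum primitive used in Concat.backward:
    // result(k) = sum over i of subGradInputs(i)(k), i.e. unit coefficients.
    def referenceSum(subGradInputs: Array[Array[Float]]): Array[Float] = {
      require(subGradInputs.nonEmpty &&
        subGradInputs.map(_.length).distinct.length == 1,
        "all sub-gradients must have the same number of elements")
      val result = new Array[Float](subGradInputs(0).length)
      var i = 0
      while (i < subGradInputs.length) {
        var k = 0
        while (k < result.length) {
          result(k) += subGradInputs(i)(k)
          k += 1
        }
        i += 1
      }
      result
    }

Comparing this against gradInput.storage().array() after a call through MKL.SumForwardFloat is a quick sanity check for the native path in a unit test.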
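
The local-mode training patch above counts correct predictions through EvaluateMethods.calcAccuracy and calcTop5Accuracy, whose implementations are not part of this series. The counting they are expected to perform is straightforward; topKCorrect below is a hypothetical sketch over plain score arrays, assuming 1-based labels as in the earlier output.max(2)._2 comparison.

    // Counts samples whose true label appears among the k highest scores.
    // scores: one row of class scores per sample; labels: 1-based class indices.
    def topKCorrect(scores: Array[Array[Double]], labels: Array[Int], k: Int): Int = {
      scores.zip(labels).count { case (row, label) =>
        row.zipWithIndex.sortBy(-_._1).take(k).exists(_._2 + 1 == label)
      }
    }

With per-batch scores and labels extracted from the output and target tensors, top1Correct += topKCorrect(scores, labels, 1) and top5Correct += topKCorrect(scores, labels, 5) would reproduce the counts accumulated in the validation loop.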