add python notebook support (intel-analytics#3)
* add python notebook support

* put post process calculation into executors
SeaOfOcean authored May 18, 2017
1 parent 5ad8c44 commit d26dc50
Showing 11 changed files with 709 additions and 55 deletions.
50 changes: 50 additions & 0 deletions notebook/README.md
@@ -0,0 +1,50 @@
# Demo Setup Guide

## Install Dependency Packages

See https://github.com/intel-analytics/BigDL/wiki/Python-Support for installing the required Python packages.

## Download BigDL Jars

Download the BigDL nightly build jars from https://github.com/intel-analytics/BigDL/wiki/Downloads

The default Spark version is 1.5.1.


## Start Jupyter Server

* Create `start_notebook.sh`, copy and paste the contents below, and edit `SPARK_HOME`, `Analytics_HOME`, and `BigDL_HOME` accordingly. Adjust the other parameters as needed.
```bash
#!/bin/bash

# set up paths
SPARK_HOME=/Users/bigdl/spark-1.6.0-bin-hadoop2.6/
Analytics_HOME=/Users/bigdl/analytics-zoo
BigDL_HOME=/Users/bigdl/dist-spark-1.5.1-scala-2.10.5-linux64-0.2.0-20170510.012057-18-dist
#use local mode or cluster mode
#MASTER=spark://xxxx:7077
MASTER="local[4]"

PYTHON_API_ZIP_PATH=${BigDL_HOME}/lib/bigdl-0.2.0-SNAPSHOT-python-api.zip
BigDL_JAR_PATH=${Analytics_HOME}/pipeline/target/pipeline-0.1-SNAPSHOT-jar-with-dependencies.jar

export PYTHONPATH=${PYTHON_API_ZIP_PATH}:$PYTHONPATH
export IPYTHON_OPTS="notebook --notebook-dir=./ --ip=* --no-browser --NotebookApp.token=''"

${SPARK_HOME}/bin/pyspark \
--master ${MASTER} \
--properties-file ${BigDL_HOME}/conf/spark-bigdl.conf \
--driver-cores 1 \
--driver-memory 10g \
--total-executor-cores 3 \
--executor-cores 1 \
--executor-memory 20g \
--conf spark.akka.frameSize=64 \
--py-files ${PYTHON_API_ZIP_PATH} \
--jars ${BigDL_JAR_PATH} \
--conf spark.driver.extraClassPath=${BigDL_JAR_PATH} \
--conf spark.executor.extraClassPath=pipeline-0.1-SNAPSHOT-jar-with-dependencies.jar
```
* Put `start_notebook.sh` and `start_tensorboard.sh` in your home directory and run them with bash.
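
Once the server is running, a quick notebook cell can confirm that the BigDL Python bindings and the Spark context are wired up. A minimal sketch, assuming the zip from `PYTHON_API_ZIP_PATH` is on `PYTHONPATH` as configured above:

```python
# Run in a notebook cell; `sc` is created by pyspark at startup.
from util.common import init_engine

init_engine()     # initialize the BigDL engine
print(sc.master)  # should match MASTER from start_notebook.sh
```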


417 changes: 417 additions & 0 deletions notebook/example/SSD.ipynb


18 changes: 18 additions & 0 deletions pipeline/assembly/python-zip.xml
@@ -0,0 +1,18 @@
<assembly xmlns="http://maven.apache.org/ASSEMBLY/2.0.0"
          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
          xsi:schemaLocation="http://maven.apache.org/ASSEMBLY/2.0.0 http://maven.apache.org/xsd/assembly-2.0.0.xsd">
  <id>python-api</id>
  <formats>
    <format>zip</format>
  </formats>
  <includeBaseDirectory>false</includeBaseDirectory>
  <fileSets>
    <fileSet>
      <includes>
        <include>**/*.py</include>
      </includes>
      <outputDirectory>/..</outputDirectory>
      <directory>src/main/python/</directory>
    </fileSet>
  </fileSets>
</assembly>
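
The assembly collects every `.py` file under `src/main/python` and places it at the zip root (the `/..` output directory strips the base folder), so packages such as `ssd` are importable directly from the archive. A hedged sanity check of the layout; the artifact path below is an assumption derived from the `finalName` and assembly id configured in `pipeline/pom.xml` below, not something stated in this commit:

```python
# Assumed artifact location: pipeline/target/pipeline-0.1-SNAPSHOT-python-api.zip
import zipfile

with zipfile.ZipFile("pipeline/target/pipeline-0.1-SNAPSHOT-python-api.zip") as z:
    for name in z.namelist():
        print(name)  # expect ssd/__init__.py and ssd/ssd.py at the zip root
```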
15 changes: 15 additions & 0 deletions pipeline/pom.xml
@@ -189,6 +189,21 @@
      </configuration>

      <executions>
        <execution>
          <id>python</id>
          <inherited>false</inherited>
          <configuration>
            <finalName>pipeline-${project.version}</finalName>
            <descriptors>
              <descriptor>assembly/python-zip.xml</descriptor>
            </descriptors>
          </configuration>
          <phase>package</phase>
          <goals>
            <goal>single</goal>
          </goals>
        </execution>
        <execution>
          <phase>package</phase>
          <goals>
4 changes: 4 additions & 0 deletions pipeline/src/main/python/ssd/__init__.py
@@ -0,0 +1,4 @@
from util.common import JavaCreator

JavaCreator.set_creator_class(
    "com.intel.analytics.bigdl.python.api.SSDPythonBigDL")  # noqa
41 changes: 41 additions & 0 deletions pipeline/src/main/python/ssd/ssd.py
@@ -0,0 +1,41 @@
from nn.layer import *
from util.common import callBigDlFunc


class Test(Model):
    """
    >>> test = Test("myworld")
    creating: createTest
    >>> print(test.value)
    hello myworld
    >>> linear = Linear(1, 2)
    creating: createLinear
    """
    def __init__(self, message, bigdl_type="float"):
        super(Test, self).__init__(None, bigdl_type, message)


def _test():
    import sys
    print(sys.path)
    import doctest
    from pyspark import SparkContext
    from util.common import init_engine
    from util.common import create_spark_conf
    from util.common import JavaCreator
    import ssd
    globs = ssd.__dict__.copy()
    sc = SparkContext(master="local[4]", appName="test layer",
                      conf=create_spark_conf())
    globs['sc'] = sc
    JavaCreator.set_creator_class("com.intel.analytics.bigdl.python.api.SSDPythonBigDL")  # noqa
    init_engine()
    (failure_count, test_count) = doctest.testmod(globs=globs,
                                                  optionflags=doctest.ELLIPSIS)
    if failure_count:
        exit(-1)


def predict(resolution, batch_size, n_partition, folder, _sc, _model, n_classes):
    return callBigDlFunc("float", "ssdPredict", resolution, batch_size, n_partition,
                         folder, _sc, _model, n_classes)


if __name__ == "__main__":
    _test()
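
A hedged usage sketch of the new `predict` helper from a notebook; the model path and image folder are placeholders, and `Model.load` is assumed to be the stock BigDL Python loader rather than anything added in this commit:

```python
# Placeholder paths; point these at a trained SSD model and an image folder.
from nn.layer import Model
from ssd.ssd import predict

model = Model.load("/path/to/ssd.model")  # hypothetical model file
results = predict(resolution=300, batch_size=4, n_partition=4,
                  folder="/path/to/images", _sc=sc, _model=model,
                  n_classes=21)
# results: one entry per image, each [scores_per_class, bboxes_per_class]
```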
pipeline/src/main/scala/com/intel/analytics/zoo/pipeline/common/BboxUtil.scala
@@ -17,9 +17,55 @@
package com.intel.analytics.zoo.pipeline.common

import com.intel.analytics.bigdl.tensor.Tensor
import com.intel.analytics.zoo.pipeline.common.dataset.roiimage.Target
import org.apache.log4j.Logger

object BboxUtil {

  val logger = Logger.getLogger(getClass)

  def decodeOutput(output: Tensor[Float], nclass: Int): Array[Target] = {
    val num = output.valueAt(1).toInt
    val result = output.narrow(1, 2, output.nElement() - 1).view(num, 6)

    val indices = getClassIndices(result)
    val decoded = new Array[Target](nclass)
    val iter = indices.iterator
    while (iter.hasNext) {
      val item = iter.next()
      val sub = result.narrow(1, item._2._1, item._2._2)
      decoded(item._1) = Target(sub.select(2, 2),
        sub.narrow(2, 3, 4))
    }
    decoded
  }

  private def getClassIndices(result: Tensor[Float]): Map[Int, (Int, Int)] = {
    var indices = Map[Int, (Int, Int)]()
    if (result.nElement() == 0) return indices
    var prev = -1f
    var i = 1
    var start = 1
    if (result.size(1) == 1) {
      indices += (result.valueAt(i, 1).toInt -> (1, 1))
      return indices
    }
    while (i <= result.size(1)) {
      if (prev != result.valueAt(i, 1)) {
        if (prev >= 0) {
          indices += (prev.toInt -> (start, i - start))
        }
        start = i
      }
      prev = result.valueAt(i, 1)
      if (i == result.size(1)) {
        indices += (prev.toInt -> (start, i - start + 1))
      }
      i += 1
    }
    indices
  }

  // inplace scale
  def scaleBBox(classBboxes: Tensor[Float], height: Float, width: Float): Unit = {
    if (classBboxes.nElement() == 0) return
@@ -30,8 +76,6 @@ object BboxUtil {
  }


-  val logger = Logger.getLogger(getClass)
-
  private def decodeSingleBbox(i: Int, priorBox: Tensor[Float], priorVariance: Tensor[Float],
    isClipBoxes: Boolean, bbox: Tensor[Float], varianceEncodedInTarget: Boolean,
    decodedBoxes: Tensor[Float]): Unit = {
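
For orientation, `decodeOutput` consumes the flat layout that the reworked `DetectionOutput` (below) now emits: the first element is the detection count, followed by groups of six values `(class, score, xmin, ymin, xmax, ymax)`. A small NumPy sketch of the same decoding, with illustrative numbers only:

```python
# Illustrative decode of the flat detection layout read by decodeOutput.
import numpy as np

flat = np.array([2.0,                              # two detections kept
                 1, 0.90, 0.10, 0.20, 0.50, 0.60,  # class 1
                 7, 0.80, 0.30, 0.30, 0.70, 0.90]) # class 7
num = int(flat[0])
dets = flat[1:1 + num * 6].reshape(num, 6)
for cls, score, xmin, ymin, xmax, ymax in dets:
    print(int(cls), score, (xmin, ymin, xmax, ymax))
```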
pipeline/src/main/scala/com/intel/analytics/zoo/pipeline/common/nn/DetectionOutput.scala
@@ -17,15 +17,15 @@
package com.intel.analytics.zoo.pipeline.common.nn

import com.intel.analytics.bigdl.nn.abstractnn.AbstractModule
-import com.intel.analytics.bigdl.tensor.Tensor
-import com.intel.analytics.bigdl.utils.Table
import com.intel.analytics.zoo.pipeline.common.BboxUtil
import com.intel.analytics.zoo.pipeline.common.dataset.roiimage.Target
import com.intel.analytics.zoo.pipeline.common.nn.DetectionOutput._
import com.intel.analytics.zoo.pipeline.ssd.PostProcessParam
+import com.intel.analytics.bigdl.tensor.Tensor
+import com.intel.analytics.bigdl.utils.{T, Table}
import org.apache.log4j.Logger

-class DetectionOutput(param: PostProcessParam) extends AbstractModule[Table, Table, Float] {
+@SerialVersionUID(5253792953255433914L)
+class DetectionOutput(param: PostProcessParam) extends AbstractModule[Table, Tensor[Float], Float] {
  @transient private var nms: Nms = _

  private def filterBboxes(decodedBboxes: Array[Tensor[Float]],
@@ -137,11 +137,7 @@ class DetectionOutput(param: PostProcessParam) extends AbstractModule[Table, Tab
    }
  }

-  override def updateOutput(input: Table): Table = {
-    if (isTraining()) {
-      output = input
-      return output
-    }
+  override def updateOutput(input: Table): Tensor[Float] = {
    if (nms == null) nms = new Nms()
    val loc = input[Tensor[Float]](1)
    val conf = input[Tensor[Float]](2)
@@ -163,60 +159,55 @@ class DetectionOutput(param: PostProcessParam) extends AbstractModule[Table, Tab
    val allDecodedBboxes = BboxUtil.decodeBboxesAll(allLocPreds, priorBoxes, priorVariances,
      numLocClasses, param.bgLabel, false, param.varianceEncodedInTarget, param.shareLocation,
      allLocPreds)
-    var numKept = 0
+    val numKepts = new Array[Int](batch)
+    var maxDetection = 0

    i = 0
    while (i < batch) {
-      numKept += filterBboxes(allDecodedBboxes(i), allConfScores(i),
+      val num = filterBboxes(allDecodedBboxes(i), allConfScores(i),
        allIndices(i), allIndicesNum(i))
+      numKepts(i) = num
+      maxDetection = Math.max(maxDetection, num)
      i += 1
    }
-    val results = new Array[Array[Target]](batch)
-    if (numKept > 0) {
+    // the first element is the number of detection numbers
+    output = Tensor[Float](batch, 1 + maxDetection * 6)
+    if (numKepts.sum > 0) {
      i = 0
      while (i < batch) {
+        val outi = output(i + 1)
        var c = 0
-        val result = new Array[Target](param.nClasses)
-        while (c < param.nClasses) {
+        outi.setValue(1, numKepts(i))
+        var offset = 2
+        while (c < allIndices(i).length) {
          val indices = allIndices(i)(c)
          if (indices != null) {
            val indicesNum = allIndicesNum(i)(c)
            val locLabel = if (param.shareLocation) allDecodedBboxes(i).length - 1 else c
            val bboxes = allDecodedBboxes(i)(locLabel)
            var j = 0
-            val classBboxes = Tensor[Float](indicesNum, 4)
-            val classScores = Tensor[Float](indicesNum)
            while (j < indicesNum) {
              val idx = indices(j)
-              classScores.setValue(j + 1, allConfScores(i)(c).valueAt(idx))
-              classBboxes.setValue(j + 1, 1, bboxes.valueAt(idx, 1))
-              classBboxes.setValue(j + 1, 2, bboxes.valueAt(idx, 2))
-              classBboxes.setValue(j + 1, 3, bboxes.valueAt(idx, 3))
-              classBboxes.setValue(j + 1, 4, bboxes.valueAt(idx, 4))
+              outi.setValue(offset, c)
+              outi.setValue(offset + 1, allConfScores(i)(c).valueAt(idx))
+              outi.setValue(offset + 2, bboxes.valueAt(idx, 1))
+              outi.setValue(offset + 3, bboxes.valueAt(idx, 2))
+              outi.setValue(offset + 4, bboxes.valueAt(idx, 3))
+              outi.setValue(offset + 5, bboxes.valueAt(idx, 4))
+              offset += 6
              j += 1
            }
-            // Clip the normalized bbox first.
-            BboxUtil.clipBoxes(classBboxes)
-            result(c) = Target(classScores, classBboxes)
          }
          c += 1
        }
-        results(i) = result
        i += 1
      }
    }
-    if (output == null) {
-      output = T()
-      output.insert(results)
-    } else {
-      output(1) = results
-    }
    output
  }

-  override def updateGradInput(input: Table, gradOutput: Table): Table = {
-    gradInput = gradOutput
-    gradInput
+  override def updateGradInput(input: Table, gradOutput: Tensor[Float]): Table = {
+    null
  }
}

pipeline/src/main/scala/com/intel/analytics/bigdl/python/api/SSDPythonBigDL.scala
@@ -0,0 +1,64 @@
/*
 * Copyright 2016 The BigDL Authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.intel.analytics.bigdl.python.api

import java.lang.{Boolean => JBoolean}
import java.util.{ArrayList => JArrayList, HashMap => JHashMap, List => JList, Map => JMap}

import com.intel.analytics.bigdl.dataset.{Identity => DIdentity, Sample => JSample}
import com.intel.analytics.bigdl.nn.abstractnn.{AbstractModule, Activity}
import com.intel.analytics.bigdl.numeric._
import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric
import com.intel.analytics.zoo.pipeline.ssd._
import org.apache.spark.api.java.JavaSparkContext

import scala.collection.JavaConverters._
import scala.language.existentials
import scala.reflect.ClassTag

object SSDPythonBigDL {

  def ofFloat(): PythonBigDL[Float] = new SSDPythonBigDL[Float]()

  def ofDouble(): PythonBigDL[Double] = new SSDPythonBigDL[Double]()

}


class SSDPythonBigDL[T: ClassTag](implicit ev: TensorNumeric[T]) extends PythonBigDL[T] {

  def ssdPredict(resolution: Int, batchSize: Int, nPartition: Int,
    folder: String, sc: JavaSparkContext,
    model: AbstractModule[Activity, Activity, Float], nClasses: Int = 21)
  : JList[JList[JList[JList[Float]]]] = {
    val predictor = new Predictor(model,
      PreProcessParam(batchSize, resolution, (123f, 117f, 104f), false), nClasses)
    val data = IOUtils.loadLocalFolder(nPartition, folder, sc)
    val results = predictor.predict(data).collect()
    val pathArr = data.map(x => x.path).collect()
    results.zip(pathArr).map(res => {
      val bboxes = res._1.map(r => if (r != null) r.bboxes.storage().array().toList.asJava
        else null).toList.asJava
      val scores = res._1.map(r => if (r != null) r.classes.storage().array().toList.asJava
        else null).toList.asJava
      List(scores, bboxes).asJava
    }).toList.asJava
  }
}
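
`ssdPredict` returns one entry per image, each a two-element list `[scores_per_class, bboxes_per_class]`; a class with no kept detections is `null` on the Scala side (`None` in Python). A hedged sketch of walking that structure, reusing the `results` from the notebook example earlier:

```python
# Walk the nested lists returned by ssdPredict.
for img_idx, (scores_by_class, bboxes_by_class) in enumerate(results):
    for cls, scores in enumerate(scores_by_class):
        if scores is None:
            continue  # no detections kept for this class
        # bboxes_by_class[cls] is flattened storage: 4 floats per box
        print("image %d, class %d: %d detections" % (img_idx, cls, len(scores)))
```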


