diff --git a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/resnet/TrainImageNet.scala b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/resnet/TrainImageNet.scala index b854e3ac729..53ac3765505 100644 --- a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/resnet/TrainImageNet.scala +++ b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/resnet/TrainImageNet.scala @@ -26,7 +26,7 @@ import com.intel.analytics.bigdl.dllib.optim._ import com.intel.analytics.bigdl.dllib.utils._ import com.intel.analytics.bigdl.dllib.tensor.TensorNumericMath.TensorNumeric._ import com.intel.analytics.bigdl.utils._ -import com.intel.analytics.bigdl.dllib.utils.visualization.{TrainSummary, ValidationSummary} +import com.intel.analytics.bigdl.dllib.visualization.{TrainSummary, ValidationSummary} import org.apache.log4j.{Level, Logger} import org.apache.spark.SparkContext diff --git a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/vgg/Train.scala b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/vgg/Train.scala index 7818e60ed4f..cfddf5879bd 100644 --- a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/vgg/Train.scala +++ b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/vgg/Train.scala @@ -27,7 +27,7 @@ import com.intel.analytics.bigdl.dllib.tensor.TensorNumericMath.TensorNumeric._ import com.intel.analytics.bigdl.dllib.utils.{T, Table} import com.intel.analytics.bigdl.utils.{Engine, OptimizerV1, OptimizerV2} import com.intel.analytics.bigdl.utils.LoggerFilter -import com.intel.analytics.bigdl.dllib.utils.visualization.{TrainSummary, ValidationSummary} +import com.intel.analytics.bigdl.dllib.visualization.{TrainSummary, ValidationSummary} import org.apache.log4j.{Level, Logger} import org.apache.spark.SparkContext diff --git a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/vgg/TrainImageNet.scala 
b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/vgg/TrainImageNet.scala index 11162007d22..ec96f72e56a 100644 --- a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/vgg/TrainImageNet.scala +++ b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/vgg/TrainImageNet.scala @@ -23,7 +23,7 @@ import com.intel.analytics.bigdl.dllib.optim._ import com.intel.analytics.bigdl.utils.{Engine, MklBlas, MklDnn, OptimizerV1, OptimizerV2} import com.intel.analytics.bigdl.dllib.utils._ import com.intel.analytics.bigdl.utils.LoggerFilter -import com.intel.analytics.bigdl.dllib.utils.visualization.TrainSummary +import com.intel.analytics.bigdl.dllib.visualization.TrainSummary import org.apache.log4j.{Level, Logger} import org.apache.spark.SparkContext diff --git a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/nn/Graph.scala b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/nn/Graph.scala index 7403cd4bcf4..a56b597a447 100644 --- a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/nn/Graph.scala +++ b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/nn/Graph.scala @@ -28,7 +28,7 @@ import com.intel.analytics.bigdl.dllib.utils._ import com.intel.analytics.bigdl.dllib.utils.serializer._ import com.intel.analytics.bigdl.dllib.utils.serializer.converters.DataConverter import com.intel.analytics.bigdl.dllib.utils.tf.Tensorflow -import com.intel.analytics.bigdl.dllib.utils.visualization.tensorboard.{FileWriter => TFFileWriter} +import com.intel.analytics.bigdl.dllib.visualization.tensorboard.{FileWriter => TFFileWriter} import scala.collection.mutable import scala.collection.mutable.ArrayBuffer diff --git a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/optim/AbstractOptimizer.scala b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/optim/AbstractOptimizer.scala index eb3708b328a..5a48d5dbd92 100644 --- 
a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/optim/AbstractOptimizer.scala +++ b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/optim/AbstractOptimizer.scala @@ -26,7 +26,7 @@ import com.intel.analytics.bigdl.dllib.tensor.TensorNumericMath.TensorNumeric import com.intel.analytics.bigdl.dllib.utils.intermediate.IRGraph import com.intel.analytics.bigdl.utils.{Engine, MklBlas, MklDnn} import com.intel.analytics.bigdl.dllib.utils.{Table} -import com.intel.analytics.bigdl.dllib.utils.visualization.{TrainSummary, ValidationSummary} +import com.intel.analytics.bigdl.dllib.visualization.{TrainSummary, ValidationSummary} import org.apache.spark.rdd.{RDD, ZippedPartitionsWithLocalityRDD} import scala.reflect.ClassTag diff --git a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/optim/DistriOptimizer.scala b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/optim/DistriOptimizer.scala index 27b1ae79d2f..e18ffde0838 100644 --- a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/optim/DistriOptimizer.scala +++ b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/optim/DistriOptimizer.scala @@ -26,7 +26,7 @@ import com.intel.analytics.bigdl.dllib.tensor.Tensor import com.intel.analytics.bigdl.dllib.tensor.TensorNumericMath.TensorNumeric import com.intel.analytics.bigdl.dllib.utils._ import com.intel.analytics.bigdl.dllib.utils.intermediate.{ConversionUtils, IRGraph} -import com.intel.analytics.bigdl.dllib.utils.visualization.{TrainSummary, ValidationSummary} +import com.intel.analytics.bigdl.dllib.visualization.{TrainSummary, ValidationSummary} import com.intel.analytics.bigdl.utils._ import com.intel.analytics.bigdl.{Module, _} import java.io.{File, FilenameFilter} diff --git a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/optim/DistriOptimizerV2.scala b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/optim/DistriOptimizerV2.scala index e9a85144af8..428a323f71b 100644 --- 
a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/optim/DistriOptimizerV2.scala +++ b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/optim/DistriOptimizerV2.scala @@ -29,7 +29,7 @@ import com.intel.analytics.bigdl.dllib.tensor.TensorNumericMath.TensorNumeric import com.intel.analytics.bigdl.utils._ import com.intel.analytics.bigdl.dllib.utils._ import com.intel.analytics.bigdl.dllib.utils.intermediate.ConversionUtils -import com.intel.analytics.bigdl.dllib.utils.visualization.{TrainSummary, ValidationSummary} +import com.intel.analytics.bigdl.dllib.visualization.{TrainSummary, ValidationSummary} import com.intel.analytics.bigdl.{Module, _} import org.apache.log4j.Logger import org.apache.spark.rdd.RDD diff --git a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/optim/Optimizer.scala b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/optim/Optimizer.scala index 402253963a2..5d61c0fe528 100644 --- a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/optim/Optimizer.scala +++ b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/optim/Optimizer.scala @@ -26,7 +26,7 @@ import com.intel.analytics.bigdl.dllib.optim.parameters.{ConstantClippingProcess L2NormClippingProcessor, ParameterProcessor} import com.intel.analytics.bigdl.dllib.tensor.TensorNumericMath.TensorNumeric import com.intel.analytics.bigdl.dllib.utils._ -import com.intel.analytics.bigdl.dllib.utils.visualization.{TrainSummary, ValidationSummary} +import com.intel.analytics.bigdl.dllib.visualization.{TrainSummary, ValidationSummary} import com.intel.analytics.bigdl.utils._ import org.apache.log4j.Logger import org.apache.spark.rdd.RDD diff --git a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/optim/ParallelOptimizer.scala b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/optim/ParallelOptimizer.scala index 3695e17de98..4bae6b6b9ed 100644 --- 
a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/optim/ParallelOptimizer.scala +++ b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/optim/ParallelOptimizer.scala @@ -27,7 +27,7 @@ import com.intel.analytics.bigdl.dllib.tensor.Tensor import com.intel.analytics.bigdl.dllib.tensor.TensorNumericMath.TensorNumeric import com.intel.analytics.bigdl.dllib.utils._ import com.intel.analytics.bigdl.utils._ -import com.intel.analytics.bigdl.dllib.utils.visualization.{TrainSummary, ValidationSummary} +import com.intel.analytics.bigdl.dllib.visualization.{TrainSummary, ValidationSummary} import com.intel.analytics.bigdl.{Module, _} import java.io.{File, FilenameFilter} import java.text.SimpleDateFormat diff --git a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/utils/python/api/PythonBigDL.scala b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/utils/python/api/PythonBigDL.scala index 9b51ae5276d..05dd3b6a04a 100644 --- a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/utils/python/api/PythonBigDL.scala +++ b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/utils/python/api/PythonBigDL.scala @@ -28,7 +28,7 @@ import com.intel.analytics.bigdl.dllib.optim.{Optimizer, _} import com.intel.analytics.bigdl.dllib.tensor.{DenseType, SparseType, Storage, Tensor} import com.intel.analytics.bigdl.dllib.tensor.TensorNumericMath.TensorNumeric import com.intel.analytics.bigdl.dllib.utils.{Table, _} -import com.intel.analytics.bigdl.dllib.utils.visualization.{Summary, TrainSummary, ValidationSummary} +import com.intel.analytics.bigdl.dllib.visualization.{Summary, TrainSummary, ValidationSummary} import com.intel.analytics.bigdl.utils._ import org.apache.spark.api.java.{JavaRDD, JavaSparkContext} import org.apache.spark.rdd.RDD diff --git a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/visualization/README.md b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/visualization/README.md new file mode 100644 
index 00000000000..fbbbd2041e7 --- /dev/null +++ b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/visualization/README.md @@ -0,0 +1,40 @@ +# Visualization via Tensorboard + +> This wheel distribution is provided by https://github.com/dmlc/tensorboard + +> You can find the wheel repository at https://pypi.python.org/pypi/tensorboard + +Please follow the instructions below to install TensorBoard; it has been tested on both Ubuntu and Mac OS. Please refer to the [Known Issues](https://github.com/122689305/BigDL/tree/readme/spark/dl/src/main/scala/com/intel/analytics/bigdl/visualization#known-issues) section for possible errors. + +## Requirement + +Python version: 2.7, 3.4, 3.5, 3.6 + +Pip version >= 9.0.1 + +## Installation + +### Python 2 +```pip install tensorboard==1.0.0a4``` +### Python 3 +```pip3 install tensorboard==1.0.0a4``` + +## Known Issues + +> #### 1. Issue: No compatible version of tensorboard + +Solutions +* [Update](https://pip.pypa.io/en/stable/installing/) your pip version to the latest: https://pip.pypa.io/en/stable/installing/ +*  Check whether your Python supports wide Unicode if you use python 2.7 +``` +python -c 'import sys;print(sys.maxunicode)' +``` +  It should output `1114111` + +> #### 2. RuntimeError: module compiled against API version 0xa but this version of numpy is 0x9 + +  Check your python library path (sys.path) to see whether it includes numpy module + +> #### 3. RuntimeError: Cannot load some specific libraries, like '_pywrap_tensorflow.so'. + +  Set your 'PATH' environment variable so that `$ which python` outputs the path of your python that has installed tensorboard. 
diff --git a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/visualization/Summary.scala b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/visualization/Summary.scala new file mode 100644 index 00000000000..81b87a58626 --- /dev/null +++ b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/visualization/Summary.scala @@ -0,0 +1,186 @@ +/* + * Copyright 2016 The BigDL Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.intel.analytics.bigdl.dllib.visualization + +import com.intel.analytics.bigdl.dllib.tensor.Tensor +import com.intel.analytics.bigdl.dllib.tensor.TensorNumericMath.TensorNumeric +import com.intel.analytics.bigdl.dllib.visualization.tensorboard.{FileWriter} +import org.tensorflow + +import scala.reflect.ClassTag + +/** + * Logger for tensorboard. + * Support scalar and histogram now. + * @param logDir + * @param appName + */ +abstract class Summary( + logDir: String, + appName: String) { + protected val writer: FileWriter + + /** + * Add a scalar summary. + * @param tag tag name. + * @param value tag value. + * @param step current step. + * @return this + */ + def addScalar( + tag: String, + value: Float, + step: Long): this.type = { + writer.addSummary( + Summary.scalar(tag, value), step + ) + this + } + + /** + * Add a histogram summary. + * @param tag tag name. + * @param value a tensor. + * @param step current step. 
+ * @return this + */ + def addHistogram[T: ClassTag]( + tag: String, + value: Tensor[T], + step: Long)(implicit ev: TensorNumeric[T]): this.type = { + writer.addSummary( + Summary.histogram[T](tag, value), step + ) + this + } + + /** + * Read scalar values to an array of triple by tag name. + * First element of the triple is step, second is value, third is wallclocktime. + * @param tag tag name. + * @return an array of triple. + */ + def readScalar(tag: String): Array[(Long, Float, Double)] + + /** + * Close this logger. + */ + def close(): Unit = { + writer.close() + } +} + +object Summary { + + /** + * Create a scalar summary. + * @param tag tag name + * @param scalar scalar value + * @return + */ + def scalar(tag: String, scalar : Float): tensorflow.framework.Summary = { + val v = tensorflow.framework.Summary.Value.newBuilder().setTag(tag).setSimpleValue(scalar) + tensorflow.framework.Summary.newBuilder().addValue(v).build() + } + + private val limits = makeHistogramBuckets() + + /** + * Create a histogram summary. + * @param tag tag name. + * @param values values. 
+ * @return + */ + def histogram[T: ClassTag]( + tag: String, + values: Tensor[T])(implicit ev: TensorNumeric[T]): tensorflow.framework.Summary = { + val counts = new Array[Int](limits.length) + + var squares = 0.0 + values.apply1{value => + val v = ev.toType[Double](value) + squares += v * v + val index = bisectLeft(limits, v) + counts(index) += 1 + value + } + + val histogram = tensorflow.framework.HistogramProto.newBuilder() + .setMin(ev.toType[Double](values.min())) + .setMax(ev.toType[Double](values.max())) + .setNum(values.nElement()) + .setSum(ev.toType[Double](values.sum())) + .setSumSquares(squares) + + var i = 0 + while (i < counts.length) { + if (counts(i) != 0) { + histogram.addBucket(counts(i)) + histogram.addBucketLimit(limits(i)) + } + i += 1 + } + val v = tensorflow.framework.Summary.Value.newBuilder().setTag(tag).setHisto(histogram) + tensorflow.framework.Summary.newBuilder().addValue(v).build() + } + + /** + * Find a bucket for x. + */ + private def bisectLeft( + a: Array[Double], + x: Double, + lo: Int = 0, + hi: Int = -1): Int = { + require(lo >= 0) + var high = if (hi == -1) { + a.length + } else { + hi + } + var low = lo + + while (low < high) { + val mid = (low + high) / 2 + if (a(mid) < x) { + low = mid + 1 + } else { + high = mid + } + } + low + } + + /** + * Create a histogram buckets. 
+ * @return + */ + private def makeHistogramBuckets(): Array[Double] = { + var v = 1e-12 + val buckets = new Array[Double](1549) + var i = 1 + buckets(774) = 0.0 + while (i <= 774) { + buckets(774 + i) = v + buckets(774 - i) = -v + v *= 1.1 + i += 1 + } + buckets + } + +} diff --git a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/visualization/TrainSummary.scala b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/visualization/TrainSummary.scala new file mode 100644 index 00000000000..98ee138d343 --- /dev/null +++ b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/visualization/TrainSummary.scala @@ -0,0 +1,95 @@ +/* + * Copyright 2016 The BigDL Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.intel.analytics.bigdl.dllib.visualization + +import com.intel.analytics.bigdl.dllib.optim.Trigger +import com.intel.analytics.bigdl.dllib.visualization.tensorboard.{FileReader, FileWriter} + +import scala.collection.mutable + +/** + * Train logger for tensorboard. + * Use optimize.setTrainSummary to enable train logger. Then the log will be saved to + * logDir/appName/train. + * + * @param logDir log dir. + * @param appName application Name. 
+ */ +class TrainSummary( + logDir: String, + appName: String) extends Summary(logDir, appName) { + protected val folder = s"$logDir/$appName/train" + protected override val writer = new FileWriter(folder) + private val triggers: mutable.HashMap[String, Trigger] = mutable.HashMap( + "Loss" -> Trigger.severalIteration(1), + "Throughput" -> Trigger.severalIteration(1)) + + /** + * Read scalar values to an array of triple by tag name. + * First element of the triple is step, second is value, third is wallClockTime. + * @param tag tag name. Supported tag names is "LearningRate", "Loss", "Throughput" + * @return an array of triple. + */ + override def readScalar(tag: String): Array[(Long, Float, Double)] = { + FileReader.readScalar(folder, tag) + } + + /** + * Supported tag name are LearningRate, Loss, Throughput, Parameters. + * Parameters contains weight, bias, gradWeight, gradBias, and some running status(eg. + * runningMean and runningVar in BatchNormalization). + * + * Notice: By default, we record LearningRate, Loss and Throughput each iteration, while + * recording parameters is disabled. The reason is getting parameters from workers is a + * heavy operation when the model is very big. + * + * @param tag tag name + * @param trigger trigger + * @return + */ + def setSummaryTrigger(tag: String, trigger: Trigger): this.type = { + require(tag.equals("LearningRate") || tag.equals("Loss") || + tag.equals("Throughput") | tag.equals("Parameters"), + s"TrainSummary: only support LearningRate, Loss, Parameters and Throughput") + triggers(tag) = trigger + this + } + + /** + * Get a trigger by tag name. 
+ * @param tag + * @return + */ + def getSummaryTrigger(tag: String): Option[Trigger] = { + if (triggers.contains(tag)) { + Some(triggers(tag)) + } else { + None + } + } + + private[bigdl] def getScalarTriggers(): Iterator[(String, Trigger)] = { + triggers.filter(!_._1.equals("Parameters")).toIterator + } +} + +object TrainSummary{ + def apply(logDir: String, + appName: String): TrainSummary = { + new TrainSummary(logDir, appName) + } +} diff --git a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/visualization/ValidationSummary.scala b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/visualization/ValidationSummary.scala new file mode 100644 index 00000000000..5a66599abfc --- /dev/null +++ b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/visualization/ValidationSummary.scala @@ -0,0 +1,51 @@ +/* + * Copyright 2016 The BigDL Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.intel.analytics.bigdl.dllib.visualization + +import com.intel.analytics.bigdl.dllib.visualization.tensorboard.{FileReader, FileWriter} + +/** + * Validation logger for tensorboard. + * Use optimize.setValidation to enable validation logger. Then the log will be saved to + * logDir/appName/Validation. 
+ * + * @param logDir + * @param appName + */ +class ValidationSummary( + logDir: String, + appName: String) extends Summary(logDir, appName) { + protected val folder = s"$logDir/$appName/validation" + protected override val writer = new FileWriter(folder) + + /** + * ReadScalar by tag name. Optional tag name is based on ValidationMethod, "Loss", + * "Top1Accuracy" or "Top5Accuracy". + * @param tag tag name. + * @return an array of triple. + */ + override def readScalar(tag: String): Array[(Long, Float, Double)] = { + FileReader.readScalar(folder, tag) + } +} + +object ValidationSummary{ + def apply(logDir: String, + appName: String): ValidationSummary = { + new ValidationSummary(logDir, appName) + } +} diff --git a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/visualization/tensorboard/EventWriter.scala b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/visualization/tensorboard/EventWriter.scala new file mode 100644 index 00000000000..29dd80a0b4b --- /dev/null +++ b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/visualization/tensorboard/EventWriter.scala @@ -0,0 +1,73 @@ +/* + * Copyright 2016 The BigDL Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.intel.analytics.bigdl.dllib.visualization.tensorboard + +import java.net.InetAddress +import java.util.concurrent.{LinkedBlockingDeque, TimeUnit} + +import org.apache.hadoop.fs.{FileSystem, Path} +import org.tensorflow.util.Event + +/** + * Event writer, write event protocol buffers to file. + * + * @param logDir Support local directory and HDFS directory + * @param flushMillis + */ +private[bigdl] class EventWriter(logDir: String, + flushMillis: Int = 1000, + fs: FileSystem) extends Runnable { + private val eventQueue = new LinkedBlockingDeque[Event]() + private val outputFile = new Path(logDir + + s"/bigdl.tfevents.${(System.currentTimeMillis() / 1e3).toInt}" + + s".${InetAddress.getLocalHost().getHostName()}") + private val recordWriter = new RecordWriter(outputFile, fs) + // Add an empty Event to the queue. + eventQueue.add(Event.newBuilder().setWallTime(System.currentTimeMillis() / 1e3).build()) + @volatile private var running: Boolean = true + + def addEvent(event: Event): this.type = { + eventQueue.add(event) + this + } + + private def flush(): this.type = { + while (!eventQueue.isEmpty) { + recordWriter.write(eventQueue.pop()) + } + this + } + + private def writeEvent(): this.type = { + val e = eventQueue.poll(flushMillis, TimeUnit.MILLISECONDS) + if (null != e) recordWriter.write(e) + this + } + + def close(): this.type = { + running = false + this + } + + override def run(): Unit = { + while (running) { + writeEvent() + } + flush() + recordWriter.close() + } +} diff --git a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/visualization/tensorboard/FileReader.scala b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/visualization/tensorboard/FileReader.scala new file mode 100644 index 00000000000..744abf37579 --- /dev/null +++ b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/visualization/tensorboard/FileReader.scala @@ -0,0 +1,122 @@ +/* + * Copyright 2016 The BigDL Authors. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.intel.analytics.bigdl.dllib.visualization.tensorboard + +import java.io.{BufferedInputStream} +import java.nio.ByteBuffer + +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.{FileSystem, Path} +import org.tensorflow.util.Event + +import scala.collection.mutable.ArrayBuffer +import scala.util.matching.Regex + +private[bigdl] object FileReader { + val fileNameRegex = """bigdl.tfevents.*""".r + + /** + * Search file with regex. + * @param f + * @param r + * @return + */ + private def recursiveListFiles(f: Path, r: Regex, fs: FileSystem): Array[Path] = { + val buffer = new ArrayBuffer[Path]() + val files = fs.listFiles(f, true) + while (files.hasNext) { + val file = files.next().getPath + if (r.findFirstIn(file.getName).isDefined) { + buffer.append(file) + } + } + buffer.toArray + } + + /** + * List all events file in path. + * @param path should be a local/HDFS folder. + * @return + */ + def listFiles(path: String): Array[Path] = { + val logPath = new Path(path) + val fs = logPath.getFileSystem(new Configuration(false)) + require(fs.isDirectory(logPath), s"FileReader: $path should be a directory") + FileReader.recursiveListFiles(logPath, fileNameRegex, fs) + } + + /** + * List all folders contains event files in path. + * @param path should be a local/HDFS folder. 
+ * @return + */ + def list(path: String): Array[String] = { + val logPath = new Path(path) + val fs = logPath.getFileSystem(new Configuration(false)) + require(fs.isDirectory(logPath), s"FileReader: $path should be a directory") + FileReader.recursiveListFiles(logPath, fileNameRegex, fs).map(_.getParent.toString).distinct + } + + /** + * Read all scalar events named tag from a path. + * @param path should be a local/HDFS folder. + * @param tag tag name. + * @return + */ + def readScalar(path: String, tag: String): Array[(Long, Float, Double)] = { + val logPath = new Path(path) + val fs = logPath.getFileSystem(new Configuration(false)) + require(fs.isDirectory(logPath), s"FileReader: $path should be a directory") + val files = FileReader.recursiveListFiles(logPath, fileNameRegex, fs) + files.map{file => + readScalar(file, tag, fs) + }.flatMap(_.toIterator).sortWith(_._1 < _._1) + } + + /** + * Read all scalar events named tag from a file. + * @param file The path of file. Support local/HDFS path. + * @param tag tag name. 
+ * @return + */ + def readScalar(file: Path, tag: String, fs: FileSystem): Array[(Long, Float, Double)] = { + require(fs.isFile(file), s"FileReader: ${file} should be a file") + val bis = new BufferedInputStream(fs.open(file)) + val longBuffer = new Array[Byte](8) + val crcBuffer = new Array[Byte](4) + val bf = new ArrayBuffer[(Long, Float, Double)] + while (bis.read(longBuffer) > 0) { + val l = ByteBuffer.wrap(longBuffer.reverse).getLong() + bis.read(crcBuffer) + // TODO: checksum + // val crc1 = ByteBuffer.wrap(crcBuffer.reverse).getInt() + val eventBuffer = new Array[Byte](l.toInt) + bis.read(eventBuffer) + val e = Event.parseFrom(eventBuffer) + if (e.getSummary.getValueCount == 1 && + tag.equals(e.getSummary.getValue(0).getTag())) { + bf.append((e.getStep, e.getSummary.getValue(0).getSimpleValue, + e.getWallTime)) + } + bis.read(crcBuffer) + // val crc2 = ByteBuffer.wrap(crcBuffer.reverse).getInt() + } + bis.close() + bf.toArray.sortWith(_._1 < _._1) + } +} + diff --git a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/visualization/tensorboard/FileWriter.scala b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/visualization/tensorboard/FileWriter.scala new file mode 100644 index 00000000000..80ef4eba4ca --- /dev/null +++ b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/visualization/tensorboard/FileWriter.scala @@ -0,0 +1,81 @@ +/* + * Copyright 2016 The BigDL Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.intel.analytics.bigdl.dllib.visualization.tensorboard + +import com.intel.analytics.bigdl.utils.Engine +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.Path +import org.tensorflow +import org.tensorflow.framework.GraphDef +import org.tensorflow.util.Event + +/** + * Writes Summary protocol buffers to event files. + * @param logDirectory Support local directory and HDFS directory + * @param flushMillis Interval to flush events queue. + */ +private[bigdl] class FileWriter(val logDirectory : String, flushMillis: Int = 1000) { + private val logPath = new Path(logDirectory) + // write to local disk by default + private val fs = logPath.getFileSystem(new Configuration(false)) + + require(!fs.exists(logPath) || fs.isDirectory(logPath), s"FileWriter: can not create $logPath") + if (!fs.exists(logPath)) fs.mkdirs(logPath) + + private val eventWriter = new EventWriter(logDirectory, flushMillis, fs) + Engine.default.invoke(() => eventWriter.run()) + + /** + * Adds a Summary protocol buffer to the event file. + * @param summary a Summary protobuf String generated by bigdl.utils.Summary's + * scalar()/histogram(). + * @param globalStep a consistent global count of the event. + * @return + */ + def addSummary(summary: tensorflow.framework.Summary, globalStep: Long): this.type = { + val event = Event.newBuilder().setSummary(summary).build() + addEvent(event, globalStep) + this + } + + def addGraphDef(graph: GraphDef): this.type = { + val event = Event.newBuilder().setGraphDef(graph.toByteString).build() + eventWriter.addEvent(event) + this + } + + /** + * Add a event protocol buffer to the event file. + * @param event A event protobuf contains summary protobuf. + * @param globalStep a consistent global count of the event. 
+ * @return + */ + def addEvent(event: Event, globalStep: Long): this.type = { + eventWriter.addEvent( + event.toBuilder.setWallTime(System.currentTimeMillis() / 1e3).setStep(globalStep).build()) + this + } + + /** + * Close file writer. + * @return + */ + def close(): Unit = { + eventWriter.close() + fs.close() + } +} diff --git a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/visualization/tensorboard/RecordWriter.scala b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/visualization/tensorboard/RecordWriter.scala new file mode 100644 index 00000000000..067e41c18ea --- /dev/null +++ b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/visualization/tensorboard/RecordWriter.scala @@ -0,0 +1,57 @@ +/* + * Copyright 2016 The BigDL Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.intel.analytics.bigdl.dllib.visualization.tensorboard + +import java.io.{File, FileOutputStream} + +import com.google.common.primitives.{Ints, Longs} +import com.intel.analytics.bigdl.dllib.utils.Crc32 +import netty.Crc32c +import org.apache.hadoop.fs.{FSDataOutputStream, FileSystem, Path} +import org.tensorflow.util.Event + +/** + * A writer to write event protobuf to file by tensorboard's format. 
+ * Each record is framed as: 8-byte length header, masked CRC32C of the header,
+ * the serialized event bytes, then masked CRC32C of the payload.
+ * @param file Support local path and HDFS path
+ */
+private[bigdl] class RecordWriter(file: Path, fs: FileSystem) {
+  // hdfs:// paths go through the Hadoop stream; everything else uses a plain FileOutputStream.
+  val outputStream = if (file.toString.startsWith("hdfs://")) {
+    // FSDataOutputStream couldn't flush data to localFileSystem in time. So reading summaries
+    // will throw exception.
+    fs.create(file, true, 1024)
+  } else {
+    // Using FileOutputStream when write to local.
+    new FileOutputStream(new File(file.toString))
+  }
+  // CRC32C state reused across writes for the masked checksums below.
+  val crc32 = new Crc32c()
+
+  /**
+   * Appends one event record to the file: length header, masked CRC of the header,
+   * event bytes, masked CRC of the event bytes.
+   * @param event the event protobuf to append.
+   */
+  def write(event: Event): Unit = {
+    val eventString = event.toByteArray
+    // .reverse flips the big-endian output of Longs/Ints.toByteArray to little-endian on disk.
+    val header = Longs.toByteArray(eventString.length.toLong).reverse
+    outputStream.write(header)
+    outputStream.write(Ints.toByteArray(Crc32.maskedCRC32(crc32, header).toInt).reverse)
+    outputStream.write(eventString)
+    outputStream.write(Ints.toByteArray(Crc32.maskedCRC32(crc32, eventString).toInt).reverse)
+    if (outputStream.isInstanceOf[FSDataOutputStream]) {
+      // Flush data to HDFS.
+      outputStream.asInstanceOf[FSDataOutputStream].hflush()
+    }
+  }
+
+  /** Closes the underlying output stream (local or HDFS). */
+  def close(): Unit = {
+    outputStream.close()
+  }
+}
diff --git a/scala/dllib/src/test/scala/com/intel/analytics/bigdl/dllib/integration/HdfsSpec.scala b/scala/dllib/src/test/scala/com/intel/analytics/bigdl/dllib/integration/HdfsSpec.scala
index fc0ffc10e1a..a2b88ebd5c4 100644
--- a/scala/dllib/src/test/scala/com/intel/analytics/bigdl/dllib/integration/HdfsSpec.scala
+++ b/scala/dllib/src/test/scala/com/intel/analytics/bigdl/dllib/integration/HdfsSpec.scala
@@ -31,8 +31,8 @@ import com.intel.analytics.bigdl.dllib.tensor.TensorNumericMath.TensorNumeric.Nu
 import com.intel.analytics.bigdl.dllib.utils.caffe.{CaffeLoader, CaffePersister}
 import com.intel.analytics.bigdl.dllib.utils.tf._
 import com.intel.analytics.bigdl.dllib.utils.{File}
-import com.intel.analytics.bigdl.dllib.utils.visualization.Summary
-import com.intel.analytics.bigdl.dllib.utils.visualization.tensorboard.{FileReader, FileWriter}
+import com.intel.analytics.bigdl.dllib.visualization.Summary
+import
com.intel.analytics.bigdl.dllib.visualization.tensorboard.{FileReader, FileWriter} import com.intel.analytics.bigdl.utils._ import org.apache.commons.compress.utils.IOUtils import org.apache.hadoop.conf.Configuration diff --git a/scala/dllib/src/test/scala/com/intel/analytics/bigdl/dllib/optim/DistriOptimizerSpec.scala b/scala/dllib/src/test/scala/com/intel/analytics/bigdl/dllib/optim/DistriOptimizerSpec.scala index 05c16b0541c..496b61458d3 100644 --- a/scala/dllib/src/test/scala/com/intel/analytics/bigdl/dllib/optim/DistriOptimizerSpec.scala +++ b/scala/dllib/src/test/scala/com/intel/analytics/bigdl/dllib/optim/DistriOptimizerSpec.scala @@ -30,7 +30,7 @@ import com.intel.analytics.bigdl.dllib.nn.mkldnn.Phase.TrainingPhase import com.intel.analytics.bigdl.dllib.optim.parameters.AllReduceParameter import com.intel.analytics.bigdl.dllib.tensor.{DenseTensor, DnnStorage, Storage, Tensor} import com.intel.analytics.bigdl.dllib.utils._ -import com.intel.analytics.bigdl.dllib.utils.visualization.TrainSummary +import com.intel.analytics.bigdl.dllib.visualization.TrainSummary import com.intel.analytics.bigdl.utils._ import org.apache.log4j.{Level, Logger} import org.apache.spark.SparkContext diff --git a/scala/dllib/src/test/scala/com/intel/analytics/bigdl/dllib/optim/DistriOptimizerV2Spec.scala b/scala/dllib/src/test/scala/com/intel/analytics/bigdl/dllib/optim/DistriOptimizerV2Spec.scala index 0f33fa707d6..2789fc00b7b 100644 --- a/scala/dllib/src/test/scala/com/intel/analytics/bigdl/dllib/optim/DistriOptimizerV2Spec.scala +++ b/scala/dllib/src/test/scala/com/intel/analytics/bigdl/dllib/optim/DistriOptimizerV2Spec.scala @@ -30,7 +30,7 @@ import com.intel.analytics.bigdl.dllib.nn.mkldnn.Phase.TrainingPhase import com.intel.analytics.bigdl.dllib.optim.parameters.AllReduceParameter import com.intel.analytics.bigdl.dllib.tensor.{DenseTensor, DnnStorage, Storage, Tensor} import com.intel.analytics.bigdl.dllib.utils._ -import 
com.intel.analytics.bigdl.dllib.utils.visualization.TrainSummary +import com.intel.analytics.bigdl.dllib.visualization.TrainSummary import com.intel.analytics.bigdl.utils._ import org.apache.log4j.{Level, Logger} import org.apache.spark.SparkContext diff --git a/scala/dllib/src/test/scala/com/intel/analytics/bigdl/dllib/optim/LocalOptimizerSpec.scala b/scala/dllib/src/test/scala/com/intel/analytics/bigdl/dllib/optim/LocalOptimizerSpec.scala index 6fc9a72d23f..743cdbab199 100644 --- a/scala/dllib/src/test/scala/com/intel/analytics/bigdl/dllib/optim/LocalOptimizerSpec.scala +++ b/scala/dllib/src/test/scala/com/intel/analytics/bigdl/dllib/optim/LocalOptimizerSpec.scala @@ -26,7 +26,7 @@ import com.intel.analytics.bigdl.dllib.nn.mkldnn.HeapData import com.intel.analytics.bigdl.dllib.tensor.{DnnStorage, Storage, Tensor} import com.intel.analytics.bigdl.dllib.utils.{T} import com.intel.analytics.bigdl.utils._ -import com.intel.analytics.bigdl.dllib.utils.visualization.TrainSummary +import com.intel.analytics.bigdl.dllib.visualization.TrainSummary import org.scalatest.{BeforeAndAfter, FlatSpec, Matchers} object DummyDataSet extends LocalDataSet[MiniBatch[Float]] { diff --git a/scala/dllib/src/test/scala/com/intel/analytics/bigdl/dllib/python/api/PythonSpec.scala b/scala/dllib/src/test/scala/com/intel/analytics/bigdl/dllib/python/api/PythonSpec.scala index b34cbec6b01..0435d9653f4 100644 --- a/scala/dllib/src/test/scala/com/intel/analytics/bigdl/dllib/python/api/PythonSpec.scala +++ b/scala/dllib/src/test/scala/com/intel/analytics/bigdl/dllib/python/api/PythonSpec.scala @@ -24,7 +24,7 @@ import com.intel.analytics.bigdl.dllib.feature.dataset.DataSet import com.intel.analytics.bigdl.dllib.nn._ import com.intel.analytics.bigdl.dllib.optim._ import com.intel.analytics.bigdl.dllib.utils.{T, Table, TestUtils} -import com.intel.analytics.bigdl.dllib.utils.visualization.{TrainSummary, ValidationSummary} +import com.intel.analytics.bigdl.dllib.visualization.{TrainSummary, 
ValidationSummary} import com.intel.analytics.bigdl.utils._ import org.apache.log4j.{Level, Logger} import org.apache.spark.{SparkConf, SparkContext} diff --git a/scala/dllib/src/test/scala/com/intel/analytics/bigdl/dllib/visualization/SummarySpec.scala b/scala/dllib/src/test/scala/com/intel/analytics/bigdl/dllib/visualization/SummarySpec.scala index 17e34981018..61ea08b0629 100644 --- a/scala/dllib/src/test/scala/com/intel/analytics/bigdl/dllib/visualization/SummarySpec.scala +++ b/scala/dllib/src/test/scala/com/intel/analytics/bigdl/dllib/visualization/SummarySpec.scala @@ -14,13 +14,13 @@ * limitations under the License. */ -package com.intel.analytics.bigdl.dllib.utils.visualization +package com.intel.analytics.bigdl.dllib.visualization import com.intel.analytics.bigdl.dllib.example.loadmodel.AlexNet import com.intel.analytics.bigdl.dllib.tensor.Tensor import com.intel.analytics.bigdl.dllib.utils.{TestUtils} import Summary._ -import com.intel.analytics.bigdl.dllib.utils.visualization.tensorboard.{FileReader, FileWriter} +import com.intel.analytics.bigdl.dllib.visualization.tensorboard.{FileReader, FileWriter} import com.intel.analytics.bigdl.utils._ import org.scalatest.{BeforeAndAfter, FlatSpec, Matchers} import org.tensorflow.framework