Skip to content
This repository has been archived by the owner on Feb 8, 2023. It is now read-only.

solve conflict #7

Merged
merged 1 commit into from
Feb 6, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
/*
* Copyright 2016 The BigDL Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.intel.analytics.bigdl.nn.ops

import com.intel.analytics.bigdl.tensor.Tensor
import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric

import scala.collection.mutable.ArrayBuffer
import scala.reflect.ClassTag
import scala.util.hashing.MurmurHash3

/**
 * Maps delimiter-separated categorical feature strings to integer vocabulary ids and
 * returns the ids as a sparse `Tensor[Int]`.
 *
 * The string stored at column 1 of every input row is split on `strDelimiter`. Each
 * resulting token is looked up in `vocaList`:
 *  - a token found in the vocabulary maps to its position in `vocaList`;
 *  - a token that is not found maps to `defaultValue` when `numOovBuckets` is 0,
 *    otherwise to `vocaList.length + bucket`, where `bucket` is the MurmurHash3 string
 *    hash of the token reduced to the range `[0, numOovBuckets)`.
 *
 * The k-th token of input row r (both 0-based) is written at sparse position `(r, k)`.
 *
 * @param vocaList      vocabulary; must be non-empty and must not contain duplicates
 * @param strDelimiter  delimiter separating tokens inside one input string. It is matched
 *                      literally, so regex metacharacters such as "|" or "." are safe
 * @param defaultValue  id emitted for out-of-vocabulary tokens when `numOovBuckets` is 0
 * @param numOovBuckets number of hash buckets for out-of-vocabulary tokens; must be
 *                      non-negative and cannot be combined with a custom `defaultValue`
 * @tparam T numeric type of the enclosing operation
 * @throws IllegalArgumentException if any of the constructor preconditions is violated
 */
class CategoricalColVocaList[T: ClassTag](
  val vocaList: Array[String],
  val strDelimiter: String = ",",
  val defaultValue: Int = -1,
  val numOovBuckets: Int = 0
) (implicit ev: TensorNumeric[T])
  extends Operation[Tensor[String], Tensor[Int], T] {

  private val vocaLen = vocaList.length
  // Token -> position in vocaList. Duplicate keys collapse here, which the last
  // `require` below relies on to detect them.
  private val vocaMap = vocaList.zipWithIndex.toMap

  require(numOovBuckets >= 0,
    "numOovBuckets is a negative integer")
  require(!(defaultValue != -1 && numOovBuckets != 0),
    "defaultValue and numOovBuckets are both specified")
  require(vocaLen > 0,
    "the vocabulary list is empty")
  require(vocaLen == vocaMap.size,
    "the vocabulary list contains duplicate keys")

  output = Tensor[Int]()

  /**
   * Id for an out-of-vocabulary token when hash buckets are enabled.
   *
   * `%` keeps the sign of the hash, so a negative remainder is shifted up by
   * `numOovBuckets` before being offset past the in-vocabulary ids.
   */
  private def oovBucketId(token: String): Int = {
    val remainder = MurmurHash3.stringHash(token) % numOovBuckets
    val bucket = if (remainder < 0) remainder + numOovBuckets else remainder
    vocaLen + bucket
  }

  /**
   * Tokenises every input row and converts the tokens to vocabulary ids.
   *
   * @param input tensor of strings; `size(1)` rows are read, each via `valueAt(row, 1)`
   * @return sparse tensor with shape `(rows, cols)`, where `cols` is `vocaLen + 1` when
   *         `numOovBuckets` is 0 and `vocaLen + numOovBuckets` otherwise
   */
  override def updateOutput(input: Tensor[String]): Tensor[Int] = {
    val rows = input.size(dim = 1)
    val cols = if (numOovBuckets == 0) vocaLen + 1 else vocaLen + numOovBuckets
    val shape = Array(rows, cols)
    val indices0 = new ArrayBuffer[Int]()
    val indices1 = new ArrayBuffer[Int]()
    val values = new ArrayBuffer[Int]()

    // String.split interprets its argument as a regular expression; quote the
    // delimiter so that it is always matched as plain text.
    val delimiterRegex = java.util.regex.Pattern.quote(strDelimiter)

    var i = 1
    while (i <= rows) {
      val tokens = input.valueAt(i, 1).split(delimiterRegex)
      var j = 0
      while (j < tokens.length) {
        val token = tokens(j)
        val mapVal =
          if (numOovBuckets == 0) {
            vocaMap.getOrElse(token, defaultValue)
          } else {
            // getOrElse takes its default by name: the hash is only computed on a miss.
            vocaMap.getOrElse(token, oovBucketId(token))
          }
        // Tensor rows are 1-based, sparse row indices are 0-based.
        indices0 += i - 1
        indices1 += j
        values += mapVal
        j += 1
      }
      i += 1
    }

    val indices = Array(indices0.toArray, indices1.toArray)
    output = Tensor.sparse(indices, values.toArray, shape)
    output
  }
}

/**
 * Factory for [[CategoricalColVocaList]].
 */
object CategoricalColVocaList {

  /**
   * Builds a [[CategoricalColVocaList]] operation, forwarding every argument unchanged
   * to the class constructor.
   *
   * @param vocaList      vocabulary whose positions become the output ids
   * @param strDelimiter  delimiter used to split each input string into tokens
   * @param defaultValue  id emitted for out-of-vocabulary tokens when `numOovBuckets` is 0
   * @param numOovBuckets number of hash buckets used for out-of-vocabulary tokens
   * @tparam T numeric type of the operation
   * @return a new operation instance
   */
  def apply[T: ClassTag](
    vocaList: Array[String],
    strDelimiter: String = ",",
    defaultValue: Int = -1,
    numOovBuckets: Int = 0
  ) (implicit ev: TensorNumeric[T]): CategoricalColVocaList[T] = {
    new CategoricalColVocaList[T](vocaList, strDelimiter, defaultValue, numOovBuckets)
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
/*
* Copyright 2016 The BigDL Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.intel.analytics.bigdl.nn.ops

import com.intel.analytics.bigdl.tensor.Tensor
import com.intel.analytics.bigdl.utils.T
import org.scalatest.{FlatSpec, Matchers}

/**
 * Unit tests for [[CategoricalColVocaList]]: one scenario using the default value for
 * out-of-vocabulary tokens and one using hash buckets.
 */
class CategoricalColVocaListSpec extends FlatSpec with Matchers {

  // Both scenarios look tokens up in the same three-entry vocabulary.
  private val vocabulary = Array("A", "B", "C")

  "CategoricalColVocaList operation with default value" should "work correctly" in {
    // One token per row; "D" is not in the vocabulary and falls back to -1.
    val features = Tensor[String](T(T("A"), T("B"), T("C"), T("D")))
    val expected = Tensor.sparse(
      Array(Array(0, 1, 2, 3), Array(0, 0, 0, 0)),
      Array(0, 1, 2, -1),
      Array(4, 4))

    val op = CategoricalColVocaList[Double](
      vocaList = vocabulary,
      strDelimiter = ",",
      defaultValue = -1,
      numOovBuckets = 0)
    val actual = op.forward(features)

    actual should be(expected)
  }

  "CategoricalColVocaList operation with numOvvBucket" should "work correctly" in {
    // Rows carry several comma-separated tokens; "D" is expected in hash bucket id 4.
    val features = Tensor[String](T(T("A,B"), T("C"), T("B,C,D"), T("A,D")))
    val rowIdx = Array(0, 0, 1, 2, 2, 2, 3, 3)
    val colIdx = Array(0, 1, 0, 0, 1, 2, 0, 1)
    val expected = Tensor.sparse(
      Array(rowIdx, colIdx),
      Array(0, 1, 2, 1, 2, 4, 0, 4),
      Array(4, 5))

    val op = CategoricalColVocaList[Double](
      vocaList = vocabulary,
      strDelimiter = ",",
      numOovBuckets = 2)
    val actual = op.forward(features)

    actual should be(expected)
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ import java.io.{File => JFile}
import com.google.protobuf.{ByteString, CodedOutputStream}
import com.intel.analytics.bigdl.nn._
import com.intel.analytics.bigdl.nn.abstractnn.DataFormat
import com.intel.analytics.bigdl.nn.ops.{All, Any, ApproximateEqual, ArgMax, Assert, Assign, AssignGrad, AvgPoolGrad, BatchMatMul, BiasAddGrad, BroadcastGradientArgs, Cast, CategoricalColHashBucket, Ceil, ControlNodes, Conv2D, Conv2DBackFilter, Conv2DTranspose, Conv3D, Conv3DBackpropFilter, Conv3DBackpropFilterV2, Conv3DBackpropInput, Conv3DBackpropInputV2, CrossEntropy, DecodeImage, DepthwiseConv2D, DepthwiseConv2DBackpropFilter, DepthwiseConv2DBackpropInput, Digamma, Dilation2D, Dilation2DBackpropFilter, Dilation2DBackpropInput, EluGrad, Equal, Erf, Erfc, Expm1, Floor, FloorDiv, FloorMod, FusedBatchNorm, FusedBatchNormGrad, Greater, GreaterEqual, InTopK, Inv, InvGrad, IsFinite, IsInf, IsNan, Kv2Tensor, L2Loss, LRNGrad, Less, LessEqual, Lgamma, LogicalAnd, LogicalNot, LogicalOr, MaxPool, MaxPoolGrad, Maximum, MergeOps, Minimum, Mod, ModuleToOperation, NotEqual, OneHot, Pad, ParseExample, Prod, RandomUniform, RangeOps, Rank, Relu6Grad, ReluGrad, ResizeBilinearGrad, ResizeBilinearOps, Rint, Round, RsqrtGrad, SegmentSum, SigmoidGrad, Sign, Slice, SoftplusGrad, SoftsignGrad, SqrtGrad, SquaredDifference, Substr, SwitchOps, TanhGrad, TopK, TruncateDiv, TruncatedNormal, Add => AddOps, DecodeGif => DecodeGifOps, DecodeJpeg => DecodeJpegOps, DecodePng => DecodePngOps, DecodeRaw => DecodeRawOps, Exp => ExpOps, Pow => PowOps, Select => SelectOps, Sum => SumOps, Tile => TileOps}
import com.intel.analytics.bigdl.nn.ops.{All, Any, ApproximateEqual, ArgMax, Assert, Assign, AssignGrad, AvgPoolGrad, BatchMatMul, BiasAddGrad, BroadcastGradientArgs, Cast, CategoricalColHashBucket, CategoricalColVocaList, Ceil, ControlNodes, Conv2D, Conv2DBackFilter, Conv2DTranspose, Conv3D, Conv3DBackpropFilter, Conv3DBackpropFilterV2, Conv3DBackpropInput, Conv3DBackpropInputV2, CrossEntropy, DecodeImage, DepthwiseConv2D, DepthwiseConv2DBackpropFilter, DepthwiseConv2DBackpropInput, Digamma, Dilation2D, Dilation2DBackpropFilter, Dilation2DBackpropInput, EluGrad, Equal, Erf, Erfc, Expm1, Floor, FloorDiv, FloorMod, FusedBatchNorm, FusedBatchNormGrad, Greater, GreaterEqual, InTopK, Inv, InvGrad, IsFinite, IsInf, IsNan, Kv2Tensor, L2Loss, LRNGrad, Less, LessEqual, Lgamma, LogicalAnd, LogicalNot, LogicalOr, MaxPool, MaxPoolGrad, Maximum, MergeOps, Minimum, Mod, ModuleToOperation, NotEqual, OneHot, Pad, ParseExample, Prod, RandomUniform, RangeOps, Rank, Relu6Grad, ReluGrad, ResizeBilinearGrad, ResizeBilinearOps, Rint, Round, RsqrtGrad, SegmentSum, SigmoidGrad, Sign, Slice, SoftplusGrad, SoftsignGrad, SqrtGrad, SquaredDifference, Substr, SwitchOps, TanhGrad, TopK, TruncateDiv, TruncatedNormal, Add => AddOps, DecodeGif => DecodeGifOps, DecodeJpeg => DecodeJpegOps, DecodePng => DecodePngOps, DecodeRaw => DecodeRawOps, Exp => ExpOps, Pow => PowOps, Select => SelectOps, Sum => SumOps, Tile => TileOps}
import com.intel.analytics.bigdl.nn.tf._
import com.intel.analytics.bigdl.nn.{SoftPlus => BigDLSoftPlus}
import com.intel.analytics.bigdl.tensor._
Expand Down Expand Up @@ -466,6 +466,17 @@ class OperationSerializerSpec extends SerializerSpecHelper {
runSerializationTest(categoricalColHashBucket, input)
}

"CategoricalColVocaList" should "work properly" in {
  // Single-token rows; the last token "D" is outside the vocabulary.
  val features = Tensor[String](T(T("A"), T("B"), T("C"), T("D")))
  val vocabulary = Array("A", "B", "C")
  val op = CategoricalColVocaList[Float](
    vocaList = vocabulary,
    strDelimiter = ",",
    defaultValue = -1,
    numOovBuckets = 0)
  val namedOp = op.setName("categoricalColVocaList")
  runSerializationTest(namedOp, features)
}

"LessEqual serializer" should "work properly" in {
val lessEqual = LessEqual[Float]().setName("lessEqual")
val input1 = Tensor[Float](5).apply1(_ => Random.nextFloat())
Expand Down