Skip to content

Commit

Permalink
[SPARK-1268] Adding XOR and AND-NOT operations to spark.util.collecti…
Browse files Browse the repository at this point in the history
…on.BitSet

Symmetric difference (xor) in particular is useful for computing some distance metrics (e.g. Hamming). Unit tests added.

Author: Petko Nikolov <[email protected]>

Closes alteryx#172 from petko-nikolov/bitset-imprv and squashes the following commits:

451f28b [Petko Nikolov] fixed style mistakes
5beba18 [Petko Nikolov] rm outer loop in andNot test
0e61035 [Petko Nikolov] conform to spark style; rm redundant asserts; more unit tests added; use arraycopy instead of loop
d53cdb9 [Petko Nikolov] rm incidentally added space
4e1df43 [Petko Nikolov] adding xor and and-not to BitSet; unit tests added
  • Loading branch information
Petko Nikolov authored and rxin committed Mar 27, 2014
1 parent 53953d0 commit 6f986f0
Show file tree
Hide file tree
Showing 2 changed files with 122 additions and 0 deletions.
39 changes: 39 additions & 0 deletions core/src/main/scala/org/apache/spark/util/collection/BitSet.scala
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,45 @@ class BitSet(numBits: Int) extends Serializable {
newBS
}

/**
* Compute the symmetric difference by performing bit-wise XOR of the two sets returning the
* result.
*/
def ^(other: BitSet): BitSet = {
val newBS = new BitSet(math.max(capacity, other.capacity))
val smaller = math.min(numWords, other.numWords)
var ind = 0
while (ind < smaller) {
newBS.words(ind) = words(ind) ^ other.words(ind)
ind += 1
}
if (ind < numWords) {
Array.copy( words, ind, newBS.words, ind, numWords - ind )
}
if (ind < other.numWords) {
Array.copy( other.words, ind, newBS.words, ind, other.numWords - ind )
}
newBS
}

/**
* Compute the difference of the two sets by performing bit-wise AND-NOT returning the
* result.
*/
def andNot(other: BitSet): BitSet = {
val newBS = new BitSet(capacity)
val smaller = math.min(numWords, other.numWords)
var ind = 0
while (ind < smaller) {
newBS.words(ind) = words(ind) & ~other.words(ind)
ind += 1
}
if (ind < numWords) {
Array.copy( words, ind, newBS.words, ind, numWords - ind )
}
newBS
}

/**
* Sets the bit at the specified index to true.
* @param index the bit index
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,4 +69,87 @@ class BitSetSuite extends FunSuite {
assert(bitset.nextSetBit(96) === 96)
assert(bitset.nextSetBit(97) === -1)
}

test( "xor len(bitsetX) < len(bitsetY)" ) {
val setBitsX = Seq( 0, 2, 3, 37, 41 )
val setBitsY = Seq( 0, 1, 3, 37, 38, 41, 85)
val bitsetX = new BitSet(60)
setBitsX.foreach( i => bitsetX.set(i))
val bitsetY = new BitSet(100)
setBitsY.foreach( i => bitsetY.set(i))

val bitsetXor = bitsetX ^ bitsetY

assert(bitsetXor.nextSetBit(0) === 1)
assert(bitsetXor.nextSetBit(1) === 1)
assert(bitsetXor.nextSetBit(2) === 2)
assert(bitsetXor.nextSetBit(3) === 38)
assert(bitsetXor.nextSetBit(38) === 38)
assert(bitsetXor.nextSetBit(39) === 85)
assert(bitsetXor.nextSetBit(42) === 85)
assert(bitsetXor.nextSetBit(85) === 85)
assert(bitsetXor.nextSetBit(86) === -1)

}

test( "xor len(bitsetX) > len(bitsetY)" ) {
val setBitsX = Seq( 0, 1, 3, 37, 38, 41, 85)
val setBitsY = Seq( 0, 2, 3, 37, 41 )
val bitsetX = new BitSet(100)
setBitsX.foreach( i => bitsetX.set(i))
val bitsetY = new BitSet(60)
setBitsY.foreach( i => bitsetY.set(i))

val bitsetXor = bitsetX ^ bitsetY

assert(bitsetXor.nextSetBit(0) === 1)
assert(bitsetXor.nextSetBit(1) === 1)
assert(bitsetXor.nextSetBit(2) === 2)
assert(bitsetXor.nextSetBit(3) === 38)
assert(bitsetXor.nextSetBit(38) === 38)
assert(bitsetXor.nextSetBit(39) === 85)
assert(bitsetXor.nextSetBit(42) === 85)
assert(bitsetXor.nextSetBit(85) === 85)
assert(bitsetXor.nextSetBit(86) === -1)

}

test( "andNot len(bitsetX) < len(bitsetY)" ) {
val setBitsX = Seq( 0, 2, 3, 37, 41, 48 )
val setBitsY = Seq( 0, 1, 3, 37, 38, 41, 85)
val bitsetX = new BitSet(60)
setBitsX.foreach( i => bitsetX.set(i))
val bitsetY = new BitSet(100)
setBitsY.foreach( i => bitsetY.set(i))

val bitsetDiff = bitsetX.andNot( bitsetY )

assert(bitsetDiff.nextSetBit(0) === 2)
assert(bitsetDiff.nextSetBit(1) === 2)
assert(bitsetDiff.nextSetBit(2) === 2)
assert(bitsetDiff.nextSetBit(3) === 48)
assert(bitsetDiff.nextSetBit(48) === 48)
assert(bitsetDiff.nextSetBit(49) === -1)
assert(bitsetDiff.nextSetBit(65) === -1)
}

test( "andNot len(bitsetX) > len(bitsetY)" ) {
val setBitsX = Seq( 0, 1, 3, 37, 38, 41, 85)
val setBitsY = Seq( 0, 2, 3, 37, 41, 48 )
val bitsetX = new BitSet(100)
setBitsX.foreach( i => bitsetX.set(i))
val bitsetY = new BitSet(60)
setBitsY.foreach( i => bitsetY.set(i))

val bitsetDiff = bitsetX.andNot( bitsetY )

assert(bitsetDiff.nextSetBit(0) === 1)
assert(bitsetDiff.nextSetBit(1) === 1)
assert(bitsetDiff.nextSetBit(2) === 38)
assert(bitsetDiff.nextSetBit(3) === 38)
assert(bitsetDiff.nextSetBit(38) === 38)
assert(bitsetDiff.nextSetBit(39) === 85)
assert(bitsetDiff.nextSetBit(85) === 85)
assert(bitsetDiff.nextSetBit(86) === -1)
}
}

0 comments on commit 6f986f0

Please sign in to comment.