Skip to content

Commit

Permalink
server: Update the GolombEncoding to use Set and SortedSet
Browse files Browse the repository at this point in the history
  • Loading branch information
AlexITC committed Mar 19, 2019
1 parent aeeb7f5 commit 8c0f92a
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 14 deletions.
26 changes: 14 additions & 12 deletions server/app/com/xsn/explorer/gcs/GolombEncoding.scala
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ package com.xsn.explorer.gcs

import com.google.common.hash.Hashing

import scala.collection.SortedSet

/**
* A Golomb-coded set matches every item in the set with probability 1, and matches items that are not in the set with probability 1/M.
*
Expand All @@ -17,9 +19,9 @@ class GolombEncoding(p: Int, m: Int, key: SipHashKey) {
/**
* Encodes the given word list.
*/
def encode(words: List[String]): GolombCodedSet = {
val hashList = hashes(words)
val diffList = differences(hashList)
def encode(words: Set[String]): GolombCodedSet = {
val sortedHashes = hashes(words)
val diffList = differences(sortedHashes)
val encodedBits = diffList.flatMap(golombEncode)
val encodedBytes = encodedBits
.grouped(8)
Expand All @@ -41,9 +43,9 @@ class GolombEncoding(p: Int, m: Int, key: SipHashKey) {
*
* @param encoded the encoded bytes; they are expected to be correct
* @param n the number of words encoded in the bytes
* @return the recovered list of hashes
* @return the recovered sorted set of hashes
*/
private[gcs] def decode(encoded: List[UnsignedByte], n: Int): List[BigInt] = {
private[gcs] def decode(encoded: List[UnsignedByte], n: Int): SortedSet[BigInt] = {
val encodedBits = encoded.flatMap(_.bits)
val (_, _, result) = List.fill(n)(0)
.foldLeft((encodedBits, BigInt(0), List.empty[BigInt])) { case ((bits, acc, hashes), _) =>
Expand All @@ -52,19 +54,19 @@ class GolombEncoding(p: Int, m: Int, key: SipHashKey) {
(remaining, hash, hash :: hashes)
}

result.reverse
result.to[SortedSet]
}

/**
* Maps the word list to a list of hashes.
* Maps the word set to a sorted set of hashes.
*/
private[gcs] def hashes(words: List[String]): List[BigInt] = {
val modulus = BigInt(m) * words.length
private[gcs] def hashes(words: Set[String]): SortedSet[BigInt] = {
val modulus = BigInt(m) * words.size
val f = fastReduction(_: BigInt, modulus)
words
.map(hash)
.map(f)
.sorted
.to[SortedSet]
}

private def golombEncode(x: BigInt): List[Bit] = {
Expand All @@ -88,8 +90,8 @@ class GolombEncoding(p: Int, m: Int, key: SipHashKey) {
(pending, x)
}

private def differences(sortedHashList: List[BigInt]): List[BigInt] = {
(BigInt(0) :: sortedHashList)
private def differences(sortedHashes: SortedSet[BigInt]): List[BigInt] = {
(BigInt(0) :: sortedHashes.toList)
.sliding(2)
.map { case a :: b :: Nil => b - a }
.toList
Expand Down
4 changes: 2 additions & 2 deletions server/test/com/xsn/explorer/gcs/GolombEncodingSpec.scala
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,10 @@ class GolombEncodingSpec extends WordSpec with MustMatchers {

val key = SipHashKey.fromBtcutil(keyBytes)
val golomb = GolombEncoding.default(key)
val encoded = golomb.encode(words)
val encoded = golomb.encode(words.toSet)

"decode the same hashes" in {
val hashes = golomb.hashes(words)
val hashes = golomb.hashes(words.toSet)
val bytes = BaseEncoding
.base16()
.decode(encoded.hex.string.toUpperCase)
Expand Down

0 comments on commit 8c0f92a

Please sign in to comment.