diff --git a/server/app/com/xsn/explorer/gcs/GolombEncoding.scala b/server/app/com/xsn/explorer/gcs/GolombEncoding.scala index f9fe545d..80ce1ba6 100644 --- a/server/app/com/xsn/explorer/gcs/GolombEncoding.scala +++ b/server/app/com/xsn/explorer/gcs/GolombEncoding.scala @@ -2,6 +2,8 @@ package com.xsn.explorer.gcs import com.google.common.hash.Hashing +import scala.collection.SortedSet + /** * A Golomb-coded set, matches all items in the set with probability 1, and matches other items with probability 1/M. * @@ -17,9 +19,9 @@ class GolombEncoding(p: Int, m: Int, key: SipHashKey) { /** * Encodes the given word list. */ - def encode(words: List[String]): GolombCodedSet = { - val hashList = hashes(words) - val diffList = differences(hashList) + def encode(words: Set[String]): GolombCodedSet = { + val sortedHashes = hashes(words) + val diffList = differences(sortedHashes) val encodedBits = diffList.flatMap(golombEncode) val encodedBytes = encodedBits .grouped(8) @@ -41,9 +43,9 @@ class GolombEncoding(p: Int, m: Int, key: SipHashKey) { * * @param encoded the encoded bytes, we expect them to be correct * @param n the number of words encoded in the bytes - * @return the recovered list of hashes + * @return the recovered sorted set of hashes */ - private[gcs] def decode(encoded: List[UnsignedByte], n: Int): List[BigInt] = { + private[gcs] def decode(encoded: List[UnsignedByte], n: Int): SortedSet[BigInt] = { val encodedBits = encoded.flatMap(_.bits) val (_, _, result) = List.fill(n)(0) .foldLeft((encodedBits, BigInt(0), List.empty[BigInt])) { case ((bits, acc, hashes), _) => @@ -52,19 +54,19 @@ class GolombEncoding(p: Int, m: Int, key: SipHashKey) { (remaining, hash, hash :: hashes) } - result.reverse + result.to[SortedSet] } /** - * Maps the word list to a list of hashes. + * Maps the word set to a sorted set of hashes. */ - private[gcs] def hashes(words: List[String]): List[BigInt] = { - val modulus = BigInt(m) * words.length + private[gcs] def hashes(words: Set[String]): SortedSet[BigInt] = { + val modulus = BigInt(m) * words.size val f = fastReduction(_: BigInt, modulus) words .map(hash) .map(f) - .sorted + .to[SortedSet] } private def golombEncode(x: BigInt): List[Bit] = { @@ -88,8 +90,8 @@ class GolombEncoding(p: Int, m: Int, key: SipHashKey) { (pending, x) } - private def differences(sortedHashList: List[BigInt]): List[BigInt] = { - (BigInt(0) :: sortedHashList) + private def differences(sortedHashes: SortedSet[BigInt]): List[BigInt] = { + (BigInt(0) :: sortedHashes.toList) .sliding(2) .map { case a :: b :: Nil => b - a } .toList diff --git a/server/test/com/xsn/explorer/gcs/GolombEncodingSpec.scala b/server/test/com/xsn/explorer/gcs/GolombEncodingSpec.scala index 0d8c265d..2aff4396 100644 --- a/server/test/com/xsn/explorer/gcs/GolombEncodingSpec.scala +++ b/server/test/com/xsn/explorer/gcs/GolombEncodingSpec.scala @@ -32,10 +32,10 @@ class GolombEncodingSpec extends WordSpec with MustMatchers { val key = SipHashKey.fromBtcutil(keyBytes) val golomb = GolombEncoding.default(key) - val encoded = golomb.encode(words) + val encoded = golomb.encode(words.toSet) "decode the same hashes" in { - val hashes = golomb.hashes(words) + val hashes = golomb.hashes(words.toSet) val bytes = BaseEncoding .base16() .decode(encoded.hex.string.toUpperCase)