Skip to content
This repository has been archived by the owner on Jan 13, 2025. It is now read-only.

bloom for forking #2431

Merged
merged 3 commits into from
Jan 15, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 64 additions & 8 deletions src/bloom.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,16 @@
//! Simple Bloom Filter
use crate::bloom_hash_index::BloomHashIndex;
use bv::BitVec;
use rand::{self, Rng};
use solana_sdk::hash::hashv;
use std::cmp;
use std::marker::PhantomData;

/// A type that can hash itself once per `hash_index`.
///
/// The hash returned for a given `hash_index` is expected to be stable, and
/// implementations should make a best effort to return a distinct hash for
/// each index.
pub trait BloomHashIndex {
rob-solana marked this conversation as resolved.
Show resolved Hide resolved
/// Returns the hash of `self` associated with `hash_index`.
fn hash_at_index(&self, hash_index: u64) -> u64;
}

#[derive(Serialize, Deserialize, Default, Clone, Debug, PartialEq)]
pub struct Bloom<T: BloomHashIndex> {
pub keys: Vec<u64>,
Expand All @@ -13,6 +19,14 @@ pub struct Bloom<T: BloomHashIndex> {
}

impl<T: BloomHashIndex> Bloom<T> {
pub fn new(num_bits: usize, keys: Vec<u64>) -> Self {
let bits = BitVec::new_fill(false, num_bits as u64);
Bloom {
keys,
bits,
_phantom: Default::default(),
}
}
/// Create a filter sized optimally for `num` items given the `false_rate`.
/// The keys are randomized for picking data out of a collision-resistant hash of size
/// `keysize` bytes.
Expand All @@ -24,15 +38,13 @@ impl<T: BloomHashIndex> Bloom<T> {
let num_bits = cmp::max(1, cmp::min(min_num_bits, max_bits));
let num_keys = ((num_bits as f64 / num as f64) * 2f64.log(2f64)).round() as usize;
let keys: Vec<u64> = (0..num_keys).map(|_| rand::thread_rng().gen()).collect();
let bits = BitVec::new_fill(false, num_bits as u64);
Bloom {
keys,
bits,
_phantom: Default::default(),
}
Self::new(num_bits, keys)
}
/// Maps `key` to a position in `self.bits` for the hash index `k`: the key's
/// hash for `k` is reduced modulo the length of the bit vector.
fn pos(&self, key: &T, k: u64) -> u64 {
// NOTE(review): the two lines below look like the removed and added sides of
// a diff hunk (`hash` renamed to `hash_at_index`) — confirm only one belongs.
key.hash(k) % self.bits.len()
key.hash_at_index(k) % self.bits.len()
}
pub fn clear(&mut self) {
self.bits.clear();
}
pub fn add(&mut self, key: &T) {
for k in &self.keys {
Expand All @@ -51,10 +63,54 @@ impl<T: BloomHashIndex> Bloom<T> {
}
}

/// Encodes `v` as eight bytes, least-significant byte first.
fn to_slice(v: u64) -> [u8; 8] {
    let mut bytes = [0u8; 8];
    for (i, byte) in bytes.iter_mut().enumerate() {
        // The truncating cast keeps only the low eight bits of the shifted value.
        *byte = (v >> (8 * i)) as u8;
    }
    bytes
}

/// Decodes the first eight bytes of `v` as a `u64`, least-significant byte
/// first.
///
/// Bytes past the eighth are ignored. Panics if `v` holds fewer than eight
/// bytes.
fn from_slice(v: &[u8]) -> u64 {
    // Walk from the most significant byte down, shifting the accumulator up
    // one byte each step.
    v[..8]
        .iter()
        .rev()
        .fold(0u64, |acc, &byte| (acc << 8) | u64::from(byte))
}

/// Hashes `slice` together with the byte encoding of `hash_index` and returns
/// the leading eight bytes of the digest as a `u64`.
fn slice_hash(slice: &[u8], hash_index: u64) -> u64 {
    let index_bytes = to_slice(hash_index);
    let digest = hashv(&[slice, &index_bytes]);
    from_slice(digest.as_ref())
}

/// Any type that can be viewed as a byte slice gets `BloomHashIndex` by
/// hashing those bytes together with the requested index.
impl<T> BloomHashIndex for T
where
    T: AsRef<[u8]>,
{
    fn hash_at_index(&self, hash_index: u64) -> u64 {
        let bytes: &[u8] = self.as_ref();
        slice_hash(bytes, hash_index)
    }
}

#[cfg(test)]
mod test {
use super::*;
use solana_sdk::hash::{hash, Hash};
#[test]
fn test_slice() {
    // Each value must survive an encode/decode round trip unchanged.
    for &value in &[10u64, 0x7fff7fff, 0x7fff7fff7fff7fff] {
        assert_eq!(from_slice(&to_slice(value)), value);
    }
}

#[test]
fn test_bloom_filter() {
Expand Down
31 changes: 0 additions & 31 deletions src/bloom_hash_index.rs

This file was deleted.

1 change: 0 additions & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ pub mod bank;
pub mod banking_stage;
pub mod blob_fetch_stage;
pub mod bloom;
pub mod bloom_hash_index;
pub mod broadcast_service;
#[cfg(feature = "chacha")]
pub mod chacha;
Expand Down