-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathbloom.go
158 lines (139 loc) · 3.54 KB
/
bloom.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
// Package bloom implements a simple and scalable Bloom filter algorithm.
package bloom
import (
"encoding/base64"
"encoding/json"
"fmt"
"io/ioutil"
"math"
"os"
)
// Hasher : Pluggable hasher type
//
// Maps a string key to a single 64-bit digest.
// NOTE(review): declared but not referenced by any code visible here;
// Filter.hasher uses the shape func([]byte) []uint64 instead — confirm
// whether this type is consumed elsewhere before removing it.
type Hasher func(string) uint64
// Filter : Implement a simple Filter
//
// The bit array is split into k equal sections, one per hash function;
// feeding an entry sets one bit in every section (see Feed and Match).
type Filter struct {
	arr      []byte                // backing bit array: Size bytes, i.e. Size*8 bits
	Size     uint64                // length of arr in bytes
	k        uint64                // number of hash functions (and array sections)
	inserted uint64                // count of entries added via Feed
	hasher   func([]byte) []uint64 // produces the k digests for a key; built by generateHasher
}
// EncodedFilter is the JSON filter structure
//
// Mirror of Filter with exported fields so encoding/json can serialize
// it; Arr ([]byte) is emitted by encoding/json as a base64 string.
type EncodedFilter struct {
	Arr      []byte // raw bit array
	Size     uint64 // array size in bytes
	K        uint64 // number of hash functions
	Inserted uint64 // number of fed entries
}
// New : constructor
//
// Allocates a zeroed filter of size bytes backed by k hash functions
// derived from generateHasher. inserted starts at its zero value.
func New(size uint64, k uint64) *Filter {
	bf := &Filter{
		arr:    make([]byte, size),
		Size:   size,
		k:      k,
		hasher: generateHasher(k, size*8),
	}
	return bf
}
// Reset : zeroes the bytearray, flushing the filter
//
// Also clears the inserted counter so that EstimateFillRatio and the
// Inserted field exported by ToJSON describe an empty filter again.
func (bf *Filter) Reset() *Filter {
	bf.arr = make([]byte, bf.Size)
	// Bug fix: the counter was previously left stale after a flush,
	// which skewed EstimateFillRatio and serialized exports.
	bf.inserted = 0
	return bf
}
// Match : Check if s have an entry in the filter
// May return false positive
//
// Each of the k digests addresses one bit inside its own section of the
// bit array; s is reported present only when every such bit is set.
func (bf *Filter) Match(s string) bool {
	digests := bf.hasher([]byte(s))
	section := (bf.Size * 8) / bf.k
	for i := uint64(0); i < bf.k; i++ {
		if !bf.isSet(i*section + digests[i]%section) {
			return false
		}
	}
	return true
}
// ToJSON : Export a byte array that can be later used with bf.FromJSON
//
// Copies the filter state into the exported EncodedFilter mirror and
// marshals it; the Arr field is serialized by encoding/json as base64.
func (bf *Filter) ToJSON() ([]byte, error) {
	return json.Marshal(&EncodedFilter{
		Arr:      bf.arr,
		Size:     bf.Size,
		K:        bf.k,
		Inserted: bf.inserted,
	})
}
// ToFile : Export filter to a file
//
// Serializes the filter with ToJSON and writes the result to path,
// creating or truncating the file with mode 0666 (before umask), the
// same mode os.Create used previously.
func (bf *Filter) ToFile(path string) error {
	data, err := bf.ToJSON()
	if err != nil {
		return err
	}
	// Bug fix: WriteFile also surfaces Close errors, which the previous
	// Create/Write/defer-Close sequence silently discarded.
	return ioutil.WriteFile(path, data, 0666)
}
// FromFile : Import filter from a file
//
// Reads the whole file at path and delegates decoding to FromJSON.
// (ioutil.ReadFile is deprecated in favor of os.ReadFile, but is kept
// here so the file's io/ioutil import stays in use.)
func FromFile(path string) (*Filter, error) {
	raw, err := ioutil.ReadFile(path)
	if err != nil {
		return nil, err
	}
	return FromJSON(raw)
}
// FromJSON : Import a JSON serialized bloom filter
//
// Decodes the document produced by ToJSON and rebuilds a Filter.
// Returns an error — instead of panicking, as the previous unchecked
// type assertions did — when a field is missing or has the wrong type.
func FromJSON(raw []byte) (*Filter, error) {
	var dat map[string]interface{}
	if err := json.Unmarshal(raw, &dat); err != nil {
		return nil, err
	}
	size, okSize := dat["Size"].(float64)
	k, okK := dat["K"].(float64)
	inserted, okInserted := dat["Inserted"].(float64)
	encoded, okArr := dat["Arr"].(string)
	if !okSize || !okK || !okInserted || !okArr {
		return nil, fmt.Errorf("invalid serialized bloom filter: missing or mistyped field")
	}
	bf := New(uint64(size), uint64(k))
	bf.inserted = uint64(inserted)
	// encoding/json stores []byte as base64, so Arr must be decoded back.
	n, err := base64.StdEncoding.DecodeString(encoded)
	if err != nil {
		return nil, err
	}
	bf.arr = n
	return bf, nil
}
// Merge two Filters, filters must have the same size
// Take care of fillratio when merging filters : false positive
// rate will increase
//
// ORs the other filter's bit array into this one in place; both filters
// must share the same byte size and hash-function count.
func (bf *Filter) Merge(oth *Filter) error {
	switch {
	case bf.Size != oth.Size:
		return fmt.Errorf("incompatible filters size for merge : %d != %d",
			bf.Size, oth.Size)
	case bf.k != oth.k:
		return fmt.Errorf("hashes functions must be the same to perform merge")
	}
	for i := uint64(0); i < bf.Size; i++ {
		bf.arr[i] |= oth.arr[i]
	}
	return nil
}
// Feed : Add an entry in the bloom filter
//
// Sets one bit per hash function, each inside its own section of the
// bit array, then bumps the inserted counter. Returns bf for chaining.
func (bf *Filter) Feed(s string) *Filter {
	digests := bf.hasher([]byte(s))
	section := (bf.Size * 8) / bf.k
	for i := uint64(0); i < bf.k; i++ {
		bf.setBit(i*section + digests[i]%section)
	}
	bf.inserted++
	return bf
}
// FillRatio : Count each set bit into the Filter to compute the fillRatio
//
// Exact ratio of set bits to total bits (Size*8), using a population
// count over the backing array.
func (bf *Filter) FillRatio() float64 {
	setBits := popcntSliceGo(bf.arr)
	totalBits := bf.Size * 8
	return float64(setBits) / float64(totalBits)
}
// EstimateFillRatio : Optimization of the fillRatio function, estimate instead of counting bits
//
// Probabilistic estimate 1 - (1 - 1/(m/k))^inserted, where m = Size*8
// is the total bit count — avoids scanning the array like FillRatio.
func (bf *Filter) EstimateFillRatio() float64 {
	bitsPerSection := (float64(bf.Size) * 8.0) / float64(bf.k)
	missProb := 1.0 - 1.0/bitsPerSection
	return 1.0 - math.Pow(missProb, float64(bf.inserted))
}