-
Notifications
You must be signed in to change notification settings - Fork 17
/
spool.go
56 lines (49 loc) · 1.18 KB
/
spool.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
package ngram
import (
"bytes"
"errors"
"sync"
"github.com/cespare/go-smaz"
)
// region describes where one stored string lives inside the pool's byte
// buffer, as the half-open byte range [begin, end).
type region struct {
	begin, end int
}
// stringPool stores strings in compressed form inside a single growing byte
// buffer and hands out token IDs that index into items.
type stringPool struct {
	// items has one entry per appended string; the token ID returned by
	// Append is the entry's position in this slice.
	items []region
	// buffer holds the compressed bytes of all appended strings, back to back.
	buffer bytes.Buffer
	// Embedded lock; Append and ReadAt take it when they access items.
	sync.RWMutex
}
// Append adds a new string to the pool and returns the token ID under which
// it can later be read back with ReadAt. Strings don't need to be unique:
// every call stores a new entry, even for a string that was appended before.
//
// The string is stored smaz-compressed. On a buffer write error the zero
// token ID and the error are returned, and no item is recorded.
//
// The buffer write and the items append happen in one critical section under
// the write lock, so concurrent Append calls cannot interleave and record
// offsets that cover another call's bytes.
func (pool *stringPool) Append(s string) (TokenID, error) {
	// Compression only touches local data, so it stays outside the lock.
	compressed := smaz.Compress([]byte(s))

	pool.Lock()
	defer pool.Unlock()

	begin := pool.buffer.Len()
	n, err := pool.buffer.Write(compressed)
	if err != nil {
		return 0, err
	}

	id := TokenID(len(pool.items))
	pool.items = append(pool.items, region{begin: begin, end: begin + n})
	return id, nil
}
// ReadAt converts a token ID back to the string stored under it by Append.
//
// It returns an "index out of range" error when index does not refer to a
// stored item, and passes through the smaz error when the stored bytes fail
// to decompress.
//
// The read lock is held for the whole lookup: the bounds check, the item
// fetch and the buffer read all touch state that Append mutates, so none of
// them may run concurrently with a writer.
func (pool *stringPool) ReadAt(index TokenID) (string, error) {
	pool.RLock()
	defer pool.RUnlock()

	if index < TokenID(0) || index >= TokenID(len(pool.items)) {
		return "", errors.New("index out of range")
	}

	item := pool.items[int(index)]
	decompressed, err := smaz.Decompress(pool.buffer.Bytes()[item.begin:item.end])
	if err != nil {
		return "", err
	}
	return string(decompressed), nil
}