-
Notifications
You must be signed in to change notification settings - Fork 2
/
index_test.go
95 lines (79 loc) · 2.09 KB
/
index_test.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
package weakand
import (
"bufio"
"os"
"path"
"sort"
"strings"
"testing"
"github.com/huichen/sego"
"github.com/stretchr/testify/assert"
)
// Package-level fixtures shared by the tests in this file.
var (
// testingCorpus is the three-document corpus that testBuildIndex feeds
// into the index, one string per document.
testingCorpus = []string{
"apple pie",
"apple iphone",
"iphone jailbreak"}
// sgmt is the segmenter handed to NewIndex by testBuildIndex. It starts
// out nil and is filled in by guaranteeSegmenter on first use.
sgmt *sego.Segmenter
)
// testBuildIndex returns a SearchIndex built from testingCorpus.
//
// It makes sure the shared segmenter is initialized, streams every corpus
// document through an unbuffered channel from a helper goroutine, and hands
// that channel to BatchAdd on a freshly created index.
func testBuildIndex() *SearchIndex {
	guaranteeSegmenter(&sgmt)

	docs := make(chan string)
	go func() {
		// Closing the channel signals that every document has been sent.
		defer close(docs)
		for i := range testingCorpus {
			docs <- testingCorpus[i]
		}
	}()

	index := NewIndex(NewVocab(nil), sgmt)
	return index.BatchAdd(docs)
}
// TestBuildIndex checks the vocabulary, forward index and inverted index
// that testBuildIndex produces from testingCorpus.
func TestBuildIndex(t *testing.T) {
	index := testBuildIndex()
	a := assert.New(t)

	// The corpus is expected to yield four distinct terms and one forward
	// entry per document.
	a.Equal(4, len(index.Vocab.Terms))
	a.Equal(4, len(index.Vocab.TermIndex))
	a.Equal(len(testingCorpus), len(index.Fwd))
	a.Equal(4, len(index.Ivt))

	// Every posting list must satisfy sort.IsSorted.
	for _, postings := range index.Ivt {
		a.True(sort.IsSorted(postings))
	}

	// Expected posting-list length per term, checked in a fixed order.
	postingSizes := []struct {
		term string
		size int
	}{
		{"apple", 2},
		{"pie", 1},
		{"iphone", 2},
		{"jailbreak", 1},
	}
	for _, p := range postingSizes {
		a.Equal(p.size, len(index.Ivt[index.Vocab.IdOrAdd(p.term)]))
	}

	// Each corpus document is expected to have a forward-index Len of 2.
	for _, doc := range testingCorpus {
		a.Equal(2, index.Fwd[documentHash(doc)].Len)
	}
}
func TestDocumentHashCollision(t *testing.T) {
WithFile(path.Join(gosrc(), "github.com/topicai/weakand/testdata/internet-zh.num"),
func(f *os.File) {
dict := make(map[DocId][]string)
scanner := bufio.NewScanner(f)
for scanner.Scan() {
fs := strings.Fields(scanner.Text())
if len(fs) == 2 {
content := fs[1]
did := documentHash(content)
if _, ok := dict[did]; ok {
t.Errorf("Collision between %v and %v", content, dict[did])
}
dict[did] = append(dict[did], content)
}
}
if e := scanner.Err(); e != nil {
t.Errorf("Reading %s error: %v", f.Name(), e)
}
})
}
// gosrc returns the "src" directory of the Go workspace used to locate test
// fixtures.
//
// Resolution order:
//  1. the first non-empty entry of the GOPATH environment variable, which
//     may hold several directories separated by os.PathListSeparator;
//  2. "<home>/go", with <home> taken from os.UserHomeDir, when GOPATH is
//     unset or empty;
//  3. the relative path "src" when neither can be determined, which is what
//     a plain path.Join("", "src") would have produced.
func gosrc() string {
	entries := strings.Split(os.Getenv("GOPATH"), string(os.PathListSeparator))
	for _, dir := range entries {
		if dir != "" {
			return path.Join(dir, "src")
		}
	}
	if home, err := os.UserHomeDir(); err == nil {
		return path.Join(home, "go", "src")
	}
	return "src"
}
// guaranteeSegmenter makes sure *sgmt points at a usable segmenter.
//
// When *sgmt is nil it allocates a new sego.Segmenter, loads the dictionary
// at <gosrc>/github.com/huichen/sego/data/dictionary.txt into it, and stores
// it through sgmt. A non-nil *sgmt is left untouched.
func guaranteeSegmenter(sgmt **sego.Segmenter) {
	if *sgmt != nil {
		return
	}

	dict := path.Join(gosrc(), "github.com/huichen/sego/data/dictionary.txt")
	seg := &sego.Segmenter{}
	seg.LoadDictionary(dict)
	*sgmt = seg
}