Skip to content

Commit

Permalink
rename:recall
Browse files Browse the repository at this point in the history
  • Loading branch information
CocaineCong committed Aug 28, 2023
1 parent c080594 commit ac1ff18
Show file tree
Hide file tree
Showing 17 changed files with 58 additions and 58 deletions.
2 changes: 1 addition & 1 deletion app/gateway/routes/search_engine.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,6 @@ func SearchRegisterHandlers(rg *gin.RouterGroup) {
favoriteGroup := rg.Group("/search_engine")
{
favoriteGroup.GET("/search", http.SearchEngineSearch)
favoriteGroup.GET("/query", http.WordAssociation)
favoriteGroup.GET("/analyzer", http.WordAssociation)
}
}
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package query
package analyzer

import (
"github.com/go-ego/gse"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package query
package analyzer

import (
"fmt"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package query
package analyzer

import (
"fmt"
Expand Down
4 changes: 2 additions & 2 deletions app/search_engine/cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import (
"google.golang.org/grpc"

"github.com/CocaineCong/tangseng/app/gateway/rpc"
"github.com/CocaineCong/tangseng/app/search_engine/query"
"github.com/CocaineCong/tangseng/app/search_engine/analyzer"
"github.com/CocaineCong/tangseng/app/search_engine/service"
"github.com/CocaineCong/tangseng/config"
pb "github.com/CocaineCong/tangseng/idl/pb/search_engine"
Expand All @@ -20,7 +20,7 @@ import (
func main() {
loading.Loading()
rpc.Init()
query.InitSeg()
analyzer.InitSeg()

// etcd address
etcdAddress := []string{viper.GetString("etcd.address")}
Expand Down
4 changes: 2 additions & 2 deletions app/search_engine/engine/engine.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import (
"sync"
"sync/atomic"

"github.com/CocaineCong/tangseng/app/search_engine/query"
"github.com/CocaineCong/tangseng/app/search_engine/analyzer"
"github.com/CocaineCong/tangseng/app/search_engine/segment"
"github.com/CocaineCong/tangseng/app/search_engine/types"
"github.com/CocaineCong/tangseng/consts"
Expand Down Expand Up @@ -58,7 +58,7 @@ func (e *Engine) AddForwardIndex(doc *types.Document) error {

// Text2PostingsLists tokenizes text and converts it into inverted-index postings lists.
func (e *Engine) Text2PostingsLists(text string, docId int64) (err error) {
tokens, err := query.GseCut(text)
tokens, err := analyzer.GseCut(text)
if err != nil {
log.LogrusObj.Errorf("text2PostingsLists err:%v", err)
return
Expand Down
Original file line number Diff line number Diff line change
@@ -1,23 +1,22 @@
package index
package recall

import (
"time"

"github.com/CocaineCong/tangseng/app/search_engine/engine"
"github.com/CocaineCong/tangseng/app/search_engine/recall"
"github.com/CocaineCong/tangseng/app/search_engine/types"
log "github.com/CocaineCong/tangseng/pkg/logger"
)

// Recall is the recall service.
type Recall struct {
*recall.Recall
// RecallServ is the recall service.
type RecallServ struct {
*Recall
}

// NewRecallServ creates the recall service.
func NewRecallServ(meta *engine.Meta) *Recall {
r := recall.NewRecall(meta)
return &Recall{r}
func NewRecallServ(meta *engine.Meta) *RecallServ {
r := NewRecall(meta)
return &RecallServ{r}
}

// SearchRecall 词条回归
Expand Down
37 changes: 19 additions & 18 deletions app/search_engine/recall/recall.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,23 +4,23 @@ import (
"errors"
"sort"

engine2 "github.com/CocaineCong/tangseng/app/search_engine/engine"
segment2 "github.com/CocaineCong/tangseng/app/search_engine/segment"
types2 "github.com/CocaineCong/tangseng/app/search_engine/types"
"github.com/CocaineCong/tangseng/app/search_engine/engine"
"github.com/CocaineCong/tangseng/app/search_engine/segment"
"github.com/CocaineCong/tangseng/app/search_engine/types"
log "github.com/CocaineCong/tangseng/pkg/logger"
"github.com/CocaineCong/tangseng/pkg/util/relevant"
)

// Recall handles query recall.
type Recall struct {
*engine2.Engine
*engine.Engine
docCount int64 // 文档总数 ,用于计算相关性
enablePhrase bool
}

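// NewRecall creates a Recall on an engine opened in search mode and sums each segment's ForwardCount into docCount.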
func NewRecall(meta *engine2.Meta) *Recall {
e := engine2.NewEngine(meta, segment2.SearchMode)
func NewRecall(meta *engine.Meta) *Recall {
e := engine.NewEngine(meta, segment.SearchMode)
var docCount int64 = 0
for _, seg := range e.Seg {
num, err := seg.ForwardCount()
Expand All @@ -29,11 +29,12 @@ func NewRecall(meta *engine2.Meta) *Recall {
}
docCount += num
}

return &Recall{e, docCount, true}
}

// Search is the search entry point; it starts by splitting the query into tokens.
func (r *Recall) Search(query string) ([]*types2.SearchItem, error) {
func (r *Recall) Search(query string) ([]*types.SearchItem, error) {
err := r.splitQuery2Tokens(query)
if err != nil {
log.LogrusObj.Errorf("splitQuery2Tokens err:%v", err)
Expand All @@ -44,7 +45,7 @@ func (r *Recall) Search(query string) ([]*types2.SearchItem, error) {
}

// SearchQuery is the dictionary-lookup entry point; it delegates to GetDict.
func (r *Recall) SearchQuery(query string) ([]*types2.DictTireTree, error) {
func (r *Recall) SearchQuery(query string) ([]*types.DictTireTree, error) {
return r.GetDict(query)
}

Expand All @@ -58,8 +59,8 @@ func (r *Recall) splitQuery2Tokens(query string) (err error) {
return
}

func (r *Recall) searchDoc() (recalls []*types2.SearchItem, err error) {
recalls = make([]*types2.SearchItem, 0)
func (r *Recall) searchDoc() (recalls []*types.SearchItem, err error) {
recalls = make([]*types.SearchItem, 0)

// Initialize a cursor for each token.
for token, post := range r.PostingsHashBuf {
Expand All @@ -84,7 +85,7 @@ func (r *Recall) searchDoc() (recalls []*types2.SearchItem, err error) {
postings = postings.Next
continue
}
sItem := &types2.SearchItem{
sItem := &types.SearchItem{
DocId: docId,
Content: "",
Score: 0.0,
Expand All @@ -108,7 +109,7 @@ func (r *Recall) searchDoc() (recalls []*types2.SearchItem, err error) {
}

// calculateScore computes relevance and returns the items sorted by descending score.
func (r *Recall) calculateScore(token string, searchItem []*types2.SearchItem) (resp []*types2.SearchItem) {
func (r *Recall) calculateScore(token string, searchItem []*types.SearchItem) (resp []*types.SearchItem) {
recallToken := make([]string, 0)

for i := range searchItem {
Expand All @@ -132,15 +133,15 @@ func (r *Recall) calculateScore(token string, searchItem []*types2.SearchItem) (
sort.Slice(searchItem, func(i, j int) bool {
return searchItem[i].Score > searchItem[j].Score
})
resp = make([]*types2.SearchItem, 0)
resp = make([]*types.SearchItem, 0)
resp = searchItem

return
}

// fetchPostingsBySegs fetches the postings-list data for token from every segment and merges it.
func (r *Recall) fetchPostingsBySegs(token string) (postings *types2.PostingsList, docCount int64, err error) {
postings = new(types2.PostingsList)
func (r *Recall) fetchPostingsBySegs(token string) (postings *types.PostingsList, docCount int64, err error) {
postings = new(types.PostingsList)
for i, seg := range r.Engine.Seg {
p, errx := seg.FetchPostings(token)
if errx != nil {
Expand All @@ -149,7 +150,7 @@ func (r *Recall) fetchPostingsBySegs(token string) (postings *types2.PostingsLis
return
}
log.LogrusObj.Infof("post:%v", p)
postings = segment2.MergePostings(postings, p.PostingsList)
postings = segment.MergePostings(postings, p.PostingsList)
log.LogrusObj.Infof("pos next:%v", postings.Next)
docCount += p.DocCount
}
Expand All @@ -158,7 +159,7 @@ func (r *Recall) fetchPostingsBySegs(token string) (postings *types2.PostingsLis
return
}

func (r *Recall) getContentByDocId(s *types2.SearchItem) (item *types2.SearchItem, err error) {
func (r *Recall) getContentByDocId(s *types.SearchItem) (item *types.SearchItem, err error) {
for i, seg := range r.Engine.Seg {
p, errx := seg.GetForward(s.DocId)
if errx != nil {
Expand All @@ -168,7 +169,7 @@ func (r *Recall) getContentByDocId(s *types2.SearchItem) (item *types2.SearchIte
}
s.Content = string(p)
}
item = new(types2.SearchItem)
item = new(types.SearchItem)
item = s

return
Expand Down
4 changes: 2 additions & 2 deletions app/search_engine/segment/db.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ package segment
import (
"fmt"

"github.com/CocaineCong/tangseng/app/search_engine/query"
"github.com/CocaineCong/tangseng/app/search_engine/analyzer"
"github.com/CocaineCong/tangseng/app/search_engine/storage"
"github.com/CocaineCong/tangseng/app/search_engine/types"
"github.com/CocaineCong/tangseng/config"
Expand Down Expand Up @@ -40,7 +40,7 @@ func InitSegmentDb(segId SegId) (invertedDb *storage.InvertedDB, forwardDb *stor
}

// CreateNewInvertedIndex creates an inverted-index entry for the given token.
func CreateNewInvertedIndex(token query.Tokenization, docCount int64) *types.InvertedIndexValue {
func CreateNewInvertedIndex(token analyzer.Tokenization, docCount int64) *types.InvertedIndexValue {
return &types.InvertedIndexValue{ // TODO:优化一下结构
Token: token.Token,
PostingsList: new(types.PostingsList),
Expand Down
6 changes: 3 additions & 3 deletions app/search_engine/segment/postings_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import (
"fmt"
"testing"

"github.com/CocaineCong/tangseng/app/search_engine/query"
"github.com/CocaineCong/tangseng/app/search_engine/analyzer"
"github.com/CocaineCong/tangseng/app/search_engine/types"
)

Expand Down Expand Up @@ -40,7 +40,7 @@ func TestMergePostings(t *testing.T) {

func TestMergeInvertedIndex(t *testing.T) {
base := make(InvertedIndexHash)
token := query.Tokenization{
token := analyzer.Tokenization{
Token: "测试文本",
Position: 10,
Offset: 100,
Expand All @@ -52,7 +52,7 @@ func TestMergeInvertedIndex(t *testing.T) {
fmt.Println("base", base)

addDoc := make(InvertedIndexHash)
token2 := query.Tokenization{
token2 := analyzer.Tokenization{
Token: "测试文本2",
Position: 101,
Offset: 1002,
Expand Down
4 changes: 2 additions & 2 deletions app/search_engine/segment/segment.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
package segment

import (
"github.com/CocaineCong/tangseng/app/search_engine/query"
"github.com/CocaineCong/tangseng/app/search_engine/analyzer"
storage2 "github.com/CocaineCong/tangseng/app/search_engine/storage"
"github.com/CocaineCong/tangseng/app/search_engine/types"
log "github.com/CocaineCong/tangseng/pkg/logger"
Expand All @@ -16,7 +16,7 @@ type Segment struct {
}

// Token2PostingsLists converts a token into an inverted-index postings list.
func Token2PostingsLists(bufInvertHash InvertedIndexHash, token query.Tokenization, docId int64) (err error) {
func Token2PostingsLists(bufInvertHash InvertedIndexHash, token analyzer.Tokenization, docId int64) (err error) {
bufInvert := new(types.InvertedIndexValue)
if len(bufInvertHash) > 0 {
if item, ok := bufInvertHash[token.Token]; ok {
Expand Down
8 changes: 4 additions & 4 deletions app/search_engine/service/search_engine.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import (
"fmt"
"sync"

"github.com/CocaineCong/tangseng/app/search_engine/index"
"github.com/CocaineCong/tangseng/app/search_engine/recall"
"github.com/CocaineCong/tangseng/app/search_engine/types"
"github.com/CocaineCong/tangseng/consts/e"
pb "github.com/CocaineCong/tangseng/idl/pb/search_engine"
Expand All @@ -31,11 +31,11 @@ func (s *SearchEngineSrv) SearchEngineSearch(ctx context.Context, req *pb.Search
resp = new(pb.SearchEngineResponse)
resp.Code = e.SUCCESS
query := req.Query
sResult, err := index.SearchRecall(query)
sResult, err := recall.SearchRecall(query)
if err != nil {
resp.Code = e.ERROR
resp.Msg = err.Error()
log.LogrusObj.Error("SearchEngineSearch-index.SearchRecall", err)
log.LogrusObj.Error("SearchEngineSearch-recall.SearchRecall", err)
return
}

Expand All @@ -55,7 +55,7 @@ func (s *SearchEngineSrv) WordAssociation(ctx context.Context, req *pb.SearchEng
resp = new(pb.WordAssociationResponse)
resp.Code = e.SUCCESS
query := req.Query
sResult, err := index.SearchQuery(query)
sResult, err := recall.SearchQuery(query)
wordAssociationList := make([]string, 0)
for _, v := range sResult {
if v != nil {
Expand Down
4 changes: 2 additions & 2 deletions app/search_engine/storage/forward_db_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import (

bolt "go.etcd.io/bbolt"

"github.com/CocaineCong/tangseng/app/search_engine/query"
"github.com/CocaineCong/tangseng/app/search_engine/analyzer"
"github.com/CocaineCong/tangseng/config"
log "github.com/CocaineCong/tangseng/pkg/logger"
)
Expand All @@ -16,7 +16,7 @@ func TestMain(m *testing.M) {
re := config.ConfigReader{FileName: "../../../config/config.yaml"}
config.InitConfigForTest(&re)
log.InitLog()
query.InitSeg()
analyzer.InitSeg()
fmt.Println("Write tests on values: ", config.Conf)
m.Run()
}
Expand Down
8 changes: 4 additions & 4 deletions app/search_engine/test/recall_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ import (
"fmt"
"testing"

"github.com/CocaineCong/tangseng/app/search_engine/index"
"github.com/CocaineCong/tangseng/app/search_engine/query"
"github.com/CocaineCong/tangseng/app/search_engine/analyzer"
"github.com/CocaineCong/tangseng/app/search_engine/recall"
"github.com/CocaineCong/tangseng/config"
log "github.com/CocaineCong/tangseng/pkg/logger"
)
Expand All @@ -14,15 +14,15 @@ func TestMain(m *testing.M) {
// Location of config.yaml relative to this file.
re := config.ConfigReader{FileName: "../../../config/config.yaml"}
config.InitConfigForTest(&re)
query.InitSeg()
analyzer.InitSeg()
log.InitLog()
fmt.Println("Write tests on values: ", config.Conf)
m.Run()
}

func TestRecall(t *testing.T) {
q := "国家,西游记"
searchItem, err := index.SearchRecall(q)
searchItem, err := recall.SearchRecall(q)
if err != nil {
fmt.Println(err)
}
Expand Down
4 changes: 2 additions & 2 deletions idl/pb/search_engine/search_engine.pb.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit ac1ff18

Please sign in to comment.