Skip to content

Commit

Permalink
tsearch: speed up TSVector.String
Browse files Browse the repository at this point in the history
This commit speeds up `TSVector.String` by writing every part of the
TSVector into a single shared `strings.Builder`, instead of building a
separate intermediate string for each term and weight.

This commit also introduces a simple benchmark for this method.
`benchstat` seems to be busted a bit, but the impact of this commit is:
```
                       │ /tmp/tmp.Unz2XLUFdB/bench.HEAD^^ │   /tmp/tmp.Unz2XLUFdB/bench.HEAD^   │
                       │              sec/op              │   sec/op     vs base                │
TSVector/String-24                           14.590m ± 0%   8.272m ± 0%  -43.31% (p=0.000 n=10)
TSVector/StringSize-24                       14.654m ± 0%   8.269m ± 0%  -43.57% (p=0.000 n=10)
geomean                                       14.62m        8.270m       -43.44%

                       │ /tmp/tmp.Unz2XLUFdB/bench.HEAD^^ │   /tmp/tmp.Unz2XLUFdB/bench.HEAD^    │
                       │               B/op               │     B/op      vs base                │
TSVector/String-24                           4.516Mi ± 0%   2.319Mi ± 0%  -48.65% (p=0.000 n=10)
TSVector/StringSize-24                       4.516Mi ± 0%   2.319Mi ± 0%  -48.65% (p=0.000 n=10)
geomean                                      4.516Mi        2.319Mi       -48.65%

                       │ /tmp/tmp.Unz2XLUFdB/bench.HEAD^^ │   /tmp/tmp.Unz2XLUFdB/bench.HEAD^   │
                       │            allocs/op             │  allocs/op   vs base                │
TSVector/String-24                            273.6k ± 0%   105.6k ± 0%  -61.41% (p=0.000 n=10)
TSVector/StringSize-24                        273.6k ± 0%   105.6k ± 0%  -61.41% (p=0.000 n=10)
geomean                                       273.6k        105.6k       -61.41%
```

Release note: None
  • Loading branch information
yuzefovich committed Mar 15, 2023
1 parent adb0556 commit 13d99ec
Show file tree
Hide file tree
Showing 3 changed files with 66 additions and 35 deletions.
32 changes: 18 additions & 14 deletions pkg/util/tsearch/tsquery.go
Original file line number Diff line number Diff line change
Expand Up @@ -134,33 +134,35 @@ type tsNode struct {
}

func (n tsNode) String() string {
return n.infixString(0)
var buf strings.Builder
n.writeInfixString(&buf, 0)
return buf.String()
}

func (n tsNode) infixString(parentPrecedence int) string {
func (n tsNode) writeInfixString(buf *strings.Builder, parentPrecedence int) {
if n.op == invalid {
return n.term.String()
n.term.writeString(buf)
return
}
var s strings.Builder
prec := n.op.precedence()
needParen := prec < parentPrecedence
if needParen {
s.WriteString("( ")
buf.WriteString("( ")
}
switch n.op {
case not:
fmt.Fprintf(&s, "!%s", n.l.infixString(prec))
buf.WriteString("!")
n.l.writeInfixString(buf, prec)
default:
fmt.Fprintf(&s, "%s %s %s",
n.l.infixString(prec),
tsTerm{operator: n.op, followedN: n.followedN},
n.r.infixString(prec),
)
n.l.writeInfixString(buf, prec)
buf.WriteString(" ")
tsTerm{operator: n.op, followedN: n.followedN}.writeString(buf)
buf.WriteString(" ")
n.r.writeInfixString(buf, prec)
}
if needParen {
s.WriteString(" )")
buf.WriteString(" )")
}
return s.String()
}

// UnambiguousString returns a string representation of this tsNode that wraps
Expand All @@ -172,7 +174,9 @@ func (n tsNode) UnambiguousString() string {
case not:
return fmt.Sprintf("!%s", n.l.UnambiguousString())
}
return fmt.Sprintf("[%s%s%s]", n.l.UnambiguousString(), tsTerm{operator: n.op, followedN: n.followedN}, n.r.UnambiguousString())
var buf strings.Builder
tsTerm{operator: n.op, followedN: n.followedN}.writeString(&buf)
return fmt.Sprintf("[%s%s%s]", n.l.UnambiguousString(), buf.String(), n.r.UnambiguousString())
}

// TSQuery represents a tsNode AST root. A TSQuery is a tree of text search
Expand Down
46 changes: 25 additions & 21 deletions pkg/util/tsearch/tsvector.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
package tsearch

import (
"fmt"
"sort"
"strconv"
"strings"
Expand Down Expand Up @@ -73,24 +72,22 @@ const (
weightAny = weightA | weightB | weightC | weightD
)

func (w tsWeight) String() string {
var ret strings.Builder
func (w tsWeight) writeString(buf *strings.Builder) {
if w&weightStar != 0 {
ret.WriteByte('*')
buf.WriteByte('*')
}
if w&weightA != 0 {
ret.WriteByte('A')
buf.WriteByte('A')
}
if w&weightB != 0 {
ret.WriteByte('B')
buf.WriteByte('B')
}
if w&weightC != 0 {
ret.WriteByte('C')
buf.WriteByte('C')
}
if w&weightD != 0 {
ret.WriteByte('D')
buf.WriteByte('D')
}
return ret.String()
}

// TSVectorPGEncoding returns the PG-compatible wire protocol encoding for a
Expand Down Expand Up @@ -167,28 +164,36 @@ func newLexemeTerm(lexeme string) (tsTerm, error) {
return tsTerm{lexeme: lexeme}, nil
}

func (t tsTerm) String() string {
func (t tsTerm) writeString(buf *strings.Builder) {
if t.operator != 0 {
switch t.operator {
case and:
return "&"
buf.WriteString("&")
return
case or:
return "|"
buf.WriteString("|")
return
case not:
return "!"
buf.WriteString("!")
return
case lparen:
return "("
buf.WriteString("(")
return
case rparen:
return ")"
buf.WriteString(")")
return
case followedby:
buf.WriteString("<")
if t.followedN == 1 {
return "<->"
buf.WriteString("-")
} else {
buf.WriteString(strconv.Itoa(int(t.followedN)))
}
return fmt.Sprintf("<%d>", t.followedN)
buf.WriteString(">")
return
}
}

var buf strings.Builder
buf.WriteByte('\'')
for _, r := range t.lexeme {
if r == '\'' {
Expand All @@ -208,9 +213,8 @@ func (t tsTerm) String() string {
if pos.position > 0 {
buf.WriteString(strconv.Itoa(int(pos.position)))
}
buf.WriteString(pos.weight.String())
pos.weight.writeString(buf)
}
return buf.String()
}

func (t tsTerm) matchesWeight(targetWeight tsWeight) bool {
Expand Down Expand Up @@ -240,7 +244,7 @@ func (t TSVector) String() string {
if i > 0 {
buf.WriteByte(' ')
}
buf.WriteString(term.String())
term.writeString(&buf)
}
return buf.String()
}
Expand Down
23 changes: 23 additions & 0 deletions pkg/util/tsearch/tsvector_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -187,3 +187,26 @@ func TestParseTSRandom(t *testing.T) {
assert.Equal(t, v, v2)
}
}

// BenchmarkTSVector measures how long it takes to render TSVectors as strings.
// A fixed set of randomly generated vectors is built once up front, and each
// sub-benchmark iteration stringifies every vector in that set.
func BenchmarkTSVector(b *testing.B) {
	const numVectors = 10000

	rng, _ := randutil.NewTestRand()
	vectors := make([]TSVector, numVectors)
	for idx := 0; idx < numVectors; idx++ {
		vectors[idx] = RandomTSVector(rng)
	}
	b.ResetTimer()

	// String only produces the string and discards it.
	b.Run("String", func(b *testing.B) {
		for iter := 0; iter < b.N; iter++ {
			for _, vec := range vectors {
				_ = vec.String()
			}
		}
	})

	// StringSize additionally takes the length of the produced string.
	b.Run("StringSize", func(b *testing.B) {
		for iter := 0; iter < b.N; iter++ {
			for _, vec := range vectors {
				_ = len(vec.String())
			}
		}
	})
}

0 comments on commit 13d99ec

Please sign in to comment.