Skip to content

Commit

Permalink
Merge #92087 #98609
Browse files Browse the repository at this point in the history
92087: jobs: add pause-point to pause on any error r=dt a=dt

Release note (sql change): the new 'error' pause-point can be enabled via the jobs.debug.pausepoints setting to pause, rather than fail, when a job execution returns an error to the job execution system.

Epic: none.

98609: tsearch: speed up TSVector.String r=yuzefovich a=yuzefovich

Fixes: #98595.

**tsearch: speed up TSVector.String**

This commit speeds up `TSVector.String` by reusing exactly the same
`strings.Builder` object for all parts of the TSVector.

This commit also introduces a simple benchmark for this method.
`benchstat` seems to be busted a bit, but the impact of this commit is:
```
                       │ /tmp/tmp.Unz2XLUFdB/bench.HEAD^^ │   /tmp/tmp.Unz2XLUFdB/bench.HEAD^   │
                       │              sec/op              │   sec/op     vs base                │
TSVector/String-24                           14.590m ± 0%   8.272m ± 0%  -43.31% (p=0.000 n=10)
TSVector/StringSize-24                       14.654m ± 0%   8.269m ± 0%  -43.57% (p=0.000 n=10)
geomean                                       14.62m        8.270m       -43.44%

                       │ /tmp/tmp.Unz2XLUFdB/bench.HEAD^^ │   /tmp/tmp.Unz2XLUFdB/bench.HEAD^    │
                       │               B/op               │     B/op      vs base                │
TSVector/String-24                           4.516Mi ± 0%   2.319Mi ± 0%  -48.65% (p=0.000 n=10)
TSVector/StringSize-24                       4.516Mi ± 0%   2.319Mi ± 0%  -48.65% (p=0.000 n=10)
geomean                                      4.516Mi        2.319Mi       -48.65%

                       │ /tmp/tmp.Unz2XLUFdB/bench.HEAD^^ │   /tmp/tmp.Unz2XLUFdB/bench.HEAD^   │
                       │            allocs/op             │  allocs/op   vs base                │
TSVector/String-24                            273.6k ± 0%   105.6k ± 0%  -61.41% (p=0.000 n=10)
TSVector/StringSize-24                        273.6k ± 0%   105.6k ± 0%  -61.41% (p=0.000 n=10)
geomean                                       273.6k        105.6k       -61.41%
```

Release note: None

**tsearch: add optimize StringSize method**

This commit introduces `TSVector.StringSize` method which is optimized
equivalent of `len(TSVector.String())` which avoids the construction of
the string. This method is now utilized by `DTSVector.Size`. This
results in some code duplication but it seems worth it:
```
                       │ /tmp/tmp.zD8FYqMrdo/bench.HEAD^ │   /tmp/tmp.zD8FYqMrdo/bench.HEAD    │
                       │             sec/op              │   sec/op     vs base                │
TSVector/String-24                           8.404m ± 0%   8.135m ± 0%   -3.20% (p=0.000 n=10)
TSVector/StringSize-24                       8.381m ± 0%   3.547m ± 0%  -57.67% (p=0.000 n=10)
geomean                                      8.393m        5.372m       -35.99%

                       │ /tmp/tmp.zD8FYqMrdo/bench.HEAD^ │    /tmp/tmp.zD8FYqMrdo/bench.HEAD    │
                       │              B/op               │     B/op      vs base                │
TSVector/String-24                          2.349Mi ± 0%   2.323Mi ± 0%   -1.12% (p=0.000 n=10)
TSVector/StringSize-24                     2405.6Ki ± 0%   192.1Ki ± 0%  -92.02% (p=0.000 n=10)
geomean                                     2.349Mi        675.9Ki       -71.90%

                       │ /tmp/tmp.zD8FYqMrdo/bench.HEAD^ │   /tmp/tmp.zD8FYqMrdo/bench.HEAD    │
                       │            allocs/op            │  allocs/op   vs base                │
TSVector/String-24                           106.7k ± 0%   105.8k ± 0%   -0.88% (p=0.000 n=10)
TSVector/StringSize-24                      106.72k ± 0%   61.46k ± 0%  -42.41% (p=0.000 n=10)
geomean                                      106.7k        80.63k       -24.44%
```

Release note: None

Co-authored-by: David Taylor <[email protected]>
Co-authored-by: Yahor Yuzefovich <[email protected]>
  • Loading branch information
3 people committed Mar 16, 2023
3 parents 8c4018a + 983e6b1 + 1dcb587 commit b88e0e5
Show file tree
Hide file tree
Showing 5 changed files with 144 additions and 36 deletions.
4 changes: 4 additions & 0 deletions pkg/jobs/registry.go
Original file line number Diff line number Diff line change
Expand Up @@ -1551,6 +1551,10 @@ func (r *Registry) stepThroughStateMachine(
return err
}

if pauseOnErrErr := r.CheckPausepoint("after_exec_error"); pauseOnErrErr != nil {
err = errors.WithSecondaryError(pauseOnErrErr, err)
}

if errors.Is(err, errPauseSelfSentinel) {
if err := r.PauseRequested(ctx, nil, job.ID(), err.Error()); err != nil {
return err
Expand Down
2 changes: 1 addition & 1 deletion pkg/sql/sem/tree/datum.go
Original file line number Diff line number Diff line change
Expand Up @@ -4059,7 +4059,7 @@ func (d *DTSVector) Min(_ CompareContext) (Datum, bool) {

// Size implements the Datum interface.
func (d *DTSVector) Size() uintptr {
return uintptr(len(d.TSVector.String()))
return uintptr(d.TSVector.StringSize())
}

// AsDTSVector attempts to retrieve a DTSVector from an Expr, returning a
Expand Down
32 changes: 18 additions & 14 deletions pkg/util/tsearch/tsquery.go
Original file line number Diff line number Diff line change
Expand Up @@ -134,33 +134,35 @@ type tsNode struct {
}

func (n tsNode) String() string {
return n.infixString(0)
var buf strings.Builder
n.writeInfixString(&buf, 0)
return buf.String()
}

func (n tsNode) infixString(parentPrecedence int) string {
func (n tsNode) writeInfixString(buf *strings.Builder, parentPrecedence int) {
if n.op == invalid {
return n.term.String()
n.term.writeString(buf)
return
}
var s strings.Builder
prec := n.op.precedence()
needParen := prec < parentPrecedence
if needParen {
s.WriteString("( ")
buf.WriteString("( ")
}
switch n.op {
case not:
fmt.Fprintf(&s, "!%s", n.l.infixString(prec))
buf.WriteString("!")
n.l.writeInfixString(buf, prec)
default:
fmt.Fprintf(&s, "%s %s %s",
n.l.infixString(prec),
tsTerm{operator: n.op, followedN: n.followedN},
n.r.infixString(prec),
)
n.l.writeInfixString(buf, prec)
buf.WriteString(" ")
tsTerm{operator: n.op, followedN: n.followedN}.writeString(buf)
buf.WriteString(" ")
n.r.writeInfixString(buf, prec)
}
if needParen {
s.WriteString(" )")
buf.WriteString(" )")
}
return s.String()
}

// UnambiguousString returns a string representation of this tsNode that wraps
Expand All @@ -172,7 +174,9 @@ func (n tsNode) UnambiguousString() string {
case not:
return fmt.Sprintf("!%s", n.l.UnambiguousString())
}
return fmt.Sprintf("[%s%s%s]", n.l.UnambiguousString(), tsTerm{operator: n.op, followedN: n.followedN}, n.r.UnambiguousString())
var buf strings.Builder
tsTerm{operator: n.op, followedN: n.followedN}.writeString(&buf)
return fmt.Sprintf("[%s%s%s]", n.l.UnambiguousString(), buf.String(), n.r.UnambiguousString())
}

// TSQuery represents a tsNode AST root. A TSQuery is a tree of text search
Expand Down
111 changes: 90 additions & 21 deletions pkg/util/tsearch/tsvector.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,12 @@
package tsearch

import (
"fmt"
"math/bits"
"sort"
"strconv"
"strings"
"unicode"
"unicode/utf8"

"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode"
"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror"
Expand Down Expand Up @@ -73,24 +74,31 @@ const (
weightAny = weightA | weightB | weightC | weightD
)

func (w tsWeight) String() string {
var ret strings.Builder
// NB: must be kept in sync with stringSize().
func (w tsWeight) writeString(buf *strings.Builder) {
if w&weightStar != 0 {
ret.WriteByte('*')
buf.WriteByte('*')
}
if w&weightA != 0 {
ret.WriteByte('A')
buf.WriteByte('A')
}
if w&weightB != 0 {
ret.WriteByte('B')
buf.WriteByte('B')
}
if w&weightC != 0 {
ret.WriteByte('C')
buf.WriteByte('C')
}
if w&weightD != 0 {
ret.WriteByte('D')
buf.WriteByte('D')
}
return ret.String()
}

// stringSize returns the length of the string that corresponds to this
// tsWeight.
// NB: must be kept in sync with writeString().
func (w tsWeight) stringSize() int {
// Count the number of bits set in the lowest 5 bits.
return bits.OnesCount8(uint8(w & 31))
}

// TSVectorPGEncoding returns the PG-compatible wire protocol encoding for a
Expand Down Expand Up @@ -167,28 +175,37 @@ func newLexemeTerm(lexeme string) (tsTerm, error) {
return tsTerm{lexeme: lexeme}, nil
}

func (t tsTerm) String() string {
// NB: must be kept in sync with stringSize().
func (t tsTerm) writeString(buf *strings.Builder) {
if t.operator != 0 {
switch t.operator {
case and:
return "&"
buf.WriteString("&")
return
case or:
return "|"
buf.WriteString("|")
return
case not:
return "!"
buf.WriteString("!")
return
case lparen:
return "("
buf.WriteString("(")
return
case rparen:
return ")"
buf.WriteString(")")
return
case followedby:
buf.WriteString("<")
if t.followedN == 1 {
return "<->"
buf.WriteString("-")
} else {
buf.WriteString(strconv.Itoa(int(t.followedN)))
}
return fmt.Sprintf("<%d>", t.followedN)
buf.WriteString(">")
return
}
}

var buf strings.Builder
buf.WriteByte('\'')
for _, r := range t.lexeme {
if r == '\'' {
Expand All @@ -208,9 +225,48 @@ func (t tsTerm) String() string {
if pos.position > 0 {
buf.WriteString(strconv.Itoa(int(pos.position)))
}
buf.WriteString(pos.weight.String())
pos.weight.writeString(buf)
}
return buf.String()
}

// stringSize returns the length of the string representation of this tsTerm.
// NB: must be kept in sync with writeString().
func (t tsTerm) stringSize() int {
if t.operator != 0 {
switch t.operator {
case and, or, not, lparen, rparen:
return 1
case followedby:
if t.followedN == 1 {
return 3 // '<->'
}
return 2 + len(strconv.Itoa(int(t.followedN))) // fmt.Sprintf("<%d>", t.followedN)
}
}
size := 1 // '\''
for _, r := range t.lexeme {
if r == '\'' {
// Single quotes are escaped as double single quotes inside of a
// TSVector.
size += 2
} else {
// Compare as uint32 to correctly handle negative runes.
if uint32(r) < utf8.RuneSelf {
size++
} else {
size += utf8.RuneLen(r)
}
}
}
size++ // '\''
size += len(t.positions) // ':' or ',' for each position
for _, pos := range t.positions {
if pos.position > 0 {
size += len(strconv.Itoa(int(pos.position)))
}
size += pos.weight.stringSize()
}
return size
}

func (t tsTerm) matchesWeight(targetWeight tsWeight) bool {
Expand Down Expand Up @@ -240,11 +296,24 @@ func (t TSVector) String() string {
if i > 0 {
buf.WriteByte(' ')
}
buf.WriteString(term.String())
term.writeString(&buf)
}
return buf.String()
}

// StringSize returns the length of the string that would have been returned on
// String() call, without actually constructing that string.
func (t TSVector) StringSize() int {
var size int
if len(t) > 0 {
size = len(t) - 1 // space
}
for _, term := range t {
size += term.stringSize()
}
return size
}

// ParseTSVector produces a TSVector from an input string. The input will be
// sorted by lexeme, but will not be automatically stemmed or stop-worded.
func ParseTSVector(input string) (TSVector, error) {
Expand Down
31 changes: 31 additions & 0 deletions pkg/util/tsearch/tsvector_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -187,3 +187,34 @@ func TestParseTSRandom(t *testing.T) {
assert.Equal(t, v, v2)
}
}

func TestTSVectorStringSize(t *testing.T) {
r, _ := randutil.NewTestRand()
for i := 0; i < 1000; i++ {
v := RandomTSVector(r)
require.Equal(t, len(v.String()), v.StringSize())
}
}

func BenchmarkTSVector(b *testing.B) {
r, _ := randutil.NewTestRand()
tsVectors := make([]TSVector, 10000)
for i := range tsVectors {
tsVectors[i] = RandomTSVector(r)
}
b.ResetTimer()
b.Run("String", func(b *testing.B) {
for i := 0; i < b.N; i++ {
for _, v := range tsVectors {
_ = v.String()
}
}
})
b.Run("StringSize", func(b *testing.B) {
for i := 0; i < b.N; i++ {
for _, v := range tsVectors {
_ = v.StringSize()
}
}
})
}

0 comments on commit b88e0e5

Please sign in to comment.