From 983e6b17dd5738e84ec5b1407fa607fc75f69b57 Mon Sep 17 00:00:00 2001 From: David Taylor Date: Thu, 17 Nov 2022 21:30:49 +0000 Subject: [PATCH 1/3] jobs: add pause-point to pause on any error Release note: none. Epic: none. --- pkg/jobs/registry.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pkg/jobs/registry.go b/pkg/jobs/registry.go index 2f54afd45587..2fd98d2e59d3 100644 --- a/pkg/jobs/registry.go +++ b/pkg/jobs/registry.go @@ -1524,6 +1524,10 @@ func (r *Registry) stepThroughStateMachine( return errors.Errorf("job %d: node liveness error: restarting in background", job.ID()) } + if pauseOnErrErr := r.CheckPausepoint("after_exec_error"); pauseOnErrErr != nil { + err = errors.WithSecondaryError(pauseOnErrErr, err) + } + if errors.Is(err, errPauseSelfSentinel) { if err := r.PauseRequested(ctx, nil, job.ID(), err.Error()); err != nil { return err From 13d99ecf9cebff468c3af46ae43231809f77e6c2 Mon Sep 17 00:00:00 2001 From: Yahor Yuzefovich Date: Wed, 15 Mar 2023 12:07:57 -0700 Subject: [PATCH 2/3] tsearch: speed up TSVector.String MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit speeds up `TSVector.String` by reusing exactly the same `strings.Builder` object for all parts of the TSVector. This commit also introduces a simple benchmark for this method. `benchstat` seems to be busted a bit, but the impact of this commit is: ``` │ /tmp/tmp.Unz2XLUFdB/bench.HEAD^^ │ /tmp/tmp.Unz2XLUFdB/bench.HEAD^ │ │ sec/op │ sec/op vs base │ TSVector/String-24 14.590m ± 0% 8.272m ± 0% -43.31% (p=0.000 n=10) TSVector/StringSize-24 14.654m ± 0% 8.269m ± 0% -43.57% (p=0.000 n=10) geomean 14.62m 8.270m -43.44% │ /tmp/tmp.Unz2XLUFdB/bench.HEAD^^ │ /tmp/tmp.Unz2XLUFdB/bench.HEAD^ │ │ B/op │ B/op vs base │ TSVector/String-24 4.516Mi ± 0% 2.319Mi ± 0% -48.65% (p=0.000 n=10) TSVector/StringSize-24 4.516Mi ± 0% 2.319Mi ± 0% -48.65% (p=0.000 n=10) geomean 4.516Mi 2.319Mi -48.65% │ /tmp/tmp.Unz2XLUFdB/bench.HEAD^^ │ /tmp/tmp.Unz2XLUFdB/bench.HEAD^ │ │ allocs/op │ allocs/op vs base │ TSVector/String-24 273.6k ± 0% 105.6k ± 0% -61.41% (p=0.000 n=10) TSVector/StringSize-24 273.6k ± 0% 105.6k ± 0% -61.41% (p=0.000 n=10) geomean 273.6k 105.6k -61.41% ``` Release note: None --- pkg/util/tsearch/tsquery.go | 32 +++++++++++---------- pkg/util/tsearch/tsvector.go | 46 +++++++++++++++++-------------- pkg/util/tsearch/tsvector_test.go | 23 ++++++++++++++++ 3 files changed, 66 insertions(+), 35 deletions(-) diff --git a/pkg/util/tsearch/tsquery.go b/pkg/util/tsearch/tsquery.go index 5200e8947cb2..078691b58cf3 100644 --- a/pkg/util/tsearch/tsquery.go +++ b/pkg/util/tsearch/tsquery.go @@ -134,33 +134,35 @@ type tsNode struct { } func (n tsNode) String() string { - return n.infixString(0) + var buf strings.Builder + n.writeInfixString(&buf, 0) + return buf.String() } -func (n tsNode) infixString(parentPrecedence int) string { +func (n tsNode) writeInfixString(buf *strings.Builder, parentPrecedence int) { if n.op == invalid { - return n.term.String() + n.term.writeString(buf) + return } - var s strings.Builder prec := n.op.precedence() needParen := prec < parentPrecedence if needParen { - s.WriteString("( ") + buf.WriteString("( ") } switch n.op { case not: - fmt.Fprintf(&s, "!%s", n.l.infixString(prec)) + buf.WriteString("!") + n.l.writeInfixString(buf, prec) default: - fmt.Fprintf(&s, "%s %s %s", - n.l.infixString(prec), - tsTerm{operator: n.op, followedN: n.followedN}, - n.r.infixString(prec), - ) + n.l.writeInfixString(buf, prec) + buf.WriteString(" ") + tsTerm{operator: n.op, followedN: n.followedN}.writeString(buf) + buf.WriteString(" ") + n.r.writeInfixString(buf, prec) } if needParen { - s.WriteString(" )") + buf.WriteString(" )") } - return s.String() } // UnambiguousString returns a string representation of this tsNode that wraps @@ -172,7 +174,9 @@ func (n tsNode) UnambiguousString() string { case not: return fmt.Sprintf("!%s", n.l.UnambiguousString()) } - return fmt.Sprintf("[%s%s%s]", n.l.UnambiguousString(), tsTerm{operator: n.op, followedN: n.followedN}, n.r.UnambiguousString()) + var buf strings.Builder + tsTerm{operator: n.op, followedN: n.followedN}.writeString(&buf) + return fmt.Sprintf("[%s%s%s]", n.l.UnambiguousString(), buf.String(), n.r.UnambiguousString()) } // TSQuery represents a tsNode AST root. A TSQuery is a tree of text search diff --git a/pkg/util/tsearch/tsvector.go b/pkg/util/tsearch/tsvector.go index 073269b057df..e75d15b83b2a 100644 --- a/pkg/util/tsearch/tsvector.go +++ b/pkg/util/tsearch/tsvector.go @@ -11,7 +11,6 @@ package tsearch import ( - "fmt" "sort" "strconv" "strings" @@ -73,24 +72,22 @@ const ( weightAny = weightA | weightB | weightC | weightD ) -func (w tsWeight) String() string { - var ret strings.Builder +func (w tsWeight) writeString(buf *strings.Builder) { if w&weightStar != 0 { - ret.WriteByte('*') + buf.WriteByte('*') } if w&weightA != 0 { - ret.WriteByte('A') + buf.WriteByte('A') } if w&weightB != 0 { - ret.WriteByte('B') + buf.WriteByte('B') } if w&weightC != 0 { - ret.WriteByte('C') + buf.WriteByte('C') } if w&weightD != 0 { - ret.WriteByte('D') + buf.WriteByte('D') } - return ret.String() } // TSVectorPGEncoding returns the PG-compatible wire protocol encoding for a @@ -167,28 +164,36 @@ func newLexemeTerm(lexeme string) (tsTerm, error) { return tsTerm{lexeme: lexeme}, nil } -func (t tsTerm) String() string { +func (t tsTerm) writeString(buf *strings.Builder) { if t.operator != 0 { switch t.operator { case and: - return "&" + buf.WriteString("&") + return case or: - return "|" + buf.WriteString("|") + return case not: - return "!" + buf.WriteString("!") + return case lparen: - return "(" + buf.WriteString("(") + return case rparen: - return ")" + buf.WriteString(")") + return case followedby: + buf.WriteString("<") if t.followedN == 1 { - return "<->" + buf.WriteString("-") + } else { + buf.WriteString(strconv.Itoa(int(t.followedN))) } - return fmt.Sprintf("<%d>", t.followedN) + buf.WriteString(">") + return } } - var buf strings.Builder buf.WriteByte('\'') for _, r := range t.lexeme { if r == '\'' { @@ -208,9 +213,8 @@ func (t tsTerm) String() string { if pos.position > 0 { buf.WriteString(strconv.Itoa(int(pos.position))) } - buf.WriteString(pos.weight.String()) + pos.weight.writeString(buf) } - return buf.String() } func (t tsTerm) matchesWeight(targetWeight tsWeight) bool { @@ -240,7 +244,7 @@ func (t TSVector) String() string { if i > 0 { buf.WriteByte(' ') } - buf.WriteString(term.String()) + term.writeString(&buf) } return buf.String() } diff --git a/pkg/util/tsearch/tsvector_test.go b/pkg/util/tsearch/tsvector_test.go index 544c3d78349d..6bf57fdb0a9a 100644 --- a/pkg/util/tsearch/tsvector_test.go +++ b/pkg/util/tsearch/tsvector_test.go @@ -187,3 +187,26 @@ func TestParseTSRandom(t *testing.T) { assert.Equal(t, v, v2) } } + +func BenchmarkTSVector(b *testing.B) { + r, _ := randutil.NewTestRand() + tsVectors := make([]TSVector, 10000) + for i := range tsVectors { + tsVectors[i] = RandomTSVector(r) + } + b.ResetTimer() + b.Run("String", func(b *testing.B) { + for i := 0; i < b.N; i++ { + for _, v := range tsVectors { + _ = v.String() + } + } + }) + b.Run("StringSize", func(b *testing.B) { + for i := 0; i < b.N; i++ { + for _, v := range tsVectors { + _ = len(v.String()) + } + } + }) +} From 1dcb587212569e15a11274f14ed4eb9dc33b6ac5 Mon Sep 17 00:00:00 2001 From: Yahor Yuzefovich Date: Tue, 14 Mar 2023 13:56:27 -0700 Subject: [PATCH 3/3] tsearch: add optimize StringSize method MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit introduces `TSVector.StringSize` method which is optimized equivalent of `len(TSVector.String())` which avoids the construction of the string. This method is now utilized by `DTSVector.Size`. This results in some code duplication but it seems worth it: ``` │ /tmp/tmp.zD8FYqMrdo/bench.HEAD^ │ /tmp/tmp.zD8FYqMrdo/bench.HEAD │ │ sec/op │ sec/op vs base │ TSVector/String-24 8.404m ± 0% 8.135m ± 0% -3.20% (p=0.000 n=10) TSVector/StringSize-24 8.381m ± 0% 3.547m ± 0% -57.67% (p=0.000 n=10) geomean 8.393m 5.372m -35.99% │ /tmp/tmp.zD8FYqMrdo/bench.HEAD^ │ /tmp/tmp.zD8FYqMrdo/bench.HEAD │ │ B/op │ B/op vs base │ TSVector/String-24 2.349Mi ± 0% 2.323Mi ± 0% -1.12% (p=0.000 n=10) TSVector/StringSize-24 2405.6Ki ± 0% 192.1Ki ± 0% -92.02% (p=0.000 n=10) geomean 2.349Mi 675.9Ki -71.90% │ /tmp/tmp.zD8FYqMrdo/bench.HEAD^ │ /tmp/tmp.zD8FYqMrdo/bench.HEAD │ │ allocs/op │ allocs/op vs base │ TSVector/String-24 106.7k ± 0% 105.8k ± 0% -0.88% (p=0.000 n=10) TSVector/StringSize-24 106.72k ± 0% 61.46k ± 0% -42.41% (p=0.000 n=10) geomean 106.7k 80.63k -24.44% ``` Release note: None --- pkg/sql/sem/tree/datum.go | 2 +- pkg/util/tsearch/tsvector.go | 65 +++++++++++++++++++++++++++++++ pkg/util/tsearch/tsvector_test.go | 10 ++++- 3 files changed, 75 insertions(+), 2 deletions(-) diff --git a/pkg/sql/sem/tree/datum.go b/pkg/sql/sem/tree/datum.go index baf6e2ef2438..166b5f530aa4 100644 --- a/pkg/sql/sem/tree/datum.go +++ b/pkg/sql/sem/tree/datum.go @@ -4059,7 +4059,7 @@ func (d *DTSVector) Min(_ CompareContext) (Datum, bool) { // Size implements the Datum interface. func (d *DTSVector) Size() uintptr { - return uintptr(len(d.TSVector.String())) + return uintptr(d.TSVector.StringSize()) } // AsDTSVector attempts to retrieve a DTSVector from an Expr, returning a diff --git a/pkg/util/tsearch/tsvector.go b/pkg/util/tsearch/tsvector.go index e75d15b83b2a..62e1bb018085 100644 --- a/pkg/util/tsearch/tsvector.go +++ b/pkg/util/tsearch/tsvector.go @@ -11,10 +11,12 @@ package tsearch import ( + "math/bits" "sort" "strconv" "strings" "unicode" + "unicode/utf8" "github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode" "github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror" @@ -72,6 +74,7 @@ const ( weightAny = weightA | weightB | weightC | weightD ) +// NB: must be kept in sync with stringSize(). func (w tsWeight) writeString(buf *strings.Builder) { if w&weightStar != 0 { buf.WriteByte('*') @@ -90,6 +93,14 @@ func (w tsWeight) writeString(buf *strings.Builder) { } } +// stringSize returns the length of the string that corresponds to this +// tsWeight. +// NB: must be kept in sync with writeString(). +func (w tsWeight) stringSize() int { + // Count the number of bits set in the lowest 5 bits. + return bits.OnesCount8(uint8(w & 31)) +} + // TSVectorPGEncoding returns the PG-compatible wire protocol encoding for a // given weight. Note that this is only allowable for TSVector tsweights, which // can't have more than one weight set at the same time. In a TSQuery, you might @@ -164,6 +175,7 @@ func newLexemeTerm(lexeme string) (tsTerm, error) { return tsTerm{lexeme: lexeme}, nil } +// NB: must be kept in sync with stringSize(). func (t tsTerm) writeString(buf *strings.Builder) { if t.operator != 0 { switch t.operator { @@ -217,6 +229,46 @@ func (t tsTerm) writeString(buf *strings.Builder) { } } +// stringSize returns the length of the string representation of this tsTerm. +// NB: must be kept in sync with writeString(). +func (t tsTerm) stringSize() int { + if t.operator != 0 { + switch t.operator { + case and, or, not, lparen, rparen: + return 1 + case followedby: + if t.followedN == 1 { + return 3 // '<->' + } + return 2 + len(strconv.Itoa(int(t.followedN))) // fmt.Sprintf("<%d>", t.followedN) + } + } + size := 1 // '\'' + for _, r := range t.lexeme { + if r == '\'' { + // Single quotes are escaped as double single quotes inside of a + // TSVector. + size += 2 + } else { + // Compare as uint32 to correctly handle negative runes. + if uint32(r) < utf8.RuneSelf { + size++ + } else { + size += utf8.RuneLen(r) + } + } + } + size++ // '\'' + size += len(t.positions) // ':' or ',' for each position + for _, pos := range t.positions { + if pos.position > 0 { + size += len(strconv.Itoa(int(pos.position))) + } + size += pos.weight.stringSize() + } + return size +} + func (t tsTerm) matchesWeight(targetWeight tsWeight) bool { if targetWeight == weightAny { return true @@ -249,6 +301,19 @@ func (t TSVector) String() string { return buf.String() } +// StringSize returns the length of the string that would have been returned on +// String() call, without actually constructing that string. +func (t TSVector) StringSize() int { + var size int + if len(t) > 0 { + size = len(t) - 1 // space + } + for _, term := range t { + size += term.stringSize() + } + return size +} + // ParseTSVector produces a TSVector from an input string. The input will be // sorted by lexeme, but will not be automatically stemmed or stop-worded. func ParseTSVector(input string) (TSVector, error) { diff --git a/pkg/util/tsearch/tsvector_test.go b/pkg/util/tsearch/tsvector_test.go index 6bf57fdb0a9a..07b08777e28e 100644 --- a/pkg/util/tsearch/tsvector_test.go +++ b/pkg/util/tsearch/tsvector_test.go @@ -188,6 +188,14 @@ func TestParseTSRandom(t *testing.T) { } } +func TestTSVectorStringSize(t *testing.T) { + r, _ := randutil.NewTestRand() + for i := 0; i < 1000; i++ { + v := RandomTSVector(r) + require.Equal(t, len(v.String()), v.StringSize()) + } +} + func BenchmarkTSVector(b *testing.B) { r, _ := randutil.NewTestRand() tsVectors := make([]TSVector, 10000) @@ -205,7 +213,7 @@ func BenchmarkTSVector(b *testing.B) { b.Run("StringSize", func(b *testing.B) { for i := 0; i < b.N; i++ { for _, v := range tsVectors { - _ = len(v.String()) + _ = v.StringSize() } } })