53800: server: defer gossip start to when absolutely needed r=irfansharif a=irfansharif

This was pulled out of #52526 to keep the diff there focused on the
introduction of the RPC (and to see if that alone shook out any
failures). That change lets us make this one, where we can defer gossip
start until right before we start up Node.

Release justification: low risk, high benefit change to existing functionality
Release note: None
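
For intuition, here is a minimal runnable Go sketch of the startup ordering this change enforces. Every identifier below (node, startStores, startGossiping as written here) is an illustrative stand-in rather than the actual CockroachDB API; the real sequencing is in the pkg/server diffs below.

package main

import "fmt"

// node is a toy stand-in for the server's Node type.
type node struct{ clusterVersion string }

// startStores stands in for store startup: store migrations run here and
// must observe the cluster version the server booted with, before gossip
// has a chance to bump it.
func (n *node) startStores() {
	fmt.Println("stores started; migrations ran at version", n.clusterVersion)
}

// startGossiping stands in for gossip startup, which may deliver a newer
// cluster version from the network; it is deferred until just before the
// node starts serving.
func (n *node) startGossiping() {
	fmt.Println("gossip started; cluster version may now be bumped")
}

func main() {
	n := &node{clusterVersion: "v20.1"}
	n.startStores()    // 1. migrations see the boot-time version
	n.startGossiping() // 2. gossip starts only right before serving
	fmt.Println("node serving")
}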

53846: sql: fix inflated "overhead" in statement timings r=solongordon a=solongordon

Fixes #40675
Fixes #50108

Release justification: Low-risk, high-reward fix to existing
functionality.

Release note (admin ui change): In some cases, the Execution Stats page
would show a confusingly high Overhead latency for a statement. This
could happen due to multiple statements being parsed together or due to
statement execution being retried. To avoid this, we no longer count
the time between the end of parsing and the start of execution toward
service latency.
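
As a worked illustration of the new accounting (made-up timestamps and local variable names; the real code is in pkg/sql/executor_statement_metrics.go below), ignoring the gap between the end of parsing and the start of planning removes the inflated overhead:

package main

import (
	"fmt"
	"time"
)

func main() {
	base := time.Now()
	queryReceived := base
	endParse := base.Add(2 * time.Millisecond)
	// A 500ms idle gap, e.g. a transaction retry or a later statement in a
	// "SELECT 1; SELECT 2" batch, previously counted toward service latency.
	startPlan := base.Add(502 * time.Millisecond)
	endExec := base.Add(510 * time.Millisecond)

	oldLatency := endExec.Sub(queryReceived) // inflated: includes the idle gap
	parse := endParse.Sub(queryReceived)
	planAndExec := endExec.Sub(startPlan)
	newLatency := parse + planAndExec // idle gap between phases is ignored

	fmt.Println("old service latency:", oldLatency) // ~510ms
	fmt.Println("new service latency:", newLatency) // ~10ms
}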

53906: roachtest: Unskip disk-full roachtest r=tbg a=itsbilal

The disk-full roachtest no longer fails now that Pebble is the
default storage engine. Remove the skipped marker.

Fixes #35328.

Release justification: Roachtest-only change.
Release note: None.

53907: geos: bump to include seg fault fix r=sumeerbhola a=otan

Resolves #53254.

Release justification: bug fix to new functionality

Release note: None

Co-authored-by: irfan sharif <[email protected]>
Co-authored-by: Solon Gordon <[email protected]>
Co-authored-by: Bilal Akhtar <[email protected]>
Co-authored-by: Oliver Tan <[email protected]>
5 people committed Sep 3, 2020
5 parents 713b612 + 0b54595 + 6371d37 + d51519e + f037af1 commit fcb30c7
Showing 6 changed files with 24 additions and 19 deletions.
2 changes: 1 addition & 1 deletion c-deps/geos
5 changes: 2 additions & 3 deletions pkg/cmd/roachtest/disk_full.go
@@ -22,9 +22,8 @@ import (
 func registerDiskFull(r *testRegistry) {
 	r.Add(testSpec{
 		Name:       "disk-full",
-		Owner:      OwnerKV,
-		MinVersion: `v2.1.0`,
-		Skip:       "https://github.com/cockroachdb/cockroach/issues/35328#issuecomment-478540195",
+		Owner:      OwnerStorage,
+		MinVersion: `v20.2.0`,
 		Cluster:    makeClusterSpec(5),
 		Run: func(ctx context.Context, t *test, c *cluster) {
 			if c.isLocal() {
19 changes: 10 additions & 9 deletions pkg/server/node.go
@@ -468,12 +468,13 @@ func (n *Node) start(

 	n.startComputePeriodicMetrics(n.stopper, base.DefaultMetricsSampleInterval)

-	// Be careful about moving this line above `startStores`; store migrations rely
-	// on the fact that the cluster version has not been updated via Gossip (we
-	// have migrations that want to run only if the server starts with a given
-	// cluster version, but not if the server starts with a lower one and gets
-	// bumped immediately, which would be possible if gossip got started earlier).
-	n.startGossip(ctx, n.stopper)
+	// Be careful about moving this line above where we start stores; store
+	// migrations rely on the fact that the cluster version has not been updated
+	// via Gossip (we have migrations that want to run only if the server starts
+	// with a given cluster version, but not if the server starts with a lower
+	// one and gets bumped immediately, which would be possible if gossip got
+	// started earlier).
+	n.startGossiping(ctx, n.stopper)

 	allEngines := append([]storage.Engine(nil), state.initializedEngines...)
 	allEngines = append(allEngines, state.newEngines...)
@@ -597,7 +598,7 @@ func (n *Node) bootstrapStores(
 	}
 	n.addStore(ctx, s)
 	log.Infof(ctx, "bootstrapped store %s", s)
-	// Done regularly in Node.startGossip, but this cuts down the time
+	// Done regularly in Node.startGossiping, but this cuts down the time
 	// until this store is used for range allocations.
 	if err := s.GossipStore(ctx, false /* useCached */); err != nil {
 		log.Warningf(ctx, "error doing initial gossiping: %s", err)
@@ -616,9 +617,9 @@
 	return nil
 }

-// startGossip loops on a periodic ticker to gossip node-related
+// startGossiping loops on a periodic ticker to gossip node-related
 // information. Starts a goroutine to loop until the node is closed.
-func (n *Node) startGossip(ctx context.Context, stopper *stop.Stopper) {
+func (n *Node) startGossiping(ctx context.Context, stopper *stop.Stopper) {
 	ctx = n.AnnotateCtx(ctx)
 	stopper.RunWorker(ctx, func(ctx context.Context) {
 		// Verify we've already gossiped our node descriptor.
5 changes: 3 additions & 2 deletions pkg/server/server.go
@@ -1303,8 +1303,6 @@ func (s *Server) Start(ctx context.Context) error {
 		}
 	}

-	// TODO(irfansharif): How late can we defer gossip start to?
-	startGossipFn()
 	if s.cfg.DelayedBootstrapFn != nil {
 		defer time.AfterFunc(30*time.Second, s.cfg.DelayedBootstrapFn).Stop()
 	}
@@ -1467,6 +1465,9 @@ func (s *Server) Start(ctx context.Context) error {

 	onSuccessfulReturnFn()

+	// We're going to need to start gossip before we spin up Node below.
+	startGossipFn()
+
 	// Now that we have a monotonic HLC wrt previous incarnations of the process,
 	// init all the replicas. At this point *some* store has been bootstrapped or
 	// we're joining an existing cluster for the first time.
3 changes: 0 additions & 3 deletions pkg/sql/distsql/columnar_operators_test.go
@@ -126,9 +126,6 @@ func TestAggregatorAgainstProcessor(t *testing.T) {
 			execinfrapb.AggregatorSpec_PERCENTILE_CONT_IMPL:
 			// We skip percentile functions because those can only be
 			// planned as window functions.
-		case execinfrapb.AggregatorSpec_ST_UNION:
-			// We skip ST_Union for now because it seg faults in GEOS.
-			// TODO(#53254): figure out why.
 		default:
 			found = true
 		}
9 changes: 8 additions & 1 deletion pkg/sql/executor_statement_metrics.go
@@ -67,7 +67,14 @@ type phaseTimes [sessionNumPhases]time.Time
 // getServiceLatency returns the time between a query being received and the end
 // of run.
 func (p *phaseTimes) getServiceLatency() time.Duration {
-	return p[plannerEndExecStmt].Sub(p[sessionQueryReceived])
+	// To have an accurate representation of how long it took to service this
+	// single query, we ignore the time between when parsing ends and planning
+	// begins. This avoids the latency being inflated in a few different cases:
+	// when there are internal transaction retries, and when multiple statements
+	// are submitted together, e.g. "SELECT 1; SELECT 2".
+	parseLatency := p[sessionEndParse].Sub(p[sessionQueryReceived])
+	planAndExecuteLatency := p[plannerEndExecStmt].Sub(p[plannerStartLogicalPlan])
+	return parseLatency + planAndExecuteLatency
 }

 // getRunLatency returns the time between a query execution starting and ending.
