Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

sql: add metrics to schema changer #54855

Merged
merged 1 commit into from
Oct 19, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions pkg/server/server_sql.go
Original file line number Diff line number Diff line change
Expand Up @@ -544,6 +544,9 @@ func newSQLServer(ctx context.Context, cfg sqlServerArgs) (*sqlServer, error) {
} else {
execCfg.TypeSchemaChangerTestingKnobs = new(sql.TypeSchemaChangerTestingKnobs)
}
execCfg.SchemaChangerMetrics = sql.NewSchemaChangerMetrics()
cfg.registry.AddMetricStruct(execCfg.SchemaChangerMetrics)

if gcJobTestingKnobs := cfg.TestingKnobs.GCJob; gcJobTestingKnobs != nil {
execCfg.GCJobTestingKnobs = gcJobTestingKnobs.(*sql.GCJobTestingKnobs)
} else {
Expand Down
2 changes: 2 additions & 0 deletions pkg/sql/exec_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -695,6 +695,8 @@ type ExecutorConfig struct {
InternalExecutor *InternalExecutor
QueryCache *querycache.C

SchemaChangerMetrics *SchemaChangerMetrics

TestingKnobs ExecutorTestingKnobs
PGWireTestingKnobs *PGWireTestingKnobs
SchemaChangerTestingKnobs *SchemaChangerTestingKnobs
Expand Down
13 changes: 13 additions & 0 deletions pkg/sql/schema_changer.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,8 @@ type SchemaChanger struct {
db *kv.DB
leaseMgr *lease.Manager

metrics *SchemaChangerMetrics

testingKnobs *SchemaChangerTestingKnobs
distSQLPlanner *DistSQLPlanner
jobRegistry *jobs.Registry
Expand Down Expand Up @@ -126,6 +128,7 @@ func NewSchemaChangerForTesting(
) sqlutil.InternalExecutor {
return execCfg.InternalExecutor
},
metrics: NewSchemaChangerMetrics(),
}
}

Expand Down Expand Up @@ -550,6 +553,9 @@ func (sc *SchemaChanger) getTargetDescriptor(ctx context.Context) (catalog.Descr
// If the txn that queued the schema changer did not commit, this will be a
// no-op, as we'll fail to find the job for our mutation in the jobs registry.
func (sc *SchemaChanger) exec(ctx context.Context) error {
sc.metrics.RunningSchemaChanges.Inc(1)
defer sc.metrics.RunningSchemaChanges.Dec(1)

ctx = logtags.AddTags(ctx, sc.execLogTags())

// Pull out the requested descriptor.
Expand Down Expand Up @@ -2075,6 +2081,7 @@ func (r schemaChangeResumer) Resume(
ieFactory: func(ctx context.Context, sd *sessiondata.SessionData) sqlutil.InternalExecutor {
return r.job.MakeSessionBoundInternalExecutor(ctx, sd)
},
metrics: p.ExecCfg().SchemaChangerMetrics,
}
opts := retry.Options{
InitialBackoff: 20 * time.Millisecond,
Expand All @@ -2092,6 +2099,7 @@ func (r schemaChangeResumer) Resume(
scErr = sc.exec(ctx)
switch {
case scErr == nil:
sc.metrics.Successes.Inc(1)
return nil
case errors.Is(scErr, catalog.ErrDescriptorNotFound):
// If the table descriptor for the ID can't be found, we assume that
Expand All @@ -2108,10 +2116,15 @@ func (r schemaChangeResumer) Resume(
// Check if the error is on an allowlist of errors we should retry on,
// including the schema change not having the first mutation in line.
log.Warningf(ctx, "error while running schema change, retrying: %v", scErr)
sc.metrics.RetryErrors.Inc(1)
default:
if ctx.Err() == nil {
sc.metrics.PermanentErrors.Inc(1)
}
// All other errors lead to a failed job.
return scErr
}

}
// If the context was canceled, the job registry will retry the job. We can
// just return the error without wrapping it in a retry error.
Expand Down
65 changes: 65 additions & 0 deletions pkg/sql/schema_changer_metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
// Copyright 2020 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package sql

import "github.com/cockroachdb/cockroach/pkg/util/metric"

// TODO(ajwerner): Add many more metrics.

// Metadata describing each schema changer metric. Each value is handed to a
// metric constructor in NewSchemaChangerMetrics. The Name strings are also
// referenced verbatim by the chart catalog (pkg/ts/catalog/chart_catalog.go)
// and queried by name in tests, so a rename here must be mirrored there.
var (
// metaRunning describes the gauge backing
// SchemaChangerMetrics.RunningSchemaChanges.
metaRunning = metric.Metadata{
Name: "sql.schema_changer.running",
Help: "Gauge of currently running schema changes",
Measurement: "Schema changes",
Unit: metric.Unit_COUNT,
}
// metaSuccesses describes the counter backing
// SchemaChangerMetrics.Successes.
metaSuccesses = metric.Metadata{
Name: "sql.schema_changer.successes",
Help: "Counter of the number of schema changer resumes which succeed",
Measurement: "Schema changes",
Unit: metric.Unit_COUNT,
}
// metaRetryErrors describes the counter backing
// SchemaChangerMetrics.RetryErrors.
metaRetryErrors = metric.Metadata{
Name: "sql.schema_changer.retry_errors",
Help: "Counter of the number of retriable errors experienced by the schema changer",
Measurement: "Errors",
Unit: metric.Unit_COUNT,
}
// metaPermanentErrors describes the counter backing
// SchemaChangerMetrics.PermanentErrors.
metaPermanentErrors = metric.Metadata{
Name: "sql.schema_changer.permanent_errors",
Help: "Counter of the number of permanent errors experienced by the schema changer",
Measurement: "Errors",
Unit: metric.Unit_COUNT,
}
)

// SchemaChangerMetrics are metrics corresponding to the schema changer.
//
// All fields are pointers populated by NewSchemaChangerMetrics; a zero-value
// SchemaChangerMetrics has nil fields.
type SchemaChangerMetrics struct {
// RunningSchemaChanges is incremented when SchemaChanger.exec starts and
// decremented (via defer) when it returns.
RunningSchemaChanges *metric.Gauge
// Successes is incremented by the schema change job resumer each time
// SchemaChanger.exec returns a nil error.
Successes *metric.Counter
// RetryErrors is incremented by the resumer each time exec fails with an
// error that the resumer logs and retries.
RetryErrors *metric.Counter
// PermanentErrors is incremented by the resumer when exec fails with an
// error that fails the job, but only if the context has not been canceled
// (ctx.Err() == nil).
PermanentErrors *metric.Counter
}

// MetricStruct makes SchemaChangerMetrics a metric.Struct. The method has no
// body; it exists only so the type satisfies that interface and can be passed
// to a registry's AddMetricStruct.
func (s *SchemaChangerMetrics) MetricStruct() {}

// Compile-time check that *SchemaChangerMetrics implements metric.Struct.
var _ metric.Struct = (*SchemaChangerMetrics)(nil)

// NewSchemaChangerMetrics allocates a SchemaChangerMetrics and populates every
// field with a freshly created gauge or counter built from the package-level
// metadata. The result is not added to any metric registry here; callers that
// want the metrics exported must register the returned struct themselves.
func NewSchemaChangerMetrics() *SchemaChangerMetrics {
	m := new(SchemaChangerMetrics)
	m.RunningSchemaChanges = metric.NewGauge(metaRunning)
	m.Successes = metric.NewCounter(metaSuccesses)
	m.RetryErrors = metric.NewCounter(metaRetryErrors)
	m.PermanentErrors = metric.NewCounter(metaPermanentErrors)
	return m
}
7 changes: 7 additions & 0 deletions pkg/sql/schema_changer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6054,6 +6054,13 @@ CREATE UNIQUE INDEX i ON t.test(v);
testutils.SucceedsSoon(t, func() error {
return checkTableKeyCountExact(ctx, kvDB, 2)
})
var permanentErrors int
require.NoError(t, sqlDB.QueryRow(`
SELECT value
FROM crdb_internal.node_metrics
WHERE name = 'sql.schema_changer.permanent_errors';
`).Scan(&permanentErrors))
require.Equal(t, 1, permanentErrors)
}

t.Run("error-before-backfill", func(t *testing.T) {
Expand Down
20 changes: 20 additions & 0 deletions pkg/ts/catalog/chart_catalog.go
Original file line number Diff line number Diff line change
Expand Up @@ -1483,6 +1483,26 @@ var charts = []sectionDescription{
},
},
},
{
Organization: [][]string{{SQLLayer, "Schema Changer"}},
Charts: []chartDescription{
{
Title: "Running",
Metrics: []string{"sql.schema_changer.running"},
},
{
Title: "Run Outcomes",
Downsampler: DescribeAggregator_MAX,
Aggregator: DescribeAggregator_SUM,
Metrics: []string{
"sql.schema_changer.permanent_errors",
"sql.schema_changer.retry_errors",
"sql.schema_changer.successes",
},
AxisLabel: "Schema Change Executions",
},
},
},
{
Organization: [][]string{{SQLLayer, "DistSQL", "Flows"}},
Charts: []chartDescription{
Expand Down