Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

sql: scaffolding for db metadata update job #128470

Merged
merged 1 commit into from
Aug 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions docs/generated/metrics/metrics.html
Original file line number Diff line number Diff line change
Expand Up @@ -1362,6 +1362,18 @@
<tr><td>APPLICATION</td><td>jobs.typedesc_schema_change.resume_completed</td><td>Number of typedesc_schema_change jobs which successfully resumed to completion</td><td>jobs</td><td>COUNTER</td><td>COUNT</td><td>AVG</td><td>NON_NEGATIVE_DERIVATIVE</td></tr>
<tr><td>APPLICATION</td><td>jobs.typedesc_schema_change.resume_failed</td><td>Number of typedesc_schema_change jobs which failed with a non-retriable error</td><td>jobs</td><td>COUNTER</td><td>COUNT</td><td>AVG</td><td>NON_NEGATIVE_DERIVATIVE</td></tr>
<tr><td>APPLICATION</td><td>jobs.typedesc_schema_change.resume_retry_error</td><td>Number of typedesc_schema_change jobs which failed with a retriable error</td><td>jobs</td><td>COUNTER</td><td>COUNT</td><td>AVG</td><td>NON_NEGATIVE_DERIVATIVE</td></tr>
<tr><td>APPLICATION</td><td>jobs.update_table_metadata_cache.currently_idle</td><td>Number of update_table_metadata_cache jobs currently considered Idle and can be freely shut down</td><td>jobs</td><td>GAUGE</td><td>COUNT</td><td>AVG</td><td>NONE</td></tr>
<tr><td>APPLICATION</td><td>jobs.update_table_metadata_cache.currently_paused</td><td>Number of update_table_metadata_cache jobs currently considered Paused</td><td>jobs</td><td>GAUGE</td><td>COUNT</td><td>AVG</td><td>NONE</td></tr>
<tr><td>APPLICATION</td><td>jobs.update_table_metadata_cache.currently_running</td><td>Number of update_table_metadata_cache jobs currently running in Resume or OnFailOrCancel state</td><td>jobs</td><td>GAUGE</td><td>COUNT</td><td>AVG</td><td>NONE</td></tr>
<tr><td>APPLICATION</td><td>jobs.update_table_metadata_cache.expired_pts_records</td><td>Number of expired protected timestamp records owned by update_table_metadata_cache jobs</td><td>records</td><td>COUNTER</td><td>COUNT</td><td>AVG</td><td>NON_NEGATIVE_DERIVATIVE</td></tr>
<tr><td>APPLICATION</td><td>jobs.update_table_metadata_cache.fail_or_cancel_completed</td><td>Number of update_table_metadata_cache jobs which successfully completed their failure or cancelation process</td><td>jobs</td><td>COUNTER</td><td>COUNT</td><td>AVG</td><td>NON_NEGATIVE_DERIVATIVE</td></tr>
<tr><td>APPLICATION</td><td>jobs.update_table_metadata_cache.fail_or_cancel_failed</td><td>Number of update_table_metadata_cache jobs which failed with a non-retriable error on their failure or cancelation process</td><td>jobs</td><td>COUNTER</td><td>COUNT</td><td>AVG</td><td>NON_NEGATIVE_DERIVATIVE</td></tr>
<tr><td>APPLICATION</td><td>jobs.update_table_metadata_cache.fail_or_cancel_retry_error</td><td>Number of update_table_metadata_cache jobs which failed with a retriable error on their failure or cancelation process</td><td>jobs</td><td>COUNTER</td><td>COUNT</td><td>AVG</td><td>NON_NEGATIVE_DERIVATIVE</td></tr>
<tr><td>APPLICATION</td><td>jobs.update_table_metadata_cache.protected_age_sec</td><td>The age of the oldest PTS record protected by update_table_metadata_cache jobs</td><td>seconds</td><td>GAUGE</td><td>SECONDS</td><td>AVG</td><td>NONE</td></tr>
<tr><td>APPLICATION</td><td>jobs.update_table_metadata_cache.protected_record_count</td><td>Number of protected timestamp records held by update_table_metadata_cache jobs</td><td>records</td><td>GAUGE</td><td>COUNT</td><td>AVG</td><td>NONE</td></tr>
<tr><td>APPLICATION</td><td>jobs.update_table_metadata_cache.resume_completed</td><td>Number of update_table_metadata_cache jobs which successfully resumed to completion</td><td>jobs</td><td>COUNTER</td><td>COUNT</td><td>AVG</td><td>NON_NEGATIVE_DERIVATIVE</td></tr>
<tr><td>APPLICATION</td><td>jobs.update_table_metadata_cache.resume_failed</td><td>Number of update_table_metadata_cache jobs which failed with a non-retriable error</td><td>jobs</td><td>COUNTER</td><td>COUNT</td><td>AVG</td><td>NON_NEGATIVE_DERIVATIVE</td></tr>
<tr><td>APPLICATION</td><td>jobs.update_table_metadata_cache.resume_retry_error</td><td>Number of update_table_metadata_cache jobs which failed with a retriable error</td><td>jobs</td><td>COUNTER</td><td>COUNT</td><td>AVG</td><td>NON_NEGATIVE_DERIVATIVE</td></tr>
<tr><td>APPLICATION</td><td>kv.protectedts.reconciliation.errors</td><td>number of errors encountered during reconciliation runs on this node</td><td>Count</td><td>COUNTER</td><td>COUNT</td><td>AVG</td><td>NON_NEGATIVE_DERIVATIVE</td></tr>
<tr><td>APPLICATION</td><td>kv.protectedts.reconciliation.num_runs</td><td>number of successful reconciliation runs on this node</td><td>Count</td><td>COUNTER</td><td>COUNT</td><td>AVG</td><td>NON_NEGATIVE_DERIVATIVE</td></tr>
<tr><td>APPLICATION</td><td>kv.protectedts.reconciliation.records_processed</td><td>number of records processed without error during reconciliation on this node</td><td>Count</td><td>COUNTER</td><td>COUNT</td><td>AVG</td><td>NON_NEGATIVE_DERIVATIVE</td></tr>
Expand Down
2 changes: 1 addition & 1 deletion pkg/cli/testdata/doctor/test_examine_cluster
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@ debug doctor examine cluster
debug doctor examine cluster
Examining 64 descriptors and 63 namespace entries...
ParentID 100, ParentSchemaID 101: relation "foo" (105): expected matching namespace entry, found none
Examining 10 jobs...
Examining 11 jobs...
ERROR: validation failed
2 changes: 1 addition & 1 deletion pkg/cli/testdata/doctor/test_examine_cluster_dropped
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@ debug doctor examine cluster
----
debug doctor examine cluster
Examining 63 descriptors and 63 namespace entries...
Examining 8 jobs...
Examining 9 jobs...
No problems found!
2 changes: 1 addition & 1 deletion pkg/cli/testdata/doctor/test_examine_cluster_jobs
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,5 @@ Examining 63 descriptors and 64 namespace entries...
ParentID 183, ParentSchemaID 381: relation "foo" (104): expected matching namespace entry, found none
ParentID 183, ParentSchemaID 381: relation "foo" (104): mutation job 962952277419655169: job 962952277419655169 not found
ParentID 100, ParentSchemaID 101: namespace entry "foo" (104): mismatched name "foo" in relation descriptor
Examining 8 jobs...
Examining 9 jobs...
ERROR: validation failed
9 changes: 5 additions & 4 deletions pkg/jobs/jobs_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -216,10 +216,11 @@ func (rts *registryTestSuite) setUp(t *testing.T) func() {
ManagerDisableJobCreation: true,
}
args.Knobs.UpgradeManager = &upgradebase.TestingKnobs{
DontUseJobs: true,
SkipJobMetricsPollingJobBootstrap: true,
SkipUpdateSQLActivityJobBootstrap: true,
SkipMVCCStatisticsJobBootstrap: true,
DontUseJobs: true,
SkipJobMetricsPollingJobBootstrap: true,
SkipUpdateSQLActivityJobBootstrap: true,
SkipMVCCStatisticsJobBootstrap: true,
SkipUpdateTableMetadataCacheBootstrap: true,
}
args.Knobs.KeyVisualizer = &keyvisualizer.TestingKnobs{SkipJobBootstrap: true}

Expand Down
6 changes: 6 additions & 0 deletions pkg/jobs/jobspb/jobs.proto
Original file line number Diff line number Diff line change
Expand Up @@ -1334,6 +1334,9 @@ message MVCCStatisticsJobProgress {

}

message UpdateTableMetadataCacheDetails {}
message UpdateTableMetadataCacheProgress {}

message ImportRollbackDetails {
// TableID is the descriptor ID of table that should be rolled back.
//
Expand Down Expand Up @@ -1413,6 +1416,7 @@ message Payload {
ImportRollbackDetails import_rollback_details = 46;
HistoryRetentionDetails history_retention_details = 47;
LogicalReplicationDetails logical_replication_details = 48;
UpdateTableMetadataCacheDetails update_table_metadata_cache_details = 49;
}
reserved 26;
// PauseReason is used to describe the reason that the job is currently paused
Expand Down Expand Up @@ -1490,6 +1494,7 @@ message Progress {
ImportRollbackProgress import_rollback_progress = 34;
HistoryRetentionProgress HistoryRetentionProgress = 35;
LogicalReplicationProgress LogicalReplication = 36;
UpdateTableMetadataCacheProgress table_metadata_cache = 37;
}

uint64 trace_id = 21 [(gogoproto.nullable) = false, (gogoproto.customname) = "TraceID", (gogoproto.customtype) = "github.com/cockroachdb/cockroach/pkg/util/tracing/tracingpb.TraceID"];
Expand Down Expand Up @@ -1530,6 +1535,7 @@ enum Type {
HISTORY_RETENTION = 26 [(gogoproto.enumvalue_customname) = "TypeHistoryRetention"];
LOGICAL_REPLICATION = 27 [(gogoproto.enumvalue_customname) = "TypeLogicalReplication"];
AUTO_CREATE_PARTIAL_STATS = 28 [(gogoproto.enumvalue_customname) = "TypeAutoCreatePartialStats"];
UPDATE_TABLE_METADATA_CACHE = 29 [(gogoproto.enumvalue_customname) = "TypeUpdateTableMetadataCache"];
}

message Job {
Expand Down
16 changes: 15 additions & 1 deletion pkg/jobs/jobspb/wrap.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ var (
_ Details = ImportRollbackDetails{}
_ Details = HistoryRetentionDetails{}
_ Details = LogicalReplicationDetails{}
_ Details = UpdateTableMetadataCacheDetails{}
)

// ProgressDetails is a marker interface for job progress details proto structs.
Expand Down Expand Up @@ -77,6 +78,7 @@ var (
_ ProgressDetails = ImportRollbackProgress{}
_ ProgressDetails = HistoryRetentionProgress{}
_ ProgressDetails = LogicalReplicationProgress{}
_ ProgressDetails = UpdateTableMetadataCacheProgress{}
)

// Type returns the payload's job type and panics if the type is invalid.
Expand Down Expand Up @@ -169,6 +171,7 @@ var AutomaticJobTypes = [...]Type{
TypeKeyVisualizer,
TypeAutoUpdateSQLActivity,
TypeMVCCStatisticsUpdate,
TypeUpdateTableMetadataCache,
}

// DetailsType returns the type for a payload detail.
Expand Down Expand Up @@ -232,6 +235,8 @@ func DetailsType(d isPayload_Details) (Type, error) {
return TypeHistoryRetention, nil
case *Payload_LogicalReplicationDetails:
return TypeLogicalReplication, nil
case *Payload_UpdateTableMetadataCacheDetails:
return TypeUpdateTableMetadataCache, nil
default:
return TypeUnspecified, errors.Newf("Payload.Type called on a payload with an unknown details type: %T", d)
}
Expand Down Expand Up @@ -283,6 +288,7 @@ var JobDetailsForEveryJobType = map[Type]Details{
TypeImportRollback: ImportRollbackDetails{},
TypeHistoryRetention: HistoryRetentionDetails{},
TypeLogicalReplication: LogicalReplicationDetails{},
TypeUpdateTableMetadataCache: UpdateTableMetadataCacheDetails{},
}

// WrapProgressDetails wraps a ProgressDetails object in the protobuf wrapper
Expand Down Expand Up @@ -346,6 +352,8 @@ func WrapProgressDetails(details ProgressDetails) interface {
return &Progress_HistoryRetentionProgress{HistoryRetentionProgress: &d}
case LogicalReplicationProgress:
return &Progress_LogicalReplication{LogicalReplication: &d}
case UpdateTableMetadataCacheProgress:
return &Progress_TableMetadataCache{TableMetadataCache: &d}
default:
panic(errors.AssertionFailedf("WrapProgressDetails: unknown progress type %T", d))
}
Expand Down Expand Up @@ -407,6 +415,8 @@ func (p *Payload) UnwrapDetails() Details {
return *d.HistoryRetentionDetails
case *Payload_LogicalReplicationDetails:
return *d.LogicalReplicationDetails
case *Payload_UpdateTableMetadataCacheDetails:
return *d.UpdateTableMetadataCacheDetails
default:
return nil
}
Expand Down Expand Up @@ -468,6 +478,8 @@ func (p *Progress) UnwrapDetails() ProgressDetails {
return *d.HistoryRetentionProgress
case *Progress_LogicalReplication:
return *d.LogicalReplication
case *Progress_TableMetadataCache:
return *d.TableMetadataCache
default:
return nil
}
Expand Down Expand Up @@ -553,6 +565,8 @@ func WrapPayloadDetails(details Details) interface {
return &Payload_HistoryRetentionDetails{HistoryRetentionDetails: &d}
case LogicalReplicationDetails:
return &Payload_LogicalReplicationDetails{LogicalReplicationDetails: &d}
case UpdateTableMetadataCacheDetails:
return &Payload_UpdateTableMetadataCacheDetails{UpdateTableMetadataCacheDetails: &d}
default:
panic(errors.AssertionFailedf("jobs.WrapPayloadDetails: unknown details type %T", d))
}
Expand Down Expand Up @@ -588,7 +602,7 @@ const (
func (Type) SafeValue() {}

// NumJobTypes is the number of jobs types.
const NumJobTypes = 29
const NumJobTypes = 30

// ChangefeedDetailsMarshaler allows for dependency injection of
// cloud.SanitizeExternalStorageURI to avoid the dependency from this
Expand Down
2 changes: 2 additions & 0 deletions pkg/jobs/registry.go
Original file line number Diff line number Diff line change
Expand Up @@ -318,6 +318,8 @@ const (
// MVCCStatisticsJobID A static job ID used for the MVCC statistics update
// job.
MVCCStatisticsJobID = jobspb.JobID(104)

UpdateTableMetadataCacheJobID = jobspb.JobID(105)
)

// MakeJobID generates a new job ID.
Expand Down
25 changes: 14 additions & 11 deletions pkg/jobs/registry_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -126,9 +126,10 @@ func TestRegistryGC(t *testing.T) {
},
UpgradeManager: &upgradebase.TestingKnobs{
// This test wants to look at job records.
DontUseJobs: true,
SkipJobMetricsPollingJobBootstrap: true,
SkipMVCCStatisticsJobBootstrap: true,
DontUseJobs: true,
SkipJobMetricsPollingJobBootstrap: true,
SkipMVCCStatisticsJobBootstrap: true,
SkipUpdateTableMetadataCacheBootstrap: true,
},
KeyVisualizer: &keyvisualizer.TestingKnobs{
SkipJobBootstrap: true,
Expand Down Expand Up @@ -284,10 +285,11 @@ func TestRegistryGCPagination(t *testing.T) {
},
UpgradeManager: &upgradebase.TestingKnobs{
// This test wants to count job records.
DontUseJobs: true,
SkipJobMetricsPollingJobBootstrap: true,
SkipUpdateSQLActivityJobBootstrap: true,
SkipMVCCStatisticsJobBootstrap: true,
DontUseJobs: true,
SkipJobMetricsPollingJobBootstrap: true,
SkipUpdateSQLActivityJobBootstrap: true,
SkipMVCCStatisticsJobBootstrap: true,
SkipUpdateTableMetadataCacheBootstrap: true,
},
KeyVisualizer: &keyvisualizer.TestingKnobs{
SkipJobBootstrap: true,
Expand Down Expand Up @@ -758,10 +760,11 @@ func TestRetriesWithExponentialBackoff(t *testing.T) {
ManagerDisableJobCreation: true,
},
UpgradeManager: &upgradebase.TestingKnobs{
DontUseJobs: true,
SkipJobMetricsPollingJobBootstrap: true,
SkipUpdateSQLActivityJobBootstrap: true,
SkipMVCCStatisticsJobBootstrap: true,
DontUseJobs: true,
SkipJobMetricsPollingJobBootstrap: true,
SkipUpdateSQLActivityJobBootstrap: true,
SkipMVCCStatisticsJobBootstrap: true,
SkipUpdateTableMetadataCacheBootstrap: true,
},
KeyVisualizer: &keyvisualizer.TestingKnobs{
SkipJobBootstrap: true,
Expand Down
1 change: 1 addition & 0 deletions pkg/sql/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,7 @@ go_library(
"unsplit.go",
"unsupported_vars.go",
"update.go",
"update_table_metadata_cache_job.go",
"upsert.go",
"user.go",
"values.go",
Expand Down
65 changes: 65 additions & 0 deletions pkg/sql/update_table_metadata_cache_job.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
// Copyright 2024 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package sql

import (
"context"

"github.com/cockroachdb/cockroach/pkg/jobs"
"github.com/cockroachdb/cockroach/pkg/jobs/jobspb"
"github.com/cockroachdb/cockroach/pkg/settings/cluster"
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/errors"
)

// tableMetadataUpdateJobResumer is the jobs.Resumer registered for
// jobspb.TypeUpdateTableMetadataCache jobs.
type tableMetadataUpdateJobResumer struct {
	// job is the job record this resumer was constructed for.
	job *jobs.Job
}

// Compile-time check that the resumer satisfies jobs.Resumer.
var (
	_ jobs.Resumer = (*tableMetadataUpdateJobResumer)(nil)
)

// Resume is part of the jobs.Resumer interface.
//
// This is scaffolding only: it logs that the job started, marks the job as
// idle, and then blocks until ctx is done. It always returns nil.
func (j *tableMetadataUpdateJobResumer) Resume(ctx context.Context, execCtxI interface{}) error {
	log.Infof(ctx, "starting table metadata update job")

	// There is no work to perform yet, so report the job as idle right away.
	j.job.MarkIdle(true)

	// Park until the surrounding context is canceled.
	done := ctx.Done()
	<-done

	return nil
}

// OnFailOrCancel implements jobs.Resumer.
//
// If jobErr indicates that the job was canceled, an assertion error wrapping
// jobErr is logged, since this job is not expected to be cancelable. The
// method performs no cleanup and always returns nil.
func (j *tableMetadataUpdateJobResumer) OnFailOrCancel(
	ctx context.Context, execCtx interface{}, jobErr error,
) error {
	if jobs.HasErrJobCanceled(jobErr) {
		// The message names this job so that the log line is attributed to the
		// right job type when triaging.
		err := errors.NewAssertionErrorWithWrappedErrf(
			jobErr, "update table metadata cache job is not cancelable",
		)
		log.Errorf(ctx, "%v", err)
	}
	return nil
}

// CollectProfile implements jobs.Resumer.
//
// This job collects no profile information; the method is a no-op that always
// returns nil.
func (j *tableMetadataUpdateJobResumer) CollectProfile(ctx context.Context, execCtx interface{}) error {
	return nil
}

// init registers the resumer constructor for
// jobspb.TypeUpdateTableMetadataCache jobs with the jobs package, passing the
// jobs.DisablesTenantCostControl option.
func init() {
	// newResumer builds a resumer bound to the given job; settings is unused.
	newResumer := func(job *jobs.Job, settings *cluster.Settings) jobs.Resumer {
		return &tableMetadataUpdateJobResumer{job: job}
	}
	jobs.RegisterConstructor(
		jobspb.TypeUpdateTableMetadataCache,
		newResumer,
		jobs.DisablesTenantCostControl,
	)
}
2 changes: 2 additions & 0 deletions pkg/upgrade/upgradebase/testing_knobs.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,8 @@ type TestingKnobs struct {
SkipUpdateSQLActivityJobBootstrap bool

SkipMVCCStatisticsJobBootstrap bool

SkipUpdateTableMetadataCacheBootstrap bool
}

// ModuleTestingKnobs makes TestingKnobs a base.ModuleTestingKnobs.
Expand Down
1 change: 1 addition & 0 deletions pkg/upgrade/upgrades/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ go_library(
"descriptor_utils.go",
"first_upgrade.go",
"permanent_create_jobs_metrics_polling_job.go",
"permanent_create_update_table_metadata_cache_job.go",
"permanent_ensure_sql_schema_telemetry_schedule.go",
"permanent_key_visualizer_migration.go",
"permanent_mvcc_statistics_migration.go",
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
// Copyright 2024 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package upgrades

import (
"context"

"github.com/cockroachdb/cockroach/pkg/clusterversion"
"github.com/cockroachdb/cockroach/pkg/jobs"
"github.com/cockroachdb/cockroach/pkg/jobs/jobspb"
_ "github.com/cockroachdb/cockroach/pkg/jobs/metricspoller" // Ensure job implementation is linked.
"github.com/cockroachdb/cockroach/pkg/security/username"
"github.com/cockroachdb/cockroach/pkg/sql/isql"
"github.com/cockroachdb/cockroach/pkg/upgrade"
)

// createUpdateTableMetadataCacheJob creates the update table metadata cache
// job record under the static ID jobs.UpdateTableMetadataCacheJobID, owned by
// the node user and marked non-cancelable. Creation goes through
// CreateIfNotExistAdoptableJobWithTxn inside a single transaction.
//
// The step is skipped entirely when the SkipUpdateTableMetadataCacheBootstrap
// testing knob is set.
func createUpdateTableMetadataCacheJob(
	ctx context.Context, _ clusterversion.ClusterVersion, d upgrade.TenantDeps,
) error {
	if knobs := d.TestingKnobs; knobs != nil && knobs.SkipUpdateTableMetadataCacheBootstrap {
		return nil
	}

	// createJob writes the job record using the supplied transaction.
	createJob := func(ctx context.Context, txn isql.Txn) error {
		record := jobs.Record{
			JobID:         jobs.UpdateTableMetadataCacheJobID,
			Description:   jobspb.TypeUpdateTableMetadataCache.String(),
			Details:       jobspb.UpdateTableMetadataCacheDetails{},
			Progress:      jobspb.UpdateTableMetadataCacheProgress{},
			CreatedBy:     &jobs.CreatedByInfo{Name: username.NodeUser, ID: username.NodeUserID},
			Username:      username.NodeUserName(),
			NonCancelable: true,
		}
		return d.JobRegistry.CreateIfNotExistAdoptableJobWithTxn(ctx, record, txn)
	}
	return d.DB.Txn(ctx, createJob)
}
1 change: 1 addition & 0 deletions pkg/upgrade/upgrades/permanent_upgrades.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ func bootstrapCluster(
{"create jobs metrics polling job", createJobsMetricsPollingJob},
{"create sql activity updater job", createActivityUpdateJobMigration},
{"create mvcc stats job", createMVCCStatisticsJob},
{"create update cached table metadata job", createUpdateTableMetadataCacheJob},
} {
log.Infof(ctx, "executing bootstrap step %q", u.name)
if err := u.fn(ctx, cv, deps); err != nil {
Expand Down
4 changes: 2 additions & 2 deletions pkg/upgrade/upgrades/upgrades.go
Original file line number Diff line number Diff line change
Expand Up @@ -108,10 +108,10 @@ var upgrades = []upgradebase.Upgrade{
),

upgrade.NewTenantUpgrade(
"add new table_metadata table to the system tenant",
"add new table_metadata table and job to the system tenant",
clusterversion.V24_3_TableMetadata.Version(),
upgrade.NoPrecondition,
addTableMetadataTable,
addTableMetadataTableAndJob,
upgrade.RestoreActionNotRequired("cluster restore does not restore this table"),
),

Expand Down
Loading
Loading