Skip to content

Commit

Permalink
Merge #59760
Browse files Browse the repository at this point in the history
59760: migration,jobs: refactor long-running migrations and hook up job r=ajwerner a=ajwerner

This PR comes in three commits. The first commit reworks the package structure of long-running migrations a bit. The second introduces a migration job. The third introduces a `system.migrations` table to store completion state for migrations. The idea is that running long-running migrations in a job is handy because it provides leasing, observability, and control to users. 

Fixes #58183.


Co-authored-by: Andrew Werner <[email protected]>
  • Loading branch information
craig[bot] and ajwerner committed Feb 10, 2021
2 parents aaa8c54 + c17795f commit 3a3565a
Show file tree
Hide file tree
Showing 78 changed files with 4,047 additions and 2,180 deletions.
2 changes: 1 addition & 1 deletion docs/generated/settings/settings.html
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,6 @@
<tr><td><code>trace.debug.enable</code></td><td>boolean</td><td><code>false</code></td><td>if set, traces for recent requests can be seen at https://<ui>/debug/requests</td></tr>
<tr><td><code>trace.lightstep.token</code></td><td>string</td><td><code></code></td><td>if set, traces go to Lightstep using this token</td></tr>
<tr><td><code>trace.zipkin.collector</code></td><td>string</td><td><code></code></td><td>if set, traces go to the given Zipkin instance (example: '127.0.0.1:9411'); ignored if trace.lightstep.token is set</td></tr>
<tr><td><code>version</code></td><td>version</td><td><code>20.2-18</code></td><td>set the active cluster version in the format '<major>.<minor>'</td></tr>
<tr><td><code>version</code></td><td>version</td><td><code>20.2-24</code></td><td>set the active cluster version in the format '<major>.<minor>'</td></tr>
</tbody>
</table>
1 change: 1 addition & 0 deletions pkg/base/testing_knobs.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,4 +36,5 @@ type TestingKnobs struct {
TenantTestingKnobs ModuleTestingKnobs
JobsTestingKnobs ModuleTestingKnobs
BackupRestore ModuleTestingKnobs
MigrationManager ModuleTestingKnobs
}
3 changes: 3 additions & 0 deletions pkg/ccl/backupccl/backupbase/system_schema.go
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,9 @@ var SystemTableBackupConfiguration = map[string]systemBackupConfiguration{
systemschema.WebSessionsTable.GetName(): {
IncludeInClusterBackup: OptOutOfClusterBackup,
},
systemschema.MigrationsTable.GetName(): {
IncludeInClusterBackup: OptOutOfClusterBackup,
},
}

// GetSystemTablesToIncludeInClusterBackup returns a set of system table names that
Expand Down
2 changes: 1 addition & 1 deletion pkg/cli/testdata/doctor/testcluster
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
doctor cluster
----
debug doctor cluster
Examining 34 descriptors and 35 namespace entries...
Examining 35 descriptors and 36 namespace entries...
Table 53: ParentID 50, ParentSchemaID 29, Name 'foo': not being dropped but no namespace entry found
Examining 1 running jobs...
ERROR: validation failed
9 changes: 6 additions & 3 deletions pkg/cli/testdata/zip/partial1
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ requesting goroutine files for node 1... writing: debug/nodes/1/goroutines.err.t
^- resulted in ...
requesting log file ...
requesting log file ...
requesting ranges... 35 found
requesting ranges... 36 found
writing: debug/nodes/1/ranges/1.json
writing: debug/nodes/1/ranges/2.json
writing: debug/nodes/1/ranges/3.json
Expand Down Expand Up @@ -94,6 +94,7 @@ writing: debug/nodes/1/ranges/32.json
writing: debug/nodes/1/ranges/33.json
writing: debug/nodes/1/ranges/34.json
writing: debug/nodes/1/ranges/35.json
writing: debug/nodes/1/ranges/36.json
writing: debug/nodes/2/status.json
using SQL connection URL for node 2: postgresql://...
retrieving SQL data for crdb_internal.feature_usage... writing: debug/nodes/2/crdb_internal.feature_usage.txt
Expand Down Expand Up @@ -194,7 +195,7 @@ requesting goroutine files for node 3... writing: debug/nodes/3/goroutines.err.t
^- resulted in ...
requesting log file ...
requesting log file ...
requesting ranges... 35 found
requesting ranges... 36 found
writing: debug/nodes/3/ranges/1.json
writing: debug/nodes/3/ranges/2.json
writing: debug/nodes/3/ranges/3.json
Expand Down Expand Up @@ -230,6 +231,7 @@ writing: debug/nodes/3/ranges/32.json
writing: debug/nodes/3/ranges/33.json
writing: debug/nodes/3/ranges/34.json
writing: debug/nodes/3/ranges/35.json
writing: debug/nodes/3/ranges/36.json
doctor examining cluster...No problems found!
writing: debug/reports/doctor.txt
requesting list of SQL databases... 3 found
Expand All @@ -238,7 +240,7 @@ requesting database details for defaultdb... writing: debug/schema/defaultdb@det
requesting database details for postgres... writing: debug/schema/postgres@details.json
0 tables found
requesting database details for system... writing: debug/schema/system@details.json
29 tables found
30 tables found
requesting table details for system.public.namespace... writing: debug/schema/system/public_namespace.json
requesting table details for system.public.descriptor... writing: debug/schema/system/public_descriptor.json
requesting table details for system.public.users... writing: debug/schema/system/public_users.json
Expand Down Expand Up @@ -268,5 +270,6 @@ requesting table details for system.public.statement_diagnostics_requests... wri
requesting table details for system.public.statement_diagnostics... writing: debug/schema/system/public_statement_diagnostics.json
requesting table details for system.public.scheduled_jobs... writing: debug/schema/system/public_scheduled_jobs.json
requesting table details for system.public.sqlliveness... writing: debug/schema/system/public_sqlliveness.json
requesting table details for system.public.migrations... writing: debug/schema/system/public_migrations.json
writing: debug/pprof-summary.sh
writing: debug/hot-ranges.sh
9 changes: 6 additions & 3 deletions pkg/cli/testdata/zip/partial1_excluded
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ requesting goroutine files for node 1... writing: debug/nodes/1/goroutines.err.t
^- resulted in ...
requesting log file ...
requesting log file ...
requesting ranges... 35 found
requesting ranges... 36 found
writing: debug/nodes/1/ranges/1.json
writing: debug/nodes/1/ranges/2.json
writing: debug/nodes/1/ranges/3.json
Expand Down Expand Up @@ -94,6 +94,7 @@ writing: debug/nodes/1/ranges/32.json
writing: debug/nodes/1/ranges/33.json
writing: debug/nodes/1/ranges/34.json
writing: debug/nodes/1/ranges/35.json
writing: debug/nodes/1/ranges/36.json
writing: debug/nodes/2.skipped
writing: debug/nodes/3/status.json
using SQL connection URL for node 3: postgresql://...
Expand Down Expand Up @@ -125,7 +126,7 @@ requesting goroutine files for node 3... writing: debug/nodes/3/goroutines.err.t
^- resulted in ...
requesting log file ...
requesting log file ...
requesting ranges... 35 found
requesting ranges... 36 found
writing: debug/nodes/3/ranges/1.json
writing: debug/nodes/3/ranges/2.json
writing: debug/nodes/3/ranges/3.json
Expand Down Expand Up @@ -161,6 +162,7 @@ writing: debug/nodes/3/ranges/32.json
writing: debug/nodes/3/ranges/33.json
writing: debug/nodes/3/ranges/34.json
writing: debug/nodes/3/ranges/35.json
writing: debug/nodes/3/ranges/36.json
doctor examining cluster...No problems found!
writing: debug/reports/doctor.txt
requesting list of SQL databases... 3 found
Expand All @@ -169,7 +171,7 @@ requesting database details for defaultdb... writing: debug/schema/defaultdb@det
requesting database details for postgres... writing: debug/schema/postgres@details.json
0 tables found
requesting database details for system... writing: debug/schema/system@details.json
29 tables found
30 tables found
requesting table details for system.public.namespace... writing: debug/schema/system/public_namespace.json
requesting table details for system.public.descriptor... writing: debug/schema/system/public_descriptor.json
requesting table details for system.public.users... writing: debug/schema/system/public_users.json
Expand Down Expand Up @@ -199,5 +201,6 @@ requesting table details for system.public.statement_diagnostics_requests... wri
requesting table details for system.public.statement_diagnostics... writing: debug/schema/system/public_statement_diagnostics.json
requesting table details for system.public.scheduled_jobs... writing: debug/schema/system/public_scheduled_jobs.json
requesting table details for system.public.sqlliveness... writing: debug/schema/system/public_sqlliveness.json
requesting table details for system.public.migrations... writing: debug/schema/system/public_migrations.json
writing: debug/pprof-summary.sh
writing: debug/hot-ranges.sh
9 changes: 6 additions & 3 deletions pkg/cli/testdata/zip/partial2
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ requesting goroutine files for node 1... writing: debug/nodes/1/goroutines.err.t
^- resulted in ...
requesting log file ...
requesting log file ...
requesting ranges... 35 found
requesting ranges... 36 found
writing: debug/nodes/1/ranges/1.json
writing: debug/nodes/1/ranges/2.json
writing: debug/nodes/1/ranges/3.json
Expand Down Expand Up @@ -94,6 +94,7 @@ writing: debug/nodes/1/ranges/32.json
writing: debug/nodes/1/ranges/33.json
writing: debug/nodes/1/ranges/34.json
writing: debug/nodes/1/ranges/35.json
writing: debug/nodes/1/ranges/36.json
writing: debug/nodes/3/status.json
using SQL connection URL for node 3: postgresql://...
retrieving SQL data for crdb_internal.feature_usage... writing: debug/nodes/3/crdb_internal.feature_usage.txt
Expand Down Expand Up @@ -124,7 +125,7 @@ requesting goroutine files for node 3... writing: debug/nodes/3/goroutines.err.t
^- resulted in ...
requesting log file ...
requesting log file ...
requesting ranges... 35 found
requesting ranges... 36 found
writing: debug/nodes/3/ranges/1.json
writing: debug/nodes/3/ranges/2.json
writing: debug/nodes/3/ranges/3.json
Expand Down Expand Up @@ -160,6 +161,7 @@ writing: debug/nodes/3/ranges/32.json
writing: debug/nodes/3/ranges/33.json
writing: debug/nodes/3/ranges/34.json
writing: debug/nodes/3/ranges/35.json
writing: debug/nodes/3/ranges/36.json
doctor examining cluster...No problems found!
writing: debug/reports/doctor.txt
requesting list of SQL databases... 3 found
Expand All @@ -168,7 +170,7 @@ requesting database details for defaultdb... writing: debug/schema/defaultdb@det
requesting database details for postgres... writing: debug/schema/postgres@details.json
0 tables found
requesting database details for system... writing: debug/schema/system@details.json
29 tables found
30 tables found
requesting table details for system.public.namespace... writing: debug/schema/system/public_namespace.json
requesting table details for system.public.descriptor... writing: debug/schema/system/public_descriptor.json
requesting table details for system.public.users... writing: debug/schema/system/public_users.json
Expand Down Expand Up @@ -198,5 +200,6 @@ requesting table details for system.public.statement_diagnostics_requests... wri
requesting table details for system.public.statement_diagnostics... writing: debug/schema/system/public_statement_diagnostics.json
requesting table details for system.public.scheduled_jobs... writing: debug/schema/system/public_scheduled_jobs.json
requesting table details for system.public.sqlliveness... writing: debug/schema/system/public_sqlliveness.json
requesting table details for system.public.migrations... writing: debug/schema/system/public_migrations.json
writing: debug/pprof-summary.sh
writing: debug/hot-ranges.sh
3 changes: 2 additions & 1 deletion pkg/cli/testdata/zip/specialnames
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ requesting table details for defaultdb.public."../system"... writing: debug/sche
requesting database details for postgres... writing: debug/schema/postgres@details.json
0 tables found
requesting database details for system... writing: debug/schema/[email protected]
29 tables found
30 tables found
requesting table details for system.public.namespace... writing: debug/schema/system-1/public_namespace.json
requesting table details for system.public.descriptor... writing: debug/schema/system-1/public_descriptor.json
requesting table details for system.public.users... writing: debug/schema/system-1/public_users.json
Expand Down Expand Up @@ -52,3 +52,4 @@ requesting table details for system.public.statement_diagnostics_requests... wri
requesting table details for system.public.statement_diagnostics... writing: debug/schema/system-1/public_statement_diagnostics.json
requesting table details for system.public.scheduled_jobs... writing: debug/schema/system-1/public_scheduled_jobs.json
requesting table details for system.public.sqlliveness... writing: debug/schema/system-1/public_sqlliveness.json
requesting table details for system.public.migrations... writing: debug/schema/system-1/public_migrations.json
6 changes: 4 additions & 2 deletions pkg/cli/testdata/zip/testzip
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ requesting heap profile for node 1... writing: debug/nodes/1/heap.pprof
requesting heap files for node 1... ? found
requesting goroutine files for node 1... 0 found
requesting log file ...
requesting ranges... 35 found
requesting ranges... 36 found
writing: debug/nodes/1/ranges/1.json
writing: debug/nodes/1/ranges/2.json
writing: debug/nodes/1/ranges/3.json
Expand Down Expand Up @@ -93,6 +93,7 @@ writing: debug/nodes/1/ranges/32.json
writing: debug/nodes/1/ranges/33.json
writing: debug/nodes/1/ranges/34.json
writing: debug/nodes/1/ranges/35.json
writing: debug/nodes/1/ranges/36.json
doctor examining cluster...No problems found!
writing: debug/reports/doctor.txt
requesting list of SQL databases... 3 found
Expand All @@ -101,7 +102,7 @@ requesting database details for defaultdb... writing: debug/schema/defaultdb@det
requesting database details for postgres... writing: debug/schema/postgres@details.json
0 tables found
requesting database details for system... writing: debug/schema/system@details.json
29 tables found
30 tables found
requesting table details for system.public.namespace... writing: debug/schema/system/public_namespace.json
requesting table details for system.public.descriptor... writing: debug/schema/system/public_descriptor.json
requesting table details for system.public.users... writing: debug/schema/system/public_users.json
Expand Down Expand Up @@ -131,5 +132,6 @@ requesting table details for system.public.statement_diagnostics_requests... wri
requesting table details for system.public.statement_diagnostics... writing: debug/schema/system/public_statement_diagnostics.json
requesting table details for system.public.scheduled_jobs... writing: debug/schema/system/public_scheduled_jobs.json
requesting table details for system.public.sqlliveness... writing: debug/schema/system/public_sqlliveness.json
requesting table details for system.public.migrations... writing: debug/schema/system/public_migrations.json
writing: debug/pprof-summary.sh
writing: debug/hot-ranges.sh
36 changes: 32 additions & 4 deletions pkg/clusterversion/cockroach_versions.go
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,24 @@ const (
CPutInline
// ReplicaVersions enables the versioning of Replica state.
ReplicaVersions
// replacedTruncatedAndRangeAppliedStateMigration stands in for
// TruncatedAndRangeAppliedStateMigration, which was re-introduced at a later
// version once the migration job existed. This is necessary because the jobs
// infrastructure used to run this migration in v21.1 and its later alphas
// was added after this version was first introduced. Later code in the
// release relies on the job to run the migration, but the job relies on
// its startup migrations having been run. Versions associated with
// long-running migrations must follow LongRunningMigrations.
replacedTruncatedAndRangeAppliedStateMigration
// replacedPostTruncatedAndRangeAppliedStateMigration is like the above
// version. See its comment.
replacedPostTruncatedAndRangeAppliedStateMigration
// NewSchemaChanger enables the new schema changer.
NewSchemaChanger
// LongRunningMigrations introduces the system.migrations table and the
// migration job. All versions which have a registered long-running migration
// must have a version higher than this version.
LongRunningMigrations
// TruncatedAndRangeAppliedStateMigration is part of the migration to stop
// using the legacy truncated state within KV. After the migration, we'll be
// using the unreplicated truncated state and the RangeAppliedState on all
Expand All @@ -217,8 +235,6 @@ const (
// using the replicated legacy TruncatedState. It's also used in asserting
// that no replicated truncated state representation is found.
PostTruncatedAndRangeAppliedStateMigration
// NewSchemaChanger enables the new schema changer.
NewSchemaChanger

// Step (1): Add new versions here.
)
Expand Down Expand Up @@ -350,17 +366,29 @@ var versionsSingleton = keyedVersions([]keyedVersion{
Version: roachpb.Version{Major: 20, Minor: 2, Internal: 12},
},
{
Key: TruncatedAndRangeAppliedStateMigration,
Key: replacedTruncatedAndRangeAppliedStateMigration,
Version: roachpb.Version{Major: 20, Minor: 2, Internal: 14},
},
{
Key: PostTruncatedAndRangeAppliedStateMigration,
Key: replacedPostTruncatedAndRangeAppliedStateMigration,
Version: roachpb.Version{Major: 20, Minor: 2, Internal: 16},
},
{
Key: NewSchemaChanger,
Version: roachpb.Version{Major: 20, Minor: 2, Internal: 18},
},
{
Key: LongRunningMigrations,
Version: roachpb.Version{Major: 20, Minor: 2, Internal: 20},
},
{
Key: TruncatedAndRangeAppliedStateMigration,
Version: roachpb.Version{Major: 20, Minor: 2, Internal: 22},
},
{
Key: PostTruncatedAndRangeAppliedStateMigration,
Version: roachpb.Version{Major: 20, Minor: 2, Internal: 24},
},
// Step (2): Add new versions here.
})

Expand Down
11 changes: 7 additions & 4 deletions pkg/clusterversion/key_string.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

23 changes: 16 additions & 7 deletions pkg/jobs/adopt.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,22 @@ import (
"github.com/cockroachdb/errors"
)

const claimableStatusTupleString = `(` +
`'` + string(StatusRunning) + `', ` +
`'` + string(StatusPending) + `', ` +
`'` + string(StatusCancelRequested) + `', ` +
`'` + string(StatusPauseRequested) + `', ` +
`'` + string(StatusReverting) + `'` +
`)`
// SQL fragments enumerating job statuses. Each list is a comma-separated
// sequence of single-quoted status strings; each tuple string is the
// corresponding list wrapped in parentheses so that it can be spliced into a
// query (e.g. after an IN operator).
const (
// claimableStatusList is the quoted, comma-separated list of the running,
// pending, cancel-requested, pause-requested and reverting statuses. It is
// the shared building block for the tuples below.
claimableStatusList = `'` + string(StatusRunning) + `', ` +
`'` + string(StatusPending) + `', ` +
`'` + string(StatusCancelRequested) + `', ` +
`'` + string(StatusPauseRequested) + `', ` +
`'` + string(StatusReverting) + `'`

// claimableStatusTupleString is claimableStatusList wrapped in parentheses
// to form a SQL tuple.
claimableStatusTupleString = `(` + claimableStatusList + `)`

// nonTerminalStatusList extends claimableStatusList with the paused status.
nonTerminalStatusList = claimableStatusList + `, ` +
`'` + string(StatusPaused) + `'`

// NonTerminalStatusTupleString is a sql tuple corresponding to statuses of
// non-terminal jobs.
NonTerminalStatusTupleString = `(` + nonTerminalStatusList + `)`
)

// claimJobs places a claim with the given SessionID to job rows that are
// available.
Expand Down
2 changes: 2 additions & 0 deletions pkg/jobs/jobspb/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ proto_library(
strip_import_prefix = "/pkg",
visibility = ["//visibility:public"],
deps = [
"//pkg/clusterversion:clusterversion_proto",
"//pkg/roachpb:roachpb_proto",
"//pkg/sql/catalog/descpb:descpb_proto",
"//pkg/sql/schemachanger/scpb:scpb_proto",
Expand All @@ -42,6 +43,7 @@ go_proto_library(
visibility = ["//visibility:public"],
deps = [
"//pkg/ccl/streamingccl", # keep
"//pkg/clusterversion",
"//pkg/roachpb",
"//pkg/security", # keep
"//pkg/sql/catalog/descpb",
Expand Down
Loading

0 comments on commit 3a3565a

Please sign in to comment.