// Copyright 2020 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

// Package migrationmanager provides an implementation of migration.Manager
// for use on kv nodes.
package migrationmanager

import (
	"context"
	"fmt"
	"strings"

	"github.com/cockroachdb/cockroach/pkg/clusterversion"
	"github.com/cockroachdb/cockroach/pkg/jobs"
	"github.com/cockroachdb/cockroach/pkg/jobs/jobspb"
	"github.com/cockroachdb/cockroach/pkg/kv"
	"github.com/cockroachdb/cockroach/pkg/migration"
	"github.com/cockroachdb/cockroach/pkg/security"
	"github.com/cockroachdb/cockroach/pkg/server/serverpb"
	"github.com/cockroachdb/cockroach/pkg/sql/protoreflect"
	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
	"github.com/cockroachdb/cockroach/pkg/sql/sqlutil"
	"github.com/cockroachdb/cockroach/pkg/util/log"
	"github.com/cockroachdb/errors"
	"github.com/cockroachdb/logtags"
)

// Manager is the instance responsible for executing migrations across the
// cluster.
type Manager struct {
	c  migration.Cluster
	ie sqlutil.InternalExecutor
	jr *jobs.Registry
}

// NewManager constructs a new Manager.
//
// TODO(irfansharif): We'll eventually need to plumb in a lease manager here.
func NewManager(c migration.Cluster, ie sqlutil.InternalExecutor, jr *jobs.Registry) *Manager {
	return &Manager{
		c:  c,
		ie: ie,
		jr: jr,
	}
}
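
// The sketch below is illustrative only and is not part of the original file:
// it shows roughly how a caller that already holds the cluster handle, an
// internal SQL executor, and a jobs registry might drive an upgrade through
// the Manager. The function name and parameter plumbing are assumptions made
// for the example.
func runClusterUpgradeSketch(
	ctx context.Context,
	c migration.Cluster,
	ie sqlutil.InternalExecutor,
	jr *jobs.Registry,
	user security.SQLUsername,
	from, to clusterversion.ClusterVersion,
) error {
	mgr := NewManager(c, ie, jr)
	// Migrate is a no-op when from == to; otherwise it steps through every
	// intermediate cluster version between the two.
	return mgr.Migrate(ctx, user, from, to)
}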

// Migrate runs the set of migrations required to upgrade the cluster version
// from the current version to the target one.
func (m *Manager) Migrate(
	ctx context.Context, user security.SQLUsername, from, to clusterversion.ClusterVersion,
) error {
	// TODO(irfansharif): Should we inject every ctx here with specific labels
	// for each migration, so they log distinctly?
	ctx = logtags.AddTag(ctx, "migration-mgr", nil)

	if from == to {
		// Nothing to do here.
		log.Infof(ctx, "no need to migrate, cluster already at newest version")
		return nil
	}

	// TODO(irfansharif): We'll need to acquire a lease here and refresh it
	// throughout the migration to ensure mutual exclusion.

	// TODO(irfansharif): We'll need to create a system table to store the
	// in-progress state of long-running migrations, for introspection.

	clusterVersions := clusterversion.ListBetween(from, to)
	if len(clusterVersions) == 0 {
		// We're attempting to migrate to something that's not defined in cluster
		// versions. This only happens in tests, when we're exercising version
		// upgrades over non-existent versions (like in the cluster_version
		// logictest). These tests explicitly override the
		// binary{,MinSupportedVersion} in order to work. End-user attempts to
		// do something similar would be caught at the sql layer (also tested in
		// the same logictest). We'll just explicitly append the target version
		// here instead, so that we're able to actually migrate into it.
		clusterVersions = append(clusterVersions, to)
	}
	log.Infof(ctx, "migrating cluster from %s to %s (stepping through %s)", from, to, clusterVersions)

	for _, clusterVersion := range clusterVersions {
		log.Infof(ctx, "stepping through %s", clusterVersion)

		// First, run the actual migration, if any.
		if err := m.runMigration(ctx, user, clusterVersion); err != nil {
			return err
		}

		// Next, we'll push out the version gate to every node in the cluster.
		// Each node will persist the version, bump the local version gates, and
		// then return. The migration associated with the specific version is
		// executed before every node in the cluster has the corresponding
		// version activated. Migrations that depend on a certain version
		// already being activated will need to be registered using a cluster
		// version greater than it.
		//
		// For each intermediate version, we'll need to first bump the fence
		// version before bumping the "real" one. Doing so allows us to provide
		// the invariant that whenever a cluster version is active, all Nodes in
		// the cluster (including ones added concurrently during version
		// upgrades) are running binaries that know about the version.
		//
		// Below-raft migrations mutate replica state[1], making use of the
		// Migrate(version=V) primitive which they issue against the entire
		// keyspace. These migrations typically want to rely on the invariant
		// that there are no extant replicas in the system that haven't seen the
		// specific Migrate command.
		//
		// This is partly achieved through the implementation of the Migrate
		// command itself, which waits until it's applied on all followers[2]
		// before returning. This also addresses the concern of extant snapshots
		// with pre-migrated state possibly instantiating older-version
		// replicas. The intended learner replicas are listed as part of the
		// range descriptor, and are also waited on during command
		// application. As for stale snapshots, if they specify a replicaID
		// that's no longer part of the raft group, they're discarded by the
		// recipient. Snapshots are also discarded unless they move the LAI
		// forward.
		//
		// That still leaves room for replicas in the replica GC queue to evade
		// detection. To address this, below-raft migrations typically take a
		// two-phase approach (the TruncatedAndRangeAppliedStateMigration being
		// one example of this), where after having migrated the entire keyspace
		// to version V, and after having prevented subsequent snapshots
		// originating from replicas with versions < V, the migration sets out
		// to purge outdated replicas in the system[3]. Specifically, it
		// processes all replicas in the GC queue with a version < V (which are
		// not accessible during the application of the Migrate command).
		//
		// [1]: See ReplicaState.Version.
		// [2]: See Replica.executeWriteBatch, specifically how proposals with the
		//      Migrate request are handled downstream of raft.
		// [3]: See PurgeOutdatedReplicas from the Migration service.
		{
			// The migrations infrastructure makes use of internal fence
			// versions when stepping through consecutive versions. It's
			// instructive to walk through how we expect a version migration
			// from v21.1 to v21.2 to take place, and how we behave in the
			// presence of new v21.1 or v21.2 Nodes being added to the cluster.
			//
			// - All Nodes are running v21.1.
			// - All Nodes are rolled into v21.2 binaries, but with the active
			//   cluster version still at v21.1.
			// - The first version bump will be into v21.2-1(fence); see the
			//   migration manager above for where that happens.
			//
			// Then, concurrently:
			//
			// - A new node is added to the cluster, but running binary v21.1.
			// - We try bumping the cluster gates to v21.2-1(fence).
			//
			// If the v21.1 node manages to sneak in before the version bump,
			// that's fine, as the version bump is a no-op (all fence versions
			// are). Any subsequent bumps (including the "actual" one bumping to
			// v21.2) will fail during the validation step where we'll first
			// check that all Nodes are running v21.2 binaries.
			//
			// If the v21.1 node is only added after v21.2-1(fence) is active,
			// it won't be able to actually join the cluster (it'll be prevented
			// by the join RPC).
			//
			// All of which is to say that once we've seen the node list
			// stabilize (as UntilClusterStable enforces), any new nodes that
			// can join the cluster will run a release that supports the fence
			// version, and by design also supports the actual version (which is
			// the direct successor of the fence).
			fenceVersion := migration.FenceVersionFor(ctx, clusterVersion)
			req := &serverpb.BumpClusterVersionRequest{ClusterVersion: &fenceVersion}
			op := fmt.Sprintf("bump-cluster-version=%s", req.ClusterVersion.PrettyPrint())
			if err := m.c.UntilClusterStable(ctx, func() error {
				return m.c.ForEveryNode(ctx, op, func(ctx context.Context, client serverpb.MigrationClient) error {
					_, err := client.BumpClusterVersion(ctx, req)
					return err
				})
			}); err != nil {
				return err
			}
		}

		{
			// Now sanity check that we'll actually be able to perform the real
			// cluster version bump, cluster-wide.
			req := &serverpb.ValidateTargetClusterVersionRequest{ClusterVersion: &clusterVersion}
			op := fmt.Sprintf("validate-cluster-version=%s", req.ClusterVersion.PrettyPrint())
			if err := m.c.UntilClusterStable(ctx, func() error {
				return m.c.ForEveryNode(ctx, op, func(ctx context.Context, client serverpb.MigrationClient) error {
					_, err := client.ValidateTargetClusterVersion(ctx, req)
					return err
				})
			}); err != nil {
				return err
			}
		}

		{
			// Finally, bump the real version cluster-wide.
			req := &serverpb.BumpClusterVersionRequest{ClusterVersion: &clusterVersion}
			op := fmt.Sprintf("bump-cluster-version=%s", req.ClusterVersion.PrettyPrint())
			if err := m.c.UntilClusterStable(ctx, func() error {
				return m.c.ForEveryNode(ctx, op, func(ctx context.Context, client serverpb.MigrationClient) error {
					_, err := client.BumpClusterVersion(ctx, req)
					return err
				})
			}); err != nil {
				return err
			}
		}
	}

	return nil
}
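
// stepVersionSketch is an illustrative condensation and is not part of the
// original file: it restates the per-version choreography in Migrate above in
// one place. Push the fence version to every node, validate that every node
// can accept the target version, and then push the target version itself,
// retrying each fan-out until the cluster membership is stable. The function
// and closure names are assumptions made for the example.
func stepVersionSketch(
	ctx context.Context, c migration.Cluster, clusterVersion clusterversion.ClusterVersion,
) error {
	// everyNode issues the given RPC against every node, and retries the whole
	// fan-out until the node list is stable, so nodes added mid-way are reached.
	everyNode := func(op string, fn func(context.Context, serverpb.MigrationClient) error) error {
		return c.UntilClusterStable(ctx, func() error {
			return c.ForEveryNode(ctx, op, fn)
		})
	}

	// Step 1: bump the fence version. Once it's active, binaries that don't
	// know about the target version can no longer join the cluster.
	fenceVersion := migration.FenceVersionFor(ctx, clusterVersion)
	fenceReq := &serverpb.BumpClusterVersionRequest{ClusterVersion: &fenceVersion}
	if err := everyNode(
		fmt.Sprintf("bump-cluster-version=%s", fenceReq.ClusterVersion.PrettyPrint()),
		func(ctx context.Context, client serverpb.MigrationClient) error {
			_, err := client.BumpClusterVersion(ctx, fenceReq)
			return err
		},
	); err != nil {
		return err
	}

	// Step 2: sanity check that every node can accept the real version bump.
	validateReq := &serverpb.ValidateTargetClusterVersionRequest{ClusterVersion: &clusterVersion}
	if err := everyNode(
		fmt.Sprintf("validate-cluster-version=%s", validateReq.ClusterVersion.PrettyPrint()),
		func(ctx context.Context, client serverpb.MigrationClient) error {
			_, err := client.ValidateTargetClusterVersion(ctx, validateReq)
			return err
		},
	); err != nil {
		return err
	}

	// Step 3: bump the real version cluster-wide.
	realReq := &serverpb.BumpClusterVersionRequest{ClusterVersion: &clusterVersion}
	return everyNode(
		fmt.Sprintf("bump-cluster-version=%s", realReq.ClusterVersion.PrettyPrint()),
		func(ctx context.Context, client serverpb.MigrationClient) error {
			_, err := client.BumpClusterVersion(ctx, realReq)
			return err
		},
	)
}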

func (m *Manager) runMigration(
	ctx context.Context, user security.SQLUsername, version clusterversion.ClusterVersion,
) error {
	if _, exists := migration.GetMigration(version); !exists {
		return nil
	}
	id, err := m.getOrCreateMigrationJob(ctx, user, version)
	if err != nil {
		return err
	}
	return m.jr.Run(ctx, m.ie, []int64{id})
}
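
// hasRegisteredMigrationSketch is an illustrative helper and is not part of
// the original file: it makes explicit the gate at the top of runMigration
// above. Versions without a registered migration only receive the cluster
// version bumps performed in Migrate; no job is created for them. The helper
// name is an assumption made for the example.
func hasRegisteredMigrationSketch(version clusterversion.ClusterVersion) bool {
	_, exists := migration.GetMigration(version)
	return exists
}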

func (m *Manager) getOrCreateMigrationJob(
	ctx context.Context, user security.SQLUsername, version clusterversion.ClusterVersion,
) (jobID int64, _ error) {
	if err := m.c.DB().Txn(ctx, func(ctx context.Context, txn *kv.Txn) (err error) {
		var found bool
		found, jobID, err = m.getRunningMigrationJob(ctx, txn, version)
		if err != nil {
			return err
		}
		if found {
			return nil
		}
		var j *jobs.Job
		j, err = m.jr.CreateJobWithTxn(ctx, jobs.Record{
			Description: "Long running migration",
			Details: jobspb.LongRunningMigrationDetails{
				ClusterVersion: &version,
			},
			Username:      user,
			Progress:      jobspb.LongRunningMigrationProgress{},
			NonCancelable: true,
		}, txn)
		if err != nil {
			return err
		}
		jobID = *j.ID()
		return nil
	}); err != nil {
		return 0, err
	}
	return jobID, nil
}

func (m *Manager) getRunningMigrationJob(
	ctx context.Context, txn *kv.Txn, version clusterversion.ClusterVersion,
) (found bool, jobID int64, _ error) {
	const query = `
SELECT id, status
  FROM (
        SELECT id,
               status,
               crdb_internal.pb_to_json(
                 'cockroach.sql.jobs.jobspb.Payload',
                 payload
               ) AS pl
          FROM system.jobs
         WHERE status IN ` + jobs.NonTerminalStatusTupleString + `
       )
 WHERE pl->'longRunningMigration'->'clusterVersion' = $1::JSON;`
	// TODO(ajwerner): Flip the emitDefaults flag once this is rebased on master.
	jsonMsg, err := protoreflect.MessageToJSON(&version, true /* emitDefaults */)
	if err != nil {
		return false, 0, errors.Wrap(err, "failed to marshal version to JSON")
	}
	rows, err := m.ie.Query(ctx, "migration-manager-find-jobs", txn, query, jsonMsg.String())
	if err != nil {
		return false, 0, err
	}
	parseRow := func(row tree.Datums) (id int64, status jobs.Status) {
		return int64(*row[0].(*tree.DInt)), jobs.Status(*row[1].(*tree.DString))
	}
	switch len(rows) {
	case 0:
		return false, 0, nil
	case 1:
		id, status := parseRow(rows[0])
		log.Infof(ctx, "found existing migration job %d for version %v in status %s, waiting",
			id, &version, status)
		return true, id, nil
	default:
		format := "found multiple non-terminal jobs for version %v: [" +
			strings.Repeat("(%d, %s), ", len(rows)-1) + "(%d, %s)]"
		args := make([]interface{}, 1+len(rows)*2)
		args[0] = &version
		for i := range rows {
			args[2*i+1], args[2*i+2] = parseRow(rows[i])
		}
		log.Errorf(ctx, format, args...)
		return false, 0, errors.AssertionFailedf(format, args...)
	}
}