# Test that we properly take a full backup of an offline span when it can be
# rolled back non-mvcc.
#
# TODO(msbutler): waiting for https://github.com/cockroachdb/cockroach/pull/86689 to land
#
# Part 1 - ensure a non-mvcc clear range induces full reintroduction of the spans
# - begin import jobs and pause them
# - run full backups and verify they capture the importing data
# - roll the imports back non-mvcc
# - run an inc backup and ensure we reintroduce the table spans
#
# Disabled from running within a tenant, as tenants don't have access to the
# storage.mvcc.range_tombstones.enabled cluster setting.
new-cluster name=s1 beforeVersion=23_1_MVCCTombstones disable-tenant
----
###########
# Case 1: an incremental backup captures a non-mvcc rollback
###########
exec-sql
CREATE DATABASE d;
USE d;
CREATE TABLE foo (i INT PRIMARY KEY, s STRING);
CREATE INDEX foo_idx ON foo (s);
CREATE INDEX foo_to_drop_idx ON foo (s);
CREATE TABLE foofoo (i INT PRIMARY KEY, s STRING);
INSERT INTO foofoo VALUES (10, 'x0');
CREATE TABLE baz (i INT PRIMARY KEY, s STRING);
INSERT INTO baz VALUES (1, 'x'),(2,'y'),(3,'z');
----
exec-sql
SET CLUSTER SETTING jobs.debug.pausepoints = 'import.after_ingest';
----
exec-sql
SET CLUSTER SETTING kv.bulkio.write_metadata_sst.enabled = false;
----
exec-sql
SET CLUSTER SETTING storage.mvcc.range_tombstones.enabled = false;
----
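# Note: with storage.mvcc.range_tombstones.enabled set to false, cancelling an import rolls
# back the ingested keys with a non-mvcc clear range, destroying their mvcc history; this is
# what forces the later incremental backups to reintroduce the table spans from ts=0.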
exec-sql
EXPORT INTO CSV 'nodelocal://1/export1/' FROM SELECT * FROM baz;
----
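# The export writes the 3 rows of baz; each import below ingests these 3 rows.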
# Pause the import jobs in order to back up the importing data.
import expect-pausepoint tag=a
IMPORT INTO foo (i,s) CSV DATA ('nodelocal://1/export1/export*-n*.0.csv')
----
job paused at pausepoint
import expect-pausepoint tag=aa
IMPORT INTO foofoo (i,s) CSV DATA ('nodelocal://1/export1/export*-n*.0.csv')
----
job paused at pausepoint
# Ensure table, database, and cluster full backups capture importing rows.
exec-sql
BACKUP INTO 'nodelocal://1/cluster/' with revision_history;
----
exec-sql
BACKUP DATABASE d INTO 'nodelocal://1/database/' with revision_history;
----
exec-sql
BACKUP TABLE d.* INTO 'nodelocal://1/table/' with revision_history;
----
exec-sql
SET CLUSTER SETTING jobs.debug.pausepoints = '';
----
# Cancel the jobs to roll back the imports, so the next set of incremental backups observes
# that the tables are back online.
job cancel=a
----
job cancel=aa
----
job tag=a wait-for-state=cancelled
----
job tag=aa wait-for-state=cancelled
----
# Verify the rollback: foo returns to 0 rows (it was empty pre-import) and foofoo returns to
# its 1 pre-import row.
query-sql
SELECT count(*) FROM d.foo;
----
0
query-sql
SELECT count(*) FROM d.foofoo;
----
1
# Start and pause another import to ensure the restore checker doesn't spuriously fail on
# descriptors with an in-progress import.
exec-sql
SET CLUSTER SETTING jobs.debug.pausepoints = 'import.after_ingest';
----
exec-sql
CREATE TABLE foo_offline (i INT PRIMARY KEY, s STRING);
----
import expect-pausepoint tag=never_unpause
IMPORT INTO foo_offline (i,s) CSV DATA ('nodelocal://1/export1/export*-n*.0.csv')
----
job paused at pausepoint
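# This import is never resumed or cancelled, so the backups below also capture a descriptor
# that is still offline due to an in-progress import, which the restore checker must not
# spuriously reject.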
# Even though the full table will get backed up from ts=0 during the next round of incremental
# backups, only active indexes (foo_idx and foo_new_idx) should appear in the restored cluster.
exec-sql
DROP INDEX foo@foo_to_drop_idx;
----
NOTICE: the data for dropped indexes is reclaimed asynchronously
HINT: The reclamation delay can be customized in the zone configuration for the table.
exec-sql
CREATE INDEX foo_new_idx ON foo (s);
----
# Ensure the incremental backups back up the newly online spans from ts=0, as the imports
# were rolled back via a non-mvcc clear range. So, back up 0 rows from foo
# (it was empty pre-import), and 1 row from foofoo (it had 1 row pre-import).
exec-sql
BACKUP INTO LATEST IN 'nodelocal://1/cluster/' with revision_history;
----
exec-sql
BACKUP DATABASE d INTO LATEST IN 'nodelocal://1/database/' with revision_history;
----
exec-sql
BACKUP TABLE d.* INTO LATEST IN 'nodelocal://1/table/' with revision_history;
----
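# Expected SHOW BACKUP row counts below: the full backups, taken while the imports were
# paused, captured 3 importing rows in foo and 4 rows in foofoo (1 pre-import + 3 importing);
# the incrementals, taken after the non-mvcc rollback, reintroduce the spans from ts=0 and so
# contain only the post-rollback state: 0 rows for foo and 1 row for foofoo.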
query-sql
SELECT
database_name, object_name, object_type, rows, backup_type
FROM
[SHOW BACKUP FROM LATEST IN 'nodelocal://1/cluster/']
WHERE
object_name = 'foo' or object_name = 'foofoo'
ORDER BY
start_time, database_name;
----
d foo table 3 full
d foofoo table 4 full
d foo table 0 incremental
d foofoo table 1 incremental
query-sql
SELECT
database_name, object_name, object_type, rows, backup_type
FROM
[SHOW BACKUP FROM LATEST IN 'nodelocal://1/database/']
WHERE
object_name = 'foo' or object_name = 'foofoo'
ORDER BY
start_time, database_name;
----
d foo table 3 full
d foofoo table 4 full
d foo table 0 incremental
d foofoo table 1 incremental
query-sql
SELECT
database_name, object_name, object_type, rows, backup_type
FROM
[SHOW BACKUP FROM LATEST IN 'nodelocal://1/table/']
WHERE
object_name = 'foo' or object_name = 'foofoo'
ORDER BY
start_time, database_name;
----
d foo table 3 full
d foofoo table 4 full
d foo table 0 incremental
d foofoo table 1 incremental
# To verify that the incrementals backed up the pre-import state of the tables, restore d and
# ensure all tables are in their pre-import state.
exec-sql
RESTORE DATABASE d FROM LATEST IN 'nodelocal://1/database/' with new_db_name=d2;
----
query-sql
SELECT count(*) FROM d2.foo;
----
0
query-sql
SELECT count(*) FROM d2.foofoo;
----
1
query-sql
SELECT DISTINCT index_name FROM [SHOW INDEXES FROM d.foo];
----
foo_pkey
foo_idx
foo_new_idx
###########
# Case 2: an incremental backup captures an mvcc rollback
###########
exec-sql
DROP DATABASE d2;
CREATE TABLE foo2 (i INT PRIMARY KEY, s STRING);
CREATE INDEX foo2_idx ON foo2 (s);
CREATE INDEX foo2_to_drop_idx ON foo2 (s);
CREATE TABLE foofoo2 (i INT PRIMARY KEY, s STRING);
INSERT INTO foofoo2 VALUES (10, 'x0');
----
exec-sql
SET CLUSTER SETTING jobs.debug.pausepoints = 'import.after_ingest';
----
exec-sql
SET CLUSTER SETTING storage.mvcc.range_tombstones.enabled = true;
----
exec-sql
SET CLUSTER SETTING kv.bulkio.write_metadata_sst.enabled = false;
----
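# Note: with storage.mvcc.range_tombstones.enabled set to true, cancelling an import rolls
# back the ingested keys with mvcc point and range tombstones rather than a non-mvcc clear
# range, so the rollback itself is part of the mvcc history captured by the incrementals below.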
# Pause the import jobs in order to back up the importing data.
import expect-pausepoint tag=b
IMPORT INTO foo2 (i,s) CSV DATA ('nodelocal://1/export1/export*-n*.0.csv')
----
job paused at pausepoint
import expect-pausepoint tag=bb
IMPORT INTO foofoo2 (i,s) CSV DATA ('nodelocal://1/export1/export*-n*.0.csv')
----
job paused at pausepoint
# Ensure table, database, and cluster full backups capture importing rows.
exec-sql
BACKUP INTO 'nodelocal://1/cluster/';
----
exec-sql
BACKUP DATABASE d INTO 'nodelocal://1/database/';
----
exec-sql
BACKUP TABLE d.* INTO 'nodelocal://1/table/';
----
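# While the imports are paused, foo2 holds the 3 importing rows and foofoo2 holds 4 rows
# (1 pre-import + 3 importing); these appear as the "full" rows in SHOW BACKUP below.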
exec-sql
SET CLUSTER SETTING jobs.debug.pausepoints = '';
----
# Cancel the jobs to roll back the imports, so the next set of incremental backups observes
# that the tables are back online.
job cancel=b
----
job cancel=bb
----
job tag=b wait-for-state=cancelled
----
job tag=bb wait-for-state=cancelled
----
# Even though the full table will get backed up from ts=0 during the next round of incremental
# backups, only active indexes (foo2_idx and foo2_new_idx) should appear in the restored cluster.
exec-sql
DROP INDEX foo2@foo2_to_drop_idx;
----
NOTICE: the data for dropped indexes is reclaimed asynchronously
HINT: The reclamation delay can be customized in the zone configuration for the table.
exec-sql
CREATE INDEX foo2_new_idx ON foo2 (s);
----
# These incremental backups will back up all mvcc history from foo2 and foofoo2 because the
# tables returned online. For each table, this means:
# - foofoo2 will have 7 rows:
#   - 1 row from before the import
#   - 3 rows from the import
#   - 3 delete tombstones from the import rollback
# - foo2 will have 3 rows:
#   - 3 rows from the import
#   - Note: because foo2 had no data pre-import, an mvcc range tombstone deletes the imported
#     data. The incremental backup will capture this range tombstone; however, SHOW BACKUP
#     currently will not record this range tombstone as a "row" in the backup.
exec-sql
BACKUP INTO LATEST IN 'nodelocal://1/cluster/';
----
exec-sql
BACKUP DATABASE d INTO LATEST IN 'nodelocal://1/database/';
----
exec-sql
BACKUP TABLE d.* INTO LATEST IN 'nodelocal://1/table/';
----
query-sql
SELECT
database_name, object_name, object_type, rows, backup_type
FROM
[SHOW BACKUP FROM LATEST IN 'nodelocal://1/cluster/']
WHERE
object_name = 'foo2' or object_name = 'foofoo2'
ORDER BY
start_time, database_name;
----
d foo2 table 3 full
d foofoo2 table 4 full
d foo2 table 3 incremental
d foofoo2 table 7 incremental
query-sql
SELECT
database_name, object_name, object_type, rows, backup_type
FROM
[SHOW BACKUP FROM LATEST IN 'nodelocal://1/database/']
WHERE
object_name = 'foo2' or object_name = 'foofoo2'
ORDER BY
start_time, database_name;
----
d foo2 table 3 full
d foofoo2 table 4 full
d foo2 table 3 incremental
d foofoo2 table 7 incremental
query-sql
SELECT
database_name, object_name, object_type, rows, backup_type
FROM
[SHOW BACKUP FROM LATEST IN 'nodelocal://1/table/']
WHERE
object_name = 'foo2' or object_name = 'foofoo2'
ORDER BY
start_time, database_name;
----
d foo2 table 3 full
d foofoo2 table 4 full
d foo2 table 3 incremental
d foofoo2 table 7 incremental
# To verify that the incremental backups captured the tombstones, restore d and ensure all
# tables are in their pre-import state.
exec-sql
RESTORE DATABASE d FROM LATEST IN 'nodelocal://1/database/' with new_db_name=d2;
----
query-sql
SELECT count(*) FROM d2.foo2;
----
0
query-sql
SELECT count(*) FROM d2.foofoo2;
----
1
query-sql
SELECT DISTINCT index_name FROM [SHOW INDEXES FROM d2.foo2];
----
foo2_pkey
foo2_idx
foo2_new_idx