importer: add ImportStartWallTime and ImportType table descriptor fields
This patch introduces the new ImportStartWallTime table descriptor field, which records the time at which an in-progress import began writing data to disk. The field is nonzero if the table is offline during an import. The patch also adds the ImportType field, which indicates the kind of import being run on the table.

In future PRs, these fields will be used to:
- incrementally back up in-progress imports, preventing a large BACKUP workload after an import finishes.
- elide importing keys in RESTORE, ensuring that a table with an in-progress import in the backup contains no in-progress importing keys and is available on the restored cluster.

Informs #85138

Release note: none
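For orientation, here is a rough Go sketch of how the two new fields might sit on a table descriptor and how a consumer could test for an in-progress import. The `tableDescriptor` type, the `ImportType` constant names, and `hasInProgressImport` are illustrative stand-ins, not the actual CockroachDB proto or API; only the field names and the enum string values asserted in the test below come from this commit.

```go
package main

import "fmt"

// ImportType is an illustrative stand-in for the new descriptor field;
// the variants mirror the values the test below asserts
// ("IMPORT_EXISTS_EMPTY", "IMPORT_EXISTS_NON_EMPTY").
type ImportType int

const (
	ImportNone ImportType = iota
	ImportExistsEmpty    // import into an existing table with no data
	ImportExistsNonEmpty // import into an existing table that already has data
)

// tableDescriptor is a minimal, hypothetical stand-in showing only the
// two fields added by this commit.
type tableDescriptor struct {
	// ImportStartWallTime is the wall time at which an in-progress
	// import began writing data; zero when no import is in progress.
	ImportStartWallTime int64
	ImportType          ImportType
}

// hasInProgressImport reports whether the table is offline due to an
// in-progress import, per the invariant in the commit message: the
// field is nonzero if the table is offline during an import.
func hasInProgressImport(d *tableDescriptor) bool {
	return d.ImportStartWallTime != 0
}

func main() {
	d := &tableDescriptor{
		ImportStartWallTime: 1660000000000000000, // example nanosecond wall time
		ImportType:          ImportExistsEmpty,
	}
	fmt.Println(hasInProgressImport(d)) // true
}
```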
Showing 12 changed files with 295 additions and 22 deletions.
pkg/ccl/backupccl/testdata/backup-restore/import-start-time (157 additions, 0 deletions)
@@ -0,0 +1,157 @@
# Test that Import properly sets and removes the ImportStartTime from the table descriptor.
# The basic idea:
# For a table with or without data:
## -start and pause an import
## -check that the ImportStartTime is set on the descriptor
## -check that it's removed after cancellation / success

new-server name=s1
----

exec-sql
CREATE DATABASE d;
USE d;
CREATE TABLE foo (i INT PRIMARY KEY, s STRING);
CREATE TABLE baz (i INT PRIMARY KEY, s STRING);
INSERT INTO baz VALUES (1, 'x'),(2,'y'),(3,'z');
----

# Define two queries which grab the ImportStartTime and ImportType from foo's table descriptor.
exec-sql
CREATE VIEW import_time (importStartTime)
AS WITH tbls AS (
SELECT id, crdb_internal.pb_to_json('cockroach.sql.sqlbase.Descriptor', descriptor) AS orig FROM system.descriptor
)
SELECT orig->'table'->'importStartWallTime' FROM tbls
INNER JOIN (SELECT id FROM system.namespace WHERE name='foo') AS sys
ON sys.id = tbls.id;
----

exec-sql
CREATE VIEW import_type (importType)
AS WITH tbls AS (
SELECT id, crdb_internal.pb_to_json('cockroach.sql.sqlbase.Descriptor', descriptor) AS orig FROM system.descriptor
)
SELECT orig->'table'->'importType' FROM tbls
INNER JOIN (SELECT id FROM system.namespace WHERE name='foo') AS sys
ON sys.id = tbls.id;
----

exec-sql
EXPORT INTO CSV 'nodelocal://0/export1/' FROM SELECT * FROM baz WHERE i = 1;
----

exec-sql
SET CLUSTER SETTING jobs.debug.pausepoints = 'import.after_ingest';
----

import expect-pausepoint tag=a
IMPORT INTO foo (i,s) CSV DATA ('nodelocal://0/export1/export*-n*.0.csv')
----
job paused at pausepoint

query-sql regex=^"\d.
SELECT * FROM import_time
----
true

query-sql
SELECT * FROM import_type
----
"IMPORT_EXISTS_EMPTY"

# Attempting another import should fail, as there's already an in-progress import
# on the table.
exec-sql
IMPORT INTO foo (i,s) CSV DATA ('nodelocal://0/export1/export*-n*.0.csv')
----
pq: relation "foo" is offline: importing

# Cancel the job so that the cleanup hook runs, and ensure the importStartTime is 0.
job cancel=a
----

query-sql
SELECT * FROM import_time
----
<nil>

query-sql
SELECT * FROM import_type
----
<nil>

# Remove the pause setting, try the import again, and ensure it succeeds.
exec-sql
SET CLUSTER SETTING jobs.debug.pausepoints = '';
----

exec-sql
IMPORT INTO foo (i,s) CSV DATA ('nodelocal://0/export1/export*-n*.0.csv')
----

query-sql
SELECT * FROM import_time
----
<nil>

query-sql
SELECT * FROM import_type
----
<nil>

# Ensure importing into a table that already has data also modifies the descriptor properly.
exec-sql
EXPORT INTO CSV 'nodelocal://0/export2/' FROM SELECT * FROM baz WHERE i = 2;
----

exec-sql
SET CLUSTER SETTING jobs.debug.pausepoints = 'import.after_ingest';
----

import expect-pausepoint tag=b
IMPORT INTO foo (i,s) CSV DATA ('nodelocal://0/export2/export*-n*.0.csv')
----
job paused at pausepoint

query-sql regex=^"\d.
SELECT * FROM import_time
----
true

query-sql
SELECT * FROM import_type
----
"IMPORT_EXISTS_NON_EMPTY"

# Cancel the job so that the cleanup hook runs.
job cancel=b
----

query-sql
SELECT * FROM import_time
----
<nil>
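The commit message notes that a future PR will use ImportStartWallTime to elide in-progress importing keys during RESTORE. A minimal sketch of that idea follows, assuming a hypothetical `keyValue` type and helper name; this is not the actual restore code path, only an illustration of the filtering the field enables.

```go
package main

import "fmt"

// keyValue is a hypothetical stand-in for a restored MVCC key/value pair.
type keyValue struct {
	key      string
	wallTime int64 // wall-clock timestamp of the write, in nanoseconds
}

// elideImportingKeys drops keys written at or after the table's
// ImportStartWallTime, so a table that had an in-progress import in the
// backup is restored without any partially imported data.
func elideImportingKeys(kvs []keyValue, importStartWallTime int64) []keyValue {
	if importStartWallTime == 0 {
		// No in-progress import was recorded; keep every key.
		return kvs
	}
	out := kvs[:0]
	for _, kv := range kvs {
		if kv.wallTime < importStartWallTime {
			out = append(out, kv)
		}
	}
	return out
}

func main() {
	kvs := []keyValue{
		{key: "a", wallTime: 100}, // written before the import started
		{key: "b", wallTime: 250}, // written by the in-progress import
	}
	fmt.Println(elideImportingKeys(kvs, 200)) // [{a 100}]
}
```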