importer: add ImportStartWallTime and ImportType table descriptor fields
This patch introduces the new ImportStartWallTime table descriptor field, which
records the time at which an in-progress import began writing data to disk. The
field is nonzero while the table is offline during an import and is cleared once
the import succeeds or is cancelled. The patch also adds the ImportType field,
which indicates the kind of import being run on the table.

In future PRs, this field will be used to:
- incrementally back up in-progress imports, preventing a large BACKUP workload
  after an import finishes.
- elide importing keys during RESTORE, ensuring that a table with an import in
  progress at backup time contains no keys from that import and is available on
  the restored cluster.

Informs #85138

Release note: none
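
For reference, a minimal query in the spirit of the views defined in the new test
file reads both fields straight out of the descriptor protobuf via
crdb_internal.pb_to_json; the table name 'foo' here is only a placeholder:

-- Sketch: inspect the in-progress-import fields on a table's descriptor.
SELECT
  d->'table'->'importStartWallTime' AS import_start_wall_time,
  d->'table'->'importType' AS import_type
FROM (
  SELECT crdb_internal.pb_to_json('cockroach.sql.sqlbase.Descriptor', descriptor) AS d
  FROM system.descriptor
  WHERE id IN (SELECT id FROM system.namespace WHERE name = 'foo')
) AS tbl;

Both fields are expected to be absent again once the import succeeds or is
cancelled, matching the <nil> results asserted in the test below.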
msbutler committed Aug 11, 2022
1 parent 2fda23b commit 740d8ef
Showing 12 changed files with 295 additions and 22 deletions.
2 changes: 1 addition & 1 deletion docs/generated/settings/settings-for-tenants.txt
@@ -286,4 +286,4 @@ trace.jaeger.agent string the address of a Jaeger agent to receive traces using
trace.opentelemetry.collector string address of an OpenTelemetry trace collector to receive traces using the otel gRPC protocol, as <host>:<port>. If no port is specified, 4317 will be used.
trace.span_registry.enabled boolean true if set, ongoing traces can be seen at https://<ui>/#/debug/tracez
trace.zipkin.collector string the address of a Zipkin instance to receive traces, as <host>:<port>. If no port is specified, 9411 will be used.
-version version 22.1-44 set the active cluster version in the format '<major>.<minor>'
+version version 22.1-46 set the active cluster version in the format '<major>.<minor>'
2 changes: 1 addition & 1 deletion docs/generated/settings/settings.html
@@ -217,6 +217,6 @@
<tr><td><code>trace.opentelemetry.collector</code></td><td>string</td><td><code></code></td><td>address of an OpenTelemetry trace collector to receive traces using the otel gRPC protocol, as <host>:<port>. If no port is specified, 4317 will be used.</td></tr>
<tr><td><code>trace.span_registry.enabled</code></td><td>boolean</td><td><code>true</code></td><td>if set, ongoing traces can be seen at https://<ui>/#/debug/tracez</td></tr>
<tr><td><code>trace.zipkin.collector</code></td><td>string</td><td><code></code></td><td>the address of a Zipkin instance to receive traces, as <host>:<port>. If no port is specified, 9411 will be used.</td></tr>
-<tr><td><code>version</code></td><td>version</td><td><code>22.1-44</code></td><td>set the active cluster version in the format '<major>.<minor>'</td></tr>
+<tr><td><code>version</code></td><td>version</td><td><code>22.1-46</code></td><td>set the active cluster version in the format '<major>.<minor>'</td></tr>
</tbody>
</table>
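The bump from 22.1-44 to 22.1-46 in these generated settings files presumably
reflects cluster version gates added alongside the new descriptor fields. On a
live cluster, the active version can be inspected directly:

SHOW CLUSTER SETTING version;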
157 changes: 157 additions & 0 deletions pkg/ccl/backupccl/testdata/backup-restore/import-start-time
@@ -0,0 +1,157 @@
# Test that IMPORT properly sets and removes the ImportStartWallTime field on the table descriptor.
# The basic idea:
# For a table with or without data:
## - start and pause an import
## - check that the ImportStartWallTime is set on the descriptor
## - check that it is removed after cancellation / success

new-server name=s1
----


exec-sql
CREATE DATABASE d;
USE d;
CREATE TABLE foo (i INT PRIMARY KEY, s STRING);
CREATE TABLE baz (i INT PRIMARY KEY, s STRING);
INSERT INTO baz VALUES (1, 'x'),(2,'y'),(3,'z');
----


# Define two views that read the importStartWallTime and importType from foo's table descriptor.
exec-sql
CREATE VIEW import_time (importStartTime)
AS WITH tbls AS (
SELECT id, crdb_internal.pb_to_json('cockroach.sql.sqlbase.Descriptor', descriptor) AS orig FROM system.descriptor
)
SELECT orig->'table'->'importStartWallTime' FROM tbls
INNER JOIN (SELECT id FROM system.namespace WHERE name='foo') AS sys
ON sys.id = tbls.id;
----


exec-sql
CREATE VIEW import_type (importType)
AS WITH tbls AS (
SELECT id, crdb_internal.pb_to_json('cockroach.sql.sqlbase.Descriptor', descriptor) AS orig FROM system.descriptor
)
SELECT orig->'table'->'importType' FROM tbls
INNER JOIN (SELECT id FROM system.namespace WHERE name='foo') AS sys
ON sys.id = tbls.id;
----


exec-sql
EXPORT INTO CSV 'nodelocal://0/export1/' FROM SELECT * FROM baz WHERE i = 1;
----


exec-sql
SET CLUSTER SETTING jobs.debug.pausepoints = 'import.after_ingest';
----


import expect-pausepoint tag=a
IMPORT INTO foo (i,s) CSV DATA ('nodelocal://0/export1/export*-n*.0.csv')
----
job paused at pausepoint


query-sql regex=^"\d.
SELECT * FROM import_time
----
true


query-sql
SELECT * FROM import_type
----
"IMPORT_EXISTS_EMPTY"


# Attempting another import into foo should fail, as there is already an in-progress
# import on the table.
exec-sql
IMPORT INTO foo (i,s) CSV DATA ('nodelocal://0/export1/export*-n*.0.csv')
----
pq: relation "foo" is offline: importing


# Cancel the job so that the cleanup hook runs, and ensure the importStartWallTime is cleared.
job cancel=a
----


query-sql
SELECT * FROM import_time
----
<nil>


query-sql
SELECT * FROM import_type
----
<nil>


# Remove the pause setting, then retry the import and ensure it succeeds.
exec-sql
SET CLUSTER SETTING jobs.debug.pausepoints = '';
----


exec-sql
IMPORT INTO foo (i,s) CSV DATA ('nodelocal://0/export1/export*-n*.0.csv')
----


query-sql
SELECT * FROM import_time
----
<nil>


query-sql
SELECT * FROM import_type
----
<nil>


# Ensure that importing into a table that already contains data also modifies the descriptor properly.
exec-sql
EXPORT INTO CSV 'nodelocal://0/export2/' FROM SELECT * FROM baz WHERE i = 2;
----


exec-sql
SET CLUSTER SETTING jobs.debug.pausepoints = 'import.after_ingest';
----


import expect-pausepoint tag=b
IMPORT INTO foo (i,s) CSV DATA ('nodelocal://0/export2/export*-n*.0.csv')
----
job paused at pausepoint


query-sql regex=^"\d.
SELECT * FROM import_time
----
true


query-sql
SELECT * FROM import_type
----
"IMPORT_EXISTS_NON_EMPTY"


# Cancel the job so that the cleanup hook runs.
job cancel=b
----


query-sql
SELECT * FROM import_time
----
<nil>